<h2>Iris dataset in Scikit-learn library</h2>

<h2>Load data</h2>

In [29]:
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Load data
url = "https://raw.githubusercontent.com/jhagelback/ml-workshop/master/data/diabetes.csv"
np_data = pd.read_csv(url).values
#print(np_data)

# Split data into X and y
X_raw = np_data[:,0:-1].astype(float)
y_raw = np_data[:,-1]
#print(X_raw)
#print(y_raw)

# Shuffle data
X_raw, y_raw = shuffle(X_raw, y_raw, random_state=0)
#print(y_raw)

# Convert class label strings to integers
encoder = LabelEncoder()
encoder.fit(y_raw)
y = encoder.transform(y_raw)

# Normalize data to avoid high input values
scaler = StandardScaler()
scaler.fit(X_raw)
X = scaler.transform(X_raw)

# Print some stuff
print("Example:")
print(X[0],"->",y_raw[0],"=",y[0])
print("Data shape:", X.shape)

Example:
[-0.84488505  2.44447821  0.35643175  1.40909441 -0.69289057  1.38436175
  2.784923   -0.95646168] -> YES = 1
Data shape: (768, 8)


<h2>Train-test split</h2>

In [30]:
from sklearn.model_selection import train_test_split

# Split data into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Print some stuff
print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)

Training data shape: (614, 8)
Test data shape: (154, 8)


<h2>Train and evaluate model on training data</h2>

In [31]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Define neural network model
model = MLPClassifier(max_iter=1000, random_state=42)

# Train model on training data
model.fit(X_train, y_train)

# Evaluate on training data
y_pred = model.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
conf_mx = confusion_matrix(y_train, y_pred)

# Print results
print("Accuracy: {0:.2f}%".format(accuracy * 100.0))
print("Confusion Matrix:")
print(conf_mx)

Accuracy: 83.55%
Confusion Matrix:
[[368  37]
 [ 64 145]]


<h2>Train on training data and evaluate model on test data</h2>

In [32]:
# Define neural network model
model = MLPClassifier(max_iter=100, random_state=42)

# Train model on training data
model.fit(X_train, y_train)

# Evaluate on training data
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_mx = confusion_matrix(y_test, y_pred)

# Print results
print("Accuracy: {0:.2f}%".format(accuracy * 100.0))
print("Confusion Matrix:")
print(conf_mx)

Accuracy: 71.43%
Confusion Matrix:
[[85 10]
 [34 25]]




## Random Forest

In [33]:
from sklearn.ensemble import RandomForestClassifier

# Define Random Forest model
model = RandomForestClassifier(n_estimators=25)

# Train model on training data
model.fit(X_train, y_train)

# Evaluate on training data
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_mx = confusion_matrix(y_test, y_pred)

# Print results
print("Accuracy: {0:.2f}%".format(accuracy * 100.0))
print("Confusion Matrix:")
print(conf_mx)

Accuracy: 72.73%
Confusion Matrix:
[[81 14]
 [28 31]]


<h2>Predict new examples</h2>

In [19]:
# Create two new examples
example = [
    [6.1,2.6,5.0,1.5]
]

# Normalize values
example = scaler.transform(example)

# Make prediction
res = model.predict(example)
print("Prediction:", res)

Prediction: [2]
