<h2>Diabetes classification with the Scikit-learn library</h2>

<h2>Load data</h2>

In [1]:
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Read the CSV into a plain NumPy array; the last column holds the class
# label and every column before it is cast to float and used as a feature.
np_data = pd.read_csv("data/diabetes.csv").values
X_raw, y_raw = np_data[:, :-1].astype(float), np_data[:, -1]

# Reorder the rows; the fixed seed keeps the order identical across runs
X_raw, y_raw = shuffle(X_raw, y_raw, random_state=0)

# Map the class label strings to integer codes
encoder = LabelEncoder()
y = encoder.fit_transform(y_raw)

# Standardize each feature column so no input dominates through sheer magnitude.
# NOTE(review): the scaler is fitted on every row here, i.e. before any
# train/test split, so statistics of rows that later become test data are
# baked into the scaled features.
scaler = StandardScaler()
X = scaler.fit_transform(X_raw)

# Show one scaled row with its original and encoded label, then the overall shape
print("Example:")
print(X[0], "->", y_raw[0], "=", y[0])
print("")
print("Data shape:", X.shape)

Example:
[-0.84488505  2.44447821  0.35643175  1.40909441 -0.69289057  1.38436175
  2.784923   -0.95646168] -> YES = 1

Data shape: (768, 8)


<h2>Train-test split</h2>

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the remaining 80% are used for training.
# random_state is fixed so that a fresh "Restart & Run All" produces the same
# partition (and therefore the same downstream scores) every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Report the size of each partition
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)

Training data shape: (614, 8)
Testing data shape: (154, 8)


<h2>Train and evaluate model on training data</h2>

In [3]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Build the neural network (at most 2000 iterations, fixed seed) and fit it
# on the training partition; fit() returns the estimator itself.
model = MLPClassifier(max_iter=2000, random_state=42).fit(X_train, y_train)

# Score the model on the very rows it was fitted on. This shows how well the
# training data was learned, not how well the model generalizes.
y_pred = model.predict(X_train)
conf_mx = confusion_matrix(y_train, y_pred)
accuracy = accuracy_score(y_train, y_pred)

# Report accuracy as a percentage followed by the confusion matrix
print("Accuracy: {0:.2f}%".format(accuracy * 100.0))
print("Confusion Matrix:")
print(conf_mx)

Accuracy: 84.36%
Confusion Matrix:
[[354  45]
 [ 51 164]]


<h2>Train on training data and evaluate model on test data</h2>

In [4]:
# Create a fresh neural network (at most 2000 iterations, fixed seed) and fit
# it on the training data; this rebinds `model` to the newly fitted estimator.
model = MLPClassifier(max_iter=2000, random_state=42).fit(X_train, y_train)

# Score the model on the held-out test partition
y_pred = model.predict(X_test)
conf_mx = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report accuracy as a percentage followed by the confusion matrix
print("Accuracy: {0:.2f}%".format(accuracy * 100.0))
print("Confusion Matrix:")
print(conf_mx)

Accuracy: 75.97%
Confusion Matrix:
[[81 20]
 [17 36]]


<h2>Predict new examples</h2>

In [5]:
# Two hand-written rows of 8 raw (unscaled) feature values each.
# NOTE(review): the values are assumed to follow the CSV's feature column
# order - confirm against data/diabetes.csv.
example = [
    [6, 149, 71, 34, 0, 33.6, 0.637, 48],
    [1, 83, 67, 28, 0, 27.6, 0.359, 32],
]

# Reuse the already-fitted scaler (transform only, no refit) so the new rows
# are put on the same scale as the data the model was trained with.
example = scaler.transform(example)

# Predict the encoded class for each row
res = model.predict(example)
print("Prediction:", res)

Prediction: [1 0]
