In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier 
from sklearn import svm


# load dataset
df = pd.read_csv("diabetes.csv")

print(df.head())

#split dataset in features and target variable
feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age','Glucose','BloodPressure','DiabetesPedigreeFunction']
X = df[feature_cols] # Features
y = df.Outcome # Target variable

# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test

print(X_train.head())

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  
     Pregnancies  Insulin   BMI  Age  Glucose  BloodPressure  \
88            15      110  37.1   43      136             70   
467            0      100  36.8   25       97             64   
550            1        0  27.4   21      116             70   
147            2      119  30.5   34      106  

In [2]:
# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

y_pred_prob = clf.predict_proba(X_test)[:,1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print ("AUC Score:", roc_auc_score(y_test, y_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print ("Precision:", precision_score(y_test, y_pred))
print ("Recall:", recall_score(y_test, y_pred))
print ("F1 Score:", f1_score(y_test, y_pred))

[[116  30]
 [ 41  44]]
Accuracy: 0.6926406926406926
AUC Score: 0.6560838033843674
AUC Score prob: 0.6560838033843674
Precision: 0.5945945945945946
Recall: 0.5176470588235295
F1 Score: 0.5534591194968554


In [3]:
rfc = RandomForestClassifier(n_estimators=100, max_features=3)

rfc = rfc.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = rfc.predict(X_test)

y_pred_prob = rfc.predict_proba(X_test)[:,1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print ("AUC Score:", roc_auc_score(y_test, y_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print ("Precision:", precision_score(y_test, y_pred))
print ("Recall:", recall_score(y_test, y_pred))
print ("F1 Score:", f1_score(y_test, y_pred))

[[131  15]
 [ 33  52]]
Accuracy: 0.7922077922077922
AUC Score: 0.7545124899274779
AUC Score prob: 0.8573327961321514
Precision: 0.7761194029850746
Recall: 0.611764705882353
F1 Score: 0.6842105263157895


In [4]:
from keras import models
from keras import layers
network = models.Sequential()
network.add(layers.Dense(100, input_dim=7, activation='sigmoid'))
network.add(layers.Dense(1, activation='sigmoid'))

Using TensorFlow backend.


In [5]:
network.compile(optimizer='rmsprop',
loss='mse',
metrics=['accuracy'])

In [7]:
network.fit(X_train, y_train, epochs=20, batch_size=10)

#nn_pred= network.predict(X_test)
nn_pred = network.predict_classes(X_test)
nn_pred_prob= network.predict(X_test)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:
print(confusion_matrix(y_test, nn_pred))
print("Accuracy:",metrics.accuracy_score(y_test, nn_pred))
print ("AUC Score:", roc_auc_score(y_test, nn_pred))
print ("AUC Score prob:", roc_auc_score(y_test, nn_pred_prob))
print ("Precision:", precision_score(y_test, nn_pred))
print ("Recall:", recall_score(y_test, nn_pred))
print ("F1 Score:", f1_score(y_test, nn_pred))

[[133  13]
 [ 52  33]]
Accuracy: 0.7186147186147186
AUC Score: 0.6495970991136181
AUC Score prob: 0.7642224012892828
Precision: 0.717391304347826
Recall: 0.38823529411764707
F1 Score: 0.5038167938931297


In [9]:
network = models.Sequential()
network.add(layers.Dense(100, input_dim=7, activation='sigmoid'))
network.add(layers.Dense(1, activation='sigmoid'))

network.compile(optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy'])

network.fit(X_train, y_train, epochs=20, batch_size=10)

#nn_pred= network.predict(X_test)
nn_pred = network.predict_classes(X_test)
nn_pred_prob= network.predict(X_test)

print(confusion_matrix(y_test, nn_pred))
print("Accuracy:",metrics.accuracy_score(y_test, nn_pred))
print ("AUC Score:", roc_auc_score(y_test, nn_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print ("Precision:", precision_score(y_test, nn_pred))
print ("Recall:", recall_score(y_test, nn_pred))
print ("F1 Score:", f1_score(y_test, nn_pred))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[120  26]
 [ 36  49]]
Accuracy: 0.7316017316017316
AUC Score: 0.699194198227236
AUC Score prob: 0.8573327961321514
Precision: 0.6533333333333333
Recall: 0.5764705882352941
F1 Score: 0.6124999999999999


In [10]:
network = models.Sequential()
network.add(layers.Dense(12, input_dim=7, activation='sigmoid'))
network.add(layers.Dense(7, activation='sigmoid'))
network.add(layers.Dense(1, activation='sigmoid'))

network.compile(optimizer='rmsprop',
loss='mse',
metrics=['accuracy'])

network.fit(X_train, y_train, epochs=20, batch_size=10)

#nn_pred= network.predict(X_test)
nn_pred = network.predict_classes(X_test)
nn_pred_prob= network.predict(X_test)

print(confusion_matrix(y_test, nn_pred))
print("Accuracy:",metrics.accuracy_score(y_test, nn_pred))
print ("AUC Score:", roc_auc_score(y_test, nn_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print ("Precision:", precision_score(y_test, nn_pred))
print ("Recall:", recall_score(y_test, nn_pred))
print ("F1 Score:", f1_score(y_test, nn_pred))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[141   5]
 [ 78   7]]
Accuracy: 0.6406926406926406
AUC Score: 0.5240531829170024
AUC Score prob: 0.8573327961321514
Precision: 0.5833333333333334
Recall: 0.08235294117647059
F1 Score: 0.14432989690721648


In [None]:
network = models.Sequential()
network.add(layers.Dense(12, input_dim=7, activation='sigmoid'))
network.add(layers.Dense(7, activation='sigmoid'))
network.add(layers.Dense(1, activation='sigmoid'))

network.compile(optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy'])

network.fit(X_train, y_train, epochs=20, batch_size=10)

#nn_pred= network.predict(X_test)
nn_pred = network.predict_classes(X_test)
nn_pred_prob= network.predict(X_test)

print(confusion_matrix(y_test, nn_pred))
print("Accuracy:",metrics.accuracy_score(y_test, nn_pred))
print ("AUC Score:", roc_auc_score(y_test, nn_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print ("Precision:", precision_score(y_test, nn_pred))
print ("Recall:", recall_score(y_test, nn_pred))
print ("F1 Score:", f1_score(y_test, nn_pred))