In [None]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier 
from sklearn import svm


# Read the diabetes data set from a CSV file in the working directory
df = pd.read_csv("diabetes.csv")

print(df.head())

# Columns used as model inputs; the label is taken from the Outcome column
feature_cols = [
    'Pregnancies',
    'Insulin',
    'BMI',
    'Age',
    'Glucose',
    'BloodPressure',
    'DiabetesPedigreeFunction',
]
X = df[feature_cols]
y = df["Outcome"]

# Hold out 30% of the rows for testing; the fixed seed keeps the partition
# identical on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

print(X_train.head())

In [None]:
# Baseline model: a single decision tree with default hyper-parameters.
# random_state is pinned (same seed as the train/test split) because
# scikit-learn breaks ties between equally good splits at random; without it
# the fitted tree, and every metric printed below, can change between runs.
clf = DecisionTreeClassifier(random_state=1)

# fit() trains in place and returns the estimator itself, so no rebinding needed
clf.fit(X_train, y_train)

# Hard class labels predicted for the held-out rows
y_pred = clf.predict(X_test)

# Score for the class in column 1 of predict_proba (the second entry of
# clf.classes_ -- presumably Outcome == 1; confirm against the data)
y_pred_prob = clf.predict_proba(X_test)[:, 1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
# NOTE: this AUC is computed from hard labels, i.e. a ROC curve with a single
# operating point; the "prob" variant below ranks by predicted probability.
print("AUC Score:", roc_auc_score(y_test, y_pred))
print("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

In [None]:
# Random forest of 100 trees, each split considering 3 candidate features.
# random_state is pinned (same seed as the train/test split) because both the
# bootstrap sampling of rows and the per-split feature subsampling are random;
# without it the printed metrics differ on every run.
rfc = RandomForestClassifier(n_estimators=100, max_features=3, random_state=1)

# fit() trains in place and returns the estimator itself, so no rebinding needed
rfc.fit(X_train, y_train)

# Hard class labels predicted for the held-out rows
y_pred = rfc.predict(X_test)

# Score for the class in column 1 of predict_proba (the second entry of
# rfc.classes_ -- presumably Outcome == 1; confirm against the data)
y_pred_prob = rfc.predict_proba(X_test)[:, 1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
# NOTE: this AUC is computed from hard labels, i.e. a ROC curve with a single
# operating point; the "prob" variant below ranks by predicted probability.
print("AUC Score:", roc_auc_score(y_test, y_pred))
print("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))