In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from datetime import datetime

In [2]:
df = pd.read_csv('Preprocessed_data.csv', index_col = 0)
df

Unnamed: 0,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,Veggies,...,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income,Diabetes_binary
0,0,0,1,23,0,0,0,1,0,0,...,0,1,0,0,0,0,11,5,7,0
1,1,0,1,19,0,0,0,0,1,1,...,0,3,0,0,0,0,6,6,8,0
2,0,0,1,26,1,0,0,1,1,1,...,0,2,0,0,0,0,1,4,4,0
3,0,1,1,22,0,0,0,1,1,1,...,0,1,0,0,0,1,12,4,2,0
4,0,0,1,22,0,0,0,0,1,1,...,0,1,0,0,0,0,4,6,8,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253659,0,1,1,37,0,0,0,0,0,1,...,0,4,0,0,0,0,6,4,1,1
253668,0,1,1,29,1,0,1,0,1,1,...,0,2,0,0,1,1,10,3,6,1
253670,1,1,1,25,0,0,1,0,1,0,...,0,5,15,0,1,0,13,6,4,1
253676,1,1,1,18,0,0,0,0,0,0,...,0,4,0,0,1,0,11,2,4,1


In [3]:
X = df.iloc[:,0:-1]
y = df.loc[:, 'Diabetes_binary']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 1)

In [4]:
accuracy_list = []
accuracy_depth_list = []
accuracy_criterion_list = []
precision_list = []
precision_depth_list = []
precision_criterion_list = []
f1_list = []
f1_depth_list = []
f1_criterion_list = []

## Trial 1

In [5]:
start = datetime.now()

In [6]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [7]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [8]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=10): 0.7492785605160415


In [9]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [10]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.6798065121651866


In [11]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [12]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=1): 0.7659649122807017


## Trial 2

In [13]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 2)

In [14]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [15]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [16]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=9): 0.7491653935381656


In [17]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [18]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6786647314949202


In [19]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [20]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=1): 0.7856209150326797


## Trial 3

In [21]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 3)

In [22]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [23]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [24]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7482034742262208


In [25]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [26]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6780881082637489


In [27]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [28]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=1): 0.7856782652546647


## Trial 4

In [29]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 4)

In [30]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [31]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [32]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=10): 0.7479771402704691


In [33]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [34]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6790176647996553


In [35]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [36]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=1): 0.7893189612934836


## Trial 5

In [37]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 5)

In [38]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [39]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [40]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.749561477960731


In [41]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [42]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6816618911174785


In [43]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [44]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 0.7679003804911795


## Trial 6

In [45]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 6)

In [46]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [47]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [48]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7532959882306343


In [49]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [50]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6832709473078029


In [51]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [52]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 0.7838235294117647


## Trial 7

In [53]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 7)

In [54]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [55]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [56]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7544842414983308


In [57]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [58]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6845647297199507


In [59]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [60]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 0.7655259822560203


## Trial 8

In [61]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 8)

In [62]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [63]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [64]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7507497312284276


In [65]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [66]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6838261875497073


In [67]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [68]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=1): 0.7710196779964222


## Trial 9

In [69]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 9)

In [70]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [71]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [72]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7549369094098342


In [73]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [74]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6885599310146594


In [75]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [76]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=1): 0.8116279069767441


## Trial 10

In [77]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 10)

In [78]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [79]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [80]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7529564872970067


In [81]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [82]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6878178041968057


In [83]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [84]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 0.7652315190901706


## Results

In [85]:
data = {'Accuracy': accuracy_list, 'Accuracy Depth': accuracy_depth_list, 'Accuracy Criterion': accuracy_criterion_list,
        'Precision': precision_list, 'Precision Depth': precision_depth_list, 'Precision Criterion': precision_criterion_list,
        'F1_micro':f1_list, 'F1_micro Depth': f1_depth_list, 'F1_micro Criterion': f1_criterion_list}

In [86]:
trainingResults  = pd.DataFrame(data = data)
pd.options.display.max_colwidth = 100
trainingResults

Unnamed: 0,Accuracy,Accuracy Depth,Accuracy Criterion,Precision,Precision Depth,Precision Criterion,F1_micro,F1_micro Depth,F1_micro Criterion
0,0.749279,10,entropy,0.765965,1,entropy,0.679807,10,gini
1,0.749165,9,gini,0.785621,1,entropy,0.678665,10,entropy
2,0.748203,10,gini,0.785678,1,entropy,0.678088,10,entropy
3,0.747977,10,entropy,0.789319,1,entropy,0.679018,10,entropy
4,0.749561,10,gini,0.7679,1,gini,0.681662,10,entropy
5,0.753296,10,gini,0.783824,1,gini,0.683271,10,entropy
6,0.754484,10,gini,0.765526,1,gini,0.684565,10,entropy
7,0.75075,10,gini,0.77102,1,entropy,0.683826,10,entropy
8,0.754937,10,gini,0.811628,1,entropy,0.68856,10,entropy
9,0.752956,10,gini,0.765232,1,gini,0.687818,10,entropy


In [87]:
trainingResults.to_csv('RF_trainingResults.csv')

### Testing Model

In [2]:
df = pd.read_csv('Preprocessed_data.csv', index_col = 0)
trainingResults = pd.read_csv('RF_trainingResults.csv', index_col = 0)
trainingResults

Unnamed: 0,Accuracy,Accuracy Depth,Accuracy Criterion,Precision,Precision Depth,Precision Criterion,F1_micro,F1_micro Depth,F1_micro Criterion
0,0.749279,10,entropy,0.765965,1,entropy,0.679807,10,gini
1,0.749165,9,gini,0.785621,1,entropy,0.678665,10,entropy
2,0.748203,10,gini,0.785678,1,entropy,0.678088,10,entropy
3,0.747977,10,entropy,0.789319,1,entropy,0.679018,10,entropy
4,0.749561,10,gini,0.7679,1,gini,0.681662,10,entropy
5,0.753296,10,gini,0.783824,1,gini,0.683271,10,entropy
6,0.754484,10,gini,0.765526,1,gini,0.684565,10,entropy
7,0.75075,10,gini,0.77102,1,entropy,0.683826,10,entropy
8,0.754937,10,gini,0.811628,1,entropy,0.68856,10,entropy
9,0.752956,10,gini,0.765232,1,gini,0.687818,10,entropy


In [3]:
X = df.iloc[:,0:-1]
y = df.loc[:, 'Diabetes_binary']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 11)

In [4]:
# initialize our classifier
max_depth = [10]
criterion = ["entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [5]:
print("Accuracy: " + str(acc[0]) + '\n'
      + "Precision: " + str(precision[0]) + '\n'
      + "F1: " + str(f1[0]))

Accuracy: 0.7521643184518757
Precision: 0.6944728015166982
F1: 0.6849827387802072
