In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from datetime import datetime

In [2]:
df = pd.read_csv('Preprocessed_pca.csv', index_col = 0)
df

Unnamed: 0,PhysHlth,BMI,MentHlth,Income,Diabetes_binary
0,-6.667397,-6.392123,-1.111675,-0.458052,0
1,-3.863936,-7.178304,3.095224,-0.072141,0
2,-0.834798,0.081819,-3.494618,-4.087203,0
3,7.085908,-6.207388,-6.370345,-2.893083,0
4,-4.274580,7.404026,0.413793,1.106787,0
...,...,...,...,...,...
253659,-3.320172,9.505420,0.697311,0.792966,1
253668,-5.022866,1.523446,-0.484450,-2.206810,1
253670,2.529385,-4.995248,11.196980,-6.451818,1
253676,-6.559798,-9.188122,-1.661467,-3.481757,1


In [3]:
X = df[['PhysHlth', 'BMI', 'MentHlth', 'Income']]
y = df.loc[:, 'Diabetes_binary']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 1)

In [4]:
accuracy_list = []
accuracy_depth_list = []
accuracy_criterion_list = []
precision_list = []
precision_depth_list = []
precision_criterion_list = []
f1_list = []
f1_depth_list = []
f1_criterion_list = []

## Trial 1

In [5]:
start = datetime.now()

In [6]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [8]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7156113845979744


In [9]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [10]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.6344716400410376


In [11]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [12]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=6): 0.6596119929453262


## Trial 2

In [13]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 2)

In [14]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [16]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=9): 0.7165167204209811


In [17]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [18]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=9): 0.6281212841854934


In [19]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [20]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=9): 0.6554986815573135


In [5]:
accuracy_list.append(0.7156113845979744)
accuracy_depth_list.append(10)
accuracy_criterion_list.append("gini")
f1_list.append(0.6344716400410376)
f1_depth_list.append(10)
f1_criterion_list.append("gini")
precision_list.append(0.6596119929453262)
precision_depth_list.append(6)
precision_criterion_list.append("gini")

accuracy_list.append(0.7165167204209811)
accuracy_depth_list.append(9)
accuracy_criterion_list.append("gini")
f1_list.append(0.6281212841854934)
f1_depth_list.append(9)
f1_criterion_list.append("gini")
precision_list.append(0.6554986815573135)
precision_depth_list.append(9)
precision_criterion_list.append("gini")

## Trial 3

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 3)

In [7]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [9]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=9): 0.7136875459740848


In [10]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [11]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.6307636097917427


In [12]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [13]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=8): 0.6521471448360782


## Trial 4

In [14]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 4)

In [15]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [17]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7161206359984157


In [18]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [19]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.6304219533785778


In [20]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [21]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=7): 0.6525948262180606


## Trial 5

In [22]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 5)

In [23]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [25]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7139138799298365


In [26]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [27]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.6380747126436783


In [28]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [29]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=6): 0.6511118070779831


## Trial 6

In [30]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 6)

In [31]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [32]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [33]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=8): 0.7162338029762915


In [34]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [35]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.6382143376472299


In [36]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [37]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=8): 0.652676399026764


## Trial 7

In [38]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 7)

In [39]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [40]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [41]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=10): 0.7225145702484015


In [42]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [43]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.6391525049657912


In [44]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [45]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=8): 0.6684458398744113


## Trial 8

In [46]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 8)

In [47]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [48]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [49]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=9): 0.7148757992417812


In [50]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [51]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.6316408259240208


In [52]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [53]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=7): 0.6580237154150198


## Trial 9

In [54]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 9)

In [55]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [56]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [57]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7214394839585809


In [58]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [59]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.636884643644379


In [60]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [61]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=10): 0.661451142813315


## Trial 10

In [62]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 10)

In [63]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [64]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
randomF = RandomForestClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [65]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.7183273920669948


In [66]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
randomF = RandomForestClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [67]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.6334728648013512


In [68]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
randomF = RandomForestClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(randomF, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [69]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=6): 0.6651605615620462


## Results

In [70]:
data = {'Accuracy': accuracy_list, 'Accuracy Depth': accuracy_depth_list, 'Accuracy Criterion': accuracy_criterion_list,
        'Precision': precision_list, 'Precision Depth': precision_depth_list, 'Precision Criterion': precision_criterion_list,
        'F1_micro':f1_list, 'F1_micro Depth': f1_depth_list, 'F1_micro Criterion': f1_criterion_list}

In [71]:
trainingResults  = pd.DataFrame(data = data)
pd.options.display.max_colwidth = 100
trainingResults

Unnamed: 0,Accuracy,Accuracy Depth,Accuracy Criterion,Precision,Precision Depth,Precision Criterion,F1_micro,F1_micro Depth,F1_micro Criterion
0,0.715611,10,gini,0.659612,6,gini,0.634472,10,gini
1,0.716517,9,gini,0.655499,9,gini,0.628121,9,gini
2,0.713688,9,gini,0.652147,8,gini,0.630764,10,gini
3,0.716121,10,gini,0.652595,7,gini,0.630422,10,gini
4,0.713914,10,gini,0.651112,6,entropy,0.638075,10,gini
5,0.716234,8,gini,0.652676,8,gini,0.638214,10,gini
6,0.722515,10,entropy,0.668446,8,gini,0.639153,10,gini
7,0.714876,9,gini,0.658024,7,gini,0.631641,10,entropy
8,0.721439,10,gini,0.661451,10,entropy,0.636885,10,gini
9,0.718327,10,gini,0.665161,6,entropy,0.633473,10,gini


In [72]:
trainingResults.to_csv('RF_pca_trainingResults.csv')

### Testing Model

In [74]:
df = pd.read_csv('Preprocessed_pca.csv', index_col = 0)
trainingResults = pd.read_csv('RF_pca_trainingResults.csv', index_col = 0)
trainingResults

Unnamed: 0,Accuracy,Accuracy Depth,Accuracy Criterion,Precision,Precision Depth,Precision Criterion,F1_micro,F1_micro Depth,F1_micro Criterion
0,0.715611,10,gini,0.659612,6,gini,0.634472,10,gini
1,0.716517,9,gini,0.655499,9,gini,0.628121,9,gini
2,0.713688,9,gini,0.652147,8,gini,0.630764,10,gini
3,0.716121,10,gini,0.652595,7,gini,0.630422,10,gini
4,0.713914,10,gini,0.651112,6,entropy,0.638075,10,gini
5,0.716234,8,gini,0.652676,8,gini,0.638214,10,gini
6,0.722515,10,entropy,0.668446,8,gini,0.639153,10,gini
7,0.714876,9,gini,0.658024,7,gini,0.631641,10,entropy
8,0.721439,10,gini,0.661451,10,entropy,0.636885,10,gini
9,0.718327,10,gini,0.665161,6,entropy,0.633473,10,gini


In [75]:
X = df[['PhysHlth', 'BMI', 'MentHlth', 'Income']]
y = df.loc[:, 'Diabetes_binary']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 11)

In [76]:
# initialize our classifier
max_depth = [10]
criterion = ["gini"]

acc = []
f1 = []
precision = []

for i in max_depth:
    randomF = RandomForestClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(randomF, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [77]:
print("Accuracy: " + str(acc[0]) + '\n'
      + "Precision: " + str(precision[0]) + '\n'
      + "F1: " + str(f1[0]))

Accuracy: 0.7170259718214225
Precision: 0.6558975921975008
F1: 0.6325225953413183
