In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.tree import DecisionTreeClassifier
from datetime import datetime

In [2]:
df = pd.read_csv('Preprocessed_pca.csv', index_col = 0)
df

Unnamed: 0,PhysHlth,BMI,MentHlth,Income,Diabetes_binary
0,-6.667397,-6.392123,-1.111675,-0.458052,0
1,-3.863936,-7.178304,3.095224,-0.072141,0
2,-0.834798,0.081819,-3.494618,-4.087203,0
3,7.085908,-6.207388,-6.370345,-2.893083,0
4,-4.274580,7.404026,0.413793,1.106787,0
...,...,...,...,...,...
253659,-3.320172,9.505420,0.697311,0.792966,1
253668,-5.022866,1.523446,-0.484450,-2.206810,1
253670,2.529385,-4.995248,11.196980,-6.451818,1
253676,-6.559798,-9.188122,-1.661467,-3.481757,1


In [3]:
X = df[['PhysHlth', 'BMI', 'MentHlth', 'Income']]
y = df.loc[:, 'Diabetes_binary']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 1)

In [4]:
accuracy_list = []
accuracy_depth_list = []
accuracy_criterion_list = []
precision_list = []
precision_depth_list = []
precision_criterion_list = []
f1_list = []
f1_depth_list = []
f1_criterion_list = []

## Trial 1

In [5]:
start = datetime.now()

In [6]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [8]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=8): 0.7073501952130369


In [9]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [10]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=2): 0.6430315006393119


In [11]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [12]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=4): 0.6655553240258387


## Trial 2

In [13]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 2)

In [14]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [16]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=9): 0.7067277768347197


In [17]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [18]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=2): 0.6299460704114404


In [19]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [20]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=7): 0.6505127356930201


## Trial 3

In [21]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 3)

In [22]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [24]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=6): 0.7066146098568438


In [25]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [26]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=2): 0.6384078884078884


In [27]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [28]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=5): 0.6451721809588116


## Trial 4

In [29]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 4)

In [30]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [32]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=7): 0.7046341877440163


In [33]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [34]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=8): 0.6389484607402284


In [35]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [36]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=3): 0.6594076655052264


## Trial 5

In [37]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 5)

In [38]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [39]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [40]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=9): 0.7013523453856165


In [41]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [42]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=2): 0.6391680706442806


In [43]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [44]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=5): 0.6791714614499424


## Trial 6

In [45]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 6)

In [46]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [47]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [48]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=8): 0.7084252815028574


In [49]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [50]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=2): 0.6415715798141395


In [51]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [52]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=3): 0.6670496983625395


## Trial 7

In [53]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 7)

In [54]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [55]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [56]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=8): 0.7128953771289538


In [57]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [58]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=7): 0.6383099390330356


In [59]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [60]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=3): 0.6719072164948454


## Trial 8

In [61]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 8)

In [62]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [63]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [64]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=8): 0.7071238612572851


In [65]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [66]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=2): 0.6436781609195402


In [67]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [68]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=5): 0.6973486743371686


## Trial 9

In [69]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 9)

In [70]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [71]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [72]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=8): 0.7127822101510779


In [73]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [74]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=2): 0.6414063427960566


In [75]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [76]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=3): 0.6593296648324162


## Trial 10

In [77]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 10)

In [78]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

  _warn_prf(average, modifier, msg_start, len(result))


In [79]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [80]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=9): 0.7104057036156849


In [81]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [82]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=2): 0.6485151426051161


In [83]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [84]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=4): 0.6647998340593237


## Results

In [85]:
data = {'Accuracy': accuracy_list, 'Accuracy Depth': accuracy_depth_list, 'Accuracy Criterion': accuracy_criterion_list,
        'Precision': precision_list, 'Precision Depth': precision_depth_list, 'Precision Criterion': precision_criterion_list,
        'F1_micro':f1_list, 'F1_micro Depth': f1_depth_list, 'F1_micro Criterion': f1_criterion_list}

In [86]:
trainingResults  = pd.DataFrame(data = data)
pd.options.display.max_colwidth = 100
trainingResults

Unnamed: 0,Accuracy,Accuracy Depth,Accuracy Criterion,Precision,Precision Depth,Precision Criterion,F1_micro,F1_micro Depth,F1_micro Criterion
0,0.70735,8,entropy,0.665555,4,gini,0.643032,2,gini
1,0.706728,9,entropy,0.650513,7,gini,0.629946,2,gini
2,0.706615,6,gini,0.645172,5,gini,0.638408,2,gini
3,0.704634,7,gini,0.659408,3,gini,0.638948,8,entropy
4,0.701352,9,gini,0.679171,5,gini,0.639168,2,gini
5,0.708425,8,entropy,0.66705,3,gini,0.641572,2,gini
6,0.712895,8,entropy,0.671907,3,gini,0.63831,7,gini
7,0.707124,8,entropy,0.697349,5,gini,0.643678,2,gini
8,0.712782,8,gini,0.65933,3,gini,0.641406,2,gini
9,0.710406,9,entropy,0.6648,4,gini,0.648515,2,gini


In [87]:
trainingResults.to_csv('DT_pca_trainingResults.csv')

### Testing Model

In [90]:
df = pd.read_csv('Preprocessed_pca.csv', index_col = 0)
trainingResults = pd.read_csv('DT_pca_trainingResults.csv', index_col = 0)
trainingResults

Unnamed: 0,Accuracy,Accuracy Depth,Accuracy Criterion,Precision,Precision Depth,Precision Criterion,F1_micro,F1_micro Depth,F1_micro Criterion
0,0.70735,8,entropy,0.665555,4,gini,0.643032,2,gini
1,0.706728,9,entropy,0.650513,7,gini,0.629946,2,gini
2,0.706615,6,gini,0.645172,5,gini,0.638408,2,gini
3,0.704634,7,gini,0.659408,3,gini,0.638948,8,entropy
4,0.701352,9,gini,0.679171,5,gini,0.639168,2,gini
5,0.708425,8,entropy,0.66705,3,gini,0.641572,2,gini
6,0.712895,8,entropy,0.671907,3,gini,0.63831,7,gini
7,0.707124,8,entropy,0.697349,5,gini,0.643678,2,gini
8,0.712782,8,gini,0.65933,3,gini,0.641406,2,gini
9,0.710406,9,entropy,0.6648,4,gini,0.648515,2,gini


In [91]:
X = df[['PhysHlth', 'BMI', 'MentHlth', 'Income']]
y = df.loc[:, 'Diabetes_binary']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 11)

In [96]:
# initialize our classifier
max_depth = [8]
criterion = ["gini"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [97]:
print("Accuracy: " + str(acc[0]) + '\n'
      + "Precision: " + str(precision[0]) + '\n'
      + "F1: " + str(f1[0]))

Accuracy: 0.7099530357041816
Precision: 0.6547945205479452
F1: 0.6131904618170841
