In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.tree import DecisionTreeClassifier
from datetime import datetime



In [2]:
df = pd.read_csv('Preprocessed_data3.csv', index_col = 0)
df

Unnamed: 0,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,race_AfricanAmerican,race_Asian,...,glimepiride-pioglitazone_No,glimepiride-pioglitazone_Steady,metformin-rosiglitazone_No,metformin-rosiglitazone_Steady,metformin-pioglitazone_No,metformin-pioglitazone_Steady,change_Ch,change_No,diabetesMed_No,diabetesMed_Yes
0,5,42,4,13,0,0,1,9,1,0,...,1,0,1,0,1,0,0,1,0,1
1,2,56,0,9,0,0,0,8,0,0,...,1,0,1,0,1,0,0,1,1,0
2,1,44,0,8,0,0,0,8,0,0,...,1,0,1,0,1,0,0,1,1,0
3,1,73,0,16,0,0,1,7,1,0,...,1,0,1,0,1,0,1,0,0,1
4,1,22,0,13,0,0,0,9,0,0,...,1,0,1,0,1,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58502,2,47,1,12,0,0,1,9,0,0,...,1,0,1,0,1,0,0,1,0,1
58503,5,45,0,14,0,0,0,6,0,0,...,1,0,1,0,1,0,0,1,1,0
58504,1,30,3,5,1,0,0,4,0,0,...,1,0,1,0,1,0,0,1,1,0
58505,1,34,0,13,0,0,0,4,0,0,...,1,0,1,0,1,0,1,0,0,1


In [3]:
X = df.iloc[:,0:-2]
y = df.loc[:, 'diabetesMed_Yes']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 1)

In [4]:
accuracy_list = []
accuracy_depth_list = []
accuracy_criterion_list = []
precision_list = []
precision_depth_list = []
precision_criterion_list = []
f1_list = []
f1_depth_list = []
f1_criterion_list = []

## Trial 1

In [5]:
start = datetime.now()

In [6]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [7]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [8]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=10): 0.9994872671338233


In [9]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [10]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.9995693985933688


In [11]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [12]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 2

In [13]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 2)

In [14]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [15]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [16]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.9989745342676466


In [17]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [18]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.9991451773756946


In [19]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [20]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 3

In [21]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 3)

In [22]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [23]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [24]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.9985472568791659


In [25]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [26]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.9987996893313564


In [27]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [28]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 4

In [29]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 4)

In [30]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [31]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [32]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.9986327123568621


In [33]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [34]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.9988535396961881


In [35]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [36]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 5

In [37]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 5)

In [38]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [39]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [40]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.9988036233122544


In [41]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [42]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.9989971346704871


In [43]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [44]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 6

In [45]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 6)

In [46]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [47]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [48]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.9987181678345582


In [49]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [50]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.998935339626659


In [51]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [52]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 7

In [53]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 7)

In [54]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [55]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [56]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.999230900700735


In [57]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [58]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.9993632826317652


In [59]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [60]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 8

In [61]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 8)

In [62]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [63]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [64]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.9990599897453427


In [65]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [66]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.999216245101532


In [67]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [68]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 9

In [69]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 9)

In [70]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [71]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [72]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.9991454452230388


In [73]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [74]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=10): 0.9993039120144787


In [75]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [76]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Trial 10

In [77]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 10)

In [78]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [79]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [80]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= gini,depth=10): 0.9991454452230388


In [81]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=10),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [10]})

In [82]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= gini, depth=10): 0.9992872416250891


In [83]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

GridSearchCV(estimator=DecisionTreeClassifier(max_depth=1),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1]})

In [84]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= gini, depth=1): 1.0


## Results

In [85]:
data = {'Accuracy': accuracy_list, 'Accuracy Depth': accuracy_depth_list, 'Accuracy Criterion': accuracy_criterion_list,
        'Precision': precision_list, 'Precision Depth': precision_depth_list, 'Precision Criterion': precision_criterion_list,
        'F1_micro':f1_list, 'F1_micro Depth': f1_depth_list, 'F1_micro Criterion': f1_criterion_list}

In [86]:
trainingResults  = pd.DataFrame(data = data)
pd.options.display.max_colwidth = 100
trainingResults

Unnamed: 0,Accuracy,Accuracy Depth,Accuracy Criterion,Precision,Precision Depth,Precision Criterion,F1_micro,F1_micro Depth,F1_micro Criterion
0,0.999487,10,entropy,1.0,1,gini,0.999569,10,gini
1,0.998975,10,gini,1.0,1,gini,0.999145,10,gini
2,0.998547,10,gini,1.0,1,gini,0.9988,10,gini
3,0.998633,10,gini,1.0,1,gini,0.998854,10,entropy
4,0.998804,10,gini,1.0,1,gini,0.998997,10,gini
5,0.998718,10,gini,1.0,1,gini,0.998935,10,gini
6,0.999231,10,gini,1.0,1,gini,0.999363,10,gini
7,0.99906,10,gini,1.0,1,gini,0.999216,10,gini
8,0.999145,10,gini,1.0,1,gini,0.999304,10,entropy
9,0.999145,10,gini,1.0,1,gini,0.999287,10,gini


In [87]:
trainingResults.to_csv('DT_trainingResults3.csv')

### Testing Model

In [88]:
df = pd.read_csv('Preprocessed_data3.csv', index_col = 0)
trainingResults = pd.read_csv('DT_trainingResults3.csv', index_col = 0)
trainingResults

Unnamed: 0,Accuracy,Accuracy Depth,Accuracy Criterion,Precision,Precision Depth,Precision Criterion,F1_micro,F1_micro Depth,F1_micro Criterion
0,0.999487,10,entropy,1.0,1,gini,0.999569,10,gini
1,0.998975,10,gini,1.0,1,gini,0.999145,10,gini
2,0.998547,10,gini,1.0,1,gini,0.9988,10,gini
3,0.998633,10,gini,1.0,1,gini,0.998854,10,entropy
4,0.998804,10,gini,1.0,1,gini,0.998997,10,gini
5,0.998718,10,gini,1.0,1,gini,0.998935,10,gini
6,0.999231,10,gini,1.0,1,gini,0.999363,10,gini
7,0.99906,10,gini,1.0,1,gini,0.999216,10,gini
8,0.999145,10,gini,1.0,1,gini,0.999304,10,entropy
9,0.999145,10,gini,1.0,1,gini,0.999287,10,gini


In [89]:
X = df.iloc[:,0:-1]
y = df.loc[:, 'diabetesMed_Yes']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 11)

In [90]:
# initialize our classifier
max_depth = [9]
criterion = ["entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [91]:
print("Accuracy: " + str(acc[0]) + '\n'
      + "Precision: " + str(precision[0]) + '\n'
      + "F1: " + str(f1[0]))

Accuracy: 1.0
Precision: 1.0
F1: 1.0
