# Decision Tree Demo

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.tree import DecisionTreeClassifier
from datetime import datetime

In [2]:
df = pd.read_csv('Preprocessed_data.csv', index_col = 0)
df

Unnamed: 0,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,Veggies,...,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income,Diabetes_binary
0,0,0,1,23,0,0,0,1,0,0,...,0,1,0,0,0,0,11,5,7,0
1,1,0,1,19,0,0,0,0,1,1,...,0,3,0,0,0,0,6,6,8,0
2,0,0,1,26,1,0,0,1,1,1,...,0,2,0,0,0,0,1,4,4,0
3,0,1,1,22,0,0,0,1,1,1,...,0,1,0,0,0,1,12,4,2,0
4,0,0,1,22,0,0,0,0,1,1,...,0,1,0,0,0,0,4,6,8,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253659,0,1,1,37,0,0,0,0,0,1,...,0,4,0,0,0,0,6,4,1,1
253668,0,1,1,29,1,0,1,0,1,1,...,0,2,0,0,1,1,10,3,6,1
253670,1,1,1,25,0,0,1,0,1,0,...,0,5,15,0,1,0,13,6,4,1
253676,1,1,1,18,0,0,0,0,0,0,...,0,4,0,0,1,0,11,2,4,1


In [3]:
X = df.iloc[:,0:-1]
y = df.loc[:, 'Diabetes_binary']
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, shuffle=True, random_state = 1)

In [4]:
accuracy_list = []
accuracy_depth_list = []
accuracy_criterion_list = []
precision_list = []
precision_depth_list = []
precision_criterion_list = []
f1_list = []
f1_depth_list = []
f1_criterion_list = []

In [5]:
# initialize our classifier
max_depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
criterion = ["gini", "entropy"]

acc = []
f1 = []
precision = []

for i in max_depth:
    decisionT = DecisionTreeClassifier()
    parameters = {"criterion":criterion,"max_depth":[i]}
    search_results =  GridSearchCV(decisionT, parameters)
    search_results.fit(X_train, Y_train)
    acc.append(search_results.score(X_test, Y_test))
    Y_Predicted = search_results.predict(X_test)
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))

In [6]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[acc.index(max(acc))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[acc.index(max(acc))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [7]:
#get best parameter for metric and print
opt_depth = search_results.best_params_["max_depth"]
opt_accuracy_criterion = search_results.best_params_["criterion"]
accuracy = search_results.score(X_test, Y_test)

accuracy_list.append(accuracy)
accuracy_depth_list.append(opt_depth)
accuracy_criterion_list.append(opt_accuracy_criterion)

print(f'Accuracy for optimum classifier(criterion= {opt_accuracy_criterion},depth={opt_depth}): {accuracy}')

Accuracy for optimum classifier(criterion= entropy,depth=9): 0.7427148757992418


In [8]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[f1.index(max(f1))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[f1.index(max(f1))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [9]:
#get best parameter for metric and print
opt_f1_depth = search_results.best_params_["max_depth"]
opt_f1_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(Y_test,Y_Predicted)

f1_list.append(f1_Score)
f1_depth_list.append(opt_f1_depth)
f1_criterion_list.append(opt_f1_criterion)

print(f'F1 Score for optimum classifier(criterion= {opt_f1_criterion}, depth={opt_f1_depth}): {f1_Score}')

F1 Score for optimum classifier(criterion= entropy, depth=9): 0.6749964301013852


In [10]:
# initialize our classifier
parameters = {"criterion":["gini", "entropy"],"max_depth":[max_depth[precision.index(max(precision))]]}
decisionT = DecisionTreeClassifier(max_depth = max_depth[precision.index(max(precision))])
search_results =  GridSearchCV(decisionT, parameters)

# fit the classifier with the training data
search_results.fit(X_train, Y_train)

In [11]:
#get best parameter for metric and print
opt_precision_depth = search_results.best_params_["max_depth"]
opt_precision_criterion = search_results.best_params_["criterion"]

Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(Y_test,Y_Predicted)

precision_list.append(Precision_Score)
precision_depth_list.append(opt_precision_depth)
precision_criterion_list.append(opt_precision_criterion)

print(f'Precision Score for optimum classifier(criterion= {opt_precision_criterion}, depth={opt_precision_depth}): {Precision_Score}')

Precision Score for optimum classifier(criterion= entropy, depth=9): 0.6849116710107154
