In [24]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
from sklearn.tree import plot_tree
from sklearn.model_selection import cross_val_score, KFold
from timeit import default_timer as timer
import time
from statistics import *
from sklearn.metrics import matthews_corrcoef
import warnings
import math
warnings.filterwarnings('ignore')
import numpy as np

In [25]:
path = r"/Users/nasim/Desktop/data/HeartFailurePrediction.csv"
df = pd.read_csv(path)
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [26]:
X = df[['age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
       'ejection_fraction', 'high_blood_pressure', 'platelets',
       'serum_creatinine', 'serum_sodium', 'sex', 'smoking', 'time']]
y = df['DEATH_EVENT']

In [27]:
# Python program to get average of a list
def average_list(lst):
    return mean(lst)

In [28]:
# Cross Validation 20 folds : Gini
def create_dt_gini(criter='gini'):
    a = timer()
    
    kf = KFold(n_splits=20)
    scores = []
    mcc_scores = []
    
    dt = DecisionTreeClassifier(criterion=criter)
    target_names = ['0', '1',]
    
    for train_index, test_index in kf.split(X):
        #print("Train index: {0}, \nTest index: {1}".format(train_index, test_index))
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        dt.fit(X_train, y_train)
        
        y_pred = dt.predict(X_test)
        
        mcc_scores.append(matthews_corrcoef(y_test, y_pred))
        
        print(classification_report(y_test, y_pred, target_names=target_names))
        
        scores.append(dt.score(X_test, y_test))
        
    b = timer()
    

    delta = b - a
    
    
    accuracy = str(np.mean(scores))
    generation_time = str(delta)
    mcc_avg = average_list(mcc_scores)
    return accuracy,generation_time, mcc_avg

In [29]:
accuracies = []
times = []
mccs = []
for _ in range(30):
    #change criterion to 'entropy' for info gain calculation and 'gini' for gini index calculation
    acc, gtime, mcc_av = create_dt_gini(criter = "gini")
    accuracies.append(acc)
    times.append(gtime)
    mccs.append(mcc_av)

conveted_accuracies = [float(x) for x in accuracies]
converted_times =  [float(x) for x in times]
converted_mccs = [float(x) for x in mccs]

avg_accuracy = average_list(conveted_accuracies)
avg_time = average_list(converted_times)
avg_mcc = average_list(converted_mccs)

print('*'*50)
print('Evaluating for information gain')
print('Accuracy: {}'.format(avg_accuracy*100))
print('Mcc: {}'.format(avg_mcc))
print('Average generation time : {} sec'.format(avg_time))
print('*'*50)

              precision    recall  f1-score   support

           0       0.17      1.00      0.29         1
           1       1.00      0.64      0.78        14

    accuracy                           0.67        15
   macro avg       0.58      0.82      0.53        15
weighted avg       0.94      0.67      0.75        15

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00        13

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.79      0.92      0.85        12

    accuracy                           0.73        15
   macro avg       0.39      0.46      0.42        15
weighted avg       0.63      0.73      0.68        15

              preci

              precision    recall  f1-score   support

           0       1.00      0.86      0.92        14
           1       0.00      0.00      0.00         0

    accuracy                           0.86        14
   macro avg       0.50      0.43      0.46        14
weighted avg       1.00      0.86      0.92        14

              precision    recall  f1-score   support

           0       0.17      1.00      0.29         1
           1       1.00      0.64      0.78        14

    accuracy                           0.67        15
   macro avg       0.58      0.82      0.53        15
weighted avg       0.94      0.67      0.75        15

              precision    recall  f1-score   support

           0       0.33      0.50      0.40         2
           1       0.92      0.85      0.88        13

    accuracy                           0.80        15
   macro avg       0.62      0.67      0.64        15
weighted avg       0.84      0.80      0.82        15

              preci

              precision    recall  f1-score   support

           0       0.82      0.75      0.78        12
           1       0.25      0.33      0.29         3

    accuracy                           0.67        15
   macro avg       0.53      0.54      0.53        15
weighted avg       0.70      0.67      0.68        15

              precision    recall  f1-score   support

           0       0.87      1.00      0.93        13
           1       0.00      0.00      0.00         2

    accuracy                           0.87        15
   macro avg       0.43      0.50      0.46        15
weighted avg       0.75      0.87      0.80        15

              precision    recall  f1-score   support

           0       0.50      0.86      0.63         7
           1       0.67      0.25      0.36         8

    accuracy                           0.53        15
   macro avg       0.58      0.55      0.50        15
weighted avg       0.59      0.53      0.49        15

              preci

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.79      0.92      0.85        12

    accuracy                           0.73        15
   macro avg       0.39      0.46      0.42        15
weighted avg       0.63      0.73      0.68        15

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.92      0.92      0.92        13

    accuracy                           0.87        15
   macro avg       0.71      0.71      0.71        15
weighted avg       0.87      0.87      0.87        15

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.62      0.80      0.70        10

    accuracy                           0.53        15
   macro avg       0.31      0.40      0.35        15
weighted avg       0.41      0.53      0.46        15

              preci

              precision    recall  f1-score   support

           0       0.83      0.83      0.83        12
           1       0.33      0.33      0.33         3

    accuracy                           0.73        15
   macro avg       0.58      0.58      0.58        15
weighted avg       0.73      0.73      0.73        15

              precision    recall  f1-score   support

           0       0.87      1.00      0.93        13
           1       0.00      0.00      0.00         2

    accuracy                           0.87        15
   macro avg       0.43      0.50      0.46        15
weighted avg       0.75      0.87      0.80        15

              precision    recall  f1-score   support

           0       0.50      0.86      0.63         7
           1       0.67      0.25      0.36         8

    accuracy                           0.53        15
   macro avg       0.58      0.55      0.50        15
weighted avg       0.59      0.53      0.49        15

              preci

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00        13

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.79      0.92      0.85        12

    accuracy                           0.73        15
   macro avg       0.39      0.46      0.42        15
weighted avg       0.63      0.73      0.68        15

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.92      0.92      0.92        13

    accuracy                           0.87        15
   macro avg       0.71      0.71      0.71        15
weighted avg       0.87      0.87      0.87        15

              preci

              precision    recall  f1-score   support

           0       0.50      0.86      0.63         7
           1       0.67      0.25      0.36         8

    accuracy                           0.53        15
   macro avg       0.58      0.55      0.50        15
weighted avg       0.59      0.53      0.49        15

              precision    recall  f1-score   support

           0       0.92      0.79      0.85        14
           1       0.00      0.00      0.00         1

    accuracy                           0.73        15
   macro avg       0.46      0.39      0.42        15
weighted avg       0.86      0.73      0.79        15

              precision    recall  f1-score   support

           0       0.83      0.83      0.83        12
           1       0.33      0.33      0.33         3

    accuracy                           0.73        15
   macro avg       0.58      0.58      0.58        15
weighted avg       0.73      0.73      0.73        15

              preci

              precision    recall  f1-score   support

           0       0.77      0.91      0.83        11
           1       0.50      0.25      0.33         4

    accuracy                           0.73        15
   macro avg       0.63      0.58      0.58        15
weighted avg       0.70      0.73      0.70        15

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.50      1.00      0.67         2

    accuracy                           0.87        15
   macro avg       0.75      0.92      0.79        15
weighted avg       0.93      0.87      0.88        15

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.75      1.00      0.86         3

    accuracy                           0.93        15
   macro avg       0.88      0.96      0.91        15
weighted avg       0.95      0.93      0.94        15

              preci

              precision    recall  f1-score   support

           0       0.83      0.83      0.83        12
           1       0.33      0.33      0.33         3

    accuracy                           0.73        15
   macro avg       0.58      0.58      0.58        15
weighted avg       0.73      0.73      0.73        15

              precision    recall  f1-score   support

           0       0.92      0.86      0.89        14
           1       0.00      0.00      0.00         1

    accuracy                           0.80        15
   macro avg       0.46      0.43      0.44        15
weighted avg       0.86      0.80      0.83        15

              precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       0.00      0.00      0.00         1

    accuracy                           0.93        15
   macro avg       0.47      0.50      0.48        15
weighted avg       0.87      0.93      0.90        15

              preci

              precision    recall  f1-score   support

           0       0.91      0.77      0.83        13
           1       0.25      0.50      0.33         2

    accuracy                           0.73        15
   macro avg       0.58      0.63      0.58        15
weighted avg       0.82      0.73      0.77        15

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.75      1.00      0.86         3

    accuracy                           0.93        15
   macro avg       0.88      0.96      0.91        15
weighted avg       0.95      0.93      0.94        15

              precision    recall  f1-score   support

           0       0.82      0.75      0.78        12
           1       0.25      0.33      0.29         3

    accuracy                           0.67        15
   macro avg       0.53      0.54      0.53        15
weighted avg       0.70      0.67      0.68        15

              preci

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.79      0.92      0.85        12

    accuracy                           0.73        15
   macro avg       0.39      0.46      0.42        15
weighted avg       0.63      0.73      0.68        15

              precision    recall  f1-score   support

           0       0.33      0.50      0.40         2
           1       0.92      0.85      0.88        13

    accuracy                           0.80        15
   macro avg       0.62      0.67      0.64        15
weighted avg       0.84      0.80      0.82        15

              precision    recall  f1-score   support

           0       0.50      0.40      0.44         5
           1       0.73      0.80      0.76        10

    accuracy                           0.67        15
   macro avg       0.61      0.60      0.60        15
weighted avg       0.65      0.67      0.66        15

              preci

              precision    recall  f1-score   support

           0       0.83      0.83      0.83        12
           1       0.33      0.33      0.33         3

    accuracy                           0.73        15
   macro avg       0.58      0.58      0.58        15
weighted avg       0.73      0.73      0.73        15

              precision    recall  f1-score   support

           0       0.92      0.86      0.89        14
           1       0.00      0.00      0.00         1

    accuracy                           0.80        15
   macro avg       0.46      0.43      0.44        15
weighted avg       0.86      0.80      0.83        15

              precision    recall  f1-score   support

           0       0.92      0.86      0.89        14
           1       0.00      0.00      0.00         1

    accuracy                           0.80        15
   macro avg       0.46      0.43      0.44        15
weighted avg       0.86      0.80      0.83        15

              preci

              precision    recall  f1-score   support

           0       0.90      0.69      0.78        13
           1       0.20      0.50      0.29         2

    accuracy                           0.67        15
   macro avg       0.55      0.60      0.53        15
weighted avg       0.81      0.67      0.72        15

              precision    recall  f1-score   support

           0       0.92      0.92      0.92        12
           1       0.67      0.67      0.67         3

    accuracy                           0.87        15
   macro avg       0.79      0.79      0.79        15
weighted avg       0.87      0.87      0.87        15

              precision    recall  f1-score   support

           0       0.83      0.83      0.83        12
           1       0.33      0.33      0.33         3

    accuracy                           0.73        15
   macro avg       0.58      0.58      0.58        15
weighted avg       0.73      0.73      0.73        15

              preci

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.67      1.00      0.80        10

    accuracy                           0.67        15
   macro avg       0.33      0.50      0.40        15
weighted avg       0.44      0.67      0.53        15

              precision    recall  f1-score   support

           0       0.91      0.83      0.87        12
           1       0.50      0.67      0.57         3

    accuracy                           0.80        15
   macro avg       0.70      0.75      0.72        15
weighted avg       0.83      0.80      0.81        15

              precision    recall  f1-score   support

           0       0.92      0.86      0.89        14
           1       0.00      0.00      0.00         1

    accuracy                           0.80        15
   macro avg       0.46      0.43      0.44        15
weighted avg       0.86      0.80      0.83        15

              preci

              precision    recall  f1-score   support

           0       1.00      0.93      0.97        15
           1       0.00      0.00      0.00         0

    accuracy                           0.93        15
   macro avg       0.50      0.47      0.48        15
weighted avg       1.00      0.93      0.97        15

              precision    recall  f1-score   support

           0       1.00      0.86      0.92        14
           1       0.00      0.00      0.00         0

    accuracy                           0.86        14
   macro avg       0.50      0.43      0.46        14
weighted avg       1.00      0.86      0.92        14

              precision    recall  f1-score   support

           0       0.17      1.00      0.29         1
           1       1.00      0.64      0.78        14

    accuracy                           0.67        15
   macro avg       0.58      0.82      0.53        15
weighted avg       0.94      0.67      0.75        15

              preci

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.33      0.50         3

    accuracy                           0.87        15
   macro avg       0.93      0.67      0.71        15
weighted avg       0.89      0.87      0.84        15

              precision    recall  f1-score   support

           0       1.00      0.86      0.92        14
           1       0.33      1.00      0.50         1

    accuracy                           0.87        15
   macro avg       0.67      0.93      0.71        15
weighted avg       0.96      0.87      0.89        15

              precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       0.00      0.00      0.00         1

    accuracy                           0.93        15
   macro avg       0.47      0.50      0.48        15
weighted avg       0.87      0.93      0.90        15

              preci

              precision    recall  f1-score   support

           0       0.82      0.75      0.78        12
           1       0.25      0.33      0.29         3

    accuracy                           0.67        15
   macro avg       0.53      0.54      0.53        15
weighted avg       0.70      0.67      0.68        15

              precision    recall  f1-score   support

           0       0.86      0.92      0.89        13
           1       0.00      0.00      0.00         2

    accuracy                           0.80        15
   macro avg       0.43      0.46      0.44        15
weighted avg       0.74      0.80      0.77        15

              precision    recall  f1-score   support

           0       0.45      0.71      0.56         7
           1       0.50      0.25      0.33         8

    accuracy                           0.47        15
   macro avg       0.48      0.48      0.44        15
weighted avg       0.48      0.47      0.44        15

              preci

              precision    recall  f1-score   support

           0       0.87      1.00      0.93        13
           1       0.00      0.00      0.00         2

    accuracy                           0.87        15
   macro avg       0.43      0.50      0.46        15
weighted avg       0.75      0.87      0.80        15

              precision    recall  f1-score   support

           0       1.00      0.93      0.97        15
           1       0.00      0.00      0.00         0

    accuracy                           0.93        15
   macro avg       0.50      0.47      0.48        15
weighted avg       1.00      0.93      0.97        15

              precision    recall  f1-score   support

           0       1.00      0.86      0.92        14
           1       0.00      0.00      0.00         0

    accuracy                           0.86        14
   macro avg       0.50      0.43      0.46        14
weighted avg       1.00      0.86      0.92        14

              preci

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.62      0.80      0.70        10

    accuracy                           0.53        15
   macro avg       0.31      0.40      0.35        15
weighted avg       0.41      0.53      0.46        15

              precision    recall  f1-score   support

           0       0.90      0.75      0.82        12
           1       0.40      0.67      0.50         3

    accuracy                           0.73        15
   macro avg       0.65      0.71      0.66        15
weighted avg       0.80      0.73      0.75        15

              precision    recall  f1-score   support

           0       0.92      0.86      0.89        14
           1       0.00      0.00      0.00         1

    accuracy                           0.80        15
   macro avg       0.46      0.43      0.44        15
weighted avg       0.86      0.80      0.83        15

              preci

In [30]:
accuracies = []
times = []
mccs = []
for _ in range(30):
    #change criterion to 'entropy' for info gain calculation and 'gini' for gini index calculation
    acc, gtime, mcc_av = create_dt_gini(criter = "entropy")
    accuracies.append(acc)
    times.append(gtime)
    mccs.append(mcc_av)

conveted_accuracies = [float(x) for x in accuracies]
converted_times =  [float(x) for x in times]
converted_mccs = [float(x) for x in mccs]

avg_accuracy = average_list(conveted_accuracies)
avg_time = average_list(converted_times)
avg_mcc = average_list(converted_mccs)

print('*'*50)
print('Evaluating for gini')
print('Accuracy: {}'.format(avg_accuracy * 100))
print('Mcc: {}'.format(avg_mcc))
print('Average generation time : {} sec'.format(avg_time))
print('*'*50)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.90      0.64      0.75        14

    accuracy                           0.60        15
   macro avg       0.45      0.32      0.38        15
weighted avg       0.84      0.60      0.70        15

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.83      0.77      0.80        13

    accuracy                           0.67        15
   macro avg       0.42      0.38      0.40        15
weighted avg       0.72      0.67      0.69        15

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.79      0.92      0.85        12

    accuracy                           0.73        15
   macro avg       0.39      0.46      0.42        15
weighted avg       0.63      0.73      0.68        15

              preci

ValueError: Number of classes, 1, does not match size of target_names, 2. Try specifying the labels parameter