In [None]:
import math
import numpy as npy
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.tree import plot_tree
from sklearn.metrics import accuracy_score

In [None]:
# Read heart.csv (resolved relative to the current working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="heart.csv")

In [None]:
# Separate the label column 'target' from the remaining predictor columns.
data_target = df['target']
data_set = df.drop(columns=['target'])

# Predictor names in column order; reused later to label the tree plots.
var_columns = data_set.columns.tolist()

In [None]:
# x is the predictor frame, y the label series.
x, y = data_set, data_target

# test_size is left at the library default; the fixed seed keeps the
# partition identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=9,
)

In [None]:
# Baseline model: an unpruned decision tree split on entropy.
# random_state is pinned because scikit-learn permutes the candidate features
# at every split (even with splitter='best'), so equally good splits could be
# resolved differently from one run to the next. The seed matches the one
# used for the train/test split.
model_tree = DecisionTreeClassifier(criterion='entropy', random_state=9)
model_tree.fit(x_train, y_train)

In [None]:
# Render the baseline tree on a wide canvas using an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(15, 7.5))
plot_tree(
    model_tree,
    feature_names=var_columns,
    class_names=["not HD", " HD"],
    filled=True,
    rounded=True,
    ax=ax,
)
plt.show()

In [None]:
# Baseline-tree predictions for the training and the test partition, in that order.
y_train_p, y_test_p = (model_tree.predict(split) for split in (x_train, x_test))

In [None]:
# Report baseline accuracy as a percentage with two decimals.
# The '%' format spec scales and rounds in one step. Multiplying a rounded
# float by 100 (e.g. round(0.57, 2) * 100) prints artefacts such as
# 56.99999999999999, and the training line was previously not rounded at all.
print(f'Accuracy of the training sample = {accuracy_score(y_train, y_train_p):.2%}')
print(f'Accuracy of the test sample = {accuracy_score(y_test, y_test_p):.2%}')

In [None]:
# Confusion matrix of the baseline tree on the training partition.
print("Confusion Matrix of training data")
metrics.ConfusionMatrixDisplay.from_predictions(
    y_true=y_train,
    y_pred=y_train_p,
    display_labels=["not HD", " HD"],
)

In [None]:
# Confusion matrix of the baseline tree on the held-out test partition.
print("Confusion Matrix of test data")
metrics.ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_test_p,
    display_labels=[" not HD", " HD"],
)

In [None]:
# Cost-complexity pruning path computed on the training data with the
# baseline tree's settings; only the candidate alpha values are kept.
path = model_tree.cost_complexity_pruning_path(x_train, y_train)
alphas = path.ccp_alphas

# Show the candidate alphas as the cell output.
alphas

In [None]:
# Refit one tree per candidate alpha and record its accuracy on both
# partitions, then plot accuracy against alpha.
acc_train = []
acc_test = []

for alpha in alphas:
    # The alphas come from the entropy tree's pruning path, so the refitted
    # trees must use the same criterion. With the default (gini) the impurity
    # scale differs and these alphas would not match that tree's pruning steps.
    # random_state pins the tie-breaking between equally good splits.
    temp_tree = DecisionTreeClassifier(criterion='entropy', ccp_alpha=alpha, random_state=9)
    temp_tree.fit(x_train, y_train)

    # Loop-local names: y_train_p / y_test_p hold the baseline model's
    # predictions and are read by other cells, so they must not be overwritten.
    sweep_train_pred = temp_tree.predict(x_train)
    sweep_test_pred = temp_tree.predict(x_test)

    acc_train.append(accuracy_score(y_train, sweep_train_pred))
    acc_test.append(accuracy_score(y_test, sweep_test_pred))

plt.figure(figsize=(14,7))
# Markers show the individual alphas; the step lines show that accuracy
# stays constant until the next alpha is reached.
plt.scatter(alphas,acc_train)
plt.scatter(alphas,acc_test)
plt.plot(alphas,acc_train,label='train_accuracy',drawstyle="steps-post")
plt.plot(alphas,acc_test,label='test_accuracy',drawstyle="steps-post")
# Fixed tick grid: 0.005 spacing from 0 up to (but excluding) 0.2.
plt.xticks(ticks=npy.arange(0.00,0.2,0.005, ), rotation=45)
plt.title('Accuracy vs alpha')
plt.legend()
plt.xlabel('alphas')
plt.ylabel('accuracy')
plt.show()

In [None]:
# Pruned entropy tree and its predictions on both partitions.
# NOTE(review): ccp_alpha=0.03 is hard-coded, presumably read off the
# accuracy-vs-alpha plot; re-check it if the data or the split changes.
# random_state is pinned so the same tree is obtained for this alpha on every
# run (scikit-learn permutes candidate features at each split). The seed
# matches the one used for the train/test split.
tree = DecisionTreeClassifier(criterion='entropy', ccp_alpha=0.03, random_state=9)
tree.fit(x_train, y_train)
y_train_p1 = tree.predict(x_train)
y_test_p1 = tree.predict(x_test)

In [None]:
# Render the pruned tree on a wide canvas using an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(15, 7.5))
plot_tree(
    tree,
    feature_names=var_columns,
    class_names=['NOT HD', 'HD'],
    filled=True,
    rounded=True,
    ax=ax,
)
plt.show()

In [None]:
# Report the pruned tree's accuracy as a percentage with two decimals.
# The '%' format spec scales and rounds in one step; multiplying a rounded
# float by 100 (e.g. round(0.57, 2) * 100) prints artefacts such as
# 56.99999999999999.
print(f'Accuracy of the training sample = {accuracy_score(y_train, y_train_p1):.2%}')
print(f'Accuracy of the test sample = {accuracy_score(y_test, y_test_p1):.2%}')

In [None]:
print('Confusion Matrix of training data and test data')

# Two matrices are drawn back to back under a single printed heading, so each
# figure gets its own title stating which partition it shows.
train_cm = metrics.ConfusionMatrixDisplay.from_predictions(y_train, y_train_p1, display_labels=[ " not HD"," HD"])
train_cm.ax_.set_title('Training data')

test_cm = metrics.ConfusionMatrixDisplay.from_predictions(y_test, y_test_p1, display_labels=[ " not HD"," HD"])
test_cm.ax_.set_title('Test data')

# Render both figures and avoid echoing the last expression's repr.
plt.show()