# Imports

In [1]:
import pandas as pd
import numpy as np
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

import tensorflow as tf

In [2]:
df = pd.read_csv("11-heart.csv")

# Splitting Data

In [3]:
X = df.drop('output',axis='columns')
y= df['output']

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [5]:
# Normalization and Standardization

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

scaler2 = StandardScaler()
X_train_scaled2 = scaler2.fit_transform(X_train)
X_test_scaled2 = scaler2.transform(X_test)

**Decision Tree Classifier**

In [6]:
DT = DecisionTreeClassifier()
DT.fit(X_train_scaled,y_train)

y_prediction = DT.predict(X_test_scaled)
tree_train_acc = round(accuracy_score(y_train,DT.predict(X_train_scaled))*100,2)
tree_test_acc = round(accuracy_score(y_test,y_prediction)*100,2)

print("With normalization:")
print('Decision Tree Train Accuracy = ' , tree_train_acc,'%')
print('Decision tree Test Accuracy = ' , tree_test_acc,'%')
print()
print(classification_report(y_test,DT.predict(X_test_scaled), digits= 4))

With normalization:
Decision Tree Train Accuracy =  100.0 %
Decision tree Test Accuracy =  81.97 %

              precision    recall  f1-score   support

           0     0.7500    0.9310    0.8308        29
           1     0.9200    0.7188    0.8070        32

    accuracy                         0.8197        61
   macro avg     0.8350    0.8249    0.8189        61
weighted avg     0.8392    0.8197    0.8183        61



In [7]:
DT2 = DecisionTreeClassifier()
DT2.fit(X_train_scaled2,y_train)

y_prediction2 = DT2.predict(X_test_scaled2)
tree_train_acc2 = round(accuracy_score(y_train,DT2.predict(X_train_scaled2))*100,2)
tree_test_acc2 = round(accuracy_score(y_test,y_prediction2)*100,2)

print("With standardization:")
print('Decision Tree Train Accuracy = ' , tree_train_acc2,'%')
print('Decision tree Test Accuracy = ' , tree_test_acc2,'%')
print()
print(classification_report(y_test,DT2.predict(X_test_scaled2), digits= 4))

With standardization:
Decision Tree Train Accuracy =  100.0 %
Decision tree Test Accuracy =  80.33 %

              precision    recall  f1-score   support

           0     0.7576    0.8621    0.8065        29
           1     0.8571    0.7500    0.8000        32

    accuracy                         0.8033        61
   macro avg     0.8074    0.8060    0.8032        61
weighted avg     0.8098    0.8033    0.8031        61



**K-Nearest Neighbors Classifier**

In [8]:
KNN = KNeighborsClassifier()
KNN.fit(X_train_scaled,y_train)

y_prediction = KNN.predict(X_test_scaled)
tree_train_acc = round(accuracy_score(y_train,KNN.predict(X_train_scaled))*100,2)
tree_test_acc = round(accuracy_score(y_test,y_prediction)*100,2)

print("With normalization:")
print('KNN Train Accuracy = ' , tree_train_acc,'%')
print('KNN Test Accuracy = ' , tree_test_acc,'%')
print()
print(classification_report(y_test,KNN.predict(X_test_scaled), digits= 4))


With normalization:
KNN Train Accuracy =  84.71 %
KNN Test Accuracy =  83.61 %

              precision    recall  f1-score   support

           0     0.8065    0.8621    0.8333        29
           1     0.8667    0.8125    0.8387        32

    accuracy                         0.8361        61
   macro avg     0.8366    0.8373    0.8360        61
weighted avg     0.8380    0.8361    0.8362        61



In [9]:
KNN2 = KNeighborsClassifier()
KNN2.fit(X_train_scaled2,y_train)

y_prediction2 = KNN2.predict(X_test_scaled2)
tree_train_acc2 = round(accuracy_score(y_train,KNN2.predict(X_train_scaled2))*100,2)
tree_test_acc2 = round(accuracy_score(y_test,y_prediction2)*100,2)

print("With standardization:")
print('KNN Train Accuracy = ' , tree_train_acc2,'%')
print('KNN Test Accuracy = ' , tree_test_acc2,'%')
print()
print(classification_report(y_test,KNN2.predict(X_test_scaled2), digits= 4))

With standardization:
KNN Train Accuracy =  85.54 %
KNN Test Accuracy =  90.16 %

              precision    recall  f1-score   support

           0     0.8710    0.9310    0.9000        29
           1     0.9333    0.8750    0.9032        32

    accuracy                         0.9016        61
   macro avg     0.9022    0.9030    0.9016        61
weighted avg     0.9037    0.9016    0.9017        61



**Random Forest**

In [10]:
RF = RandomForestClassifier()
RF.fit(X_train_scaled,y_train)

y_prediction = RF.predict(X_test_scaled)
tree_train_acc = round(accuracy_score(y_train,RF.predict(X_train_scaled))*100,2)
tree_test_acc = round(accuracy_score(y_test,y_prediction)*100,2)

print("With normalization:")
print('Random Forest Train Accuracy = ' , tree_train_acc,'%')
print('Random Forest Test Accuracy = ' , tree_test_acc,'%')
print()
print(classification_report(y_test,RF.predict(X_test_scaled), digits= 4))

With normalization:
Random Forest Train Accuracy =  100.0 %
Random Forest Test Accuracy =  86.89 %

              precision    recall  f1-score   support

           0     0.8889    0.8276    0.8571        29
           1     0.8529    0.9062    0.8788        32

    accuracy                         0.8689        61
   macro avg     0.8709    0.8669    0.8680        61
weighted avg     0.8700    0.8689    0.8685        61



In [11]:
RF2 = RandomForestClassifier()
RF2.fit(X_train_scaled2,y_train)

y_prediction2 = RF2.predict(X_test_scaled2)
tree_train_acc2 = round(accuracy_score(y_train,RF2.predict(X_train_scaled2))*100,2)
tree_test_acc2 = round(accuracy_score(y_test,y_prediction2)*100,2)

print("With standardization:")
print('Random Forest Train Accuracy = ' , tree_train_acc2,'%')
print('Random Forest Test Accuracy = ' , tree_test_acc2,'%')
print()
print(classification_report(y_test,RF2.predict(X_test_scaled2), digits= 4))

With standardization:
Random Forest Train Accuracy =  100.0 %
Random Forest Test Accuracy =  81.97 %

              precision    recall  f1-score   support

           0     0.8000    0.8276    0.8136        29
           1     0.8387    0.8125    0.8254        32

    accuracy                         0.8197        61
   macro avg     0.8194    0.8200    0.8195        61
weighted avg     0.8203    0.8197    0.8198        61



**Logistic Regression**

In [12]:
LR = LogisticRegression()
LR.fit(X_train_scaled,y_train)

y_prediction = LR.predict(X_test_scaled)
tree_train_acc = round(accuracy_score(y_train,LR.predict(X_train_scaled))*100,2)
tree_test_acc = round(accuracy_score(y_test,y_prediction)*100,2)

print("With normalization:")
print('Logistic Regression Train Accuracy = ' , tree_train_acc,'%')
print('Logistic Regression Test Accuracy = ' , tree_test_acc,'%')
print()
print(classification_report(y_test,LR.predict(X_test_scaled), digits= 4))

With normalization:
Logistic Regression Train Accuracy =  84.71 %
Logistic Regression Test Accuracy =  85.25 %

              precision    recall  f1-score   support

           0     0.8333    0.8621    0.8475        29
           1     0.8710    0.8438    0.8571        32

    accuracy                         0.8525        61
   macro avg     0.8522    0.8529    0.8523        61
weighted avg     0.8531    0.8525    0.8525        61



In [13]:
LR2 = LogisticRegression()
LR2.fit(X_train_scaled2,y_train)

y_prediction2 = LR2.predict(X_test_scaled2)
tree_train_acc2 = round(accuracy_score(y_train,LR2.predict(X_train_scaled2))*100,2)
tree_test_acc2 = round(accuracy_score(y_test,y_prediction2)*100,2)

print("With standardization:")
print('Logistic Regression Train Accuracy = ' , tree_train_acc2,'%')
print('Logistic Regression Test Accuracy = ' , tree_test_acc2,'%')
print()
print(classification_report(y_test,LR2.predict(X_test_scaled2), digits= 4))

With standardization:
Logistic Regression Train Accuracy =  86.36 %
Logistic Regression Test Accuracy =  85.25 %

              precision    recall  f1-score   support

           0     0.8333    0.8621    0.8475        29
           1     0.8710    0.8438    0.8571        32

    accuracy                         0.8525        61
   macro avg     0.8522    0.8529    0.8523        61
weighted avg     0.8531    0.8525    0.8525        61



**Neural Network**

In [14]:
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu'),   # Input layer
    tf.keras.layers.Dense(units=3, activation='relu'),   # Hidden layer
    tf.keras.layers.Dense(units=3, activation='relu'),   # Hidden layer
    tf.keras.layers.Dense(units=1, activation='sigmoid') # Output layer
])
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
ann.fit(X_train_scaled, y_train, batch_size = 16, epochs = 32)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<keras.src.callbacks.History at 0x25278739450>

In [15]:
y_pre = ann.predict(X_test_scaled)
y_pred = (y_pre > 0.5)
cm = confusion_matrix(y_test, y_pred)

acc = round(accuracy_score(y_test,y_pred)*100,2)
print('With normalization:')
print('Neural Network Test Accuracy = ' , acc,'%')
print()
print('Confusion matrix:')
print(cm)
print(classification_report(y_test, y_pred, digits= 4))

With normalization:
Neural Network Test Accuracy =  83.61 %

Confusion matrix:
[[27  2]
 [ 8 24]]
              precision    recall  f1-score   support

           0     0.7714    0.9310    0.8438        29
           1     0.9231    0.7500    0.8276        32

    accuracy                         0.8361        61
   macro avg     0.8473    0.8405    0.8357        61
weighted avg     0.8510    0.8361    0.8353        61



In [18]:
ann2 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu'),   # Input layer
    tf.keras.layers.Dense(units=3, activation='relu'),   # Hidden layer
    tf.keras.layers.Dense(units=3, activation='relu'),   # Hidden layer
    tf.keras.layers.Dense(units=1, activation='sigmoid') # Output layer
])
ann2.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
ann2.fit(X_train_scaled2, y_train, batch_size = 16, epochs = 48)

Epoch 1/48
Epoch 2/48
Epoch 3/48
Epoch 4/48
Epoch 5/48
Epoch 6/48
Epoch 7/48
Epoch 8/48
Epoch 9/48
Epoch 10/48
Epoch 11/48
Epoch 12/48
Epoch 13/48
Epoch 14/48
Epoch 15/48
Epoch 16/48
Epoch 17/48
Epoch 18/48
Epoch 19/48
Epoch 20/48
Epoch 21/48
Epoch 22/48
Epoch 23/48
Epoch 24/48
Epoch 25/48
Epoch 26/48
Epoch 27/48
Epoch 28/48
Epoch 29/48
Epoch 30/48
Epoch 31/48
Epoch 32/48
Epoch 33/48
Epoch 34/48
Epoch 35/48
Epoch 36/48
Epoch 37/48
Epoch 38/48
Epoch 39/48
Epoch 40/48
Epoch 41/48
Epoch 42/48
Epoch 43/48
Epoch 44/48
Epoch 45/48
Epoch 46/48
Epoch 47/48
Epoch 48/48


<keras.src.callbacks.History at 0x2527fe9ca50>

In [19]:
y_pre = ann2.predict(X_test_scaled2)
y_pred = (y_pre > 0.5)
cm = confusion_matrix(y_test, y_pred)

acc2 = round(accuracy_score(y_test,y_pred)*100,2)
print('With standardization:')
print('Neural Network Test Accuracy = ' , acc2,'%')
print()
print('Confusion matrix:')
print(cm)
print(classification_report(y_test, y_pred, digits= 4))

With standardization:
Neural Network Test Accuracy =  88.52 %

Confusion matrix:
[[26  3]
 [ 4 28]]
              precision    recall  f1-score   support

           0     0.8667    0.8966    0.8814        29
           1     0.9032    0.8750    0.8889        32

    accuracy                         0.8852        61
   macro avg     0.8849    0.8858    0.8851        61
weighted avg     0.8858    0.8852    0.8853        61

