In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#using SVM and Random Forest
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
#label encoding
from sklearn.preprocessing import LabelEncoder
#one hot encoding
from sklearn.preprocessing import OneHotEncoder


In [4]:
# Load the heart-disease dataset from the CSV file next to the notebook
# and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='heart.csv')
data.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [None]:
# Count the missing (NaN/None) entries in each column of `data`.
data.isna().sum()

In [8]:
# Count rows that are exact repeats of an earlier row.
duplicate_mask = data.duplicated()
duplicate_mask.sum()

0

In [11]:
#data encoding
# Replace every string-valued (object dtype) column of `data` with integer
# codes so the estimators further down receive purely numeric features.
encoded_columns = data.select_dtypes(include=['object']).columns
print(encoded_columns)
#encode the columns
# Keep one fitted LabelEncoder per column. A single shared encoder is refit on
# every iteration and only remembers the classes of the last column, which
# makes it impossible to decode the integers (inverse_transform) or to encode
# new rows consistently later on.
label_encoders = {}
for col in encoded_columns:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])
# NOTE(review): LabelEncoder numbers categories in sorted order, which implies
# an ordering for nominal columns such as ChestPainType; consider one-hot
# encoding if that ordering is not intended -- confirm with the modelling goal.
data.head()

Index(['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope'], dtype='object')


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,1,1,140,289,0,1,172,0,0.0,2,0
1,49,0,2,160,180,0,1,156,0,1.0,1,1
2,37,1,1,130,283,0,2,98,0,0.0,2,0
3,48,0,0,138,214,0,1,108,1,1.5,1,1
4,54,1,2,150,195,0,1,122,0,0.0,2,0


In [15]:
# Separate the target column from the features, report both shapes, and hold
# out 20% of the rows as a test set (fixed seed so the split is repeatable).
y = data['HeartDisease']
X = data.drop(columns='HeartDisease')
for part in (X, y):
    print(part.shape)
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
x_train.head()

(918, 11)
(918,)


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
795,42,1,2,120,240,1,1,194,0,0.8,0
25,36,1,2,130,209,0,1,178,0,0.0,2
84,56,1,0,150,213,1,1,125,1,1.0,1
10,37,0,2,130,211,0,1,142,0,0.0,2
344,51,1,0,120,0,1,1,104,0,0.0,1


In [16]:
# Standardise the listed numeric columns. The scaler is fitted on the training
# split only and then applied to both splits, so no test-set statistics leak
# into the fit.
features_to_scale = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR']
scaler = StandardScaler().fit(x_train[features_to_scale])
for split in (x_train, x_test):
    split[features_to_scale] = scaler.transform(split[features_to_scale])
x_train.head()


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
795,-1.245067,1,2,-0.708985,0.372803,1,1,2.284353,0,0.8,0
25,-1.886236,1,2,-0.166285,0.086146,0,1,1.652241,0,0.0,2
84,0.250993,1,0,0.919115,0.123134,1,1,-0.441628,1,1.0,1
10,-1.779375,0,2,-0.166285,0.10464,0,1,0.229991,0,0.0,2
344,-0.283314,1,0,-0.708985,-1.846478,1,1,-1.271274,0,0.0,1


In [17]:
# Support-vector classifier with default hyper-parameters: fit on the training
# split, predict the held-out split, and print the evaluation metrics.
svm = SVC().fit(x_train, y_train)
y_pred = svm.predict(x_test)
svm_metrics = (
    ('Accuracy: ', accuracy_score),
    ('Confusion Matrix: \n', confusion_matrix),
    ('Classification Report: \n', classification_report),
)
for heading, metric in svm_metrics:
    print(heading, metric(y_test, y_pred))


Accuracy:  0.8586956521739131
Confusion Matrix: 
 [[66 11]
 [15 92]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.81      0.86      0.84        77
           1       0.89      0.86      0.88       107

    accuracy                           0.86       184
   macro avg       0.85      0.86      0.86       184
weighted avg       0.86      0.86      0.86       184



In [18]:
#Random Forest
# Random forest with default hyper-parameters. The forest is stochastic
# (bootstrap sampling and random feature selection), so it is seeded with the
# same seed used for the train/test split; without a seed the metrics printed
# below change on every run. Scores from an earlier unseeded run may therefore
# differ slightly from what this cell now prints.
rf = RandomForestClassifier(random_state=42)
rf.fit(x_train, y_train)
y_pred = rf.predict(x_test)
print('Accuracy: ', accuracy_score(y_test, y_pred))
print('Confusion Matrix: \n', confusion_matrix(y_test, y_pred))
print('Classification Report: \n', classification_report(y_test, y_pred))


Accuracy:  0.9021739130434783
Confusion Matrix: 
 [[70  7]
 [11 96]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.86      0.91      0.89        77
           1       0.93      0.90      0.91       107

    accuracy                           0.90       184
   macro avg       0.90      0.90      0.90       184
weighted avg       0.90      0.90      0.90       184

