# Heart Disease Prediction using Machine Learning Classifiers

## Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_curve, roc_auc_score

## Load Data

In [None]:
# Path of the heart-disease CSV (a /content/drive location, presumably a
# mounted Google Drive -- confirm for your environment).
csv_path = '/content/drive/MyDrive/datasets/heart_disease.csv'

# Read the whole file into a DataFrame; the bare `df` below makes the
# notebook render a preview of it.
df = pd.read_csv(filepath_or_buffer=csv_path)
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num,target_binary
0,63.000000,1,1,145.000000,233.000000,1,2,150.000000,0,2.300000,3,0.0,6.0,0,0
1,67.000000,1,4,160.000000,286.000000,0,2,108.000000,1,1.500000,2,3.0,3.0,2,1
2,67.000000,1,4,120.000000,229.000000,0,2,129.000000,1,2.600000,2,2.0,7.0,1,1
3,37.000000,1,3,130.000000,250.000000,0,0,187.000000,0,3.500000,3,0.0,3.0,0,0
4,41.000000,0,2,130.000000,204.000000,0,2,172.000000,0,1.400000,1,0.0,3.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1019,60.072205,1,3,115.422056,181.768595,0,0,128.155858,1,1.001941,2,1.0,7.0,2,1
1020,70.928404,1,2,166.727221,244.993631,1,0,108.481311,0,0.698337,2,1.0,7.0,2,1
1021,57.332875,1,2,105.075177,233.146324,0,2,140.342007,0,1.519138,1,0.0,7.0,2,1
1022,40.881554,0,4,125.068578,154.370678,1,2,123.756294,0,1.936684,2,1.0,7.0,2,1


In [None]:
# Preview the first five rows (five is also the default for head()).
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num,target_binary
0,63.0,1,1,145.0,233.0,1,2,150.0,0,2.3,3,0.0,6.0,0,0
1,67.0,1,4,160.0,286.0,0,2,108.0,1,1.5,2,3.0,3.0,2,1
2,67.0,1,4,120.0,229.0,0,2,129.0,1,2.6,2,2.0,7.0,1,1
3,37.0,1,3,130.0,250.0,0,0,187.0,0,3.5,3,0.0,3.0,0,0
4,41.0,0,2,130.0,204.0,0,2,172.0,0,1.4,1,0.0,3.0,0,0


In [None]:
# Print the column names, non-null counts and dtypes to check for missing
# values and unexpected types before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1024 entries, 0 to 1023
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   age            1024 non-null   float64
 1   sex            1024 non-null   int64  
 2   cp             1024 non-null   int64  
 3   trestbps       1024 non-null   float64
 4   chol           1024 non-null   float64
 5   fbs            1024 non-null   int64  
 6   restecg        1024 non-null   int64  
 7   thalach        1024 non-null   float64
 8   exang          1024 non-null   int64  
 9   oldpeak        1024 non-null   float64
 10  slope          1024 non-null   int64  
 11  ca             1024 non-null   float64
 12  thal           1024 non-null   float64
 13  num            1024 non-null   int64  
 14  target_binary  1024 non-null   int64  
dtypes: float64(7), int64(8)
memory usage: 120.1 KB


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num,target_binary
count,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0,1024.0
mean,54.532202,0.685547,3.149414,131.478621,246.965108,0.165039,1.001953,149.638107,0.345703,1.090142,1.619141,0.681641,4.701172,0.923828,0.458984
std,9.2734,0.464525,0.971812,17.50623,52.561237,0.371397,0.993624,23.48026,0.475829,1.072306,0.620176,0.947402,1.941107,1.070377,0.498558
min,18.0,0.0,1.0,84.869643,100.0,0.0,0.0,71.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0
25%,48.193472,0.0,3.0,120.0,211.0,0.0,0.0,133.555581,0.0,0.152062,1.0,0.0,3.0,0.0,0.0
50%,55.0,1.0,3.0,130.012111,244.996816,0.0,1.0,151.5247,0.0,0.854977,2.0,0.0,3.0,0.0,0.0
75%,61.196437,1.0,4.0,142.412148,280.232245,0.0,2.0,165.642437,1.0,1.630471,2.0,1.0,7.0,2.0,1.0
max,78.79503,1.0,4.0,200.0,564.0,1.0,2.0,227.196592,1.0,6.2,3.0,3.0,7.0,4.0,1.0


## Define Features and Target

In [None]:
# Columns that encode the diagnosis itself and must not be used as predictors.
# `num` is the graded diagnosis (0-4 in this data); in every previewed row
# `target_binary` is 1 exactly when `num` > 0, so leaving `num` in X would
# leak the label and make every classifier look near-perfect.
label_columns = ['num', 'target_binary']

# Feature matrix: every remaining clinical measurement.
X = df.drop(columns=label_columns)
# Binary target: presence (1) / absence (0) of heart disease.
y = df['target_binary']

## Train-Test Split

In [None]:
# Hold out 20% of the rows for evaluation. `stratify=y` keeps the
# positive/negative ratio the same in both splits so the test metrics are not
# distorted by a chance class imbalance; `random_state` makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## Scale Features

In [None]:
# Learn the scaling parameters from the training split only, then apply that
# same fitted transformation to both splits so that no test-set statistics
# influence the fit.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Model Training

In [None]:
# Logistic Regression -- library-default hyperparameters, trained on the
# training split. The trailing fit() expression also renders the estimator.
lr = LogisticRegression()
lr.fit(X=X_train, y=y_train)

In [None]:
# Decision Tree -- seeded with a fixed random_state so repeated runs build
# the same tree; trained on the training split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X=X_train, y=y_train)

In [None]:
# Random Forest -- seeded with a fixed random_state for reproducible
# ensembles; trained on the training split.
rf = RandomForestClassifier(random_state=42)
rf.fit(X=X_train, y=y_train)