# Imports and setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, precision_score, accuracy_score, confusion_matrix, f1_score

# data preprocessing

In [2]:
# Load heart.csv (path is relative to the notebook's working directory)
# and display the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='heart.csv')
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [3]:
# Discard every row that has a missing value in at least one column.
# The result is bound back to `df`, which the following cells read.
df = df.dropna(axis=0, how='any')

In [12]:
# Labels come from the 'target' column; the feature matrix is every
# remaining column. Both are materialised as independent NumPy arrays.
y = df['target'].to_numpy(copy=True)
X = df.drop(columns='target').to_numpy(copy=True)

In [14]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition identical on every run.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X, y,
    random_state=0,
    test_size=0.2,
)

# Model building

In [18]:
# Logistic regression baseline.
# `reg` is the regularisation strength; scikit-learn's `C` is its inverse,
# so reg = 0.01 gives C = 100 (weak regularisation).
reg = 0.01
lr_model = LogisticRegression(solver='liblinear', C=1/reg).fit(X_train, y_train)
pred = lr_model.predict(X_test)

# Score the held-out predictions against the true labels.
con_mat = confusion_matrix(y_test, pred)
accuracy = accuracy_score(y_test, pred)
precision = precision_score(y_test, pred)
recall = recall_score(y_test, pred)

print(f"Model: {lr_model}")
print(f"Recall: {recall}")
print(f"Precision: {precision}")
print(f"Accuracy: {accuracy}")
# Bare last expression so the confusion matrix renders as the cell output.
con_mat

Model: LogisticRegression(C=100.0, solver='liblinear')
Recall: 0.9345794392523364
Precision: 0.8264462809917356
Accuracy: 0.8634146341463415


array([[ 77,  21],
       [  7, 100]], dtype=int64)

In [20]:
# Decision tree classifier.
# random_state is pinned so the fitted tree (and therefore every metric
# below) is reproducible under Restart Kernel -> Run All; without it,
# feature permutation / tie-breaking between equally good splits can vary
# from run to run.
dt_model = DecisionTreeClassifier(random_state=0)
dt_model.fit(X_train, y_train)
pred = dt_model.predict(X_test)

# Evaluate on the held-out split.
# NOTE(review): if this cell reports perfect test scores, check
# df.duplicated().sum() -- duplicate rows shared between the train and
# test partitions would inflate every metric. Confirm before trusting.
recall = recall_score(y_test, pred)
precision = precision_score(y_test, pred)
accuracy = accuracy_score(y_test, pred)
con_mat = confusion_matrix(y_test, pred)

print(f"Model: {dt_model}")
print(f"Recall: {recall}")
print(f"Precision: {precision}")
print(f"Accuracy: {accuracy}")
# Bare last expression so the confusion matrix renders as the cell output.
con_mat

Model: DecisionTreeClassifier()
Recall: 1.0
Precision: 1.0
Accuracy: 1.0


array([[ 98,   0],
       [  0, 107]], dtype=int64)

In [22]:
# Random forest classifier.
# random_state is pinned because the forest bootstraps rows and sub-samples
# features for each tree; unseeded, the metrics below change between runs
# and the notebook is not reproducible.
rf_model = RandomForestClassifier(random_state=0)
rf_model.fit(X_train, y_train)
pred = rf_model.predict(X_test)

# Evaluate on the held-out split.
recall = recall_score(y_test, pred)
precision = precision_score(y_test, pred)
accuracy = accuracy_score(y_test, pred)
con_mat = confusion_matrix(y_test, pred)

print(f"Model: {rf_model}")
print(f"Recall: {recall}")
print(f"Precision: {precision}")
print(f"Accuracy: {accuracy}")
# Bare last expression so the confusion matrix renders as the cell output.
con_mat

Model: RandomForestClassifier()
Recall: 1.0
Precision: 1.0
Accuracy: 1.0


array([[ 98,   0],
       [  0, 107]], dtype=int64)

In [24]:
# Gradient boosting classifier.
# random_state is pinned so the estimator's internal random choices are
# fixed and the reported metrics are reproducible from a fresh kernel.
gb_model = GradientBoostingClassifier(random_state=0)
gb_model.fit(X_train, y_train)
pred = gb_model.predict(X_test)

# Evaluate on the held-out split.
recall = recall_score(y_test, pred)
precision = precision_score(y_test, pred)
accuracy = accuracy_score(y_test, pred)
con_mat = confusion_matrix(y_test, pred)

print(f"Model: {gb_model}")
print(f"Recall: {recall}")
print(f"Precision: {precision}")
print(f"Accuracy: {accuracy}")
# Bare last expression so the confusion matrix renders as the cell output.
con_mat

Model: GradientBoostingClassifier()
Recall: 1.0
Precision: 0.981651376146789
Accuracy: 0.9902439024390244


array([[ 96,   2],
       [  0, 107]], dtype=int64)