In [8]:
#                                         Classification Metrics

In [22]:
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [25]:
# Load the heart-disease CSV, discard every row that has a missing value,
# and one-hot encode the categorical columns (first level of each dropped).
raw_df = pd.read_csv('csv files/heart_disease_uci.csv')
df = pd.get_dummies(raw_df.dropna(), drop_first=True)

# Mean imputation over the encoded frame. Incomplete rows were already removed
# by dropna() above, so there is nothing left to fill here; `df_imputed` ends
# up as a copy of `df` rebuilt from the imputer's array output (fresh
# RangeIndex). It is kept so the name stays defined for any cell that reads it.
imputer = SimpleImputer(strategy='mean')
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

# Preview a few encoded rows. This must be the last expression in the cell,
# otherwise the notebook evaluates it and throws the result away.
df.sample(3)

In [27]:
# Hold out 20% of the rows, fit a logistic regression and a decision tree on
# the remaining 80%, and print each model's accuracy on the held-out rows.
#
# NOTE(review): the label is whichever column is last in `df`. After
# pd.get_dummies that is presumably one of the dummy-encoded columns rather
# than the dataset's original target column - confirm this is intended.
SEED = 2  # one seed for the split and both models so a re-run gives the same numbers

features = df.iloc[:, :-1]
labels = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=SEED
)

# Both estimators draw random numbers while fitting ('saga' shuffles the data,
# the tree permutes features at each split), so both are seeded explicitly.
# NOTE(review): 'saga' is only guaranteed to converge quickly on features of
# similar scale; StandardScaler is imported but not applied - consider scaling.
clf1 = LogisticRegression(max_iter=5000, solver='saga', random_state=SEED)
clf2 = DecisionTreeClassifier(random_state=SEED)

clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)

y_pred1 = clf1.predict(X_test)
y_pred2 = clf2.predict(X_test)

print("Accuracy of Logistic Regression",accuracy_score(y_test,y_pred1))
print("Accuracy of Decision Trees",accuracy_score(y_test,y_pred2))

Accuracy of Logistic Regression 0.7666666666666667
Accuracy of Decision Trees 0.9333333333333333




In [28]:
# Raw confusion matrix for the logistic regression predictions
# (rows = actual class, columns = predicted class).
confusion_matrix(y_true=y_test, y_pred=y_pred1)

array([[34,  2],
       [12, 12]], dtype=int64)

In [29]:
# Same matrix as a DataFrame so the predicted-class columns carry labels 0/1.
print("Logistic Regression Confusion Matrix\n")
pd.DataFrame(
    data=confusion_matrix(y_true=y_test, y_pred=y_pred1),
    columns=[0, 1],
)

Logistic Regression Confusion Matrix



Unnamed: 0,0,1
0,34,2
1,12,12


In [30]:
# Decision tree confusion matrix as a DataFrame with columns labelled 0/1.
print("Decision Tree Confusion Matrix\n")
pd.DataFrame(
    data=confusion_matrix(y_true=y_test, y_pred=y_pred2),
    columns=[0, 1],
)

Decision Tree Confusion Matrix



Unnamed: 0,0,1
0,34,2
1,2,22


In [32]:
# Side-by-side view of the true label and each model's prediction.
# The Series `y_test` supplies the row index; the two prediction arrays are
# placed against it positionally.
result = pd.DataFrame(
    {
        'Actual Label': y_test,
        'Logistic Regression Prediction': y_pred1,
        'Decision Tree Prediction': y_pred2,
    }
)
result.sample(10)

Unnamed: 0,Actual Label,Logistic Regression Prediction,Decision Tree Prediction
176,True,False,False
66,False,False,False
10,False,False,True
286,False,True,True
100,False,False,False
254,False,False,False
127,True,True,True
92,True,False,True
11,False,False,False
143,True,True,True


In [34]:
def print_classification_report(title, y_true, y_pred):
    """Print a confusion matrix plus precision, recall and F1 for one model.

    Parameters
    ----------
    title : str
        Heading printed above the report.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model, in the same order as ``y_true``.

    Returns
    -------
    pandas.DataFrame
        The confusion matrix that was printed, with columns labelled 0 and 1.
    """
    rule = "-" * 50
    matrix = pd.DataFrame(confusion_matrix(y_true, y_pred), columns=list(range(0, 2)))

    print(title)
    print(rule)
    print(matrix)
    print(rule)
    print("Precision - ", precision_score(y_true, y_pred))
    print("Recall - ", recall_score(y_true, y_pred))
    print("F1 score - ", f1_score(y_true, y_pred))
    return matrix


# `cdf` is rebound on each call, so it ends up holding the decision tree
# matrix, exactly as it did when the report was written out twice by hand.
cdf = print_classification_report("For Logistic regression Model", y_test, y_pred1)
cdf = print_classification_report("For DT Model", y_test, y_pred2)

For Logistic regression Model
--------------------------------------------------
    0   1
0  34   2
1  12  12
--------------------------------------------------
Precision -  0.8571428571428571
Recall -  0.5
F1 score -  0.631578947368421
For DT Model
--------------------------------------------------
    0   1
0  34   2
1   2  22
--------------------------------------------------
Precision -  0.9166666666666666
Recall -  0.9166666666666666
F1 score -  0.9166666666666666


In [35]:
# average=None returns one precision value per class instead of a single score.
precision_score(y_true=y_test, y_pred=y_pred1, average=None)

array([0.73913043, 0.85714286])

In [36]:
# Per-class precision for the decision tree (average=None disables averaging).
precision_score(y_true=y_test, y_pred=y_pred2, average=None)

array([0.94444444, 0.91666667])

In [37]:
# Per-class recall for the decision tree (average=None disables averaging).
recall_score(y_true=y_test, y_pred=y_pred2, average=None)

array([0.94444444, 0.91666667])