In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load heart.csv (path is relative to the notebook's working directory),
# then preview the first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer='heart.csv')
data.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
# Number of missing values in each column.
data.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [4]:
# How many rows fall into each value of the `target` column.
data.loc[:, 'target'].value_counts()

1    165
0    138
Name: target, dtype: int64

In [5]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

(303, 14)

In [6]:
# Feature matrix: every column except the label. `columns=` already names the
# axis being dropped from, so no separate `axis` argument is needed.
X = data.drop(columns='target')
# Label vector: the `target` column on its own.
y = data['target']

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [8]:
# Logistic regression with default settings, fitted on the training split.
# The bare `fit` call stays last so the fitted estimator is echoed as output.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

LogisticRegression()

In [9]:
# Predict on the rows the model was fitted on and report accuracy as a
# percentage rounded to two decimals.
train_predict = model.predict(X_train)
train_score = round(100 * accuracy_score(y_true=y_train, y_pred=train_predict), 2)
print(f'Train accuracy score: {train_score}%')

Train accuracy score: 83.06%


In [10]:
# Predict on the held-out rows and report accuracy as a percentage rounded to
# two decimals.
test_predict = model.predict(X_test)
test_score = round(100 * accuracy_score(y_true=y_test, y_pred=test_predict), 2)
print(f'Test accuracy score: {test_score}%')

Test accuracy score: 90.16%


In [11]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [13]:
# Precision on both splits, expressed as percentages with two decimals.
train_precision = round(100 * precision_score(y_true=y_train, y_pred=train_predict), 2)
test_precision = round(100 * precision_score(y_true=y_test, y_pred=test_predict), 2)

print(
    f'Train precision score: {train_precision}%',
    f'Test precision score: {test_precision}%',
    sep='\n',
)

Train precision score: 81.05%
Test precision score: 82.86%


In [14]:
# Recall on both splits, expressed as percentages with two decimals.
train_recall = round(100 * recall_score(y_true=y_train, y_pred=train_predict), 2)
test_recall = round(100 * recall_score(y_true=y_test, y_pred=test_predict), 2)

print(
    f'Train recall score: {train_recall}%',
    f'Test recall score: {test_recall}%',
    sep='\n',
)

Train recall score: 91.18%
Test recall score: 100.0%


In [15]:
# F1 on both splits, expressed as percentages with two decimals.
train_f1 = round(100 * f1_score(y_true=y_train, y_pred=train_predict), 2)
test_f1 = round(100 * f1_score(y_true=y_test, y_pred=test_predict), 2)

print(
    f'Train f1 score: {train_f1}%',
    f'Test f1 score: {test_f1}%',
    sep='\n',
)

Train f1 score: 85.81%
Test f1 score: 90.62%


In [19]:
def precision_recall_f1_score(true_labels, pred_labels, test=True):
    """Print precision, recall and F1 for one data split.

    Each metric is computed with the corresponding ``sklearn.metrics``
    function (default arguments), multiplied by 100 and rounded to two
    decimals before being printed.

    Parameters
    ----------
    true_labels : array-like
        Reference labels; passed as the first argument to each metric.
    pred_labels : array-like
        Predicted labels; passed as the second argument to each metric.
    test : bool, default True
        Only affects the heading: a truthy value prints ``Test summary:``,
        a falsy value prints ``Train summary:``.

    Returns
    -------
    None
        The report is written to stdout; nothing is returned.
    """
    # Plain truthiness rather than `== True`, so any truthy flag (not only
    # values equal to True) selects the "Test" heading.
    condition = 'Test' if test else 'Train'

    precision_value = round(precision_score(true_labels, pred_labels)*100, 2)
    recall_value = round(recall_score(true_labels, pred_labels)*100, 2)
    f1_value = round(f1_score(true_labels, pred_labels)*100, 2)

    print(f'{condition} summary:')
    print(10*'=')

    print(f'Precision score: {precision_value}%')
    print(f'Recall score: {recall_value}%')
    print(f'F1 score: {f1_value}%')

In [20]:
precision_recall_f1_score(y_train, train_predict, False)

Train summary:
==========
Precision score: 81.05%
Recall score: 91.18%
F1 score: 85.81%


In [21]:
precision_recall_f1_score(y_test, test_predict)

Test summary:
==========
Precision score: 82.86%
Recall score: 100.0%
F1 score: 90.62%
