In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, ShuffleSplit

In [2]:
df = pd.read_csv('heart_failure_clinical_records_dataset.csv')

In [3]:
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [4]:
df_na= df.isna().sum()
print(df_na)

age                         0
anaemia                     0
creatinine_phosphokinase    0
diabetes                    0
ejection_fraction           0
high_blood_pressure         0
platelets                   0
serum_creatinine            0
serum_sodium                0
sex                         0
smoking                     0
time                        0
DEATH_EVENT                 0
dtype: int64


In [5]:
df = pd.get_dummies(df, columns=['anaemia', 'diabetes', 'high_blood_pressure', 'sex', 'smoking'], drop_first=True)
df.head()

Unnamed: 0,age,creatinine_phosphokinase,ejection_fraction,platelets,serum_creatinine,serum_sodium,time,DEATH_EVENT,anaemia_1,diabetes_1,high_blood_pressure_1,sex_1,smoking_1
0,75.0,582,20,265000.0,1.9,130,4,1,False,False,True,True,False
1,55.0,7861,38,263358.03,1.1,136,6,1,False,False,False,True,False
2,65.0,146,20,162000.0,1.3,129,7,1,False,False,False,True,True
3,50.0,111,20,210000.0,1.9,137,7,1,True,False,False,True,False
4,65.0,160,20,327000.0,2.7,116,8,1,True,True,False,False,False


In [6]:
X = df.drop('DEATH_EVENT', axis=1)  
y = df['DEATH_EVENT']  

In [7]:
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

logistic_params = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],  
    'solver': ['liblinear', 'saga']        
}

logistic_grid = GridSearchCV(LogisticRegression(max_iter=10000), logistic_params, cv=cv, scoring='accuracy')

logistic_grid.fit(X, y)

In [8]:
print("Best Parameters:", logistic_grid.best_params_)
print("Best Cross-Validation Score:", logistic_grid.best_score_)

Best Parameters: {'C': 0.1, 'solver': 'liblinear'}
Best Cross-Validation Score: 0.8533333333333333


In [9]:
import pickle

with open('logistic_model_Heart_failure.pkl', 'wb') as file:
    pickle.dump(logistic_grid, file)

print("Model saved to 'logistic_model_Heart_failure.pkl'")

Model saved to 'logistic_model_Heart_failure.pkl'
