In [58]:
# Predicting Mortality by Heart Failure

## Import Libraries

In [59]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense

## Import Data

In [60]:
data = pd.read_csv('heart_failure_clinical_records_dataset.csv')
data.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


## Inspect Data

In [61]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    int64  
 2   creatinine_phosphokinase  299 non-null    int64  
 3   diabetes                  299 non-null    int64  
 4   ejection_fraction         299 non-null    int64  
 5   high_blood_pressure       299 non-null    int64  
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    int64  
 9   sex                       299 non-null    int64  
 10  smoking                   299 non-null    int64  
 11  time                      299 non-null    int64  
 12  DEATH_EVENT               299 non-null    int64  
dtypes: float64(3), int64(10)
memory usage: 30.5 KB


In [62]:
# Distribution of DEATH_EVENT
Counter(data['DEATH_EVENT'])

Counter({0: 203, 1: 96})

## Extract Features

In [63]:
# y
y = data['DEATH_EVENT']

# x
x_features = []

for i in data.columns:
    if i != 'DEATH_EVENT':
        x_features.append(i)

x = data[x_features]

In [64]:
x = pd.get_dummies(x)

In [65]:
# Split x and y in training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(x, 
                                                    y, 
                                                    train_size=0.8,
                                                   test_size=0.2,
                                                   random_state=40)

In [66]:
# Standardise x variables


ct = ColumnTransformer([('numeric', StandardScaler(), ['age',
                                                     'creatinine_phosphokinase',
                                                     'ejection_fraction',
                                                     'platelets',
                                                     'serum_creatinine',
                                                     'serum_sodium',
                                                     'time'])])
X_train = ct.fit_transform(X_train)
X_test = ct.fit_transform(X_test)

In [67]:
# Prepare labels for classification
le = LabelEncoder()

Y_train = le.fit_transform(Y_train.astype(str))
Y_test = le.fit_transform(Y_test.astype(str))

Y_train = to_categorical(Y_train)
Y_test = to_categorical(Y_test)

## Model

### Design the Model

In [68]:
model = Sequential()
model.add(InputLayer(input_shape=(X_train.shape[1])))
model.add(Dense(12, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

### Fit the Model

In [69]:
model.fit(X_train, Y_train, epochs=100, batch_size=16)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.src.callbacks.History at 0x284c872d0>

### Evaluate the Model

In [70]:
loss, acc = model.evaluate(X_test, Y_test, verbose=0)

print("Loss:", loss, " ", "Accuracy:", acc)

Loss: 0.3400396704673767   Accuracy: 0.7833333611488342


In [71]:
y_estimate = model.predict(X_test)
y_estimate = np.argmax(y_estimate, axis=1)
y_true = np.argmax(Y_test, axis=1)

classification_report(y_true, y_estimate)



'              precision    recall  f1-score   support\n\n           0       0.83      0.85      0.84        40\n           1       0.68      0.65      0.67        20\n\n    accuracy                           0.78        60\n   macro avg       0.76      0.75      0.75        60\nweighted avg       0.78      0.78      0.78        60\n'