# Heart Failure

Codecademy Exercise: Deep Learning Classification

Predict the survival of patients with heart failure.

In [None]:
from collections import Counter

import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the data into pandas
data = pd.read_csv('heart_failure.csv')

# print columns and their respective types
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
print('Data Columns and Types')
data.info()

# print the class distribution
print('\nClass Distribution')
print(Counter(data['death_event']))

# print the first five entries in the dataset and the summary stats
print('\nDataset')
print(data.head(5))
print('\nSummary Stats')
print(data.describe())

In [None]:
# target: the `death_event` column
y = data['death_event']

# predictors: the columns fed to the model
feature_columns = [
    'age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
    'ejection_fraction', 'high_blood_pressure', 'platelets',
    'serum_creatinine', 'serum_sodium', 'sex', 'smoking', 'time',
]
x = data[feature_columns]

# apply one-hot-encoding to the categorical columns
x = pd.get_dummies(x)

# hold out a test set (common test fractions range between 0.20 and 0.35)
TEST_SIZE = 0.3
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=TEST_SIZE, random_state=42
)

# standardize only the float64/int64 columns; the remaining columns are passed through.
# The transformer is fitted on the training split and then re-used on the test split.
numerical_features = x.select_dtypes(include=['float64', 'int64'])
numerical_columns = numerical_features.columns
ct = ColumnTransformer(
    transformers=[('numeric', StandardScaler(), numerical_columns)],
    remainder='passthrough',
)
X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)

# encode the labels (cast to str first) as integer class ids;
# the encoder is fitted on the training labels and re-used for the test labels
le = LabelEncoder()
Y_train = le.fit_transform(Y_train.astype(str))
Y_test = le.transform(Y_test.astype(str))

# expand the integer class ids into binary (one-hot) vectors
Y_train = tf.keras.utils.to_categorical(Y_train, dtype='int64')
Y_test = tf.keras.utils.to_categorical(Y_test, dtype='int64')

In [None]:
# build the model
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation is repeatable on a fresh "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

num_features = X_train.shape[1]
# one output unit per class; Y_train is one-hot encoded, so its width is the class count
num_classes = Y_train.shape[1]
model = Sequential()
# input_shape must be a tuple: `(num_features)` is just an int, while
# `(num_features,)` is the intended 1-tuple (batch dimension excluded).
model.add(InputLayer(input_shape=(num_features,)))
model.add(Dense(12, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# compile the model
# categorical cross-entropy pairs with the softmax output and one-hot labels
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# fit the network on the preprocessed training features and one-hot labels
EPOCHS = 100
BATCH_SIZE = 16
model.fit(x=X_train, y=Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1)

In [None]:
# score the trained model on the held-out test set
# (evaluate returns the loss followed by the compiled metrics, here accuracy)
test_metrics = model.evaluate(X_test, Y_test, verbose=0)
loss, acc = test_metrics
print("Loss", loss, "Accuracy:", acc)

In [None]:
# get additional statistics
# (numpy and classification_report are imported in the notebook's import cell)

# per-class scores predicted by the model for every test sample
y_scores = model.predict(X_test)

# collapse the score vectors and the one-hot labels back to integer class ids
y_estimate = np.argmax(y_scores, axis=1)
y_true = np.argmax(Y_test, axis=1)

# per-class precision / recall / F1 on the test set
print(classification_report(y_true, y_estimate))