In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import io
import requests
import pandas as pd
from pathlib import Path

In [2]:
### This is the local file path, but we decided to use an S3 bucket instead
# readpath = Path.cwd() / 'Data' / 'heart_failure_clinical_records_dataset.csv'
url="https://final-project-heart.s3.amazonaws.com/heart_failure_clinical_records_dataset.csv"
s=requests.get(url).content
df = pd.read_csv(io.StringIO(s.decode('utf-8')))

In [3]:
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


### Formatting Dataset for Model with All Features

In [4]:
target = df["DEATH_EVENT"]
target_names = ["living", "deceased"]
data = df.drop("DEATH_EVENT", axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8


In [5]:
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.20, random_state=3)

### Formatting Dataset for Model with Patient Known Features

In [6]:
patient_known_data = df.drop(["time",
                              "creatinine_phosphokinase", 
                              "ejection_fraction", 
                              "platelets", 
                              "serum_creatinine", 
                              "serum_sodium", 
                              "DEATH_EVENT"], axis=1)
feature_names = patient_known_data.columns
patient_known_data.head()

Unnamed: 0,age,anaemia,diabetes,high_blood_pressure,sex,smoking
0,75.0,0,0,1,1,0
1,55.0,0,0,0,1,0
2,65.0,0,0,0,1,1
3,50.0,1,0,0,1,0
4,65.0,1,1,0,0,0


In [7]:
X_train_kd, X_test_kd, y_train_kd, y_test_kd = train_test_split(patient_known_data, target, test_size=0.20, random_state=4)

### Scale using StandardScaler
StandardScaler() is a sklearn.preprocessing class that allows the user to "Standardize features by removing the mean and scaling to unit variance."

In [8]:
std_scaler = StandardScaler()
std_scaler.fit(X_train)
X_train_std = std_scaler.transform(X_train)
X_test_std = std_scaler.transform(X_test)

std_scaler_kd = StandardScaler()
std_scaler_kd.fit(X_train_kd)
X_train_std_kd = std_scaler_kd.transform(X_train_kd)
X_test_std_kd = std_scaler_kd.transform(X_test_kd)

### Scale using MinMaxScaler
MinMaxScaler() is a sklearn.preprocessing class that allows the user to "Transform features by scaling each feature to a given range."

In [9]:
mm_scaler = StandardScaler()
mm_scaler.fit(X_train)
X_train_mm = mm_scaler.transform(X_train)
X_test_mm = mm_scaler.transform(X_test)

mm_scaler_kd = StandardScaler()
mm_scaler_kd.fit(X_train_kd)
X_train_mm_kd = mm_scaler_kd.transform(X_train_kd)
X_test_mm_kd = mm_scaler_kd.transform(X_test_kd)

## All Features Data 
#### Create, Fit, Predict, and Score MLP Classifier with StandardScaler

In [10]:
mlp_std = MLPClassifier(hidden_layer_sizes=(8,8,8), activation='relu', solver='adam', random_state=1, max_iter=2500)
mlp_std.fit(X_train_std, y_train)
predict_train_std = mlp_std.predict(X_train_std)
predict_test_std=mlp_std.predict(X_test_std)

In [11]:
predictions = mlp_std.predict(X_test_std)
report = classification_report(y_test, predictions, target_names=target_names, output_dict=True)
mlp_report = pd.DataFrame(report).T
mlp_report.to_html('./templates/MLP_Classifier_all_features_StdScaler_report.html')
print(confusion_matrix(y_train,predict_train_std))
mlp_report.head()

[[162   1]
 [  0  76]]


Unnamed: 0,precision,recall,f1-score,support
living,0.882353,0.75,0.810811,40.0
deceased,0.615385,0.8,0.695652,20.0
accuracy,0.766667,0.766667,0.766667,0.766667
macro avg,0.748869,0.775,0.753231,60.0
weighted avg,0.793363,0.766667,0.772425,60.0


#### Create, Fit, Predict, and Score MLP Classifier with MinMaxScaler data

In [12]:
mlp_mm = MLPClassifier(hidden_layer_sizes=(8,8,8), activation='relu', solver='adam', random_state=2, max_iter=2500)
mlp_mm.fit(X_train_mm, y_train)
predict_train_mm = mlp_mm.predict(X_train_mm)
predict_test_mm=mlp_mm.predict(X_test_mm)

In [13]:
predictions = mlp_mm.predict(X_test_mm)
report = classification_report(y_test, predictions, target_names=target_names, output_dict=True)
mlp_report = pd.DataFrame(report).T
mlp_report.to_html('./templates/MLP_Classifier_all_features_MinMaxScaler_report.html')
print(confusion_matrix(y_train,predict_train_mm))
mlp_report.head()

[[163   0]
 [  1  75]]


Unnamed: 0,precision,recall,f1-score,support
living,0.810811,0.75,0.779221,40.0
deceased,0.565217,0.65,0.604651,20.0
accuracy,0.716667,0.716667,0.716667,0.716667
macro avg,0.688014,0.7,0.691936,60.0
weighted avg,0.728946,0.716667,0.721031,60.0


## Patient Known Data
#### Create, Fit, Predict, and Score MLP Classifier with StandardScaler data

In [14]:
mlp_std_kd = MLPClassifier(hidden_layer_sizes=(8,8,8), activation='relu', solver='adam', random_state=3, max_iter=2500)
mlp_std_kd.fit(X_train_std_kd, y_train_kd)
predict_train_std_kd = mlp_std_kd.predict(X_train_std_kd)
predict_test_std_kd=mlp_std_kd.predict(X_test_std_kd)

In [15]:
predictions = mlp_std_kd.predict(X_test_std_kd)
report = classification_report(y_test_kd, predictions, target_names=target_names, output_dict=True)
mlp_report = pd.DataFrame(report).T
mlp_report.to_html('./templates/MLP_Classifier_patient_known_features_StdScaler_report.html')
print(confusion_matrix(y_train_kd,predict_train_std_kd))
mlp_report.head()

[[151  10]
 [ 49  29]]


Unnamed: 0,precision,recall,f1-score,support
living,0.698113,0.880952,0.778947,42.0
deceased,0.285714,0.111111,0.16,18.0
accuracy,0.65,0.65,0.65,0.65
macro avg,0.491914,0.496032,0.469474,60.0
weighted avg,0.574394,0.65,0.593263,60.0


### Create, Fit, Predict, and Score MLP Classifier with MinMaxScaler data

In [16]:
mlp_mm_kd = MLPClassifier(hidden_layer_sizes=(8,8,8), activation='relu', solver='adam', random_state=4, max_iter=2500)
mlp_mm_kd.fit(X_train_mm_kd, y_train_kd)
predict_train_mm_kd = mlp_mm_kd.predict(X_train_mm_kd)
predict_test_mm_kd=mlp_mm_kd.predict(X_test_mm_kd)

In [17]:
predictions = mlp_mm_kd.predict(X_test_mm_kd)
report = classification_report(y_test_kd, predictions, target_names=target_names, output_dict=True)
mlp_report = pd.DataFrame(report).T
mlp_report.to_html('./templates/MLP_Classifier_patient_known_features_MinMaxScaler_report.html')
print(confusion_matrix(y_train_kd,predict_train_mm_kd))
mlp_report.head()

[[147  14]
 [ 42  36]]


Unnamed: 0,precision,recall,f1-score,support
living,0.730769,0.904762,0.808511,42.0
deceased,0.5,0.222222,0.307692,18.0
accuracy,0.7,0.7,0.7,0.7
macro avg,0.615385,0.563492,0.558101,60.0
weighted avg,0.661538,0.7,0.658265,60.0


In [18]:
# X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.20, random_state=5)
# scaler = MinMaxScaler()
# scaler.fit(X_train)
# X_train = scaler.transform(X_train)
# X_test = scaler.transform(X_test)
# mlp = MLPClassifier(hidden_layer_sizes=(8,8,8), activation='relu', solver='adam', random_state=5, max_iter=1000)
# mlp.fit(X_train, y_train)
# mlp.score(X_test, y_test)

In [19]:
# predict_train = mlp.predict(X_train)
# predict_test=mlp.predict(X_test)
# print(confusion_matrix(y_train,predict_train))
# predictions = mlp.predict(X_test)
# report = classification_report(y_test, predictions, target_names=target_names, output_dict=True)
# mlp_report = pd.DataFrame(report).T
# mlp_report.to_html('./templates/MLP_Classifier_patient_known_features_report.html')
# mlp_report.head()