### Simple Neural network based on demographic data for cardiovascular death prediction

In [32]:
import tensorflow as tf
import keras 
import pandas as pd

#### Load data and clean data from demographics file

In [60]:
demographics = pd.read_csv('Demographics.csv')
demographics.head()

Unnamed: 0.1,Unnamed: 0,SUBJECT_ID,GENDER,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,INSURANCE,ADMISSION_LOCATION,#ADMISSIONS,OUTSIDE_DEATH_FLAG,DEATH_FLAG,OLD_FLAG,HEART_ATTACK_FLAG,ATHERO_DIAGNOSIS_FLAG,HEART_DEATH_FLAG,CAUSE
0,0,31,M,2036-05-17,2108-08-30,2108-08-22 23:27:00,72.312329,WHITE,MARRIED,UKNOWN,...,Medicare,TRANSFER FROM HOSP/EXTRAM,1,0,1,0,0,0,0,STATUS EPILEPTICUS
1,1,56,F,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,UKNOWN,UKNOWN,...,Medicare,EMERGENCY ROOM ADMIT,1,0,1,1,0,0,0,HEAD BLEED
2,2,61,M,2063-10-21,2119-02-03,2119-01-04 18:12:00,55.241096,WHITE,MARRIED,UKNOWN,...,Private,CLINIC REFERRAL/PREMATURE,2,0,1,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA
3,3,67,M,2084-06-05,2157-12-02,2157-12-02 00:45:00,73.539726,WHITE,SINGLE,UKNOWN,...,Medicare,EMERGENCY ROOM ADMIT,2,0,1,0,0,0,0,SUBARACHNOID HEMORRHAGE
4,4,84,F,2151-10-21,2196-04-17,2196-04-14 04:02:00,44.512329,WHITE,MARRIED,UKNOWN,...,Private,EMERGENCY ROOM ADMIT,2,0,1,0,0,0,0,"GLIOBLASTOMA,NAUSEA"


In [61]:
# First, define atherosclerosis diagnoses from non-atherosclerosis diagnoses
athero_pre = demographics[demographics['OLD_FLAG']==0]
athero_pos = athero_pre[athero_pre['ATHERO_DIAGNOSIS_FLAG']== 1]
athero_neg = athero_pre[athero_pre['ATHERO_DIAGNOSIS_FLAG']==0]

# Clean data sets
del athero_neg['CAUSE']
del athero_pos['CAUSE']

del athero_neg['ATHERO_DIAGNOSIS_FLAG']
del athero_pos['ATHERO_DIAGNOSIS_FLAG']

del athero_neg['OLD_FLAG']
del athero_pos['OLD_FLAG']

del athero_neg['OUTSIDE_DEATH_FLAG']
del athero_pos['OUTSIDE_DEATH_FLAG']

del athero_neg['SUBJECT_ID']
del athero_pos['SUBJECT_ID']

del athero_neg['DOB']
del athero_pos['DOB']

del athero_neg['DOD']
del athero_pos['DOD']

In [62]:
athero_pos['DOA']
del athero_pos['DOA']
del athero_neg['DOA']

athero_neg['HEART_ATTACK_FLAG']
del athero_neg['HEART_ATTACK_FLAG']
del athero_pos['HEART_ATTACK_FLAG']

del athero_pos['Unnamed: 0']
athero_pos.head()

Unnamed: 0,GENDER,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,RELIGION,INSURANCE,ADMISSION_LOCATION,#ADMISSIONS,DEATH_FLAG,HEART_DEATH_FLAG
9,M,69.641096,WHITE,MARRIED,UKNOWN,CATHOLIC,Private,TRANSFER FROM HOSP/EXTRAM,4,1,0
12,F,69.005479,WHITE,MARRIED,ENGL,PROTESTANT QUAKER,Medicare,EMERGENCY ROOM ADMIT,2,1,0
17,M,87.882192,WHITE,MARRIED,UKNOWN,JEWISH,Medicare,EMERGENCY ROOM ADMIT,2,1,0
19,F,76.871233,WHITE,MARRIED,PORT,CATHOLIC,Medicare,TRANSFER FROM HOSP/EXTRAM,4,1,1
22,F,85.726027,BLACK/AFRICAN AMERICAN,WIDOWED,UKNOWN,CATHOLIC,Medicare,EMERGENCY ROOM ADMIT,2,1,0


In [63]:
# Create Outcome data sets
athero_heartdeath = pd.Series(athero_pos['HEART_DEATH_FLAG'])
athero_death = pd.Series(athero_pos['DEATH_FLAG'])

In [64]:
del athero_pos['HEART_DEATH_FLAG']
del athero_pos['DEATH_FLAG']

In [65]:
# Get dummies
athero_pos = pd.get_dummies(athero_pos, columns=['GENDER','ETHNICITY','MARITAL_STATUS', 'LANGUAGE', 'RELIGION', 'INSURANCE', 'ADMISSION_LOCATION'])

In [66]:
# Check outcome numbers
print(athero_heartdeath.value_counts())
print(athero_death.value_counts())

0    10001
1      202
Name: HEART_DEATH_FLAG, dtype: int64
0    6576
1    3627
Name: DEATH_FLAG, dtype: int64


In [67]:
# Normalize data
from sklearn import preprocessing
athero_pos = preprocessing.scale(athero_pos)

In [68]:
# Test/ train 
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(athero_pos, athero_death, test_size=0.20, random_state=42)

#### Create simple neural network as a baseline model

In [69]:
from keras.models import Sequential
from keras.layers import Dense
import numpy
# fix random seed for reproducibility
numpy.random.seed(0)

In [70]:
# Create model
model = Sequential()
model.add(Dense(80, input_dim=121 , activation = 'relu'))
model.add(Dense(60, activation = 'relu'))
model.add(Dense(40, activation = 'relu'))
model.add(Dense(20, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))

In [71]:
# Compile model
model.compile(loss = 'binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [72]:
# checkpoint
from keras.callbacks import ModelCheckpoint
filepath="weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [73]:
model.fit(X_train, y_train, batch_size=20, nb_epoch=150, verbose=1, callbacks=callbacks_list, validation_data=(X_test, y_test), shuffle=True)



Train on 8162 samples, validate on 2041 samples
Epoch 1/150

Epoch 00001: val_acc improved from -inf to 0.73640, saving model to weights.best.hdf5
Epoch 2/150

Epoch 00002: val_acc improved from 0.73640 to 0.73983, saving model to weights.best.hdf5
Epoch 3/150

Epoch 00003: val_acc did not improve
Epoch 4/150

Epoch 00004: val_acc improved from 0.73983 to 0.74473, saving model to weights.best.hdf5
Epoch 5/150

Epoch 00005: val_acc did not improve
Epoch 6/150

Epoch 00006: val_acc did not improve
Epoch 7/150

Epoch 00007: val_acc did not improve
Epoch 8/150

Epoch 00008: val_acc did not improve
Epoch 9/150

Epoch 00009: val_acc did not improve
Epoch 10/150

Epoch 00010: val_acc did not improve
Epoch 11/150

Epoch 00011: val_acc did not improve
Epoch 12/150

Epoch 00012: val_acc did not improve
Epoch 13/150

Epoch 00013: val_acc did not improve
Epoch 14/150

Epoch 00014: val_acc did not improve
Epoch 15/150

Epoch 00015: val_acc did not improve
Epoch 16/150

Epoch 00016: val_acc did not 


Epoch 00046: val_acc did not improve
Epoch 47/150

Epoch 00047: val_acc did not improve
Epoch 48/150

Epoch 00048: val_acc did not improve
Epoch 49/150

Epoch 00049: val_acc did not improve
Epoch 50/150

Epoch 00050: val_acc did not improve
Epoch 51/150

Epoch 00051: val_acc did not improve
Epoch 52/150

Epoch 00052: val_acc did not improve
Epoch 53/150

Epoch 00053: val_acc did not improve
Epoch 54/150

Epoch 00054: val_acc did not improve
Epoch 55/150

Epoch 00055: val_acc did not improve
Epoch 56/150

Epoch 00056: val_acc did not improve
Epoch 57/150

Epoch 00057: val_acc did not improve
Epoch 58/150

Epoch 00058: val_acc did not improve
Epoch 59/150

Epoch 00059: val_acc did not improve
Epoch 60/150

Epoch 00060: val_acc did not improve
Epoch 61/150

Epoch 00061: val_acc did not improve
Epoch 62/150

Epoch 00062: val_acc did not improve
Epoch 63/150

Epoch 00063: val_acc did not improve
Epoch 64/150

Epoch 00064: val_acc did not improve
Epoch 65/150

Epoch 00065: val_acc did not i


Epoch 00093: val_acc did not improve
Epoch 94/150

Epoch 00094: val_acc did not improve
Epoch 95/150

Epoch 00095: val_acc did not improve
Epoch 96/150

Epoch 00096: val_acc did not improve
Epoch 97/150

Epoch 00097: val_acc did not improve
Epoch 98/150

Epoch 00098: val_acc did not improve
Epoch 99/150

Epoch 00099: val_acc did not improve
Epoch 100/150

Epoch 00100: val_acc did not improve
Epoch 101/150

Epoch 00101: val_acc did not improve
Epoch 102/150

Epoch 00102: val_acc did not improve
Epoch 103/150

Epoch 00103: val_acc did not improve
Epoch 104/150

Epoch 00104: val_acc did not improve
Epoch 105/150

Epoch 00105: val_acc did not improve
Epoch 106/150

Epoch 00106: val_acc did not improve
Epoch 107/150

Epoch 00107: val_acc did not improve
Epoch 108/150

Epoch 00108: val_acc did not improve
Epoch 109/150

Epoch 00109: val_acc did not improve
Epoch 110/150

Epoch 00110: val_acc did not improve
Epoch 111/150

Epoch 00111: val_acc did not improve
Epoch 112/150

Epoch 00112: val_


Epoch 00140: val_acc did not improve
Epoch 141/150

Epoch 00141: val_acc did not improve
Epoch 142/150

Epoch 00142: val_acc did not improve
Epoch 143/150

Epoch 00143: val_acc did not improve
Epoch 144/150

Epoch 00144: val_acc did not improve
Epoch 145/150

Epoch 00145: val_acc did not improve
Epoch 146/150

Epoch 00146: val_acc did not improve
Epoch 147/150

Epoch 00147: val_acc did not improve
Epoch 148/150

Epoch 00148: val_acc did not improve
Epoch 149/150

Epoch 00149: val_acc did not improve
Epoch 150/150

Epoch 00150: val_acc did not improve


<keras.callbacks.History at 0x1205695f8>

In [74]:
# Load model 
model.load_weights("weights.best.hdf5")

# estimate accuracy on test data set using loaded weights
scores = model.evaluate(X_test, y_test, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

acc: 74.47%


#### So with a simple neural network focused on demographic data only we have an accuracy of 74%, a 5% improvement over XGBoost