<a href="https://colab.research.google.com/github/caltunay/dd_competition_heart_disease/blob/master/Deep_Learning_Keras_Heart_Disease_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, precision_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV

In [0]:
train_val = pd.read_csv('train_values.csv')
train_lab = pd.read_csv('train_labels.csv')

test_val = pd.read_csv('test_values.csv')

In [0]:
def slope_type(df):
    
    """Convert slope_of_peak_exercise_st_segment
    column to string/categorical values"""
    
    if df['slope_of_peak_exercise_st_segment'] == 1:
        return 'Upslope'
    
    elif df['slope_of_peak_exercise_st_segment'] == 2:
        return 'Flat'
    
    else: 
        return 'Downslope'

In [0]:
train_val['slope_type'] = train_val.apply(slope_type, axis = 1)
test_val['slope_type'] = test_val.apply(slope_type, axis = 1)




In [0]:
def chest_pain_type(df):
    
    """Convert chest pain
    type to string/categorical values"""
    
   
    if df['chest_pain_type'] == 1:
        return 'typical angina'

    elif df['chest_pain_type'] == 2:
        return 'atypical angina'
    
    elif df['chest_pain_type'] == 3:
        return 'non-anginal angina'
    
    else:
        return 'asymptomatic'

In [0]:
train_val['chest_pain'] = train_val.apply(chest_pain_type, axis = 1)

In [0]:
test_val['chest_pain'] = test_val.apply(chest_pain_type, axis = 1)

In [0]:
def vessel_type(df):

    """Convert major vessels to categorical"""
    
    if df['num_major_vessels'] == 0:
        return 'Color 0'
    elif df['num_major_vessels'] == 1:
        return 'Color 1'
    elif df['num_major_vessels'] == 2:
        return 'Color 2'
    else:
        return 'Color 3'

In [0]:
train_val['vessel_type'] = train_val.apply(vessel_type, axis = 1)
train_val['blood_sugar'] = train_val['fasting_blood_sugar_gt_120_mg_per_dl'].map({1:"Over 120mg", 0:"Under 120mg"})
train_val['ekg'] = train_val['resting_ekg_results'].map({0:'normal', 1: 'abnormality', 2: 'hypertrophy'})
train_val['chest_pain_with_exercise'] = train_val['exercise_induced_angina'].map({0:'None', 1:'Exist'}) 
test_val['vessel_type'] = test_val.apply(vessel_type, axis = 1)


In [0]:
test_val['blood_sugar'] = test_val['fasting_blood_sugar_gt_120_mg_per_dl'].map({1:"Over 120mg", 0:"Under 120mg"})
test_val['ekg'] = test_val['resting_ekg_results'].map({0:'normal', 1: 'abnormality', 2: 'hypertrophy'})
test_val['chest_pain_with_exercise'] = test_val['exercise_induced_angina'].map({0:'None', 1:'Exist'}) 


In [0]:
drop_cols = 'slope_of_peak_exercise_st_segment chest_pain_type num_major_vessels fasting_blood_sugar_gt_120_mg_per_dl resting_ekg_results exercise_induced_angina'.split()

train_clean = train_val.drop(labels = drop_cols, axis = 1)
test_clean = test_val.drop(labels = drop_cols, axis = 1)


In [0]:
train_clean['sex'] = train_val['sex'].map({0: 'female', 1: 'male'})
test_clean['sex'] = test_val['sex'].map({0: 'female', 1: 'male'})

In [0]:
obj_cols = ['thal', 'sex', 'slope_type', 'chest_pain', 'vessel_type', 'blood_sugar', 'ekg', 'chest_pain_with_exercise']

train_dummied = pd.get_dummies(data = train_clean, prefix_sep = '|', drop_first = True, columns = obj_cols)
test_dummied = pd.get_dummies(data = test_clean, prefix_sep = '|', drop_first = True, columns = obj_cols)

In [0]:
train_dummied.head()

Unnamed: 0,patient_id,resting_blood_pressure,serum_cholesterol_mg_per_dl,oldpeak_eq_st_depression,age,max_heart_rate_achieved,thal|normal,thal|reversible_defect,sex|male,slope_type|Flat,slope_type|Upslope,chest_pain|atypical angina,chest_pain|non-anginal angina,chest_pain|typical angina,vessel_type|Color 1,vessel_type|Color 2,vessel_type|Color 3,blood_sugar|Under 120mg,ekg|hypertrophy,ekg|normal,chest_pain_with_exercise|None
0,0z64un,128,308,0.0,45,170,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1
1,ryoo3j,110,214,1.6,54,158,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1
2,yt1s1x,125,304,0.0,77,162,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0
3,l2xjde,152,223,0.0,40,181,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1
4,oyt4ek,178,270,4.2,59,145,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1


In [0]:
test_dummied.head()

Unnamed: 0,patient_id,resting_blood_pressure,serum_cholesterol_mg_per_dl,oldpeak_eq_st_depression,age,max_heart_rate_achieved,thal|normal,thal|reversible_defect,sex|male,slope_type|Flat,slope_type|Upslope,chest_pain|atypical angina,chest_pain|non-anginal angina,chest_pain|typical angina,vessel_type|Color 1,vessel_type|Color 2,vessel_type|Color 3,blood_sugar|Under 120mg,ekg|hypertrophy,ekg|normal,chest_pain_with_exercise|None
0,olalu7,170,288,0.2,59,159,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1
1,z9n6mx,138,183,1.4,35,182,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1
2,5k4413,120,177,2.5,43,120,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0
3,mrg7q5,102,318,0.0,60,160,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1
4,uki4do,138,166,3.6,61,125,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0


In [0]:
ser, eq_depression_bin = pd.qcut(x = train_dummied['oldpeak_eq_st_depression'], q = 3, labels = ['low', 'mid', 'high'], retbins = True)

In [0]:
train_dummied = pd.merge(left = train_dummied, right = ser, left_index = True, right_index = True)

In [0]:
train_dummied.drop(labels = 'oldpeak_eq_st_depression_x', axis = 1, inplace = True)

In [0]:
train_dummied.head()

Unnamed: 0,patient_id,resting_blood_pressure,serum_cholesterol_mg_per_dl,age,max_heart_rate_achieved,thal|normal,thal|reversible_defect,sex|male,slope_type|Flat,slope_type|Upslope,chest_pain|atypical angina,chest_pain|non-anginal angina,chest_pain|typical angina,vessel_type|Color 1,vessel_type|Color 2,vessel_type|Color 3,blood_sugar|Under 120mg,ekg|hypertrophy,ekg|normal,chest_pain_with_exercise|None,oldpeak_eq_st_depression_y
0,0z64un,128,308,45,170,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,low
1,ryoo3j,110,214,54,158,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,high
2,yt1s1x,125,304,77,162,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,low
3,l2xjde,152,223,40,181,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,low
4,oyt4ek,178,270,59,145,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,high


In [0]:
serum_ser, serum_bin = pd.qcut(x = train_dummied['serum_cholesterol_mg_per_dl'], q = 4, labels = ['low', 'mid-low', 'mid-high', 'high'], retbins = True)
# train_clean = pd.merge(left = train_clean, right = serum_ser, left_index = True, right_index = True)
# train_clean.drop(labels = 'serum_cholesterol_mg_per_dl_x', axis = 1, inplace = True)
# train_clean.head()

In [0]:
train_dummied = pd.merge(left = train_dummied, right = serum_ser, left_index = True, right_index = True)
train_dummied.head()

Unnamed: 0,patient_id,resting_blood_pressure,serum_cholesterol_mg_per_dl_x,age,max_heart_rate_achieved,thal|normal,thal|reversible_defect,sex|male,slope_type|Flat,slope_type|Upslope,chest_pain|atypical angina,chest_pain|non-anginal angina,chest_pain|typical angina,vessel_type|Color 1,vessel_type|Color 2,vessel_type|Color 3,blood_sugar|Under 120mg,ekg|hypertrophy,ekg|normal,chest_pain_with_exercise|None,oldpeak_eq_st_depression_y,serum_cholesterol_mg_per_dl_y
0,0z64un,128,308,45,170,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,low,high
1,ryoo3j,110,214,54,158,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,high,mid-low
2,yt1s1x,125,304,77,162,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,low,high
3,l2xjde,152,223,40,181,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,low,mid-low
4,oyt4ek,178,270,59,145,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,high,mid-high


In [0]:
train_dummied.drop(labels = 'serum_cholesterol_mg_per_dl_x', axis = 1, inplace = True)
train_dummied.head()

Unnamed: 0,patient_id,resting_blood_pressure,age,max_heart_rate_achieved,thal|normal,thal|reversible_defect,sex|male,slope_type|Flat,slope_type|Upslope,chest_pain|atypical angina,chest_pain|non-anginal angina,chest_pain|typical angina,vessel_type|Color 1,vessel_type|Color 2,vessel_type|Color 3,blood_sugar|Under 120mg,ekg|hypertrophy,ekg|normal,chest_pain_with_exercise|None,oldpeak_eq_st_depression_y,serum_cholesterol_mg_per_dl_y
0,0z64un,128,45,170,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,low,high
1,ryoo3j,110,54,158,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,high,mid-low
2,yt1s1x,125,77,162,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,low,high
3,l2xjde,152,40,181,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,low,mid-low
4,oyt4ek,178,59,145,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,high,mid-high


In [0]:
test_dummied.head()

test_dep = pd.cut(test_dummied['oldpeak_eq_st_depression'], bins = eq_depression_bin, labels = ['low', 'mid', 'high'], include_lowest = True)

test_dummied = pd.merge(left = test_dummied, right = test_dep, left_index = True, right_index = True)
test_dummied.drop(labels = 'oldpeak_eq_st_depression_x', axis = 1, inplace = True)


In [0]:
test_temp2 = pd.cut(test_dummied['serum_cholesterol_mg_per_dl'], bins = serum_bin, labels = ['low', 'mid-low', 'mid-high', 'high'], include_lowest = True )

test_dummied = pd.merge(left = test_dummied, right = test_temp2, left_index = True, right_index = True)
test_dummied.drop(labels = 'serum_cholesterol_mg_per_dl_x', axis = 1, inplace = True)

test_dummied.head()

Unnamed: 0,patient_id,resting_blood_pressure,age,max_heart_rate_achieved,thal|normal,thal|reversible_defect,sex|male,slope_type|Flat,slope_type|Upslope,chest_pain|atypical angina,chest_pain|non-anginal angina,chest_pain|typical angina,vessel_type|Color 1,vessel_type|Color 2,vessel_type|Color 3,blood_sugar|Under 120mg,ekg|hypertrophy,ekg|normal,chest_pain_with_exercise|None,oldpeak_eq_st_depression_y,serum_cholesterol_mg_per_dl_y
0,olalu7,170,59,159,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,mid,high
1,z9n6mx,138,35,182,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,mid,low
2,5k4413,120,43,120,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0,high,low
3,mrg7q5,102,60,160,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,low,high
4,uki4do,138,61,125,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0,high,low


In [0]:
train_dummied = pd.get_dummies(data = train_dummied, columns = ['oldpeak_eq_st_depression_y', 'serum_cholesterol_mg_per_dl_y'], drop_first = True, prefix_sep= '|')
test_dummied = pd.get_dummies(data = test_dummied, columns = ['oldpeak_eq_st_depression_y', 'serum_cholesterol_mg_per_dl_y'], drop_first = True, prefix_sep= '|')

In [0]:
train_dummied.shape

(180, 24)

In [0]:
test_dummied.shape

(90, 24)

In [0]:
ssx = StandardScaler()

In [0]:
x = train_dummied.drop(labels = 'patient_id', axis = 1).values
y = train_lab['heart_disease_present'].values

In [0]:
scaled_x = ssx.fit_transform(x)

In [0]:
import keras 

Using TensorFlow backend.


In [0]:
from keras.models import Sequential
from keras.layers import Dense

In [0]:
clf = Sequential()

W0807 05:54:46.985857 140038394906496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.



In [0]:
clf.add(Dense(input_dim = 23, output_dim = 12, activation = 'relu', init = 'uniform'))

  """Entry point for launching an IPython kernel.
W0807 05:57:07.333960 140038394906496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0807 05:57:07.339692 140038394906496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



In [0]:
clf.add(Dense(output_dim = 12, activation = 'relu', init = 'uniform'))

  """Entry point for launching an IPython kernel.


In [0]:
clf.add(Dense(output_dim = 1, init = 'uniform', activation = 'sigmoid'))

  """Entry point for launching an IPython kernel.


In [0]:
clf.compile(optimizer = 'adam', metrics = ['accuracy'], loss = 'binary_crossentropy')

W0807 06:03:02.620445 140038394906496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0807 06:03:02.655609 140038394906496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3376: The name tf.log is deprecated. Please use tf.math.log instead.

W0807 06:03:02.663616 140038394906496 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [0]:
clf.fit(scaled_x, y, epochs = 150, batch_size = 10)

W0807 06:04:06.307267 140038394906496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x7f5cf4f5cf60>

In [0]:
from sklearn.metrics import classification_report

In [0]:
x_test = test_dummied.drop(labels = 'patient_id', axis = 1).values
x_test_scaled = ssx.transform(x_test)

In [0]:
yhat = clf.predict(x_test_scaled)

In [0]:
patients = test_dummied[['patient_id']]

predictions_df = pd.merge(left = patients, right = pd.DataFrame(yhat), left_index = True, right_index = True)
predictions_df

Unnamed: 0,patient_id,0
0,olalu7,9.881518e-01
1,z9n6mx,5.960464e-08
2,5k4413,9.999954e-01
3,mrg7q5,6.297082e-03
4,uki4do,9.999940e-01
5,kev1sk,2.175570e-06
6,9n6let,2.210733e-02
7,jxmtyg,9.996166e-01
8,51s2ff,3.072220e-02
9,wi9mcs,3.190875e-03


In [0]:
predictions_df.rename({0:'heart_disease_present'}, inplace = True)
predictions_df.set_index('patient_id',inplace = True)
predictions_df.head()

Unnamed: 0_level_0,0
patient_id,Unnamed: 1_level_1
olalu7,0.9881518
z9n6mx,5.960464e-08
5k4413,0.9999954
mrg7q5,0.006297082
uki4do,0.999994


In [0]:
predictions_df.rename({0:'heart_disease_present'}, inplace = True, axis = 1)
predictions_df.to_csv('ANN - Heart Disease.csv')