### Import dependencies

In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score

### Import and split dataset for training and testing

In [33]:
df = pd.read_csv("./Data/heart_2020_cleaned.csv")
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [34]:
df.columns

Index(['HeartDisease', 'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke',
       'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory',
       'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'SleepTime',
       'Asthma', 'KidneyDisease', 'SkinCancer'],
      dtype='object')

In [35]:
df['HeartDisease'] = df['HeartDisease'].apply(lambda x: 1 if x == 'Yes' else 0)
df['Smoking'] = df['Smoking'].apply(lambda x: 1 if x == 'Yes' else 0)
df['AlcoholDrinking'] = df['AlcoholDrinking'].apply(lambda x: 1 if x == 'Yes' else 0)
df['Stroke'] = df['Smoking'].apply(lambda x: 1 if x == 'Yes' else 0)
df['DiffWalking'] = df['DiffWalking'].apply(lambda x: 1 if x == 'Yes' else 0)
df['Diabetic'] = df['Diabetic'].apply(lambda x: 1 if x == 'Yes' else 0)
df['PhysicalActivity'] = df['PhysicalActivity'].apply(lambda x: 1 if x == 'Yes' else 0)
df['Asthma'] = df['Asthma'].apply(lambda x: 1 if x == 'Yes' else 0)
df['KidneyDisease'] = df['KidneyDisease'].apply(lambda x: 1 if x == 'Yes' else 0)
df['SkinCancer'] = df['SkinCancer'].apply(lambda x: 1 if x == 'Yes' else 0)
df['Sex'] = df['Sex'].apply(lambda x: 1 if x == 'Female' else 0)

df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0,16.6,1,0,0,3.0,30.0,0,1,55-59,White,1,1,Very good,5.0,1,0,1
1,0,20.34,0,0,0,0.0,0.0,0,1,80 or older,White,0,1,Very good,7.0,0,0,0
2,0,26.58,1,0,0,20.0,30.0,0,0,65-69,White,1,1,Fair,8.0,1,0,0
3,0,24.21,0,0,0,0.0,0.0,0,1,75-79,White,0,0,Good,6.0,0,0,1
4,0,23.71,0,0,0,28.0,0.0,1,1,40-44,White,0,1,Very good,8.0,0,0,0


In [36]:
df['AgeCategory'].unique()

array(['55-59', '80 or older', '65-69', '75-79', '40-44', '70-74',
       '60-64', '50-54', '45-49', '18-24', '35-39', '30-34', '25-29'],
      dtype=object)

In [37]:
df = pd.get_dummies(df, columns=['AgeCategory', 'Race', 'GenHealth'])
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,Diabetic,...,Race_Asian,Race_Black,Race_Hispanic,Race_Other,Race_White,GenHealth_Excellent,GenHealth_Fair,GenHealth_Good,GenHealth_Poor,GenHealth_Very good
0,0,16.6,1,0,0,3.0,30.0,0,1,1,...,0,0,0,0,1,0,0,0,0,1
1,0,20.34,0,0,0,0.0,0.0,0,1,0,...,0,0,0,0,1,0,0,0,0,1
2,0,26.58,1,0,0,20.0,30.0,0,0,1,...,0,0,0,0,1,0,1,0,0,0
3,0,24.21,0,0,0,0.0,0.0,0,1,0,...,0,0,0,0,1,0,0,1,0,0
4,0,23.71,0,0,0,28.0,0.0,1,1,0,...,0,0,0,0,1,0,0,0,0,1


In [40]:
df1 = df.drop(['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime'], axis=1)
df2 = df[['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']]
df2.head()

Unnamed: 0,BMI,PhysicalHealth,MentalHealth,SleepTime
0,16.6,3.0,30.0,5.0
1,20.34,0.0,0.0,7.0
2,26.58,20.0,30.0,8.0
3,24.21,0.0,0.0,6.0
4,23.71,28.0,0.0,8.0


In [43]:
from sklearn.preprocessing import MinMaxScaler

In [46]:
minmax = MinMaxScaler()
mms = minmax.fit_transform(df2)
df3 = pd.DataFrame(mms)
df3.columns = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']
df3.head()

Unnamed: 0,BMI,PhysicalHealth,MentalHealth,SleepTime
0,0.055294,0.1,1.0,0.173913
1,0.100447,0.0,0.0,0.26087
2,0.175782,0.666667,1.0,0.304348
3,0.147169,0.0,0.0,0.217391
4,0.141132,0.933333,0.0,0.304348


In [47]:
df4 = df1.join(df3)
df4.head()

Unnamed: 0,HeartDisease,Smoking,AlcoholDrinking,Stroke,DiffWalking,Sex,Diabetic,PhysicalActivity,Asthma,KidneyDisease,...,Race_White,GenHealth_Excellent,GenHealth_Fair,GenHealth_Good,GenHealth_Poor,GenHealth_Very good,BMI,PhysicalHealth,MentalHealth,SleepTime
0,0,1,0,0,0,1,1,1,1,0,...,1,0,0,0,0,1,0.055294,0.1,1.0,0.173913
1,0,0,0,0,0,1,0,1,0,0,...,1,0,0,0,0,1,0.100447,0.0,0.0,0.26087
2,0,1,0,0,0,0,1,1,1,0,...,1,0,1,0,0,0,0.175782,0.666667,1.0,0.304348
3,0,0,0,0,0,1,0,0,0,0,...,1,0,0,1,0,0,0.147169,0.0,0.0,0.217391
4,0,0,0,0,1,1,0,1,0,0,...,1,0,0,0,0,1,0.141132,0.933333,0.0,0.304348


In [48]:
X = df4.drop('HeartDisease', axis=1)
y = df4['HeartDisease']
X.head()

Unnamed: 0,Smoking,AlcoholDrinking,Stroke,DiffWalking,Sex,Diabetic,PhysicalActivity,Asthma,KidneyDisease,SkinCancer,...,Race_White,GenHealth_Excellent,GenHealth_Fair,GenHealth_Good,GenHealth_Poor,GenHealth_Very good,BMI,PhysicalHealth,MentalHealth,SleepTime
0,1,0,0,0,1,1,1,1,0,1,...,1,0,0,0,0,1,0.055294,0.1,1.0,0.173913
1,0,0,0,0,1,0,1,0,0,0,...,1,0,0,0,0,1,0.100447,0.0,0.0,0.26087
2,1,0,0,0,0,1,1,1,0,0,...,1,0,1,0,0,0,0.175782,0.666667,1.0,0.304348
3,0,0,0,0,1,0,0,0,0,1,...,1,0,0,1,0,0,0.147169,0.0,0.0,0.217391
4,0,0,0,1,1,0,1,0,0,0,...,1,0,0,0,0,1,0.141132,0.933333,0.0,0.304348


In [50]:
y.head(10)

0    0
1    0
2    0
3    0
4    0
5    1
6    0
7    0
8    0
9    0
Name: HeartDisease, dtype: int64

In [51]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.30)

In [52]:
X_train.head()

Unnamed: 0,Smoking,AlcoholDrinking,Stroke,DiffWalking,Sex,Diabetic,PhysicalActivity,Asthma,KidneyDisease,SkinCancer,...,Race_White,GenHealth_Excellent,GenHealth_Fair,GenHealth_Good,GenHealth_Poor,GenHealth_Very good,BMI,PhysicalHealth,MentalHealth,SleepTime
280475,0,0,0,0,1,0,1,0,0,0,...,1,1,0,0,0,0,0.113968,0.0,0.0,0.26087
253627,0,0,0,0,1,0,1,0,0,0,...,1,1,0,0,0,0,0.180249,0.0,0.0,0.304348
93746,0,0,0,0,1,0,1,0,0,0,...,1,0,0,0,0,1,0.208137,0.0,0.0,0.304348
4529,0,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,1,0.178196,0.0,0.0,0.304348
298218,1,0,0,0,1,0,1,0,0,0,...,0,0,0,1,0,0,0.156224,0.0,0.5,0.217391


In [53]:
y_train.head()

280475    0
253627    0
93746     0
4529      0
298218    0
Name: HeartDisease, dtype: int64

### Build and compile model

In [54]:
model = Sequential([
    layers.Dense(units=32, activation="relu", input_dim=len(X_train.columns)),
    layers.Dense(units=64, activation="relu"),
    layers.Dense(units=1, activation="sigmoid")
])

In [55]:
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics='accuracy')

### Fit and predict our model

In [56]:
model.fit(X_train, y_train, epochs=200, batch_size=32)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/20

Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.History at 0x7fa1f3f4f8e0>

In [57]:
y_pred = model.predict(X_test)
y_pred = [0 if val < 0.5 else 1 for val in y_pred]

In [58]:
y_pred

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [59]:
accuracy_score(y_pred, y_test)

0.9146853729974255

### Save the model

In [60]:
# model.save(tfmodel)