In [1]:
import os
from sklearn import preprocessing
import pandas as pd
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping

In [2]:
DATA_PATH = "/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv"
VALID_SPLIT = 0.1
TEST_SPLIT = 0.1
FEATURES = ["age", "sex", "cp", "trtbps", "chol", "fbs", "restecg", "thalachh", "exng", "oldpeak", "slp", "caa", "thall"]
N_FEATURES = len(FEATURES)
EPOCHS = 500
NB_NEURONS = 4096
DROPOUT_VALUE = 0.3

In [3]:
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/heart-attack-analysis-prediction-dataset/o2Saturation.csv
/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv


In [4]:
def preprocess():
    data = pd.read_csv(DATA_PATH)
    base_data = data
    preprocess_data = pd.DataFrame(np.array([]))
    for col in data.columns:
        if col != "output":
            preprocess_data[col] = (data[col] - data[col].min()) / (data[col].max() - data[col].min())
        else:
            preprocess_data[col] = data[col]
    print(data["output"].value_counts())
    return base_data, preprocess_data

In [5]:
base_data, preprocess_data = preprocess()

1    165
0    138
Name: output, dtype: int64


In [6]:
preprocess_data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
0,0.0,,,,,,,
age,303.0,0.528465,0.18921,0.0,0.385417,0.541667,0.666667,1.0
sex,303.0,0.683168,0.466011,0.0,0.0,1.0,1.0,1.0
cp,303.0,0.322332,0.344017,0.0,0.0,0.333333,0.666667,1.0
trtbps,303.0,0.354941,0.165454,0.0,0.245283,0.339623,0.433962,1.0
chol,303.0,0.274575,0.118335,0.0,0.194064,0.260274,0.339041,1.0
fbs,303.0,0.148515,0.356198,0.0,0.0,0.0,0.0,1.0
restecg,303.0,0.264026,0.26293,0.0,0.0,0.5,0.5,1.0
thalachh,303.0,0.600358,0.174849,0.0,0.477099,0.625954,0.725191,1.0
exng,303.0,0.326733,0.469794,0.0,0.0,0.0,1.0,1.0


In [7]:
data_tmp, valid_data = train_test_split(preprocess_data, test_size=VALID_SPLIT)
train_data, test_data = train_test_split(data_tmp, test_size=TEST_SPLIT)

In [8]:
print(train_data.shape)
print(valid_data.shape)
print(test_data.shape)

(244, 15)
(31, 15)
(28, 15)


In [9]:
def build_model():
    model = Sequential()
    model.add(Dense(N_FEATURES, input_shape=train_data[FEATURES].shape, activation="linear"))
    model.add(Dense(NB_NEURONS, activation="relu"))
    model.add(Dropout(DROPOUT_VALUE))
    model.add(Dense(NB_NEURONS, activation="relu"))
    model.add(Dropout(DROPOUT_VALUE))
    model.add(Dense(NB_NEURONS, activation="relu"))
    model.add(Dropout(DROPOUT_VALUE))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="sgd", metrics=["accuracy"])
    model.summary()
    return model

In [10]:
def get_callbacks():   

    early_stopping = EarlyStopping(monitor="val_loss", patience = 50 , verbose = 1, restore_best_weights = True)
    
    model_cp = ModelCheckpoint('model.h5', 
                                 save_best_only = True, 
                                 save_weights_only = True,
                                 monitor = 'val_loss', 
                                 mode = 'min', verbose = 1)
    
    return [early_stopping, model_cp]

In [11]:
model = build_model()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 244, 13)           182       
_________________________________________________________________
dense_1 (Dense)              (None, 244, 4096)         57344     
_________________________________________________________________
dropout (Dropout)            (None, 244, 4096)         0         
_________________________________________________________________
dense_2 (Dense)              (None, 244, 4096)         16781312  
_________________________________________________________________
dropout_1 (Dropout)          (None, 244, 4096)         0         
_________________________________________________________________
dense_3 (Dense)              (None, 244, 4096)         16781312  
_________________________________________________________________
dropout_2 (Dropout)          (None, 244, 4096)         0

In [None]:
history = model.fit(train_data[FEATURES], train_data["output"], 
                    epochs=EPOCHS, 
                    validation_data=(valid_data[FEATURES], valid_data["output"]),
                    callbacks = get_callbacks())

Epoch 1/500

Epoch 00001: val_loss improved from inf to 0.69084, saving model to model.h5
Epoch 2/500

Epoch 00002: val_loss improved from 0.69084 to 0.68799, saving model to model.h5
Epoch 3/500

Epoch 00003: val_loss improved from 0.68799 to 0.68505, saving model to model.h5
Epoch 4/500

Epoch 00004: val_loss improved from 0.68505 to 0.68255, saving model to model.h5
Epoch 5/500

Epoch 00005: val_loss improved from 0.68255 to 0.67999, saving model to model.h5
Epoch 6/500

Epoch 00006: val_loss improved from 0.67999 to 0.67732, saving model to model.h5
Epoch 7/500

Epoch 00007: val_loss improved from 0.67732 to 0.67481, saving model to model.h5
Epoch 8/500

Epoch 00008: val_loss improved from 0.67481 to 0.67236, saving model to model.h5
Epoch 9/500

Epoch 00009: val_loss improved from 0.67236 to 0.66974, saving model to model.h5
Epoch 10/500

Epoch 00010: val_loss improved from 0.66974 to 0.66726, saving model to model.h5
Epoch 11/500

Epoch 00011: val_loss improved from 0.66726 to 0.

In [None]:
#Train set data
loss_curve = history.history["loss"]
acc_curve = history.history["accuracy"]

#Validation set data
val_loss_curve = history.history["val_loss"]
val_acc_curve = history.history["val_accuracy"]

#Loss plot
plt.plot(loss_curve, label="Train set")
plt.plot(val_loss_curve, label="Validation set")
plt.legend(loc='upper right')
plt.title("Loss")
plt.show()

#Accuracy plot
plt.plot(acc_curve, label="Train set")
plt.plot(val_acc_curve, label="Validation set")
plt.legend(loc='lower right')
plt.title("Accuracy")
plt.show()

In [None]:
predictions = model.predict(test_data[FEATURES])
for prediction in predictions:
    if prediction > 0.5:
        print("Risky")
    else:
        print("Safe")