In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import datetime
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras import regularizers
from tensorflow.keras.optimizers import SGD
import keras
from keras.callbacks import EarlyStopping, TensorBoard

In [2]:
# Location of the heart-disease CSV. The original machine-specific path is kept
# as the default; set the HEART_DISEASE_CSV environment variable to run the
# notebook against a copy stored somewhere else without editing this cell.
filepath = os.environ.get(
    "HEART_DISEASE_CSV",
    r"C:\Users\User\Downloads\Heart Disease Prediction Project\heart.csv",
)

# Load the raw table and preview its first ten rows.
heart_disease = pd.read_csv(filepath)
heart_disease.head(10)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
5,58,0,0,100,248,0,0,122,0,1.0,1,0,2,1
6,58,1,0,114,318,0,2,140,0,4.4,0,3,1,0
7,55,1,0,160,289,0,0,145,1,0.8,1,1,3,0
8,46,1,0,120,249,0,0,144,0,0.8,2,0,3,0
9,54,1,0,122,286,0,0,116,1,3.2,1,2,2,0


In [3]:
# Remove the 'age' and 'sex' columns before modelling.
# `heart_disease` is reassigned from itself, so re-running this cell would
# otherwise raise KeyError (the columns are already gone); errors='ignore'
# makes the cell safe to execute more than once.
heart_disease = heart_disease.drop(columns=['age', 'sex'], errors='ignore')
heart_disease.head(10)

Unnamed: 0,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0,125,212,0,1,168,0,1.0,2,2,3,0
1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,0,145,174,0,1,125,1,2.6,0,0,3,0
3,0,148,203,0,1,161,0,0.0,2,1,3,0
4,0,138,294,1,1,106,0,1.9,1,3,2,0
5,0,100,248,0,0,122,0,1.0,1,0,2,1
6,0,114,318,0,2,140,0,4.4,0,3,1,0
7,0,160,289,0,0,145,1,0.8,1,1,3,0
8,0,120,249,0,0,144,0,0.8,2,0,3,0
9,0,122,286,0,0,116,1,3.2,1,2,2,0


In [4]:
# Separate the 'target' column (label) from every other column (features).
heart_disease_label = heart_disease['target']
heart_disease_features = heart_disease.drop(columns='target')

In [5]:
# Report the dimensions of the feature frame and of the label series.
print(
    f"The shape of features: {heart_disease_features.shape}",
    f"The shape of label: {heart_disease_label.shape}",
    sep="\n",
)

The shape of features: (1025, 11)
The shape of label: (1025,)


In [6]:
# Step 5: one-hot encode the label.
# get_dummies produces one indicator column per distinct label value.
heart_disease_label_OH = pd.get_dummies(data=heart_disease_label)

# Sanity check: show the shape of the encoded label under a banner line.
print(
    "---------------One-hot Label-----------------",
    heart_disease_label_OH.shape,
    sep="\n",
)

---------------One-hot Label-----------------
(1025, 2)


In [7]:
# Convert the feature frame and the one-hot label frame to NumPy arrays.
numpy_features, numpy_label = (
    frame.to_numpy()
    for frame in (heart_disease_features, heart_disease_label_OH)
)

In [8]:
# Confirm the array shapes after the DataFrame -> NumPy conversion.
print(
    f"The shape of features: {numpy_features.shape}",
    f"The shape of label: {numpy_label.shape}",
    sep="\n",
)

The shape of features: (1025, 11)
The shape of label: (1025, 2)


In [9]:
# Fixed seed so both splits below are reproducible.
SEED = 12345

# First split: keep 60% of the rows for training and hold out the other 40%.
(features_train, features_iter,
 label_train, label_iter) = train_test_split(
    numpy_features, numpy_label, test_size=0.4, random_state=SEED
)

# Second split: divide the held-out rows evenly into validation and test sets.
(features_val, features_test,
 label_val, label_test) = train_test_split(
    features_iter, label_iter, test_size=0.5, random_state=SEED
)

In [10]:
# Standardize the features. The scaler statistics are learned from the
# training split only and then re-used, unchanged, for validation and test.
standardizer = StandardScaler()
features_train = standardizer.fit_transform(features_train)

features_val, features_test = (
    standardizer.transform(split) for split in (features_val, features_test)
)

In [11]:
# Show the feature/label shapes of the train, validation and test splits.
print(
    f"Features train shape: {features_train.shape},Label train shape: {label_train.shape}",
    f"Features validation shape: {features_val.shape}, Label validation shape: {label_val.shape}",
    f"Features test shape: {features_test.shape}, Label test shape: {label_test.shape}",
    sep="\n",
)

Features train shape: (615, 11),Label train shape: (615, 2)
Features validation shape: (205, 11), Label validation shape: (205, 2)
Features test shape: (205, 11), Label test shape: (205, 2)


In [12]:
# Objects later handed to `fnn_model.compile`:
#   accuracy - element-wise binary accuracy metric
#   bce      - binary cross-entropy computed on probabilities (not logits)
#   adam     - Adam optimizer with a learning rate of 0.001
accuracy = keras.metrics.BinaryAccuracy()
bce = keras.losses.BinaryCrossentropy(from_logits=False)
adam = keras.optimizers.Adam(learning_rate=1e-3)

In [13]:
# Feed-forward classifier: three L2-regularised ReLU hidden layers
# (128 -> 64 -> 32 units) followed by a sigmoid output layer with one unit
# per one-hot label column.
fnn_model = keras.Sequential([
    # `input_shape` is documented as a shape tuple. The original passed a bare
    # int, which relies on version-specific leniency (NOTE(review): believed to
    # be rejected by Keras 3 - confirm against the installed version).
    # A 1-tuple is valid everywhere.
    keras.layers.InputLayer(input_shape=(features_train.shape[1],)),
    keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.L2(0.001)),
    keras.layers.Dense(64, activation='relu', kernel_regularizer=regularizers.L2(0.001)),
    keras.layers.Dense(32, activation='relu', kernel_regularizer=regularizers.L2(0.001)),
    keras.layers.Dense(label_train.shape[1], activation='sigmoid')
])
fnn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               1536      
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 2)                 66        
                                                                 
Total params: 11,938
Trainable params: 11,938
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Attach the Adam optimizer, binary cross-entropy loss and binary-accuracy
# metric (defined earlier in the notebook) to the model.
fnn_model.compile(
    loss=bce,
    optimizer=adam,
    metrics=[accuracy],
)

In [15]:
# Root directory for TensorBoard logs. The original machine-specific path is
# kept as the default; set HEART_DISEASE_LOG_DIR to redirect the logs.
base_log_path = os.environ.get(
    "HEART_DISEASE_LOG_DIR",
    r"C:\Users\User\Downloads\heart_log",
)
# Each run writes to its own timestamped sub-directory so runs do not overwrite
# one another.
log_path = os.path.join(base_log_path, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

# Stop once val_loss has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights of the final (possibly
# worse) epoch, which is what the later evaluation would then measure.
es = EarlyStopping(monitor='val_loss', patience=5, verbose=2,
                   restore_best_weights=True)
tb = TensorBoard(log_dir=log_path)

EPOCHS = 20
BATCH_SIZE = 32
# Train on the training split and monitor the validation split every epoch.
history = fnn_model.fit(features_train, label_train,
                        validation_data=(features_val, label_val),
                        batch_size=BATCH_SIZE, epochs=EPOCHS,
                        callbacks=[tb, es])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Evaluate the trained model on the held-out test split.
# `evaluate` returns the loss followed by the single compiled metric.
test_result = fnn_model.evaluate(
    features_test,
    label_test,
    batch_size=BATCH_SIZE,
)
test_loss, test_accuracy = test_result
print(f"Test loss = {test_loss}")
print(f"Test accuracy = {test_accuracy}")

Test loss = 0.28477638959884644
Test accuracy = 0.9414634108543396


In [17]:
# Predict on the test split and compare each prediction with its true label.
# Despite its name, `predictions_softmax` holds the model's raw per-class
# outputs; the predicted class is the index of the largest one.
predictions_softmax = fnn_model.predict(features_test)
predictions = predictions_softmax.argmax(axis=-1)

# Recover the class index of every one-hot test label: np.where yields the
# (row, column) positions of the entries equal to 1.
label_test_element, label_test_idx = np.where(np.array(label_test) == 1)

for prediction, label in zip(predictions, label_test_idx):
    difference = prediction - label
    print(f'Prediction: {prediction} Label: {label}, Difference: {difference}')

Prediction: 0 Label: 0, Difference: 0
Prediction: 0 Label: 1, Difference: -1
Prediction: 1 Label: 1, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: 0 Label: 0, Difference: 0
Prediction: 1 Label: 1, Difference: 0
Prediction: