In [None]:
# Importing libraries
import tensorflow as tf
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras import Model
from tensorflow.keras.callbacks import Callback
from google.colab import files
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.metrics import confusion_matrix, classification_report
import pickle

In [None]:
# Upload the CSV dataset through the Colab file picker and move it into the 'data_kaggle' directory
to_directory = 'data_kaggle'
os.makedirs(to_directory, exist_ok=True)

# Iterating the upload result yields the uploaded file name(s); only the first one is used.
uploaded_file = files.upload()
file_csv_name = next(iter(uploaded_file))
os.rename(file_csv_name, os.path.join(to_directory, file_csv_name))

Saving heart.csv to heart.csv


In [None]:
# Read the uploaded CSV file back from the data directory
def load_data():
  """Return the uploaded CSV file as a pandas DataFrame.

  The path is built from the module-level names `to_directory` and
  `file_csv_name`, so the upload cell must have been executed first.
  """
  return pd.read_csv(os.path.join(to_directory, file_csv_name))

In [None]:
# Separating the features and the target variable of a dataset
def separate_data(dataset):
  """Split a dataset into its feature columns and the 'HeartDisease' target.

  Args:
    dataset: DataFrame that contains a 'HeartDisease' column. It is not
      modified.

  Returns:
    (features, label): `features` is a new DataFrame without the
    'HeartDisease' column, `label` is the 'HeartDisease' column as a Series.

  Raises:
    KeyError: if `dataset` has no 'HeartDisease' column.
  """
  # Use the frame that was passed in. It used to be replaced by a fresh
  # load_data() call, which made the argument meaningless and re-read the CSV.
  features = dataset.drop('HeartDisease', axis=1)
  label = dataset['HeartDisease']
  return features, label

In [None]:
# Load the raw dataset and print it as plain text
dataset = load_data()
print(dataset)

# Split off the target column. `features` stays untouched; the encoding step
# works on `copy_features` (DataFrame.copy() is a deep copy by default).
features, label = separate_data(dataset)
copy_features = features.copy()

     Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  \
0     40   M           ATA        140          289          0     Normal   
1     49   F           NAP        160          180          0     Normal   
2     37   M           ATA        130          283          0         ST   
3     48   F           ASY        138          214          0     Normal   
4     54   M           NAP        150          195          0     Normal   
..   ...  ..           ...        ...          ...        ...        ...   
913   45   M            TA        110          264          0     Normal   
914   68   M           ASY        144          193          1     Normal   
915   57   M           ASY        130          131          0     Normal   
916   57   F           ATA        130          236          0        LVH   
917   38   M           NAP        138          175          0     Normal   

     MaxHR ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0      172              N  

In [None]:
# Converting the string values of the dataset into numeric values using the LabelEncoder from sklearn library
def mapping_data(feature_transform):
  """Encode the categorical columns of `feature_transform` as integers.

  The columns to encode and their known values come from the module-level
  `features` frame (its object-dtype columns), not from the argument.
  Columns with more than two distinct values in `features` are encoded as
  LabelEncoder code + 1 (so 1..k); columns with exactly two distinct values
  keep the plain LabelEncoder codes (0/1).

  `feature_transform` is modified in place.

  Returns:
    Either `feature_transform` alone - as soon as a visited column already
    has dtype int64 in `feature_transform` - or the tuple
    (feature_transform, classes of the >2-valued columns keyed by column
    name, classes of the 2-valued columns keyed by column name).
    NOTE(review): the two return shapes differ; callers must know which
    kind of input they pass.
  """
  multi_class_labels = {}
  binary_class_labels = {}

  categorical_columns = features.select_dtypes(include=['object']).columns
  for column in categorical_columns:
    # Input that is already integer-coded is handed back as-is (bare frame, no tuple).
    if feature_transform[column].dtype == 'int64':
      return feature_transform

    known_values = features[column].unique().tolist()
    value_count = len(known_values)
    if value_count < 2:
      # Single-valued columns are left unchanged.
      continue

    encoder = LabelEncoder()
    encoder.fit(known_values)
    codes = encoder.transform(feature_transform[column])
    if value_count > 2:
      multi_class_labels[f"{column}"] = encoder.classes_
      feature_transform[column] = codes + 1
    else:
      binary_class_labels[f'{column}'] = encoder.classes_
      feature_transform[column] = codes

  return feature_transform, multi_class_labels, binary_class_labels

In [None]:
# Encode the categorical columns on a fresh copy of the raw features.
# Re-creating the copy here keeps this cell re-runnable: mapping_data() mutates its
# argument and returns a bare frame (not a 3-tuple) for already-encoded input, so a
# second run on the previously encoded copy would fail to unpack.
copy_features = features.copy(deep=True)
data_mapping, class_feature_more_than_2, class_feature_2 = mapping_data(copy_features)
# Separate copy for standardization, so `data_mapping` keeps the unscaled values.
copy_data_mapping = data_mapping.copy()
data_mapping

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,1,2,140,289,0,2,172,0,0.0,3
1,49,0,3,160,180,0,2,156,0,1.0,2
2,37,1,2,130,283,0,3,98,0,0.0,3
3,48,0,1,138,214,0,2,108,1,1.5,2
4,54,1,3,150,195,0,2,122,0,0.0,3
...,...,...,...,...,...,...,...,...,...,...,...
913,45,1,4,110,264,0,2,132,0,1.2,2
914,68,1,1,144,193,1,2,141,0,3.4,2
915,57,1,1,130,131,0,2,115,1,1.2,2
916,57,0,2,130,236,0,1,174,0,0.0,2


In [None]:
# Print, for every encoded column, each integer code next to the string value it replaces.
# Columns with more than two values are numbered from 1, two-valued columns from 0.
for class_mapping, first_code in ((class_feature_more_than_2, 1), (class_feature_2, 0)):
  for feature_name, classes in class_mapping.items():
    print(feature_name)
    for code_and_class in enumerate(classes, start=first_code):
      print(code_and_class)
    print('\n')

ChestPainType
(1, 'ASY')
(2, 'ATA')
(3, 'NAP')
(4, 'TA')


RestingECG
(1, 'LVH')
(2, 'Normal')
(3, 'ST')


ST_Slope
(1, 'Down')
(2, 'Flat')
(3, 'Up')


Sex
(0, 'F')
(1, 'M')


ExerciseAngina
(0, 'N')
(1, 'Y')




In [None]:
# Standardizing data using the StandardScaler from sklearn library
def standardize_data(data_transform, datas=data_mapping, columns_transform=['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']):
  """Standardize selected numeric columns of `data_transform`.

  A StandardScaler is fitted on `datas[columns_transform]` and applied to the
  same columns of `data_transform`, which is modified in place and returned.

  Args:
    data_transform: DataFrame to scale; must contain `columns_transform`.
    datas: reference DataFrame the scaler is fitted on. The default is the
      `data_mapping` object that existed when this cell was executed (default
      values are bound at definition time), so re-run this cell after
      `data_mapping` has been rebuilt.
    columns_transform: names of the columns to scale. Not mutated here.

  Returns:
    `data_transform` itself, with the selected columns replaced by their
    scaled values.
  """
  scaler = StandardScaler()
  scaler.fit(datas[columns_transform])
  transformed_columns = scaler.transform(data_transform[columns_transform])
  # Carry over the caller's index: column assignment aligns on index labels, so
  # a default RangeIndex here would produce NaN for any frame whose index is
  # not 0..n-1 (e.g. a filtered or shuffled subset).
  transformed_data = pd.DataFrame(transformed_columns, columns=columns_transform, index=data_transform.index)
  data_transform[columns_transform] = transformed_data
  return data_transform

In [None]:
# Standardize the encoded features, then split features and labels into training and validation sets
# NOTE(review): standardize_data() fits its scaler on all rows of data_mapping by default,
# i.e. before this split, so validation rows contribute to the scaling statistics.
standardized_data = standardize_data(copy_data_mapping)
array_features = np.array(standardized_data)
# One call splits both inputs at the same position; without shuffling, the first
# 80% of the rows form the training set and the rest the validation set.
train_data, valid_data, train_label, valid_label = train_test_split(
    array_features, label, train_size=0.8, shuffle=False)

In [None]:
# Showing the original features
# (`features` still holds the raw string categories; encoding was applied to a separate copy)
features.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up


In [None]:
# Showing the mapped features
# (categorical columns replaced by integer codes; the numeric columns are still unscaled,
# because standardization was applied to `copy_data_mapping`, a copy of this frame)
data_mapping.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,1,2,140,289,0,2,172,0,0.0,3
1,49,0,3,160,180,0,2,156,0,1.0,2
2,37,1,2,130,283,0,3,98,0,0.0,3
3,48,0,1,138,214,0,2,108,1,1.5,2
4,54,1,3,150,195,0,2,122,0,0.0,3


In [None]:
# Showing the standardized features
# (Age, RestingBP, Cholesterol, MaxHR and Oldpeak scaled by standardize_data(); other columns unchanged)
standardized_data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,-1.43314,1,2,0.410909,0.82507,0,2,1.382928,0,-0.832432,3
1,-0.478484,0,3,1.491752,-0.171961,0,2,0.754157,0,0.105664,2
2,-1.751359,1,2,-0.129513,0.770188,0,3,-1.525138,0,-0.832432,3
3,-0.584556,0,1,0.302825,0.13904,0,2,-1.132156,1,0.574711,2
4,0.051881,1,3,0.951331,-0.034755,0,2,-0.581981,0,-0.832432,3


In [None]:
# Report the shape of the training split, then of the validation split (one per line)
print(train_data.shape, valid_data.shape, sep='\n')

(734, 11)
(184, 11)


In [None]:
# Defining the custom callback class
class CustomStopCallback(Callback):
    """Keras callback that stops training once validation accuracy reaches a target.

    Args:
        target_accuracy: threshold compared against the 'val_accuracy' entry
            of the epoch logs.
    """

    def __init__(self, target_accuracy):
        super().__init__()
        self.target_accuracy = target_accuracy

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when this epoch's 'val_accuracy' is at or above the target.

        Does nothing when `logs` is missing or has no 'val_accuracy' entry.
        """
        # `logs` defaults to None; treat that as "no metrics" instead of failing on None.get().
        logs = logs or {}
        current_accuracy = logs.get('val_accuracy')
        if current_accuracy is not None and current_accuracy >= self.target_accuracy:
            print(f"\nReached target val_accuracy ({self.target_accuracy}). Stopping training.")
            self.model.stop_training = True



In [None]:
# Building the model
def model():
  """Build the (uncompiled) two-branch Keras network.

  The input width is read from the module-level `train_data`. A shared
  64-unit layer feeds two branches - one through dropout and a 32-unit layer,
  the other through two 32-unit layers. The branch outputs are combined by a
  Multiply layer, passed through dropout and fed to a single sigmoid unit.

  Returns:
    A `Model` from the feature vector input to the one-unit sigmoid output.
  """
  # Statement order is kept as-is: the seed is set after the Input layer and
  # before any Dense layer is created.
  inputs = Input(shape=(train_data.shape[1],))
  tf.keras.utils.set_random_seed(0)
  shared = Dense(64, activation='relu', kernel_regularizer='L2', name='first_dense')(inputs)
  dropped = Dropout(0.5)(shared)
  branch_a = Dense(32, activation='relu', kernel_regularizer='L2', name='second_dense')(dropped)
  # The second branch starts from the shared layer's output before dropout.
  branch_b = Dense(32, activation='relu', kernel_regularizer='L2')(shared)
  branch_b = Dense(32, activation='relu', kernel_regularizer='L2')(branch_b)
  merged = tf.keras.layers.Multiply()([branch_a, branch_b])
  merged = Dropout(0.2)(merged)
  prediction = Dense(1, activation='sigmoid', name='final_layer')(merged)
  return Model(inputs=inputs, outputs=prediction)


In [None]:
# Build the network and compile it for binary classification
# NOTE: this rebinds the name `model` from the builder function to the built network,
# so re-run the cell that defines model() before running this cell again.
model = model()
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

In [None]:
# Train on the training split and evaluate on the validation split after every epoch.
# Runs for at most 400 epochs; CustomStopCallback ends training once val_accuracy >= 0.83.
model.fit(
    train_data,
    train_label,
    validation_data=(valid_data, valid_label),
    epochs=400,
    batch_size=8,
    verbose=1,
    callbacks=[CustomStopCallback(target_accuracy=0.83)],
)

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

<keras.src.callbacks.History at 0x7d7d947ab490>

In [None]:
# Threshold the model outputs for the validation split at 0.5 to get boolean
# predictions, then print the confusion matrix against the true labels.
valid_outputs = model.predict(valid_data)
y_pred = valid_outputs > 0.5
print(confusion_matrix(valid_label, y_pred))

[[80 18]
 [13 73]]


In [None]:
# Print the classification report for the validation split (true labels vs. thresholded predictions)
print(classification_report(valid_label, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.82      0.84        98
           1       0.80      0.85      0.82        86

    accuracy                           0.83       184
   macro avg       0.83      0.83      0.83       184
weighted avg       0.83      0.83      0.83       184



In [None]:
# Showing the validation label
# (the true 'HeartDisease' values of the validation rows)
print(valid_label)

734    1
735    1
736    1
737    1
738    0
      ..
913    1
914    1
915    1
916    1
917    0
Name: HeartDisease, Length: 184, dtype: int64


In [None]:
# Showing the prediction (validation dataset)
# (boolean array obtained by thresholding model.predict(valid_data) at 0.5)
print(y_pred)

[[ True]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [False]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [ True]
 [False]
 [ True]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [ True]
 [ True]
 [False]
 [False]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [ True]
 

In [None]:
# Saving the trained model into a pickle file
# NOTE(review): unpickling can execute arbitrary code, so only load 'model.pkl'
# from a trusted source. The model is also saved as 'model.h5' in a separate cell.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Saving the model into an h5 file
# (this file is read back with tf.keras.models.load_model further down)
model.save('model.h5')

In [None]:
# Testing the model with a single record given as raw (string-coded) values
a = [49, 'F', 'ATA', 135, 260, 0, 'Normal', 172, 'Y', 1.5, 'Up']
df_ujiCoba = pd.DataFrame(
    [a],
    columns=['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
             'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope'],
)
# mapping_data() returns a tuple for string input; element 0 is the encoded frame,
# which is then standardized like the training data.
x = standardize_data(mapping_data(df_ujiCoba)[0])
x

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,-0.478484,0,2,0.140698,0.559805,0,2,1.382928,1,0.574711,3


In [None]:
# Model output for the single preprocessed test record `x`
model.predict(x)



array([[0.09270106]], dtype=float32)

In [None]:
# Reload the model from 'model.h5' and predict on the same record `x`
new_var = tf.keras.models.load_model('model.h5')
new_var.predict(x)



array([[0.09270106]], dtype=float32)

In [None]:
# Testing the model with a single record whose categorical values are already integer codes
b = [49, 0, 2, 135, 260, 0, 2, 172, 1, 1.5, 3]
df_ujiCoba_b = pd.DataFrame(
    [b],
    columns=['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
             'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope'],
)
# For already-encoded (int64) input mapping_data() returns the frame itself rather
# than a tuple, so its result is passed on without [0] indexing.
y = standardize_data(mapping_data(df_ujiCoba_b))
y


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,-0.478484,0,2,0.140698,0.559805,0,2,1.382928,1,0.574711,3


In [None]:
# Model output for the single pre-encoded test record `y`
model.predict(y)



array([[0.09270106]], dtype=float32)