# Import Liberaries

In [15]:
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import pickle
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import FeatureUnion 
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
pd.set_option('display.float_format', '{:.2f}'.format)

# Import Data

In [2]:
data = pd.read_csv('heart_attack_prediction_dataset.csv')

# Descriptive analysis

In [3]:
print("Shape of the data:",data.shape)
print(data.info())
print(data.describe())
data.head()

Shape of the data: (8763, 26)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8763 entries, 0 to 8762
Data columns (total 26 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Patient ID                       8763 non-null   object 
 1   Age                              8763 non-null   int64  
 2   Sex                              8763 non-null   object 
 3   Cholesterol                      8763 non-null   int64  
 4   Blood Pressure                   8763 non-null   object 
 5   Heart Rate                       8763 non-null   int64  
 6   Diabetes                         8763 non-null   int64  
 7   Family History                   8763 non-null   int64  
 8   Smoking                          8763 non-null   int64  
 9   Obesity                          8763 non-null   int64  
 10  Alcohol Consumption              8763 non-null   int64  
 11  Exercise Hours Per Week          8763 non-null   flo

Unnamed: 0,Patient ID,Age,Sex,Cholesterol,Blood Pressure,Heart Rate,Diabetes,Family History,Smoking,Obesity,...,Sedentary Hours Per Day,Income,BMI,Triglycerides,Physical Activity Days Per Week,Sleep Hours Per Day,Country,Continent,Hemisphere,Heart Attack Risk
0,BMW7812,67,Male,208,158/88,72,0,0,1,0,...,6.62,261404,31.25,286,0,6,Argentina,South America,Southern Hemisphere,0
1,CZE1114,21,Male,389,165/93,98,1,1,1,1,...,4.96,285768,27.19,235,1,7,Canada,North America,Northern Hemisphere,0
2,BNI9906,21,Female,324,174/99,72,1,0,0,0,...,9.46,235282,28.18,587,4,4,France,Europe,Northern Hemisphere,0
3,JLN3497,84,Male,383,163/100,73,1,1,1,0,...,7.65,125640,36.46,378,3,4,Canada,North America,Northern Hemisphere,0
4,GFO8847,66,Male,318,91/88,93,1,1,1,1,...,1.51,160555,21.81,231,1,5,Thailand,Asia,Northern Hemisphere,0


# Custom Functions

# Pre - Processing of the data 
* Lable Encode the Gender and Diet Features
* Country need to be one-hot encoded
* Add a simple inputer to final dict
* Scaling the dict using standerd scaler 
* Create a Transfomration pipeline 
* Create a ANN Model using Tensorflow Keras
* Train the model 

In [4]:
# Process the Blood Pressure
data[['BP High Value', 'BP Low Value']] = data['Blood Pressure'].str.split('/', expand=True)  	
data[['BP High Value', 'BP Low Value']] = data[['BP High Value', 'BP Low Value']].astype('float32')
data.drop(columns= ['Patient ID', 'Continent','Hemisphere','Blood Pressure'], inplace=True)

### Creating the Transformation Pipeline and Perform Transformation

In [10]:
encoder = ColumnTransformer(
    transformers=[
        ('label_encoder',OrdinalEncoder(),['Sex', 'Diet']),
        ('onehot_encoder',OneHotEncoder(sparse_output=False, handle_unknown='ignore'),['Country'])
        ],
        remainder = 'passthrough',
        force_int_remainder_cols = False,
        verbose_feature_names_out = "{feature_name}"
    )

transfomration_pipeline = Pipeline(steps=[
    ('encoding',encoder),
    ('scaling',StandardScaler()),
    ],
    
) 
transfomration_pipeline.set_output(transform="pandas")


# Divide the data into regressors and target
X = data.drop(columns=['Heart Attack Risk'])
y = data['Heart Attack Risk']

# Split the data into train test
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.2, random_state=42)

# Transformed data
transfomrmed_X_train = transfomration_pipeline.fit_transform(X_train)
transfomrmed_X_test = transfomration_pipeline.transform(X_test)

### Build a ANN Model

In [11]:
# Define the model
model = Sequential([
    Input(shape=(transfomrmed_X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Define the optimizer, loss function and metrcis to the model
optimizer = Adam(learning_rate=0.01)
loss = BinaryCrossentropy() 

model.compile(optimizer=optimizer, loss=loss, metrics=['f1_score','accuracy'])

# Add callbacks to the model
# Set Up Tensor Board
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 
tensor_board_call_back = TensorBoard(log_dir=log_dir)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=2000, restore_best_weights=True)

# Train the model
model.fit(transfomrmed_X_train, y_train, 
          validation_data=(transfomrmed_X_test, y_test), 
          epochs = 5000,
        #   batch_size=10,
          callbacks= [tensor_board_call_back, early_stopping_callback])

Epoch 1/5000
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6220 - f1_score: 0.5259 - loss: 0.6797 - val_accuracy: 0.6418 - val_f1_score: 0.5275 - val_loss: 0.6607
Epoch 2/5000
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6447 - f1_score: 0.5239 - loss: 0.6505 - val_accuracy: 0.6418 - val_f1_score: 0.5275 - val_loss: 0.6550
Epoch 3/5000
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6447 - f1_score: 0.5236 - loss: 0.6422 - val_accuracy: 0.6418 - val_f1_score: 0.5275 - val_loss: 0.6608
Epoch 4/5000
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6428 - f1_score: 0.5250 - loss: 0.6417 - val_accuracy: 0.6418 - val_f1_score: 0.5275 - val_loss: 0.6598
Epoch 5/5000
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6333 - f1_score: 0.5359 - loss: 0.6442 - val_accuracy: 0.6389 - val_f1_sco

<keras.src.callbacks.history.History at 0x1a201c12f60>

In [None]:
# Save required files
# Save trained model
model.save('model.h5')

# Save Transformer
with open('transfomration_pipeline.pkl', 'wb') as file:
     pickle.dump(transfomration_pipeline, file)

# joblib.dump(transfomration_pipeline, 'transfomration_pipeline.pkl')

In [None]:

%load_ext tensorboard


In [None]:
%tensorboard --logdir logs/fit