# ecgai-model
ECG (electrocardiogram) classification using Artificial Neural Network.

## Load the required modules
This cell imports the modules used for data preparation, training,
and testing of the model. Make sure the required libraries are installed before
proceeding to this step. If they are not yet installed, run the command
`pip install -r requirements.txt`. The command installs the libraries listed in
the `requirements.txt` file at the versions specified there.

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

## Load the ECG dataset CSV files
The [ECG Heartbeat Categorization Dataset](https://www.kaggle.com/datasets/shayanfazeli/heartbeat)
was downloaded from Kaggle. This dataset from Kaggle contains a simplified
CSV version of datasets from [MIT-BIH Arrhythmia Database](https://www.physionet.org/content/mitdb/1.0.0/)
and [PTB Diagnostic ECG Database](https://www.physionet.org/content/ptbdb/1.0.0/).
For this project, the MIT-BIH Arrhythmia Database is used as the dataset for the training and testing.
Download and unzip the file. Once the extraction is done,
copy the files `mitbih_train.csv` and `mitbih_test.csv` inside the `dataset` directory.

The code in the next two cells loads both CSV files and displays a line chart of one training record (row 200).

In [None]:
# Read the MIT-BIH training and test CSV files. The files are read with
# header=None, so pandas labels the columns with integers starting at 0.
train_df, test_df = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ('dataset/mitbih_train.csv', 'dataset/mitbih_test.csv')
)

In [None]:
# Plot the full signal of training row 200. Columns 0-186 hold the signal
# samples and column 187 holds the class label, so the slice ends at 187 to
# include the last sample while leaving the label out.
plt.plot(train_df.iloc[200, :187])
plt.show()

## Resample the dataset

In [None]:
# Balance the training set so that every class (label in column 187) is
# represented by exactly 20000 rows.
# NOTE(review): classes 1-4 are sampled with replacement and the
# train/validation/test split is made afterwards, so copies of the same row
# can land in more than one split -- confirm this is intended.
class_column = train_df[187]

# Class 0 is drawn without replacement.
balanced_parts = [
    train_df[class_column == 0].sample(n=20000, random_state=42)
]

# Classes 1-4 are drawn with replacement, each with its own fixed seed.
for class_id, seed in zip((1, 2, 3, 4), (123, 124, 125, 126)):
    balanced_parts.append(
        resample(
            train_df[class_column == class_id],
            replace=True,
            n_samples=20000,
            random_state=seed,
        )
    )

train_df = pd.concat(balanced_parts)

## Split the dataset into training, validation, and testing

In [None]:
# Separate the signal columns from the class label kept in column 187.
features = train_df.drop(columns=[187])
targets = train_df[187]

# First cut: 90% of the rows go to training, 10% are held out.
x_train, x_temp, y_train, y_temp = train_test_split(
    features, targets, test_size=0.1, random_state=42
)

# Second cut: the held-out rows are halved into validation and test sets.
x_valid, x_test, y_valid, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42
)

## Define the model layers
![Model View](img/model.png)

The image above is a visual representation of the model that is created by the code below.

In [None]:
# Fully connected classifier: 187 input values, two hidden layers of 16 ReLU
# units each, and a 5-unit softmax output layer.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(187,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(5, activation='softmax'),
])

# Adam with its default settings; the sparse categorical cross-entropy loss
# takes class indices as targets instead of one-hot vectors.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

model.summary()

## Let the training begin


In [None]:
# Train for 200 epochs in mini-batches of 100 rows, using the validation
# split for the per-epoch validation metrics. The returned History object
# is kept so the accuracy and loss curves can be plotted afterwards.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=100,
    epochs=200,
    validation_data=(x_valid, y_valid),
)

## Visualize Training Accuracy and Loss

In [None]:
# Draw the training and validation curves side by side: accuracy on the
# left, loss on the right. `history` is the object returned by model.fit().
fig, (accuracy_chart, loss_chart) = plt.subplots(1, 2)
fig.suptitle('Training Accuracy and Loss')
fig.set_figwidth(15)

# Both charts follow the same recipe; only the metric key and label differ.
for chart, metric, title in (
    (accuracy_chart, 'accuracy', 'Accuracy'),
    (loss_chart, 'loss', 'Loss'),
):
    val_metric = 'val_' + metric  # Keras prefixes validation metrics
    chart.set_title(title)
    chart.plot(history.history[metric])
    chart.plot(history.history[val_metric])
    chart.legend([metric, val_metric])
    chart.set(xlabel='Epoch', ylabel=title)


## Determine Accuracy and generate a Confusion Matrix

In [None]:
# Evaluate the trained model twice -- on the held-out test split and on the
# separate test CSV file -- printing each accuracy and drawing both
# confusion matrices side by side.
fig, (cm_test, cm_external) = plt.subplots(1, 2)
fig.suptitle('Confusion Matrix')
fig.set_figwidth(15)

# The test file is separated the same way as the training data: column 187
# is taken as the label, the remaining columns as the input.
x_external = test_df.drop(columns=[187])
y_external = test_df[187]

evaluations = (
    (cm_test, 'From Test Split', x_test, y_test,
     "Accuracy using the test split: {}"),
    (cm_external, 'From Test File', x_external, y_external,
     "Accuracy using the test file: {}"),
)

for axis, title, inputs, expected, message in evaluations:
    axis.set_title(title)
    # The predicted class is the index of the largest model output.
    predicted = np.argmax(model.predict(inputs), axis=1)
    print(message.format(accuracy_score(expected, predicted)))
    matrix = confusion_matrix(expected, predicted)
    ConfusionMatrixDisplay(confusion_matrix=matrix).plot(ax=axis)

## Save the trained model
After the training and evaluation, save the trained model to `./models/model.keras`.

In [None]:
# Persist the trained model in the native Keras (.keras) format. The target
# directory is created first so the save does not fail when ./models is
# missing, e.g. on a fresh checkout.
from pathlib import Path

Path('./models').mkdir(parents=True, exist_ok=True)
model.save('./models/model.keras')