In [None]:
%pip install wfdb

In [None]:
%pip install missingno

# 1. Environment Setup and Data Processing

### 1.1 Import libraries & signal data directory

In [None]:
import pandas as pd
import numpy as np
import wfdb
import ast
from tqdm import tqdm
import warnings; warnings.filterwarnings('ignore')
from IPython.display import display
import pywt

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import missingno as msno

In [None]:
def load_raw_data(df, sampling_rate, path):
    """Read the WFDB records listed in ``df`` and stack their signals.

    Args:
        df: Frame exposing ``filename_lr`` and ``filename_hr`` columns whose
            values are handed to ``wfdb.rdsamp`` unchanged.
        sampling_rate: ``100`` selects ``filename_lr``; any other value
            selects ``filename_hr``.
        path: Accepted for interface compatibility but not used in the body.
            NOTE(review): presumably the notebook is run from the dataset
            root so the record names resolve as-is -- confirm before
            prefixing ``path``.

    Returns:
        ``np.ndarray`` built from the signal part of every ``rdsamp`` result
        (the accompanying metadata part is discarded).
    """
    record_names = df.filename_lr if sampling_rate == 100 else df.filename_hr
    signals = []
    for record_name in record_names:
        signal, _meta = wfdb.rdsamp(record_name)
        signals.append(signal)
    return np.array(signals)

In [None]:
# Dataset directory, given as a relative path.
# NOTE(review): load_raw_data receives this value but does not use it -- confirm
# whether the notebook is expected to run from inside the dataset directory.
path = 'Desktop/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3'
# 100 makes load_raw_data read the `filename_lr` records; any other value reads `filename_hr`
sampling_rate=100

### 1.2 Metadata preprocessing

In [None]:
# Read the annotation table, indexed by ECG record id
metadata = pd.read_csv('ptbxl_database.csv', index_col='ecg_id')
# scp_codes is stored as the text of a Python literal; parse it into objects
metadata['scp_codes'] = metadata['scp_codes'].map(ast.literal_eval)

# Keep only the diagnosis codes of every record for a quick look
scp_codes_df = metadata.loc[:, ['scp_codes']]
print(scp_codes_df)

In [None]:
# Overview of how densely each metadata column is populated (missing-value matrix)
msno.matrix(metadata)
plt.show()

In [None]:
# scp_codes holds dict objects (parsed above), which are unhashable and make
# value_counts() raise TypeError; count their string representation instead.
print(metadata['scp_codes'].astype(str).value_counts())

In [None]:
# Statement table used for diagnostic aggregation; only rows flagged diagnostic == 1 are kept
scp_statements = pd.read_csv('scp_statements.csv', index_col=0)
agg_df = scp_statements.loc[scp_statements['diagnostic'] == 1]

print(agg_df.shape)

In [None]:
def aggregate_superclass_diagnostic(y_dic):
    """Collect the diagnostic superclasses of the codes annotated with 100.

    Args:
        y_dic: Mapping from SCP code to a numeric value; only entries whose
            value equals 100 are considered (the "100% diagnosis" case).

    Returns:
        List of the distinct ``diagnostic_class`` values, looked up in the
        notebook-level ``agg_df``, for every key of ``y_dic`` that is present
        in ``agg_df.index``. The order is not defined because the values pass
        through a ``set``.
    """
    superclasses = {
        agg_df.loc[code].diagnostic_class
        for code, value in y_dic.items()
        if code in agg_df.index and value == 100
    }
    return list(superclasses)
    
# Derive the superclass list of every record and store how many entries it has
metadata['diagnostic_superclass'] = metadata['scp_codes'].map(aggregate_superclass_diagnostic)
metadata['diagnostic_superclass_len'] = metadata['diagnostic_superclass'].map(len)
# Show the records that received at least one superclass
has_superclass = metadata['diagnostic_superclass_len'] >= 1
metadata.loc[has_superclass, 'diagnostic_superclass']

In [None]:
# Percentage of records per number of assigned superclasses
vc_superclass = metadata['diagnostic_superclass_len'].value_counts()
share_percent = vc_superclass.values / vc_superclass.values.sum() * 100.

sns.set_style("whitegrid")
bar, ax = plt.subplots(figsize=(10, 6))
ax = sns.barplot(
    x=share_percent,
    y=vc_superclass.index,
    ci=None,
    palette="muted",
    orient='h',
    ax=ax,
)
ax.set_title("Diagnostic Superclass Len Distribution", fontsize=20)
ax.set_xlabel("percentage over all samples")
ax.set_ylabel("diagnostic_superclass_len")
# Write the percentage next to the tip of every bar
for rect in ax.patches:
    bar_width = rect.get_width()
    bar_mid_y = rect.get_y() + rect.get_height() / 2
    ax.text(bar_width, bar_mid_y, "%.1f%%" % bar_width, weight='bold')


In [None]:
# Distribution of the records that carry exactly one superclass
single_label = metadata[metadata['diagnostic_superclass_len'] == 1]
print(single_label.shape)
# Every entry is a one-element list, which is unhashable and makes
# value_counts() raise TypeError; count the joined label string instead.
vc_single = single_label['diagnostic_superclass'].map(' '.join).value_counts()
plt.pie(vc_single, labels=vc_single.index, autopct='%1.1f%%')
plt.title('Distribution of Single Superclass')
plt.show()

### 1.3 Metadata filtering

In [None]:
# Keep the single-superclass records labelled 'NORM', 'STTC', 'CD' or 'MI' (drop 'HYP').
# The selection is rebuilt from `metadata` and copied explicitly so that
#  * re-running this cell does not join the characters of already-joined strings, and
#  * the column assignment below does not write into a slice of `metadata`.
single_label = metadata[metadata['diagnostic_superclass_len'] == 1]
not_hyp = single_label['diagnostic_superclass'].apply(lambda x: x != ['HYP'])
single_label = single_label[not_hyp].copy()
# Turn the one-element label lists into plain strings
single_label['diagnostic_superclass'] = single_label['diagnostic_superclass'].apply(lambda x: ' '.join(x))
single_label['diagnostic_superclass']

In [None]:
# Number of records per remaining superclass
superclass = (
    single_label
    .groupby(['diagnostic_superclass'])
    .size()
    .reset_index(name='count')
)
print(superclass)

# Bar chart of the number of ECG records for each label
plt.figure(figsize=(10, 6))
sns.barplot(x='diagnostic_superclass', y='count', data=superclass)
plt.title('Number of Each Diagnostic Superclass')
plt.xlabel('Diagnostic Superclass')
plt.ylabel('Count')
plt.xticks(rotation=45)  # rotated so the class names stay readable
plt.show()


In [None]:
# Draw at most 3000 ECG records per class; classes with fewer records are taken whole
class_labels = single_label['diagnostic_superclass'].unique()
samples = []
for label in class_labels:
    # Rows that belong to the current class
    class_rows = single_label.loc[single_label['diagnostic_superclass'] == label]
    # Fixed random_state keeps the draw reproducible
    n_draw = min(3000, len(class_rows))
    samples.append(class_rows.sample(n=n_draw, random_state=42))

# Stack the per-class draws into one frame with a fresh 0..n-1 index
balanced_data = pd.concat(samples).reset_index(drop=True)

balanced_data.groupby(['diagnostic_superclass']).size().reset_index(name='count')

In [None]:
# Metadata columns needed downstream, plus one integer indicator column per superclass
meta_cols = balanced_data.loc[:, ['age', 'sex', 'strat_fold', 'filename_lr', 'filename_hr']]
one_hot = pd.get_dummies(
    balanced_data['diagnostic_superclass'],
    columns=['diagnostic_superclass', ],
    prefix='',
    prefix_sep='',
    dtype=int,
)
ECG_meta = pd.concat([meta_cols, one_hot], axis=1)
ECG_meta.info()

### 1.4 Waveform signal display

In [None]:
# Load the ECG signals of the records listed in ECG_meta (one entry per metadata row,
# read from `filename_lr` or `filename_hr` depending on `sampling_rate`)
X = load_raw_data(ECG_meta, sampling_rate, path)

In [None]:
# Plot the first `sample_num` recordings of every class, one subplot per signal column
sample_num = 1

for superclass in one_hot:
    # Rows (and the matching signals) that belong to the current class
    filt = ECG_meta[superclass] == 1
    y_selected = ECG_meta.loc[filt]
    x_selected = X[filt]

    for i in range(sample_num):
        y_, x_ = y_selected.iloc[i], x_selected[i]
        n_points, n_channels = x_.shape[0], x_.shape[1]

        bar, axes = plt.subplots(n_channels, 1, figsize=(10, 10))
        title = "Superclass = {}, Sex = {}, Age={}".format(superclass, y_['sex'], y_['age'])
        axes[0].set_title(title, fontsize=15)

        # Shared sample-index axis for every channel of this recording
        sample_index = np.arange(n_points)
        for c in range(n_channels):
            sns.lineplot(x=sample_index, y=x_[:, c], ax=axes[c])

        plt.tight_layout()
        plt.show()

# 2. Train-Valid-Test Set Splitting

According to the dataset source (https://physionet.org/content/ptb-xl/1.0.1/), the records come with a recommended stratified 10-fold assignment in which all records of a patient are kept within the same fold. Moreover, folds 9 and 10 are of higher label quality and are therefore recommended as the validation set and the test set, respectively.

Cross-validation Folds: recommended 10-fold train-test splits (strat_fold) obtained via stratified sampling while respecting patient assignments, i.e. all records of a particular patient were assigned to the same fold. Records in fold 9 and 10 underwent at least one human evaluation and are therefore of a particularly high label quality. We therefore propose to use folds 1-8 as training set, fold 9 as validation set and fold 10 as test set.
Here, I follow that recommendation: folds 1-8 are combined into the training set, fold 9 is the validation set, and fold 10 is the test set.

In [None]:
# Relabel the recommended folds: 1-8 -> 'train', 9 -> 'validation', anything else -> 'test'
fold = balanced_data.copy()
fold['strat_fold'] = fold['strat_fold'].map(lambda x: 'train' if x in range(1, 9) else 'validation' if x in [9] else 'test')
fold_counts = fold.groupby(['strat_fold','diagnostic_superclass']).size().reset_index(name='count')
print(fold_counts)

# Grouped bar chart: number of ECG records per split, one bar per superclass
plt.figure(figsize=(10, 6))
sns.barplot(data=fold_counts, x='strat_fold', y='count',hue='diagnostic_superclass')
plt.title('Distribution for Each Diagnostic Superclass')
plt.xlabel('Data Split')  # the x-axis shows the split; the superclass is encoded by hue
plt.ylabel('Count')
plt.xticks(rotation=45)  # Rotate x-axis labels for readability
plt.legend(title='superclass')
plt.show()

In [None]:
# Share of the records that falls into the train / validation / test split
vc_fold = fold['strat_fold'].value_counts()
plt.pie(vc_fold, labels=vc_fold.index, autopct='%1.1f%%')
# The pie shows the split sizes, so the title names the split rather than the superclass
plt.title('Distribution of Train / Validation / Test Split')
plt.show()

In [None]:
# Label matrix: one indicator column per class, in the fixed column order
# NORM, MI, STTC, CD (the `class_names` lists further down use the same order)
target =  ECG_meta[['NORM', 'MI', 'STTC', 'CD']]
target

In [None]:
# Training / validation / test split driven by the fold number of every record
is_train = ECG_meta.strat_fold <= 8
is_valid = ECG_meta.strat_fold == 9
is_test = ECG_meta.strat_fold == 10

X_train, Y_train = X[is_train], target[is_train]
X_valid, Y_valid = X[is_valid], target[is_valid]
X_test, Y_test = X[is_test], target[is_test]

# Signal / label shapes of every split, in train, validation, test order
for features, labels in ((X_train, Y_train), (X_valid, Y_valid), (X_test, Y_test)):
    print(features.shape, labels.shape)


In [None]:
# Persist the three splits as float32 arrays in a single .npz archive
NUMPY_DATA_FILE = 'data.npz'

splits = {
    'train': (X_train, Y_train),
    'valid': (X_valid, Y_valid),
    'test': (X_test, Y_test),
}
# Signals first, then labels (the label frames are converted to NumPy before casting)
save_args = {f'X_{name}': signals.astype('float32') for name, (signals, _) in splits.items()}
save_args.update(
    {f'Y_{name}': labels.to_numpy().astype('float32') for name, (_, labels) in splits.items()}
)
np.savez(NUMPY_DATA_FILE, **save_args)


# 3: Model building and training

In [None]:
%pip install tensorflow
%pip install seaborn
%pip install scikit-learn
%pip install opencv-python
%pip install imutils

In [None]:
%pip install keras-tuner

In [None]:
#import libraries
import sys
import os
import math
from tensorflow.keras import metrics
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras import layers
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense,LeakyReLU
import tensorflow as tf
from keras.models import Sequential
from keras.optimizers import SGD
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.layers import TimeDistributed
import cv2
import time
from kerastuner.tuners import RandomSearch
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Reload the saved arrays and expose every archive entry as a notebook-level
# variable named after its key; each array is converted with astype(float)
thismodule = sys.modules[__name__]

with np.load('data.npz') as archive:
    for entry_name in archive.files:
        vars(thismodule)[entry_name] = archive[entry_name].astype(float)

### 3.1 1DCNN model

In [None]:
#build CNN model
def CNN():
    """Build and compile the baseline 1D-CNN classifier.

    The first layer declares ``input_shape=(1000, 12)``. Three
    Conv1D/MaxPooling1D stages (batch normalisation after the first two) are
    followed by Flatten, Dropout, a 7-unit dense layer with LeakyReLU and a
    4-unit softmax output.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy, the
        "adam" optimizer and the accuracy / auc / precision / recall metrics.
    """
    conv_stack = [
        Conv1D(filters=32, kernel_size=10, strides=3, padding='same', input_shape=(1000, 12)),
        LeakyReLU(negative_slope=0.1),
        MaxPooling1D(pool_size=2, strides=3),
        layers.BatchNormalization(),
        Conv1D(filters=16, kernel_size=5, strides=2, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2, strides=2),
        layers.BatchNormalization(),
        Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2, strides=2),
    ]
    classifier_head = [
        Flatten(),
        layers.Dropout(rate=0.1),
        layers.Dense(units=7),
        LeakyReLU(negative_slope=0.2),
        layers.Dense(units=4, activation='softmax'),
    ]
    model = Sequential(conv_stack + classifier_head)
    model.compile(
        loss='categorical_crossentropy',
        optimizer="adam",
        metrics=['accuracy', 'auc', 'precision', 'recall'],
    )
    return model

In [None]:
# Instantiate the baseline 1D-CNN (model1) and print its layer summary
model1=CNN()
model1.summary()

In [None]:
# Train model1 for 10 epochs while monitoring the validation fold, then score
# the final weights on that fold (values come back in loss + compiled-metric order)
history1 = model1.fit(
    X_train,
    Y_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_valid, Y_valid),
)
val_loss, val_acc, val_auc, val_precision, val_recall = model1.evaluate(X_valid, Y_valid)

In [None]:
# Epoch numbers (1-based), derived from the recorded history so the x-axis
# always matches the number of epochs that were actually trained
epochs = np.arange(1, len(history1.history['loss']) + 1)
# Index of the epoch with the highest validation accuracy
best_acc_epoch = np.argmax(history1.history['val_accuracy'])
# Index of the epoch with the lowest validation loss
best_loss_epoch = np.argmin(history1.history['val_loss'])

# Plot accuracy
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)  # left panel: accuracy
plt.plot(epochs, history1.history['accuracy'], label='Training Accuracy', color='red')
plt.plot(epochs, history1.history['val_accuracy'], label='Validation Accuracy', color='green')
# +1 converts the 0-based index into the 1-based epoch number shown on the axis
plt.scatter(best_acc_epoch+1, history1.history['val_accuracy'][best_acc_epoch], color='blue', label=f'Best Epoch = {best_acc_epoch+1}')
plt.title('Model Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)  # right panel: loss
plt.plot(epochs, history1.history['loss'], label='Training Loss', color='red')
plt.plot(epochs, history1.history['val_loss'], label='Validation Loss', color='green')
plt.scatter(best_loss_epoch+1, history1.history['val_loss'][best_loss_epoch], color='blue', label=f'Best Epoch = {best_loss_epoch+1}')
plt.title('Model Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
# Class probabilities predicted for the validation fold
Y_pred = model1.predict(X_valid)
# Take the most probable class of every sample. The softmax output must not be
# thresholded at 0.5 first: a sample with no probability above 0.5 would become
# an all-False row, and argmax of such a row is always class 0.
y_pred_labels = Y_pred.argmax(axis=1)
y_true = np.argmax(Y_valid, axis=1)

# Compute confusion matrix (rows: true class, columns: predicted class);
# `labels` keeps the matrix 4x4 even if a class is absent from both vectors
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))

# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: 1DCNN model')
plt.show()

In [None]:
# Score model1 on the held-out test fold (values come back in loss + compiled-metric order)
loss, accuracy, auc, precision, recall = model1.evaluate(X_test, Y_test)
print(f"Loss : {loss}")
print(f"Accuracy : {accuracy}")
print(f"Area under the Curve (ROC) : {auc}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")
# Class probabilities predicted for the test fold
Y_pred = model1.predict(X_test)
# Take the most probable class of every sample. Thresholding the softmax output
# at 0.5 before argmax would map every sample without a >0.5 probability to class 0.
y_pred_labels = Y_pred.argmax(axis=1)
y_true = np.argmax(Y_test, axis=1)

# Compute confusion matrix (rows: true class, columns: predicted class);
# `labels` keeps the matrix 4x4 even if a class is absent from both vectors
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))
# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: 1DCNN model')
plt.show()

In [None]:
from sklearn.metrics import precision_recall_fscore_support


# Per-class precision, recall, F1 score and support from the argmax of the
# test labels and of the current `Y_pred`
precision, recall, f1score, support = precision_recall_fscore_support(
    Y_test.argmax(axis=1),
    Y_pred.argmax(axis=1),
)

# Class names, in the same order as the label columns
label_list = class_names

# Report precision, recall, F1 score and the sample size of every class
for label, p, r, f, s in zip(label_list, precision, recall, f1score, support):
    print(f'{label}: Precision={p:.3f}, Recall={r:.3f}, F1 Score={f:.3f}, Support={s}')

In [None]:
from sklearn.preprocessing import LabelBinarizer
from itertools import cycle
import sklearn.metrics as metrics
from sklearn.metrics import RocCurveDisplay

# One-vs-rest view of the targets: binarizer fitted on the training labels,
# then applied to the test labels
label_binarizer = LabelBinarizer()
label_binarizer.fit(Y_train)
y_onehot_test = label_binarizer.transform(Y_test)
n_classes = Y_test.shape[1]
y_label = Y_test
# Per-class scores of model1 on the test fold
y_pred = model1.predict(X_test)

# One ROC curve per class, all drawn on the same axes
fig, ax = plt.subplots(figsize=(9,6))
colors = cycle(["dodgerblue", "tomato", "goldenrod", "seagreen"])
for class_id in range(n_classes):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, class_id],
        y_pred[:, class_id],
        name=f"Class {class_id}",
        ax=ax,
        color=next(colors),
        # the chance-level diagonal is drawn once, together with class 3
        plot_chance_level=(class_id == 3),
    )

_ = ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve for multi-class classification",
)

In [None]:
# heatmap visualization
def plot_class_specific_confusion_matrices(y_true, y_pred, class_names):
    """Plot a normalised one-vs-rest 2x2 confusion matrix for each class.

    The multi-class confusion matrix is computed from ``y_true`` and
    ``y_pred`` (previously a notebook-level ``cm`` was read and both
    arguments were ignored). Every cell of a 2x2 matrix is a fraction of all
    samples. Rows are the true condition and columns the predicted condition,
    i.e. ``[[TP, FN], [FP, TN]]``, matching the tick labels.

    Args:
        y_true: True class indices, or a 2-D one-hot array that is reduced
            with ``argmax(axis=1)``.
        y_pred: Predicted class indices, or a 2-D array of per-class scores
            that is reduced with ``argmax(axis=1)``.
        class_names: Class labels; position ``i`` names class index ``i``.

    Returns:
        None. One heatmap figure is shown per plotted class.

    Note:
        Class index 0 is not plotted, as in the original loop.
        NOTE(review): presumably because index 0 is the NORM class -- confirm.
    """
    # The notebook imports this name in an earlier cell; importing it here
    # keeps the function usable on its own.
    from sklearn.metrics import confusion_matrix

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.ndim > 1:
        y_true = y_true.argmax(axis=1)
    if y_pred.ndim > 1:
        y_pred = y_pred.argmax(axis=1)

    num_classes = len(class_names)
    # Fixed label set keeps the matrix num_classes x num_classes
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(num_classes))
    total = np.sum(cm)
    if total == 0:
        # Nothing to normalise by; avoid dividing by zero
        return

    for i in range(1,num_classes):
        # One-vs-rest fractions for class `i` (cm rows: true, cm columns: predicted)
        TP = cm[i, i]/total
        FP = (np.sum(cm[:, i]) - cm[i, i])/total
        FN = (np.sum(cm[i, :]) - cm[i, i])/total
        TN = 1 - (TP + FP + FN)
        # Row 0: truly positive samples, row 1: truly negative samples
        class_cm = np.array([[TP, FN], [FP, TN]])

        # Labels for the binary confusion matrix
        labels = ["Pred Positive", "Pred Negative"]
        tick_labels = ["True Positive", "True Negative"]

        # Plot the binary confusion matrix
        plt.figure(figsize=(4,3))
        sns.heatmap(class_cm, annot=True, cmap='Blues',
                    xticklabels=labels, yticklabels=tick_labels)
        plt.title(f"Confusion Matrix for Class {class_names[i]}")
        plt.xlabel("Predicted Labels")
        plt.ylabel("True Labels")
        plt.show()
plot_class_specific_confusion_matrices(y_true, y_pred, class_names)



### 3.1.1 Random search Hyperparameter tuning

In [None]:
def CNN(hp):
    """Build and compile the 1D-CNN for one hyperparameter-search trial.

    Note: this definition reuses the name ``CNN`` and therefore replaces the
    zero-argument ``CNN()`` defined earlier in the notebook.

    Args:
        hp: Hyperparameter object; ``hp.Choice(name, values=[...])`` is called
            for the filter count and kernel size of each of the three Conv1D
            layers, for the dropout rate and for the dense-layer width.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy, the
        "adam" optimizer and the accuracy / auc / precision / recall metrics.
    """
    conv_stack = [
        Conv1D(
            filters=hp.Choice('filters_1', values=[16, 32, 64]),
            kernel_size=hp.Choice('kernel_size_1', values=[3, 5, 10]),
            strides=3,
            padding='same',
            input_shape=(1000, 12),
        ),
        LeakyReLU(negative_slope=0.2),
        MaxPooling1D(pool_size=2, strides=3),
        layers.BatchNormalization(),
        Conv1D(
            filters=hp.Choice('filters_2', values=[16, 32, 64]),
            kernel_size=hp.Choice('kernel_size_2', values=[3, 5, 7]),
            strides=2,
            padding='same',
            activation='relu',
        ),
        MaxPooling1D(pool_size=2, strides=2),
        layers.BatchNormalization(),
        Conv1D(
            filters=hp.Choice('filters_3', values=[32, 64, 128]),
            kernel_size=hp.Choice('kernel_size_3', values=[3, 5, 7]),
            strides=1,
            padding='same',
            activation='relu',
        ),
        MaxPooling1D(pool_size=2, strides=2),
    ]
    classifier_head = [
        Flatten(),
        layers.Dropout(rate=hp.Choice('dropout_rate', values=[0.1, 0.2, 0.3])),
        layers.Dense(units=hp.Choice('units_1', values=[5,7,10])),
        LeakyReLU(negative_slope=0.2),
        layers.Dense(units=4, activation='softmax'),
    ]
    model = Sequential(conv_stack + classifier_head)
    model.compile(
        loss='categorical_crossentropy',
        optimizer="adam",
        metrics=['accuracy', 'auc', 'precision', 'recall'],
    )
    return model

In [None]:
# Random search over the CNN(hp) search space, ranked by validation accuracy
search_settings = dict(
    objective='val_accuracy',
    max_trials=7,  # Number of hyperparameter combinations to try
    executions_per_trial=2,  # Number of times to train each model
    directory='hyperparameter_tuning_1DCNN',
    project_name='1dcnn_tuning',
)
tuner = RandomSearch(CNN, **search_settings)

In [None]:
# Run the random search; every trial is trained for 10 epochs and scored on the validation fold
tuner.search(X_train, Y_train, 
             epochs=10, 
             validation_data=(X_valid, Y_valid), 
             batch_size=64)

# Best hyperparameters found by the search
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
# Build a fresh, untrained model from them. get_best_models() would return a
# model that already carries the weights trained during the search, so the
# fit() call that follows in the notebook would continue training instead of
# producing a training history that starts from scratch.
tuned_model_1DCNN = tuner.hypermodel.build(best_hps)

# Print best hyperparameters
print(f"""
Best hyperparameters:
Filters 1: {best_hps.get('filters_1')}
Kernel Size 1: {best_hps.get('kernel_size_1')}
Filters 2: {best_hps.get('filters_2')}
Kernel Size 2: {best_hps.get('kernel_size_2')}
Filters 3: {best_hps.get('filters_3')}
Kernel Size 3: {best_hps.get('kernel_size_3')}
Dense Units 1: {best_hps.get('units_1')}
Dropout Rate: {best_hps.get('dropout_rate')}
""")
tuned_model_1DCNN.summary()

In [None]:
# Train the tuned 1D-CNN for 10 epochs while monitoring the validation fold,
# then score the final weights on that fold
history2 = tuned_model_1DCNN.fit(
    X_train,
    Y_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_valid, Y_valid),
)
val_loss, val_acc, val_auc, val_precision, val_recall = tuned_model_1DCNN.evaluate(X_valid, Y_valid)

In [None]:
# Epoch numbers (1-based), derived from the recorded history so the x-axis
# always matches the number of epochs that were actually trained
epochs = np.arange(1, len(history2.history['loss']) + 1)
# Index of the epoch with the highest validation accuracy
best_acc_epoch = np.argmax(history2.history['val_accuracy'])
# Index of the epoch with the lowest validation loss
best_loss_epoch = np.argmin(history2.history['val_loss'])

# Plot accuracy
plt.figure(figsize=(8,4))

plt.subplot(1, 2, 1)  # left panel: accuracy
plt.plot(epochs, history2.history['accuracy'], label='Training Accuracy', color='red')
plt.plot(epochs, history2.history['val_accuracy'], label='Validation Accuracy', color='green')
# +1 converts the 0-based index into the 1-based epoch number shown on the axis
plt.scatter(best_acc_epoch+1, history2.history['val_accuracy'][best_acc_epoch], color='blue', label=f'Best Epoch = {best_acc_epoch+1}')
plt.title('Model Training and Validation Accuracy')
plt.ylim(0,1)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)  # right panel: loss
plt.plot(epochs, history2.history['loss'], label='Training Loss', color='red')
plt.plot(epochs, history2.history['val_loss'], label='Validation Loss', color='green')
plt.scatter(best_loss_epoch+1, history2.history['val_loss'][best_loss_epoch], color='blue', label=f'Best Epoch = {best_loss_epoch+1}')
plt.title('Model Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
# Class probabilities predicted for the validation fold
Y_pred = tuned_model_1DCNN.predict(X_valid)
# Take the most probable class of every sample. Thresholding the softmax output
# at 0.5 before argmax would map every sample without a >0.5 probability to class 0.
y_pred_labels = Y_pred.argmax(axis=1)
y_true = np.argmax(Y_valid, axis=1)

# Compute confusion matrix (rows: true class, columns: predicted class);
# `labels` keeps the matrix 4x4 even if a class is absent from both vectors
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))

# Plot confusion matrix on explicitly sized axes. A bare plt.figure() before
# cmp.plot() would only leave an empty extra figure, because plot() creates
# its own figure when no axes are passed.
fig, ax = plt.subplots(figsize=(8, 6))
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues, ax=ax)
plt.title('Confusion Matrix: tuned 1DCNN model')
plt.show()

In [None]:
# Score the tuned 1D-CNN on the held-out test fold (loss + compiled-metric order)
loss, accuracy, auc, precision, recall = tuned_model_1DCNN.evaluate(X_test, Y_test)
print(f"Loss : {loss}")
print(f"Accuracy : {accuracy}")
print(f"Area under the Curve (ROC) : {auc}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")
# Class probabilities predicted for the test fold
Y_pred =tuned_model_1DCNN.predict(X_test)
# Take the most probable class of every sample. Thresholding the softmax output
# at 0.5 before argmax would map every sample without a >0.5 probability to class 0.
y_pred_labels = Y_pred.argmax(axis=1)
y_true = np.argmax(Y_test, axis=1)

# Compute confusion matrix (rows: true class, columns: predicted class);
# `labels` keeps the matrix 4x4 even if a class is absent from both vectors
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))
# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: tuned 1DCNN model')
plt.show()

In [None]:

from sklearn.metrics import precision_recall_fscore_support

# Per-class precision, recall, F1 score and support from the argmax of the
# test labels and of the current `Y_pred`
precision, recall, f1score, support = precision_recall_fscore_support(
    Y_test.argmax(axis=1),
    Y_pred.argmax(axis=1),
)

# Class names, in the same order as the label columns
label_list = class_names

# Report precision, recall, F1 score and the sample size of every class
for label, p, r, f, s in zip(label_list, precision, recall, f1score, support):
    print(f'{label}: Precision={p:.3f}, Recall={r:.3f}, F1 Score={f:.3f}, Support={s}')

In [None]:
from sklearn.preprocessing import LabelBinarizer
from itertools import cycle
import sklearn.metrics as metrics
from sklearn.metrics import RocCurveDisplay

# One-vs-rest view of the targets: binarizer fitted on the training labels,
# then applied to the test labels
label_binarizer = LabelBinarizer()
label_binarizer.fit(Y_train)
y_onehot_test = label_binarizer.transform(Y_test)
n_classes = Y_test.shape[1]
y_label = Y_test
# Per-class scores of the tuned 1D-CNN on the test fold
y_pred = tuned_model_1DCNN.predict(X_test)

# One ROC curve per class, all drawn on the same axes
fig, ax = plt.subplots(figsize=(9,6))
colors = cycle(["dodgerblue", "tomato", "goldenrod", "seagreen"])
for class_id in range(n_classes):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, class_id],
        y_pred[:, class_id],
        name=f"Class {class_id}",
        ax=ax,
        color=next(colors),
        # the chance-level diagonal is drawn once, together with class 3
        plot_chance_level=(class_id == 3),
    )

_ = ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve for multi-class classification",
)

In [None]:
# Per-class 2x2 confusion-matrix heatmaps for the tuned 1D-CNN on the test fold
plot_class_specific_confusion_matrices(y_true, y_pred, class_names)

### 3.2 Bidirectional LSTM 

In [None]:
def BiLSTM():
    """Build and compile the bidirectional-LSTM classifier.

    A ``Bidirectional(LSTM(64))`` layer declared with
    ``input_shape=(1000, 12)`` is followed by Dropout(0.2), a 7-unit dense
    layer with LeakyReLU (default slope) and a 4-unit softmax output.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy, the
        "adam" optimizer and the accuracy / auc / precision / recall metrics.
    """
    model = Sequential([
        layers.Bidirectional(LSTM(64), input_shape=(1000, 12)),
        Dropout(0.2),
        Dense(units=7),
        LeakyReLU(),
        Dense(units=4, activation='softmax'),
    ])
    model.compile(
        loss='categorical_crossentropy',
        optimizer="adam",
        metrics=['accuracy', 'auc', 'precision', 'recall'],
    )
    return model

In [None]:
# Instantiate the bidirectional-LSTM model (model2) and print its layer summary
model2=BiLSTM()
model2.summary()

In [None]:
# Train model2 for 5 epochs (batch size 128) while monitoring the validation
# fold, then score the final weights on that fold
history3 = model2.fit(
    X_train,
    Y_train,
    epochs=5,
    batch_size=128,
    validation_data=(X_valid, Y_valid),
)
val_loss, val_acc, val_auc, val_precision, val_recall = model2.evaluate(X_valid, Y_valid)

In [None]:
# Epoch numbers (1-based), derived from the recorded history so the x-axis
# always matches the number of epochs that were actually trained
epochs = np.arange(1, len(history3.history['loss']) + 1)
# Index of the epoch with the highest validation accuracy
best_acc_epoch = np.argmax(history3.history['val_accuracy'])
# Index of the epoch with the lowest validation loss
best_loss_epoch = np.argmin(history3.history['val_loss'])

# Plot accuracy
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)  # left panel: accuracy
plt.plot(epochs, history3.history['accuracy'], label='Training Accuracy', color='red')
plt.plot(epochs, history3.history['val_accuracy'], label='Validation Accuracy', color='green')
# +1 converts the 0-based index into the 1-based epoch number shown on the axis
plt.scatter(best_acc_epoch+1, history3.history['val_accuracy'][best_acc_epoch], color='blue', label=f'Best Epoch = {best_acc_epoch+1}')
plt.title('Model Training and Validation Accuracy')
plt.ylim(0,1)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)  # right panel: loss
plt.plot(epochs, history3.history['loss'], label='Training Loss', color='red')
plt.plot(epochs, history3.history['val_loss'], label='Validation Loss', color='green')
plt.scatter(best_loss_epoch+1, history3.history['val_loss'][best_loss_epoch], color='blue', label=f'Best Epoch = {best_loss_epoch+1}')
plt.title('Model Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
# Class probabilities predicted for the validation fold
Y_pred =model2.predict(X_valid)
# Take the most probable class of every sample. Thresholding the softmax output
# at 0.5 before argmax would map every sample without a >0.5 probability to class 0.
y_pred_labels = Y_pred.argmax(axis=1)
y_true = np.argmax(Y_valid, axis=1)

# Compute confusion matrix (rows: true class, columns: predicted class);
# `labels` keeps the matrix 4x4 even if a class is absent from both vectors
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))

# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix for BiLSTM model')
plt.show()

In [None]:
# Score model2 on the held-out test fold. The first value is bound to `loss`
# (lower case) so that the print below reports this model's loss rather than a
# stale `loss` left over from an earlier evaluation cell.
loss, accuracy, auc, precision, recall = model2.evaluate(X_test, Y_test)
print(f"Loss : {loss}")
print(f"Accuracy : {accuracy}")
print(f"Area under the Curve (ROC) : {auc}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")
# Class probabilities predicted for the test fold
Y_pred =model2.predict(X_test)
# Take the most probable class of every sample. Thresholding the softmax output
# at 0.5 before argmax would map every sample without a >0.5 probability to class 0.
y_pred_labels = Y_pred.argmax(axis=1)
y_true = np.argmax(Y_test, axis=1)

# Compute confusion matrix (rows: true class, columns: predicted class);
# `labels` keeps the matrix 4x4 even if a class is absent from both vectors
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))
# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: BiLSTM model')
plt.show()

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Per-class precision, recall, F1 score and support from the argmax of the
# test labels and of the current `Y_pred`
precision, recall, f1score, support = precision_recall_fscore_support(
    Y_test.argmax(axis=1),
    Y_pred.argmax(axis=1),
)

# Class names, in the same order as the label columns
label_list = class_names

# Report precision, recall, F1 score and the sample size of every class
for label, p, r, f, s in zip(label_list, precision, recall, f1score, support):
    print(f'{label}: Precision={p:.3f}, Recall={r:.3f}, F1 Score={f:.3f}, Support={s}')

In [None]:
from sklearn.preprocessing import LabelBinarizer
from itertools import cycle
import sklearn.metrics as metrics
from sklearn.metrics import RocCurveDisplay

# One-vs-rest view of the targets: binarizer fitted on the training labels,
# then applied to the test labels
label_binarizer = LabelBinarizer()
label_binarizer.fit(Y_train)
y_onehot_test = label_binarizer.transform(Y_test)
n_classes = Y_test.shape[1]
y_label = Y_test
# Per-class scores of model2 on the test fold
y_pred = model2.predict(X_test)

# One ROC curve per class, all drawn on the same axes
fig, ax = plt.subplots(figsize=(9,6))
colors = cycle(["dodgerblue", "tomato", "goldenrod", "seagreen"])
for class_id in range(n_classes):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, class_id],
        y_pred[:, class_id],
        name=f"Class {class_id}",
        ax=ax,
        color=next(colors),
        # the chance-level diagonal is drawn once, together with class 3
        plot_chance_level=(class_id == 3),
    )

_ = ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve for multi-class classification",
)

In [None]:
# Per-class 2x2 confusion-matrix heatmaps for the BiLSTM model (model2) on the test fold
plot_class_specific_confusion_matrices(y_true, y_pred, class_names)

### 3.3 CNN+BiLSTM

In [None]:
def CNN_BiLSTM():
    """Build and compile the combined CNN + bidirectional-LSTM classifier.

    Three Conv1D/MaxPooling1D stages (batch normalisation after the first
    two, first layer declared with ``input_shape=(1000, 12)``) feed a
    ``Bidirectional(LSTM(64))`` layer, followed by Flatten, Dropout(0.1), a
    10-unit dense layer with LeakyReLU, a 7-unit ReLU dense layer and a
    4-unit softmax output.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy, the
        "adam" optimizer and the accuracy / auc / precision / recall metrics.
    """
    conv_stack = [
        Conv1D(filters=32, kernel_size=10, strides=3, padding='same', input_shape=(1000, 12)),
        LeakyReLU(negative_slope=0.2),
        MaxPooling1D(pool_size=2, strides=3),
        layers.BatchNormalization(),
        Conv1D(filters=16, kernel_size=5, strides=2, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2, strides=2),
        layers.BatchNormalization(),
        Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2, strides=2),
    ]
    recurrent_head = [
        layers.Bidirectional((LSTM(64))),
        Flatten(),
        layers.Dropout(0.1),
        layers.Dense(units=10),
        LeakyReLU(negative_slope=0.2),
        layers.Dense(units=7, activation='relu'),
        layers.Dense(units=4, activation='softmax'),
    ]
    model = Sequential(conv_stack + recurrent_head)
    model.compile(
        loss='categorical_crossentropy',
        optimizer="adam",
        metrics=['accuracy', 'auc', 'precision', 'recall'],
    )
    return model

In [None]:
# Instantiate the CNN+BiLSTM model (model3) and print its layer summary
model3=CNN_BiLSTM()
model3.summary()

In [None]:
# Train model3 for 10 epochs while monitoring the validation fold, then score
# the final weights on that fold
history4 = model3.fit(
    X_train,
    Y_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_valid, Y_valid),
)
val_loss, val_acc, val_auc, val_precision, val_recall = model3.evaluate(X_valid, Y_valid)

In [None]:
# Epoch numbers (1-based), derived from the recorded history so the x-axis
# always matches the number of epochs that were actually trained
epochs = np.arange(1, len(history4.history['loss']) + 1)
# Index of the epoch with the highest validation accuracy
best_acc_epoch = np.argmax(history4.history['val_accuracy'])
# Index of the epoch with the lowest validation loss
best_loss_epoch = np.argmin(history4.history['val_loss'])

# Plot accuracy
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)  # left panel: accuracy
plt.plot(epochs, history4.history['accuracy'], label='Training Accuracy', color='red')
plt.plot(epochs, history4.history['val_accuracy'], label='Validation Accuracy', color='green')
# +1 converts the 0-based index into the 1-based epoch number shown on the axis
plt.scatter(best_acc_epoch+1, history4.history['val_accuracy'][best_acc_epoch], color='blue', label=f'Best Epoch = {best_acc_epoch+1}')
plt.title('Model Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)  # right panel: loss
plt.plot(epochs, history4.history['loss'], label='Training Loss', color='red')
plt.plot(epochs, history4.history['val_loss'], label='Validation Loss', color='green')
plt.scatter(best_loss_epoch+1, history4.history['val_loss'][best_loss_epoch], color='blue', label=f'Best Epoch = {best_loss_epoch+1}')
plt.title('Model Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
# Class probabilities predicted for the validation fold
Y_pred =model3.predict(X_valid)
# Take the most probable class of every sample. Thresholding the softmax output
# at 0.5 before argmax would map every sample without a >0.5 probability to class 0.
y_pred_labels = Y_pred.argmax(axis=1)
y_true = np.argmax(Y_valid, axis=1)

# Compute confusion matrix (rows: true class, columns: predicted class);
# `labels` keeps the matrix 4x4 even if a class is absent from both vectors
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))

# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix for CNN+BiLSTM model')
plt.show()

In [None]:
# Score model3 on the held-out test fold. The first value is bound to `loss`
# (lower case) so that the print below reports this model's loss rather than a
# stale `loss` left over from an earlier evaluation cell.
loss, accuracy, auc, precision, recall = model3.evaluate(X_test, Y_test)
print(f"Loss : {loss}")
print(f"Accuracy : {accuracy}")
print(f"Area under the Curve (ROC) : {auc}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")
# Class probabilities predicted for the test fold
Y_pred =model3.predict(X_test)
# Take the most probable class of every sample. Thresholding the softmax output
# at 0.5 before argmax would map every sample without a >0.5 probability to class 0.
y_pred_labels = Y_pred.argmax(axis=1)
y_true = np.argmax(Y_test, axis=1)

# Compute confusion matrix (rows: true class, columns: predicted class);
# `labels` keeps the matrix 4x4 even if a class is absent from both vectors
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))
# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: CNN+BiLSTM model')
plt.show()

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Per-class precision, recall, F1 score and support from the argmax of the
# test labels and of the current `Y_pred`
precision, recall, f1score, support = precision_recall_fscore_support(
    Y_test.argmax(axis=1),
    Y_pred.argmax(axis=1),
)

# Class names, in the same order as the label columns
label_list = class_names

# Report precision, recall, F1 score and the sample size of every class
for label, p, r, f, s in zip(label_list, precision, recall, f1score, support):
    print(f'{label}: Precision={p:.3f}, Recall={r:.3f}, F1 Score={f:.3f}, Support={s}')

In [None]:
from sklearn.preprocessing import LabelBinarizer
from itertools import cycle
import sklearn.metrics as metrics
from sklearn.metrics import RocCurveDisplay

# One-vs-rest view of the targets: binarizer fitted on the training labels,
# then applied to the test labels
label_binarizer = LabelBinarizer()
label_binarizer.fit(Y_train)
y_onehot_test = label_binarizer.transform(Y_test)
n_classes = Y_test.shape[1]
y_label = Y_test
# Per-class scores of model3 on the test fold
y_pred = model3.predict(X_test)

# One ROC curve per class, all drawn on the same axes
fig, ax = plt.subplots(figsize=(9,6))
colors = cycle(["dodgerblue", "tomato", "goldenrod", "seagreen"])
for class_id in range(n_classes):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, class_id],
        y_pred[:, class_id],
        name=f"Class {class_id}",
        ax=ax,
        color=next(colors),
        # the chance-level diagonal is drawn once, together with class 3
        plot_chance_level=(class_id == 3),
    )

_ = ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve for multi-class classification",
)

In [None]:
# Per-class confusion matrices via a helper defined outside this section.
# When the cells above run in order, y_true holds integer class indices
# (argmax of Y_test) and y_pred holds the raw model3.predict(X_test) output.
# NOTE(review): confirm the helper expects index targets together with raw scores.
plot_class_specific_confusion_matrices(y_true, y_pred, class_names)

### 3.3.1 Random search hyperparameter tuning

In [None]:
def CNN_LSTM(hp):
    """Build and compile the tunable 1D-CNN + bidirectional-LSTM classifier.

    Args:
        hp: hyperparameter object exposing ``Choice(name, values=...)``; it supplies
            the filter counts and kernel sizes of the three convolutions, the LSTM
            width and the dropout rate.

    Returns:
        A compiled Sequential model taking inputs of shape (1000, 12) and ending in
        a 4-unit softmax layer, trained with categorical cross-entropy and Adam.
    """
    # Draw every tunable value up front, in the same order the layers consume them.
    conv1_filters = hp.Choice('filters_1', values=[16, 32, 64])
    conv1_kernel = hp.Choice('kernel_size_1', values=[3, 5, 10])
    conv2_filters = hp.Choice('filters_2', values=[16, 32, 64])
    conv2_kernel = hp.Choice('kernel_size_2', values=[3, 5, 7])
    conv3_filters = hp.Choice('filters_3', values=[32, 64, 128])
    conv3_kernel = hp.Choice('kernel_size_3', values=[3, 5, 7])
    recurrent_units = hp.Choice('lstm_units', values=[20, 64, 100])
    drop_fraction = hp.Choice('dropout_rate', values=[0.1, 0.2, 0.3])

    layer_stack = [
        # Convolution block 1 (LeakyReLU activation applied as a separate layer)
        Conv1D(filters=conv1_filters, kernel_size=conv1_kernel, strides=3, padding='same', input_shape = (1000,12)),
        LeakyReLU(negative_slope=0.2),
        MaxPooling1D(pool_size=2, strides=3),
        layers.BatchNormalization(),
        # Convolution block 2
        Conv1D(filters=conv2_filters, kernel_size=conv2_kernel, strides=2, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2, strides=2),
        layers.BatchNormalization(),
        # Convolution block 3
        Conv1D(filters=conv3_filters, kernel_size=conv3_kernel, strides=1, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2, strides=2),
        # Recurrent summary of the convolutional feature sequence
        layers.Bidirectional(LSTM(units=recurrent_units)),
        # NOTE(review): the LSTM is built without return_sequences, so this Flatten
        # is presumably a no-op; kept to leave the architecture unchanged.
        Flatten(),
        layers.Dropout(rate=drop_fraction),
        # Classification head
        layers.Dense(units=10),
        LeakyReLU(negative_slope=0.2),
        layers.Dense(units=7, activation='relu'),
        layers.Dense(units=4, activation='softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(loss = 'categorical_crossentropy', optimizer = "adam", metrics=['accuracy', 'auc', 'precision', 'recall'])
    return model

In [None]:
# Random search over the CNN_LSTM hyperparameter space, ranked by validation accuracy.
# NOTE(review): results are stored under `directory`/`project_name`; presumably an
# existing folder from a previous run is reused rather than searched again - confirm.
tuner = RandomSearch(
    CNN_LSTM,
    objective='val_accuracy',
    max_trials=7,  # Number of hyperparameter combinations to try
    executions_per_trial=2,  # Number of times to train each model
    seed=42,  # Fix the sampled combinations so the search is repeatable
    directory='hyperparameter_tuning_CNNBiLSTM',
    project_name='cnn_bilstm_tuning'
)

In [None]:
# Run the search: every trial trains for 10 epochs and is scored on the validation split.
tuner.search(
    X_train,
    Y_train,
    epochs=10,
    validation_data=(X_valid, Y_valid),
    batch_size=64,
)

# Best hyperparameter set and the corresponding best model found by the tuner.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
tuned_model_CNNLSTM = tuner.get_best_models(num_models=1)[0]

# Report the winning value of every tuned hyperparameter (display label -> name).
hp_report_rows = [
    ('Filters 1', 'filters_1'),
    ('Kernel Size 1', 'kernel_size_1'),
    ('Filters 2', 'filters_2'),
    ('Kernel Size 2', 'kernel_size_2'),
    ('Filters 3', 'filters_3'),
    ('Kernel Size 3', 'kernel_size_3'),
    ('LSTM Units 1', 'lstm_units'),
    ('Dropout Rate', 'dropout_rate'),
]
hp_report_lines = ['Best hyperparameters:'] + [
    f"{caption}: {best_hps.get(hp_name)}" for caption, hp_name in hp_report_rows
]
# Leading and trailing newline reproduce the blank lines around the report.
print("\n" + "\n".join(hp_report_lines) + "\n")
tuned_model_CNNLSTM.summary()

In [None]:
# Train the best model returned by the tuner for another 10 epochs, recording the
# per-epoch history, then score it on the validation split.
tuned_fit_options = dict(epochs=10, batch_size=64, validation_data=(X_valid, Y_valid))
history5 = tuned_model_CNNLSTM.fit(X_train, Y_train, **tuned_fit_options)
tuned_val_metrics = tuned_model_CNNLSTM.evaluate(X_valid, Y_valid)
# Order follows the compiled metrics: loss, accuracy, auc, precision, recall.
val_loss, val_acc, val_auc, val_percision, val_recall = tuned_val_metrics

In [None]:
# Training curves of the tuned model (history5).
# The epoch axis is derived from the recorded history instead of being hard-coded
# to 10, so the plot stays valid if the number of trained epochs changes.
epochs = np.arange(1, len(history5.history['loss']) + 1)
# Zero-based index of the epoch with the highest validation accuracy
best_acc_epoch = np.argmax(history5.history['val_accuracy'])
# Zero-based index of the epoch with the lowest validation loss
best_loss_epoch = np.argmin(history5.history['val_loss'])

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 5))

# Accuracy panel. Titles name the tuned model; they previously said "Baseline Model".
ax_acc.plot(epochs, history5.history['accuracy'], label='Training Accuracy', color='red')
ax_acc.plot(epochs, history5.history['val_accuracy'], label='Validation Accuracy', color='green')
ax_acc.scatter(best_acc_epoch+1, history5.history['val_accuracy'][best_acc_epoch], color='blue', label=f'Best Epoch = {best_acc_epoch+1}')
ax_acc.set_title('Tuned Model Training and Validation Accuracy')
ax_acc.set_ylim(0, 1)
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Loss panel
ax_loss.plot(epochs, history5.history['loss'], label='Training Loss', color='red')
ax_loss.plot(epochs, history5.history['val_loss'], label='Validation Loss', color='green')
ax_loss.scatter(best_loss_epoch+1, history5.history['val_loss'][best_loss_epoch], color='blue', label=f'Best Epoch = {best_loss_epoch+1}')
ax_loss.set_title('Tuned Model Training and Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
# Confusion matrix of the tuned model on the validation split.
# Y_pred keeps the raw model outputs (one score per class and sample).
Y_pred = tuned_model_CNNLSTM.predict(X_valid)
y_true = np.argmax(Y_valid, axis=1)
# Pick the highest-scoring class directly. Thresholding at 0.5 first would turn
# every row without a score above 0.5 into all-False, and argmax of an all-False
# row is 0, silently counting those samples as NORM.
y_pred_labels = np.argmax(Y_pred, axis=1)

# Compute confusion matrix; labels are pinned so the matrix is always 4x4 and
# lines up with class_names even if a class never occurs.
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))

# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: tuned CNN+LSTM model')
plt.show()

In [None]:
# Evaluate the tuned model on the test split. The first value was previously bound
# to `Loss` while `loss` was printed, which raised NameError or showed a stale value.
loss, accuracy, auc, precision, recall = tuned_model_CNNLSTM.evaluate(X_test, Y_test)
print(f"Loss : {loss}")
print(f"Accuracy : {accuracy}")
print(f"Area under the Curve (ROC) : {auc}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")
# Generate predictions; Y_pred keeps the raw per-class scores.
Y_pred = tuned_model_CNNLSTM.predict(X_test)
y_true = np.argmax(Y_test, axis=1)
# Highest-scoring class per sample. No 0.5 threshold: a row with no score above
# 0.5 would become all-False and argmax would report class 0 (NORM).
y_pred_labels = np.argmax(Y_pred, axis=1)

# Compute confusion matrix with pinned labels so it always matches class_names.
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))
# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: tuned 1DCNN+BiLSTM model')
plt.show()

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Names of the classes, in class-index order.
label_list = class_names

# Per-class precision, recall, F1 and support on the test split.
# `labels` is pinned to every class index so the returned arrays always have one
# entry per name in label_list; otherwise a class absent from both the targets and
# the predictions is dropped and the remaining names shift onto the wrong rows.
precision, recall, f1score, support = precision_recall_fscore_support(
    Y_test.argmax(axis=1),
    Y_pred.argmax(axis=1),
    labels=list(range(len(label_list))),
    zero_division=0,
)

# Print precision, recall, F1 score and the sample size for each class.
for label, p, r, f, s in zip(label_list, precision, recall, f1score, support):
    print(f'{label}: Precision={p:.3f}, Recall={r:.3f}, F1 Score={f:.3f}, Support={s}')

In [None]:
from sklearn.preprocessing import LabelBinarizer
from itertools import cycle
import sklearn.metrics as metrics
from sklearn.metrics import RocCurveDisplay

# Binarize the target in a one-vs-rest fashion.
# NOTE(review): Y_test is indexed as a 2-D array below (Y_test.shape[1]), so it is
# presumably already one-hot and the binarizer just passes it through - confirm.
label_binarizer = LabelBinarizer().fit(Y_train)
y_onehot_test = label_binarizer.transform(Y_test)
n_classes = Y_test.shape[1]
y_label = Y_test
# Raw per-class scores of the tuned model on the test split.
y_pred = tuned_model_CNNLSTM.predict(X_test)

# One-vs-rest ROC curve (with AUC in the legend) per class for the tuned model.
fig, ax = plt.subplots(figsize=(9,6))
colors = cycle(["dodgerblue", "tomato", "goldenrod", "seagreen"])
for class_id, color in zip(range(n_classes), colors):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, class_id],
        y_pred[:, class_id],
        name=f"Class {class_id}",
        ax=ax,
        color=color,
        # Draw the chance diagonal once, with the last curve, whatever the class count.
        plot_chance_level=(class_id == n_classes - 1),
    )

_ = ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve for multi-class classification",
)

In [None]:
# Per-class confusion matrices via a helper defined outside this section.
# When the cells above run in order, y_true holds integer class indices
# (argmax of Y_test) and y_pred holds the raw tuned_model_CNNLSTM.predict(X_test) output.
# NOTE(review): confirm the helper expects index targets together with raw scores.
plot_class_specific_confusion_matrices(y_true, y_pred, class_names)

# 4. Class imbalance: SMOTE and Class Weight

### 4.1 Best performing model with SMOTE

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
# SMOTE works on 2-D feature matrices, so flatten every signal into one feature
# vector per sample before oversampling.
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
# Oversample the minority classes with a fixed seed for repeatability.
# NOTE(review): Y_train appears to be one-hot encoded elsewhere in the notebook;
# confirm that fit_resample returns the targets in the same encoding.
smote = SMOTE(random_state=42)
X_train_resampled, Y_train_resampled = smote.fit_resample(X_train_flattened, Y_train)
# Restore the per-sample signal shape from X_train itself rather than hard-coding
# (1000, 12), so the cell keeps working if the signal length or lead count changes.
X_train_resampled = X_train_resampled.reshape(-1, *X_train.shape[1:])
# Backward-compatible alias: the resampled targets were originally bound to this
# misspelled name, which other cells still read.
Y_trian_resampled = Y_train_resampled

In [None]:
# Build a fresh, untrained model from the best hyperparameters for the SMOTE run.
# `model4 = tuned_model_CNNLSTM` only created a second name for the same object,
# so this experiment started from already-trained weights and also kept training
# the tuned model (and everything else aliased to it).
model4 = CNN_LSTM(best_hps)
history6 = model4.fit(X_train_resampled, Y_trian_resampled, epochs=10, batch_size=64, validation_data=(X_valid, Y_valid))
# Order follows the compiled metrics: loss, accuracy, auc, precision, recall.
val_loss, val_acc, val_auc, val_precision, val_recall = model4.evaluate(X_valid, Y_valid)

In [None]:
# Training curves of the SMOTE run (history6).
# The epoch axis is derived from the recorded history instead of being hard-coded
# to 10, so the plot stays valid if the number of trained epochs changes.
epochs = np.arange(1, len(history6.history['loss']) + 1)
# Zero-based index of the epoch with the highest validation accuracy
best_acc_epoch = np.argmax(history6.history['val_accuracy'])
# Zero-based index of the epoch with the lowest validation loss
best_loss_epoch = np.argmin(history6.history['val_loss'])

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 5))

# Accuracy panel
ax_acc.plot(epochs, history6.history['accuracy'], label='Training Accuracy', color='red')
ax_acc.plot(epochs, history6.history['val_accuracy'], label='Validation Accuracy', color='green')
ax_acc.scatter(best_acc_epoch+1, history6.history['val_accuracy'][best_acc_epoch], color='blue', label=f'Best Epoch = {best_acc_epoch+1}')
ax_acc.set_title('Model Training and Validation Accuracy')
ax_acc.set_ylim(0, 1)
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Loss panel
ax_loss.plot(epochs, history6.history['loss'], label='Training Loss', color='red')
ax_loss.plot(epochs, history6.history['val_loss'], label='Validation Loss', color='green')
ax_loss.scatter(best_loss_epoch+1, history6.history['val_loss'][best_loss_epoch], color='blue', label=f'Best Epoch = {best_loss_epoch+1}')
ax_loss.set_title('Model Training and Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
# Confusion matrix of model4 (SMOTE run) on the validation split.
# Y_pred keeps the raw model outputs (one score per class and sample).
Y_pred = model4.predict(X_valid)
y_true = np.argmax(Y_valid, axis=1)
# Pick the highest-scoring class directly. Thresholding at 0.5 first would turn
# every row without a score above 0.5 into all-False, and argmax of an all-False
# row is 0, silently counting those samples as NORM.
y_pred_labels = np.argmax(Y_pred, axis=1)

# Compute confusion matrix; labels are pinned so the matrix is always 4x4 and
# lines up with class_names even if a class never occurs.
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))

# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: SMOTE')
plt.show()

In [None]:
# Evaluate model4 (SMOTE run) on the test split.
loss, accuracy, auc, precision, recall = model4.evaluate(X_test, Y_test)
print(f"Loss : {loss}")
print(f"Accuracy : {accuracy}")
print(f"Area under the Curve (ROC) : {auc}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")
# Generate predictions; Y_pred keeps the raw per-class scores.
Y_pred = model4.predict(X_test)
y_true = np.argmax(Y_test, axis=1)
# Highest-scoring class per sample. No 0.5 threshold: a row with no score above
# 0.5 would become all-False and argmax would report class 0 (NORM).
y_pred_labels = np.argmax(Y_pred, axis=1)

# Compute confusion matrix with pinned labels so it always matches class_names.
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))
# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: SMOTE')
plt.show()

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Names of the classes, in class-index order.
label_list = class_names

# Per-class precision, recall, F1 and support on the test split.
# `labels` is pinned to every class index so the returned arrays always have one
# entry per name in label_list; otherwise a class absent from both the targets and
# the predictions is dropped and the remaining names shift onto the wrong rows.
precision, recall, f1score, support = precision_recall_fscore_support(
    Y_test.argmax(axis=1),
    Y_pred.argmax(axis=1),
    labels=list(range(len(label_list))),
    zero_division=0,
)

# Print precision, recall, F1 score and the sample size for each class.
for label, p, r, f, s in zip(label_list, precision, recall, f1score, support):
    print(f'{label}: Precision={p:.3f}, Recall={r:.3f}, F1 Score={f:.3f}, Support={s}')

In [None]:
from sklearn.preprocessing import LabelBinarizer
from itertools import cycle
import sklearn.metrics as metrics
from sklearn.metrics import RocCurveDisplay
# Binarize the target in a one-vs-rest fashion.
# NOTE(review): Y_test is indexed as a 2-D array below (Y_test.shape[1]), so it is
# presumably already one-hot and the binarizer just passes it through - confirm.
label_binarizer = LabelBinarizer().fit(Y_train)
y_onehot_test = label_binarizer.transform(Y_test)
n_classes = Y_test.shape[1]
y_label = Y_test
# Raw per-class scores of model4 (SMOTE run) on the test split.
y_pred = model4.predict(X_test)

# One-vs-rest ROC curve (with AUC in the legend) per class for model4 on the test set.
fig, ax = plt.subplots(figsize=(9,6))
colors = cycle(["dodgerblue", "tomato", "goldenrod", "seagreen"])
for class_id, color in zip(range(n_classes), colors):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, class_id],
        y_pred[:, class_id],
        name=f"Class {class_id}",
        ax=ax,
        color=color,
        # Draw the chance diagonal once, with the last curve, whatever the class count.
        plot_chance_level=(class_id == n_classes - 1),
    )

_ = ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve for multi-class classification",
)

In [None]:
# Per-class confusion matrices via a helper defined outside this section.
# When the cells above run in order, y_true holds integer class indices
# (argmax of Y_test) and y_pred holds the raw model4.predict(X_test) output.
# NOTE(review): confirm the helper expects index targets together with raw scores.
plot_class_specific_confusion_matrices(y_true, y_pred, class_names)

### 4.2 Class weight

In [None]:
# Balanced class weights: n_samples / (n_classes * n_samples_in_class), 4 classes.
class_names = ['NORM','MI', 'STTC','CD']
# Total number of training samples
row_counts = Y_train.shape[0]
# Number of training samples flagged with 1 in each class column
column_counts = np.sum(Y_train == 1, axis=0)
# One weight per class, in the same order as class_names
NORM_weight, MI_weight, STTC_weight, CD_weight = (
    row_counts/(4*column_counts[class_idx]) for class_idx in range(4)
)

In [None]:
# Map each class index (0=NORM, 1=MI, 2=STTC, 3=CD) to its weight for training.
class_weight = dict(enumerate((NORM_weight, MI_weight, STTC_weight, CD_weight)))
print(class_weight)

In [None]:
# Build a fresh, untrained model from the best hyperparameters for the
# class-weight run. `model5 = tuned_model_CNNLSTM` only created another name for
# the same object, so this experiment continued from weights already trained in
# earlier cells instead of being an independent comparison.
model5 = CNN_LSTM(best_hps)
history7 = model5.fit(X_train, Y_train, epochs=10, batch_size=64, class_weight=class_weight, validation_data=(X_valid, Y_valid))
# Order follows the compiled metrics: loss, accuracy, auc, precision, recall.
val_loss, val_acc, val_auc, val_precision, val_recall = model5.evaluate(X_valid, Y_valid)

In [None]:
# Training curves of the class-weight run (history7).
# The epoch axis is derived from the recorded history instead of being hard-coded
# to 10, so the plot stays valid if the number of trained epochs changes.
epochs = np.arange(1, len(history7.history['loss']) + 1)
# Zero-based index of the epoch with the highest validation accuracy
best_acc_epoch = np.argmax(history7.history['val_accuracy'])
# Zero-based index of the epoch with the lowest validation loss
best_loss_epoch = np.argmin(history7.history['val_loss'])

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 5))

# Accuracy panel
ax_acc.plot(epochs, history7.history['accuracy'], label='Training Accuracy', color='red')
ax_acc.plot(epochs, history7.history['val_accuracy'], label='Validation Accuracy', color='green')
ax_acc.scatter(best_acc_epoch+1, history7.history['val_accuracy'][best_acc_epoch], color='blue', label=f'Best Epoch = {best_acc_epoch+1}')
ax_acc.set_title('Model Training and Validation Accuracy')
ax_acc.set_ylim(0, 1)
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Loss panel
ax_loss.plot(epochs, history7.history['loss'], label='Training Loss', color='red')
ax_loss.plot(epochs, history7.history['val_loss'], label='Validation Loss', color='green')
ax_loss.scatter(best_loss_epoch+1, history7.history['val_loss'][best_loss_epoch], color='blue', label=f'Best Epoch = {best_loss_epoch+1}')
ax_loss.set_title('Model Training and Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
# Confusion matrix of model5 (class-weight run) on the validation split.
# Y_pred keeps the raw model outputs (one score per class and sample).
Y_pred = model5.predict(X_valid)
y_true = np.argmax(Y_valid, axis=1)
# Pick the highest-scoring class directly. Thresholding at 0.5 first would turn
# every row without a score above 0.5 into all-False, and argmax of an all-False
# row is 0, silently counting those samples as NORM.
y_pred_labels = np.argmax(Y_pred, axis=1)

# Compute confusion matrix; labels are pinned so the matrix is always 4x4 and
# lines up with class_names even if a class never occurs.
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))

# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: class weight')
plt.show()

In [None]:
# Evaluate model5 (class-weight run) on the test split.
loss, accuracy, auc, precision, recall = model5.evaluate(X_test, Y_test)
print(f"Loss : {loss}")
print(f"Accuracy : {accuracy}")
print(f"Area under the Curve (ROC) : {auc}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")
# Generate predictions; Y_pred keeps the raw per-class scores.
Y_pred = model5.predict(X_test)
y_true = np.argmax(Y_test, axis=1)
# Highest-scoring class per sample. No 0.5 threshold: a row with no score above
# 0.5 would become all-False and argmax would report class 0 (NORM).
y_pred_labels = np.argmax(Y_pred, axis=1)

# Compute confusion matrix with pinned labels so it always matches class_names.
class_names = ['NORM','MI', 'STTC','CD']
cm = confusion_matrix(y_true, y_pred_labels, labels=np.arange(len(class_names)))
# Plot confusion matrix
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cmp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix: class weight')
plt.show()

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Names of the classes, in class-index order.
label_list = class_names

# Per-class precision, recall, F1 and support on the test split.
# `labels` is pinned to every class index so the returned arrays always have one
# entry per name in label_list; otherwise a class absent from both the targets and
# the predictions is dropped and the remaining names shift onto the wrong rows.
precision, recall, f1score, support = precision_recall_fscore_support(
    Y_test.argmax(axis=1),
    Y_pred.argmax(axis=1),
    labels=list(range(len(label_list))),
    zero_division=0,
)

# Print precision, recall, F1 score and the sample size for each class.
for label, p, r, f, s in zip(label_list, precision, recall, f1score, support):
    print(f'{label}: Precision={p:.3f}, Recall={r:.3f}, F1 Score={f:.3f}, Support={s}')

In [None]:
from sklearn.preprocessing import LabelBinarizer
from itertools import cycle
import sklearn.metrics as metrics
from sklearn.metrics import RocCurveDisplay
# Binarize the target in a one-vs-rest fashion.
# NOTE(review): Y_test is indexed as a 2-D array below (Y_test.shape[1]), so it is
# presumably already one-hot and the binarizer just passes it through - confirm.
label_binarizer = LabelBinarizer().fit(Y_train)
y_onehot_test = label_binarizer.transform(Y_test)
n_classes = Y_test.shape[1]
y_label = Y_test
# Raw per-class scores of model5 (class-weight run) on the test split.
y_pred = model5.predict(X_test)

# One-vs-rest ROC curve (with AUC in the legend) per class for model5 on the test set.
fig, ax = plt.subplots(figsize=(9, 6))
colors = cycle(["dodgerblue", "tomato", "goldenrod", "seagreen"])
for class_id, color in zip(range(n_classes), colors):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, class_id],
        y_pred[:, class_id],
        name=f"Class {class_id}",
        ax=ax,
        color=color,
        # Draw the chance diagonal once, with the last curve, whatever the class count.
        plot_chance_level=(class_id == n_classes - 1),
    )

_ = ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve for multi-class classification",
)

In [None]:
# Per-class confusion matrices via a helper defined outside this section.
# When the cells above run in order, y_true holds integer class indices
# (argmax of Y_test) and y_pred holds the raw model5.predict(X_test) output.
# NOTE(review): confirm the helper expects index targets together with raw scores.
plot_class_specific_confusion_matrices(y_true, y_pred, class_names)