In [None]:
# Run this notebook on Google Colab with a T4 GPU runtime

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.callbacks import ReduceLROnPlateau

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

from imblearn.over_sampling import SMOTE, RandomOversampler

import pickle
import os
from contextlib import redirect_stdout

In [None]:
# Load the MIT-BIH data.
# The original test file is read without a header row, so its columns are integer positions.
df_mitbih_test = pd.read_csv('data/original/mitbih_test.csv', header=None)

# The processed splits are read with their header row; the label is taken from the column named '187'.
X_train = pd.read_csv('data/processed/X_train.csv')
y_train = pd.read_csv('data/processed/y_train.csv')['187']

# "_sm" split (presumably the SMOTE-oversampled training set — confirm against the preprocessing step)
X_train_sm = pd.read_csv('data/processed/X_train_sm.csv')
y_train_sm = pd.read_csv('data/processed/y_train_sm.csv')['187']

X_val = pd.read_csv('data/processed/X_val.csv')
y_val = pd.read_csv('data/processed/y_val.csv')['187']

# Test split: integer column 187 is the label, every other column is a feature
y_test = df_mitbih_test[187]
X_test = df_mitbih_test.drop(columns=187)


# The 1D CNNs take a 3-D input, so append a trailing axis of size 1 to each 2-D feature table
X_train_sm_cnn, X_val_cnn, X_test_cnn = (
    np.expand_dims(features, axis=-1) for features in (X_train_sm, X_val, X_test)
)

# Show the resulting array shapes (train, validation, test)
display(X_train_sm_cnn.shape, X_val_cnn.shape, X_test_cnn.shape)

In [None]:
#Function to plot and save every training metric (plus its validation counterpart) over epochs from history
def plot_training_history(history, save_dir, prefix):
    """Plot each recorded training metric over epochs and save one PNG per metric.

    Every key of ``history.history`` that does not start with ``val_`` is treated
    as a training metric. Its curve is drawn together with the matching
    ``val_<metric>`` curve when that key exists. Each figure is written to
    ``<save_dir>/<prefix>_<metric>.png``, shown, and then closed.

    Parameters
    ----------
    history : object
        Anything exposing a ``history`` dict that maps metric names to
        per-epoch value sequences (e.g. the object returned by ``model.fit``).
    save_dir : str
        Output folder; it is created when it does not exist yet.
    prefix : str
        Prefix of every generated file name.

    Returns
    -------
    None
    """
    hist = history.history
    train_metrics = [name for name in hist if not name.startswith('val_')]

    # Create the output folder if it does not exist
    os.makedirs(save_dir, exist_ok=True)

    for name in train_metrics:
        # Explicit figure/axes handles so the figure can be saved and closed deterministically
        fig, ax = plt.subplots()
        ax.plot(hist[name], label=f'Train {name}')

        val_key = f'val_{name}'
        if val_key in hist:
            ax.plot(hist[val_key], label=f'Val {name}')

        ax.set_xlabel('Epoch')
        ax.set_ylabel(name)
        ax.set_title(f'{name} over epochs')
        ax.legend()
        ax.grid(True)

        # File path = output folder + "<prefix>_<metric>.png"
        filepath = os.path.join(save_dir, f"{prefix}_{name}.png")

        # Save before showing, then release the figure so repeated calls do not accumulate open figures
        fig.savefig(filepath, format='png', dpi=300, bbox_inches='tight')
        print(f"Saved: {filepath}")
        plt.show()
        plt.close(fig)

In [None]:
#Used DNNs

def build_cnn(conv_specs, dense_units, batch_norm=True, conv_dropout=None, dense_dropout=None,
              input_shape=(187, 1), n_classes=5):
    """Assemble a 1D-CNN classifier as a Keras ``Sequential`` model.

    Layer order:
      * ``Input(input_shape)``
      * for each ``(filters, kernel_size)`` in ``conv_specs``:
        ``Conv1D(relu)`` -> [``BatchNormalization``] -> ``MaxPooling1D(2)`` -> [``Dropout(conv_dropout)``]
      * ``Flatten``
      * for each width in ``dense_units``:
        ``Dense(relu)`` -> [``BatchNormalization``] -> [``Dropout(dense_dropout)``]
      * ``Dense(n_classes, softmax)``

    Parameters
    ----------
    conv_specs : sequence of (int, int)
        ``(filters, kernel_size)`` of every convolutional stage, in order.
    dense_units : sequence of int
        Width of every hidden dense layer, in order.
    batch_norm : bool
        Whether a BatchNormalization layer follows each Conv1D and each hidden Dense layer.
    conv_dropout : float or None
        Dropout rate applied after each pooling layer; ``None`` (or 0) adds no Dropout layer.
    dense_dropout : float or None
        Dropout rate applied after each hidden dense layer; ``None`` (or 0) adds no Dropout layer.
    input_shape : tuple
        Shape of one sample passed to ``Input``.
    n_classes : int
        Number of units of the softmax output layer.

    Returns
    -------
    Sequential
        The uncompiled model.
    """
    layers = [Input(input_shape)]

    # Convolutional stages
    for filters, kernel_size in conv_specs:
        layers.append(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu'))
        if batch_norm:
            layers.append(BatchNormalization())
        layers.append(MaxPooling1D(pool_size=2))
        if conv_dropout:
            layers.append(Dropout(conv_dropout))

    # Flatten and dense layers
    layers.append(Flatten())
    for units in dense_units:
        layers.append(Dense(units, activation='relu'))
        if batch_norm:
            layers.append(BatchNormalization())
        if dense_dropout:
            layers.append(Dropout(dense_dropout))

    # Output layer
    layers.append(Dense(n_classes, activation='softmax'))
    return Sequential(layers)


#CNN1: three Conv1D stages (64/128/256 filters), BatchNormalization and Dropout throughout, two dense layers
cnn1 = build_cnn(
    conv_specs=[(64, 5), (128, 3), (256, 3)],
    dense_units=[128, 64],
    batch_norm=True,
    conv_dropout=0.3,
    dense_dropout=0.4,
)


#CNN2, CNN4 Paper 2020: four Conv1D stages, no BatchNormalization, no Dropout
cnn2 = build_cnn(
    conv_specs=[(32, 5), (64, 3), (128, 5), (256, 3)],
    dense_units=[64],
    batch_norm=False,
)


#CNN3, CNN4-2 Paper 2020, BatchNormalization layers added, Dropout only after the dense layer
cnn3 = build_cnn(
    conv_specs=[(32, 5), (64, 3), (128, 5), (256, 3)],
    dense_units=[64],
    batch_norm=True,
    dense_dropout=0.4,
)


#CNN4, CNN4-3 Paper 2020, more BatchNormalization and Dropout layers added
cnn4 = build_cnn(
    conv_specs=[(32, 5), (64, 3), (128, 5), (256, 3)],
    dense_units=[64],
    batch_norm=True,
    conv_dropout=0.3,
    dense_dropout=0.3,
)

In [None]:
# Print the layer-by-layer summary of every architecture, in definition order
for net in (cnn1, cnn2, cnn3, cnn4):
    net.summary()

In [None]:
# Learning-rate schedule: multiply the learning rate by 0.1 after 5 epochs without
# val_loss improvement, never going below 1e-6.
# It only takes effect when it is added to the callbacks list passed to fit().
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.1,
    min_lr=1e-6,
    verbose=1,
)


# Compile the model to train (swap cnn1 for another architecture when needed)
cnn1.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


# Checkpointing: the file name embeds the epoch number and the validation loss of the saved model.
# Replace "lr" and "bs" in the name with the learning rate and batch size in use.
checkpoint = ModelCheckpoint(
    filepath='../CNN_output/cnn1_sm_lr_bs_epoch_{epoch:02d}_valloss_{val_loss:.4f}.keras',
    save_best_only=True,       # write a file only when val_loss improves on the best value so far
    monitor='val_loss',        # quantity that decides what "best" means
    mode='min',                # lower validation loss is better
    verbose=1,                 # print a message whenever a model is saved
)

In [None]:
# Train the selected model (replace cnn1 when using a different architecture)
history = cnn1.fit(
    x=X_train_sm_cnn,
    y=y_train_sm,
    validation_data=(X_val_cnn, y_val),  # unaltered validation set
    batch_size=128,                      # change when needed
    epochs=100,                          # change when needed
    callbacks=[checkpoint],              # append reduce_lr here when needed
)

In [None]:
#Save training history
with open("../CNN_output/cnn1_sm_lr_bs_epoch__valloss_.pkl", "wb") as f: #change for model
    pickle.dump(history.history, f)


# Locate the best checkpoint of this run instead of hand-editing its file name.
# ModelCheckpoint builds each file name by formatting its `filepath` template with the
# 1-based epoch number and that epoch's logged metrics. With save_best_only=True on val_loss,
# the best file is the one written at the first epoch that reaches the minimum val_loss.
val_losses = history.history['val_loss']
best_idx = int(np.nanargmin(val_losses))
best_model_path = checkpoint.filepath.format(
    epoch=best_idx + 1,
    **{name: values[best_idx] for name, values in history.history.items()},
)
if not os.path.exists(best_model_path):
    # Happens e.g. when `checkpoint` was reused across several fit() calls and kept an older best value
    raise FileNotFoundError(
        f"Expected best checkpoint not found: {best_model_path}. "
        "Assign best_model_path manually to evaluate another saved model."
    )
best_model = load_model(best_model_path)


#prediction of test data
test_pred = best_model.predict(X_test_cnn)
y_test_class = y_test
y_pred_class = np.argmax(test_pred, axis=1)  # index of the highest softmax output per sample


# Build both reports once and reuse them for the notebook output and the results file
report = classification_report(y_test_class, y_pred_class, digits=4)
conf_matrix = pd.crosstab(y_test_class, y_pred_class, colnames=['Predictions'])


#classification report
print(report)


#confusion matrix
print(conf_matrix)


#save results of metrics
with open("../CNN_output/cnn1_sm_lr_bs_epoch__valloss_.txt", "w") as file: #change for model

    file.write("\nModel: CNN1\n")#change for model

    file.write("\nData augmentation: Smote\n")

    file.write("\nConfusion Matrix on test set:\n")
    file.write(str(conf_matrix))

    file.write("\n\nClassification Report on test set:\n")
    file.write(report)

In [None]:
# Draw and save one figure per training metric (with its validation curve, when recorded) from the fit history
plot_training_history(
    history,
    "../CNN_output",
    "cnn1_sm_lr_bs_epoch__valloss_",  # change for model
)