In [None]:
import os
import pickle
from contextlib import redirect_stdout

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Conv1D, MaxPooling1D, Flatten, Add, ReLU, LSTM, Reshape
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# imbalanced-learn names this class `RandomOverSampler` (capital "S");
# the previous spelling `RandomOversampler` raises ImportError on a fresh kernel.
from imblearn.over_sampling import SMOTE, RandomOverSampler

In [None]:
# Load the MIT-BIH data splits.
# The raw test file is read without a header row, so its columns are integer positions.
df_mitbih_test = pd.read_csv('data/original/mitbih_test.csv', header=None)

# For every processed split, the label series is the column named '187' of the y_* file.
X_train = pd.read_csv('data/processed/X_train.csv')
y_train = pd.read_csv('data/processed/y_train.csv')['187']

X_train_sm = pd.read_csv('data/processed/X_train_sm.csv')
y_train_sm = pd.read_csv('data/processed/y_train_sm.csv')['187']

X_val = pd.read_csv('data/processed/X_val.csv')
y_val = pd.read_csv('data/processed/y_val.csv')['187']

# Raw test set: column 187 is the target, every other column is a feature.
y_test = df_mitbih_test[187]
X_test = df_mitbih_test.drop(columns=187)


# Append a trailing channel axis for the 1D CNN: (rows, cols) -> (rows, cols, 1)
X_train_sm_cnn, X_val_cnn, X_test_cnn = (
    np.expand_dims(features, axis=-1) for features in (X_train_sm, X_val, X_test)
)

# Show the resulting shapes as a quick sanity check
for cnn_input in (X_train_sm_cnn, X_val_cnn, X_test_cnn):
    display(cnn_input.shape)

In [None]:
def plot_training_history(history, save_dir, prefix):
    """Plot every training metric over epochs and save one PNG per metric.

    For each key in the history that does not start with ``'val_'`` a figure is
    created showing the training curve and, when a matching ``'val_<metric>'``
    key exists, the validation curve on the same axes. Each figure is written
    to ``<save_dir>/<prefix>_<metric>.png``, shown, and then closed.

    Parameters
    ----------
    history : object with a ``.history`` dict, or a dict
        Either the object returned by ``model.fit`` or the plain
        ``{metric_name: [value_per_epoch, ...]}`` dict itself (for example the
        dict that was pickled from ``history.history``).
    save_dir : str
        Output directory; created if it does not exist.
    prefix : str
        File name prefix placed before ``_<metric>.png``.

    Returns
    -------
    None
    """
    # Accept both a History-like object and the raw dict it wraps.
    hist = getattr(history, 'history', history)

    # Validation series are drawn together with their training counterpart,
    # so only the non-'val_' keys drive the loop.
    metrics = [m for m in hist if not m.startswith('val_')]

    # Create the output folder if it does not exist
    os.makedirs(save_dir, exist_ok=True)

    for m in metrics:
        # Explicit figure/axes handles instead of the implicit pyplot state machine.
        fig, ax = plt.subplots()
        ax.plot(hist[m], label=f'Train {m}')
        val_key = f'val_{m}'
        if val_key in hist:
            ax.plot(hist[val_key], label=f'Val {m}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(m)
        ax.set_title(f'{m} over epochs')
        ax.legend()
        ax.grid(True)

        # Construct filename with prefix and filepath with directory and filename
        filepath = os.path.join(save_dir, f"{prefix}_{m}.png")

        # Save the figure before showing it
        fig.savefig(filepath, format='png', dpi=300, bbox_inches='tight')
        print(f"Saved: {filepath}")
        plt.show()

        # Release the figure so repeated calls / many metrics do not accumulate
        # open figures (a no-op if the backend already closed it in show()).
        plt.close(fig)

In [None]:
# Used model: CNN feature extractor followed by an LSTM stack and a dense classifier head.
# Original experiment note: CNN4, CNN4-3 Paper 2020, more BatchNormalization and Dropout layers added, LSTM3, 10 layers

# Input layer: sequences of length 187 with a single channel
inputs = Input(shape=(187, 1))

# CNN part (no final Dense layers): four stages of
# Conv1D -> BatchNormalization -> MaxPooling1D -> Dropout.
conv_stages = [(32, 5), (64, 3), (128, 5), (256, 3)]  # (filters, kernel_size) per stage
cnn_layers = []
for stage_idx, (n_filters, kernel) in enumerate(conv_stages):
    # Only the first convolution declares the input shape.
    first_layer_kwargs = {'input_shape': (187, 1)} if stage_idx == 0 else {}
    cnn_layers.extend([
        Conv1D(filters=n_filters, kernel_size=kernel, activation='relu', **first_layer_kwargs),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.3),
    ])
cnn_block = Sequential(cnn_layers)
cnn_output = cnn_block(inputs)

# Reshape CNN output into (timesteps, 256) sequences for the LSTM stack
reshaped_cnn_output = Reshape((-1, 256))(cnn_output)

# LSTM part (no own Input layer, no final Dense layers): six 32-unit layers.
# All but the last return the full sequence so the next LSTM can consume it;
# the last one returns only its final output.
lstm_layers = [
    LSTM(units=32, return_sequences=True, input_shape=reshaped_cnn_output.shape[1:])
]
lstm_layers += [LSTM(units=32, return_sequences=True) for _ in range(4)]
lstm_layers.append(LSTM(units=32, return_sequences=False))
lstm_block = Sequential(lstm_layers)
lstm_output = lstm_block(reshaped_cnn_output)

# Fully connected layers: Dense -> BatchNormalization -> Dropout
x = lstm_output
for head_layer in (Dense(64, activation='relu'), BatchNormalization(), Dropout(0.3)):
    x = head_layer(x)

# Output layer for 5 classes
outputs = Dense(5, activation='softmax')(x)

# Create combined model
cnn_lstm = Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the summary of the combined CNN-LSTM model built above.
cnn_lstm.summary()

In [None]:
# Compile the model (swap in another model here when needed).
# Sparse categorical cross-entropy takes integer class labels rather than one-hot vectors.
adam_optimizer = Adam(learning_rate=1e-3)
cnn_lstm.compile(
    optimizer=adam_optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


# Define where and how to save the best model; note lr and bs in the file name.
# The {epoch} and {val_loss} placeholders are filled in each time a file is written.
checkpoint_path = '../CNNLSTM_output/cnnlstm_sm_lr_bs_epoch_{epoch:02d}_valloss_{val_loss:.4f}.keras'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',     # quantity to watch
    mode='min',             # lower validation loss is better
    save_best_only=True,    # only save when val_loss improves on the best so far
    verbose=1,              # print a message when a model is saved
)

In [None]:
# Train on the `_sm` training arrays and evaluate each epoch on the
# original, unaltered validation set; the checkpoint callback saves the best model.
validation_set = (X_val_cnn, y_val)
history = cnn_lstm.fit(
    x=X_train_sm_cnn,
    y=y_train_sm,
    validation_data=validation_set,
    batch_size=128,
    epochs=50,
    callbacks=[checkpoint],
)

In [None]:
# Save training history (per-epoch metric values) so it can be reloaded without retraining
with open("../CNNLSTM_output/cnnlstm_sm_lr_bs_epoch__valloss_.pkl", "wb") as f: #change for model
    pickle.dump(history.history, f)


# Locate the best checkpoint of this training run.
# The checkpoint callback writes files named from its `filepath` template, filled
# with the 1-based epoch number and that epoch's logged values, and only when
# val_loss improves. The best file is therefore the one for the first epoch
# that reached the minimum val_loss; rebuild its name with the same template
# instead of relying on a hand-edited placeholder path.
epoch_logs = history.history
best_val_loss = min(epoch_logs['val_loss'])
best_epoch_idx = epoch_logs['val_loss'].index(best_val_loss)
best_model_path = checkpoint.filepath.format(
    epoch=best_epoch_idx + 1,
    **{name: values[best_epoch_idx] for name, values in epoch_logs.items()},
)
if not os.path.exists(best_model_path):
    raise FileNotFoundError(
        f"Best checkpoint not found: {best_model_path}. "
        "Re-create the `checkpoint` callback and re-run training so the best model is saved."
    )
print(f"Evaluating checkpoint: {best_model_path}")
best_model = load_model(best_model_path)


#prediction of test data
test_pred = best_model.predict(X_test_cnn)
y_test_class = y_test
# Predicted class = index of the highest softmax probability
y_pred_class = np.argmax(test_pred, axis=1)


# Compute both summaries once and reuse them for the console and the results file
report_text = classification_report(y_test_class, y_pred_class, digits=4)
confusion = pd.crosstab(y_test_class, y_pred_class, colnames=['Predictions'])


#classification report
print(report_text)


#confusion matrix
print(confusion)


#save results of metrics
with open("../CNNLSTM_output/cnnlstm_sm_lr_bs_epoch__valloss_.txt", "w") as file: #change for model

    file.write("\nModel: CNN-LSTM\n")#change for model

    file.write("\nData augmentation: Smote\n")

    file.write("\nConfusion Matrix on test set:\n")
    file.write(str(confusion))

    file.write("\n\nClassification Report on test set:\n")
    file.write(report_text)

In [None]:
# Plot every metric recorded in `history` (training curve plus the matching
# validation curve where one exists) and save one PNG per metric to the output folder.
plot_training_history(history, save_dir="../CNNLSTM_output", prefix="cnnlstm_sm_lr_bs_epoch__valloss_") #change for model