# CNN Models

--> Results for Table 05

In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline 

import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from src.utils.dl_architectures import cnn1, cnn2, cnn3, cnn4, cnn5, cnn6, cnn7, cnn8, cnn9

from sklearn.metrics import accuracy_score, classification_report, f1_score

from pathlib import Path
import re 

import pickle
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
print(tf.config.list_physical_devices('GPU'))  # should show []
from contextlib import redirect_stdout
import json
from collections import Counter

from src.visualization.visualization import plot_training_history 
from src.visualization.confusion_matrix import plot_confusion_matrix

In [None]:
# Experiment-wide settings: sampling tag, outlier switch, and the folders
# that receive trained models and reports.
SAMPLING_METHOD = "SMOTE"
REMOVE_OUTLIERS = False
OUTPUT_PATH = "src/models/MIT_02_03_dl_models/CNN/"
REPORTS_PATH = "reports/04_dl_models/"
results_csv = f"{REPORTS_PATH}05_DL_model_comparison.csv"

# Registry of models to train: one independent (initially empty) dict per
# model key; the architectures are attached in a later cell.
models = {
    key: {}
    for key in (
        "cnn1_sm", "cnn2_sm", "cnn3_sm", "cnn4_sm", "cnn5_sm",
        "cnn6_sm", "cnn7_sm", "cnn8_sm", "cnn9_sm",
    )
}
model_names = [*models]

# Load the MIT-BIH data.
# The original test CSV is read without a header row, so its columns are
# addressed by integer position; column 187 is used as the label below.
df_mitbih_test = pd.read_csv('data/original/mitbih_test.csv', header=None)

# Non-resampled training split (currently disabled):
#X_train = pd.read_csv('data/processed/mitbih/X_train.csv')
#y_train = pd.read_csv('data/processed/mitbih/y_train.csv')
#y_train = y_train['187']

# Training split with the "_sm" suffix (presumably the SMOTE-resampled data,
# cf. SAMPLING_METHOD - TODO confirm). The label CSVs carry a header, hence
# the string column name '187'.
X_train_sm = pd.read_csv('data/processed/mitbih/X_train_sm.csv')
y_train_sm = pd.read_csv('data/processed/mitbih/y_train_sm.csv')['187']

# Validation split
X_val = pd.read_csv('data/processed/mitbih/X_val.csv')
y_val = pd.read_csv('data/processed/mitbih/y_val.csv')['187']

# Test split: label column 187, features are every remaining column
y_test = df_mitbih_test[187]
X_test = df_mitbih_test.drop(columns=187)

# Optional: reduce the dataset size for quicker test runs.
# Set QUICK_TEST_SUBSAMPLE to True to enable. SUBSAMPLE_FRACTION is the share
# of rows kept from the training and the test set (0.05 == 5 %).
# The validation set is not touched by this step.
QUICK_TEST_SUBSAMPLE = False
SUBSAMPLE_FRACTION = 0.05

if QUICK_TEST_SUBSAMPLE:
    from sklearn.model_selection import train_test_split

    # Stratified subsample of the training set (keeps all classes represented)
    X_train_small, _, y_train_small, _ = train_test_split(
        X_train_sm, y_train_sm,
        train_size=SUBSAMPLE_FRACTION,
        stratify=y_train_sm,
        random_state=42
    )

    # Stratified subsample of the test set with the same fraction
    X_test_small, _, y_test_small, _ = train_test_split(
        X_test, y_test,
        train_size=SUBSAMPLE_FRACTION,
        stratify=y_test,
        random_state=42
    )

    print("Reduced MIT-BIH dataset")
    print(f"\tTraining size: {X_train_small.shape}, {y_train_small.shape}")
    print(f"\tTest size: {X_test_small.shape}, {y_test_small.shape}")

    # Assign back so the rest of the pipeline uses the reduced data
    X_train_sm, y_train_sm = X_train_small, y_train_small
    X_test,  y_test  = X_test_small,  y_test_small

# Reshape the data for the 1D CNNs: append a trailing singleton axis to each
# 2-D feature table, turning (rows, columns) into (rows, columns, 1).
X_train_sm_cnn, X_val_cnn, X_test_cnn = (
    np.expand_dims(split, axis=-1) for split in (X_train_sm, X_val, X_test)
)

# Show the resulting shapes as a quick sanity check
for reshaped in (X_train_sm_cnn, X_val_cnn, X_test_cnn):
    display(reshaped.shape)


def parse_epoch_from_name(name, default_epochs=512):
    """Extract the epoch number from a name containing ``epoch_<digits>``.

    Returns the first such number as an int, or ``default_epochs`` when the
    pattern does not occur in ``name``.
    """
    epoch_match = re.search(r"epoch_(\d+)", name)
    if not epoch_match:
        return default_epochs
    return int(epoch_match.group(1))

def parse_val_loss_from_name(name):
    """Extract the validation loss from a name containing ``valloss_<d>.<d>``.

    Returns the first such value as a float. Names without the pattern
    (including values written without a decimal point) yield ``np.nan``.
    """
    loss_match = re.search(r"valloss_([0-9]+\.[0-9]+)", name)
    if not loss_match:
        return np.nan
    return float(loss_match.group(1))

In [None]:
# Attach each imported CNN architecture to its entry in the model registry.
for registry_key, architecture in (
    ("cnn1_sm", cnn1),
    ("cnn2_sm", cnn2),
    ("cnn3_sm", cnn3),
    ("cnn4_sm", cnn4),
    ("cnn5_sm", cnn5),
    ("cnn6_sm", cnn6),
    ("cnn7_sm", cnn7),
    ("cnn8_sm", cnn8),
    ("cnn9_sm", cnn9),
):
    models[registry_key]["model"] = architecture

In [None]:
# Print the layer summary of every CNN architecture, in order cnn1..cnn9.
for architecture in (cnn1, cnn2, cnn3, cnn4, cnn5, cnn6, cnn7, cnn8, cnn9):
    architecture.summary()

In [None]:
def append_metrics_to_csv(metrics: dict, csv_path: str):
    """
    Insert or replace one result row in the CSV file at ``csv_path``.

    A row is identified by the unique key
    (model_name, batch_size, training_size, lr_start, lr_schedule).
    If the file already holds row(s) with the same key, they are replaced by
    the new row; all other rows remain unchanged. If the file does not exist
    (or is a zero-byte file), it is created with the new row only.

    Existing columns keep their order; columns that appear only in
    ``metrics`` are appended at the end. Missing values are left empty.

    Args:
        metrics: Flat mapping of column name -> value for the new row.
        csv_path: Path of the CSV file; missing parent folders are created.
    """
    # identifier columns for uniqueness
    key_cols = ["model_name", "batch_size", "training_size", "lr_start", "lr_schedule"]

    df_new = pd.DataFrame([metrics])

    # to_csv does not create folders, so make sure the target folder exists
    parent_dir = os.path.dirname(csv_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # No usable file yet (missing or zero bytes): start a new CSV.
    # A zero-byte file would make pd.read_csv raise EmptyDataError.
    if not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0:
        df_new.to_csv(csv_path, index=False)
        return

    df_existing = pd.read_csv(csv_path)

    # Align columns while keeping the existing order stable; sorting here
    # would reshuffle the file layout between the first and later writes.
    existing_cols = list(df_existing.columns)
    extra_cols = [col for col in df_new.columns if col not in existing_cols]
    all_cols = existing_cols + extra_cols
    df_existing = df_existing.reindex(columns=all_cols)
    df_new = df_new.reindex(columns=all_cols)

    # Flag existing row(s) whose key equals the key of the new row
    mask_match = np.ones(len(df_existing), dtype=bool)
    for col in key_cols:
        mask_match &= (df_existing[col] == df_new.iloc[0][col]).to_numpy()

    df_kept = df_existing.loc[~mask_match]  # drop matching row(s)

    # Append the new row (skip concat when nothing is left to keep)
    if df_kept.empty:
        df_combined = df_new
    else:
        df_combined = pd.concat([df_kept, df_new], ignore_index=True)

    df_combined.to_csv(csv_path, index=False)


In [None]:
# Train every registered CNN, evaluate it on the test set, and persist the
# results: best checkpoint, training history, metrics row, and plots.

# Training configuration shared by every model in the sweep (loop-invariant,
# so it is defined once instead of on every iteration).
BATCH_SIZE = 512
EPOCHS = 200
initial_learning_rate = 1e-3

# Create the output folders up front: open() and savefig() below do not
# create missing directories and would otherwise fail only after a complete
# training run.
os.makedirs(OUTPUT_PATH, exist_ok=True)
os.makedirs(REPORTS_PATH, exist_ok=True)

for model_name, model in models.items():
    print("*"*80)
    print("*"*5,'\t',model_name,'\t',"*"*5)
    print("*"*80)

    # Common file-name stem for all artefacts of this run
    run_tag = f"{model_name}_BS{BATCH_SIZE}"

    # Fresh learning-rate schedule per model
    lr_schedule = ExponentialDecay(
        initial_learning_rate,
        decay_steps=1000,
        decay_rate=0.96)

    # Early stopping
    early_stop = EarlyStopping(
        monitor='val_loss',        # what to monitor
        patience=20,               # how many epochs with no improvement before stopping
        restore_best_weights=True,
        min_delta=0.001            # changes smaller than 0.001 do not count as improvement
    )

    cf = model["model"]

    # Compile model, change model when needed
    cf.compile(
        optimizer=Adam(learning_rate=lr_schedule),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])

    # Define where and how to save the best model, note lr and bs
    checkpoint = ModelCheckpoint(
        filepath=os.path.join(OUTPUT_PATH, f"{run_tag}_best.keras"),
        monitor='val_loss',            # metric to monitor
        mode='min',                    # minimize loss
        save_best_only=True,
        verbose=1                      # print message when a model is saved
    )

    # Training
    history = cf.fit(
        X_train_sm_cnn,
        y_train_sm,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(X_val_cnn, y_val),
        callbacks=[checkpoint, early_stop]
    )

    # ----------------------------
    # 1) Basic training info
    # ----------------------------
    hist = history.history
    n_epochs = len(hist["loss"])

    # best epoch according to val_loss (0-based index into the history lists)
    best_epoch = int(np.argmin(hist["val_loss"]))
    last_epoch = n_epochs - 1

    # ----------------------------
    # 2) Predictions for test
    # ----------------------------

    # !!! make sure to set restore_best_weights=True in early stopping
    # or load best model before predicting
    # NOTE(review): EarlyStopping applies min_delta when choosing its "best"
    # weights, while best_epoch above is the plain argmin of val_loss, so the
    # two may refer to different epochs - confirm this is acceptable.
    #
    # Use the channel-expanded test array so the input has the same layout as
    # the training and validation data.
    y_prob = cf.predict(X_test_cnn)
    y_pred = np.argmax(y_prob, axis=1)

    # ----------------------------
    # 3) F1-macro and per-class F1 on TEST
    # ----------------------------
    f1_macro = f1_score(y_test, y_pred, average="macro")
    f1_per_class = f1_score(y_test, y_pred, average=None)  # one score per class
    acc = accuracy_score(y_test, y_pred)

    # ----------------------------
    # 4) Build output dictionary
    # ----------------------------
    # flatten F1-per-class into separate columns
    f1_class_columns = {
        f"test_f1_class_{i}": float(score)
        for i, score in enumerate(f1_per_class)
    }

    metrics = {
        "model_name": model_name,
        "batch_size": BATCH_SIZE,
        "training_size": X_train_sm_cnn.shape[0],
        "lr_start": initial_learning_rate,
        "lr_schedule": 'EXP_DECAY',
        "best_epoch": best_epoch,
        "last_epoch": last_epoch,

        # best epoch values (from history)
        "train_loss_best": float(hist["loss"][best_epoch]),
        "val_loss_best": float(hist["val_loss"][best_epoch]),
        "train_acc_best": float(hist["accuracy"][best_epoch]),
        "val_acc_best": float(hist["val_accuracy"][best_epoch]),

        # last epoch values
        "train_loss_last": float(hist["loss"][last_epoch]),
        "val_loss_last": float(hist["val_loss"][last_epoch]),
        "train_acc_last": float(hist["accuracy"][last_epoch]),
        "val_acc_last": float(hist["val_accuracy"][last_epoch]),

        # TEST metrics
        "test_f1_macro": float(f1_macro),
        "test_accuracy": float(acc)
    }

    # merge F1-per-class columns
    metrics.update(f1_class_columns)

    # Insert/replace this run's row in the comparison table
    append_metrics_to_csv(metrics, csv_path=results_csv)

    # Persist the raw training history for later inspection
    with open(os.path.join(OUTPUT_PATH, f"{run_tag}_full.pkl"), "wb") as f:
        pickle.dump(history.history, f)

    fig_cm, ax_cm = plot_confusion_matrix(
        y_true=y_test,
        y_pred=y_pred,
        normalize=True,
        class_names=["1","2","3","4","5"],
        title=f"Confusion Matrix — {model_name}"
    )

    # os.path.join avoids the doubled slash that REPORTS_PATH + "/" produced
    fig_cm.savefig(
        os.path.join(REPORTS_PATH, f"{run_tag}_confusion_matrix.png"),
        dpi=300,
        bbox_inches="tight"
    )
    plt.close(fig_cm)  # free the figure before the next model is trained

    plot_training_history(
        history=history,                     # raw history
        save_dir=REPORTS_PATH,               # where plots go
        prefix=f"{run_tag}_training_history"  # prefix for filenames
    )
    