# CNN Model - 2018 Paper (Kachuee, Fazeli, Sarrafzadeh): CNN8

- with added dropout
- and Batch Normalization
- and one removed last fully connected layer

In [3]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Conv1D, BatchNormalization, ReLU, Add, MaxPooling1D, 
    Dropout, Flatten, Dense
)
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from sklearn.metrics import (
    accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
)
from sklearn.model_selection import train_test_split

from imblearn.over_sampling import SMOTE, RandomOverSampler

from pathlib import Path
import re 

import pickle
import os
import json 
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
print(tf.config.list_physical_devices('GPU'))  # should show []
from contextlib import redirect_stdout

from src.visualization.visualization import plot_training_history

[]


In [4]:
SAMPLING_METHOD = "SMOTE"
REMOVE_OUTLIERS = False
model_name = "cnn8_sm"
OUTPUT_PATH = "src/models/CNN/"
results_csv = "reports/03_model_testing_results/05_CNN_model_comparison.csv"
EPOCHS = 50

#import MIT data
df_mitbih_test = pd.read_csv('data/original/mitbih_test.csv', header = None)

X_train = pd.read_csv('data/processed/mitbih/X_train.csv')
y_train = pd.read_csv('data/processed/mitbih/y_train.csv')
y_train = y_train['187']

X_train_sm = pd.read_csv('data/processed/mitbih/X_train_sm.csv')
y_train_sm = pd.read_csv('data/processed/mitbih/y_train_sm.csv')
y_train_sm = y_train_sm['187']

X_val = pd.read_csv('data/processed/mitbih/X_val.csv')
y_val = pd.read_csv('data/processed/mitbih/y_val.csv')
y_val = y_val['187']

X_test = df_mitbih_test.drop(187, axis = 1)
y_test = df_mitbih_test[187]


# Reshape the data for 1D CNN
X_train_sm_cnn = np.expand_dims(X_train_sm, axis=2)
X_val_cnn = np.expand_dims(X_val, axis=2)
X_test_cnn = np.expand_dims(X_test, axis=2) 

display(X_train_sm_cnn.shape)
display(X_val_cnn.shape)
display(X_test_cnn.shape)

(289885, 187, 1)

(17511, 187, 1)

(21892, 187, 1)

In [5]:
# Input layer
input_layer = Input(shape=(187, 1))

# Initial convolution
conv_0 = Conv1D(filters=32, kernel_size=5, padding='same')(input_layer)

# Residual Block 1
x = BatchNormalization()(conv_0)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = Add()([conv_0, x])
x = ReLU()(x)
x = MaxPooling1D(pool_size=5, strides=2, padding='same')(x)
x = Dropout(0.1)(x)

# Residual Block 2
shortcut = x
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = Add()([shortcut, x])
x = ReLU()(x)
x = MaxPooling1D(pool_size=5, strides=2, padding='same')(x)
x = Dropout(0.1)(x)

# Residual Block 3
shortcut = x
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = Add()([shortcut, x])
x = ReLU()(x)
x = MaxPooling1D(pool_size=5, strides=2, padding='same')(x)
x = Dropout(0.2)(x)

# Residual Block 4
shortcut = x
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = Add()([shortcut, x])
x = ReLU()(x)
x = MaxPooling1D(pool_size=5, strides=2, padding='same')(x)
x = Dropout(0.3)(x)

# Residual Block 5
shortcut = x
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv1D(32, 5, padding='same')(x)
x = BatchNormalization()(x)
x = Add()([shortcut, x])
x = ReLU()(x)
x = MaxPooling1D(pool_size=5, strides=2, padding='same')(x)
x = Dropout(0.3)(x)

# Fully connected layers
x = Flatten()(x)
x = Dense(32, activation='relu')(x)
x = Dropout(0.3)(x) 
output_layer = Dense(5, activation='softmax')(x)

# Model
cnn = Model(inputs=input_layer, outputs=output_layer)

# Summary
cnn.summary()


In [6]:
# Learning rate with exponential decay
initial_learning_rate = 0.001
lr_schedule = ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.75,
    staircase=True
)

# Adam optimizer with specified hyperparameters
optimizer = Adam(
    learning_rate=lr_schedule,
    beta_1=0.9,
    beta_2=0.999
)


# Compile the model
cnn.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy']
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=8,
    min_delta=1e-4,
    restore_best_weights=True,
    verbose=1
)

# Define where and how to save the best model
checkpoint = ModelCheckpoint(
    filepath=OUTPUT_PATH+model_name+'_bs_epoch_{epoch:02d}_valloss_{val_loss:.4f}.keras',   # file path (can be .keras or .h5)
    monitor='val_loss',        # metric to monitor
    mode='min',                    # because higher accuracy is better
    save_best_only=True,           # only save when val_accuracy improves
    verbose=1                      # print message when a model is saved
)

In [7]:
history = cnn.fit(
    X_train_sm_cnn,
    y_train_sm,
    epochs=EPOCHS,
    batch_size=128,
    validation_data=(X_val_cnn, y_val),  
    callbacks=[checkpoint, early_stopping]
)

Epoch 1/50
[1m2264/2265[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 45ms/step - accuracy: 0.6639 - loss: 1.2976
Epoch 1: val_loss improved from None to 1.07010, saving model to src/models/CNN/cnn8_sm_bs_epoch_01_valloss_1.0701.keras
[1m2265/2265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 45ms/step - accuracy: 0.7970 - loss: 0.6144 - val_accuracy: 0.5114 - val_loss: 1.0701
Epoch 2/50
[1m2265/2265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.9263 - loss: 0.2232
Epoch 2: val_loss improved from 1.07010 to 0.75944, saving model to src/models/CNN/cnn8_sm_bs_epoch_02_valloss_0.7594.keras
[1m2265/2265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 44ms/step - accuracy: 0.9367 - loss: 0.1945 - val_accuracy: 0.6734 - val_loss: 0.7594
Epoch 3/50
[1m2264/2265[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 46ms/step - accuracy: 0.9559 - loss: 0.1376
Epoch 3: val_loss improved from 0.75944 to 0.64795, saving model to src/mo

In [8]:

def parse_epoch_from_name(name, default_epochs=EPOCHS):
    # Expect pattern like ..._epoch_12_...; returns int if found else default
    m = re.search(r"epoch_(\d+)", name)
    return int(m.group(1)) if m else default_epochs

def parse_val_loss_from_name(name):
    # Expect pattern like ..._valloss_0.1234.keras
    m = re.search(r"valloss_([0-9]+\.[0-9]+)", name)
    return float(m.group(1)) if m else np.nan

# Safer file filtering
model_dir = Path(OUTPUT_PATH)
model_paths = sorted([p for p in model_dir.glob("*.keras")])
model_paths = [p for p in model_paths if model_name in model_paths]

all_labels = np.unique(y_test)  # ground-truth labels present in test set
rows = []

print(all_labels)

for p in model_paths:
    print(p)
    model_ = load_model(str(p))

    y_pred = model_.predict(X_test_cnn)
    y_pred_class = np.argmax(y_pred, axis=1)

    # Force consistent label space for metrics
    print(classification_report(y_test, y_pred_class, digits=4))
    report = classification_report(
        y_test, y_pred_class, labels=all_labels, output_dict=True, zero_division=0
    )

    print(pd.crosstab(y_test, y_pred_class, colnames=['Predictions']))

    accuracy = accuracy_score(y_test, y_pred_class)
    epoch_num = parse_epoch_from_name(p.name)
    val_loss = parse_val_loss_from_name(p.name)

    row = {
        "sampling_method": SAMPLING_METHOD,
        "outliers_removed": REMOVE_OUTLIERS,
        "epochs": epoch_num,
        "model": p.name,
        "val_loss": round(float(val_loss), 4) if not np.isnan(val_loss) else np.nan,
        "test_accuracy": round(float(accuracy), 4),
        "test_f1_macro": round(float(report["macro avg"]["f1-score"]), 4),
        "test_precision_macro": round(float(report["macro avg"]["precision"]), 4),
        "test_recall_macro": round(float(report["macro avg"]["recall"]), 4),
        "test_f1_weighted": round(float(report["weighted avg"]["f1-score"]), 4),
        "test_precision_weighted": round(float(report["weighted avg"]["precision"]), 4),
        "test_recall_weighted": round(float(report["weighted avg"]["recall"]), 4),
    }
    for lbl in all_labels:
        row[f"test_f1_cls_{int(lbl)}"] = round(float(report[str(lbl)]["f1-score"]), 4)
        row[f"test_precision_cls_{int(lbl)}"1] = round(float(report[str(lbl)]["precision"]), 4)
        row[f"test_recall_cls_{int(lbl)}"] = round(float(report[str(lbl)]["recall"]), 4)
        row[f"test_support_cls_{int(lbl)}"] = int(report[str(lbl)]["support"])

    rows.append(row)

df = pd.DataFrame(rows)
os.makedirs(os.path.dirname(results_csv), exist_ok=True)
if os.path.exists(results_csv):
    df.to_csv(results_csv, mode='a', index=False, header=False)
else:
    df.to_csv(results_csv, index=False)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (311444982.py, line 56)

In [None]:
plot_training_history(history, "reports/figures/training_history/", model_name)