# Model Testing

Questions to be answered:

- Should outliers be removed before training?
- Which sampling method should be used to rebalance the training set?


## Imports

In [22]:
import sys
sys.path.append('..')

from src.utils.preprocessing import (
    prepare_mitbih, 
    prepare_ptbdb,
    resample_training
)
from src.utils.visualization import plot_confusion_matrix
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import numpy as np

ModuleNotFoundError: No module named 'src.utils.visualization'

In [18]:
# Helpers

def _score_split(model, X, y):
    """Score an already-fitted model on a single data split.

    Parameters
    ----------
    model : estimator exposing ``predict``.
    X : features of the split, in whatever form ``model.predict`` accepts.
    y : true labels of the split.

    Returns
    -------
    dict
        Keys ``accuracy``, ``precision_macro``, ``recall_macro``,
        ``f1_macro`` and ``confusion_matrix``.
    """
    y_pred = model.predict(X)

    accuracy = accuracy_score(y, y_pred)
    # Macro averages weight every class equally, to account for class
    # imbalance. zero_division=0 scores an undefined precision/recall as 0
    # instead of emitting a warning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y, y_pred, average='macro', zero_division=0
    )

    return {
        'accuracy': accuracy,
        'precision_macro': precision,
        'recall_macro': recall,
        'f1_macro': f1,
        'confusion_matrix': confusion_matrix(y, y_pred),
    }


def eval_model(model, X_tr, y_tr, X_va, y_va, X_te, y_te):
    """Fit ``model`` on the training split and score it on validation and test.

    The estimator is fitted in place (``model.fit(X_tr, y_tr)``), so the
    object passed in is trained when this function returns.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``.
    X_tr, y_tr : training features and labels.
    X_va, y_va : validation features and labels.
    X_te, y_te : test features and labels.

    Returns
    -------
    dict
        ``{'val': {...}, 'test': {...}}`` where each inner dict holds
        ``accuracy``, ``precision_macro``, ``recall_macro``, ``f1_macro``
        and ``confusion_matrix`` for that split.
    """
    model.fit(X_tr, y_tr)
    return {
        'val': _score_split(model, X_va, y_va),
        'test': _score_split(model, X_te, y_te),
    }

# Per-model metrics keyed by model name; the model cells below add to it.
# Re-running this cell empties it, so models evaluated earlier in the session
# must be re-run to show up in the comparison table.
results = dict()


In [15]:
# Prepare datasets (outliers kept for both)
mitbih = prepare_mitbih(remove_outliers=False)
ptbdb = prepare_ptbdb(remove_outliers=False)

# Print the split sizes of each dataset; validation/test may be absent (None).
for header, dataset in (
    ("MITBIH dataset prepared:", mitbih),
    ("\nPTBDB dataset prepared:", ptbdb),
):
    print(header)
    print(f"  Training size: {dataset.X_train.shape}")
    val_shape = 'None' if dataset.X_val is None else dataset.X_val.shape
    print(f"  Validation size: {val_shape}")
    test_shape = 'None' if dataset.X_test is None else dataset.X_test.shape
    print(f"  Test size: {test_shape}")

MITBIH dataset prepared:
  Training size: (78798, 187)
  Validation size: (8756, 187)
  Test size: (21892, 187)

PTBDB dataset prepared:
  Training size: (10472, 187)
  Validation size: (1164, 187)
  Test size: (2909, 187)


## Load Data

In [16]:
# Raw feature matrices for each MITBIH split
X_train = mitbih.X_train.values
X_val = mitbih.X_val.values
X_test = mitbih.X_test.values

# Labels cast to integer class ids
y_train, y_val, y_test = (
    labels.astype(int).values
    for labels in (mitbih.y_train, mitbih.y_val, mitbih.y_test)
)

# Fit the scaler on the training split only, then apply the same
# transformation to every split so no validation/test statistics leak in.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_s, X_val_s, X_test_s = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)


## Test baseline models (fixed hyperparameters; Randomized Search CV not yet applied)

In [17]:
# Logistic Regression (multinomial)
from sklearn.linear_model import LogisticRegression

# `multi_class` is deprecated since scikit-learn 1.5 and is therefore not
# passed: with the lbfgs solver a target with more than two classes is
# already fitted as a multinomial model by default.
# max_iter is raised well above the default so lbfgs can converge.
logreg = LogisticRegression(max_iter=10000, solver='lbfgs')
results['LogisticRegression'] = eval_model(
    logreg,
    X_train_s, y_train,
    X_val_s, y_val,
    X_test_s, y_test,
)

results['LogisticRegression']


{'val': {'accuracy': 0.917428049337597,
  'precision_macro': 0.7980497177958749,
  'recall_macro': 0.6053252190815922,
  'f1_macro': 0.6716911485314008,
  'confusion_matrix': array([[7121,   18,   90,    1,   18],
         [ 108,  105,    7,    1,    1],
         [ 352,    5,  202,    9,   11],
         [  39,    0,    5,   20,    0],
         [  51,    1,    6,    0,  585]])},
 'test': {'accuracy': 0.9151288141786954,
  'precision_macro': 0.7908156208229149,
  'recall_macro': 0.5959543230646045,
  'f1_macro': 0.6635732728702419,
  'confusion_matrix': array([[17847,    35,   194,     8,    34],
         [  295,   233,    26,     1,     1],
         [  894,    15,   480,    32,    27],
         [   95,     0,     8,    59,     0],
         [  174,     1,    18,     0,  1415]])}}

In [None]:
# Row-normalised confusion matrix of the logistic regression on the test split.
plot_confusion_matrix(
    results['LogisticRegression']['test']['confusion_matrix'],
    classes=list(range(5)),
    title='Logistic Regression - Test Set Confusion Matrix',
    normalize=True,
)

In [19]:
# KNN
from sklearn.neighbors import KNeighborsClassifier

# Distance-based model, so it is evaluated on the standardised features.
knn = KNeighborsClassifier(n_neighbors=5)
results['KNN'] = eval_model(
    model=knn,
    X_tr=X_train_s, y_tr=y_train,
    X_va=X_val_s, y_va=y_val,
    X_te=X_test_s, y_te=y_test,
)

results['KNN']


{'val': {'accuracy': 0.9744175422567383,
  'precision_macro': 0.9198913453303005,
  'recall_macro': 0.8393863390503522,
  'f1_macro': 0.8749828513027905,
  'confusion_matrix': array([[7204,   17,   20,    4,    3],
         [  69,  146,    6,    0,    1],
         [  59,    0,  512,    7,    1],
         [  12,    0,    8,   44,    0],
         [  14,    0,    3,    0,  626]])},
 'test': {'accuracy': 0.9725470491503746,
  'precision_macro': 0.9114463065990963,
  'recall_macro': 0.8207111591423262,
  'f1_macro': 0.8602976534726728,
  'confusion_matrix': array([[18017,    38,    51,     8,     4],
         [  194,   351,    11,     0,     0],
         [  132,     3,  1285,    25,     3],
         [   41,     0,    18,   103,     0],
         [   62,     1,    10,     0,  1535]])}}

In [20]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)

# using unscaled data - RF is not sensitive to feature scaling
results['RandomForest'] = eval_model(
    model=rf,
    X_tr=X_train, y_tr=y_train,
    X_va=X_val, y_va=y_val,
    X_te=X_test, y_te=y_test,
)

results['RandomForest']


{'val': {'accuracy': 0.9749885792599361,
  'precision_macro': 0.9553214859683177,
  'recall_macro': 0.8080214011107666,
  'f1_macro': 0.8667295730141362,
  'confusion_matrix': array([[7228,    6,   10,    1,    3],
         [  84,  135,    2,    0,    1],
         [  59,    1,  514,    3,    2],
         [  18,    0,    9,   37,    0],
         [  18,    0,    2,    0,  623]])},
 'test': {'accuracy': 0.9737346976064316,
  'precision_macro': 0.963985119115103,
  'recall_macro': 0.8069288529747572,
  'f1_macro': 0.8702969780342661,
  'confusion_matrix': array([[18102,     4,     9,     0,     3],
         [  224,   330,     1,     0,     1],
         [  161,     0,  1269,    13,     5],
         [   50,     0,    11,   101,     0],
         [   88,     0,     5,     0,  1515]])}}

In [21]:
# SVM (RBF kernel)
from sklearn.svm import SVC

# Kernel method, so it is evaluated on the standardised features.
svm = SVC(kernel='rbf', C=1.0, gamma='scale', decision_function_shape='ovr')
results['SVM'] = eval_model(
    model=svm,
    X_tr=X_train_s, y_tr=y_train,
    X_va=X_val_s, y_va=y_val,
    X_te=X_test_s, y_te=y_test,
)

results['SVM']


{'val': {'accuracy': 0.9716765646413887,
  'precision_macro': 0.9351555769656905,
  'recall_macro': 0.7928834068797076,
  'f1_macro': 0.8505421809238356,
  'confusion_matrix': array([[7223,   11,   13,    0,    1],
         [  86,  132,    4,    0,    0],
         [  61,    0,  510,    7,    1],
         [  22,    0,    7,   35,    0],
         [  32,    0,    3,    0,  608]])},
 'test': {'accuracy': 0.9686186734880322,
  'precision_macro': 0.9314371340040462,
  'recall_macro': 0.7738490927628444,
  'f1_macro': 0.8368317611472428,
  'confusion_matrix': array([[18083,    12,    19,     1,     3],
         [  238,   311,     7,     0,     0],
         [  162,     1,  1256,    25,     4],
         [   67,     0,     9,    86,     0],
         [  130,     0,     9,     0,  1469]])}}

In [23]:
# Compare models (macro-averaged)
import pandas as pd

# One summary row per evaluated model, taken from the `results` registry.
rows = [
    {
        'model': model_name,
        'val_accuracy': scores['val']['accuracy'],
        'val_f1_macro': scores['val']['f1_macro'],
        'test_accuracy': scores['test']['accuracy'],
        'test_f1_macro': scores['test']['f1_macro'],
    }
    for model_name, scores in results.items()
]

# Best model first: rank by validation macro-F1, ties broken by test macro-F1.
comparison_df = (
    pd.DataFrame(rows)
    .sort_values(by=['val_f1_macro', 'test_f1_macro'], ascending=False)
    .reset_index(drop=True)
)
comparison_df


Unnamed: 0,model,val_accuracy,val_f1_macro,test_accuracy,test_f1_macro
0,KNN,0.974418,0.874983,0.972547,0.860298
1,RandomForest,0.974989,0.86673,0.973735,0.870297
2,SVM,0.971677,0.850542,0.968619,0.836832
