# Model Testing

Questions to be answered:

- Remove outliers?
- Which Sampling method to use?


In [1]:
scoring = {'f1_macro': 'f1_macro', 'bal_acc': 'balanced_accuracy', 'f1_weighted': 'f1_weighted'}
random_state = 42

## 1. Imports

In [2]:
import sys
import os 
sys.path.append('..')

from src.utils.preprocessing import (
    prepare_mitbih, 
    prepare_ptbdb,
    resample_training
)
from src.utils.evaluation import eval_model
from src.visualization import plot_confusion_matrix
from src.utils.model_saver import create_model_saver

# external 
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from scipy.stats import loguniform, randint, uniform
import numpy as np
import re
import json

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
import xgboost as xgb

# Samplers

from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from imblearn.combine import SMOTETomek, SMOTEENN
from sklearn.preprocessing import StandardScaler

# Init model saver
model_saver = create_model_saver("../src/models/exploration_phase")

In [3]:
# Prepare datasets
mitbih = prepare_mitbih(remove_outliers=False)

print("MITBIH dataset prepared:")
print(f"  Training size: {mitbih.X_train.shape}")
print(f"  Test size: {mitbih.X_test.shape if mitbih.X_test is not None else 'None'}")
print("Note: No validation set - using train/test split only. Cross-validation handles train/val splitting.")


MITBIH dataset prepared:
  Training size: (87554, 187)
  Test size: (21892, 187)
Note: No validation set - using train/test split only. Cross-validation handles train/val splitting.


## 2. Load Data

In [4]:
X_train, X_test = mitbih.X_train.values, mitbih.X_test.values
y_train = mitbih.y_train.astype(int).values
y_test = mitbih.y_test.astype(int).values

# Scale features using train fit only
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)


### Create Param Spaces

In [5]:
param_spaces = {
    "LogisticRegression": {
        "estimator": LogisticRegression(max_iter=10000, multi_class='multinomial', solver='lbfgs', n_jobs=-1),
        "params": {
            "C": loguniform(1e-3, 1e3),      # Big C = less penalty on large weights (more freedom, risk of overfitting). 
                                             # Small C = more penalty (more discipline, less overfitting).
                                             # loguniform = means we try values spread across tiny to big scales (e.g., 0.001 up to 100), not just small steps.
            "penalty": ["l2"], # gently pushes weights toward zero, which keeps the model simpler and more stable.
            "solver": ["lbfgs"],
        },
        "cv": StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        "n_iter": 5,
        "create_new_model": False,
    },
    "KNN": {
        "estimator": KNeighborsClassifier(n_jobs=-1),
        "params": {
            "n_neighbors": randint(1, 51),
            "weights": ["uniform", "distance"],
            "metric": ["minkowski", "manhattan", "euclidean"],
            "p": [1, 2],
        },
        "cv": StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        "n_iter": 5,
    },
    "RandomForest": {
        "estimator": RandomForestClassifier(random_state=random_state, n_jobs=-1),
        "params": {
            "n_estimators": [100, 200, 300],
            "max_depth": [10, 15, 20],
            "min_samples_split": [2, 5, 10, 20, 50],
            "min_samples_leaf": [1, 2, 4, 8],
            "max_features": ["sqrt", "log2", None],
            "bootstrap": [True],
            "class_weight": ["balanced", None],
            "criterion": ["gini", "entropy"],
        },
        "cv": StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        "n_iter": 5,
    },
    "SVM": {
        "estimator": SVC(),
        "params": {
            "kernel": ["rbf", "poly"],
            "C": [0.1, 1, 10],
            "gamma": [0.001, 0.01, 0.1, 0.5, 0.9],
        },
        "cv": StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state),
        "n_iter": 5,
    },
    "DecisionTree": {
        "estimator": DecisionTreeClassifier(random_state=random_state),
        "params": {
            "max_depth": [None, 5, 10, 15, 20, 25, 30],
            "min_samples_split": [2, 5, 10, 20, 50],
            "min_samples_leaf": [1, 2, 4, 8, 16],
            "max_features": ["sqrt", "log2", None],
            "criterion": ["gini", "entropy"],
            "class_weight": ["balanced", None],
            "splitter": ["best", "random"],
        },
        "cv": StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        "n_iter": 100,
    },
    "XGBoost": {
        "estimator": xgb.XGBClassifier(
            objective="multi:softmax",
            num_class=5,
            random_state=random_state,
            n_jobs=-1,
            eval_metric="mlogloss",
        ),
        "params": {
            "n_estimators": [100, 200, 300, 500],
            "max_depth": [3, 4, 5, 6, 7, 8],
            "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3],
            "subsample": [0.8, 0.9, 1.0],
            "colsample_bytree": [0.8, 0.9, 1.0],
            "reg_alpha": [0, 0.1, 0.5, 1.0],
            "reg_lambda": [0, 0.1, 0.5, 1.0],
            "min_child_weight": [1, 3, 5, 7],
            "gamma": [0, 0.1, 0.2, 0.3],
        },
        "cv": StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        "n_iter": 40,
    },
    "LDA": {
        "estimator": LinearDiscriminantAnalysis(),
        "params": [
            {"solver": ["svd"], "store_covariance": [False, True], "tol": [1e-4, 1e-3, 1e-2]},
            {"solver": ["lsqr", "eigen"], "shrinkage": [None, "auto", 0.0, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.9], "tol": [1e-4, 1e-3, 1e-2]},
        ],
        "cv": StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state),
        "n_iter": 50,
    },
    "ANN": {
        "estimator": MLPClassifier(
            max_iter=300,
            early_stopping=True,
            random_state=random_state,
            n_iter_no_change=10,
            solver="adam",
        ),
        "params": {
            "hidden_layer_sizes": [(64,), (128,), (128, 64)],
            "activation": ["relu"],
            "alpha": loguniform(1e-4, 1e-2),
            "learning_rate_init": loguniform(1e-3, 1e-2),
            "batch_size": randint(64, 129),
            "beta_1": uniform(0.9, 0.09),
            "beta_2": uniform(0.95, 0.049),
            "validation_fraction": [0.1, 0.15],
        },
        "cv": StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state),
        "n_iter": 100,
    },
}

## 3. Test models with Randomized Search CV

### 3.1 Without outlier removal or sampling

#### 3.1.1 Logistic Regression

In [6]:
# Check if model already exists
classifier_name = "LogisticRegression"
experiment_name = "no_sampling"
create_new = param_spaces[classifier_name].get('create_new_model', False)

if not create_new and model_saver.model_exists(classifier_name, experiment_name):
    print(f"Model {classifier_name} already exists for experiment {experiment_name}. Loading...")
    rs_logreg = model_saver.load_model(classifier_name, experiment_name)
    print("Model loaded successfully!")
else:
    print(f"Model {classifier_name} not found. Training new model...")
    
    logreg = param_spaces[classifier_name]['estimator']

    param_dist_logreg = param_spaces[classifier_name]['params']

    rs_logreg = RandomizedSearchCV(
        estimator=logreg,
        param_distributions=param_dist_logreg,
        n_iter=param_spaces[classifier_name]['n_iter'],
        scoring=scoring,
        refit='f1_macro',
        cv=param_spaces[classifier_name]['cv'],
        random_state=random_state,
        n_jobs=-1,
        verbose=2
    )
    rs_logreg.fit(X_train_s, y_train)
    
    # Save the trained model
    metadata = {
        'best_params': rs_logreg.best_params_,
        'best_score': rs_logreg.best_score_,
        'cv_results': rs_logreg.cv_results_,
        'experiment': experiment_name,
        'classifier': classifier_name
    }
    model_saver.save_model(classifier_name, rs_logreg, experiment_name, metadata)
    print(f"Model {classifier_name} saved successfully!")

INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LogisticRegression_no_sampling.joblib


Model LogisticRegression already exists for experiment no_sampling. Loading...
Model loaded successfully!


In [7]:
best_logreg = rs_logreg.best_estimator_
results['LogisticRegression'] = eval_model(
    best_logreg,
    X_train_s, y_train,
    X_test_s, y_test,
)

KeyboardInterrupt: 

In [10]:
display(best_logreg)
results['LogisticRegression']

{'labels': array([0, 1, 2, 3, 4]),
 'val': {'accuracy': 0.9176564641388762,
  'precision_macro': 0.800141874074688,
  'recall_macro': 0.6105422563887755,
  'f1_macro': 0.6765546028672075,
  'precision_per_class': array([0.92888831, 0.81679389, 0.65064103, 0.65625   , 0.94813614]),
  'recall_per_class': array([0.98220199, 0.48198198, 0.35060449, 0.328125  , 0.90979782]),
  'f1_per_class': array([0.9548015 , 0.60623229, 0.45566779, 0.4375    , 0.92857143]),
  'support_per_class': array([7248,  222,  579,   64,  643]),
  'confusion_matrix': array([[7119,   18,   91,    1,   19],
         [ 106,  107,    7,    1,    1],
         [ 350,    5,  203,    9,   12],
         [  38,    0,    5,   21,    0],
         [  51,    1,    6,    0,  585]])},
 'test': {'accuracy': 0.9151288141786954,
  'precision_macro': 0.7885019903691001,
  'recall_macro': 0.5967700974401475,
  'f1_macro': 0.6633702561560696,
  'precision_per_class': array([0.92469552, 0.82105263, 0.66344828, 0.57843137, 0.95488215]),
 

#### 3.1.2 KNN

In [None]:
# Check if model already exists
classifier_name = "KNN"
experiment_name = "no_sampling"

if model_saver.model_exists(classifier_name, experiment_name):
    print(f"Model {classifier_name} already exists for experiment {experiment_name}. Loading...")
    rs_knn = model_saver.load_model(classifier_name, experiment_name)
    print("Model loaded successfully!")
else:
    print(f"Model {classifier_name} not found. Training new model...")
    
    knn = KNeighborsClassifier()
    param_dist_knn = {
        'n_neighbors': randint(1, 51),
        'weights': ['uniform', 'distance'],           # helps with imbalance; 'distance' often better
        'metric': ['minkowski', 'manhattan', 'euclidean'],
        'p': [1,2],                           # used only for minkowski, if left out it defaults to euclidean
    }

    rs_knn = RandomizedSearchCV(
        estimator=knn,
        param_distributions=param_dist_knn,
        n_iter=20,
        scoring=scoring,
        refit='f1_macro',
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        random_state=random_state,
        n_jobs=-1,
    )
    rs_knn.fit(X_train_s, y_train)
    
    # Save the trained model
    metadata = {
        'best_params': rs_knn.best_params_,
        'best_score': rs_knn.best_score_,
        'cv_results': rs_knn.cv_results_,
        'experiment': experiment_name,
        'classifier': classifier_name
    }
    model_saver.save_model(classifier_name, rs_knn, experiment_name, metadata)
    print(f"Model {classifier_name} saved successfully!")

INFO:src.models.exploration_phase.model_saver:Model loaded: src/models/exploration_phase/KNN_no_sampling.joblib


Model KNN already exists for experiment no_sampling. Loading...
Model loaded successfully!


In [None]:
best_knn = rs_knn.best_estimator_
results['KNN'] = eval_model(
    best_knn,
    X_train_s, y_train,
    X_test_s, y_test,
)

In [13]:
display(best_knn)
results['KNN']

{'labels': array([0, 1, 2, 3, 4]),
 'val': {'accuracy': 0.9798994974874372,
  'precision_macro': 0.9391433113355262,
  'recall_macro': 0.8599185488349104,
  'f1_macro': 0.8949289961165755,
  'precision_per_class': array([0.98375205, 0.88826816, 0.95087719, 0.87755102, 0.99526814]),
  'recall_per_class': array([0.99406733, 0.71621622, 0.93609672, 0.671875  , 0.98133748]),
  'f1_per_class': array([0.98888279, 0.79301746, 0.94342907, 0.76106195, 0.98825372]),
  'support_per_class': array([7248,  222,  579,   64,  643]),
  'confusion_matrix': array([[7205,   18,   20,    3,    2],
         [  61,  159,    2,    0,    0],
         [  33,    0,  542,    3,    1],
         [  16,    0,    5,   43,    0],
         [   9,    2,    1,    0,  631]])},
 'test': {'accuracy': 0.9775260369084597,
  'precision_macro': 0.920137365015206,
  'recall_macro': 0.8528821323680518,
  'f1_macro': 0.8832154533279242,
  'precision_per_class': array([0.98254894, 0.88167053, 0.94080338, 0.80141844, 0.99424552]),
 

#### 3.1.3 Random Forest


In [None]:
# Check if model already exists
classifier_name = "RandomForest"
experiment_name = "no_sampling"

if model_saver.model_exists(classifier_name, experiment_name):
    print(f"Model {classifier_name} already exists for experiment {experiment_name}. Loading...")
    rs_rf = model_saver.load_model(classifier_name, experiment_name)
    print("Model loaded successfully!")
else:
    print(f"Model {classifier_name} not found. Training new model...")
    
    rf = RandomForestClassifier(random_state=random_state, n_jobs=-1)
    param_dist_rf = {
        'n_estimators': [100, 200, 300],
        'max_depth': [10, 15, 20], # prevent overfitting majority class
        
        'min_samples_split': [2, 5, 10, 20, 50],
        'min_samples_leaf': [1, 2, 4, 8], # higher = better regularization
        
        'max_features': ['sqrt', 'log2', None],
        'bootstrap': [True], # better generalization
        
        'class_weight': ['balanced', None], # for imbalanced data
        
        # Split criterion: entropy can help with imbalanced classes
        'criterion': ['gini', 'entropy'],
    }

    rs_rf = RandomizedSearchCV(
        estimator=rf,
        param_distributions=param_dist_rf,
        n_iter=20,
        scoring=scoring,
        refit='f1_macro',
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        random_state=random_state,
        n_jobs=-1,
        verbose=2,
    )

    rs_rf.fit(X_train, y_train) # using unscaled data - RF is not sensitive to feature scaling
    
    # Save the trained model
    metadata = {
        'best_params': rs_rf.best_params_,
        'best_score': rs_rf.best_score_,
        'cv_results': rs_rf.cv_results_,
        'experiment': experiment_name,
        'classifier': classifier_name
    }
    model_saver.save_model(classifier_name, rs_rf, experiment_name, metadata)
    print(f"Model {classifier_name} saved successfully!")

INFO:src.models.exploration_phase.model_saver:Model loaded: src/models/exploration_phase/RandomForest_no_sampling.joblib


Model RandomForest already exists for experiment no_sampling. Loading...
Model loaded successfully!


In [None]:
best_rf = rs_rf.best_estimator_
results['RandomForest'] = eval_model( 
    best_rf, 
    X_train, y_train,
    X_test, y_test,
)

In [16]:
display(best_rf)
results['RandomForest']

{'labels': array([0, 1, 2, 3, 4]),
 'val': {'accuracy': 0.9785290086797624,
  'precision_macro': 0.9160207060087867,
  'recall_macro': 0.8648953216324944,
  'f1_macro': 0.8885240457117929,
  'precision_per_class': array([0.98467013, 0.88172043, 0.94210526, 0.78571429, 0.98589342]),
  'recall_per_class': array([0.99254967, 0.73873874, 0.92746114, 0.6875    , 0.97822706]),
  'f1_per_class': array([0.9885942 , 0.80392157, 0.93472585, 0.73333333, 0.98204528]),
  'support_per_class': array([7248,  222,  579,   64,  643]),
  'confusion_matrix': array([[7194,   20,   20,    8,    6],
         [  54,  164,    3,    0,    1],
         [  34,    2,  537,    4,    2],
         [  14,    0,    6,   44,    0],
         [  10,    0,    4,    0,  629]])},
 'test': {'accuracy': 0.9748309884889458,
  'precision_macro': 0.901553466600747,
  'recall_macro': 0.8709987482601733,
  'f1_macro': 0.8848821016625263,
  'precision_per_class': array([0.98157362, 0.8516129 , 0.94247159, 0.74556213, 0.98654709]),
 

#### 3.1.4 SVM

In [None]:
# Check if model already exists
classifier_name = "SVM"
experiment_name = "no_sampling"

if model_saver.model_exists(classifier_name, experiment_name):
    print(f"Model {classifier_name} already exists for experiment {experiment_name}. Loading...")
    rs_svm = model_saver.load_model(classifier_name, experiment_name)
    print("Model loaded successfully!")
else:
    print(f"Model {classifier_name} not found. Training new model...")
    
    svm = SVC()
    param_dist_svm = {
        'kernel': ['rbf', 'poly'],
        'C': [0.1, 1, 10],
        'gamma': [0.001, 0.01, 0.1, 0.5, 1],
    }
    rs_svm = RandomizedSearchCV(
        estimator=svm,
        param_distributions=param_dist_svm,
        n_iter=15,
        scoring=scoring,
        refit='f1_macro',
        cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state),
        random_state=random_state,
        n_jobs=-1,
        verbose=2,
    )
    rs_svm.fit(X_train_s, y_train)
    
    # Save the trained model
    metadata = {
        'best_params': rs_svm.best_params_,
        'best_score': rs_svm.best_score_,
        'cv_results': rs_svm.cv_results_,
        'experiment': experiment_name,
        'classifier': classifier_name
    }
    model_saver.save_model(classifier_name, rs_svm, experiment_name, metadata)
    print(f"Model {classifier_name} saved successfully!")

INFO:src.models.exploration_phase.model_saver:Model loaded: src/models/exploration_phase/SVM_no_sampling.joblib


Model SVM already exists for experiment no_sampling. Loading...
Model loaded successfully!


In [None]:
best_svm = rs_svm.best_estimator_
results['SVM'] = eval_model(
    best_svm,
    X_train_s, y_train,
    X_test_s, y_test,
)

In [19]:
display(best_svm)
results['SVM']

{'labels': array([0, 1, 2, 3, 4]),
 'val': {'accuracy': 0.9745317496573778,
  'precision_macro': 0.9163739251346034,
  'recall_macro': 0.8540245675024011,
  'f1_macro': 0.8822902628187215,
  'precision_per_class': array([0.98194995, 0.84615385, 0.91872792, 0.8490566 , 0.98598131]),
  'recall_per_class': array([0.99075607, 0.69369369, 0.89810017, 0.703125  , 0.9844479 ]),
  'f1_per_class': array([0.98633336, 0.76237624, 0.90829694, 0.76923077, 0.98521401]),
  'support_per_class': array([7248,  222,  579,   64,  643]),
  'confusion_matrix': array([[7181,   23,   35,    3,    6],
         [  65,  154,    3,    0,    0],
         [  46,    5,  520,    5,    3],
         [  12,    0,    7,   45,    0],
         [   9,    0,    1,    0,  633]])},
 'test': {'accuracy': 0.9740544491138315,
  'precision_macro': 0.9049136882358036,
  'recall_macro': 0.8394617444559591,
  'f1_macro': 0.8684037795992783,
  'precision_per_class': array([0.98054602, 0.87347932, 0.92642857, 0.75675676, 0.98735777]),


#### 3.1.5 Decision Tree Classifier

In [None]:
# Check if model already exists
classifier_name = "DecisionTree"
experiment_name = "no_sampling"

if model_saver.model_exists(classifier_name, experiment_name):
    print(f"Model {classifier_name} already exists for experiment {experiment_name}. Loading...")
    rs_dt = model_saver.load_model(classifier_name, experiment_name)
    print("Model loaded successfully!")
else:
    print(f"Model {classifier_name} not found. Training new model...")
    
    dt = DecisionTreeClassifier(random_state=random_state)

    param_dist_dt = {
        'max_depth': [None, 5, 10, 15, 20, 25, 30],
        'min_samples_split': [2, 5, 10, 20, 50],
        'min_samples_leaf': [1, 2, 4, 8, 16],
        'max_features': ['sqrt', 'log2', None],
        'criterion': ['gini', 'entropy'],  
        'class_weight': ['balanced', None],
        'splitter': ['best', 'random'],  # Split strategy
    }

    rs_dt = RandomizedSearchCV(
        estimator=dt,
        param_distributions=param_dist_dt,
        n_iter=100,  
        scoring=scoring,
        refit='f1_macro',
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        random_state=random_state,
        n_jobs=-1,
        verbose=2,
    )

    rs_dt.fit(X_train, y_train)  # Using unscaled data - DT doesn't need scaling
    
    # Save the trained model
    metadata = {
        'best_params': rs_dt.best_params_,
        'best_score': rs_dt.best_score_,
        'cv_results': rs_dt.cv_results_,
        'experiment': experiment_name,
        'classifier': classifier_name
    }
    model_saver.save_model(classifier_name, rs_dt, experiment_name, metadata)
    print(f"Model {classifier_name} saved successfully!")

INFO:src.models.exploration_phase.model_saver:Model loaded: src/models/exploration_phase/DecisionTree_no_sampling.joblib


Model DecisionTree already exists for experiment no_sampling. Loading...
Model loaded successfully!


In [None]:
best_dt = rs_dt.best_estimator_
results['DecisionTree'] = eval_model(
    best_dt,
    X_train, y_train,
    X_test, y_test,
)

In [22]:
display(best_dt)
results['DecisionTree']

{'labels': array([0, 1, 2, 3, 4]),
 'val': {'accuracy': 0.9611694837825491,
  'precision_macro': 0.8286714991952492,
  'recall_macro': 0.8113420712229086,
  'f1_macro': 0.8192777361500603,
  'precision_per_class': array([0.97892852, 0.7357513 , 0.86677909, 0.61290323, 0.94899536]),
  'recall_per_class': array([0.98068433, 0.63963964, 0.88773748, 0.59375   , 0.95489891]),
  'f1_per_class': array([0.97980564, 0.68433735, 0.87713311, 0.6031746 , 0.95193798]),
  'support_per_class': array([7248,  222,  579,   64,  643]),
  'confusion_matrix': array([[7108,   41,   58,   15,   26],
         [  71,  142,    7,    0,    2],
         [  47,    5,  514,    9,    4],
         [  16,    1,    8,   38,    1],
         [  19,    4,    6,    0,  614]])},
 'test': {'accuracy': 0.9594372373469761,
  'precision_macro': 0.8282860612296581,
  'recall_macro': 0.7983514845333289,
  'f1_macro': 0.8124014481179902,
  'precision_per_class': array([0.97427195, 0.72995781, 0.8837535 , 0.59119497, 0.96225208]),


#### 3.1.6 XGBoost / Gradien Boosting

In [None]:
# Check if model already exists
classifier_name = "XGBoost"
experiment_name = "no_sampling"

if model_saver.model_exists(classifier_name, experiment_name):
    print(f"Model {classifier_name} already exists for experiment {experiment_name}. Loading...")
    rs_xgb = model_saver.load_model(classifier_name, experiment_name)
    print("Model loaded successfully!")
else:
    print(f"Model {classifier_name} not found. Training new model...")
    
    xgb_model = xgb.XGBClassifier(
        objective='multi:softmax',
        num_class=5,  # no of classes
        random_state=random_state,
        n_jobs=-1,
        eval_metric='mlogloss'
    )

    param_dist_xgb = {
        'n_estimators': [100, 200, 300, 500],
        'max_depth': [3, 4, 5, 6, 7, 8],
        'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],
        'subsample': [0.8, 0.9, 1.0],
        'colsample_bytree': [0.8, 0.9, 1.0],
        'reg_alpha': [0, 0.1, 0.5, 1.0],  # L1 regularization
        'reg_lambda': [0, 0.1, 0.5, 1.0],  # L2 regularization
        'min_child_weight': [1, 3, 5, 7],
        'gamma': [0, 0.1, 0.2, 0.3],  # Minimum loss reduction
    }

    rs_xgb = RandomizedSearchCV(
        estimator=xgb_model,
        param_distributions=param_dist_xgb,
        n_iter=30,  
        scoring=scoring,
        refit='f1_macro',
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state),
        random_state=random_state,
        n_jobs=-1,
        verbose=2,
    )

    rs_xgb.fit(X_train, y_train)  # XGBoost handles scaling internally
    
    # Save the trained model
    metadata = {
        'best_params': rs_xgb.best_params_,
        'best_score': rs_xgb.best_score_,
        'cv_results': rs_xgb.cv_results_,
        'experiment': experiment_name,
        'classifier': classifier_name
    }
    model_saver.save_model(classifier_name, rs_xgb, experiment_name, metadata)
    print(f"Model {classifier_name} saved successfully!")

INFO:src.models.exploration_phase.model_saver:Model loaded: src/models/exploration_phase/XGBoost_no_sampling.joblib


Model XGBoost already exists for experiment no_sampling. Loading...
Model loaded successfully!


In [None]:
best_xgb = rs_xgb.best_estimator_
results['XGBoost'] = eval_model(
    best_xgb,
    X_train, y_train,
    X_test, y_test,
)

In [25]:
display(best_xgb)
results['XGBoost']

{'labels': array([0, 1, 2, 3, 4]),
 'val': {'accuracy': 0.9816126084970306,
  'precision_macro': 0.9503427741288547,
  'recall_macro': 0.8653017509227453,
  'f1_macro': 0.9026234500019875,
  'precision_per_class': array([0.98499318, 0.92397661, 0.94903339, 0.9       , 0.99371069]),
  'recall_per_class': array([0.99613687, 0.71171171, 0.93264249, 0.703125  , 0.98289269]),
  'f1_per_class': array([0.99053368, 0.80407125, 0.94076655, 0.78947368, 0.98827209]),
  'support_per_class': array([7248,  222,  579,   64,  643]),
  'confusion_matrix': array([[7220,   11,   13,    2,    2],
         [  62,  158,    1,    0,    1],
         [  34,    1,  540,    3,    1],
         [   7,    0,   12,   45,    0],
         [   7,    1,    3,    0,  632]])},
 'test': {'accuracy': 0.9818198428649735,
  'precision_macro': 0.9545382548756202,
  'recall_macro': 0.8654063021130722,
  'f1_macro': 0.9046987238778078,
  'precision_per_class': array([0.98292922, 0.95588235, 0.97383721, 0.86764706, 0.99239544]),


#### 3.1.7 Linear Discriminant Analysis

In [None]:
# Check if model already exists
classifier_name = "LDA"
experiment_name = "no_sampling"

if model_saver.model_exists(classifier_name, experiment_name):
    print(f"Model {classifier_name} already exists for experiment {experiment_name}. Loading...")
    rs_lda = model_saver.load_model(classifier_name, experiment_name)
    print("Model loaded successfully!")

else:
    print(f"Model {classifier_name} not found. Training new model...")
    
    param_distributions = [
        {
            "solver": ["svd"],
            "store_covariance": [False, True],
            "tol": [1e-4, 1e-3, 1e-2],
            # n_components kept implicit (None) to avoid invalid values vs. n_classes-1
        },
        {
            "solver": ["lsqr", "eigen"],
            "shrinkage": [None, "auto", 0.0, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.9],
            "tol": [1e-4, 1e-3, 1e-2],
        },
    ]

    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state)

    lda = LinearDiscriminantAnalysis()
    rs_lda = RandomizedSearchCV(
        estimator=lda,
        param_distributions=param_distributions,
        n_iter=50,
        scoring=scoring,
        refit="f1_macro",
        cv=cv,
        random_state=random_state,
        n_jobs=-1,
        verbose=2,
    )


    rs_lda.fit(X_train_s, y_train)
    
    # Save the trained model
    metadata = {
        "best_params": rs_lda.best_params_,
        "best_score": rs_lda.best_score_,
        "cv_results": rs_lda.cv_results_,
        "experiment": experiment_name,
        "classifier": classifier_name,
    }

    model_saver.save_model(classifier_name, rs_lda, experiment_name, metadata)
    print(f"Model {classifier_name} saved successfully!")

Model LDA not found. Training new model...
Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] END .............shrinkage=0.85, solver=lsqr, tol=0.001; total time=   1.7s
[CV] END .............shrinkage=0.0, solver=lsqr, tol=0.0001; total time=   1.8s
[CV] END .............shrinkage=0.85, solver=lsqr, tol=0.001; total time=   2.1s
[CV] END .............shrinkage=0.0, solver=lsqr, tol=0.0001; total time=   2.2s
[CV] END ............shrinkage=0.75, solver=eigen, tol=0.001; total time=   2.3s
[CV] END .............shrinkage=0.0, solver=eigen, tol=0.001; total time=   2.4s
[CV] END .............shrinkage=0.0, solver=lsqr, tol=0.0001; total time=   2.2s
[CV] END .............shrinkage=0.5, solver=eigen, tol=0.001; total time=   2.4s
[CV] END .............shrinkage=0.85, solver=lsqr, tol=0.001; total time=   2.5s
[CV] END ............shrinkage=0.75, solver=eigen, tol=0.001; total time=   2.5s
[CV] END ............shrinkage=None, solver=eigen, tol=0.001; total time=   2.7s
[CV]

INFO:src.models.exploration_phase.model_saver:Model saved: ../src/models/exploration_phase/LDA_no_sampling.joblib
INFO:src.models.exploration_phase.model_saver:Metadata saved: ../src/models/exploration_phase/LDA_no_sampling_metadata.pkl


Model LDA saved successfully!


In [None]:
best_lda = rs_lda.best_estimator_
results['LDA'] = eval_model(
    best_lda,
    X_train_s, y_train,
    X_test_s, y_test,
)

In [56]:
display(best_lda)
display(rs_lda.best_params_)
results['LDA']

{'tol': 0.0001, 'solver': 'lsqr', 'shrinkage': 'auto'}

{'labels': array([0, 1, 2, 3, 4]),
 'val': {'accuracy': 0.8941297396071265,
  'precision_macro': 0.6346495795067106,
  'recall_macro': 0.6599108158462592,
  'f1_macro': 0.6339529523776317,
  'precision_per_class': array([0.94157365, 0.56157635, 0.5323475 , 0.18181818, 0.9559322 ]),
  'recall_per_class': array([0.94274283, 0.51351351, 0.49740933, 0.46875   , 0.87713841]),
  'f1_per_class': array([0.94215788, 0.53647059, 0.51428571, 0.26200873, 0.91484185]),
  'support_per_class': array([7248,  222,  579,   64,  643]),
  'confusion_matrix': array([[6833,   69,  227,  106,   13],
         [  94,  114,   10,    4,    0],
         [ 234,   19,  288,   25,   13],
         [  29,    0,    5,   30,    0],
         [  67,    1,   11,    0,  564]])},
 'test': {'accuracy': 0.8933857116754979,
  'precision_macro': 0.6406346941002119,
  'recall_macro': 0.6505553927969936,
  'f1_macro': 0.6284933290106093,
  'precision_per_class': array([0.93934237, 0.58119658, 0.56112377, 0.15560166, 0.96590909]),


#### 3.1.8 Artificial Neural Networks

In [None]:
classifier_name = "ANN"
experiment_name = "no_sampling"

if model_saver.model_exists(classifier_name, experiment_name):
    print(f"Model {classifier_name} already exists for experiment {experiment_name}. Loading...")
    rs_lda = model_saver.load_model(classifier_name, experiment_name)
    print("Model loaded successfully!")

else:
    print(f"Model {classifier_name} not found. Training new model...")
    
    ann = MLPClassifier(
        max_iter=300,
        early_stopping=True,
        random_state=random_state,
        n_iter_no_change=10,
        solver="adam",
    )


    param_distributions = {
        "hidden_layer_sizes": [
            (64,),
            (128,),
            (128, 64),
        ],
        "activation": ["relu"],  # focused, fast
        "alpha": loguniform(1e-4, 1e-2),  # L2
        "learning_rate_init": loguniform(1e-3, 1e-2),
        "batch_size": randint(64, 129),
        "beta_1": uniform(0.9, 0.09),   # ~0.90-0.99
        "beta_2": uniform(0.95, 0.049), # ~0.95-0.999
        "validation_fraction": [0.1, 0.15],
    }

    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state)

    rs_ann = RandomizedSearchCV(
        estimator=ann,
        param_distributions=param_distributions,
        n_iter=20,
        scoring=scoring,
        refit="f1_macro",
        cv=cv,
        random_state=random_state,
        n_jobs=-1,
        verbose=2,
    )

    rs_ann.fit(X_train_s, y_train)
    
    metadata = {
        "best_params": rs_ann.best_params_,
        "best_score": rs_ann.best_score_,
        "cv_results": rs_ann.cv_results_,
        "experiment": experiment_name,
        "classifier": classifier_name,
    }
    model_saver.save_model(classifier_name, rs_ann, experiment_name, metadata)
    print(f"Model {classifier_name} saved successfully!")


Model ANN not found. Training new model...
Fitting 3 folds for each of 20 candidates, totalling 60 fits
[CV] END activation=relu, alpha=0.0096476850757201, batch_size=96, beta_1=0.9273818018663584, beta_2=0.9757130651499796, hidden_layer_sizes=(64,), learning_rate_init=0.001955370866274525, validation_fraction=0.1; total time=  14.7s
[CV] END activation=relu, alpha=0.0096476850757201, batch_size=96, beta_1=0.9273818018663584, beta_2=0.9757130651499796, hidden_layer_sizes=(64,), learning_rate_init=0.001955370866274525, validation_fraction=0.1; total time=  21.4s
[CV] END activation=relu, alpha=0.00032927591344236165, batch_size=67, beta_1=0.928053996848047, beta_2=0.9754833330377127, hidden_layer_sizes=(128,), learning_rate_init=0.003695730787054511, validation_fraction=0.1; total time=  24.5s
[CV] END activation=relu, alpha=0.00013066739238053285, batch_size=87, beta_1=0.9585799625653968, beta_2=0.9527641673723278, hidden_layer_sizes=(128,), learning_rate_init=0.0016305687346221474, va

INFO:src.models.exploration_phase.model_saver:Model saved: ../src/models/exploration_phase/ANN_no_sampling.joblib
INFO:src.models.exploration_phase.model_saver:Metadata saved: ../src/models/exploration_phase/ANN_no_sampling_metadata.pkl


Model ANN saved successfully!


In [None]:
best_ann = rs_ann.best_estimator_
results['ANN'] = eval_model(
    best_ann,
    X_train_s, y_train,
    X_test_s, y_test,
)

In [66]:
display(best_ann)
display(rs_ann.best_score_)
results['ANN']

0.8895580263203478

{'labels': array([0, 1, 2, 3, 4]),
 'val': {'accuracy': 0.9809273640931933,
  'precision_macro': 0.9202085425774307,
  'recall_macro': 0.8750374326674202,
  'f1_macro': 0.8959547701775328,
  'precision_per_class': array([0.98765432, 0.88421053, 0.94415358, 0.80357143, 0.98145286]),
  'recall_per_class': array([0.99337748, 0.75675676, 0.9343696 , 0.703125  , 0.98755832]),
  'f1_per_class': array([0.99050764, 0.81553398, 0.93923611, 0.75      , 0.98449612]),
  'support_per_class': array([7248,  222,  579,   64,  643]),
  'confusion_matrix': array([[7200,   18,   17,    6,    7],
         [  50,  168,    2,    0,    2],
         [  26,    4,  541,    5,    3],
         [   8,    0,   11,   45,    0],
         [   6,    0,    2,    0,  635]])},
 'test': {'accuracy': 0.9778457884158597,
  'precision_macro': 0.9155768607194622,
  'recall_macro': 0.8594817664280281,
  'f1_macro': 0.8849524970380223,
  'precision_per_class': array([0.98451521, 0.86067416, 0.93907563, 0.80985915, 0.98376015]),


#### 3.1.9 Results Summary and Comparison

In [None]:
def _safe_col(label):
    # Make safe column names like "val_f1_cls_0" or "val_f1_cls_N"
    return re.sub(r'[^0-9a-zA-Z_]+', '_', str(label)).strip('_')

# Mapping of model names to their RandomizedSearchCV objects
models_and_searchers = {
    "LogisticRegression": rs_logreg,
    "KNN": rs_knn, 
    "RandomForest": rs_rf,
    "SVM": rs_svm,
    "DecisionTree": rs_dt,
    "XGBoost": rs_xgb,
    "LDA": rs_lda,
    "ANN": rs_ann,
}

rows = []
for name, res in results.items():
    row = {
        'model': name,
        'test_accuracy': round(res['test']['accuracy'], 2),
        'test_f1_macro': round(res['test']['f1_macro'], 2),
    }

    # Add best parameters from RandomizedSearchCV
    if name in models_and_searchers:
        searcher = models_and_searchers[name]
        best_params = searcher.best_params_
        best_cv_score = searcher.best_score_
        row['best_cv_score'] = round(best_cv_score, 2)
        row['best_parameters'] = str(best_params)
    else:
        row['best_cv_score'] = None
        row['best_parameters'] = None

    labels = res['labels']
    f1_t = res['test']['f1_per_class']

    # Add per-class F1 columns for test set only
    for lbl, f1 in zip(labels, f1_t):
        row[f'test_f1_cls_{_safe_col(lbl)}'] = round(f1, 2)

    rows.append(row)

comparison_df = (
    pd.DataFrame(rows)
      .sort_values(by=['test_f1_macro'], ascending=False)
      .reset_index(drop=True)
)

best_model_name = comparison_df.iloc[0]['model']
best_model_results = results[best_model_name]

comparison_df_display = comparison_df.copy()
comparison_df_display['best_parameters'] = comparison_df_display['best_parameters'].apply(
    lambda x: json.dumps(x, indent=2) if isinstance(x, dict) else x
)
import os 

comparison_df_display.to_csv("../src/data/03_model_testing_results/model_comparison_without_resampling.csv", index=False)

# Display the comparison table with best parameters
print("=" * 100)
print("MODEL COMPARISON WITH BEST PARAMETERS FROM RANDOMIZEDSEARCHCV")
print("=" * 100)
display(comparison_df_display)


MODEL COMPARISON WITH BEST PARAMETERS FROM RANDOMIZEDSEARCHCV


Unnamed: 0,model,val_accuracy,val_f1_macro,test_accuracy,test_f1_macro,best_cv_score,best_parameters,val_f1_cls_0,val_f1_cls_1,val_f1_cls_2,val_f1_cls_3,val_f1_cls_4,test_f1_cls_0,test_f1_cls_1,test_f1_cls_2,test_f1_cls_3,test_f1_cls_4
0,XGBoost,0.98,0.9,0.98,0.9,0.9,"{'subsample': 0.8, 'reg_lambda': 0, 'reg_alpha...",0.99,0.8,0.94,0.79,0.99,0.99,0.81,0.95,0.79,0.98
1,KNN,0.98,0.89,0.98,0.88,0.88,"{'metric': 'manhattan', 'n_neighbors': 4, 'p':...",0.99,0.79,0.94,0.76,0.99,0.99,0.77,0.93,0.75,0.98
2,RandomForest,0.98,0.89,0.97,0.88,0.88,"{'n_estimators': 200, 'min_samples_split': 20,...",0.99,0.8,0.93,0.73,0.98,0.99,0.78,0.93,0.76,0.97
3,SVM,0.97,0.88,0.97,0.87,0.87,"{'kernel': 'poly', 'gamma': 0.01, 'C': 10}",0.99,0.76,0.91,0.77,0.99,0.99,0.74,0.91,0.72,0.98
4,DecisionTree,0.96,0.82,0.96,0.81,0.83,"{'splitter': 'best', 'min_samples_split': 20, ...",0.98,0.68,0.88,0.6,0.95,0.98,0.67,0.88,0.59,0.95
5,LogisticRegression,0.92,0.68,0.92,0.66,0.66,"{'C': 4.0428727350273315, 'penalty': 'l2', 'so...",0.95,0.61,0.46,0.44,0.93,0.95,0.56,0.44,0.45,0.92


In [None]:
# Check saved models
print("=" * 80)
print("SAVED MODELS INFORMATION")
print("=" * 80)

saved_models = model_saver.list_saved_models()
if saved_models:
    for model_key, info in saved_models.items():
        print(f"\nModel: {model_key}")
        print(f"  Exists: {info['exists']}")
        print(f"  Path: {info['model_path']}")
        if info['exists']:
            print(f"  Size: {info['size_bytes']} bytes")
            print(f"  Modified: {info['modified_time']}")
        
        # Load and display metadata if available
        if info['metadata_exists']:
            try:
                metadata = model_saver.load_metadata(model_key.split('_')[0], model_key.split('_')[1] if '_' in model_key else 'default')
                if metadata:
                    print(f"  Best Score: {metadata.get('best_score', 'N/A')}")
                    print(f"  Best Params: {metadata.get('best_params', 'N/A')}")
            except Exception as e:
                print(f"  Error loading metadata: {e}")
else:
    print("No saved models found.")


SAVED MODELS INFORMATION

Model: DecisionTree_no_sampling
  Exists: True
  Path: ../src/models/exploration_phase/DecisionTree_no_sampling.joblib
  Size: 232415 bytes
  Modified: 1760415820.9521165

Model: SVM_no_sampling
  Exists: True
  Path: ../src/models/exploration_phase/SVM_no_sampling.joblib
  Size: 12504219 bytes
  Modified: 1760415063.7689745

Model: KNN_no_sampling
  Exists: True
  Path: ../src/models/exploration_phase/KNN_no_sampling.joblib
  Size: 118528799 bytes
  Modified: 1760396388.739042

Model: RandomForest_no_sampling
  Exists: True
  Path: ../src/models/exploration_phase/RandomForest_no_sampling.joblib
  Size: 48327727 bytes
  Modified: 1760399598.0772789

Model: LogisticRegression_no_sampling
  Exists: True
  Path: ../src/models/exploration_phase/LogisticRegression_no_sampling.joblib
  Size: 26175 bytes
  Modified: 1760395829.1684663

Model: XGBoost_no_sampling
  Exists: True
  Path: ../src/models/exploration_phase/XGBoost_no_sampling.joblib
  Size: 2872222 bytes
  

### 3.2. With Sampling Methods

but without
- Feature Engineering ( RR-Interval! )
- baseline wandering removal
- denoising
- Leak-Free Scaling
- RepeatedStratifiedKFold


#### 3.2.1 Quick run - Using the best models from above

In [7]:
sampling_methods = {
    'No_Sampling': None,
    'RandomOverSampler': RandomOverSampler(random_state=random_state),
    'SMOTE': SMOTE(random_state=random_state, k_neighbors=5),
    'ADASYN': ADASYN(random_state=random_state, n_neighbors=5),
    'SMOTETomek': SMOTETomek(random_state=random_state, smote=SMOTE(random_state=random_state, k_neighbors=5)),
    'SMOTEENN': SMOTEENN(random_state=random_state, smote=SMOTE(random_state=random_state, k_neighbors=5)),
}

sampling_results = {}

best_models = {
    'KNN': best_knn,
    'RandomForest': best_rf,
    'XGBoost': best_xgb,
}

scale_sensitive = ['LogisticRegression', 'SVM', 'KNN']

print("Testing sampling methods on best models...")
print("=" * 80)

for sampling_name, sampler in sampling_methods.items():
    print(f"\nTesting {sampling_name}...")
    sampling_results[sampling_name] = {}
    
    for model_name, model in best_models.items():
        print(f"  - {model_name}")
        
        try:
            if sampler is None:
                # No sampling - use original, only scaled data
                result = eval_model(model,
                                    X_train_s if model_name in scale_sensitive else X_train , y_train,
                                    X_test_s if model_name in scale_sensitive else X_test, y_test)
            else:
                # Apply sampling on unscaled data
                X_train_sampled, y_train_sampled = sampler.fit_resample(X_train, y_train)
                
                # Re-scale if needed for models that require scaling
                if model_name in scale_sensitive:
                    scaler_sampling = StandardScaler()
                    X_train_sampled = scaler_sampling.fit_transform(X_train_sampled)
                    X_test_sampled = scaler_sampling.transform(X_test)
                else: # e.g. RF, XGBoost
                    X_test_sampled = X_test
            
                result = eval_model(
                    model,
                    X_train_sampled, y_train_sampled,
                    X_test_sampled, y_test,
                )
            
            sampling_results[sampling_name][model_name] = result
            
            # Printing statistics
            if sampler is not None:
                unique, counts = np.unique(y_train_sampled, return_counts=True)
                print(f"    Class distribution after {sampling_name}:")
                for cls, count in zip(unique, counts):
                    print(f"      Class {cls}: {count:,} samples")
            
        except Exception as e:
            print(f"    ERROR with {sampling_name} + {model_name}: {str(e)}")
            sampling_results[sampling_name][model_name] = None


NameError: name 'best_knn' is not defined

In [None]:

# Create comprehensive comparison table
print("\n" + "=" * 100)
print("SAMPLING METHODS COMPARISON")
print("=" * 100)

# Prepare comparison data
comparison_rows = []

for sampling_name, models_results in sampling_results.items():
    for model_name, result in models_results.items():
        if result is not None:
            row = {
                'sampling_method': sampling_name,
                'model': model_name,
                'test_accuracy': round(result['test']['accuracy'],2),
                'test_f1_macro': round(result['test']['f1_macro'],2)
            }
            
            # Add per-class F1 scores for test set only
            labels = result['labels']
            f1_t = result['test']['f1_per_class']
            
            for lbl, f1 in zip(labels, f1_t):
                row[f'test_f1_cls_{_safe_col(lbl)}'] = round(f1,2)
            
            comparison_rows.append(row)

# Create and display comparison DataFrame
sampling_comparison_df = (
    pd.DataFrame(comparison_rows)
    .sort_values(by=['test_f1_macro'], ascending=False)
    .reset_index(drop=True)
)

In [None]:
sampling_comparison_df.to_csv("../reports/03_model_testing_results/model_comparison_with_sampling_on_best_models.csv", index=False)

# Find best combination
best_sampling_model = sampling_comparison_df.iloc[0]
print(f"\nBEST COMBINATION:")
print(f"Sampling Method: {best_sampling_model['sampling_method']}")
print(f"Model: {best_sampling_model['model']}")
print(f"Test F1-Macro: {best_sampling_model['test_f1_macro']:.4f}")

# Summary statistics
print(f"\nSUMMARY STATISTICS:")
print(f"Total combinations tested: {len(comparison_rows)}")
print(f"Best test F1-macro: {sampling_comparison_df['test_f1_macro'].max():.4f}")

# Show top 5 combinations
print(f"\nTOP 5 COMBINATIONS:")
top_5 = sampling_comparison_df.head(5)[['sampling_method', 'model', 'test_f1_macro']]
display(top_5)


BEST COMBINATION:
Sampling Method: RandomOverSampler
Model: XGBoost
Test F1-Macro: 0.9200
Validation F1-Macro: 0.9200

SUMMARY STATISTICS:
Total combinations tested: 18
Best test F1-macro: 0.9200
Best validation F1-macro: 0.9200

TOP 5 COMBINATIONS:


Unnamed: 0,sampling_method,model,test_f1_macro,val_f1_macro
0,RandomOverSampler,XGBoost,0.92,0.92
1,SMOTE,XGBoost,0.91,0.91
2,SMOTETomek,XGBoost,0.91,0.91
3,No_Sampling,XGBoost,0.9,0.9
4,RandomOverSampler,RandomForest,0.9,0.9


#### 3.2.2 Extended Run: Sampling + RandomizedSearchCV

In [6]:

# Step 1: Model Training Phase
# This cell focuses only on training models with RandomizedSearchCV
# Evaluation is separated to prevent interruption of cross-validation runs

sampling_methods = {
    'RandomOverSampler': RandomOverSampler(random_state=random_state),
    'SMOTE': SMOTE(random_state=random_state, k_neighbors=5),
    'ADASYN': ADASYN(random_state=random_state, n_neighbors=5),
    'SMOTETomek': SMOTETomek(random_state=random_state, smote=SMOTE(random_state=random_state, k_neighbors=5)),
    'SMOTEENN': SMOTEENN(random_state=random_state, smote=SMOTE(random_state=random_state, k_neighbors=5)),
}

# Which models need scaling (no pipeline used; fit scaler once on the resampled training set)
scale_sensitive = ["LogisticRegression", "KNN", "SVM", "LDA", "ANN"]



In [None]:
print("Starting 3.2.2 TRAINING PHASE: Full RandomizedSearchCV for each model, per sampling method")
print("=" * 80)

for sampling_name, sampler in sampling_methods.items():
    print(f"\n=== Training with Sampling: {sampling_name} ===")

    # Apply sampling on original training set (before CV)
    try:
        if sampler is None:
            X_train_res, y_train_res = X_train, y_train
        else:
            X_train_res, y_train_res = sampler.fit_resample(X_train, y_train)
    except Exception as e:
        print(f"  Skipping sampling '{sampling_name}' due to error: {e}")
        continue

    # Show distribution if sampling applied
    if sampler is not None:
        unique, counts = np.unique(y_train_res, return_counts=True)
        print("  Class distribution after sampling:")
        for cls, cnt in zip(unique, counts):
            print(f"    Class {cls}: {cnt:,} samples")

    # For each model, run RS-CV on the resampled dataset (TRAINING ONLY)
    for model_name, spec in param_spaces.items():
        experiment_name = f"with_sampling_{sampling_name}"
        classifier_name = model_name

        # Prepare data (leak-prone scaling by design here; no pipelines)
        if model_name in scale_sensitive:
            scaler = StandardScaler()
            X_tr_fit = scaler.fit_transform(X_train_res)
        else:
            X_tr_fit = X_train_res

        # Train or load if already saved
        try:
            # Check if an RS-CV object already exists for this sampler+model
            if model_saver.model_exists(classifier_name, experiment_name):
                print(f"  [{model_name}] Exists for {experiment_name}. LoaLoading next one...")
            else:
                print(f"  [{model_name}] Training RS-CV for {experiment_name}...")
                rs = RandomizedSearchCV(
                    estimator=spec["estimator"],
                    param_distributions=spec["params"],
                    n_iter=spec["n_iter"],
                    scoring=scoring,
                    refit="f1_macro",
                    cv=spec["cv"],
                    random_state=random_state,
                    n_jobs=-1,
                    verbose=2,
                )
                rs.fit(X_tr_fit, y_train_res)

                metadata = {
                    "best_params": rs.best_params_,
                    "best_score": rs.best_score_,
                    "cv_results": rs.cv_results_,
                    "experiment": experiment_name,
                    "classifier": classifier_name,
                    "sampling_method": sampling_name,
                }
                model_saver.save_model(classifier_name, rs, experiment_name, metadata)
                print(f"  [{model_name}] Saved for {experiment_name}.")

        except Exception as e:
            print(f"  [{model_name}] ERROR for {experiment_name}: {e}")

print("\n" + "=" * 80)
print("TRAINING PHASE COMPLETED")
print("=" * 80)



In [7]:
# Step 2: Model Evaluation Phase
# This cell focuses only on evaluating the trained models
# This separation prevents interruption of cross-validation runs from affecting evaluation

print("Starting 3.2.2 EVALUATION PHASE: Evaluating trained models")
print("=" * 80)
# Path to results file
out_path = "../reports/03_model_testing_results/model_comparison_with_sampling_randomized_search.csv"

# Load existing results if CSV already exists
if os.path.exists(out_path):
    existing_df = pd.read_csv(out_path)
    print(f"Loaded existing results with {len(existing_df)} rows from {out_path}")
else:
    existing_df = pd.DataFrame()
    print(f"No existing results found. Will create {out_path}")

print("Starting 3.2.2 EVALUATION PHASE: Evaluating trained models")
print("=" * 80)

# Evaluation and saving
sampling_results = {}

for sampling_name, sampler in sampling_methods.items():
    print(f"\n=== Evaluating with Sampling: {sampling_name} ===")
    sampling_results[sampling_name] = {}

    # Apply sampling on original training set (before CV)
    try:
        if sampler is None:
            X_train_res, y_train_res = X_train, y_train
        else:
            X_train_res, y_train_res = sampler.fit_resample(X_train, y_train)
    except Exception as e:
        print(f"  Skipping sampling '{sampling_name}' due to error: {e}")
        continue

    for model_name, spec in param_spaces.items():
        experiment_name = f"with_sampling_{sampling_name}"
        classifier_name = model_name

        # Skip if already evaluated
        if not existing_df.empty and (
            (existing_df["sampling_method"] == sampling_name)
            & (existing_df["model"] == model_name)
        ).any():
            print(f"  [{model_name}] Skipping (already in CSV)")
            continue

        # Prepare scaled data if necessary
        if model_name in scale_sensitive:
            scaler = StandardScaler()
            X_tr_fit = scaler.fit_transform(X_train_res)
            X_te_fit = scaler.transform(X_test)
        else:
            X_tr_fit, X_te_fit = X_train_res, X_test

        try:
            # Load the trained model
            if model_saver.model_exists(classifier_name, experiment_name):
                print(f"  [{model_name}] Loading trained model for evaluation...")
                rs = model_saver.load_model(classifier_name, experiment_name)

                # Evaluate best estimator on test set
                print(f"  Evaluating model [{model_name}]")
                best_est = rs.best_estimator_
                res = eval_model(
                    best_est,
                    X_tr_fit, y_train_res,
                    X_te_fit, y_test,
                )

                sampling_results[sampling_name][model_name] = {"rs": rs, "eval": res}

                # Prepare single row result
                row = {
                    "sampling_method": sampling_name,
                    "model": model_name,
                    "test_accuracy": round(res["test"]["accuracy"], 2),
                    "test_f1_macro": round(res["test"]["f1_macro"], 2),
                    "best_cv_score": round(rs.best_score_, 2),
                    "best_parameters": json.dumps(rs.best_params_),
                }

                # Add per-class F1 scores
                labels = res["labels"]
                for lbl, f1 in zip(labels, res["test"]["f1_per_class"]):
                    row[f"test_f1_cls_{lbl}"] = round(float(f1), 2)

                # Convert to DataFrame and append immediately
                new_df = pd.DataFrame([row])
                header = not os.path.exists(out_path)
                new_df.to_csv(out_path, mode="a", index=False, header=header)
                print(f"  ✅ Result saved for [{model_name}] ({sampling_name})")

                # Update in-memory record too
                existing_df = pd.concat([existing_df, new_df], ignore_index=True)

            else:
                print(f"  [{model_name}] No trained model found for {experiment_name}")
                sampling_results[sampling_name][model_name] = None

        except Exception as e:
            print(f"  [{model_name}] ERROR for {experiment_name}: {e}")
            sampling_results[sampling_name][model_name] = None


# Final check summary
if not existing_df.empty:
    print("\n" + "=" * 100)
    print("CURRENT MODEL EVALUATION SUMMARY")
    print("=" * 100)
    display(
        existing_df[["sampling_method", "model", "test_f1_macro"]]
        .sort_values(by=["test_f1_macro"], ascending=False)
        .head(10)
    )
else:
    print("No results to display.")

print("\n" + "=" * 80)
print("EVALUATION PHASE COMPLETED")
print("=" * 80)


Starting 3.2.2 EVALUATION PHASE: Evaluating trained models
No existing results found. Will create ../reports/03_model_testing_results/model_comparison_with_sampling_randomized_search.csv
Starting 3.2.2 EVALUATION PHASE: Evaluating trained models

=== Evaluating with Sampling: RandomOverSampler ===


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LogisticRegression_with_sampling_RandomOverSampler.joblib


  [LogisticRegression] Loading trained model for evaluation...
  Evaluating model [LogisticRegression]
  ✅ Result saved for [LogisticRegression] (RandomOverSampler)
  [KNN] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/KNN_with_sampling_RandomOverSampler.joblib


  Evaluating model [KNN]
  ✅ Result saved for [KNN] (RandomOverSampler)
  [RandomForest] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/RandomForest_with_sampling_RandomOverSampler.joblib


  Evaluating model [RandomForest]
  ✅ Result saved for [RandomForest] (RandomOverSampler)
  [SVM] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/SVM_with_sampling_RandomOverSampler.joblib


  Evaluating model [SVM]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/DecisionTree_with_sampling_RandomOverSampler.joblib


  ✅ Result saved for [SVM] (RandomOverSampler)
  [DecisionTree] Loading trained model for evaluation...
  Evaluating model [DecisionTree]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/XGBoost_with_sampling_RandomOverSampler.joblib


  ✅ Result saved for [DecisionTree] (RandomOverSampler)
  [XGBoost] Loading trained model for evaluation...
  Evaluating model [XGBoost]
  ✅ Result saved for [XGBoost] (RandomOverSampler)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LDA_with_sampling_RandomOverSampler.joblib


  [LDA] Loading trained model for evaluation...
  Evaluating model [LDA]
  ✅ Result saved for [LDA] (RandomOverSampler)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/ANN_with_sampling_RandomOverSampler.joblib


  [ANN] Loading trained model for evaluation...
  Evaluating model [ANN]
  ✅ Result saved for [ANN] (RandomOverSampler)

=== Evaluating with Sampling: SMOTE ===


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LogisticRegression_with_sampling_SMOTE.joblib


  [LogisticRegression] Loading trained model for evaluation...
  Evaluating model [LogisticRegression]
  ✅ Result saved for [LogisticRegression] (SMOTE)
  [KNN] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/KNN_with_sampling_SMOTE.joblib


  Evaluating model [KNN]
  ✅ Result saved for [KNN] (SMOTE)
  [RandomForest] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/RandomForest_with_sampling_SMOTE.joblib


  Evaluating model [RandomForest]
  ✅ Result saved for [RandomForest] (SMOTE)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/SVM_with_sampling_SMOTE.joblib


  [SVM] Loading trained model for evaluation...
  Evaluating model [SVM]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/DecisionTree_with_sampling_SMOTE.joblib


  ✅ Result saved for [SVM] (SMOTE)
  [DecisionTree] Loading trained model for evaluation...
  Evaluating model [DecisionTree]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/XGBoost_with_sampling_SMOTE.joblib


  ✅ Result saved for [DecisionTree] (SMOTE)
  [XGBoost] Loading trained model for evaluation...
  Evaluating model [XGBoost]
  ✅ Result saved for [XGBoost] (SMOTE)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LDA_with_sampling_SMOTE.joblib


  [LDA] Loading trained model for evaluation...
  Evaluating model [LDA]
  ✅ Result saved for [LDA] (SMOTE)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/ANN_with_sampling_SMOTE.joblib


  [ANN] Loading trained model for evaluation...
  Evaluating model [ANN]
  ✅ Result saved for [ANN] (SMOTE)

=== Evaluating with Sampling: ADASYN ===


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LogisticRegression_with_sampling_ADASYN.joblib


  [LogisticRegression] Loading trained model for evaluation...
  Evaluating model [LogisticRegression]
  ✅ Result saved for [LogisticRegression] (ADASYN)
  [KNN] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/KNN_with_sampling_ADASYN.joblib


  Evaluating model [KNN]
  ✅ Result saved for [KNN] (ADASYN)
  [RandomForest] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/RandomForest_with_sampling_ADASYN.joblib


  Evaluating model [RandomForest]
  ✅ Result saved for [RandomForest] (ADASYN)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/SVM_with_sampling_ADASYN.joblib


  [SVM] Loading trained model for evaluation...
  Evaluating model [SVM]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/DecisionTree_with_sampling_ADASYN.joblib


  ✅ Result saved for [SVM] (ADASYN)
  [DecisionTree] Loading trained model for evaluation...
  Evaluating model [DecisionTree]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/XGBoost_with_sampling_ADASYN.joblib


  ✅ Result saved for [DecisionTree] (ADASYN)
  [XGBoost] Loading trained model for evaluation...
  Evaluating model [XGBoost]
  ✅ Result saved for [XGBoost] (ADASYN)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LDA_with_sampling_ADASYN.joblib


  [LDA] Loading trained model for evaluation...
  Evaluating model [LDA]
  ✅ Result saved for [LDA] (ADASYN)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/ANN_with_sampling_ADASYN.joblib


  [ANN] Loading trained model for evaluation...
  Evaluating model [ANN]
  ✅ Result saved for [ANN] (ADASYN)

=== Evaluating with Sampling: SMOTETomek ===


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LogisticRegression_with_sampling_SMOTETomek.joblib


  [LogisticRegression] Loading trained model for evaluation...
  Evaluating model [LogisticRegression]
  ✅ Result saved for [LogisticRegression] (SMOTETomek)
  [KNN] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/KNN_with_sampling_SMOTETomek.joblib


  Evaluating model [KNN]
  ✅ Result saved for [KNN] (SMOTETomek)
  [RandomForest] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/RandomForest_with_sampling_SMOTETomek.joblib


  Evaluating model [RandomForest]
  ✅ Result saved for [RandomForest] (SMOTETomek)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/SVM_with_sampling_SMOTETomek.joblib


  [SVM] Loading trained model for evaluation...
  Evaluating model [SVM]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/DecisionTree_with_sampling_SMOTETomek.joblib


  ✅ Result saved for [SVM] (SMOTETomek)
  [DecisionTree] Loading trained model for evaluation...
  Evaluating model [DecisionTree]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/XGBoost_with_sampling_SMOTETomek.joblib


  ✅ Result saved for [DecisionTree] (SMOTETomek)
  [XGBoost] Loading trained model for evaluation...
  Evaluating model [XGBoost]
  ✅ Result saved for [XGBoost] (SMOTETomek)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LDA_with_sampling_SMOTETomek.joblib


  [LDA] Loading trained model for evaluation...
  Evaluating model [LDA]
  ✅ Result saved for [LDA] (SMOTETomek)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/ANN_with_sampling_SMOTETomek.joblib


  [ANN] Loading trained model for evaluation...
  Evaluating model [ANN]
  ✅ Result saved for [ANN] (SMOTETomek)

=== Evaluating with Sampling: SMOTEENN ===


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LogisticRegression_with_sampling_SMOTEENN.joblib


  [LogisticRegression] Loading trained model for evaluation...
  Evaluating model [LogisticRegression]
  ✅ Result saved for [LogisticRegression] (SMOTEENN)
  [KNN] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/KNN_with_sampling_SMOTEENN.joblib


  Evaluating model [KNN]
  ✅ Result saved for [KNN] (SMOTEENN)
  [RandomForest] Loading trained model for evaluation...


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/RandomForest_with_sampling_SMOTEENN.joblib


  Evaluating model [RandomForest]
  ✅ Result saved for [RandomForest] (SMOTEENN)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/SVM_with_sampling_SMOTEENN.joblib


  [SVM] Loading trained model for evaluation...
  Evaluating model [SVM]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/DecisionTree_with_sampling_SMOTEENN.joblib


  ✅ Result saved for [SVM] (SMOTEENN)
  [DecisionTree] Loading trained model for evaluation...
  Evaluating model [DecisionTree]


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/XGBoost_with_sampling_SMOTEENN.joblib


  ✅ Result saved for [DecisionTree] (SMOTEENN)
  [XGBoost] Loading trained model for evaluation...
  Evaluating model [XGBoost]
  ✅ Result saved for [XGBoost] (SMOTEENN)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/LDA_with_sampling_SMOTEENN.joblib


  [LDA] Loading trained model for evaluation...
  Evaluating model [LDA]
  ✅ Result saved for [LDA] (SMOTEENN)


INFO:src.utils.model_saver:Model loaded: ../src/models/exploration_phase/ANN_with_sampling_SMOTEENN.joblib


  [ANN] Loading trained model for evaluation...
  Evaluating model [ANN]
  ✅ Result saved for [ANN] (SMOTEENN)

CURRENT MODEL EVALUATION SUMMARY


Unnamed: 0,sampling_method,model,test_f1_macro
5,RandomOverSampler,XGBoost,0.92
13,SMOTE,XGBoost,0.91
29,SMOTETomek,XGBoost,0.91
21,ADASYN,XGBoost,0.9
23,ADASYN,ANN,0.89
37,SMOTEENN,XGBoost,0.89
7,RandomOverSampler,ANN,0.88
15,SMOTE,ANN,0.88
31,SMOTETomek,ANN,0.88
27,SMOTETomek,SVM,0.86



EVALUATION PHASE COMPLETED


## 4. GridSearch - Final run on 3 best models

- test with an without outlier removal
- GridSearchCV on same parameter spaces
- using common train test dataset
- RepeatedStratifiedKFold:
    - single CV split can be "lucky" or "unlucky" --> dependency how data is shuffled
    - repeating stratified k-fold with different shuffles averages out randomness
    - more stable, less noisy estimates of performance
- Implement Leak-free scaling
    - current: without pipeline: scale once on the full training set, then do CV on the already sclaed data --> scaler "saw" all CV folds, including each folds validation part --> data leakage
    - makes CV too optimisic?
    - pipeline fits the scaler only on each training fold, then applieos it to that folds validation split. 
    - solution: Pipeline(StandardScaler(), model) so scaling is fit per CV fold
- Try to optimize the signal - run best models with the optimized signal and new features
    - add RR-Interval as feature
    - add new Target "not_normal" in MIT to compare to PTB
    - baseline wandering removal
    - denoising
- target models. XGBoost, ANN, SVM