# 4 Find best "simple" model for given dataset + Feature Engineering

This part provides a pipeline for heartbeat classification based on the requirements from `notebooks/03_model_testing_example_mit.ipynb`.

### What the notebook does

- The notebook builds a leak-free pipeline per experiment:
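  - [Optional] `StandardScaler` → [Optional] `Sampler` (e.g., SMOTE) → `Classifier` (note: `create_leak_free_pipeline` in this notebook currently adds only the sampler and classifier steps; no scaler step is inserted)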
  - This pipeline is passed into `GridSearchCV` with `RepeatedStratifiedKFold`.

- Data loading:
  - Loads preprocessed MIT-BIH training split (`X_train`, `y_train`) and a held-out validation split (`X_val`, `y_val`) from `data/processed/mitbih/`.
  - When `remove_outliers=True`, outlier removal is applied to the training split only; the validation split is never altered.
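  - Applies per-beat signal transformations (baseline wander removal, then denoising) to both the training and the validation split. These filters process each beat independently and are not fitted on any data, so they do not leak information between splits.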

- Inside each CV fold (leak-free):
  1. Split the training data into `train_fold` and `val_fold` (internal to CV).
  2. Fit `StandardScaler` on `train_fold` only (If Scaling applies)
  3. Fit the `Sampler` (e.g., SMOTE) on `train_fold` only.
  4. Train the classifier on the resampled `train_fold`.
  5. Evaluate on the untouched `val_fold`.
  6. Repeat across folds; aggregate metrics and select best hyperparameters.

- After CV:
  - Refit the best pipeline on the full training data (`X_train`, `y_train`).
  - Evaluate on the held-out validation set (`X_val`, `y_val`) which was never sampled or transformed using training information.
  - Append results to `reports/03_model_testing_results/04_02_model_comparison_grid_search_fe_best_models.csv` and save the fitted model artifact.

### Explanation

- Sampling (oversampling/undersampling) is performed only after each fold’s split and only on the training fold within the CV loop.
- The validation fold in CV and the final held-out validation set are kept untouched, preventing information leakage.
- This follows best practices consistently recommended in the literature for imbalanced learning and model evaluation.
- Accordingly, reported performance (e.g., accuracy/F1) reflects a trustworthy estimate; high scores are plausible on MIT-BIH when methodology is correct.

### Minimal data-flow

1. Load processed MIT-BIH data:
   - `X_train`, `y_train` (base or with outlier removal applied to training only)
   - `X_val`, `y_val` (always untouched)
2. For each model/sampling setting:
   - Construct pipeline: `[Scaler?] -> [Sampler?] -> Classifier`
   - Run `GridSearchCV` with `RepeatedStratifiedKFold`
     - Per fold: fit scaler/sampler on `train_fold` only; score on `val_fold`
3. Select best params; refit on full `X_train`, `y_train`
4. Evaluate on `X_val`, `y_val`; log metrics and save model


## 1. Imports

In [None]:
import os
from pathlib import Path
import json
import warnings
from typing import Dict, List, Optional, Tuple, Union

# Show the current working directory (the `src.` imports below assume it is the project root)
print(os.getcwd())

import pandas as pd
import numpy as np

# ML libraries
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold
from sklearn.metrics import (
    accuracy_score, precision_recall_fscore_support, confusion_matrix
)
from sklearn import set_config

# Models
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import xgboost as xgb

# Sampling
from imblearn.pipeline import Pipeline


# Custom utilities
from src.utils.preprocessing import (
    load_processed_dataset,
    DatasetSplit,
    build_full_suffix as pp_build_full_suffix,
    generate_all_processed_datasets,
    _normalize_sampling_method_name,
    _SAMPLING_REGISTRY
)
from src.utils.evaluation import evaluate_model
from src.utils.model_saver import create_model_saver

import numpy as np
from typing import Optional, Dict, Union, Tuple
from scipy.signal import butter, filtfilt, medfilt
import pywt
import os, json
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

from src.utils.model_saver import create_model_saver
from src.utils.preprocessing import (
    load_processed_dataset,
    build_full_suffix as pp_build_full_suffix,   # if not exported, import as in your notebook (pp_build_full_suffix alias)
    generate_all_processed_datasets,
)



# Input type accepted by the filter helpers below: a NumPy array or a (nested) list.
ArrayLike = Union[np.ndarray, list]


# Suppress warnings for cleaner output
# NOTE(review): this silences *every* warning category for the whole session,
# including convergence and deprecation warnings raised during model fitting.
warnings.filterwarnings('ignore')

/home/christianm/Projects/Repos/heartbeat_classification


## 2. Constants & Param Spaces

In [2]:
# Paths and config
DATA_DIR = "data/processed/mitbih"
RANDOM_STATE = 42
# Metrics computed for every CV fit; run_grid_search refits on 'f1_macro'.
SCORING = {'f1_macro': 'f1_macro', 'bal_acc': 'balanced_accuracy', 'f1_weighted': 'f1_weighted'}
# CSV file that every finished experiment appends one result row to.
results_csv = "reports/03_model_testing_results/04_02_model_comparison_grid_search_fe_best_models.csv"

# Per-model search configuration. Each entry provides:
#   "estimator": the unfitted classifier instance placed in the pipeline,
#   "params":    the hyper-parameter grid (keys are prefixed with
#                "classifier__" by run_grid_search before use),
#   "cv":        the cross-validation splitter (5 folds x 3 repeats = 15 fits
#                per candidate).
PARAM_SPACES = {
    "XGBoost": {
        "estimator": xgb.XGBClassifier(
            objective="multi:softmax",
            num_class=5,
            random_state=RANDOM_STATE,
            n_jobs=-1,
            eval_metric="mlogloss",
        ),
        "params": {
            "n_estimators": [150, 200, 250],
            "max_depth": [8, 9],
            "learning_rate": [0.2],
            "subsample": [0.7, 0.8],
            "colsample_bytree": [0.9],
            "reg_alpha": [0.1, 0.2],
            "reg_lambda": [0.0, 0.05],
            "min_child_weight": [5],
            "gamma": [0.0, 0.05],
        },
        "cv": RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=RANDOM_STATE),
    },
    "ANN": {
        "estimator": MLPClassifier(
            max_iter=300,
            early_stopping=True,
            random_state=RANDOM_STATE,
            n_iter_no_change=10,
            solver="adam",
        ),
        "params": {
            "hidden_layer_sizes": [(128, 64)],
            "activation": ["relu"],
            "alpha": [3e-4],
            "learning_rate_init": [0.001, 0.0015],
            "batch_size": [96, 128],
            "beta_1": [0.9, 0.91],
            "beta_2": [0.97, 0.974],
            "validation_fraction": [0.1],
        },
        "cv": RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=RANDOM_STATE),
    },
    # Best parameters found in an earlier search:
    # {'clf__kernel': 'rbf', 'clf__gamma': 0.5, 'clf__C': 10}
    # The grid below only explores gamma around that optimum.
    "SVM": {
        "estimator": SVC(),
        "params": {
            "kernel": ["rbf"],
            "C": [10],
            "gamma": [0.4, 0.5, 0.6],
        },
        "cv": RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=RANDOM_STATE),
    },
}

# NOTE(review): redundant re-assignment; DATA_DIR already holds this value
# from the top of this cell.
DATA_DIR = "data/processed/mitbih"


## 3. Methods used


- Baseline wander removal: removes slow drift (<0.5 Hz) caused by respiration/motion so classifiers see a stable isoelectric line. Typical approach is a high‑pass filter around 0.5 Hz or subtracting a smoothed baseline (median/LP filter).
- Denoising: suppresses high‑frequency noise (muscle, mains, sensor). Typical approach is a low‑pass or band‑pass (e.g., 0.5–40 Hz) or wavelet soft‑thresholding.

In [3]:

# --------- Filter helpers ---------
def _butter_highpass(cut_hz: float, fs: float, order: int = 3) -> Tuple[np.ndarray, np.ndarray]:
    """Design a Butterworth high-pass filter.

    Args:
        cut_hz: Cutoff frequency in Hz.
        fs: Sampling rate in Hz.
        order: Filter order.

    Returns:
        The ``(b, a)`` coefficient arrays produced by ``scipy.signal.butter``.
    """
    # Express the cutoff as a fraction of the Nyquist frequency (fs / 2).
    normalized_cutoff = cut_hz / (0.5 * fs)

    # butter() needs 0 < Wn < 1; pull out-of-range values just inside.
    if normalized_cutoff < 1e-6:
        normalized_cutoff = 1e-6
    elif normalized_cutoff > 0.999999:
        normalized_cutoff = 0.999999

    return butter(order, normalized_cutoff, btype="highpass")

def _butter_bandpass(low_hz: float, high_hz: float, fs: float, order: int = 4) -> Tuple[np.ndarray, np.ndarray]:
    """Design a Butterworth band-pass filter, degrading to low-pass if needed.

    Args:
        low_hz: Lower band edge in Hz.
        high_hz: Upper band edge in Hz.
        fs: Sampling rate in Hz.
        order: Filter order passed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient arrays. When the clamped lower edge is not
        strictly below the clamped upper edge, a low-pass filter at the upper
        edge is returned instead of a band-pass.
    """
    nyquist = 0.5 * fs
    # Normalize both edges to the Nyquist frequency and keep them inside (0, 1).
    lower_edge = max(low_hz / nyquist, 1e-6)
    upper_edge = min(high_hz / nyquist, 0.999999)

    if lower_edge < upper_edge:
        return butter(order, [lower_edge, upper_edge], btype="bandpass")

    # The band edges crossed: only the upper edge is usable.
    return butter(order, upper_edge, btype="lowpass")

# --------- Baseline wander removal ---------
def remove_baseline(
    x: ArrayLike,
    fs: float = 125.0,
    method: str = "highpass",
    hp_cut_hz: float = 0.5,
    hp_order: int = 3,
    median_win_sec: float = 0.6,   # for 'median' method ~ respiratory baseline
    median_smooth_sec: float = 0.2 # pre-smoothing for robustness
) -> np.ndarray:
    """
    Remove baseline wander from one ECG segment or a batch of segments.

    Args:
        x: 1-D array (single beat) or 2-D array (n_samples, n_points).
        fs: Sampling rate in Hz.
        method: 'highpass' (zero-phase Butterworth high-pass) or 'median'
            (subtract a two-stage median-filter baseline estimate).
            Matching is case-insensitive.
        hp_cut_hz: High-pass cutoff in Hz ('highpass' only).
        hp_order: High-pass filter order ('highpass' only).
        median_win_sec: Window of the second (baseline) median filter in
            seconds ('median' only).
        median_smooth_sec: Window of the first (pre-smoothing) median filter
            in seconds ('median' only).

    Returns:
        Array with the same shape as the input: 1-D input yields 1-D output,
        a 2-D batch keeps its leading dimension even if it holds one row.

    Raises:
        ValueError: If ``method`` is neither 'highpass' nor 'median'.
    """
    method_key = method.lower()
    if method_key not in ("highpass", "median"):
        # Previously any unknown name silently ran the median branch.
        raise ValueError(
            f"Unknown baseline removal method {method!r}; expected 'highpass' or 'median'."
        )

    X = np.asarray(x, dtype=float)
    # Remember whether the caller passed a single segment so that only that
    # case is unwrapped again on return (a blanket squeeze() would also
    # collapse a one-row batch to 1-D and break downstream 2-D consumers).
    single_segment = X.ndim == 1
    if single_segment:
        X = X[None, :]

    X_out = np.empty_like(X)

    if method_key == "highpass":
        b, a = _butter_highpass(hp_cut_hz, fs, order=hp_order)
        for i, xi in enumerate(X):
            # zero-mean before filtering helps with stability on short segments
            xi_d = xi - np.mean(xi)
            # Gustafsson's method needs no edge padding, so it also works on
            # segments that are short relative to the filter.
            X_out[i] = filtfilt(b, a, xi_d, method="gust")
    else:
        # Two-stage median baseline estimate (popular ECG heuristic).
        # medfilt requires odd kernel sizes; "| 1" forces the low bit.
        k1 = max(3, int(round(median_smooth_sec * fs)) | 1)  # odd
        k2 = max(3, int(round(median_win_sec * fs)) | 1)
        for i, xi in enumerate(X):
            smooth = medfilt(xi, kernel_size=k1)
            baseline = medfilt(smooth, kernel_size=k2)
            X_out[i] = xi - baseline

    return X_out[0] if single_segment else X_out

# --------- Denoising ---------
def denoise_signal(
    x: ArrayLike,
    fs: float = 125.0,
    method: str = "bandpass",
    bp_low_hz: float = 0.5,
    bp_high_hz: float = 40.0,
    bp_order: int = 4,
    wavelet: str = "db6",
    wavelet_level: Optional[int] = None,
    wavelet_mode: str = "soft",
) -> np.ndarray:
    """
    Suppress noise in one ECG segment or a batch of segments.

    Args:
        x: 1-D array (single beat) or 2-D array (n_samples, n_points).
        fs: Sampling rate in Hz.
        method: 'bandpass' (0.5-40 Hz typical ECG) or 'wavelet'
            (threshold the detail coefficients). Matching is case-insensitive.
        bp_low_hz: Lower band edge in Hz ('bandpass' only).
        bp_high_hz: Upper band edge in Hz ('bandpass' only).
        bp_order: Butterworth order ('bandpass' only).
        wavelet: Wavelet name passed to ``pywt`` ('wavelet' only).
        wavelet_level: Decomposition level; ``None`` is forwarded to
            ``pywt.wavedec`` unchanged ('wavelet' only).
        wavelet_mode: Thresholding mode passed to ``pywt.threshold``
            ('wavelet' only).

    Returns:
        Array with the same shape as the input: 1-D input yields 1-D output,
        a 2-D batch keeps its leading dimension even if it holds one row.

    Raises:
        ValueError: If ``method`` is neither 'bandpass' nor 'wavelet'.
    """
    method_key = method.lower()
    if method_key not in ("bandpass", "wavelet"):
        # Previously any unknown name silently ran the wavelet branch.
        raise ValueError(
            f"Unknown denoising method {method!r}; expected 'bandpass' or 'wavelet'."
        )

    X = np.asarray(x, dtype=float)
    # Only unwrap on return when the caller really passed a single segment;
    # a blanket squeeze() would also flatten a one-row batch.
    single_segment = X.ndim == 1
    if single_segment:
        X = X[None, :]

    X_out = np.empty_like(X)

    if method_key == "bandpass":
        b, a = _butter_bandpass(bp_low_hz, bp_high_hz, fs, order=bp_order)
        for i, xi in enumerate(X):
            # Zero-phase filtering; Gustafsson's method needs no edge padding.
            X_out[i] = filtfilt(b, a, xi, method="gust")
    else:
        # Wavelet thresholding of the detail coefficients
        for i, xi in enumerate(X):
            coeffs = pywt.wavedec(xi, wavelet=wavelet, level=wavelet_level)
            # Universal threshold based on detail coeffs at finest scale:
            # sigma is the MAD-based noise estimate (the epsilon avoids a
            # zero threshold scale on perfectly flat input).
            detail = coeffs[-1]
            sigma = np.median(np.abs(detail)) / 0.6745 + 1e-12
            thr = sigma * np.sqrt(2 * np.log(len(xi)))
            # Keep the approximation coefficients, threshold every detail band.
            coeffs_th = [coeffs[0]] + [pywt.threshold(c, thr, mode=wavelet_mode) for c in coeffs[1:]]
            # The reconstruction is cut back to the input length before storing.
            X_out[i] = pywt.waverec(coeffs_th, wavelet=wavelet)[: len(xi)]

    return X_out[0] if single_segment else X_out

# --------- Combined ---------
def preprocess_ecg_segments(
    X: np.ndarray,
    fs: float = 125.0,
    baseline_cfg: Optional[Dict] = None,
    denoise_cfg: Optional[Dict] = None,
) -> np.ndarray:
    """
    Run the optional ECG clean-up steps on a batch of segments.

    The input is converted to a float array, then baseline removal (if
    ``baseline_cfg`` is given) and denoising (if ``denoise_cfg`` is given)
    are applied in that order. Each config dict is forwarded as keyword
    arguments to the corresponding step, e.g.:
      baseline_cfg={'method':'highpass', 'hp_cut_hz':0.5}
      denoise_cfg={'method':'bandpass', 'bp_low_hz':0.5, 'bp_high_hz':40}

    Passing ``None`` for a config skips that step.
    """
    segments = np.asarray(X, dtype=float)

    # Nothing requested: hand back the float-converted input.
    if baseline_cfg is None and denoise_cfg is None:
        return segments

    if baseline_cfg is not None:
        segments = remove_baseline(segments, fs=fs, **baseline_cfg)

    if denoise_cfg is not None:
        segments = denoise_signal(segments, fs=fs, **denoise_cfg)

    return segments

In [4]:
# Pretty, Jupyter-native diagram (works in notebooks)
def show_pipeline_diagram(pipe: Pipeline) -> None:
    """Display ``pipe`` using scikit-learn's diagram representation.

    Sets scikit-learn's ``display`` option to ``"diagram"`` via ``set_config``
    and then hands the pipeline to ``display``.

    NOTE(review): ``display`` is not imported in this file; presumably it is
    the helper IPython makes available inside notebooks - confirm before
    calling this from a plain script.
    """
    set_config(display="diagram")
    display(pipe)  # Jupyter display


def create_leak_free_pipeline(
    model_name: str,
    estimator,
    sampling_method: Optional[str] = "none",
    sampler_kwargs: Optional[Dict] = None,
    random_state: Optional[int] = 42,
) -> Pipeline:
    """
    Build a leak-free pipeline: ``[sampler?] -> classifier``.

    Using imblearn.Pipeline ensures fit/transform of SAMPLER happen within
    each CV fold on TRAIN only.

    Args:
        model_name: Name of the model (currently unused; kept for the
            existing call signature).
        estimator: Classifier instance used as the final "classifier" step.
        sampling_method: Sampling method name; it is normalized with
            ``_normalize_sampling_method_name`` and looked up in
            ``_SAMPLING_REGISTRY``.
        sampler_kwargs: Extra keyword arguments for the sampler constructor.
            The dict is copied, never mutated.
        random_state: Default seed given to the sampler when the caller did
            not set one and the sampler accepts a ``random_state`` argument.

    Returns:
        An ``imblearn`` Pipeline with steps named "sampler" (optional) and
        "classifier".

    Raises:
        KeyError: If the normalized sampling name is not in the registry.
    """
    import inspect  # local import: only needed for the constructor check below

    sampler_kwargs = dict(sampler_kwargs or {})

    internal_name = _normalize_sampling_method_name(sampling_method)

    # NOTE(review): how the registry represents "no sampling" is not visible
    # from this file. Both a ``None`` normalized name and a ``None`` registry
    # entry are treated as "no sampler step" instead of crashing on
    # ``None(**kwargs)``.
    sampler_cls = None if internal_name is None else _SAMPLING_REGISTRY[internal_name]

    steps = []

    if sampler_cls is not None:
        # Provide a default random_state to samplers if not overridden, but
        # only when the constructor can take it (some samplers are
        # deterministic and reject the argument).
        if random_state is not None and "random_state" not in sampler_kwargs:
            try:
                ctor_params = inspect.signature(sampler_cls).parameters
                accepts_seed = "random_state" in ctor_params or any(
                    p.kind is inspect.Parameter.VAR_KEYWORD for p in ctor_params.values()
                )
            except (TypeError, ValueError):
                # Signature not introspectable: keep the previous behaviour.
                accepts_seed = True
            if accepts_seed:
                sampler_kwargs["random_state"] = random_state

        steps.append(("sampler", sampler_cls(**sampler_kwargs)))

    steps.append(("classifier", estimator))
    display(steps)
    return Pipeline(steps)


def prepare_dataset_with_sampling(
    data_dir: str = DATA_DIR,
    sampling_method: str = "No_Sampling",
    remove_outliers: bool = False
) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray, Optional[np.ndarray]]:
    """Load the processed dataset that matches the given configuration.

    ``generate_all_processed_datasets`` is called first with
    ``only_once=True`` (per the original inline note this is a no-op when the
    datasets already exist - not verifiable from this file). The split is
    then loaded using the suffix built from ``sampling_method`` and
    ``remove_outliers``.

    Returns:
        ``(X_train, X_val, y_train, y_val)`` as NumPy arrays. ``X_val`` and
        ``y_val`` are ``None`` when the loaded split has no validation part.
    """
    # Ensure all datasets are generated once (no-op if already done)
    generate_all_processed_datasets(data_dir=data_dir, only_once=True)

    split = load_processed_dataset(
        data_dir=data_dir,
        sampling_suffix=pp_build_full_suffix(sampling_method, remove_outliers),
    )

    X_train_res = split.X_train.values
    y_train_res = split.y_train.values

    # The validation part is optional on the loaded split.
    X_val = None if split.X_val is None else split.X_val.values
    y_val = None if split.y_val is None else split.y_val.values

    return X_train_res, X_val, y_train_res, y_val


def run_grid_search(
    model_name: str,
    sampling_method: str = "No_Sampling",
    remove_outliers: bool = False,
    model_saver=None,
    results_dir: str = "reports/comprehensive_model_testing"
) -> Optional[Dict]:
    """
    Run GridSearchCV for a specific model and sampling method.

    Steps: skip if a saved model already exists, load the unsampled data,
    apply baseline removal and denoising to train and validation, tune the
    ``[sampler] -> classifier`` pipeline with GridSearchCV, save it, evaluate
    the refitted best pipeline on the held-out validation split and append
    one row to ``results_csv``.

    Args:
        model_name: Name of the model to train (key of ``PARAM_SPACES``)
        sampling_method: Sampling method applied inside the pipeline
        remove_outliers: Whether to load the outlier-removed training data
        model_saver: Model saver instance (optional)
        results_dir: Currently unused; kept for the existing call signature

    Returns:
        Dictionary with results, or ``None`` when a saved model already
        exists for this experiment (nothing is trained or appended then).

    Raises:
        KeyError: If ``model_name`` is not in ``PARAM_SPACES``.
        ValueError: If the loaded dataset has no validation split.
    """
    print(f"\n{'='*80}")
    print(f"Running GridSearchCV for {model_name} with {sampling_method}")
    print(f"Outlier removal: {remove_outliers}")
    print(f"{'='*80}")

    # Get model configuration
    model_config = PARAM_SPACES[model_name]
    estimator = model_config["estimator"]
    params = model_config["params"]
    cv = model_config["cv"]

    # Create experiment name
    experiment_name = f"{sampling_method.lower()}_outliers_{remove_outliers}"

    # Check for an existing model *before* loading and filtering the data, so
    # a skipped experiment costs nothing.
    if model_saver and model_saver.model_exists(model_name, experiment_name):
        print(f"Model {model_name} already exists for experiment {experiment_name}. Skipping training and CSV append.")
        return None

    # Prepare data
    X_train, X_val, y_train, y_val = prepare_dataset_with_sampling(
        sampling_method="No_Sampling", # using non-sampled method for training - apply sampling inside pipeline
        remove_outliers=remove_outliers
    )
    if X_val is None or y_val is None:
        # Fail early with a clear message rather than deep inside the filters.
        raise ValueError(
            f"No validation split available for {model_name} / {experiment_name}; cannot evaluate."
        )

    baseline_cfg = {'method': 'highpass', 'hp_cut_hz': 0.5, 'hp_order': 3}
    denoise_cfg  = {'method': 'bandpass', 'bp_low_hz': 0.5, 'bp_high_hz': 40.0, 'bp_order': 4}

    fs = 125.0  # set your actual sampling rate if known
    # Per-segment filtering is applied identically to train and validation.
    X_train = preprocess_ecg_segments(X_train, fs=fs, baseline_cfg=baseline_cfg, denoise_cfg=denoise_cfg)
    X_val   = preprocess_ecg_segments(X_val,   fs=fs, baseline_cfg=baseline_cfg, denoise_cfg=denoise_cfg)

    # Create leak-free pipeline
    pipeline = create_leak_free_pipeline(model_name, estimator, sampling_method)

    # Adjust parameter names for pipeline (the final step is named "classifier")
    pipeline_params = {
        f'classifier__{param_name}': param_values
        for param_name, param_values in params.items()
    }

    print(f"Training new model for {model_name}...")

    # Run GridSearchCV
    grid_search = GridSearchCV(
        estimator=pipeline,
        param_grid=pipeline_params,
        scoring=SCORING,
        refit='f1_macro',
        cv=cv,
        n_jobs=-1,
        verbose=3
    )

    grid_search.fit(X_train, y_train)

    # Save model if saver is provided
    if model_saver:
        metadata = {
            'best_params': grid_search.best_params_,
            'best_score': grid_search.best_score_,
            'cv_results': grid_search.cv_results_,
            'experiment': experiment_name,
            'classifier': model_name,
            'sampling_method': sampling_method,
            'remove_outliers': remove_outliers,
        }
        model_saver.save_model(model_name, grid_search, experiment_name, metadata)
        print(f"Model {model_name} saved successfully!")

    print(f"Evaluating {model_name} on validation set...")
    # With refit='f1_macro', GridSearchCV has already refitted the best
    # pipeline on the full training data, so no second fit is needed here.
    best_model = grid_search.best_estimator_

    # Get predictions
    y_pred = best_model.predict(X_val)

    # Calculate metrics
    accuracy = accuracy_score(y_val, y_pred)
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        y_val, y_pred, average='macro', zero_division=0
    )

    # Per-class metrics (labels are the union of train and validation classes)
    labels = np.unique(np.concatenate([y_train, y_val]))
    precision_per_class, recall_per_class, f1_per_class, support_per_class = precision_recall_fscore_support(
        y_val, y_pred, average=None, labels=labels, zero_division=0
    )

    confusion_mat = confusion_matrix(y_val, y_pred, labels=labels)

    results = {
        'model_name': model_name,
        'sampling_method': sampling_method,
        'remove_outliers': remove_outliers,
        'best_cv_score': grid_search.best_score_,
        'best_params': grid_search.best_params_,
        'validation_accuracy': accuracy,
        'validation_f1_macro': f1_macro,
        'validation_precision_macro': precision_macro,
        'validation_recall_macro': recall_macro,
        'validation_f1_per_class': f1_per_class,
        'validation_precision_per_class': precision_per_class,
        'validation_recall_per_class': recall_per_class,
        'validation_support_per_class': support_per_class,
        'confusion_matrix': confusion_mat,
        'labels': labels,
    }

    print(f"Validation F1-Macro: {f1_macro:.4f}")
    print(f"Validation Accuracy: {accuracy:.4f}")

    # Append to results CSV (the "test_*" columns hold the held-out
    # validation metrics)
    row = {
        'sampling_method': sampling_method,
        'outliers_removed': remove_outliers,
        'model': model_name,
        'test_accuracy': round(float(accuracy), 4),
        'test_f1_macro': round(float(f1_macro), 4),
        'best_cv_score': round(float(grid_search.best_score_), 4),
        'best_parameters': json.dumps(grid_search.best_params_),
    }
    # Add per-class F1 columns
    for lbl, f1 in zip(labels, f1_per_class):
        row[f'test_f1_cls_{lbl}'] = round(float(f1), 2)

    os.makedirs(os.path.dirname(results_csv), exist_ok=True)
    # Write the header only when the file is created by this append.
    header = not os.path.exists(results_csv)
    pd.DataFrame([row]).to_csv(results_csv, mode='a', index=False, header=header)

    return results


## 4. Run

In [5]:
print("Starting Model Testing")
print("="*80)

print(os.getcwd())

# Initialize model saver
# (the relative path below is resolved against the working directory printed above)

model_saver = create_model_saver("src/models/best_simple_models_testing_fe")

# Define experiments to run: (model name, sampling method, remove outliers).
# Commented-out entries are configurations that are not run in this pass.
experiments = [
    # Without outlier removal
    
    #("XGBoost", "No_Sampling", False),
    #("ANN", "No_Sampling", False),
    #("SVM", "No_Sampling", False),
    
    # With outlier removal
    #("XGBoost", "No_Sampling", True),
    #("ANN", "No_Sampling", True),
    #("SVM", "No_Sampling", True),
    
    # With sampling (no outlier removal)
    #("XGBoost", "SMOTE", False),
    #("ANN", "SMOTE", False),
    ("SVM", "SMOTE", False),
    
    # With sampling (with outlier removal)
    #("XGBoost", "SMOTE", True),
    #("ANN", "SMOTE", True),
    ("SVM", "SMOTE", True),
]

# Run experiments; a failing experiment is reported and the loop moves on.
all_results = []

for model_name, sampling_method, remove_outliers in experiments:
    try:
        result = run_grid_search(
            model_name=model_name,
            sampling_method=sampling_method,
            remove_outliers=remove_outliers,
            model_saver=model_saver
        )
    except Exception as e:
        print(f"Error running {model_name} with {sampling_method}: {e}")
        continue

    # run_grid_search returns None when the experiment was skipped.
    if result is not None:
        all_results.append(result)

Starting Model Testing
/home/christianm/Projects/Repos/heartbeat_classification

Running GridSearchCV for SVM with SMOTE
Outlier removal: False
Loading processed X_train dataset from: data/processed/mitbih/X_train.csv
Loading processed y_train dataset from: data/processed/mitbih/y_train.csv


[('sampler', SMOTE(random_state=42)), ('classifier', SVC())]

model_path=PosixPath('src/models/best_simple_models_testing_fe/SVM_smote_outliers_False.joblib')
Training new model for SVM...
Fitting 15 folds for each of 3 candidates, totalling 45 fits
[CV 15/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.899) f1_macro: (test=0.887) f1_weighted: (test=0.978) total time=65.0min
[CV 9/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.890) f1_macro: (test=0.891) f1_weighted: (test=0.979) total time=65.3min
[CV 6/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.894) f1_macro: (test=0.883) f1_weighted: (test=0.977) total time=65.5min
[CV 14/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.895) f1_macro: (test=0.884) f1_weighted: (test=0.976) total time=66.7min
[CV 12/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.875) f1_macro: (test=0.883) f1_weight

INFO:src.utils.model_saver:Model saved: src/models/best_simple_models_testing_fe/SVM_smote_outliers_False.joblib
INFO:src.utils.model_saver:Metadata saved: src/models/best_simple_models_testing_fe/SVM_smote_outliers_False_metadata.pkl


Model SVM saved successfully!
Evaluating SVM on validation set...
Validation F1-Macro: 0.8941
Validation Accuracy: 0.9797

Running GridSearchCV for SVM with SMOTE
Outlier removal: True
Loading processed X_train dataset from: data/processed/mitbih/X_train_olr.csv
Loading processed y_train dataset from: data/processed/mitbih/y_train_olr.csv


[('sampler', SMOTE(random_state=42)), ('classifier', SVC())]

model_path=PosixPath('src/models/best_simple_models_testing_fe/SVM_smote_outliers_True.joblib')
Training new model for SVM...
Fitting 15 folds for each of 3 candidates, totalling 45 fits
[CV 7/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.904) f1_macro: (test=0.897) f1_weighted: (test=0.978) total time=60.9min
[CV 14/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.902) f1_macro: (test=0.895) f1_weighted: (test=0.979) total time=62.3min
[CV 6/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.884) f1_macro: (test=0.894) f1_weighted: (test=0.979) total time=62.6min
[CV 3/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.903) f1_macro: (test=0.886) f1_weighted: (test=0.978) total time=62.8min
[CV 11/15] END classifier__C=10, classifier__gamma=0.4, classifier__kernel=rbf; bal_acc: (test=0.891) f1_macro: (test=0.887) f1_weighted

INFO:src.utils.model_saver:Model saved: src/models/best_simple_models_testing_fe/SVM_smote_outliers_True.joblib
INFO:src.utils.model_saver:Metadata saved: src/models/best_simple_models_testing_fe/SVM_smote_outliers_True_metadata.pkl


Model SVM saved successfully!
Evaluating SVM on validation set...
Validation F1-Macro: 0.8902
Validation Accuracy: 0.9784


In [7]:
banner = "=" * 100
print(f"\n{banner}")
print("BEST OVERALL RESULT (FROM ALL RUNS)")
print(f"{banner}")

# The CSV accumulates rows from all runs, so the best row is looked up there
# rather than in the results of the current session only.
if not os.path.exists(results_csv):
    print("No existing results CSV found.")
else:
    df_all_results = pd.read_csv(results_csv)

    if len(df_all_results) == 0:
        print("No valid results found in existing CSV.")
    else:
        # Row with the highest held-out validation macro F1.
        best_idx = df_all_results['test_f1_macro'].idxmax()
        best_result = df_all_results.loc[best_idx]
        print(f"Best overall model: {best_result['model']}")
        print(f"Sampling Method: {best_result['sampling_method']}")
        print(f"Validation (test_*) F1-Macro: {best_result['test_f1_macro']:.4f}")
        print(f"Validation (test_*) Accuracy: {best_result['test_accuracy']:.4f}")
        print(f"Best CV Score: {best_result['best_cv_score']:.4f}")
        print(f"Best Parameters: {best_result['best_parameters']}")


BEST OVERALL RESULT (FROM ALL RUNS)
Best overall model: XGBoost
Sampling Method: SMOTE
Validation (test_*) F1-Macro: 0.9108
Validation (test_*) Accuracy: 0.9837
Best CV Score: 0.9131
Best Parameters: {"classifier__colsample_bytree": 0.9, "classifier__gamma": 0.05, "classifier__learning_rate": 0.2, "classifier__max_depth": 9, "classifier__min_child_weight": 5, "classifier__n_estimators": 250, "classifier__reg_alpha": 0.1, "classifier__reg_lambda": 0.05, "classifier__subsample": 0.7}


## 5. Script to regenerate results from saved models

In [None]:
# The experiments you previously created (match what you trained).
# Each entry is (model name, sampling method, remove outliers); uncomment the
# configurations whose saved models should be re-evaluated. With every entry
# commented out, the re-evaluation loop at the end of this cell does nothing.
experiments = [
    # Without outlier removal
    #("XGBoost", "No_Sampling", False),
    #("ANN", "No_Sampling", False),
    #("SVM", "No_Sampling", False),

    # With outlier removal
    #("XGBoost", "No_Sampling", True),
    #("ANN", "No_Sampling", True),
    #("SVM", "No_Sampling", True),

    # With sampling (no outlier removal)
    #("XGBoost", "SMOTE", False),
    #("ANN", "SMOTE", False),
    #("SVM", "SMOTE", False),

    # With sampling (with outlier removal)
    #("XGBoost", "SMOTE", True),
    #("ANN", "SMOTE", True),
    #("SVM", "SMOTE", True),
]

# Create/access model saver in the same location as before
# NOTE(review): the training cell above saves to
# "src/models/best_simple_models_testing_fe", while this path has no "_fe"
# suffix - confirm which directory is intended before re-evaluating.
model_saver = create_model_saver("src/models/best_simple_models_testing")

def prepare_no_leak_data(remove_outliers: bool):
    """
    Load the unsampled ("No_Sampling") processed data for train and validation.

    Any resampling lives inside the saved pipeline, so the raw processed split
    is what gets passed to it.

    Returns:
        ``(X_train, y_train, X_val, y_val)`` as NumPy arrays; the validation
        entries are ``None`` when the loaded split has no validation part.
    """
    generate_all_processed_datasets(data_dir=DATA_DIR, only_once=True)
    split = load_processed_dataset(
        data_dir=DATA_DIR,
        sampling_suffix=pp_build_full_suffix("No_Sampling", remove_outliers),
    )

    X_train = split.X_train.values
    y_train = split.y_train.values

    # The validation part is optional on the loaded split.
    X_val = None if split.X_val is None else split.X_val.values
    y_val = None if split.y_val is None else split.y_val.values

    return X_train, y_train, X_val, y_val

def evaluate_and_append(model_name: str, sampling_method: str, remove_outliers: bool):
    """
    Re-evaluate one saved experiment and append its result row to the CSV.

    Loads the saved GridSearchCV, re-fits its best pipeline on the full
    training data, evaluates on the held-out validation split and appends a
    row (including per-class F1) to ``results_csv``. The row uses the same
    leading columns, in the same order, as the rows written by
    ``run_grid_search`` so that appended rows line up with the existing
    header.

    Args:
        model_name: Name of the saved model.
        sampling_method: Sampling method the experiment was trained with.
        remove_outliers: Whether the outlier-removed training data was used.

    Returns:
        None. Prints a message and returns early when no saved model exists.

    NOTE(review): the data is used as loaded, without the baseline/denoise
    filtering that ``run_grid_search`` applies before training - confirm this
    matches how the loaded models were trained.
    """
    experiment_name = f"{sampling_method.lower()}_outliers_{remove_outliers}"
    if not model_saver.model_exists(model_name, experiment_name):
        print(f"Skipping {model_name} / {experiment_name}: model not found.")
        return

    def _append_row(row: dict) -> None:
        # Append one row; the header is written only when the file is new.
        os.makedirs(os.path.dirname(results_csv), exist_ok=True)
        header = not os.path.exists(results_csv)
        pd.DataFrame([row]).to_csv(results_csv, mode="a", index=False, header=header)

    # Load model (GridSearchCV)
    gs = model_saver.load_model(model_name, experiment_name)
    best_model = gs.best_estimator_

    # Columns shared by both outcomes below; "outliers_removed" sits in the
    # same position as in run_grid_search's rows.
    row = {
        "sampling_method": sampling_method,
        "outliers_removed": remove_outliers,
        "model": model_name,
        "test_accuracy": None,
        "test_f1_macro": None,
        "best_cv_score": round(float(gs.best_score_), 4),
        "best_parameters": json.dumps(gs.best_params_),
    }

    # Prepare data
    X_train, y_train, X_val, y_val = prepare_no_leak_data(remove_outliers)
    if X_val is None or y_val is None:
        print(f"No validation split found for {model_name} / {experiment_name}; writing row without test_* metrics.")
        _append_row(row)
        return

    # Re-fit on full training set and evaluate on held-out validation
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_val)

    accuracy = accuracy_score(y_val, y_pred)
    _, _, f1_macro, _ = precision_recall_fscore_support(
        y_val, y_pred, average="macro", zero_division=0
    )

    # Per-class F1 over the union of train and validation classes.
    labels = np.unique(np.concatenate([y_train, y_val]))
    _, _, f1_per_class, _ = precision_recall_fscore_support(
        y_val, y_pred, average=None, labels=labels, zero_division=0
    )

    row["test_accuracy"] = round(float(accuracy), 4)
    row["test_f1_macro"] = round(float(f1_macro), 4)
    for lbl, f1 in zip(labels, f1_per_class):
        row[f"test_f1_cls_{lbl}"] = round(float(f1), 2)

    _append_row(row)
    print(f"Wrote result for {model_name} / {experiment_name}")

# Run re-evaluation for all saved experiments
# Each call appends a row to the existing CSV (mode="a"), so running this
# cell twice for the same experiment adds a second row for it.
for model_name, sampling_method, remove_outliers in experiments:
    evaluate_and_append(model_name, sampling_method, remove_outliers)

print("Done rebuilding CSV.")