# 4 Find best "simple" model for given dataset

This part provides a pipeline for heartbeat classification based on the requirements from `notebooks/03_model_testing_example_mit.ipynb`.

### What the notebook does

- The notebook builds a leak-free pipeline per experiment:
  - [Optional] `StandardScaler` → [Optional] `Sampler` (e.g., SMOTE) → `Classifier`
  - This pipeline is passed into `GridSearchCV` with `RepeatedStratifiedKFold`.

- Data loading:
  - Loads preprocessed MIT-BIH training split (`X_train`, `y_train`) and a held-out validation split (`X_val`, `y_val`) from `data/processed/mitbih/`.
  - When `remove_outliers=True`, outlier removal is applied to the training split only; the validation split is never altered.

- Inside each CV fold (leak-free):
  1. Split the training data into `train_fold` and `val_fold` (internal to CV).
  2. Fit `StandardScaler` on `train_fold` only (if scaling applies).
  3. Fit the `Sampler` (e.g., SMOTE) on `train_fold` only.
  4. Train the classifier on the resampled `train_fold`.
  5. Evaluate on the untouched `val_fold`.
  6. Repeat across folds; aggregate metrics and select best hyperparameters.

- After CV:
  - Refit the best pipeline on the full training data (`X_train`, `y_train`).
  - Evaluate on the held-out validation set (`X_val`, `y_val`) which was never sampled or transformed using training information.
  - Append results to `reports/03_model_testing_results/04_01_model_comparison_grid_search_best_models.csv` and save the fitted model artifact.

### Explanation

- Sampling (oversampling/undersampling) is performed only after each fold’s split and only on the training fold within the CV loop.
- The validation fold in CV and the final held-out validation set are kept untouched, preventing information leakage.
- This follows best practices consistently recommended in the literature for imbalanced learning and model evaluation.
- Accordingly, reported performance (e.g., accuracy/F1) reflects a trustworthy estimate; high scores are plausible on MIT-BIH when methodology is correct.

### Minimal data-flow

1. Load processed MIT-BIH data:
   - `X_train`, `y_train` (base or with outlier removal applied to training only)
   - `X_val`, `y_val` (always untouched)
2. For each model/sampling setting:
   - Construct pipeline: `[Scaler?] -> [Sampler?] -> Classifier`
   - Run `GridSearchCV` with `RepeatedStratifiedKFold`
     - Per fold: fit scaler/sampler on `train_fold` only; score on `val_fold`
3. Select best params; refit on full `X_train`, `y_train`
4. Evaluate on `X_val`, `y_val`; log metrics and save model


## 1. Imports

In [None]:
import os
from pathlib import Path
import json
import warnings
from typing import Dict, List, Optional, Tuple, Union

# Show the working directory: the `src.*` imports and relative data paths below assume the project root
print(os.getcwd())

import pandas as pd
import numpy as np

# ML libraries
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold
from sklearn.metrics import (
    accuracy_score, precision_recall_fscore_support, confusion_matrix
)
from sklearn import set_config

# Models
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import xgboost as xgb

# Sampling
from imblearn.pipeline import Pipeline


# Custom utilities
from src.utils.preprocessing import (
    load_processed_dataset,
    DatasetSplit,
    build_full_suffix as pp_build_full_suffix,
    generate_all_processed_datasets,
    _normalize_sampling_method_name,
    _SAMPLING_REGISTRY
)
from src.utils.evaluation import evaluate_model
from src.utils.model_saver import create_model_saver

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')

/home/christianm/Projects/Repos/heartbeat_classification


## 2. Constants & Param Spaces

In [None]:
# Seed shared by the estimators and the CV splitters below.
RANDOM_STATE = 42
# Metrics computed for every CV fit; each key shows up in `cv_results_` as
# `mean_test_<key>` (e.g. `mean_test_f1_macro`, `mean_test_bal_acc`).
SCORING = {'f1_macro': 'f1_macro', 'bal_acc': 'balanced_accuracy', 'f1_weighted': 'f1_weighted'}
# CSV to which one summary row per trained experiment is appended.
results_csv = "reports/03_model_testing_results/04_01_model_comparison_grid_search_best_models.csv"
# Directory holding the pre-processed MIT-BIH splits.
DATA_DIR = "data/processed/mitbih"

# Per-model search configuration:
#   "estimator" - unfitted base estimator placed in the pipeline's "classifier" step
#   "params"    - grid over the estimator's own parameter names (the
#                 `classifier__` prefix is added when the grid is built)
#   "cv"        - splitter handed to GridSearchCV (5 folds x 3 repeats = 15 fits
#                 per parameter combination)
PARAM_SPACES = {
    # 160 parameter combinations -> 2400 CV fits.
    "XGBoost": {
        "estimator": xgb.XGBClassifier(
            objective="multi:softmax",
            num_class=5,
            random_state=RANDOM_STATE,
            n_jobs=-1,
            eval_metric="mlogloss",
        ),
        "params": {
            "n_estimators": [150, 200, 250, 350, 500],
            "max_depth": [8, 9],
            "learning_rate": [0.2],
            "subsample": [0.7, 0.8],
            "colsample_bytree": [0.9],
            "reg_alpha": [0.1, 0.2],
            "reg_lambda": [0.0, 0.05],
            "min_child_weight": [5],
            "gamma": [0.0, 0.05],
        },
        "cv": RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=RANDOM_STATE),
    },
    # 16 parameter combinations -> 240 CV fits.
    "ANN": {
        "estimator": MLPClassifier(
            max_iter=300,
            early_stopping=True,
            random_state=RANDOM_STATE,
            n_iter_no_change=10,
            solver="adam",
        ),
        "params": {
            "hidden_layer_sizes": [(128, 64)],
            "activation": ["relu"],
            "alpha": [3e-4],
            "learning_rate_init": [0.001, 0.0015],
            "batch_size": [96, 128],
            "beta_1": [0.9, 0.91],
            "beta_2": [0.97, 0.974],
            "validation_fraction": [0.1],
        },
        "cv": RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=RANDOM_STATE),
    },
    # 3 parameter combinations -> 45 CV fits.
    # Best from an earlier search: {'clf__kernel': 'rbf', 'clf__gamma': 0.5, 'clf__C': 10}
    "SVM": {
        "estimator": SVC(),
        "params": {
            "kernel": ["rbf"],
            "C": [10],
            "gamma": [0.4, 0.5, 0.6],
        },
        "cv": RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=RANDOM_STATE),
    },
}




## 3. Methods used

In [4]:
# Pretty, Jupyter-native diagram (works in notebooks)
def show_pipeline_diagram(pipe: Pipeline) -> None:
    """Render `pipe` as scikit-learn's diagram representation in the notebook.

    Switches scikit-learn's display mode to "diagram" (a global setting that
    stays active afterwards) and then displays the pipeline.
    """
    set_config(display="diagram")
    # NOTE(review): `display` is not imported in this file; presumably the
    # global that Jupyter/IPython injects - confirm before running as a script.
    display(pipe)  # Jupyter display


def create_leak_free_pipeline(
    model_name: str,
    estimator,
    sampling_method: Optional[str] = "none",
    sampler_kwargs: Optional[Dict] = None,
    random_state: Optional[int] = 42,
) -> Pipeline:
    """
    Assemble the `sampler -> classifier` pipeline used inside cross-validation.

    Because the sampler is a step of an imblearn `Pipeline`, it is fitted and
    applied inside each CV fold on the training part only.

    Args:
        model_name: Not used by this function; kept for the caller's signature.
        estimator: Classifier instance placed in the "classifier" step.
        sampling_method: Name resolved through `_normalize_sampling_method_name`
            and looked up in `_SAMPLING_REGISTRY`.
        sampler_kwargs: Extra keyword arguments for the sampler constructor.
            The caller's dict is copied, never mutated.
        random_state: Passed to the sampler as `random_state` unless
            `sampler_kwargs` already provides one or this is None.

    Returns:
        An imblearn `Pipeline` with the steps "sampler" and "classifier".
        No scaler step is added here.
    """
    # Work on a private copy of the caller's keyword arguments.
    kwargs = dict(sampler_kwargs) if sampler_kwargs else {}
    if random_state is not None:
        kwargs.setdefault("random_state", random_state)

    registry_key = _normalize_sampling_method_name(sampling_method)
    # NOTE(review): a sampler is always instantiated, so the registry must map
    # the "no sampling" name to a usable class - confirm in preprocessing utils.
    sampler_cls = _SAMPLING_REGISTRY[registry_key]

    steps = [
        ("sampler", sampler_cls(**kwargs)),
        ("classifier", estimator),
    ]
    display(steps)
    return Pipeline(steps)


def prepare_dataset_with_sampling(
    data_dir: str = DATA_DIR,
    sampling_method: str = "No_Sampling",
    remove_outliers: bool = False
) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray, Optional[np.ndarray]]:
    """Load the processed dataset matching a sampling / outlier configuration.

    `generate_all_processed_datasets` is called first with `only_once=True`,
    then the split identified by the suffix built from `sampling_method` and
    `remove_outliers` is loaded from `data_dir`.

    Returns:
        `(X_train, X_val, y_train, y_val)` as arrays taken from `.values`;
        `X_val` / `y_val` are None when the loaded split has no validation part.
    """
    def _to_array(frame):
        # The validation parts are optional in the loaded split.
        return None if frame is None else frame.values

    # Ensure all datasets are generated once (no-op if already done)
    generate_all_processed_datasets(data_dir=data_dir, only_once=True)

    split = load_processed_dataset(
        data_dir=data_dir,
        sampling_suffix=pp_build_full_suffix(sampling_method, remove_outliers),
    )

    train_features = split.X_train.values
    train_labels = split.y_train.values
    val_features = _to_array(split.X_val)
    val_labels = _to_array(split.y_val)

    return train_features, val_features, train_labels, val_labels


def run_grid_search(
    model_name: str,
    sampling_method: str = "No_Sampling",
    remove_outliers: bool = False,
    model_saver=None,
    results_dir: str = "reports/comprehensive_model_testing"
) -> Optional[Dict]:
    """
    Run GridSearchCV for a specific model and sampling method.

    The search always trains on the unsampled training split; the requested
    sampler is a pipeline step, so it is applied inside each CV fold only.
    After the search, the refitted best pipeline is evaluated on the held-out
    validation split and one summary row is appended to `results_csv`.

    Args:
        model_name: Key into `PARAM_SPACES` (raises KeyError if unknown)
        sampling_method: Sampling method applied inside the pipeline
        remove_outliers: Whether to load the outlier-removed training split
        model_saver: Optional model saver; when given, an already saved model
            for this experiment causes the run to be skipped, and a newly
            trained search object is saved together with its metadata
        results_dir: Currently unused; kept for backward compatibility

    Returns:
        Dictionary with CV and validation results, or None when the experiment
        was skipped because a saved model already exists. If the dataset has
        no validation split, the validation entries are None.
    """
    print(f"\n{'='*80}")
    print(f"Running GridSearchCV for {model_name} with {sampling_method}")
    print(f"Outlier removal: {remove_outliers}")
    print(f"{'='*80}")

    # Get model configuration
    model_config = PARAM_SPACES[model_name]
    estimator = model_config["estimator"]
    params = model_config["params"]
    cv = model_config["cv"]

    # Create experiment name
    experiment_name = f"{sampling_method.lower()}_outliers_{remove_outliers}"

    # Skip before any data is loaded: nothing below is needed for a saved model.
    if model_saver and model_saver.model_exists(model_name, experiment_name):
        print(f"Model {model_name} already exists for experiment {experiment_name}. Skipping training and CSV append.")
        return None

    # Prepare data
    X_train, X_val, y_train, y_val = prepare_dataset_with_sampling(
        sampling_method="No_Sampling", # using non-sampled method for training - apply sampling inside pipeline
        remove_outliers=remove_outliers
    )

    # Create leak-free pipeline
    pipeline = create_leak_free_pipeline(model_name, estimator, sampling_method)

    # Grid keys must address the pipeline's "classifier" step.
    pipeline_params = {
        f'classifier__{param_name}': param_values
        for param_name, param_values in params.items()
    }

    print(f"Training new model for {model_name}...")

    # Run GridSearchCV
    grid_search = GridSearchCV(
        estimator=pipeline,
        param_grid=pipeline_params,
        scoring=SCORING,
        refit='f1_macro',
        cv=cv,
        n_jobs=-1,
        verbose=3
    )

    grid_search.fit(X_train, y_train)

    # Save model if saver is provided
    if model_saver:
        metadata = {
            'best_params': grid_search.best_params_,
            'best_score': grid_search.best_score_,
            'cv_results': grid_search.cv_results_,
            'experiment': experiment_name,
            'classifier': model_name,
            'sampling_method': sampling_method,
            'remove_outliers': remove_outliers,
        }
        model_saver.save_model(model_name, grid_search, experiment_name, metadata)
        print(f"Model {model_name} saved successfully!")

    # Validation entries stay None when there is no validation split.
    results = {
        'model_name': model_name,
        'sampling_method': sampling_method,
        'remove_outliers': remove_outliers,
        'best_cv_score': grid_search.best_score_,
        'best_params': grid_search.best_params_,
        'validation_accuracy': None,
        'validation_f1_macro': None,
        'validation_precision_macro': None,
        'validation_recall_macro': None,
        'validation_f1_per_class': None,
        'validation_precision_per_class': None,
        'validation_recall_per_class': None,
        'validation_support_per_class': None,
        'confusion_matrix': None,
        'labels': None,
    }

    # Row for the results CSV; the leading column order is part of the schema
    # because rows are appended positionally without a header.
    row = {
        'sampling_method': sampling_method,
        'outliers_removed': remove_outliers,
        'model': model_name,
        'test_accuracy': None,
        'test_f1_macro': None,
        'best_cv_score': round(float(grid_search.best_score_), 4),
        'best_parameters': json.dumps(grid_search.best_params_),
    }

    if X_val is None or y_val is None:
        print(f"No validation split found for {model_name} / {experiment_name}; writing row without test_* metrics.")
    else:
        print(f"Evaluating {model_name} on validation set...")
        # With `refit='f1_macro'` the best pipeline has already been refitted
        # on the full training data, so it can predict right away.
        best_model = grid_search.best_estimator_

        # Get predictions
        y_pred = best_model.predict(X_val)

        # Calculate metrics
        accuracy = accuracy_score(y_val, y_pred)
        precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
            y_val, y_pred, average='macro', zero_division=0
        )

        # Per-class metrics over every label seen in either split
        labels = np.unique(np.concatenate([y_train, y_val]))
        precision_per_class, recall_per_class, f1_per_class, support_per_class = precision_recall_fscore_support(
            y_val, y_pred, average=None, labels=labels, zero_division=0
        )

        confusion_mat = confusion_matrix(y_val, y_pred, labels=labels)

        results.update({
            'validation_accuracy': accuracy,
            'validation_f1_macro': f1_macro,
            'validation_precision_macro': precision_macro,
            'validation_recall_macro': recall_macro,
            'validation_f1_per_class': f1_per_class,
            'validation_precision_per_class': precision_per_class,
            'validation_recall_per_class': recall_per_class,
            'validation_support_per_class': support_per_class,
            'confusion_matrix': confusion_mat,
            'labels': labels,
        })

        print(f"Validation F1-Macro: {f1_macro:.4f}")
        print(f"Validation Accuracy: {accuracy:.4f}")

        row['test_accuracy'] = round(float(accuracy), 4)
        row['test_f1_macro'] = round(float(f1_macro), 4)
        # Add per-class F1 columns
        for lbl, f1 in zip(labels, f1_per_class):
            row[f'test_f1_cls_{lbl}'] = round(float(f1), 2)

    # Append to results CSV (header only when the file is created)
    os.makedirs(os.path.dirname(results_csv), exist_ok=True)
    header = not os.path.exists(results_csv)
    pd.DataFrame([row]).to_csv(results_csv, mode='a', index=False, header=header)

    return results


## 4. Run

In [None]:
import traceback

print("Starting Model Testing")
print("="*80)

# The relative paths used below assume the working directory is the project
# root; print it so a wrong location is easy to spot.
print(os.getcwd())

# Initialize model saver
model_saver = create_model_saver("src/models/best_simple_models_testing")

# Define experiments to run as (model_name, sampling_method, remove_outliers).
# Un-comment the configurations that should be trained.
experiments = [
    # Without outlier removal
    
    #("XGBoost", "No_Sampling", False),
    #("ANN", "No_Sampling", False),
    #("SVM", "No_Sampling", False), # left out: SVM without SMOTE - computation time very long
    
    # With outlier removal
    #("XGBoost", "No_Sampling", True),
    #("ANN", "No_Sampling", True),
    #("SVM", "No_Sampling", True), # left out: SVM without SMOTE - computation time very long
    
    # With sampling (no outlier removal)
    ("XGBoost", "SMOTE", False),
    #("ANN", "SMOTE", False),
    #("SVM", "SMOTE", False),
    
    # With sampling (with outlier removal)
    #("XGBoost", "SMOTE", True),
    #("ANN", "SMOTE", True),
    #("SVM", "SMOTE", True),
]

# Run experiments
all_results = []

for model_name, sampling_method, remove_outliers in experiments:
    try:
        result = run_grid_search(
            model_name=model_name,
            sampling_method=sampling_method,
            remove_outliers=remove_outliers,
            model_saver=model_saver
        )
    except Exception as e:
        # Best effort: one failing experiment must not stop the remaining
        # ones, but keep the traceback so the failure can be diagnosed.
        print(f"Error running {model_name} with {sampling_method}: {e}")
        traceback.print_exc()
        continue

    if result is not None:  # None means the experiment was skipped
        all_results.append(result)

In [6]:
print(f"\n{'='*100}")
print("BEST OVERALL RESULT (FROM ALL RUNS)")
print(f"{'='*100}")

# Load existing results from CSV to find truly best overall result
existing_csv = results_csv
if os.path.exists(existing_csv):
    df_all_results = pd.read_csv(existing_csv)

    # Rows can be written without test metrics (no validation split), so only
    # rows with a numeric F1 score take part in the ranking.
    scored_results = df_all_results.assign(
        test_f1_macro=pd.to_numeric(df_all_results['test_f1_macro'], errors='coerce')
    ).dropna(subset=['test_f1_macro'])

    if len(scored_results) > 0:
        best_idx = scored_results['test_f1_macro'].idxmax()
        best_result = scored_results.loc[best_idx]
        print(f"Best overall model: {best_result['model']}")
        print(f"Sampling Method: {best_result['sampling_method']}")
        print(f"Test F1-Macro: {best_result['test_f1_macro']:.4f}")
        print(f"Test Accuracy: {best_result['test_accuracy']:.4f}")
        print(f"Best CV Score: {best_result['best_cv_score']}")
        print(f"Best Parameters: {best_result['best_parameters']}")
    else:
        print("No valid results found in existing CSV.")
else:
    print("No existing results CSV found.")


BEST OVERALL RESULT (FROM ALL RUNS)
Best overall model: XGBoost
Sampling Method: SMOTE
Test F1-Macro: 0.9717
Test Accuracy: 0.9773
Best CV Score: 0.9642
Best Parameters: {"classifier__colsample_bytree": 0.9, "classifier__gamma": 0.05, "classifier__learning_rate": 0.1, "classifier__max_depth": 9, "classifier__min_child_weight": 5, "classifier__n_estimators": 250, "classifier__reg_alpha": 0.1, "classifier__reg_lambda": 0.0, "classifier__subsample": 0.7}


## 5. Script to re-generate results based on created models

In [None]:
import os, json
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

from src.utils.model_saver import create_model_saver
from src.utils.preprocessing import (
    load_processed_dataset,
    build_full_suffix as pp_build_full_suffix,   # if not exported, import as in your notebook (pp_build_full_suffix alias)
    generate_all_processed_datasets,
)

# Paths and config
# CSV that receives one appended row per re-evaluated experiment.
results_csv = "reports/03_model_testing_results/04_01_model_comparison_grid_search_best_models.csv"
# Directory holding the pre-processed MIT-BIH splits.
DATA_DIR = "data/processed/mitbih"

# The experiments you previously created (match what you trained),
# as (model_name, sampling_method, remove_outliers).
experiments = [
    # Without outlier removal
    ("XGBoost", "No_Sampling", False),
    ("ANN", "No_Sampling", False),
    #("SVM", "No_Sampling", False),

    # With outlier removal
    ("XGBoost", "No_Sampling", True),
    ("ANN", "No_Sampling", True),
    #("SVM", "No_Sampling", True),

    # With sampling (no outlier removal)
    ("XGBoost", "SMOTE", False),
    ("ANN", "SMOTE", False),
    ("SVM", "SMOTE", False),

    # With sampling (with outlier removal)
    ("XGBoost", "SMOTE", True),
    ("ANN", "SMOTE", True),
    ("SVM", "SMOTE", True),
]

# Create/access model saver in the same location as before
model_saver = create_model_saver("src/models/best_simple_models_testing")

def prepare_no_leak_data(remove_outliers: bool):
    """
    Return the unsampled processed data as `(X_train, y_train, X_val, y_val)`.

    The "No_Sampling" dataset is always loaded because any sampling step lives
    inside the saved pipeline. `remove_outliers` selects which dataset suffix is
    loaded. `X_val` / `y_val` are None when the split has no validation part.
    """
    def _to_array(frame):
        # The validation parts are optional in the loaded split.
        return None if frame is None else frame.values

    generate_all_processed_datasets(data_dir=DATA_DIR, only_once=True)

    split = load_processed_dataset(
        data_dir=DATA_DIR,
        sampling_suffix=pp_build_full_suffix("No_Sampling", remove_outliers),
    )

    train_features = split.X_train.values
    train_labels = split.y_train.values
    val_features = _to_array(split.X_val)
    val_labels = _to_array(split.y_val)
    return train_features, train_labels, val_features, val_labels

def evaluate_and_append(model_name: str, sampling_method: str, remove_outliers: bool):
    """
    Load saved GridSearchCV, re-fit best pipeline on full train, evaluate on held-out val,
    and append a row with per-class F1 to the CSV with the standardized schema.

    Args:
        model_name: Model name the search object was saved under.
        sampling_method: Sampling method of the experiment (part of its name).
        remove_outliers: Whether the experiment used the outlier-removed split.

    Returns:
        None. The experiment is skipped with a message when no saved model
        exists. When the dataset has no validation split, a row without test
        metrics is written instead.
    """
    experiment_name = f"{sampling_method.lower()}_outliers_{remove_outliers}"
    if not model_saver.model_exists(model_name, experiment_name):
        print(f"Skipping {model_name} / {experiment_name}: model not found.")
        return

    def _append_row(csv_row):
        # Append one row; the header is written only when the file is created.
        os.makedirs(os.path.dirname(results_csv), exist_ok=True)
        header = not os.path.exists(results_csv)
        pd.DataFrame([csv_row]).to_csv(results_csv, mode="a", index=False, header=header)

    # Load model (GridSearchCV)
    gs = model_saver.load_model(model_name, experiment_name)
    best_model = gs.best_estimator_

    # Every row starts with the same columns in the same order. Rows are
    # appended positionally, so leaving one out (e.g. "outliers_removed")
    # would shift all following values into the wrong columns.
    row = {
        "sampling_method": sampling_method,
        "outliers_removed": remove_outliers,
        "model": model_name,
        "test_accuracy": None,
        "test_f1_macro": None,
        "best_cv_score": round(float(gs.best_score_), 4),
        "best_parameters": json.dumps(gs.best_params_),
    }

    # Prepare data
    X_train, y_train, X_val, y_val = prepare_no_leak_data(remove_outliers)
    if X_val is None or y_val is None:
        print(f"No validation split found for {model_name} / {experiment_name}; writing row without test_* metrics.")
        _append_row(row)
        return

    # Re-fit on full training set and evaluate on held-out validation
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_val)

    accuracy = accuracy_score(y_val, y_pred)
    _, _, f1_macro, _ = precision_recall_fscore_support(
        y_val, y_pred, average="macro", zero_division=0
    )

    # Per-class F1 over every label seen in either split
    labels = np.unique(np.concatenate([y_train, y_val]))
    _, _, f1_per_class, _ = precision_recall_fscore_support(
        y_val, y_pred, average=None, labels=labels, zero_division=0
    )

    row["test_accuracy"] = round(float(accuracy), 4)
    row["test_f1_macro"] = round(float(f1_macro), 4)
    for lbl, f1 in zip(labels, f1_per_class):
        row[f"test_f1_cls_{lbl}"] = round(float(f1), 2)

    _append_row(row)
    print(f"Wrote result for {model_name} / {experiment_name}")

# Run re-evaluation for all saved experiments.
# Each call appends to `results_csv`; experiments without a saved model are
# skipped inside `evaluate_and_append`.
for model_name, sampling_method, remove_outliers in experiments:
    evaluate_and_append(model_name, sampling_method, remove_outliers)

print("Done rebuilding CSV.")

INFO:src.utils.model_saver:Model loaded: src/models/best_simple_models_testing/SVM_smote_outliers_False.joblib


model_path=PosixPath('src/models/best_simple_models_testing/SVM_smote_outliers_False.joblib')
Loading processed X_train dataset from: data/processed/mitbih/X_train.csv
Loading processed y_train dataset from: data/processed/mitbih/y_train.csv


INFO:src.utils.model_saver:Model loaded: src/models/best_simple_models_testing/SVM_smote_outliers_True.joblib


Wrote result for SVM / smote_outliers_False
model_path=PosixPath('src/models/best_simple_models_testing/SVM_smote_outliers_True.joblib')
Loading processed X_train dataset from: data/processed/mitbih/X_train_olr.csv
Loading processed y_train dataset from: data/processed/mitbih/y_train_olr.csv
Wrote result for SVM / smote_outliers_True
Done rebuilding CSV.


## 6. Graphics for GridSearch Run Overview

In [9]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.tri as tri
from pathlib import Path

# Import your existing utilities
from src.utils.model_saver import create_model_saver

# ---------------------------------------------------------------------
# CONFIGURATION: which saved grid search to visualise
# ---------------------------------------------------------------------
BASE_DIR = "src/models/best_simple_models_testing"
MODEL_NAME = "XGBoost"                     # e.g., "XGBoost", "ANN", "SVM"
EXPERIMENT_NAME = "smote_outliers_False"   # must match your saved model name

# ---------------------------------------------------------------------
# LOAD THE SAVED SEARCH OBJECT THROUGH THE MODEL SAVER
# ---------------------------------------------------------------------
model_saver = create_model_saver(BASE_DIR)
gs = model_saver.load_model(MODEL_NAME, EXPERIMENT_NAME)

print(f"Loaded model type: {type(gs)}")
print(f"Best parameters: {gs.best_params_}")
print(f"Best CV score (refit metric): {gs.best_score_:.4f}")

# ---------------------------------------------------------------------
# CV RESULTS AS A DATAFRAME (one row per parameter combination)
# ---------------------------------------------------------------------
results_df = pd.DataFrame(gs.cv_results_)
print("\nAvailable test metrics:")
print([c for c in results_df.columns if c.startswith("mean_test_")])

# Both plots need these metric columns; fail early with a clear message.
for required_col in ("mean_test_f1_macro", "mean_test_bal_acc"):
    if required_col not in results_df.columns:
        raise KeyError(f"Missing column '{required_col}' in cv_results_. Check your SCORING dict.")

# ---------------------------------------------------------------------
# PLOT 1: balanced accuracy vs. F1 macro, coloured by fit time
# ---------------------------------------------------------------------
plt.figure(figsize=(8, 6))
sc = plt.scatter(
    results_df["mean_test_bal_acc"],
    results_df["mean_test_f1_macro"],
    c=results_df["mean_fit_time"],
    cmap="viridis",
    alpha=0.8,
    edgecolor="k",
)
plt.xlabel("Balanced Accuracy")
plt.ylabel("F1 Macro")
plt.title(f"{MODEL_NAME}: Performance Trade-off")
plt.colorbar(sc, label="Fit Time (s)")
plt.grid(True)
plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------
# PLOT 2: influence of a single parameter (example: n_estimators)
# ---------------------------------------------------------------------
param_col = "param_classifier__n_estimators"
if param_col not in results_df.columns:
    # Models without this parameter (e.g. other estimators) have no such column.
    print(f"Parameter column '{param_col}' not found - skipping parameter influence plot.")
else:
    plt.figure(figsize=(8, 6))
    sc = plt.scatter(
        results_df[param_col].astype(float),
        results_df["mean_test_f1_macro"],
        c=results_df["mean_fit_time"],
        cmap="plasma",
        alpha=0.8,
        edgecolor="k",
    )
    plt.xscale("log")
    plt.xlabel("n_estimators (log scale)")
    plt.ylabel("F1 Macro")
    plt.title(f"{MODEL_NAME}: Effect of n_estimators on F1 Macro")
    plt.colorbar(sc, label="Fit Time (s)")
    plt.grid(True)
    plt.tight_layout()
    plt.show()


AttributeError: Can't get attribute '_PredictScorer' on <module 'sklearn.metrics._scorer' from '/home/christianm/Projects/Repos/heartbeat_classification/.venv/lib/python3.11/site-packages/sklearn/metrics/_scorer.py'>