In [1]:
import sys
sys.path.append('..')

In [2]:
import warnings
import multiprocessing
warnings.filterwarnings("ignore", category=ResourceWarning)

# Also suppress multiprocessing warnings
import sys
import os
os.environ['PYTHONWARNINGS'] = 'ignore::ResourceWarning'

# Data Loading

In [3]:
from pathlib import Path
import pandas as pd

def load_datasets(data_root: str | Path = "data",
                  tasks: tuple[str, ...] = ("binary", "multiclass"),
                  splits: tuple[str, ...] = ("train", "val", "test")) -> dict:

    data_root = Path(data_root)
    datasets  = {}

    for task in tasks:
        task_dir     = data_root / task
        task_dict    = {}

        for split in splits:
            split_dict = {}
            for kind in ("X", "y"):
                file_path = task_dir / f"{kind}_{split}.pkl"
                split_dict[kind] = pd.read_pickle(file_path)
            task_dict[split] = split_dict

        datasets[task] = task_dict

    return datasets

In [4]:
from typing import Tuple, Literal
import pandas as pd

def load_split(
    preprocessing_type: Literal["cleaned_only", "full_process"],
    sampling_method: Literal["undersampled", "oversampled"],
    classification_type: Literal["binary", "multiclass"]
) -> Tuple[
    Tuple[pd.DataFrame, pd.Series],  # train: (X_train, y_train)
    Tuple[pd.DataFrame, pd.Series],  # val: (X_val, y_val)
    Tuple[pd.DataFrame, pd.Series]   # test: (X_test, y_test)
]:
    """
    Load different types of splits from the data
    
    Args:
        preprocessing_type: must be "cleaned_only" or "full_process"
        sampling_method: must be "undersampled" or "oversampled"
        classification_type: must be "binary" or "multiclass"
    
    Returns:
        Tuple of (train, val, test) splits, where each split is (X, y)
        - train: (X_train, y_train)
        - val: (X_val, y_val)  
        - test: (X_test, y_test)
    """
    dataset = load_datasets(
        f"../data/{preprocessing_type}/{sampling_method}")[classification_type]
    split_names = ["train", "val", "test"]

    return tuple([(lambda split: (dataset[split]["X"], dataset[split]["y"]))(split) for split in split_names])

# Experiments

In [5]:
def combine_text(X):
    X = X.copy()

    combined = X["resume_text"].astype(
        str) + " [SEP] " + X["job_description_text"].astype(str)

    return combined.values

In [6]:
splits = load_split(preprocessing_type="cleaned_only", sampling_method="undersampled", classification_type="binary")

In [7]:
SEED = 42

# Experiment 1: Base Parameters

In [None]:
from utils import ExperimentManager, Experiment

manager = ExperimentManager(f"../runs/ensemble/optimization/multinomial", ["Fit", "Not Fit"])

In [9]:
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier,StackingClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.feature_extraction.text import TfidfVectorizer
from interpret.glassbox import ExplainableBoostingClassifier
from sklearn.preprocessing import FunctionTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


def compute_cosine_similarity(X):
    """
    Compute cosine similarity between resume_text and job_description_text
    for each row in a DataFrame or compatible input.
    Returns a 2D NumPy array of shape (n_samples, 1).
    """

    # Defensive: ensure X is a DataFrame with expected columns
    if isinstance(X, np.ndarray):
        # If it's already an ndarray, we must know column order
        X = pd.DataFrame(X, columns=["resume_text", "job_description_text"])
    elif not isinstance(X, pd.DataFrame):
        raise ValueError("Input X must be a DataFrame or 2D ndarray.")

    if "resume_text" not in X.columns or "job_description_text" not in X.columns:
        raise ValueError("Expected columns 'resume_text' and 'job_description_text' not found.")

    # Flatten all text for vectorizer fit
    all_texts = X["resume_text"].astype(str).tolist() + X["job_description_text"].astype(str).tolist()
    
    # Fit vectorizer
    vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
    vectorizer.fit(all_texts)

    # Compute cosine similarity for each row
    cosine_scores = []
    for idx, row in X.iterrows():
        resume_text = str(row['resume_text'])
        job_text = str(row['job_description_text'])

        tfidf_matrix = vectorizer.transform([resume_text, job_text])
        cos_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
        cosine_scores.append(cos_sim)

    return np.array(cosine_scores).reshape(-1, 1)

def pipeline_factory(params):

    clf_lr = LogisticRegression(random_state=SEED)
    clf_rf = RandomForestClassifier(random_state=SEED)
    clf_nb = MultinomialNB()

    stacking_clf = StackingClassifier(
        estimators=[
            ('lr', clf_lr),
            ('nb', clf_nb),
            ('rf', clf_rf)
        ],
        final_estimator=ExplainableBoostingClassifier(random_state=SEED),
        cv=5,
        n_jobs=1
    )

    return Pipeline([
        ('features', FeatureUnion([
            # TF-IDF features
            ('tfidf_features', Pipeline([
                ("join", FunctionTransformer(combine_text, validate=False)),
                ('tfidf', TfidfVectorizer()),
                ('selector', SelectKBest(chi2, k=100))
            ])),
            
            # Cosine similarity feature with scaling
            ('cosine_sim', Pipeline([
                ('extract', FunctionTransformer(compute_cosine_similarity, validate=False))
            ]))
        ])),
        ('clf', stacking_clf)
    ])


experiment = Experiment(
    name=f"Baseline optimization EBM chi2 stack",
    description=f"No hyperparameter tuning yet but parameters are changed from defaults",
    pipeline_factory=pipeline_factory
)

manager.run_experiment(experiment, splits=splits)



=== Running Experiment: Baseline optimization EBM chi2 stack ===

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.6855

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.6845
   Micro F1:     0.6855
   Weighted F1:  0.6845

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.6880  R: 0.6855
   Micro    - P: 0.6855  R: 0.6855
   Weighted - P: 0.6880  R: 0.6855

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6663     0.7433     0.7027        857
   Not Fit              0.7098     0.6278     0.6663        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6880     0.6855     0.6845       1714
   weighted avg         0.6880     0.6855     0.6845       1714

🔢 CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted →
   True ↓        Fit  Not Fi

<utils.ExperimentManger.Experiment at 0x7f9c2fff3b60>

# Experiment 2: Hyperparameter Optimization

## Conservative Parameter Space

In [10]:
def conservative_ensemble_param_space(trial):
    """
    More conservative parameter space with fewer options
    Good for faster optimization with reasonable performance
    Updated with proper MultinomialNB parameters
    """
    params = {}
    
    # TF-IDF - Limited options
    params['tfidf__ngram_range'] = trial.suggest_categorical('tfidf__ngram_range', [
        (1, 1), (1, 2)
    ])
    params['tfidf__max_features'] = trial.suggest_categorical('tfidf__max_features', [
        5000, 10000, 15000
    ])
    params['tfidf__sublinear_tf'] = trial.suggest_categorical('tfidf__sublinear_tf', [
        True, False
    ])
    
    # Feature Selection
    params['selector__k'] = trial.suggest_categorical('selector__k', [
        100, 500, 1000, 2000
    ])
    
    # Logistic Regression - Simple
    params['clf__estimators__lr__C'] = trial.suggest_float('clf__estimators__lr__C', 
                                                          0.1, 10.0, log=True)
    params['clf__estimators__lr__class_weight'] = trial.suggest_categorical('clf__estimators__lr__class_weight', [
        None, 'balanced'
    ])
    
    # Random Forest - Simple
    params['clf__estimators__rf__n_estimators'] = trial.suggest_categorical('clf__estimators__rf__n_estimators', [
        100, 200
    ])
    params['clf__estimators__rf__max_depth'] = trial.suggest_categorical('clf__estimators__rf__max_depth', [
        None, 10, 20
    ])
    
    # Multinomial Naive Bayes - Simple
    params['clf__estimators__nb__alpha'] = trial.suggest_float('clf__estimators__nb__alpha', 
                                                              0.1, 5.0, log=True)
    params['clf__estimators__nb__fit_prior'] = trial.suggest_categorical('clf__estimators__nb__fit_prior', [
        True, False
    ])
    
    # EBM - Simple
    params['clf__final_estimator__learning_rate'] = trial.suggest_float('clf__final_estimator__learning_rate', 
                                                                        0.01, 0.05)
    params['clf__final_estimator__interactions'] = trial.suggest_categorical('clf__final_estimator__interactions', [
        0, 3
    ])
    
    # Fixed parameters
    params['clf__estimators__lr__random_state'] = 42
    params['clf__estimators__rf__random_state'] = 42
    params['clf__final_estimator__random_state'] = 42
    params['clf__final_estimator__n_jobs'] = 1
    params['clf__estimators__rf__n_jobs'] = 1
    params['clf__n_jobs'] = 1
    
    return params

In [11]:
def pipeline_factory(params):
    """
    Properly handle nested parameters for ensemble pipeline
    """
    
    # ========== EXTRACT PARAMETERS FOR EACH COMPONENT ==========
    
    # TF-IDF parameters
    tfidf_params = {}
    for key, value in params.items():
        if key.startswith('tfidf__'):
            param_name = key.replace('tfidf__', '')
            tfidf_params[param_name] = value
    
    # Feature selector parameters
    selector_params = {}
    for key, value in params.items():
        if key.startswith('selector__'):
            param_name = key.replace('selector__', '')
            selector_params[param_name] = value
    
    # LogisticRegression parameters
    lr_params = {'random_state': SEED}
    for key, value in params.items():
        if key.startswith('clf__estimators__lr__'):
            param_name = key.replace('clf__estimators__lr__', '')
            lr_params[param_name] = value
    
    # RandomForest parameters
    rf_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__estimators__rf__'):
            param_name = key.replace('clf__estimators__rf__', '')
            rf_params[param_name] = value
    
    # NaiveBayes parameters
    nb_params = {}
    for key, value in params.items():
        if key.startswith('clf__estimators__nb__'):
            param_name = key.replace('clf__estimators__nb__', '')
            nb_params[param_name] = value
    
    # EBM parameters
    ebm_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__final_estimator__'):
            param_name = key.replace('clf__final_estimator__', '')
            ebm_params[param_name] = value
    
    # Stacking parameters
    stacking_params = {'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__') and '__' not in key.replace('clf__', ''):
            param_name = key.replace('clf__', '')
            stacking_params[param_name] = value
    
    # ========== CREATE COMPONENTS WITH PARAMETERS ==========
    
    # Create TF-IDF vectorizer
    tfidf = TfidfVectorizer(**tfidf_params)
    
    # Create feature selector
    if not selector_params:
        selector_params['k'] = 100  # Default value
    selector = SelectKBest(chi2, **selector_params)
    
    # Create base estimators
    try:
        clf_lr = LogisticRegression(**lr_params)
    except ValueError as e:
        print(f"LogReg parameter error: {e}")
        # Fallback to safe parameters
        clf_lr = LogisticRegression(random_state=SEED, C=1.0, penalty='l2', solver='lbfgs')
    
    clf_rf = RandomForestClassifier(**rf_params)
    clf_nb = MultinomialNB(**nb_params)
    
    # Create EBM meta-learner
    try:
        ebm = ExplainableBoostingClassifier(**ebm_params)
    except Exception as e:
        print(f"EBM parameter error: {e}")
        # Fallback to safe parameters
        ebm = ExplainableBoostingClassifier(random_state=SEED, n_jobs=1)
    
    # Create stacking classifier
    stacking_clf = StackingClassifier(
        estimators=[
            ('lr', clf_lr),
            ('nb', clf_nb),
            ('rf', clf_rf)
        ],
        final_estimator=ebm,
        **stacking_params
    )
    
    return Pipeline([
        ('features', FeatureUnion([
            # TF-IDF features
            ('tfidf_features', Pipeline([
                ("join", FunctionTransformer(combine_text, validate=False)),
                ('tfidf', tfidf),
                ('selector', selector)
            ])),
            
            # Cosine similarity feature with scaling
            ('cosine_sim', Pipeline([
                ('extract', FunctionTransformer(compute_cosine_similarity, validate=False))
            ]))
        ])),
        ('clf', stacking_clf)
    ])

optuna_kwargs = {
    "n_trials": 30,        # Increase from 5 to 20 for better optimization
    "cv_folds": 20,         # Reduce from 10 to 5 for faster training
    "scoring": "accuracy",
    "random_state": SEED,
    "optimise":True
}

experiment = Experiment(
    name=f"L1 Regularization optimization EBM chi2 stack",
    description=f"L1 focused hyperparameter tuning",
    pipeline_factory=pipeline_factory,
    param_space=conservative_ensemble_param_space 
)

manager.run_experiment(experiment, splits=splits, **optuna_kwargs)


=== Running Experiment: L1 Regularization optimization EBM chi2 stack ===


Hyperparameter Optimization (Custom Val Split):   0%|                                         | 0/30 [00:00<?, ?trial/s][I 2025-07-16 02:22:33,287] A new study created in memory with name: no-name-dd4feaea-9d3b-49c8-a431-b01ff5c94499
Hyperparameter Optimization (Custom Val Split):   3%| | 1/30 [00:41<20:05, 41.56s/trial, Train: 0.9580 | Val: 0.7086 | B[I 2025-07-16 02:23:14,852] Trial 0 finished with value: 0.7086021505376344 and parameters: {'tfidf__ngram_range': (1, 2), 'tfidf__max_features': 5000, 'tfidf__sublinear_tf': True, 'selector__k': 500, 'clf__estimators__lr__C': 3.3582287076226405, 'clf__estimators__lr__class_weight': 'balanced', 'clf__estimators__rf__n_estimators': 100, 'clf__estimators__rf__max_depth': None, 'clf__estimators__nb__alpha': 0.1813661200263177, 'clf__estimators__nb__fit_prior': True, 'clf__final_estimator__learning_rate': 0.04560130987670088, 'clf__final_estimator__interactions': 0}. Best is trial 0 with value: 0.7086021505376344.
Hyperparameter Optimization 


🎯 Optimization completed using Custom Val Split!
   Best score: 0.7484
   Total trials: 30
🔧 Training final model with best parameters...
✅ Training complete!
📊 Logging optimization summary...
✅ Optimization summary logged!

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.6744

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.6738
   Micro F1:     0.6744
   Weighted F1:  0.6738

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.6758  R: 0.6744
   Micro    - P: 0.6744  R: 0.6744
   Weighted - P: 0.6758  R: 0.6744

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6909     0.6313     0.6598        857
   Not Fit              0.6606     0.7176     0.6879        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6758     0.6744     0.6738       1714
   weighted avg 

<utils.ExperimentManger.Experiment at 0x7f9c2dd29880>

## L1 Regularization Space

In [12]:
def l1_regularization_param_space(trial):
    """
    Parameter space focused on L1 regularization (Lasso)
    Uses liblinear/saga solvers that support L1
    Updated with proper MultinomialNB parameters
    """
    params = {}
    
    # ========== TF-IDF: AGGRESSIVE REGULARIZATION ==========
    params['tfidf__max_features'] = trial.suggest_categorical('tfidf__max_features', [
        1000, 2000, 3000, 5000
    ])
    params['tfidf__ngram_range'] = trial.suggest_categorical('tfidf__ngram_range', [
        (1, 1), (1, 2)
    ])
    params['tfidf__min_df'] = trial.suggest_categorical('tfidf__min_df', [
        3, 5, 10, 0.01, 0.02
    ])
    params['tfidf__max_df'] = trial.suggest_categorical('tfidf__max_df', [
        0.7, 0.8, 0.85
    ])
    params['tfidf__sublinear_tf'] = True
    params['tfidf__use_idf'] = True
    params['tfidf__stop_words'] = 'english'
    
    # ========== FEATURE SELECTION ==========
    params['selector__k'] = trial.suggest_categorical('selector__k', [
        50, 100, 200, 300, 500
    ])
    
    # ========== LOGISTIC REGRESSION: L1 PENALTY ONLY ==========
    params['clf__estimators__lr__penalty'] = 'l1'  # Fixed to L1
    params['clf__estimators__lr__solver'] = trial.suggest_categorical(
        'clf__estimators__lr__solver', ['liblinear', 'saga']
    )
    params['clf__estimators__lr__C'] = trial.suggest_float(
        'clf__estimators__lr__C', 0.001, 1.0, log=True
    )
    params['clf__estimators__lr__max_iter'] = 1000
    params['clf__estimators__lr__class_weight'] = 'balanced'
    
    # ========== RANDOM FOREST: PREVENT OVERFITTING ==========
    params['clf__estimators__rf__n_estimators'] = trial.suggest_categorical(
        'clf__estimators__rf__n_estimators', [50, 100, 150]
    )
    params['clf__estimators__rf__max_depth'] = trial.suggest_categorical(
        'clf__estimators__rf__max_depth', [3, 5, 7, 10]
    )
    params['clf__estimators__rf__min_samples_split'] = trial.suggest_categorical(
        'clf__estimators__rf__min_samples_split', [10, 20, 50]
    )
    params['clf__estimators__rf__min_samples_leaf'] = trial.suggest_categorical(
        'clf__estimators__rf__min_samples_leaf', [5, 10, 20]
    )
    params['clf__estimators__rf__max_features'] = trial.suggest_categorical(
        'clf__estimators__rf__max_features', ['sqrt', 'log2']
    )
    params['clf__estimators__rf__class_weight'] = 'balanced'
    params['clf__estimators__rf__bootstrap'] = True
    
    # ========== MULTINOMIAL NAIVE BAYES ==========
    params['clf__estimators__nb__alpha'] = trial.suggest_float(
        'clf__estimators__nb__alpha', 0.1, 10.0, log=True
    )
    params['clf__estimators__nb__fit_prior'] = trial.suggest_categorical(
        'clf__estimators__nb__fit_prior', [True, False]
    )
    # class_prior is typically left as None to learn from data
    # force_alpha parameter for handling zero probabilities
    params['clf__estimators__nb__force_alpha'] = trial.suggest_categorical(
        'clf__estimators__nb__force_alpha', [True, False]
    )
    
    # ========== EBM: CONSERVATIVE SETTINGS ==========
    params['clf__final_estimator__learning_rate'] = trial.suggest_float(
        'clf__final_estimator__learning_rate', 0.001, 0.02, log=True
    )
    params['clf__final_estimator__max_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__max_rounds', [500, 1000, 2000]
    )
    params['clf__final_estimator__early_stopping_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__early_stopping_rounds', [25, 50, 100]
    )
    params['clf__final_estimator__validation_size'] = 0.2
    params['clf__final_estimator__interactions'] = trial.suggest_categorical(
        'clf__final_estimator__interactions', [0, 1, 2]
    )
    params['clf__final_estimator__max_bins'] = trial.suggest_categorical(
        'clf__final_estimator__max_bins', [32, 64, 128]
    )
    
    # ========== STACKING ==========
    params['clf__cv'] = trial.suggest_categorical('clf__cv', [5, 7, 10])
    
    return params

In [13]:
def pipeline_factory(params):
    """
    Properly handle nested parameters for ensemble pipeline
    """
    
    # ========== EXTRACT PARAMETERS FOR EACH COMPONENT ==========
    
    # TF-IDF parameters
    tfidf_params = {}
    for key, value in params.items():
        if key.startswith('tfidf__'):
            param_name = key.replace('tfidf__', '')
            tfidf_params[param_name] = value
    
    # Feature selector parameters
    selector_params = {}
    for key, value in params.items():
        if key.startswith('selector__'):
            param_name = key.replace('selector__', '')
            selector_params[param_name] = value
    
    # LogisticRegression parameters
    lr_params = {'random_state': SEED}
    for key, value in params.items():
        if key.startswith('clf__estimators__lr__'):
            param_name = key.replace('clf__estimators__lr__', '')
            lr_params[param_name] = value
    
    # RandomForest parameters
    rf_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__estimators__rf__'):
            param_name = key.replace('clf__estimators__rf__', '')
            rf_params[param_name] = value
    
    # NaiveBayes parameters
    nb_params = {}
    for key, value in params.items():
        if key.startswith('clf__estimators__nb__'):
            param_name = key.replace('clf__estimators__nb__', '')
            nb_params[param_name] = value
    
    # EBM parameters
    ebm_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__final_estimator__'):
            param_name = key.replace('clf__final_estimator__', '')
            ebm_params[param_name] = value
    
    # Stacking parameters
    stacking_params = {'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__') and '__' not in key.replace('clf__', ''):
            param_name = key.replace('clf__', '')
            stacking_params[param_name] = value
    
    # ========== CREATE COMPONENTS WITH PARAMETERS ==========
    
    # Create TF-IDF vectorizer
    tfidf = TfidfVectorizer(**tfidf_params)
    
    # Create feature selector
    if not selector_params:
        selector_params['k'] = 100  # Default value
    selector = SelectKBest(chi2, **selector_params)
    
    # Create base estimators
    try:
        clf_lr = LogisticRegression(**lr_params)
    except ValueError as e:
        print(f"LogReg parameter error: {e}")
        # Fallback to safe parameters
        clf_lr = LogisticRegression(random_state=SEED, C=1.0, penalty='l2', solver='lbfgs')
    
    clf_rf = RandomForestClassifier(**rf_params)
    clf_nb = MultinomialNB(**nb_params)
    
    # Create EBM meta-learner
    try:
        ebm = ExplainableBoostingClassifier(**ebm_params)
    except Exception as e:
        print(f"EBM parameter error: {e}")
        # Fallback to safe parameters
        ebm = ExplainableBoostingClassifier(random_state=SEED, n_jobs=1)
    
    # Create stacking classifier
    stacking_clf = StackingClassifier(
        estimators=[
            ('lr', clf_lr),
            ('nb', clf_nb),
            ('rf', clf_rf)
        ],
        final_estimator=ebm,
        **stacking_params
    )
    
    return Pipeline([
        ('features', FeatureUnion([
            # TF-IDF features
            ('tfidf_features', Pipeline([
                ("join", FunctionTransformer(combine_text, validate=False)),
                ('tfidf', tfidf),
                ('selector', selector)
            ])),
            
            # Cosine similarity feature with scaling
            ('cosine_sim', Pipeline([
                ('extract', FunctionTransformer(compute_cosine_similarity, validate=False))
            ]))
        ])),
        ('clf', stacking_clf)
    ])

optuna_kwargs = {
    "n_trials": 30,        # Increase from 5 to 20 for better optimization
    "cv_folds": 20,         # Reduce from 10 to 5 for faster training
    "scoring": "accuracy",
    "random_state": SEED,
    "optimise":True
}

experiment = Experiment(
    name=f"L1 Regularization optimization EBM chi2 stack",
    description=f"L1 focused hyperparameter tuning",
    pipeline_factory=pipeline_factory,
    param_space=l1_regularization_param_space
)

manager.run_experiment(experiment, splits=splits, **optuna_kwargs)


=== Running Experiment: L1 Regularization optimization EBM chi2 stack ===


Hyperparameter Optimization (Custom Val Split):   0%|                                         | 0/30 [00:00<?, ?trial/s][I 2025-07-16 02:54:24,818] A new study created in memory with name: no-name-680c42c3-1310-4033-b2b1-c259c5695335
Hyperparameter Optimization (Custom Val Split):   3%| | 1/30 [00:39<19:13, 39.78s/trial, Train: 0.7004 | Val: 0.6742 | B[I 2025-07-16 02:55:04,596] Trial 0 finished with value: 0.6741935483870968 and parameters: {'tfidf__max_features': 3000, 'tfidf__ngram_range': (1, 1), 'tfidf__min_df': 0.01, 'tfidf__max_df': 0.85, 'selector__k': 500, 'clf__estimators__lr__solver': 'liblinear', 'clf__estimators__lr__C': 0.35372493610517736, 'clf__estimators__rf__n_estimators': 150, 'clf__estimators__rf__max_depth': 3, 'clf__estimators__rf__min_samples_split': 10, 'clf__estimators__rf__min_samples_leaf': 5, 'clf__estimators__rf__max_features': 'log2', 'clf__estimators__nb__alpha': 0.4677356556014939, 'clf__estimators__nb__fit_prior': True, 'clf__estimators__nb__force_alpha


🎯 Optimization completed using Custom Val Split!
   Best score: 0.7301
   Total trials: 30
🔧 Training final model with best parameters...
✅ Training complete!
📊 Logging optimization summary...
✅ Optimization summary logged!

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.6867

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.6856
   Micro F1:     0.6867
   Weighted F1:  0.6856

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.6894  R: 0.6867
   Micro    - P: 0.6867  R: 0.6867
   Weighted - P: 0.6894  R: 0.6867

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6667     0.7468     0.7045        857
   Not Fit              0.7122     0.6266     0.6667        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6894     0.6867     0.6856       1714
   weighted avg 

<utils.ExperimentManger.Experiment at 0x7f9c2ddaea20>

## L2 Parameter Space

In [14]:

def l2_regularization_param_space(trial):
    """
    Parameter space focused on L2 regularization (Ridge)
    Uses lbfgs/saga solvers that work well with L2
    Updated with proper MultinomialNB parameters
    """
    params = {}
    
    # ========== TF-IDF: AGGRESSIVE REGULARIZATION ==========
    params['tfidf__max_features'] = trial.suggest_categorical('tfidf__max_features', [
        1000, 2000, 3000, 5000
    ])
    params['tfidf__ngram_range'] = trial.suggest_categorical('tfidf__ngram_range', [
        (1, 1), (1, 2)
    ])
    params['tfidf__min_df'] = trial.suggest_categorical('tfidf__min_df', [
        3, 5, 10, 0.01, 0.02
    ])
    params['tfidf__max_df'] = trial.suggest_categorical('tfidf__max_df', [
        0.7, 0.8, 0.85
    ])
    params['tfidf__sublinear_tf'] = True
    params['tfidf__use_idf'] = True
    params['tfidf__stop_words'] = 'english'
    
    # ========== FEATURE SELECTION ==========
    params['selector__k'] = trial.suggest_categorical('selector__k', [
        50, 100, 200, 300, 500
    ])
    
    # ========== LOGISTIC REGRESSION: L2 PENALTY ONLY ==========
    params['clf__estimators__lr__penalty'] = 'l2'  # Fixed to L2
    params['clf__estimators__lr__solver'] = trial.suggest_categorical(
        'clf__estimators__lr__solver', ['lbfgs', 'saga']
    )
    params['clf__estimators__lr__C'] = trial.suggest_float(
        'clf__estimators__lr__C', 0.001, 1.0, log=True
    )
    params['clf__estimators__lr__max_iter'] = 1000
    params['clf__estimators__lr__class_weight'] = 'balanced'
    
    # ========== RANDOM FOREST: PREVENT OVERFITTING ==========
    params['clf__estimators__rf__n_estimators'] = trial.suggest_categorical(
        'clf__estimators__rf__n_estimators', [50, 100, 150]
    )
    params['clf__estimators__rf__max_depth'] = trial.suggest_categorical(
        'clf__estimators__rf__max_depth', [3, 5, 7, 10]
    )
    params['clf__estimators__rf__min_samples_split'] = trial.suggest_categorical(
        'clf__estimators__rf__min_samples_split', [10, 20, 50]
    )
    params['clf__estimators__rf__min_samples_leaf'] = trial.suggest_categorical(
        'clf__estimators__rf__min_samples_leaf', [5, 10, 20]
    )
    params['clf__estimators__rf__max_features'] = trial.suggest_categorical(
        'clf__estimators__rf__max_features', ['sqrt', 'log2']
    )
    params['clf__estimators__rf__class_weight'] = 'balanced'
    params['clf__estimators__rf__bootstrap'] = True
    
    # ========== MULTINOMIAL NAIVE BAYES ==========
    params['clf__estimators__nb__alpha'] = trial.suggest_float(
        'clf__estimators__nb__alpha', 0.1, 10.0, log=True
    )
    params['clf__estimators__nb__fit_prior'] = trial.suggest_categorical(
        'clf__estimators__nb__fit_prior', [True, False]
    )
    # class_prior is typically left as None to learn from data
    # force_alpha parameter for handling zero probabilities
    params['clf__estimators__nb__force_alpha'] = trial.suggest_categorical(
        'clf__estimators__nb__force_alpha', [True, False]
    )
    
    # ========== EBM: CONSERVATIVE SETTINGS ==========
    params['clf__final_estimator__learning_rate'] = trial.suggest_float(
        'clf__final_estimator__learning_rate', 0.001, 0.02, log=True
    )
    params['clf__final_estimator__max_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__max_rounds', [500, 1000, 2000]
    )
    params['clf__final_estimator__early_stopping_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__early_stopping_rounds', [25, 50, 100]
    )
    params['clf__final_estimator__validation_size'] = 0.2
    params['clf__final_estimator__interactions'] = trial.suggest_categorical(
        'clf__final_estimator__interactions', [0, 1, 2]
    )
    params['clf__final_estimator__max_bins'] = trial.suggest_categorical(
        'clf__final_estimator__max_bins', [32, 64, 128]
    )
    
    # ========== STACKING ==========
    params['clf__cv'] = trial.suggest_categorical('clf__cv', [5, 7, 10])
    
    return params

In [15]:
def pipeline_factory(params):
    """
    Properly handle nested parameters for ensemble pipeline
    """
    
    # ========== EXTRACT PARAMETERS FOR EACH COMPONENT ==========
    
    # TF-IDF parameters
    tfidf_params = {}
    for key, value in params.items():
        if key.startswith('tfidf__'):
            param_name = key.replace('tfidf__', '')
            tfidf_params[param_name] = value
    
    # Feature selector parameters
    selector_params = {}
    for key, value in params.items():
        if key.startswith('selector__'):
            param_name = key.replace('selector__', '')
            selector_params[param_name] = value
    
    # LogisticRegression parameters
    lr_params = {'random_state': SEED}
    for key, value in params.items():
        if key.startswith('clf__estimators__lr__'):
            param_name = key.replace('clf__estimators__lr__', '')
            lr_params[param_name] = value
    
    # RandomForest parameters
    rf_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__estimators__rf__'):
            param_name = key.replace('clf__estimators__rf__', '')
            rf_params[param_name] = value
    
    # NaiveBayes parameters
    nb_params = {}
    for key, value in params.items():
        if key.startswith('clf__estimators__nb__'):
            param_name = key.replace('clf__estimators__nb__', '')
            nb_params[param_name] = value
    
    # EBM parameters
    ebm_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__final_estimator__'):
            param_name = key.replace('clf__final_estimator__', '')
            ebm_params[param_name] = value
    
    # Stacking parameters
    stacking_params = {'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__') and '__' not in key.replace('clf__', ''):
            param_name = key.replace('clf__', '')
            stacking_params[param_name] = value
    
    # ========== CREATE COMPONENTS WITH PARAMETERS ==========
    
    # Create TF-IDF vectorizer
    tfidf = TfidfVectorizer(**tfidf_params)
    
    # Create feature selector
    if not selector_params:
        selector_params['k'] = 100  # Default value
    selector = SelectKBest(chi2, **selector_params)
    
    # Create base estimators
    try:
        clf_lr = LogisticRegression(**lr_params)
    except ValueError as e:
        print(f"LogReg parameter error: {e}")
        # Fallback to safe parameters
        clf_lr = LogisticRegression(random_state=SEED, C=1.0, penalty='l2', solver='lbfgs')
    
    clf_rf = RandomForestClassifier(**rf_params)
    clf_nb = MultinomialNB(**nb_params)
    
    # Create EBM meta-learner
    try:
        ebm = ExplainableBoostingClassifier(**ebm_params)
    except Exception as e:
        print(f"EBM parameter error: {e}")
        # Fallback to safe parameters
        ebm = ExplainableBoostingClassifier(random_state=SEED, n_jobs=1)
    
    # Create stacking classifier
    stacking_clf = StackingClassifier(
        estimators=[
            ('lr', clf_lr),
            ('nb', clf_nb),
            ('rf', clf_rf)
        ],
        final_estimator=ebm,
        passthrough=True,
        **stacking_params
    )
    
    return Pipeline([
        ('features', FeatureUnion([
            # TF-IDF features
            ('tfidf_features', Pipeline([
                ("join", FunctionTransformer(combine_text, validate=False)),
                ('tfidf', tfidf),
                ('selector', selector)
            ])),
            
            # Cosine similarity feature with scaling
            ('cosine_sim', Pipeline([
                ('extract', FunctionTransformer(compute_cosine_similarity, validate=False))
            ]))
        ])),
        ('clf', stacking_clf)
    ])

optuna_kwargs = {
    "n_trials": 30,        # Increase from 5 to 20 for better optimization
    "cv_folds": 20,         # Reduce from 10 to 5 for faster training
    "scoring": "accuracy",
    "random_state": SEED,
    "optimise":True
}

experiment = Experiment(
    name=f"L2 Regularization optimization EBM chi2 stack",
    description=f"L2 focused hyperparameter tuning",
    pipeline_factory=pipeline_factory,
    param_space=l2_regularization_param_space
)

manager.run_experiment(experiment, splits=splits, **optuna_kwargs)


=== Running Experiment: L2 Regularization optimization EBM chi2 stack ===


Hyperparameter Optimization (Custom Val Split):   0%|                                         | 0/30 [00:00<?, ?trial/s][I 2025-07-16 03:11:30,424] A new study created in memory with name: no-name-2abe57f3-ab3c-46e4-a6d5-92d7788a3722
Hyperparameter Optimization (Custom Val Split):   3%| | 1/30 [01:05<31:45, 65.72s/trial, Train: 0.7286 | Val: 0.6839 | B[I 2025-07-16 03:12:36,145] Trial 0 finished with value: 0.6838709677419355 and parameters: {'tfidf__max_features': 3000, 'tfidf__ngram_range': (1, 1), 'tfidf__min_df': 3, 'tfidf__max_df': 0.8, 'selector__k': 200, 'clf__estimators__lr__solver': 'lbfgs', 'clf__estimators__lr__C': 0.007586638997371254, 'clf__estimators__rf__n_estimators': 100, 'clf__estimators__rf__max_depth': 7, 'clf__estimators__rf__min_samples_split': 50, 'clf__estimators__rf__min_samples_leaf': 20, 'clf__estimators__rf__max_features': 'log2', 'clf__estimators__nb__alpha': 0.3625454806484498, 'clf__estimators__nb__fit_prior': False, 'clf__estimators__nb__force_alpha': Tr


🎯 Optimization completed using Custom Val Split!
   Best score: 0.7403
   Total trials: 30
🔧 Training final model with best parameters...
✅ Training complete!
📊 Logging optimization summary...
✅ Optimization summary logged!

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.6680

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.6673
   Micro F1:     0.6680
   Weighted F1:  0.6673

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.6695  R: 0.6680
   Micro    - P: 0.6680  R: 0.6680
   Weighted - P: 0.6695  R: 0.6680

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6538     0.7141     0.6827        857
   Not Fit              0.6851     0.6219     0.6520        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6695     0.6680     0.6673       1714
   weighted avg 

<utils.ExperimentManger.Experiment at 0x7f9d4f6b56a0>

## Elasticnet Parameter Space

In [16]:
def elasticnet_regularization_param_space(trial):
    """
    Parameter space focused on ElasticNet regularization
    Uses saga solver (only one that supports elasticnet)
    Updated with proper MultinomialNB parameters
    """
    params = {}
    
    # ========== TF-IDF: AGGRESSIVE REGULARIZATION ==========
    params['tfidf__max_features'] = trial.suggest_categorical('tfidf__max_features', [
        1000, 2000, 3000, 5000
    ])
    params['tfidf__ngram_range'] = trial.suggest_categorical('tfidf__ngram_range', [
        (1, 1), (1, 2)
    ])
    params['tfidf__min_df'] = trial.suggest_categorical('tfidf__min_df', [
        3, 5, 10, 0.01, 0.02
    ])
    params['tfidf__max_df'] = trial.suggest_categorical('tfidf__max_df', [
        0.7, 0.8, 0.85
    ])
    params['tfidf__sublinear_tf'] = True
    params['tfidf__use_idf'] = True
    params['tfidf__stop_words'] = 'english'
    
    # ========== FEATURE SELECTION ==========
    params['selector__k'] = trial.suggest_categorical('selector__k', [
        50, 100, 200, 300, 500
    ])
    
    # ========== LOGISTIC REGRESSION: ELASTICNET PENALTY ==========
    params['clf__estimators__lr__penalty'] = 'elasticnet'  # Fixed to elasticnet
    params['clf__estimators__lr__solver'] = 'saga'  # Only solver that supports elasticnet
    params['clf__estimators__lr__C'] = trial.suggest_float(
        'clf__estimators__lr__C', 0.001, 1.0, log=True
    )
    params['clf__estimators__lr__l1_ratio'] = trial.suggest_float(
        'clf__estimators__lr__l1_ratio', 0.1, 0.9
    )
    params['clf__estimators__lr__max_iter'] = 2000  # ElasticNet may need more iterations
    params['clf__estimators__lr__class_weight'] = 'balanced'
    
    # ========== RANDOM FOREST: PREVENT OVERFITTING ==========
    params['clf__estimators__rf__n_estimators'] = trial.suggest_categorical(
        'clf__estimators__rf__n_estimators', [50, 100, 150]
    )
    params['clf__estimators__rf__max_depth'] = trial.suggest_categorical(
        'clf__estimators__rf__max_depth', [3, 5, 7, 10]
    )
    params['clf__estimators__rf__min_samples_split'] = trial.suggest_categorical(
        'clf__estimators__rf__min_samples_split', [10, 20, 50]
    )
    params['clf__estimators__rf__min_samples_leaf'] = trial.suggest_categorical(
        'clf__estimators__rf__min_samples_leaf', [5, 10, 20]
    )
    params['clf__estimators__rf__max_features'] = trial.suggest_categorical(
        'clf__estimators__rf__max_features', ['sqrt', 'log2']
    )
    params['clf__estimators__rf__class_weight'] = 'balanced'
    params['clf__estimators__rf__bootstrap'] = True
    
    # ========== MULTINOMIAL NAIVE BAYES ==========
    params['clf__estimators__nb__alpha'] = trial.suggest_float(
        'clf__estimators__nb__alpha', 0.1, 10.0, log=True
    )
    params['clf__estimators__nb__fit_prior'] = trial.suggest_categorical(
        'clf__estimators__nb__fit_prior', [True, False]
    )
    # class_prior is typically left as None to learn from data
    # force_alpha parameter for handling zero probabilities
    params['clf__estimators__nb__force_alpha'] = trial.suggest_categorical(
        'clf__estimators__nb__force_alpha', [True, False]
    )
    
    # ========== EBM: CONSERVATIVE SETTINGS ==========
    params['clf__final_estimator__learning_rate'] = trial.suggest_float(
        'clf__final_estimator__learning_rate', 0.001, 0.02, log=True
    )
    params['clf__final_estimator__max_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__max_rounds', [500, 1000, 2000]
    )
    params['clf__final_estimator__early_stopping_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__early_stopping_rounds', [25, 50, 100]
    )
    params['clf__final_estimator__validation_size'] = 0.2
    params['clf__final_estimator__interactions'] = trial.suggest_categorical(
        'clf__final_estimator__interactions', [0, 1, 2]
    )
    params['clf__final_estimator__max_bins'] = trial.suggest_categorical(
        'clf__final_estimator__max_bins', [32, 64, 128]
    )
    
    # ========== STACKING ==========
    params['clf__cv'] = trial.suggest_categorical('clf__cv', [5, 7, 10])
    
    return params


In [17]:
def pipeline_factory(params):
    """
    Properly handle nested parameters for ensemble pipeline
    """
    
    # ========== EXTRACT PARAMETERS FOR EACH COMPONENT ==========
    
    # TF-IDF parameters
    tfidf_params = {}
    for key, value in params.items():
        if key.startswith('tfidf__'):
            param_name = key.replace('tfidf__', '')
            tfidf_params[param_name] = value
    
    # Feature selector parameters
    selector_params = {}
    for key, value in params.items():
        if key.startswith('selector__'):
            param_name = key.replace('selector__', '')
            selector_params[param_name] = value
    
    # LogisticRegression parameters
    lr_params = {'random_state': SEED}
    for key, value in params.items():
        if key.startswith('clf__estimators__lr__'):
            param_name = key.replace('clf__estimators__lr__', '')
            lr_params[param_name] = value
    
    # RandomForest parameters
    rf_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__estimators__rf__'):
            param_name = key.replace('clf__estimators__rf__', '')
            rf_params[param_name] = value
    
    # NaiveBayes parameters
    nb_params = {}
    for key, value in params.items():
        if key.startswith('clf__estimators__nb__'):
            param_name = key.replace('clf__estimators__nb__', '')
            nb_params[param_name] = value
    
    # EBM parameters
    ebm_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__final_estimator__'):
            param_name = key.replace('clf__final_estimator__', '')
            ebm_params[param_name] = value
    
    # Stacking parameters
    stacking_params = {'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__') and '__' not in key.replace('clf__', ''):
            param_name = key.replace('clf__', '')
            stacking_params[param_name] = value
    
    # ========== CREATE COMPONENTS WITH PARAMETERS ==========
    
    # Create TF-IDF vectorizer
    tfidf = TfidfVectorizer(**tfidf_params)
    
    # Create feature selector
    if not selector_params:
        selector_params['k'] = 100  # Default value
    selector = SelectKBest(chi2, **selector_params)
    
    # Create base estimators
    try:
        clf_lr = LogisticRegression(**lr_params)
    except ValueError as e:
        print(f"LogReg parameter error: {e}")
        # Fallback to safe parameters
        clf_lr = LogisticRegression(random_state=SEED, C=1.0, penalty='l2', solver='lbfgs')
    
    clf_rf = RandomForestClassifier(**rf_params)
    clf_nb = MultinomialNB(**nb_params)
    
    # Create EBM meta-learner
    try:
        ebm = ExplainableBoostingClassifier(**ebm_params)
    except Exception as e:
        print(f"EBM parameter error: {e}")
        # Fallback to safe parameters
        ebm = ExplainableBoostingClassifier(random_state=SEED, n_jobs=1)
    
    # Create stacking classifier
    stacking_clf = StackingClassifier(
        estimators=[
            ('lr', clf_lr),
            ('nb', clf_nb),
            ('rf', clf_rf)
        ],
        final_estimator=ebm,
        **stacking_params
    )
    
    return Pipeline([
        ('features', FeatureUnion([
            # TF-IDF features
            ('tfidf_features', Pipeline([
                ("join", FunctionTransformer(combine_text, validate=False)),
                ('tfidf', tfidf),
                ('selector', selector)
            ])),
            
            # Cosine similarity feature with scaling
            ('cosine_sim', Pipeline([
                ('extract', FunctionTransformer(compute_cosine_similarity, validate=False))
            ]))
        ])),
        ('clf', stacking_clf)
    ])

optuna_kwargs = {
    "n_trials": 30,        # Increase from 5 to 20 for better optimization
    "cv_folds": 20,         # Reduce from 10 to 5 for faster training
    "scoring": "accuracy",
    "random_state": SEED,
    "optimise":True
}

experiment = Experiment(
    name=f"ElasticNet Regularization optimization EBM chi2 stack",
    description=f"ElasticNet focused hyperparameter tuning",
    pipeline_factory=pipeline_factory,
    param_space=elasticnet_regularization_param_space
)

manager.run_experiment(experiment, splits=splits, **optuna_kwargs)


=== Running Experiment: ElasticNet Regularization optimization EBM chi2 stack ===


Hyperparameter Optimization (Custom Val Split):   0%|                                         | 0/30 [00:00<?, ?trial/s][I 2025-07-16 04:29:35,048] A new study created in memory with name: no-name-611e9c5f-fb1d-489f-a7c0-1eb9bf56d631
Hyperparameter Optimization (Custom Val Split):   3%| | 1/30 [00:22<10:56, 22.65s/trial, Train: 0.7066 | Val: 0.6699 | B[I 2025-07-16 04:29:57,701] Trial 0 finished with value: 0.6698924731182796 and parameters: {'tfidf__max_features': 3000, 'tfidf__ngram_range': (1, 1), 'tfidf__min_df': 3, 'tfidf__max_df': 0.85, 'selector__k': 200, 'clf__estimators__lr__C': 0.006309383527266579, 'clf__estimators__lr__l1_ratio': 0.3627443373646766, 'clf__estimators__rf__n_estimators': 100, 'clf__estimators__rf__max_depth': 7, 'clf__estimators__rf__min_samples_split': 50, 'clf__estimators__rf__min_samples_leaf': 20, 'clf__estimators__rf__max_features': 'sqrt', 'clf__estimators__nb__alpha': 5.326266514341449, 'clf__estimators__nb__fit_prior': True, 'clf__estimators__nb__forc


🎯 Optimization completed using Custom Val Split!
   Best score: 0.7172
   Total trials: 30
🔧 Training final model with best parameters...




✅ Training complete!
📊 Logging optimization summary...
✅ Optimization summary logged!

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.6692

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.6691
   Micro F1:     0.6692
   Weighted F1:  0.6691

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.6693  R: 0.6692
   Micro    - P: 0.6692  R: 0.6692
   Weighted - P: 0.6693  R: 0.6692

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6651     0.6814     0.6732        857
   Not Fit              0.6734     0.6569     0.6651        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6693     0.6692     0.6691       1714
   weighted avg         0.6693     0.6692     0.6691       1714

🔢 CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted →
   True 

<utils.ExperimentManger.Experiment at 0x7f9d4f604710>

In [18]:
manager.compare_experiments()

dir = "../experiment_summaries/ensemble_optimization"
filename  = "ensemble_optimization_summaries.csv"

manager.export_experiment_summary(dir, filename)


=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Baseline optimization EBM chi2 stack 0.6855       ✅ Completed
L1 Regularization optimization EBM chi2 stack 0.6744       ✅ Completed
L1 Regularization optimization EBM chi2 stack 0.6867       ✅ Completed
L2 Regularization optimization EBM chi2 stack 0.6680       ✅ Completed
ElasticNet Regularization optimization EBM chi2 stack 0.6692       ✅ Completed
📊 Experiment summary exported to: ensemble_optimization_summaries.csv


Unnamed: 0,name,status,timestamp,description,test_accuracy,test_macro_precision,test_macro_recall,test_macro_f1,test_micro_precision,test_micro_recall,test_micro_f1,test_weighted_precision,test_weighted_recall,test_weighted_f1,test_num_samples,test_num_classes
0,Baseline optimization EBM chi2 stack,Completed,2025-07-16T02:22:33.214350,No hyperparameter tuning yet but parameters ar...,0.685531,0.68804,0.685531,0.684478,0.685531,0.685531,0.685531,0.68804,0.685531,0.684478,1714,2
1,L1 Regularization optimization EBM chi2 stack,Completed,2025-07-16T02:54:24.744163,L1 focused hyperparameter tuning,0.674446,0.675756,0.674446,0.673838,0.674446,0.674446,0.674446,0.675756,0.674446,0.673838,1714,2
2,L1 Regularization optimization EBM chi2 stack,Completed,2025-07-16T03:11:30.387340,L1 focused hyperparameter tuning,0.686698,0.689434,0.686698,0.685562,0.686698,0.686698,0.686698,0.689434,0.686698,0.685562,1714,2
3,L2 Regularization optimization EBM chi2 stack,Completed,2025-07-16T04:29:34.834919,L2 focused hyperparameter tuning,0.668028,0.669468,0.668028,0.667321,0.668028,0.668028,0.668028,0.669468,0.668028,0.667321,1714,2
4,ElasticNet Regularization optimization EBM chi...,Completed,2025-07-16T04:49:40.700575,ElasticNet focused hyperparameter tuning,0.669195,0.669297,0.669195,0.669145,0.669195,0.669195,0.669195,0.669297,0.669195,0.669145,1714,2


### Comprehensive L2

In [23]:
def fixed_comprehensive_l2_space(trial):
    """
    Fixed comprehensive L2 regularization parameter space.
    
    Fixes:
    - Missing bootstrap parameter
    - Invalid parameter combinations  
    - Reduces excessive parameter space
    - Removes non-existent parameters
    """
    params = {}
    
    # ========== TF-IDF: COMPREHENSIVE BUT VALID ==========
    
    # Vocabulary size - reasonable range
    params['tfidf__max_features'] = trial.suggest_categorical('tfidf__max_features', [
        1000, 2000, 3000, 5000, 7500  # Reduced from 9 to 5 options
    ])
    
    # N-gram ranges - valid combinations
    params['tfidf__ngram_range'] = trial.suggest_categorical('tfidf__ngram_range', [
        (1, 1), (1, 2), (1, 3)  # Removed problematic (2,2) and (2,3)
    ])
    
    # Document frequency - focused range
    params['tfidf__min_df'] = trial.suggest_categorical('tfidf__min_df', [
        2, 3, 5, 10, 0.01, 0.02  # Reduced from 13 to 6 options
    ])
    
    params['tfidf__max_df'] = trial.suggest_categorical('tfidf__max_df', [
        0.7, 0.8, 0.85, 0.9  # Reduced from 8 to 4 options
    ])
    
    # Advanced TF-IDF settings
    params['tfidf__sublinear_tf'] = trial.suggest_categorical('tfidf__sublinear_tf', [
        True, False
    ])
    
    params['tfidf__use_idf'] = trial.suggest_categorical('tfidf__use_idf', [
        True, False
    ])
    
    params['tfidf__stop_words'] = trial.suggest_categorical('tfidf__stop_words', [
        'english', None
    ])
    
    params['tfidf__norm'] = trial.suggest_categorical('tfidf__norm', [
        'l1', 'l2'  # Removed None to ensure normalization
    ])
    
    params['tfidf__binary'] = trial.suggest_categorical('tfidf__binary', [
        True, False
    ])
    
    # ========== FEATURE SELECTION ==========
    params['selector__k'] = trial.suggest_categorical('selector__k', [
        50, 100, 200, 300, 500, 750  # Reduced from 12 to 6 options
    ])
    
    # ========== LOGISTIC REGRESSION: FIXED L2 ==========
    
    params['clf__estimators__lr__penalty'] = 'l2'
    
    # Solvers - removed problematic 'sag' 
    params['clf__estimators__lr__solver'] = trial.suggest_categorical(
        'clf__estimators__lr__solver', [
            'lbfgs', 'saga', 'newton-cg'  # Removed 'sag' for stability
        ]
    )
    
    params['clf__estimators__lr__C'] = trial.suggest_float(
        'clf__estimators__lr__C', 1e-4, 10.0, log=True  # Reduced range
    )
    
    params['clf__estimators__lr__max_iter'] = trial.suggest_categorical(
        'clf__estimators__lr__max_iter', [1000, 2000]  # Reduced options
    )
    
    params['clf__estimators__lr__class_weight'] = trial.suggest_categorical(
        'clf__estimators__lr__class_weight', ['balanced', None]
    )
    
    params['clf__estimators__lr__tol'] = trial.suggest_float(
        'clf__estimators__lr__tol', 1e-5, 1e-3, log=True
    )
    
    # ========== RANDOM FOREST: FIXED BOOTSTRAP ISSUES ==========
    
    params['clf__estimators__rf__n_estimators'] = trial.suggest_categorical(
        'clf__estimators__rf__n_estimators', [100, 150, 200]  # Reduced options
    )
    
    params['clf__estimators__rf__max_depth'] = trial.suggest_categorical(
        'clf__estimators__rf__max_depth', [5, 10, 15, None]  # Reduced options
    )
    
    # Fixed criterion - removed 'log_loss'
    params['clf__estimators__rf__criterion'] = trial.suggest_categorical(
        'clf__estimators__rf__criterion', ['gini', 'entropy']
    )
    
    params['clf__estimators__rf__min_samples_split'] = trial.suggest_categorical(
        'clf__estimators__rf__min_samples_split', [10, 20, 50]
    )
    
    params['clf__estimators__rf__min_samples_leaf'] = trial.suggest_categorical(
        'clf__estimators__rf__min_samples_leaf', [5, 10, 20]
    )
    
    params['clf__estimators__rf__max_features'] = trial.suggest_categorical(
        'clf__estimators__rf__max_features', ['sqrt', 'log2', None]
    )
    
    # FIXED: Bootstrap parameter (was missing!)
    bootstrap_enabled = trial.suggest_categorical(
        'clf__estimators__rf__bootstrap', [True, False]
    )
    params['clf__estimators__rf__bootstrap'] = bootstrap_enabled
    
    # max_samples only valid when bootstrap=True
    if bootstrap_enabled:
        params['clf__estimators__rf__max_samples'] = trial.suggest_categorical(
            'clf__estimators__rf__max_samples', [None, 0.8, 0.9]
        )
    else:
        params['clf__estimators__rf__max_samples'] = None
    
    params['clf__estimators__rf__class_weight'] = trial.suggest_categorical(
        'clf__estimators__rf__class_weight', ['balanced', None]  # Removed 'balanced_subsample'
    )
    
    # OOB disabled as requested
    params['clf__estimators__rf__oob_score'] = False
    
    # ========== NAIVE BAYES: FIXED PARAMETERS ==========
    
    params['clf__estimators__nb__alpha'] = trial.suggest_float(
        'clf__estimators__nb__alpha', 0.1, 10.0, log=True  # Reduced range
    )
    
    params['clf__estimators__nb__fit_prior'] = trial.suggest_categorical(
        'clf__estimators__nb__fit_prior', [True, False]
    )
    
    # REMOVED: force_alpha doesn't exist in sklearn
    
    # ========== EBM: REASONABLE PARAMETERS ==========
    
    params['clf__final_estimator__learning_rate'] = trial.suggest_float(
        'clf__final_estimator__learning_rate', 0.001, 0.05, log=True
    )
    
    params['clf__final_estimator__max_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__max_rounds', [1000, 2000, 3000]  # Reduced options
    )
    
    params['clf__final_estimator__early_stopping_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__early_stopping_rounds', [50, 100, 150]
    )
    
    params['clf__final_estimator__validation_size'] = trial.suggest_float(
        'clf__final_estimator__validation_size', 0.15, 0.25
    )
    
    params['clf__final_estimator__interactions'] = trial.suggest_categorical(
        'clf__final_estimator__interactions', [0, 1, 2]  # Reduced from 5 options
    )
    
    params['clf__final_estimator__max_bins'] = trial.suggest_categorical(
        'clf__final_estimator__max_bins', [64, 128, 256]  # Reduced options
    )
    
    # EBM regularization
    params['clf__final_estimator__outer_bags'] = trial.suggest_categorical(
        'clf__final_estimator__outer_bags', [8, 12, 16]
    )
    
    params['clf__final_estimator__inner_bags'] = trial.suggest_categorical(
        'clf__final_estimator__inner_bags', [0, 4]
    )
    
    params['clf__final_estimator__smoothing_rounds'] = trial.suggest_categorical(
        'clf__final_estimator__smoothing_rounds', [0, 100]
    )
    
    params['clf__final_estimator__max_leaves'] = trial.suggest_categorical(
        'clf__final_estimator__max_leaves', [3, 4]
    )
    
    # ========== STACKING: REASONABLE CV ==========
    
    params['clf__cv'] = trial.suggest_categorical('clf__cv', [
        10, 15  # Reduced from 4 to 2 options (20 is too high)
    ])
    
    # Fixed stacking method
    params['clf__stack_method'] = trial.suggest_categorical('clf__stack_method', [
        'auto', 'predict_proba'  # Most stable options
    ])
    
    return params


def validate_parameter_space():
    """
    Test function to validate the parameter space works.
    """
    import optuna
    
    # Create a test trial
    study = optuna.create_study()
    trial = study.ask()
    
    try:
        params = fixed_comprehensive_l2_space(trial)
        print("✅ Parameter space is valid!")
        print(f"Sample parameters generated: {len(params)} total")
        
        # Check for required RF parameters
        required_rf_params = [
            'clf__estimators__rf__bootstrap',
            'clf__estimators__rf__max_samples',
            'clf__estimators__rf__oob_score'
        ]
        
        for param in required_rf_params:
            if param in params:
                print(f"✅ {param}: {params[param]}")
            else:
                print(f"❌ Missing: {param}")
                
        return True
        
    except Exception as e:
        print(f"❌ Parameter space invalid: {e}")
        return False

# Usage:
"""
# Test the fixed version:
validate_parameter_space()

# Use in your experiment:
experiment = Experiment(
    name="Fixed Comprehensive L2 Regularization",
    description="Corrected parameter space without conflicts",
    pipeline_factory=pipeline_factory,
    param_space=fixed_comprehensive_l2_space
)
"""

'\n# Test the fixed version:\nvalidate_parameter_space()\n\n# Use in your experiment:\nexperiment = Experiment(\n    name="Fixed Comprehensive L2 Regularization",\n    description="Corrected parameter space without conflicts",\n    pipeline_factory=pipeline_factory,\n    param_space=fixed_comprehensive_l2_space\n)\n'

In [25]:
def pipeline_factory(params):
    """
    Properly handle nested parameters for ensemble pipeline
    """
    
    # ========== EXTRACT PARAMETERS FOR EACH COMPONENT ==========
    
    # TF-IDF parameters
    tfidf_params = {}
    for key, value in params.items():
        if key.startswith('tfidf__'):
            param_name = key.replace('tfidf__', '')
            tfidf_params[param_name] = value
    
    # Feature selector parameters
    selector_params = {}
    for key, value in params.items():
        if key.startswith('selector__'):
            param_name = key.replace('selector__', '')
            selector_params[param_name] = value
    
    # LogisticRegression parameters
    lr_params = {'random_state': SEED}
    for key, value in params.items():
        if key.startswith('clf__estimators__lr__'):
            param_name = key.replace('clf__estimators__lr__', '')
            lr_params[param_name] = value
    
    # RandomForest parameters
    rf_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__estimators__rf__'):
            param_name = key.replace('clf__estimators__rf__', '')
            rf_params[param_name] = value
    
    # NaiveBayes parameters
    nb_params = {}
    for key, value in params.items():
        if key.startswith('clf__estimators__nb__'):
            param_name = key.replace('clf__estimators__nb__', '')
            nb_params[param_name] = value
    
    # EBM parameters
    ebm_params = {'random_state': SEED, 'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__final_estimator__'):
            param_name = key.replace('clf__final_estimator__', '')
            ebm_params[param_name] = value
    
    # Stacking parameters
    stacking_params = {'n_jobs': 1}
    for key, value in params.items():
        if key.startswith('clf__') and '__' not in key.replace('clf__', ''):
            param_name = key.replace('clf__', '')
            stacking_params[param_name] = value
    
    # ========== CREATE COMPONENTS WITH PARAMETERS ==========
    
    # Create TF-IDF vectorizer
    tfidf = TfidfVectorizer(**tfidf_params)
    
    # Create feature selector
    if not selector_params:
        selector_params['k'] = 100  # Default value
    selector = SelectKBest(chi2, **selector_params)
    
    # Create base estimators
    try:
        clf_lr = LogisticRegression(**lr_params)
    except ValueError as e:
        print(f"LogReg parameter error: {e}")
        # Fallback to safe parameters
        clf_lr = LogisticRegression(random_state=SEED, C=1.0, penalty='l2', solver='lbfgs')
    
    clf_rf = RandomForestClassifier(**rf_params)
    clf_nb = MultinomialNB(**nb_params)
    
    # Create EBM meta-learner
    try:
        ebm = ExplainableBoostingClassifier(**ebm_params)
    except Exception as e:
        print(f"EBM parameter error: {e}")
        # Fallback to safe parameters
        ebm = ExplainableBoostingClassifier(random_state=SEED, n_jobs=1)
    
    # Create stacking classifier
    stacking_clf = StackingClassifier(
        estimators=[
            ('lr', clf_lr),
            ('nb', clf_nb),
            ('rf', clf_rf)
        ],
        final_estimator=ebm,
        passthrough=True,
        **stacking_params
    )
    
    return Pipeline([
        ('features', FeatureUnion([
            # TF-IDF features
            ('tfidf_features', Pipeline([
                ("join", FunctionTransformer(combine_text, validate=False)),
                ('tfidf', tfidf),
                ('selector', selector)
            ])),
            
            # Cosine similarity feature with scaling
            ('cosine_sim', Pipeline([
                ('extract', FunctionTransformer(compute_cosine_similarity, validate=False))
            ]))
        ])),
        ('clf', stacking_clf)
    ])

optuna_kwargs = {
    "n_trials": 80,        # Increase from 5 to 20 for better optimization
    "cv_folds": 15,         # Reduce from 10 to 5 for faster training
    "scoring": "accuracy",
    "random_state": SEED,
    "optimise":True
}

experiment = Experiment(
    name=f"Comprehensive L2 Regularization optimization EBM chi2 stack",
    description=f"Wider L2 focused hyperparameter tuning",
    pipeline_factory=pipeline_factory,
    param_space=fixed_comprehensive_l2_space
)

manager.run_experiment(experiment, splits=splits, **optuna_kwargs)


=== Running Experiment: Comprehensive L2 Regularization optimization EBM chi2 stack ===


Hyperparameter Optimization (Custom Val Split):   0%|                                         | 0/80 [00:00<?, ?trial/s][I 2025-07-16 09:00:06,398] A new study created in memory with name: no-name-ccfc2615-c5a3-496e-af9e-0f0b80edba8a
Hyperparameter Optimization (Custom Val Split):   1%| | 1/80 [01:18<1:43:58, 78.97s/trial, Train: 0.7648 | Val: 0.7027 |[I 2025-07-16 09:01:25,374] Trial 0 finished with value: 0.7026881720430107 and parameters: {'tfidf__max_features': 7500, 'tfidf__ngram_range': (1, 3), 'tfidf__min_df': 2, 'tfidf__max_df': 0.7, 'tfidf__sublinear_tf': False, 'tfidf__use_idf': True, 'tfidf__stop_words': None, 'tfidf__norm': 'l1', 'tfidf__binary': True, 'selector__k': 200, 'clf__estimators__lr__solver': 'newton-cg', 'clf__estimators__lr__C': 0.02500304524716945, 'clf__estimators__lr__max_iter': 1000, 'clf__estimators__lr__class_weight': 'balanced', 'clf__estimators__lr__tol': 0.00039979222371292714, 'clf__estimators__rf__n_estimators': 200, 'clf__estimators__rf__max_depth': 


🎯 Optimization completed using Custom Val Split!
   Best score: 0.7419
   Total trials: 80
🔧 Training final model with best parameters...
✅ Training complete!
📊 Logging optimization summary...
✅ Optimization summary logged!

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.6844

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.6843
   Micro F1:     0.6844
   Weighted F1:  0.6843

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.6846  R: 0.6844
   Micro    - P: 0.6844  R: 0.6844
   Weighted - P: 0.6846  R: 0.6844

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6783     0.7013     0.6896        857
   Not Fit              0.6908     0.6674     0.6789        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6846     0.6844     0.6843       1714
   weighted avg 

<utils.ExperimentManger.Experiment at 0x7f9c2fdf18b0>