# Table of Contents
1. [Imports and definitions](#imports-and-definitions)
2. [Load data](#load-data)
3. [Prepare data](#prepare-data)
4. [Define objectives](#define-objectives)
5. [Start tuning](#start-tuning)
6. [Conclusion](#conclusion)

# Imports and definitions

In [1]:
from pathlib import Path
import time
import pickle

import polars as pl
import numpy as np

import numpy as np
import xgboost as xgb
from sklearn.metrics import f1_score

from sklearn.model_selection import StratifiedKFold, train_test_split

from sklearn.linear_model import RidgeClassifier, LogisticRegression
from sklearn.ensemble import StackingClassifier

import optuna

_ = pl.Config.set_tbl_cols(None)
_ = pl.Config.set_fmt_str_lengths(500)
_ = pl.Config.set_fmt_float("full")

In [2]:
import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning, module='sklearn')
warnings.filterwarnings('ignore', module='lightgbm')

In [None]:
base_dir = Path('/workspaces/data-scientist-at-magenta')
code_dir = base_dir / 'notebooks'
data_dir = code_dir / "data"
features_dir = data_dir / 'features'
train_dir = data_dir / 'train'
db_dir = 'sqlite:///data/models/{}.db'
artifacts_dir = data_dir / "models/artifacts"


In [4]:
# Helper function to load model artifacts
def load_model_artifact(path):
    with open(path, "rb") as f:
        return pickle.load(f)

# Load data

In [11]:
%%time

train = pl.read_parquet(train_dir / 'data-meta-v0-50.parquet')

CPU times: user 5.71 ms, sys: 15.8 ms, total: 21.5 ms
Wall time: 37.2 ms


# Prepare data

In [12]:
X = train.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))
y = train.select('has_done_upselling')

In [13]:
X_meta, X_final_meta, y_meta, y_final_meta = train_test_split(
    X, y, test_size=0.5, random_state=42,
)

In [14]:
train_meta = pl.concat([X_meta, y_meta], how='horizontal')

In [15]:
data_data_b1 = train_meta.filter(pl.col("available_gb") < 25).drop('available_gb')
data_data_b2 = train_meta.filter(pl.col("available_gb") < 25).drop('available_gb')

X_train_data_b1 = data_data_b1.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))
X_train_data_b2 = data_data_b2.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))

y_train_data_b1 = data_data_b1.select('has_done_upselling')
y_train_data_b2 = data_data_b2.select('has_done_upselling')


In [16]:
data_days_b1 = train_meta.filter(pl.col("contract_lifetime_days") < 1000).drop('contract_lifetime_days')
data_days_b2 = train_meta.filter(pl.col("contract_lifetime_days") >= 1000).drop('contract_lifetime_days')

X_train_days_b1 = data_days_b1.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))
X_train_days_b2 = data_days_b2.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))

y_train_days_b1 = data_days_b1.select('has_done_upselling')
y_train_days_b2 = data_days_b2.select('has_done_upselling')


In [17]:
data_age_b1 = train_meta.filter(pl.col("age") < 55).drop('age')
data_age_b2 = train_meta.filter(pl.col("age") >= 55).drop('age')

X_train_age_b1 = data_age_b1.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))
X_train_age_b2 = data_age_b2.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))

y_train_age_b1 = data_age_b1.select('has_done_upselling')
y_train_age_b2 = data_age_b2.select('has_done_upselling')


In [18]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Define objectives

In [27]:
def stacking_objective(trial, X, y, skf, base_models):
    # Define Ridge parameters to tune
    ridge_params = {
        'alpha': trial.suggest_float('alpha', 1e-4, 100.0, log=True),
        'solver': trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr']),
        'class_weight': trial.suggest_categorical('class_weight', [None, 'balanced'])
    }
    
    # Create stacking classifier with Ridge meta-learner
    stacking_clf = StackingClassifier(
        estimators=base_models,  
        final_estimator=RidgeClassifier(**ridge_params),
        cv='prefit',  # No need for the base models to be trained
        stack_method='predict_proba'
    )
    
    cv_scores = []
    optimal_thresholds = []
    
    # Use the same cross-validation approach as base models
    for train_idx, valid_idx in skf.split(X, y):
        # Split data
        train_x, valid_x = X[train_idx], X[valid_idx] 
        train_y, valid_y = y[train_idx], y[valid_idx]
        
        # Train stacking classifier
        stacking_clf.fit(train_x.to_numpy(), train_y.to_numpy().ravel())
        
        # Get probabilities
        preds = stacking_clf.predict(valid_x.to_numpy())
        
        # Find optimal threshold using your existing function
        f1 = f1_score(valid_y.to_numpy().ravel(), preds)
        
        cv_scores.append(f1)
    
    return np.mean(cv_scores)

# Start tuning

In [None]:
n_trials = 50

## Age

In [19]:
# Prepare base models for B1 and B2 separately (age)
base_models_age_b1 = []
base_models_age_b2 = []

# XGBoost B1 (age)
xgb_clf_age_b1 = load_model_artifact(artifacts_dir / "pre_xgb_age_b1.pkl")
# base_models_age_b1.append(('xgb_age_b1', xgb_clf_age_b1))

# XGBoost B2 (age)
xgb_clf_age_b2 = load_model_artifact(artifacts_dir / "pre_xgb_age_b2.pkl")
base_models_age_b2.append(('xgb_age_b2', xgb_clf_age_b2))

# Random Forest B1 (age)
rf_clf_age_b1 = load_model_artifact(artifacts_dir / "pre_rf_age_b1.pkl")
base_models_age_b1.append(('rf_age_b1', rf_clf_age_b1))

# Random Forest B2 (age)
rf_clf_age_b2 = load_model_artifact(artifacts_dir / "pre_rf_age_b2.pkl")
base_models_age_b2.append(('rf_age_b2', rf_clf_age_b2))

# LightGBM B1 (age)
lgb_clf_age_b1 = load_model_artifact(artifacts_dir / "pre_lgb_age_b1.pkl")
# base_models_age_b1.append(('lgb_age_b1', lgb_clf_age_b1))

# LightGBM B2 (age)
lgb_clf_age_b2 = load_model_artifact(artifacts_dir / "pre_lgb_age_b2.pkl")
# base_models_age_b2.append(('lgb_age_b2', lgb_clf_age_b2))

# CatBoost B1 (age)
cat_clf_age_b1 = load_model_artifact(artifacts_dir / "pre_cat_age_b1.pkl")
base_models_age_b1.append(('cat_age_b1', cat_clf_age_b1))

# CatBoost B2 (age)
cat_clf_age_b2 = load_model_artifact(artifacts_dir / "pre_cat_age_b2.pkl")
# base_models_age_b2.append(('cat_age_b2', cat_clf_age_b2))

# HistGradientBoosting B1 (age)
histgb_clf_age_b1 = load_model_artifact(artifacts_dir / "pre_histgb_age_b1.pkl")
base_models_age_b1.append(('histgb_age_b1', histgb_clf_age_b1))

# HistGradientBoosting B2 (age)
histgb_clf_age_b2 = load_model_artifact(artifacts_dir / "pre_histgb_age_b2.pkl")
base_models_age_b2.append(('histgb_age_b2', histgb_clf_age_b2))


In [65]:
age_b1_meta_study = optuna.create_study(
    study_name="meta_ridge_age_b1_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)
age_b1_meta_study.optimize(lambda trial: stacking_objective(trial, X_train_age_b1, y_train_age_b1, skf, base_models_age_b1), n_trials=n_trials)
print(f"\nBest Stacking score: {age_b1_meta_study.best_value}")
print(f"Best Stacking params: {age_b1_meta_study.best_params}")

[I 2025-07-02 09:45:01,087] Using an existing study with name 'meta_ridge_age_b1_optimization' instead of creating a new one.
[I 2025-07-02 09:45:01,840] Trial 202 finished with value: 0.150461467774079 and parameters: {'alpha': 0.019272157857400753, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.17275778398665537.
[I 2025-07-02 09:45:02,505] Trial 203 finished with value: 0.15009002305991098 and parameters: {'alpha': 0.053158517709567, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.17275778398665537.
[I 2025-07-02 09:45:03,133] Trial 204 finished with value: 0.1503646022305179 and parameters: {'alpha': 0.03129721662046393, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.17275778398665537.
[I 2025-07-02 09:45:03,861] Trial 205 finished with value: 0.15039755157033988 and parameters: {'alpha': 0.024086769276383816, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.


Best Stacking score: 0.17275778398665537
Best Stacking params: {'alpha': 0.02944866210721213, 'solver': 'auto', 'class_weight': 'balanced'}


In [58]:
age_b2_meta_study = optuna.create_study(
    study_name="meta_ridge_age_b2_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)
# model: Ridge meta-stacking on age_b2, using X_train_age_b2/y_train_age_b2 and base_models_age_b2
age_b2_meta_study.optimize(lambda trial: stacking_objective(trial, X_train_age_b2, y_train_age_b2, skf, base_models_age_b2), n_trials=n_trials)
print(f"\nBest Stacking score: {age_b2_meta_study.best_value}")
print(f"Best Stacking params: {age_b2_meta_study.best_params}")

[I 2025-07-02 09:39:25,400] Using an existing study with name 'meta_ridge_age_b2_optimization' instead of creating a new one.
[I 2025-07-02 09:39:27,881] Trial 154 finished with value: 0.1338473184009092 and parameters: {'alpha': 6.494589439966312, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.
[I 2025-07-02 09:39:30,362] Trial 155 finished with value: 0.1338473184009092 and parameters: {'alpha': 5.072191838784726, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.
[I 2025-07-02 09:39:32,912] Trial 156 finished with value: 0.0 and parameters: {'alpha': 11.4459957573685, 'solver': 'lsqr', 'class_weight': None}. Best is trial 152 with value: 0.16895526150137577.
[I 2025-07-02 09:39:35,444] Trial 157 finished with value: 0.1338473184009092 and parameters: {'alpha': 7.398333619302283, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.
[I 2025-07-02


Best Stacking score: 0.16895526150137577
Best Stacking params: {'alpha': 9.092803857312077, 'solver': 'lsqr', 'class_weight': 'balanced'}


---

## Contract

In [20]:
# Prepare base models for B1 and B2 separately (days)
base_models_days_b1 = []
base_models_days_b2 = []

# XGBoost B1 (days)
xgb_clf_days_b1 = load_model_artifact(artifacts_dir / "pre_xgb_days_b1.pkl")
# base_models_days_b1.append(('xgb_days_b1', xgb_clf_days_b1))

# XGBoost B2 (days)
xgb_clf_days_b2 = load_model_artifact(artifacts_dir / "pre_xgb_days_b2.pkl")
# base_models_days_b2.append(('xgb_days_b2', xgb_clf_days_b2))

# Random Forest B1 (days)
rf_clf_days_b1 = load_model_artifact(artifacts_dir / "pre_rf_days_b1.pkl")
base_models_days_b1.append(('rf_days_b1', rf_clf_days_b1))

# Random Forest B2 (days)
rf_clf_days_b2 = load_model_artifact(artifacts_dir / "pre_rf_days_b2.pkl")
base_models_days_b2.append(('rf_days_b2', rf_clf_days_b2))

# LightGBM B1 (days)
lgb_clf_days_b1 = load_model_artifact(artifacts_dir / "pre_lgb_days_b1.pkl")
base_models_days_b1.append(('lgb_days_b1', lgb_clf_days_b1))

# LightGBM B2 (days)
lgb_clf_days_b2 = load_model_artifact(artifacts_dir / "pre_lgb_days_b2.pkl")
base_models_days_b2.append(('lgb_days_b2', lgb_clf_days_b2))

# CatBoost B1 (days)
cat_clf_days_b1 = load_model_artifact(artifacts_dir / "pre_cat_days_b1.pkl")
base_models_days_b1.append(('cat_days_b1', cat_clf_days_b1))

# CatBoost B2 (days)
cat_clf_days_b2 = load_model_artifact(artifacts_dir / "pre_cat_days_b2.pkl")
base_models_days_b2.append(('cat_days_b2', cat_clf_days_b2))

# HistGradientBoosting B1 (days)
histgb_clf_days_b1 = load_model_artifact(artifacts_dir / "pre_histgb_days_b1.pkl")
# base_models_days_b1.append(('histgb_days_b1', histgb_clf_days_b1))

# HistGradientBoosting B2 (days)
histgb_clf_days_b2 = load_model_artifact(artifacts_dir / "pre_histgb_days_b2.pkl")
# base_models_days_b2.append(('histgb_days_b2', histgb_clf_days_b2))


In [21]:
days_b1_meta_study = optuna.create_study(
    study_name="meta_ridge_age_b1_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)
days_b1_meta_study.optimize(lambda trial: stacking_objective(trial, X_train_days_b1, y_train_days_b1, skf, base_models_days_b1), n_trials=n_trials)
print(f"\nBest Stacking score: {days_b1_meta_study.best_value}")
print(f"Best Stacking params: {days_b1_meta_study.best_params}")

[I 2025-07-02 11:31:00,589] Using an existing study with name 'meta_ridge_age_b1_optimization' instead of creating a new one.


NameError: name 'n_trials' is not defined

In [61]:
days_b2_meta_study = optuna.create_study(
    study_name="meta_ridge_age_b2_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)
days_b2_meta_study.optimize(lambda trial: stacking_objective(trial, X_train_days_b2, y_train_days_b2, skf, base_models_days_b2), n_trials=n_trials)
print(f"\nBest Stacking score: {days_b2_meta_study.best_value}")
print(f"Best Stacking params: {days_b2_meta_study.best_params}")

[I 2025-07-02 09:41:41,930] Using an existing study with name 'meta_ridge_age_b2_optimization' instead of creating a new one.




[I 2025-07-02 09:41:42,317] Trial 204 finished with value: 0.1638634058805549 and parameters: {'alpha': 8.03723855555765, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:42,702] Trial 205 finished with value: 0.16291864804403522 and parameters: {'alpha': 4.392385748755567, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:43,090] Trial 206 finished with value: 0.16307925790464953 and parameters: {'alpha': 10.977862442101552, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:43,473] Trial 207 finished with value: 0.16348759760105405 and parameters: {'alpha': 6.049954999569, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:43,873] Trial 208 finished with value: 0.16476123039619406 and parameters: {'alpha': 0.19899427803541372, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:44,268] Trial 209 finished with value: 0.0 and parameters: {'alpha': 17.519132306595623, 'solver': 'lsqr', 'class_weight': None}. Best is trial 152 with value: 0.16895526150137577.












[I 2025-07-02 09:41:44,785] Trial 210 finished with value: 0.16506360405588363 and parameters: {'alpha': 23.21412795553544, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:45,211] Trial 211 finished with value: 0.1622006576908725 and parameters: {'alpha': 10.12828830105241, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:45,592] Trial 212 finished with value: 0.16136919949087036 and parameters: {'alpha': 13.577252050387052, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:45,984] Trial 213 finished with value: 0.1616348596284574 and parameters: {'alpha': 9.417525479489303, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:46,350] Trial 214 finished with value: 0.16378385973818763 and parameters: {'alpha': 7.807941244628349, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:46,717] Trial 215 finished with value: 0.16185104276816173 and parameters: {'alpha': 11.57183625328988, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:47,074] Trial 216 finished with value: 0.1622210815363461 and parameters: {'alpha': 5.732627345663196, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:47,473] Trial 217 finished with value: 0.16310500376719703 and parameters: {'alpha': 3.6040833691850813, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:47,833] Trial 218 finished with value: 0.1626768754941088 and parameters: {'alpha': 15.059424822968344, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:48,193] Trial 219 finished with value: 0.1637938597374932 and parameters: {'alpha': 7.265865221823893, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:48,573] Trial 220 finished with value: 0.1641120237422223 and parameters: {'alpha': 19.515959088326166, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:49,003] Trial 221 finished with value: 0.16199361835339837 and parameters: {'alpha': 10.448957731137092, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:49,387] Trial 222 finished with value: 0.16190381178985258 and parameters: {'alpha': 9.84787088998804, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:49,799] Trial 223 finished with value: 0.16138070239743602 and parameters: {'alpha': 12.84162990262663, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:50,160] Trial 224 finished with value: 0.16214992875569376 and parameters: {'alpha': 4.839736815048022, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:50,514] Trial 225 finished with value: 0.16182530246661572 and parameters: {'alpha': 8.74880911995688, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:50,884] Trial 226 finished with value: 0.1622879116353732 and parameters: {'alpha': 16.379926854893437, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:51,238] Trial 227 finished with value: 0.16340192781912258 and parameters: {'alpha': 6.0996609615629165, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:51,607] Trial 228 finished with value: 0.1616426313311911 and parameters: {'alpha': 11.835469753239911, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:51,995] Trial 229 finished with value: 0.16407437804372632 and parameters: {'alpha': 7.936716333316153, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:52,351] Trial 230 finished with value: 0.1661150204641601 and parameters: {'alpha': 26.324917672265023, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:52,742] Trial 231 finished with value: 0.16767369896409898 and parameters: {'alpha': 33.565394898915976, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:53,100] Trial 232 finished with value: 0.16589609337991154 and parameters: {'alpha': 35.581773097905, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:53,459] Trial 233 finished with value: 0.16650695471784077 and parameters: {'alpha': 30.43375815464205, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:53,817] Trial 234 finished with value: 0.1644414843762229 and parameters: {'alpha': 44.38288074864414, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:54,201] Trial 235 finished with value: 0.16393031726145438 and parameters: {'alpha': 69.02636773703178, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:54,587] Trial 236 finished with value: 0.16598610693217536 and parameters: {'alpha': 29.450515801808084, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:54,946] Trial 237 finished with value: 0.16622724739475353 and parameters: {'alpha': 26.490642609199472, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:55,364] Trial 238 finished with value: 0.16610453980491846 and parameters: {'alpha': 37.2872851525823, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:55,727] Trial 239 finished with value: 0.16517044756698246 and parameters: {'alpha': 23.132358199519775, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:56,135] Trial 240 finished with value: 0.16452679997100836 and parameters: {'alpha': 20.082866168792357, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:56,540] Trial 241 finished with value: 0.1667254490761633 and parameters: {'alpha': 31.16335754951614, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:56,896] Trial 242 finished with value: 0.1667254490761633 and parameters: {'alpha': 31.65105931483511, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:57,253] Trial 243 finished with value: 0.16464120534712318 and parameters: {'alpha': 45.1055779091573, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:57,610] Trial 244 finished with value: 0.1663401757436632 and parameters: {'alpha': 26.557196761523034, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:58,046] Trial 245 finished with value: 0.16337856955057412 and parameters: {'alpha': 58.5075815581327, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:58,417] Trial 246 finished with value: 0.16663021664952202 and parameters: {'alpha': 32.58641580143226, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:58,774] Trial 247 finished with value: 0.16610453980491846 and parameters: {'alpha': 36.25724716061283, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:59,127] Trial 248 finished with value: 0.16247325655811676 and parameters: {'alpha': 50.94944768448995, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:41:59,479] Trial 249 finished with value: 0.0 and parameters: {'alpha': 28.78072431128159, 'solver': 'lsqr', 'class_weight': None}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:41:59,861] Trial 250 finished with value: 0.16502177041397625 and parameters: {'alpha': 37.304884962336445, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:42:00,213] Trial 251 finished with value: 0.16352386438426025 and parameters: {'alpha': 65.42872110929454, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.




[I 2025-07-02 09:42:00,573] Trial 252 finished with value: 0.1649986916907502 and parameters: {'alpha': 22.121468606040253, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.








[I 2025-07-02 09:42:00,994] Trial 253 finished with value: 0.16640282019401337 and parameters: {'alpha': 28.229314902726117, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.



Best Stacking score: 0.16895526150137577
Best Stacking params: {'alpha': 9.092803857312077, 'solver': 'lsqr', 'class_weight': 'balanced'}


---


## Data

In [22]:
# Prepare base models for B1 and B2 separately (data)
base_models_data_b1 = []
base_models_data_b2 = []

# XGBoost B1 (data)
xgb_clf_data_b1 = load_model_artifact(artifacts_dir / "pre_xgb_data_b1.pkl")
base_models_data_b1.append(('xgb_data_b1', xgb_clf_data_b1))

# XGBoost B2 (data)
xgb_clf_data_b2 = load_model_artifact(artifacts_dir / "pre_xgb_data_b2.pkl")
base_models_data_b2.append(('xgb_data_b2', xgb_clf_data_b2))

# Random Forest B1 (data)
rf_clf_data_b1 = load_model_artifact(artifacts_dir / "pre_rf_data_b1.pkl")
# base_models_data_b1.append(('rf_data_b1', rf_clf_data_b1))

# Random Forest B2 (data)
rf_clf_data_b2 = load_model_artifact(artifacts_dir / "pre_rf_data_b2.pkl")
# base_models_data_b2.append(('rf_data_b2', rf_clf_data_b2))

# LightGBM B1 (data)
lgb_clf_data_b1 = load_model_artifact(artifacts_dir / "pre_lgb_data_b1.pkl")
# base_models_data_b1.append(('lgb_data_b1', lgb_clf_data_b1))

# LightGBM B2 (data)
lgb_clf_data_b2 = load_model_artifact(artifacts_dir / "pre_lgb_data_b2.pkl")
# base_models_data_b2.append(('lgb_data_b2', lgb_clf_data_b2))

# CatBoost B1 (data)
cat_clf_data_b1 = load_model_artifact(artifacts_dir / "pre_cat_data_b1.pkl")
base_models_data_b1.append(('cat_data_b1', cat_clf_data_b1))

# CatBoost B2 (data)
cat_clf_data_b2 = load_model_artifact(artifacts_dir / "pre_cat_data_b2.pkl")
base_models_data_b2.append(('cat_data_b2', cat_clf_data_b2))

# HistGradientBoosting B1 (data)
histgb_clf_data_b1 = load_model_artifact(artifacts_dir / "pre_histgb_data_b1.pkl")
# base_models_data_b1.append(('histgb_data_b1', histgb_clf_data_b1))

# HistGradientBoosting B2 (data)
histgb_clf_data_b2 = load_model_artifact(artifacts_dir / "pre_histgb_data_b2.pkl")
# base_models_data_b2.append(('histgb_data_b2', histgb_clf_data_b2))


In [63]:
data_b1_meta_study = optuna.create_study(
    study_name="meta_ridge_age_b1_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)
data_b1_meta_study.optimize(lambda trial: stacking_objective(trial, X_train_data_b1, y_train_data_b1, skf, base_models_data_b1), n_trials=n_trials)
print(f"\nBest Stacking score: {data_b1_meta_study.best_value}")
print(f"Best Stacking params: {data_b1_meta_study.best_params}")

[I 2025-07-02 09:42:01,072] Using an existing study with name 'meta_ridge_age_b1_optimization' instead of creating a new one.
[I 2025-07-02 09:42:01,181] Trial 152 finished with value: 0.17275778398665537 and parameters: {'alpha': 0.02944866210721213, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.17275778398665537.
[I 2025-07-02 09:42:01,255] Trial 153 finished with value: 0.17275778398665537 and parameters: {'alpha': 0.023904274181823105, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.17275778398665537.
[I 2025-07-02 09:42:01,328] Trial 154 finished with value: 0.17275778398665537 and parameters: {'alpha': 0.0325062168170535, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.17275778398665537.
[I 2025-07-02 09:42:01,464] Trial 155 finished with value: 0.17275778398665537 and parameters: {'alpha': 0.02682816786018081, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 152 with value:


Best Stacking score: 0.17275778398665537
Best Stacking params: {'alpha': 0.02944866210721213, 'solver': 'auto', 'class_weight': 'balanced'}


In [64]:
data_b2_meta_study = optuna.create_study(
    study_name="meta_ridge_age_b2_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)
data_b2_meta_study.optimize(lambda trial: stacking_objective(trial, X_train_data_b2, y_train_data_b2, skf, base_models_data_b2), n_trials=n_trials)
print(f"\nBest Stacking score: {data_b2_meta_study.best_value}")
print(f"Best Stacking params: {data_b2_meta_study.best_params}")

[I 2025-07-02 09:42:04,968] Using an existing study with name 'meta_ridge_age_b2_optimization' instead of creating a new one.
[I 2025-07-02 09:42:05,094] Trial 254 finished with value: 0.1652602633821382 and parameters: {'alpha': 35.36148647859042, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.
[I 2025-07-02 09:42:05,181] Trial 255 finished with value: 0.16644046874478918 and parameters: {'alpha': 30.339382782760882, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.
[I 2025-07-02 09:42:05,269] Trial 256 finished with value: 0.1632369154883585 and parameters: {'alpha': 45.79977555738733, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.16895526150137577.
[I 2025-07-02 09:42:05,359] Trial 257 finished with value: 0.16644046874478918 and parameters: {'alpha': 30.28631049496538, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 152 with value: 0.168955


Best Stacking score: 0.16906387212859242
Best Stacking params: {'alpha': 25.70489492011668, 'solver': 'auto', 'class_weight': 'balanced'}


---

## Final decision

In [23]:
def create_bins_split_features(X, y, split_configs, base_models, seed=42):
    """
    Create bins split features based on different column thresholds.
    For each row in X, get predictions from all split models and combine them.
    
    Args:
        X (pl.DataFrame): Training features
        y (pl.DataFrame): Training labels
        split_configs (list): List of dicts with keys: 'name', 'column', 'threshold'
        base_models (dict): Dict mapping split names to tuples of (model_b1, model_b2)
        seed (int): Random seed for shuffling
    
    Returns:
        pl.DataFrame: DataFrame with predictions from each split as columns
    """
    predictions_dict = {}
    
    for config in split_configs:
        name = config['name']
        column = config['column']
        threshold = config['threshold']
        
        # Get the models for this split
        model_b1, model_b2 = base_models[name]
        
        # Initialize predictions array for this split
        predictions = np.zeros(X.height)
        
        # For each row, determine which model to use based on the threshold
        column_values = X.select(column).to_numpy().ravel()
        b1_mask = column_values < threshold
        b2_mask = ~b1_mask
        
        # Prepare features without the split column for prediction
        X_for_prediction = X.drop(column).with_row_index("idx")
        
        # Process b1 split (rows where column < threshold)
        if np.any(b1_mask):
            b1_indices = np.where(b1_mask)[0]
            b1_X = X_for_prediction.filter(pl.col("idx").is_in(b1_indices)).drop("idx")
            b1_predictions = model_b1.predict(b1_X.to_numpy())
            predictions[b1_mask] = b1_predictions
        
        # Process b2 split (rows where column >= threshold)
        if np.any(b2_mask):
            b2_indices = np.where(b2_mask)[0]
            b2_X = X_for_prediction.filter(pl.col("idx").is_in(b2_indices)).drop("idx")
            b2_predictions = model_b2.predict(b2_X.to_numpy())
            predictions[b2_mask] = b2_predictions
        
        predictions_dict[name] = predictions
    
    # Create final DataFrame with all predictions as columns plus labels
    predictions_dict['label'] = y.to_numpy().ravel()
    final_df = pl.DataFrame(predictions_dict)
    
    # Shuffle the results
    final_df = final_df.sample(fraction=1.0, with_replacement=False, seed=seed)
    
    return final_df

In [34]:
def final_meta_objective(trial, X, y, skf,
                         split_configs, stacking_models):
    """
    Objective function for optimizing the final meta-learner that takes decisions
    based on predictions from all stacking models (age, days, data splits)
    Args:
        trial: Optuna trial object
        X: Original features including 'age' column
        y: Target variable
        skf: StratifiedKFold object
        stacking_age_b1: StackingClassifier for age < 55
        stacking_age_b2: StackingClassifier for age >= 55
        stacking_days_b1: StackingClassifier for contract_lifetime_days < 1000
        stacking_days_b2: StackingClassifier for contract_lifetime_days >= 1000
        stacking_data_b1: StackingClassifier for available_gb < 25
        stacking_data_b2: StackingClassifier for available_gb >= 25
    """

    ridge_params = {
        'alpha': trial.suggest_float('alpha', 1e-4, 100.0, log=True),
        'solver': trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr']),
        'class_weight': trial.suggest_categorical('class_weight', [None, 'balanced'])
    }
    final_meta_clf = RidgeClassifier(**ridge_params)

    cv_scores = []

    for train_idx, valid_idx in skf.split(X, y):
        train_x, valid_x = X[train_idx], X[valid_idx]
        train_y, valid_y = y[train_idx], y[valid_idx]


        meta__train_X_y = create_bins_split_features(train_x, train_y, split_configs, stacking_models)
        final_meta_clf.fit(meta__train_X_y.select(list(stacking_models.keys())).to_numpy(), meta__train_X_y.select('label').to_numpy().ravel())

        meta__val_X_y = create_bins_split_features(valid_x, valid_y, split_configs, stacking_models)
        preds = final_meta_clf.predict(meta__val_X_y.select(list(stacking_models.keys())).to_numpy())
        f1 = f1_score(meta__val_X_y.select('label').to_numpy().ravel(), preds)
        cv_scores.append(f1)

    return np.mean(cv_scores)


In [25]:
# Helper to extract best params and create RidgeClassifier
def ridge_from_study_path(study_path, study_name):
    study = optuna.load_study(study_name=study_name, storage=study_path)
    params = study.best_params.copy()
    valid_keys = {'alpha', 'solver', 'class_weight'}
    params = {k: v for k, v in params.items() if k in valid_keys}
    return RidgeClassifier(**params)

# Age B1 stacking classifier
stacking_age_b1 = StackingClassifier(
    estimators=base_models_age_b1,
    final_estimator=ridge_from_study_path(db_dir.format('meta_learners_study'), "meta_ridge_age_b1_optimization"),
    cv='prefit',
    stack_method='predict_proba'
)

# Age B2 stacking classifier
stacking_age_b2 = StackingClassifier(
    estimators=base_models_age_b2,
    final_estimator=ridge_from_study_path(db_dir.format('meta_learners_study'), "meta_ridge_age_b2_optimization"),
    cv='prefit',
    stack_method='predict_proba'
)

# Days B1 stacking classifier
stacking_days_b1 = StackingClassifier(
    estimators=base_models_days_b1,
    final_estimator=ridge_from_study_path(db_dir.format('meta_learners_study'), "meta_ridge_age_b1_optimization"),
    cv='prefit',
    stack_method='predict_proba'
)

# Days B2 stacking classifier
stacking_days_b2 = StackingClassifier(
    estimators=base_models_days_b2,
    final_estimator=ridge_from_study_path(db_dir.format('meta_learners_study'), "meta_ridge_age_b2_optimization"),
    cv='prefit',
    stack_method='predict_proba'
)

# Data B1 stacking classifier
stacking_data_b1 = StackingClassifier(
    estimators=base_models_data_b1,
    final_estimator=ridge_from_study_path(db_dir.format('meta_learners_study'), "meta_ridge_age_b1_optimization"),
    cv='prefit',
    stack_method='predict_proba'
)

# Data B2 stacking classifier
stacking_data_b2 = StackingClassifier(
    estimators=base_models_data_b2,
    final_estimator=ridge_from_study_path(db_dir.format('meta_learners_study'), "meta_ridge_age_b2_optimization"),
    cv='prefit',
    stack_method='predict_proba'
)

In [26]:
# Fit stacking models for each split
stacking_age_b1.fit(X_train_age_b1.to_numpy(), y_train_age_b1.to_numpy().ravel())
stacking_age_b2.fit(X_train_age_b2.to_numpy(), y_train_age_b2.to_numpy().ravel())

stacking_days_b1.fit(X_train_days_b1.to_numpy(), y_train_days_b1.to_numpy().ravel())
stacking_days_b2.fit(X_train_days_b2.to_numpy(), y_train_days_b2.to_numpy().ravel())

stacking_data_b1.fit(X_train_data_b1.to_numpy(), y_train_data_b1.to_numpy().ravel())
stacking_data_b2.fit(X_train_data_b2.to_numpy(), y_train_data_b2.to_numpy().ravel())






0,1,2
,estimators,"[('xgb_data_b2', ...), ('cat_data_b2', ...)]"
,final_estimator,RidgeClassifi...ht='balanced')
,cv,'prefit'
,stack_method,'predict_proba'
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,'gbtree'
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6171764116736405
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,alpha,25.70489492011668
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,class_weight,'balanced'
,solver,'auto'
,positive,False
,random_state,


In [None]:
split_configs = [
    {'name': 'age', 'column': 'age', 'threshold': 55},
    {'name': 'days', 'column': 'contract_lifetime_days', 'threshold': 1000}, 
    {'name': 'data', 'column': 'available_gb', 'threshold': 25}
]

stacking_models = {
    'age': (stacking_age_b1, stacking_age_b2),
    'days': (stacking_days_b1, stacking_days_b2),
    'data': (stacking_data_b1, stacking_data_b2)
}

final_meta_study = optuna.create_study(
    study_name="meta_ridge_final_decision_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)

final_meta_study.optimize(
    lambda trial: final_meta_objective(
        trial,
        X_final_meta,
        y_final_meta,
        skf,
        split_configs,
        stacking_models
    ),
    n_trials=n_trials
)

print(f"\nBest Final Meta Stacking score: {final_meta_study.best_value}")
print(f"Best Final Meta Stacking params: {final_meta_study.best_params}")

[I 2025-07-02 10:34:29,946] Using an existing study with name 'meta_ridge_final_decision_optimization' instead of creating a new one.








































[I 2025-07-02 10:34:37,732] Trial 5 finished with value: 0.27347061357085434 and parameters: {'alpha': 3.537691648211495, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.












































[I 2025-07-02 10:34:45,667] Trial 6 finished with value: 0.27347061357085434 and parameters: {'alpha': 27.148421446841496, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.












































[I 2025-07-02 10:34:53,294] Trial 7 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.1009713892975554, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.












































[I 2025-07-02 10:35:01,176] Trial 8 finished with value: 0.0 and parameters: {'alpha': 0.0001526947746660907, 'solver': 'cholesky', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.












































[I 2025-07-02 10:35:08,506] Trial 9 finished with value: 0.0 and parameters: {'alpha': 2.3308892205242193, 'solver': 'auto', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:35:15,762] Trial 10 finished with value: 0.0 and parameters: {'alpha': 0.00014679384031856372, 'solver': 'svd', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:35:22,941] Trial 11 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.17484795288463767, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:35:30,165] Trial 12 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.07734383584631209, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:35:37,366] Trial 13 finished with value: 0.0 and parameters: {'alpha': 28.64481425977168, 'solver': 'lsqr', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:35:44,550] Trial 14 finished with value: 0.27347061357085434 and parameters: {'alpha': 13.493947013069553, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:35:51,755] Trial 15 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.004186748806649165, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:35:58,976] Trial 16 finished with value: 0.27347061357085434 and parameters: {'alpha': 2.2704372992397963, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:36:06,151] Trial 17 finished with value: 0.27347061357085434 and parameters: {'alpha': 87.17203926680678, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:36:13,369] Trial 18 finished with value: 0.27347061357085434 and parameters: {'alpha': 2.4392338118398116, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:36:20,545] Trial 19 finished with value: 0.27347061357085434 and parameters: {'alpha': 7.10972441495784, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:36:27,792] Trial 20 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.4561716225070326, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:36:35,024] Trial 21 finished with value: 0.27347061357085434 and parameters: {'alpha': 92.65097591778692, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:36:42,190] Trial 22 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.008341275056367758, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:36:49,382] Trial 23 finished with value: 0.0 and parameters: {'alpha': 0.4503145848361527, 'solver': 'cholesky', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:36:56,561] Trial 24 finished with value: 0.27347061357085434 and parameters: {'alpha': 16.75739561006734, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:37:03,921] Trial 25 finished with value: 0.27347061357085434 and parameters: {'alpha': 5.037377473137735, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:37:11,161] Trial 26 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.014627967349056463, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:37:18,396] Trial 27 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.04018200600765321, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:37:25,661] Trial 28 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.6443615121918139, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:37:32,984] Trial 29 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.0007808328575114804, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:37:40,089] Trial 30 finished with value: 0.27347061357085434 and parameters: {'alpha': 32.823359718249534, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:37:47,164] Trial 31 finished with value: 0.0 and parameters: {'alpha': 1.0528307525990215, 'solver': 'auto', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:37:54,278] Trial 32 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.14226652931755507, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:01,474] Trial 33 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.030922853907976073, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:08,629] Trial 34 finished with value: 0.0 and parameters: {'alpha': 0.0007183481294840072, 'solver': 'cholesky', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:15,835] Trial 35 finished with value: 0.27347061357085434 and parameters: {'alpha': 7.304418416737006, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:22,916] Trial 36 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.11283638230269082, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:30,092] Trial 37 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.15110707555456468, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:37,232] Trial 38 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.30762584702823315, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:44,354] Trial 39 finished with value: 0.0 and parameters: {'alpha': 1.3809469227930822, 'solver': 'lsqr', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:51,425] Trial 40 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.0451337644197793, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:38:58,527] Trial 41 finished with value: 0.0 and parameters: {'alpha': 26.99551289104398, 'solver': 'lsqr', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:39:05,671] Trial 42 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.2038692765785583, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:39:12,746] Trial 43 finished with value: 0.27347061357085434 and parameters: {'alpha': 3.968362313659672, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:39:19,851] Trial 44 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.9870576305911468, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:39:26,901] Trial 45 finished with value: 0.0 and parameters: {'alpha': 50.257555796551976, 'solver': 'cholesky', 'class_weight': None}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:39:34,030] Trial 46 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.07972437436403142, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:39:41,146] Trial 47 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.004365073293321084, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:39:48,250] Trial 48 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.0214303294695619, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:39:55,386] Trial 49 finished with value: 0.27347061357085434 and parameters: {'alpha': 10.320370447905812, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:40:02,499] Trial 50 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.06330521490906732, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:40:09,553] Trial 51 finished with value: 0.27347061357085434 and parameters: {'alpha': 2.8127935590372575, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:40:16,672] Trial 52 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.010535627894074119, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:40:23,700] Trial 53 finished with value: 0.27347061357085434 and parameters: {'alpha': 0.2482433582226722, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.








































[I 2025-07-02 10:40:30,797] Trial 54 finished with value: 0.27347061357085434 and parameters: {'alpha': 1.717763142980173, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 5 with value: 0.27347061357085434.



Best Final Meta Stacking score: 0.27347061357085434
Best Final Meta Stacking params: {'alpha': 3.537691648211495, 'solver': 'svd', 'class_weight': 'balanced'}


In [8]:
import itertools

# List of all stacking classifier variable names
stacking_clf_names = [
    "stacking_age", "stacking_days", 'stacking_data'
]

# Get all possible non-empty combinations
all_combinations = []
for r in range(2, len(stacking_clf_names)):
    combos = list(itertools.combinations(stacking_clf_names, r))
    all_combinations.extend(combos)

# Print all combinations
for combo in all_combinations:
    print(combo)

('stacking_age', 'stacking_days')
('stacking_age', 'stacking_data')
('stacking_days', 'stacking_data')


In [35]:
split_configs = [
    {'name': 'age', 'column': 'age', 'threshold': 55},
    {'name': 'days', 'column': 'contract_lifetime_days', 'threshold': 1000}, 
]

stacking_models = {
    'age': (stacking_age_b1, stacking_age_b2),
    'days': (stacking_days_b1, stacking_days_b2),
}

final_meta_study = optuna.create_study(
    study_name="meta_ridge_final_decision_age_days_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)

final_meta_study.optimize(
    lambda trial: final_meta_objective(
        trial,
        X_final_meta,
        y_final_meta,
        skf,
        split_configs,
        stacking_models
    ),
    n_trials=n_trials
)

print(f"\nBest Final Meta Stacking score: {final_meta_study.best_value}")
print(f"Best Final Meta Stacking params: {final_meta_study.best_params}")

[I 2025-07-02 11:35:57,301] Using an existing study with name 'meta_ridge_final_decision_age_days_optimization' instead of creating a new one.




































[I 2025-07-02 11:36:00,287] Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.1499221717419494, 'solver': 'cholesky', 'class_weight': None}. Best is trial 3 with value: 0.0.








































[I 2025-07-02 11:36:03,382] Trial 4 finished with value: 0.0 and parameters: {'alpha': 0.6580523741302574, 'solver': 'lsqr', 'class_weight': None}. Best is trial 3 with value: 0.0.








































[I 2025-07-02 11:36:06,372] Trial 5 finished with value: 0.0 and parameters: {'alpha': 3.9843990304032606, 'solver': 'cholesky', 'class_weight': None}. Best is trial 3 with value: 0.0.








































[I 2025-07-02 11:36:09,421] Trial 6 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.10053237676225904, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:12,573] Trial 7 finished with value: 0.1521012440464255 and parameters: {'alpha': 1.3962900273741465, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:15,543] Trial 8 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.16654471239464497, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:18,538] Trial 9 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.01695281906275467, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:21,539] Trial 10 finished with value: 0.0 and parameters: {'alpha': 0.9770425833364799, 'solver': 'svd', 'class_weight': None}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:24,511] Trial 11 finished with value: 0.0 and parameters: {'alpha': 0.00015066026711093306, 'solver': 'svd', 'class_weight': None}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:27,605] Trial 12 finished with value: 0.0 and parameters: {'alpha': 0.00020327198846825062, 'solver': 'auto', 'class_weight': None}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:30,643] Trial 13 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.006522398132600275, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:33,756] Trial 14 finished with value: 0.1521012440464255 and parameters: {'alpha': 41.607124013571976, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:36,979] Trial 15 finished with value: 0.1521012440464255 and parameters: {'alpha': 12.107042778239864, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:40,501] Trial 16 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.012513977745512165, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:43,581] Trial 17 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.002056591257745774, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:46,512] Trial 18 finished with value: 0.1521012440464255 and parameters: {'alpha': 1.1800396105351083, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:49,825] Trial 19 finished with value: 0.1521012440464255 and parameters: {'alpha': 84.33126257160521, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:53,192] Trial 20 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.042652736721030385, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:56,436] Trial 21 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.30549316179486263, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:36:59,540] Trial 22 finished with value: 0.1521012440464255 and parameters: {'alpha': 11.005764797507739, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:02,586] Trial 23 finished with value: 0.1521012440464255 and parameters: {'alpha': 4.473717084744079, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:05,445] Trial 24 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.08565726570270178, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:08,432] Trial 25 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.31752608418172346, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:11,917] Trial 26 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.0020531092785213896, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:15,334] Trial 27 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.04827043322562409, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:18,606] Trial 28 finished with value: 0.1521012440464255 and parameters: {'alpha': 1.9855319648029741, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:21,680] Trial 29 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.27386749195008736, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:24,800] Trial 30 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.031979668299671624, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:28,072] Trial 31 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.108978191354414, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:31,187] Trial 32 finished with value: 0.0 and parameters: {'alpha': 0.14951773656243234, 'solver': 'cholesky', 'class_weight': None}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:34,400] Trial 33 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.414481383215613, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:37,610] Trial 34 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.015305619455819274, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:40,713] Trial 35 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.003820714572176653, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:43,926] Trial 36 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.018907756875241537, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:47,221] Trial 37 finished with value: 0.0 and parameters: {'alpha': 0.0006489442109224028, 'solver': 'cholesky', 'class_weight': None}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:50,443] Trial 38 finished with value: 0.0 and parameters: {'alpha': 2.4576607141330387, 'solver': 'lsqr', 'class_weight': None}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:53,712] Trial 39 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.06859084179475271, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:56,713] Trial 40 finished with value: 0.0 and parameters: {'alpha': 0.7665645099898878, 'solver': 'svd', 'class_weight': None}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:37:59,653] Trial 41 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.007772180481641455, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:02,604] Trial 42 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.17512252916483906, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:05,505] Trial 43 finished with value: 0.0 and parameters: {'alpha': 5.974761287537146, 'solver': 'cholesky', 'class_weight': None}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:08,520] Trial 44 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.0073056459761374785, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:11,424] Trial 45 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.023814029151632086, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:14,575] Trial 46 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.0006698914004862081, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:17,571] Trial 47 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.003739679886164509, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:20,543] Trial 48 finished with value: 0.1521012440464255 and parameters: {'alpha': 1.7452135594258933, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:23,635] Trial 49 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.5313285530723697, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:26,751] Trial 50 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.009529551714246838, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:29,780] Trial 51 finished with value: 0.1521012440464255 and parameters: {'alpha': 21.140060788089222, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.








































[I 2025-07-02 11:38:32,638] Trial 52 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.057340249631567676, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.1521012440464255.



Best Final Meta Stacking score: 0.1521012440464255
Best Final Meta Stacking params: {'alpha': 0.10053237676225904, 'solver': 'auto', 'class_weight': 'balanced'}


In [36]:
split_configs = [
    {'name': 'age', 'column': 'age', 'threshold': 55},
    {'name': 'data', 'column': 'available_gb', 'threshold': 25}
]

stacking_models = {
    'age': (stacking_age_b1, stacking_age_b2),
    'data': (stacking_data_b1, stacking_data_b2)
}

final_meta_study = optuna.create_study(
    study_name="meta_ridge_final_decision_age_data_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)

final_meta_study.optimize(
    lambda trial: final_meta_objective(
        trial,
        X_final_meta,
        y_final_meta,
        skf,
        split_configs,
        stacking_models
    ),
    n_trials=n_trials
)

print(f"\nBest Final Meta Stacking score: {final_meta_study.best_value}")
print(f"Best Final Meta Stacking params: {final_meta_study.best_params}")

[I 2025-07-02 11:38:32,770] Using an existing study with name 'meta_ridge_final_decision_age_data_optimization' instead of creating a new one.
[I 2025-07-02 11:38:35,539] Trial 0 finished with value: 0.1394265483656798 and parameters: {'alpha': 0.0005507046751878807, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 0 with value: 0.1394265483656798.
[I 2025-07-02 11:38:38,251] Trial 1 finished with value: 0.1394265483656798 and parameters: {'alpha': 0.001633500867050798, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 0 with value: 0.1394265483656798.
[I 2025-07-02 11:38:41,007] Trial 2 finished with value: 0.1394265483656798 and parameters: {'alpha': 0.0042856173063518235, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 0 with value: 0.1394265483656798.
[I 2025-07-02 11:38:43,788] Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.21112435325756743, 'solver': 'cholesky', 'class_weight': None}. Best is trial 0 with value: 0.13942654836


Best Final Meta Stacking score: 0.1394265483656798
Best Final Meta Stacking params: {'alpha': 0.0005507046751878807, 'solver': 'auto', 'class_weight': 'balanced'}


In [39]:
split_configs = [
    {'name': 'days', 'column': 'contract_lifetime_days', 'threshold': 1000}, 
    {'name': 'data', 'column': 'available_gb', 'threshold': 25}
]

stacking_models = {
    'days': (stacking_days_b1, stacking_days_b2),
    'data': (stacking_data_b1, stacking_data_b2)
}

final_meta_study = optuna.create_study(
    study_name="meta_ridge_final_decision_days_data_optimization",
    direction="maximize",
    storage=db_dir.format('meta_learners_study'),
    load_if_exists=True
)

final_meta_study.optimize(
    lambda trial: final_meta_objective(
        trial,
        X_final_meta,
        y_final_meta,
        skf,
        split_configs,
        stacking_models
    ),
    n_trials=n_trials
)

print(f"\nBest Final Meta Stacking score: {final_meta_study.best_value}")
print(f"Best Final Meta Stacking params: {final_meta_study.best_params}")

[I 2025-07-02 11:57:35,997] Using an existing study with name 'meta_ridge_final_decision_days_data_optimization' instead of creating a new one.




[I 2025-07-02 11:57:36,457] Trial 2 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.04485671909529745, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:37,109] Trial 3 finished with value: 0.0 and parameters: {'alpha': 2.6034773966469453, 'solver': 'lsqr', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:37,770] Trial 4 finished with value: 0.0 and parameters: {'alpha': 8.550095288785252, 'solver': 'cholesky', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:38,349] Trial 5 finished with value: 0.0 and parameters: {'alpha': 0.0002772660260365341, 'solver': 'auto', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:38,923] Trial 6 finished with value: 0.0 and parameters: {'alpha': 0.01688835977600656, 'solver': 'svd', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:39,473] Trial 7 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.0009906785503064782, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:39,992] Trial 8 finished with value: 0.0 and parameters: {'alpha': 0.0006518243533949861, 'solver': 'cholesky', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:40,618] Trial 9 finished with value: 0.0 and parameters: {'alpha': 0.0012579267429528615, 'solver': 'auto', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:41,163] Trial 10 finished with value: 0.0 and parameters: {'alpha': 13.880829025789662, 'solver': 'cholesky', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:41,804] Trial 11 finished with value: 0.0 and parameters: {'alpha': 0.004095369808166635, 'solver': 'lsqr', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:42,390] Trial 12 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.3107856701070394, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:42,914] Trial 13 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.037677334940461604, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:43,507] Trial 14 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.37293378839137353, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:44,161] Trial 15 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.005770402318402244, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:44,716] Trial 16 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.00014609257835010103, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:45,346] Trial 17 finished with value: 0.1521012440464255 and parameters: {'alpha': 70.58505183405182, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:45,980] Trial 18 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.11716058118943126, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.
















[I 2025-07-02 11:57:46,678] Trial 19 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.0026314799349097667, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:47,156] Trial 20 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.04375562511331984, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:47,711] Trial 21 finished with value: 0.1521012440464255 and parameters: {'alpha': 1.1632988421501689, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:48,250] Trial 22 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.015657825775723777, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:48,814] Trial 23 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.23296371475997815, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:49,495] Trial 24 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.3314566518627719, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:50,026] Trial 25 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.7091455041163166, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:50,773] Trial 26 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.09716406657441785, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:51,413] Trial 27 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.011511383157384635, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:52,122] Trial 28 finished with value: 0.1521012440464255 and parameters: {'alpha': 2.996542539268382, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:52,791] Trial 29 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.00042758044397364213, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:53,376] Trial 30 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.001325146783322329, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:54,029] Trial 31 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.00010026068990855262, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:54,527] Trial 32 finished with value: 0.1521012440464255 and parameters: {'alpha': 3.2019694365299043, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:55,150] Trial 33 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.04866587446180634, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:55,767] Trial 34 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.04686930069079662, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:56,371] Trial 35 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.1322727748922322, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:57,032] Trial 36 finished with value: 0.1521012440464255 and parameters: {'alpha': 1.155598505059076, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:57,666] Trial 37 finished with value: 0.0 and parameters: {'alpha': 0.009261781631822615, 'solver': 'svd', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:58,210] Trial 38 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.03351602675814857, 'solver': 'auto', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:58,798] Trial 39 finished with value: 0.0 and parameters: {'alpha': 8.164092146622265, 'solver': 'cholesky', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:57:59,307] Trial 40 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.001382017560485869, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:57:59,855] Trial 41 finished with value: 0.0 and parameters: {'alpha': 0.021253022662865075, 'solver': 'cholesky', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:58:00,358] Trial 42 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.003066508651589148, 'solver': 'svd', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:58:00,894] Trial 43 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.37795406875816634, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:58:01,359] Trial 44 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.1702495906588852, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:58:01,882] Trial 45 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.07935432519313526, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:58:02,342] Trial 46 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.47869770904258485, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:58:02,886] Trial 47 finished with value: 0.0 and parameters: {'alpha': 1.2168230481778841, 'solver': 'auto', 'class_weight': None}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:58:03,429] Trial 48 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.007114377867073566, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:58:03,970] Trial 49 finished with value: 0.1521012440464255 and parameters: {'alpha': 5.720403164263935, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.












[I 2025-07-02 11:58:04,586] Trial 50 finished with value: 0.1521012440464255 and parameters: {'alpha': 28.649107407692668, 'solver': 'lsqr', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.








[I 2025-07-02 11:58:05,114] Trial 51 finished with value: 0.1521012440464255 and parameters: {'alpha': 0.028490694393629, 'solver': 'cholesky', 'class_weight': 'balanced'}. Best is trial 2 with value: 0.1521012440464255.



Best Final Meta Stacking score: 0.1521012440464255
Best Final Meta Stacking params: {'alpha': 0.04485671909529745, 'solver': 'lsqr', 'class_weight': 'balanced'}


In [41]:
def print_study_results(name, study):
    print(f"{name}:")
    print(f"  Best score: {study.best_value}")
    print(f"  Best params: {study.best_params}\n")

storage = db_dir.format('meta_learners_study')

age_b1_meta_study = optuna.load_study(study_name="meta_ridge_age_b1_optimization", storage=storage)
age_b2_meta_study = optuna.load_study(study_name="meta_ridge_age_b2_optimization", storage=storage)
days_b1_meta_study = optuna.load_study(study_name="meta_ridge_age_b1_optimization", storage=storage)
days_b2_meta_study = optuna.load_study(study_name="meta_ridge_age_b2_optimization", storage=storage)
data_b1_meta_study = optuna.load_study(study_name="meta_ridge_age_b1_optimization", storage=storage)
data_b2_meta_study = optuna.load_study(study_name="meta_ridge_age_b2_optimization", storage=storage)
final_meta_study = optuna.load_study(study_name="meta_ridge_final_decision_optimization", storage=storage)
final_meta_age_days_study = optuna.load_study(study_name="meta_ridge_final_decision_age_days_optimization", storage=storage)
final_meta_age_data_study = optuna.load_study(study_name="meta_ridge_final_decision_age_data_optimization", storage=storage)
final_meta_days_data_study = optuna.load_study(study_name="meta_ridge_final_decision_days_data_optimization", storage=storage)


print_study_results("Age B1 Meta Study", age_b1_meta_study)
print_study_results("Age B2 Meta Study", age_b2_meta_study)
print_study_results("Days B1 Meta Study", days_b1_meta_study)
print_study_results("Days B2 Meta Study", days_b2_meta_study)
print_study_results("Data B1 Meta Study", data_b1_meta_study)
print_study_results("Data B2 Meta Study", data_b2_meta_study)
print_study_results("Final Meta Study", final_meta_study)
print_study_results("Final Meta Age-Days Study", final_meta_age_days_study)
print_study_results("Final Meta Age-DataStudy", final_meta_age_data_study)
print_study_results("Final Meta Days-Data Study", final_meta_days_data_study)


Age B1 Meta Study:
  Best score: 0.17275778398665537
  Best params: {'alpha': 0.02944866210721213, 'solver': 'auto', 'class_weight': 'balanced'}

Age B2 Meta Study:
  Best score: 0.16906387212859242
  Best params: {'alpha': 25.70489492011668, 'solver': 'auto', 'class_weight': 'balanced'}

Days B1 Meta Study:
  Best score: 0.17275778398665537
  Best params: {'alpha': 0.02944866210721213, 'solver': 'auto', 'class_weight': 'balanced'}

Days B2 Meta Study:
  Best score: 0.16906387212859242
  Best params: {'alpha': 25.70489492011668, 'solver': 'auto', 'class_weight': 'balanced'}

Data B1 Meta Study:
  Best score: 0.17275778398665537
  Best params: {'alpha': 0.02944866210721213, 'solver': 'auto', 'class_weight': 'balanced'}

Data B2 Meta Study:
  Best score: 0.16906387212859242
  Best params: {'alpha': 25.70489492011668, 'solver': 'auto', 'class_weight': 'balanced'}

Final Meta Study:
  Best score: 0.27347061357085434
  Best params: {'alpha': 3.537691648211495, 'solver': 'svd', 'class_weight