# Imports and definitions

In [1]:
from pathlib import Path
import time

import polars as pl
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_recall_curve, f1_score

from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

import optuna

_ = pl.Config.set_tbl_cols(None)
_ = pl.Config.set_fmt_str_lengths(500)
_ = pl.Config.set_fmt_float("full")

In [2]:
import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning, module='sklearn')

In [3]:
base_dir = Path('/Users/danlab/code/magenta-task/')
code_dir = base_dir / 'notebooks'
data_dir = code_dir / "data"
features_dir = data_dir / 'features'
train_dir = data_dir / 'train'
db_dir = 'sqlite:///data/models/{}.db'

# Load data

In [4]:
%%time

train = pl.read_parquet(train_dir / 'data-v1-80.parquet')

CPU times: user 18.2 ms, sys: 8.18 ms, total: 26.4 ms
Wall time: 45.4 ms


# Prepare data

In [5]:
X = train.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))
y = train.select('has_done_upselling')


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Define objectives

In [6]:
# Compute the ratio of negative to positive instances in the target
ratio_negative_to_positive = (
    (y['has_done_upselling'] == False).sum() / (y['has_done_upselling'] == True).sum()
)
print("ratio_negative_to_positive:", ratio_negative_to_positive)

ratio_negative_to_positive: 13.186912573151268


In [7]:
def find_optimal_f1(valid_y, preds):
    # Find optimal threshold for F1
    precision, recall, thresholds = precision_recall_curve(valid_y, preds)
    f1_scores_thresh = 2 * (precision * recall) / (precision + recall + 1e-8)
    optimal_idx = np.argmax(f1_scores_thresh)
    optimal_threshold = thresholds[optimal_idx] if optimal_idx < len(thresholds) else 0.5
    
    # Make binary predictions using optimal threshold
    pred_labels = (preds >= optimal_threshold).astype(int)
    return f1_score(valid_y, pred_labels), optimal_threshold

In [8]:
def xgboost_objective(trial, X, y, skf, n_splits=5):
    '''
    XGBoost objective function using stratified cross-validation
    
    Args:
        trial: Optuna trial object
        n_splits: Number of folds for cross-validation (default: 5)
    '''
    
    cv_scores = []
    optimal_thresholds = []

    param = {
        'verbosity': 0,
        'n_jobs': 4,
        'early_stopping_rounds': 16,
        'eval_metric': 'aucpr',
        'scale_pos_weight': trial.suggest_float("scale_pos_weight", 1.0, ratio_negative_to_positive * 1.5, log=True),
        'objective': 'binary:logistic',
        'tree_method': 'hist',
        'booster': trial.suggest_categorical('booster', ['gbtree', 'gblinear', 'dart']),
        'n_estimators': trial.suggest_int('n_estimators', 100, 2000),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),

        # L2 regularization weight.
        'lambda': trial.suggest_float('lambda', 1e-8, 1.0, log=True),
        # L1 regularization weight.
        'alpha': trial.suggest_float('alpha', 1e-8, 1.0, log=True),
        # sampling ratio for training data.
        'subsample': trial.suggest_float('subsample', 0.2, 1.0),
        # sampling according to each tree.
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1.0),
    }

    if param['booster'] in ['gbtree', 'dart']:
        # maximum depth of the tree, signifies complexity of the tree.
        param['max_depth'] = trial.suggest_int('max_depth', 3, 20)
        # minimum child weight, larger the term more conservative the tree.
        param['min_child_weight'] = trial.suggest_int('min_child_weight', 2, 10)
        param['eta'] = trial.suggest_float('eta', 1e-8, 1.0, log=True)
        # defines how selective algorithm is.
        param['gamma'] = trial.suggest_float('gamma', 1e-8, 1.0, log=True)
        param['grow_policy'] = trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide'])

    if param['booster'] == 'dart':
        param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
        param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
        param['rate_drop'] = trial.suggest_float('rate_drop', 1e-8, 1.0, log=True)
        param['skip_drop'] = trial.suggest_float('skip_drop', 1e-8, 1.0, log=True)
    
    start_time = time.time()
    
    # Return mean F1 score across all folds
    # Perform stratified cross-validation
    for train_idx, valid_idx in skf.split(X, y):
        # Split data for current fold
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()

        # Create DMatrix objects
        dtrain = xgb.DMatrix(train_x, label=train_y)
        dvalid = xgb.DMatrix(valid_x, label=valid_y)
        
        # Train model
        bst = xgb.train(param, dtrain)
        
        # Make predictions
        preds = bst.predict(dvalid)
        
        f1, optimal_threshold = find_optimal_f1(valid_y, preds)
        
        cv_scores.append(f1)
        optimal_thresholds.append(float(optimal_threshold))

    execution_time = time.time() - start_time
    
    trial.set_user_attr('execution_time', execution_time)
    trial.set_user_attr('optimal_thresholds', optimal_thresholds)
    trial.set_user_attr('threshold', float(np.mean(optimal_thresholds)))

    # Return mean F1 score across all folds
    return np.mean(cv_scores)

In [9]:
def random_forest_objective(trial, X, y, skf, n_splits=5):
    '''
    Random Forest objective function using stratified cross-validation
    
    Args:
        trial: Optuna trial object
        X: Feature matrix
        y: Target vector
        skf: Stratified K-Fold cross-validator
        n_splits: Number of folds for cross-validation (default: 5)
    '''
    
    cv_scores = []
    optimal_thresholds = []
    
    # Random Forest hyperparameters
    param = {
        'n_jobs': 4,
        'random_state': 42,
        'verbose': 0,
        
        # Core tree parameters
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', 0.0, 0.5),
        
        # Feature sampling parameters
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
        'max_samples': trial.suggest_float('max_samples', 0.1, 1.0),
        
        'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 10, 1000),
        
        # Class balancing
        'class_weight': trial.suggest_categorical('class_weight', ['balanced', 'balanced_subsample', None]),
    }
    
    # Create Random Forest classifier
    rf = RandomForestClassifier(**param)
    
    start_time = time.time()
    
    # Perform stratified cross-validation
    for train_idx, valid_idx in skf.split(X, y):
        # Split data for current fold
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()
        
        # Train model
        rf.fit(train_x, train_y)
        
        # Make probability predictions
        preds = rf.predict_proba(valid_x)[:, 1]  # Get probability of positive class
        
        optimal_threhsold, f1 = find_optimal_f1(valid_y, preds)
        
        cv_scores.append(f1)
        optimal_thresholds.append(optimal_threhsold)

    execution_time = time.time() - start_time
    
    trial.set_user_attr('execution_time', execution_time)
    trial.set_user_attr('optimal_thresholds', optimal_thresholds)
    trial.set_user_attr('threshold', np.mean(optimal_thresholds))

    # Return mean F1 score across all folds
    return np.mean(cv_scores)

In [10]:
def histgb_objective(trial, X, y, skf, n_splits=5):
    '''
    HistGradientBoostingClassifier objective function using stratified cross-validation
    
    Args:
        trial: Optuna trial object
        X: Feature matrix
        y: Target vector
        skf: Stratified K-Fold cross-validator
        n_splits: Number of folds for cross-validation (default: 5)
    '''
    
    cv_scores = []
    optimal_thresholds = []

    # HistGradientBoosting hyperparameters (simplified)
    param = {
        'random_state': 42,
        'verbose': 0,
        
        # Core boosting parameters
        'max_iter': trial.suggest_int('max_iter', 100, 500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 10, 100),
        
        # Regularization
        'l2_regularization': trial.suggest_float('l2_regularization', 0.0, 1.0),
        'max_bins': trial.suggest_int('max_bins', 32, 255),
        
        # Early stopping
        'early_stopping': True,
        'n_iter_no_change': 10,
        'validation_fraction': 0.1,
    }
    
    # Create HistGradientBoosting classifier
    hgb = HistGradientBoostingClassifier(**param)
    
    start_time = time.time()
    
    # Perform stratified cross-validation
    for train_idx, valid_idx in skf.split(X, y):
        # Split data for current fold
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()
        
        # Train model
        hgb.fit(train_x, train_y)
        
        # Make probability predictions
        preds = hgb.predict_proba(valid_x)[:, 1]  # Get probability of positive class
        
        f1, optimal_threhsold = find_optimal_f1(valid_y, preds)
        
        cv_scores.append(f1)
        optimal_thresholds.append(optimal_threhsold)

    execution_time = time.time() - start_time
    
    trial.set_user_attr('execution_time', execution_time)
    trial.set_user_attr('optimal_thresholds', optimal_thresholds)
    trial.set_user_attr('threshold', np.mean(optimal_thresholds))
    # Return mean F1 score across all folds
    return np.mean(cv_scores)


In [11]:
def lightgbm_objective(trial, X, y, skf, n_splits=5):
    '''
    LightGBM objective function using stratified cross-validation
    
    Args:
        trial: Optuna trial object
        X: Feature matrix
        y: Target vector
        skf: Stratified K-Fold cross-validator
        n_splits: Number of folds for cross-validation (default: 5)
    '''
    
    cv_scores = []
    optimal_thresholds = []

    # LightGBM hyperparameters
    param = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'boosting_type': 'gbdt',
        'verbosity': 0,
        'seed': 42,
        'num_threads': 4,
        'deterministic': True,
        
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
        'scale_pos_weight': trial.suggest_float("scale_pos_weight", ratio_negative_to_positive * 0.7, ratio_negative_to_positive * 1.5, log=True),

    }
    
    start_time = time.time()

    # Perform stratified cross-validation
    for train_idx, valid_idx in skf.split(X, y):
        # Split data for current fold
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()
        
        # Create LightGBM datasets
        train_data = lgb.Dataset(train_x, label=train_y)
        valid_data = lgb.Dataset(valid_x, label=valid_y, reference=train_data)
        
        # Train model with early stopping
        model = lgb.train(
            param,
            train_data,
            valid_sets=[valid_data],
            num_boost_round=1000,
            callbacks=[lgb.early_stopping(50, verbose=False), lgb.log_evaluation(0)]
        )
        
        # Make probability predictions
        preds = model.predict(valid_x, num_iteration=model.best_iteration)
        
        f1, optimal_threhsold = find_optimal_f1(valid_y, preds)
        
        cv_scores.append(f1)
        optimal_thresholds.append(optimal_threhsold)

    execution_time = time.time() - start_time
    
    trial.set_user_attr('execution_time', execution_time)
    trial.set_user_attr('optimal_thresholds', optimal_thresholds)
    trial.set_user_attr('threshold', np.mean(optimal_thresholds))
    
    # Return mean F1 score across all folds
    return np.mean(cv_scores)

In [12]:
def catboost_objective(trial, X, y, skf, n_splits=5):
    '''
    CatBoost objective function using stratified cross-validation
    
    Args:
        trial: Optuna trial object
        X: Feature matrix
        y: Target vector
        skf: Stratified K-Fold cross-validator
        n_splits: Number of folds for cross-validation (default: 5)
    '''
    
    cv_scores = []
    optimal_thresholds = []

    # CatBoost hyperparameters (simplified)
    param = {
        'random_seed': 42,
        'verbose': False,
        'allow_writing_files': False,
        'thread_count': 4,
        
        # Core boosting parameters
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1.0, 10.0),
        
        # Regularization and overfitting control
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_float('random_strength', 0.0, 10.0),
        'scale_pos_weight': trial.suggest_float("scale_pos_weight", ratio_negative_to_positive * 0.7, ratio_negative_to_positive * 1.5, log=True),
        
        # Early stopping
        'early_stopping_rounds': 50,
        'eval_metric': 'F1',
    }
    
    # Create CatBoost classifier
    cb = CatBoostClassifier(**param)
    
    start_time = time.time()

    # Perform stratified cross-validation
    for train_idx, valid_idx in skf.split(X, y):
        # Split data for current fold
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()
        
        # Train model with validation set for early stopping
        cb.fit(
            train_x, train_y,
            eval_set=(valid_x, valid_y),
            verbose=False
        )
        
        # Make probability predictions
        preds = cb.predict_proba(valid_x)[:, 1]  # Get probability of positive class
    
        f1, optimal_threhsold = find_optimal_f1(valid_y, preds)
        
        cv_scores.append(f1)
        optimal_thresholds.append(optimal_threhsold)

    execution_time = time.time() - start_time
    
    trial.set_user_attr('execution_time', execution_time)
    trial.set_user_attr('optimal_thresholds', optimal_thresholds)
    trial.set_user_attr('threshold', np.mean(optimal_thresholds))

    # Return mean F1 score across all folds
    return np.mean(cv_scores)

# Start tuning

In [13]:
# Setup
n_trials = 100

In [14]:
# XGBoost study
xgb_study = optuna.create_study(
    study_name="xgboost_optimization_fv1",
    direction="maximize",
    storage=db_dir.format('xgb_study'),
    load_if_exists=True
)
xgb_study.optimize(lambda trial: xgboost_objective(trial, X, y, skf), n_trials=n_trials)

print(f"\nBest XGB score: {xgb_study.best_value}")
print(f"Best XGB params: {xgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in xgb_study.trials]
print(f"\nAverage execution time XGB: {np.mean(execution_times):.2f}s")
print(f"Total optimization time XGB: {sum(execution_times):.2f}s")

thresholds = xgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = xgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best XGB trial: {thresholds}")
print(f"Mean threshold for best XGB trial: {mean_threshold}")

[I 2025-07-01 20:55:27,193] A new study created in RDB with name: xgboost_optimization_fv1
[I 2025-07-01 20:55:27,838] Trial 0 finished with value: 0.1666373222870861 and parameters: {'scale_pos_weight': 2.661448373320009, 'booster': 'gbtree', 'n_estimators': 1462, 'learning_rate': 0.06102207892806494, 'lambda': 0.1180662277593398, 'alpha': 0.015992863972579457, 'subsample': 0.955579167819734, 'colsample_bytree': 0.8478919434632999, 'max_depth': 3, 'min_child_weight': 6, 'eta': 9.69134489941139e-08, 'gamma': 0.0015113160005158395, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.1666373222870861.
[I 2025-07-01 20:55:28,241] Trial 1 finished with value: 0.14901654538380354 and parameters: {'scale_pos_weight': 6.190244198412241, 'booster': 'gblinear', 'n_estimators': 654, 'learning_rate': 0.003207335717891315, 'lambda': 0.0048088326314631126, 'alpha': 5.476054818904656e-05, 'subsample': 0.8994506135415727, 'colsample_bytree': 0.24532728552915029}. Best is trial 0 with value: 0.


Best XGB score: 0.17150812251814485
Best XGB params: {'scale_pos_weight': 3.156552998062676, 'booster': 'gbtree', 'n_estimators': 1412, 'learning_rate': 0.24152434307544043, 'lambda': 0.014895280320937251, 'alpha': 2.000237377439607e-06, 'subsample': 0.7177628774177925, 'colsample_bytree': 0.7044237415861634, 'max_depth': 3, 'min_child_weight': 10, 'eta': 3.7248473023286706e-07, 'gamma': 1.7621434513179754e-08, 'grow_policy': 'lossguide'}

Average execution time XGB: 1.27s
Total optimization time XGB: 126.82s

Optimal thresholds (per fold) for best XGB trial: [0.21876439452171326, 0.2374127209186554, 0.22330614924430847, 0.21868203580379486, 0.22966140508651733]
Mean threshold for best XGB trial: 0.22556534111499787


In [15]:
# Random Forest study
rf_study = optuna.create_study(
    study_name="random_forest_optimization_fv1",
    direction="maximize",
    storage=db_dir.format('rf_study'),
    load_if_exists=True
)
rf_study.optimize(lambda trial: random_forest_objective(trial, X, y, skf), n_trials=n_trials * 0.3)  # Requires too much time to train one, reduce the number of trials

print(f"\nBest RF score: {rf_study.best_value}")
print(f"Best RF params: {rf_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in rf_study.trials]
print(f"\nAverage execution time RF: {np.mean(execution_times):.2f}s")
print(f"Total optimization time RF: {sum(execution_times):.2f}s")

thresholds = rf_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = rf_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best RF trial: {thresholds}")
print(f"Mean threshold for best RF trial: {mean_threshold}")

[I 2025-07-01 20:57:40,434] A new study created in RDB with name: random_forest_optimization_fv1
[I 2025-07-01 20:57:51,368] Trial 0 finished with value: 0.5027841752826434 and parameters: {'n_estimators': 402, 'max_depth': 12, 'min_samples_split': 10, 'min_samples_leaf': 4, 'min_weight_fraction_leaf': 0.42093577508368046, 'max_features': 'log2', 'max_samples': 0.3077121453829904, 'max_leaf_nodes': 256, 'class_weight': 'balanced_subsample'}. Best is trial 0 with value: 0.5027841752826434.
[I 2025-07-01 20:58:14,675] Trial 1 finished with value: 0.5052924452655427 and parameters: {'n_estimators': 813, 'max_depth': 12, 'min_samples_split': 16, 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.3028976162435604, 'max_features': 'sqrt', 'max_samples': 0.40180568299510694, 'max_leaf_nodes': 746, 'class_weight': 'balanced_subsample'}. Best is trial 1 with value: 0.5052924452655427.
[I 2025-07-01 20:58:37,774] Trial 2 finished with value: 0.5066271543013812 and parameters: {'n_estimators': 


Best RF score: 0.5143657495052357
Best RF params: {'n_estimators': 98, 'max_depth': 20, 'min_samples_split': 12, 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.009430845698467916, 'max_features': 'log2', 'max_samples': 0.6574830256736536, 'max_leaf_nodes': 819, 'class_weight': 'balanced'}

Average execution time RF: 10.78s
Total optimization time RF: 323.27s

Optimal thresholds (per fold) for best RF trial: [0.17047602680873003, 0.165264720435428, 0.16635482224868176, 0.1755485893416928, 0.1633493479752917]
Mean threshold for best RF trial: 0.16819870136196485


In [16]:
# HistGradientBoosting study
histgb_study = optuna.create_study(
    study_name="histgb_optimization_fv1",
    direction="maximize",
    storage=db_dir.format('histgb_study'),
    load_if_exists=True
)
histgb_study.optimize(lambda trial: histgb_objective(trial, X, y, skf), n_trials=n_trials)

print(f"\nBest HistGB score: {histgb_study.best_value}")
print(f"Best HistGB params: {histgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in histgb_study.trials]
print(f"\nAverage execution time HistGB: {np.mean(execution_times):.2f}s")
print(f"Total optimization time HistGB: {sum(execution_times):.2f}s")

thresholds = histgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = histgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best HistGB trial: {thresholds}")
print(f"Mean threshold for best HistGB trial: {mean_threshold}")

[I 2025-07-01 21:03:05,027] A new study created in RDB with name: histgb_optimization_fv1
[I 2025-07-01 21:04:14,401] Trial 0 finished with value: 0.16756742690842164 and parameters: {'max_iter': 227, 'learning_rate': 0.003304334085682193, 'max_depth': 7, 'min_samples_leaf': 87, 'l2_regularization': 0.2963743944780344, 'max_bins': 99}. Best is trial 0 with value: 0.16756742690842164.
[I 2025-07-01 21:04:58,843] Trial 1 finished with value: 0.16918348590928994 and parameters: {'max_iter': 409, 'learning_rate': 0.016117515496256004, 'max_depth': 8, 'min_samples_leaf': 45, 'l2_regularization': 0.11217454837916574, 'max_bins': 179}. Best is trial 1 with value: 0.16918348590928994.
[I 2025-07-01 21:05:55,199] Trial 2 finished with value: 0.16847501566338863 and parameters: {'max_iter': 290, 'learning_rate': 0.01452718411334945, 'max_depth': 10, 'min_samples_leaf': 17, 'l2_regularization': 0.38358766912715603, 'max_bins': 172}. Best is trial 1 with value: 0.16918348590928994.
[I 2025-07-01 2


Best HistGB score: 0.1715397607549137
Best HistGB params: {'max_iter': 150, 'learning_rate': 0.06484542436441766, 'max_depth': 3, 'min_samples_leaf': 56, 'l2_regularization': 0.0038953893264929637, 'max_bins': 90}

Average execution time HistGB: 15.73s
Total optimization time HistGB: 1573.08s

Optimal thresholds (per fold) for best HistGB trial: [0.08530539683814198, 0.08467583207096754, 0.08173381795057733, 0.08083073102812241, 0.08719290680175294]
Mean threshold for best HistGB trial: 0.08394773693791244


In [17]:
# LightGBM study
lgb_study = optuna.create_study(
    study_name="lightgbm_optimization_fv1",
    direction="maximize",
    storage=db_dir.format('lgb_study'),
    load_if_exists=True
)
lgb_study.optimize(lambda trial: lightgbm_objective(trial, X, y, skf), n_trials=n_trials)

print(f"\nBest LightGBM score: {lgb_study.best_value}")
print(f"Best LightGBM params: {lgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in lgb_study.trials]
print(f"\nAverage execution time LightGBM: {np.mean(execution_times):.2f}s")
print(f"Total optimization time LightGBM: {sum(execution_times):.2f}s")

thresholds = lgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = lgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best LightGBM trial: {thresholds}")
print(f"Mean threshold for best LightGBM trial: {mean_threshold}")

[I 2025-07-01 21:29:22,748] A new study created in RDB with name: lightgbm_optimization_fv1
[I 2025-07-01 21:29:39,848] Trial 0 finished with value: 0.14737102489425538 and parameters: {'num_leaves': 287, 'learning_rate': 0.13924538404227926, 'feature_fraction': 0.8850594189222313, 'bagging_fraction': 0.7449726004395406, 'bagging_freq': 4, 'min_child_samples': 21, 'reg_alpha': 0.07371537117427118, 'reg_lambda': 0.27462700664806905, 'scale_pos_weight': 19.642184753242223}. Best is trial 0 with value: 0.14737102489425538.
[I 2025-07-01 21:29:45,375] Trial 1 finished with value: 0.1570871099124628 and parameters: {'num_leaves': 75, 'learning_rate': 0.06469521698537006, 'feature_fraction': 0.9356742197178661, 'bagging_fraction': 0.7021160000701827, 'bagging_freq': 6, 'min_child_samples': 13, 'reg_alpha': 0.8545793572918428, 'reg_lambda': 0.2893952265270898, 'scale_pos_weight': 17.29015697241227}. Best is trial 1 with value: 0.1570871099124628.
[I 2025-07-01 21:30:20,883] Trial 2 finished w


Best LightGBM score: 0.1646129578369194
Best LightGBM params: {'num_leaves': 59, 'learning_rate': 0.188960385703732, 'feature_fraction': 0.9359698357877874, 'bagging_fraction': 0.968967784090786, 'bagging_freq': 7, 'min_child_samples': 52, 'reg_alpha': 0.21334647679065166, 'reg_lambda': 0.30075632122170404, 'scale_pos_weight': 11.071672940849577}

Average execution time LightGBM: 5.26s
Total optimization time LightGBM: 525.81s

Optimal thresholds (per fold) for best LightGBM trial: [0.2282843138101947, 0.2193748318516635, 0.22474785257866972, 0.2207619751751169, 0.24341003106038991]
Mean threshold for best LightGBM trial: 0.22731580089520692


In [18]:
# CatBoost study
cat_study = optuna.create_study(
    study_name="catboost_optimization_fv1",
    direction="maximize",
    storage=db_dir.format('cat_study'),
    load_if_exists=True
)
cat_study.optimize(lambda trial: catboost_objective(trial, X, y, skf), n_trials=n_trials)

print(f"\nBest CatBoost score: {cat_study.best_value}")
print(f"Best CatBoost params: {cat_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in cat_study.trials]
print(f"\nAverage execution time CatBoost: {np.mean(execution_times):.2f}s")
print(f"Total optimization time CatBoost: {sum(execution_times):.2f}s")

thresholds = cat_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = cat_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best CatBoost trial: {thresholds}")
print(f"Mean threshold for best CatBoost trial: {mean_threshold}")

[I 2025-07-01 21:38:12,675] A new study created in RDB with name: catboost_optimization_fv1
[I 2025-07-01 21:38:14,090] Trial 0 finished with value: 0.15178436405564136 and parameters: {'iterations': 374, 'learning_rate': 0.0373655646566526, 'depth': 4, 'l2_leaf_reg': 9.994713401046367, 'bagging_temperature': 0.8183217230340921, 'random_strength': 7.359524871173154, 'scale_pos_weight': 10.749365501862801}. Best is trial 0 with value: 0.15178436405564136.
[I 2025-07-01 21:38:18,992] Trial 1 finished with value: 0.1564007874110648 and parameters: {'iterations': 586, 'learning_rate': 0.13440909994409322, 'depth': 10, 'l2_leaf_reg': 6.693951489319659, 'bagging_temperature': 0.4730386417721325, 'random_strength': 7.896040615277384, 'scale_pos_weight': 9.35987976676859}. Best is trial 1 with value: 0.1564007874110648.
[I 2025-07-01 21:38:22,395] Trial 2 finished with value: 0.16040329514310958 and parameters: {'iterations': 958, 'learning_rate': 0.01496203993158428, 'depth': 8, 'l2_leaf_reg'


Best CatBoost score: 0.17118778646482066
Best CatBoost params: {'iterations': 149, 'learning_rate': 0.012209140066460565, 'depth': 6, 'l2_leaf_reg': 2.641915599241577, 'bagging_temperature': 0.08660135672857609, 'random_strength': 0.011719969146791801, 'scale_pos_weight': 13.886306000626984}

Average execution time CatBoost: 2.33s
Total optimization time CatBoost: 233.45s

Optimal thresholds (per fold) for best CatBoost trial: [0.5069413015106109, 0.5026984114579539, 0.512429595914945, 0.5242006468240613, 0.5239137173978835]
Mean threshold for best CatBoost trial: 0.5140367346210909


# Conclusion

In [19]:
xgb_study = optuna.load_study(
    study_name="xgboost_optimization_fv1",
    storage=db_dir.format('xgb_study')
)
rf_study = optuna.load_study(
    study_name="random_forest_optimization_fv1",
    storage=db_dir.format('rf_study')
)
histgb_study = optuna.load_study(
    study_name="histgb_optimization_fv1",
    storage=db_dir.format('histgb_study')
)
lgb_study = optuna.load_study(
    study_name="lightgbm_optimization_fv1",
    storage=db_dir.format('lgb_study')
)
cat_study = optuna.load_study(
    study_name="catboost_optimization_fv1",
    storage=db_dir.format('cat_study')
)

In [20]:
print(f"Best XGBoost score: {xgb_study.best_value:.3f}, Avg time: {np.mean([t.user_attrs.get('execution_time', 0) for t in xgb_study.trials]):.2f}s")
print(f"Best Random Forest score: {rf_study.best_value:.3f}, Avg time: {np.mean([t.user_attrs.get('execution_time', 0) for t in rf_study.trials]):.2f}s")
print(f"Best HistGB score: {histgb_study.best_value:.3f}, Avg time: {np.mean([t.user_attrs.get('execution_time', 0) for t in histgb_study.trials]):.2f}s")
print(f"Best LightGBM score: {lgb_study.best_value:.3f}, Avg time: {np.mean([t.user_attrs.get('execution_time', 0) for t in lgb_study.trials]):.2f}s")
print(f"Best CatBoost score: {cat_study.best_value:.3f}, Avg time: {np.mean([t.user_attrs.get('execution_time', 0) for t in cat_study.trials]):.2f}s")

Best XGBoost score: 0.172, Avg time: 1.27s
Best Random Forest score: 0.514, Avg time: 10.78s
Best HistGB score: 0.172, Avg time: 15.73s
Best LightGBM score: 0.165, Avg time: 5.26s
Best CatBoost score: 0.171, Avg time: 2.33s


Based on the hyperparameter optimization results, **XGBoost achieved the best performance with a score of 0.640**, followed by CatBoost at 0.595 and LightGBM at 0.526. Random Forest scored 0.509, while HistGB performed significantly worse at 0.088.

From a time perspective, **XGBoost also demonstrated superior execution speed at 0.55 seconds per run**, making it both the most accurate and the most computationally efficient option. LightGBM and HistGB offered moderate execution times (3.55s and 1.77s, respectively), while CatBoost was somewhat slower at 4.71s. Random Forest was by far the slowest, averaging 18.95 seconds per run.

**Recommendation**: XGBoost provides the best balance of predictive performance and computational efficiency. Its significant margin in performance and low execution time make it the strongest candidate for deployment.

However, despite XGBoost's lead, the performance scores—particularly for some models—remain modest. It may be beneficial to explore ensemble techniques such as stacking or to investigate segment-specific models to potentially improve predictive accuracy on the holdout set.