# Imports and definitions

In [70]:
from pathlib import Path
import time

import polars as pl
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_recall_curve, f1_score

from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

import optuna

_ = pl.Config.set_tbl_cols(None)
_ = pl.Config.set_fmt_str_lengths(500)
_ = pl.Config.set_fmt_float("full")

In [2]:
import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning, module='sklearn')

In [3]:
base_dir = Path('/Users/danlab/code/magenta-task/')
code_dir = base_dir / 'notebooks'
data_dir = code_dir / "data"
features_dir = data_dir / 'features'
train_dir = data_dir / 'train'
db_dir = 'sqlite:///data/models/{}.db'

# Load data

In [4]:
%%time

train = pl.read_parquet(train_dir / 'data-v0-80.parquet')

CPU times: user 24 ms, sys: 13.5 ms, total: 37.5 ms
Wall time: 39.1 ms


# Prepare data

In [49]:
age_b_1 = train.filter(pl.col("age") < 55)
age_b_2 = train.filter(pl.col("age") >= 55)

In [50]:
X_1 = age_b_1.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling', 'age']))
y_1 = age_b_1.select('has_done_upselling')

X_2 = age_b_2.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling', 'age']))
y_2 = age_b_2.select('has_done_upselling')


In [51]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Define objectives

In [52]:
# Compute the ratio of negative to positive instances in the target
ratio_negative_to_positive_b_1 = (
    (y_1['has_done_upselling'] == False).sum() / (y_1['has_done_upselling'] == True).sum()
)
print("ratio_negative_to_positive_b_1:", ratio_negative_to_positive_b_1)

ratio_negative_to_positive_b_1: 12.750193199381762


In [53]:
# Compute the ratio of negative to positive instances in the target
ratio_negative_to_positive_b_2 = (
    (y_2['has_done_upselling'] == False).sum() / (y_2['has_done_upselling'] == True).sum()
)
print("ratio_negative_to_positive_b_1:", ratio_negative_to_positive_b_2)

ratio_negative_to_positive_b_1: 18.069114470842333


In [54]:
def find_optimal_f1(valid_y, preds):
    # Find optimal threshold for F1
    precision, recall, thresholds = precision_recall_curve(valid_y, preds)
    f1_scores_thresh = 2 * (precision * recall) / (precision + recall + 1e-8)
    optimal_idx = np.argmax(f1_scores_thresh)
    optimal_threshold = thresholds[optimal_idx] if optimal_idx < len(thresholds) else 0.5
    
    # Make binary predictions using optimal threshold
    pred_labels = (preds >= optimal_threshold).astype(int)
    return f1_score(valid_y, pred_labels), optimal_threshold

In [55]:
def xgboost_objective(trial, X, y, skf, ratio_negative_to_positive, n_splits=5):
    '''
    XGBoost objective function using stratified cross-validation
    
    Args:
        trial: Optuna trial object
        n_splits: Number of folds for cross-validation (default: 5)
    '''
    
    cv_scores = []
    optimal_thresholds = []

    param = {
        'verbosity': 0,
        'n_jobs': 4,
        'early_stopping_rounds': 16,
        'eval_metric': 'aucpr',
        'scale_pos_weight': trial.suggest_float("scale_pos_weight", 1.0, ratio_negative_to_positive * 1.5, log=True),
        'objective': 'binary:logistic',
        'tree_method': 'hist',
        'booster': trial.suggest_categorical('booster', ['gbtree', 'gblinear', 'dart']),
        'n_estimators': trial.suggest_int('n_estimators', 100, 2000),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),

        # L2 regularization weight.
        'lambda': trial.suggest_float('lambda', 1e-8, 1.0, log=True),
        # L1 regularization weight.
        'alpha': trial.suggest_float('alpha', 1e-8, 1.0, log=True),
        # sampling ratio for training data.
        'subsample': trial.suggest_float('subsample', 0.2, 1.0),
        # sampling according to each tree.
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1.0),
    }

    if param['booster'] in ['gbtree', 'dart']:
        # maximum depth of the tree, signifies complexity of the tree.
        param['max_depth'] = trial.suggest_int('max_depth', 3, 20)
        # minimum child weight, larger the term more conservative the tree.
        param['min_child_weight'] = trial.suggest_int('min_child_weight', 2, 10)
        param['eta'] = trial.suggest_float('eta', 1e-8, 1.0, log=True)
        # defines how selective algorithm is.
        param['gamma'] = trial.suggest_float('gamma', 1e-8, 1.0, log=True)
        param['grow_policy'] = trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide'])

    if param['booster'] == 'dart':
        param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
        param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
        param['rate_drop'] = trial.suggest_float('rate_drop', 1e-8, 1.0, log=True)
        param['skip_drop'] = trial.suggest_float('skip_drop', 1e-8, 1.0, log=True)
    
    start_time = time.time()
    
    # Return mean F1 score across all folds
    # Perform stratified cross-validation
    for train_idx, valid_idx in skf.split(X, y):
        # Split data for current fold
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()

        # Create DMatrix objects
        dtrain = xgb.DMatrix(train_x, label=train_y)
        dvalid = xgb.DMatrix(valid_x, label=valid_y)
        
        # Train model
        bst = xgb.train(param, dtrain)
        
        # Make predictions
        preds = bst.predict(dvalid)
        
        f1, optimal_threshold = find_optimal_f1(valid_y, preds)
        
        cv_scores.append(f1)
        optimal_thresholds.append(float(optimal_threshold))

    execution_time = time.time() - start_time
    
    trial.set_user_attr('execution_time', execution_time)
    trial.set_user_attr('optimal_thresholds', optimal_thresholds)
    trial.set_user_attr('threshold', float(np.mean(optimal_thresholds)))

    # Return mean F1 score across all folds
    return np.mean(cv_scores)

In [56]:
def random_forest_objective(trial, X, y, skf, n_splits=5):
    '''
    Random Forest objective function using stratified cross-validation

    Args:
        trial: Optuna trial object
        X: Feature matrix
        y: Target vector
        skf: Stratified K-Fold cross-validator
        n_splits: Number of folds for cross-validation (default: 5)
    '''

    cv_scores = []

    # Random Forest hyperparameters
    param = {
        'n_jobs': 4,
        'random_state': 42,
        'verbose': 0,

        # Core tree parameters
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', 0.0, 0.5),

        # Feature sampling parameters
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
        'max_samples': trial.suggest_float('max_samples', 0.1, 1.0),

        'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 10, 1000),

        # Class balancing
        'class_weight': trial.suggest_categorical('class_weight', ['balanced', 'balanced_subsample', None]),
    }

    rf = RandomForestClassifier(**param)

    start_time = time.time()

    for train_idx, valid_idx in skf.split(X, y):
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()

        rf.fit(train_x, train_y)

        preds = rf.predict(valid_x)

        f1 = f1_score(valid_y, preds)
        cv_scores.append(f1)

    execution_time = time.time() - start_time

    trial.set_user_attr('execution_time', execution_time)

    return np.mean(cv_scores)

In [57]:
def lightgbm_objective(trial, X, y, skf, ratio_negative_to_positive, n_splits=5):
    '''
    LightGBM objective function using stratified cross-validation
    
    Args:
        trial: Optuna trial object
        X: Feature matrix
        y: Target vector
        skf: Stratified K-Fold cross-validator
        n_splits: Number of folds for cross-validation (default: 5)
    '''
    
    cv_scores = []
    optimal_thresholds = []

    # LightGBM hyperparameters
    param = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'boosting_type': 'gbdt',
        'verbosity': 0,
        'seed': 42,
        'num_threads': 4,
        'deterministic': True,
        
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
        'scale_pos_weight': trial.suggest_float("scale_pos_weight", ratio_negative_to_positive * 0.7, ratio_negative_to_positive * 1.5, log=True),

    }
    
    start_time = time.time()

    # Perform stratified cross-validation
    for train_idx, valid_idx in skf.split(X, y):
        # Split data for current fold
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()
        
        # Create LightGBM datasets
        train_data = lgb.Dataset(train_x, label=train_y)
        valid_data = lgb.Dataset(valid_x, label=valid_y, reference=train_data)
        
        # Train model with early stopping
        model = lgb.train(
            param,
            train_data,
            valid_sets=[valid_data],
            num_boost_round=1000,
            callbacks=[lgb.early_stopping(50, verbose=False), lgb.log_evaluation(0)]
        )
        
        # Make probability predictions
        preds = model.predict(valid_x, num_iteration=model.best_iteration)
        
        f1, optimal_threhsold = find_optimal_f1(valid_y, preds)
        
        cv_scores.append(f1)
        optimal_thresholds.append(optimal_threhsold)

    execution_time = time.time() - start_time
    
    trial.set_user_attr('execution_time', execution_time)
    trial.set_user_attr('optimal_thresholds', optimal_thresholds)
    trial.set_user_attr('threshold', np.mean(optimal_thresholds))
    
    # Return mean F1 score across all folds
    return np.mean(cv_scores)

In [58]:
def catboost_objective(trial, X, y, skf, ratio_negative_to_positive, n_splits=5):
    '''
    CatBoost objective function using stratified cross-validation
    
    Args:
        trial: Optuna trial object
        X: Feature matrix
        y: Target vector
        skf: Stratified K-Fold cross-validator
        n_splits: Number of folds for cross-validation (default: 5)
    '''
    
    cv_scores = []
    optimal_thresholds = []

    # CatBoost hyperparameters (simplified)
    param = {
        'random_seed': 42,
        'verbose': False,
        'allow_writing_files': False,
        'thread_count': 4,
        
        # Core boosting parameters
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1.0, 10.0),
        
        # Regularization and overfitting control
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_float('random_strength', 0.0, 10.0),
        'scale_pos_weight': trial.suggest_float("scale_pos_weight", ratio_negative_to_positive * 0.7, ratio_negative_to_positive * 1.5, log=True),
        
        # Early stopping
        'early_stopping_rounds': 50,
        'eval_metric': 'F1',
    }
    
    # Create CatBoost classifier
    cb = CatBoostClassifier(**param)
    
    start_time = time.time()

    # Perform stratified cross-validation
    for train_idx, valid_idx in skf.split(X, y):
        # Split data for current fold
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()
        
        # Train model with validation set for early stopping
        cb.fit(
            train_x, train_y,
            eval_set=(valid_x, valid_y),
            verbose=False
        )
        
        # Make probability predictions
        preds = cb.predict_proba(valid_x)[:, 1]  # Get probability of positive class
    
        f1, optimal_threhsold = find_optimal_f1(valid_y, preds)
        
        cv_scores.append(f1)
        optimal_thresholds.append(optimal_threhsold)

    execution_time = time.time() - start_time
    
    trial.set_user_attr('execution_time', execution_time)
    trial.set_user_attr('optimal_thresholds', optimal_thresholds)
    trial.set_user_attr('threshold', np.mean(optimal_thresholds))

    # Return mean F1 score across all folds
    return np.mean(cv_scores)

In [None]:
def histgb_objective(trial, X, y, skf, n_splits=5):
    '''
    HistGradientBoostingClassifier objective function using stratified cross-validation

    Args:
        trial: Optuna trial object
        X: Feature matrix
        y: Target vector
        skf: Stratified K-Fold cross-validator
        n_splits: Number of folds for cross-validation (default: 5)
    '''

    cv_scores = []

    # HistGradientBoosting hyperparameters (simplified)
    param = {
        'random_state': 42,
        'verbose': 0,

        # Core boosting parameters
        'max_iter': trial.suggest_int('max_iter', 100, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 10, 100),

        # Regularization
        'l2_regularization': trial.suggest_float('l2_regularization', 0.0, 1.0),
        'max_bins': trial.suggest_int('max_bins', 32, 255),

        # Early stopping
        'early_stopping': True,
        'n_iter_no_change': 10,
        'validation_fraction': 0.1,
    }

    hgb = HistGradientBoostingClassifier(**param)

    start_time = time.time()

    for train_idx, valid_idx in skf.split(X, y):
        train_x = X[train_idx].to_numpy()
        valid_x = X[valid_idx].to_numpy()
        train_y = y[train_idx].to_numpy().ravel()
        valid_y = y[valid_idx].to_numpy().ravel()

        hgb.fit(train_x, train_y)

        preds = hgb.predict(valid_x) 
        f1 = f1_score(valid_y, preds)
        cv_scores.append(f1)

    execution_time = time.time() - start_time

    trial.set_user_attr('execution_time', execution_time)

    return np.mean(cv_scores)


# Start tuning

In [59]:
# Setup
n_trials = 100

## XGBoost

In [60]:
# XGBoost study
xgb_study = optuna.create_study(
    study_name="xgboost_optimization_age_b_1",
    direction="maximize",
    storage=db_dir.format('xgb_study'),
    load_if_exists=True
)
xgb_study.optimize(lambda trial: xgboost_objective(trial, X_1, y_1, skf, ratio_negative_to_positive_b_1), n_trials=n_trials)

print(f"\nBest XGB score: {xgb_study.best_value}")
print(f"Best XGB params: {xgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in xgb_study.trials]
print(f"\nAverage execution time XGB: {np.mean(execution_times):.2f}s")
print(f"Total optimization time XGB: {sum(execution_times):.2f}s")

thresholds = xgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = xgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best XGB trial: {thresholds}")
print(f"Mean threshold for best XGB trial: {mean_threshold}")

[I 2025-07-01 20:33:42,309] A new study created in RDB with name: xgboost_optimization_age_b_1


[I 2025-07-01 20:33:42,616] Trial 0 finished with value: 0.1524425048454579 and parameters: {'scale_pos_weight': 7.984161063391847, 'booster': 'gblinear', 'n_estimators': 1785, 'learning_rate': 0.167869384753913, 'lambda': 0.006238398022671035, 'alpha': 0.00010691612397185862, 'subsample': 0.41774865476974576, 'colsample_bytree': 0.9463755855789743}. Best is trial 0 with value: 0.1524425048454579.
[I 2025-07-01 20:33:42,851] Trial 1 finished with value: 0.14761179618842574 and parameters: {'scale_pos_weight': 16.716813747443243, 'booster': 'gblinear', 'n_estimators': 1226, 'learning_rate': 0.009412067687142388, 'lambda': 1.6049004194820735e-07, 'alpha': 0.039839063949419364, 'subsample': 0.954224837674676, 'colsample_bytree': 0.21062040170757285}. Best is trial 0 with value: 0.1524425048454579.
[I 2025-07-01 20:33:43,654] Trial 2 finished with value: 0.13798157430301577 and parameters: {'scale_pos_weight': 16.211749844197445, 'booster': 'dart', 'n_estimators': 1899, 'learning_rate': 0.


Best XGB score: 0.16318799437591117
Best XGB params: {'scale_pos_weight': 1.8490891401240388, 'booster': 'dart', 'n_estimators': 298, 'learning_rate': 0.2175665539990347, 'lambda': 1.070284287482838e-06, 'alpha': 2.443988741260388e-08, 'subsample': 0.980210041555195, 'colsample_bytree': 0.6634667101281074, 'max_depth': 3, 'min_child_weight': 4, 'eta': 0.009310315006680062, 'gamma': 2.0644076081618415e-06, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 0.002734203732356561, 'skip_drop': 4.150607859853549e-05}

Average execution time XGB: 0.72s
Total optimization time XGB: 72.17s

Optimal thresholds (per fold) for best XGB trial: [0.14768944680690765, 0.14368414878845215, 0.1315889209508896, 0.14210809767246246, 0.14151643216609955]
Mean threshold for best XGB trial: 0.1413174092769623


In [61]:
# XGBoost study
xgb_study = optuna.create_study(
    study_name="xgboost_optimization_age_b_2",
    direction="maximize",
    storage=db_dir.format('xgb_study'),
    load_if_exists=True
)
xgb_study.optimize(lambda trial: xgboost_objective(trial, X_2, y_2, skf, ratio_negative_to_positive_b_2), n_trials=n_trials)

print(f"\nBest XGB score: {xgb_study.best_value}")
print(f"Best XGB params: {xgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in xgb_study.trials]
print(f"\nAverage execution time XGB: {np.mean(execution_times):.2f}s")
print(f"Total optimization time XGB: {sum(execution_times):.2f}s")

thresholds = xgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = xgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best XGB trial: {thresholds}")
print(f"Mean threshold for best XGB trial: {mean_threshold}")

[I 2025-07-01 20:34:58,867] A new study created in RDB with name: xgboost_optimization_age_b_2
[I 2025-07-01 20:34:59,506] Trial 0 finished with value: 0.11728419117263804 and parameters: {'scale_pos_weight': 3.875999566181403, 'booster': 'dart', 'n_estimators': 1254, 'learning_rate': 0.0024376882161510154, 'lambda': 2.0360691685430263e-07, 'alpha': 1.1879096309392409e-08, 'subsample': 0.29926477630364107, 'colsample_bytree': 0.6831803674637287, 'max_depth': 19, 'min_child_weight': 3, 'eta': 0.38779903508849245, 'gamma': 0.0005898454239065433, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.9903380647768043, 'skip_drop': 3.043158219600669e-08}. Best is trial 0 with value: 0.11728419117263804.
[I 2025-07-01 20:35:00,146] Trial 1 finished with value: 0.1182162555370859 and parameters: {'scale_pos_weight': 4.454622061910012, 'booster': 'dart', 'n_estimators': 1475, 'learning_rate': 0.1514758710702782, 'lambda': 6.773461882574638e-05, 'alpha'


Best XGB score: 0.15087501055621325
Best XGB params: {'scale_pos_weight': 14.489749561138987, 'booster': 'dart', 'n_estimators': 1791, 'learning_rate': 0.001207184183290601, 'lambda': 0.018561231411393802, 'alpha': 0.10642542092168038, 'subsample': 0.672715129086682, 'colsample_bytree': 0.7564539530008034, 'max_depth': 5, 'min_child_weight': 4, 'eta': 0.027726198358198426, 'gamma': 2.6629265976902274e-06, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 1.7349811254805127e-07, 'skip_drop': 2.700274707738184e-07}

Average execution time XGB: 0.25s
Total optimization time XGB: 24.73s

Optimal thresholds (per fold) for best XGB trial: [0.44705814123153687, 0.44660136103630066, 0.4449523389339447, 0.4457159638404846, 0.44639208912849426]
Mean threshold for best XGB trial: 0.44614397883415224


## Random Forest

In [62]:
# Random Forest study
rf_study = optuna.create_study(
    study_name="random_forest_optimization_age_b_1",
    direction="maximize",
    storage=db_dir.format('rf_study'),
    load_if_exists=True
)
rf_study.optimize(lambda trial: random_forest_objective(trial, X_1, y_1, skf), n_trials=n_trials)  # Requires too much time to train one, reduce the number of trials

print(f"\nBest RF score: {rf_study.best_value}")
print(f"Best RF params: {rf_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in rf_study.trials]
print(f"\nAverage execution time RF: {np.mean(execution_times):.2f}s")
print(f"Total optimization time RF: {sum(execution_times):.2f}s")

[I 2025-07-01 20:35:29,180] A new study created in RDB with name: random_forest_optimization_age_b_1
[I 2025-07-01 20:35:31,648] Trial 0 finished with value: 0.1554214380723521 and parameters: {'n_estimators': 104, 'max_depth': 20, 'min_samples_split': 18, 'min_samples_leaf': 6, 'min_weight_fraction_leaf': 0.20516041250257794, 'max_features': 'log2', 'max_samples': 0.9192651489057665, 'max_leaf_nodes': 709, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.1554214380723521.
[I 2025-07-01 20:35:43,521] Trial 1 finished with value: 0.15482661926786317 and parameters: {'n_estimators': 710, 'max_depth': 6, 'min_samples_split': 2, 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.23034954893981296, 'max_features': 'sqrt', 'max_samples': 0.5654690400796282, 'max_leaf_nodes': 946, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.1554214380723521.
[I 2025-07-01 20:35:49,355] Trial 2 finished with value: 0.15250130333390527 and parameters: {'n_estimators': 359, 'max_depth'


Best RF score: 0.15949585916468387
Best RF params: {'n_estimators': 414, 'max_depth': 6, 'min_samples_split': 4, 'min_samples_leaf': 2, 'min_weight_fraction_leaf': 0.0322792826276814, 'max_features': 'sqrt', 'max_samples': 0.23951992376128442, 'max_leaf_nodes': 138, 'class_weight': 'balanced'}

Average execution time RF: 10.00s
Total optimization time RF: 999.66s


In [63]:
# Random Forest study
rf_study = optuna.create_study(
    study_name="random_forest_optimization_age_b_2",
    direction="maximize",
    storage=db_dir.format('rf_study'),
    load_if_exists=True
)
rf_study.optimize(lambda trial: random_forest_objective(trial, X_2, y_2, skf), n_trials=n_trials)  # Requires too much time to train one, reduce the number of trials

print(f"\nBest RF score: {rf_study.best_value}")
print(f"Best RF params: {rf_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in rf_study.trials]
print(f"\nAverage execution time RF: {np.mean(execution_times):.2f}s")
print(f"Total optimization time RF: {sum(execution_times):.2f}s")

[I 2025-07-01 20:52:13,447] A new study created in RDB with name: random_forest_optimization_age_b_2
[I 2025-07-01 20:52:14,649] Trial 0 finished with value: 0.0 and parameters: {'n_estimators': 139, 'max_depth': 7, 'min_samples_split': 12, 'min_samples_leaf': 2, 'min_weight_fraction_leaf': 0.4557167041727808, 'max_features': 'log2', 'max_samples': 0.343841653636192, 'max_leaf_nodes': 554, 'class_weight': None}. Best is trial 0 with value: 0.0.
[I 2025-07-01 20:52:18,399] Trial 1 finished with value: 0.0 and parameters: {'n_estimators': 486, 'max_depth': 6, 'min_samples_split': 17, 'min_samples_leaf': 2, 'min_weight_fraction_leaf': 0.2396488913335687, 'max_features': 'sqrt', 'max_samples': 0.5261726794293164, 'max_leaf_nodes': 196, 'class_weight': None}. Best is trial 0 with value: 0.0.
[I 2025-07-01 20:52:24,759] Trial 2 finished with value: 0.11636131837452182 and parameters: {'n_estimators': 541, 'max_depth': 14, 'min_samples_split': 4, 'min_samples_leaf': 10, 'min_weight_fraction_l


Best RF score: 0.1311699270046216
Best RF params: {'n_estimators': 931, 'max_depth': 4, 'min_samples_split': 5, 'min_samples_leaf': 3, 'min_weight_fraction_leaf': 0.002546502102164078, 'max_features': 'sqrt', 'max_samples': 0.5017851962691026, 'max_leaf_nodes': 286, 'class_weight': 'balanced'}

Average execution time RF: 7.03s
Total optimization time RF: 702.96s


## LightGBM

In [64]:
# LightGBM study
lgb_study = optuna.create_study(
    study_name="lightgbm_optimization_age_b_1",
    direction="maximize",
    storage=db_dir.format('lgb_study'),
    load_if_exists=True
)
lgb_study.optimize(lambda trial: lightgbm_objective(trial, X_1, y_1, skf, ratio_negative_to_positive_b_1), n_trials=n_trials)

print(f"\nBest LightGBM score: {lgb_study.best_value}")
print(f"Best LightGBM params: {lgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in lgb_study.trials]
print(f"\nAverage execution time LightGBM: {np.mean(execution_times):.2f}s")
print(f"Total optimization time LightGBM: {sum(execution_times):.2f}s")

thresholds = lgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = lgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best LightGBM trial: {thresholds}")
print(f"Mean threshold for best LightGBM trial: {mean_threshold}")

[I 2025-07-01 21:04:01,608] A new study created in RDB with name: lightgbm_optimization_age_b_1
[I 2025-07-01 21:04:06,728] Trial 0 finished with value: 0.1506932547856683 and parameters: {'num_leaves': 86, 'learning_rate': 0.28275127942581596, 'feature_fraction': 0.9437613400892697, 'bagging_fraction': 0.6215798053272861, 'bagging_freq': 6, 'min_child_samples': 45, 'reg_alpha': 0.881033374164938, 'reg_lambda': 0.028726716961458676, 'scale_pos_weight': 16.67819681028466}. Best is trial 0 with value: 0.1506932547856683.
[I 2025-07-01 21:04:42,666] Trial 1 finished with value: 0.13929407033424773 and parameters: {'num_leaves': 247, 'learning_rate': 0.29543032339061825, 'feature_fraction': 0.5708786567487729, 'bagging_fraction': 0.8045309326973792, 'bagging_freq': 6, 'min_child_samples': 51, 'reg_alpha': 0.6485360577499517, 'reg_lambda': 0.49738831457982347, 'scale_pos_weight': 11.909098781622221}. Best is trial 0 with value: 0.1506932547856683.
[I 2025-07-01 21:04:45,830] Trial 2 finishe


Best LightGBM score: 0.15961913510127423
Best LightGBM params: {'num_leaves': 21, 'learning_rate': 0.2020132088190523, 'feature_fraction': 0.7733869325685215, 'bagging_fraction': 0.9067648273393395, 'bagging_freq': 5, 'min_child_samples': 93, 'reg_alpha': 0.7516042979209725, 'reg_lambda': 0.44094257157114747, 'scale_pos_weight': 13.871420499157374}

Average execution time LightGBM: 4.98s
Total optimization time LightGBM: 497.92s

Optimal thresholds (per fold) for best LightGBM trial: [0.26321430253790606, 0.24829233558709723, 0.24092588765079767, 0.22782943699653355, 0.23967813377370442]
Mean threshold for best LightGBM trial: 0.2439880193092078


In [65]:
# LightGBM study
lgb_study = optuna.create_study(
    study_name="lightgbm_optimization_age_b_2",
    direction="maximize",
    storage=db_dir.format('lgb_study'),
    load_if_exists=True
)
lgb_study.optimize(lambda trial: lightgbm_objective(trial, X_2, y_2, skf, ratio_negative_to_positive_b_2), n_trials=n_trials)

print(f"\nBest LightGBM score: {lgb_study.best_value}")
print(f"Best LightGBM params: {lgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in lgb_study.trials]
print(f"\nAverage execution time LightGBM: {np.mean(execution_times):.2f}s")
print(f"Total optimization time LightGBM: {sum(execution_times):.2f}s")

thresholds = lgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = lgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best LightGBM trial: {thresholds}")
print(f"Mean threshold for best LightGBM trial: {mean_threshold}")

[I 2025-07-01 21:12:26,369] A new study created in RDB with name: lightgbm_optimization_age_b_2




[I 2025-07-01 21:12:29,492] Trial 0 finished with value: 0.1104167419464301 and parameters: {'num_leaves': 122, 'learning_rate': 0.047398673951844966, 'feature_fraction': 0.5817539209469433, 'bagging_fraction': 0.4170906634470728, 'bagging_freq': 5, 'min_child_samples': 31, 'reg_alpha': 0.4702220703345956, 'reg_lambda': 0.9539468867417271, 'scale_pos_weight': 17.163966647216014}. Best is trial 0 with value: 0.1104167419464301.




[I 2025-07-01 21:12:32,958] Trial 1 finished with value: 0.1150954782213516 and parameters: {'num_leaves': 114, 'learning_rate': 0.038553689272154655, 'feature_fraction': 0.7322057514501731, 'bagging_fraction': 0.7431414793933443, 'bagging_freq': 4, 'min_child_samples': 53, 'reg_alpha': 0.7037811456504239, 'reg_lambda': 0.3328491427951181, 'scale_pos_weight': 23.554293120700788}. Best is trial 1 with value: 0.1150954782213516.




[I 2025-07-01 21:12:36,912] Trial 2 finished with value: 0.11503027176979441 and parameters: {'num_leaves': 250, 'learning_rate': 0.2451385479910715, 'feature_fraction': 0.811447274059773, 'bagging_fraction': 0.47037274054943823, 'bagging_freq': 1, 'min_child_samples': 65, 'reg_alpha': 0.36033696460512876, 'reg_lambda': 0.026224102281885542, 'scale_pos_weight': 15.659648613225153}. Best is trial 1 with value: 0.1150954782213516.




[I 2025-07-01 21:12:39,608] Trial 3 finished with value: 0.10872991550713898 and parameters: {'num_leaves': 205, 'learning_rate': 0.028913769073359036, 'feature_fraction': 0.41527181200186697, 'bagging_fraction': 0.955361004284313, 'bagging_freq': 7, 'min_child_samples': 97, 'reg_alpha': 0.272003401382775, 'reg_lambda': 0.272554129024144, 'scale_pos_weight': 18.031044630188756}. Best is trial 1 with value: 0.1150954782213516.




[I 2025-07-01 21:12:41,981] Trial 4 finished with value: 0.11524300562583376 and parameters: {'num_leaves': 107, 'learning_rate': 0.021441404356149836, 'feature_fraction': 0.6203516701097495, 'bagging_fraction': 0.8711082632568884, 'bagging_freq': 4, 'min_child_samples': 90, 'reg_alpha': 0.8564768114028272, 'reg_lambda': 0.6923355112715082, 'scale_pos_weight': 16.681565751633748}. Best is trial 4 with value: 0.11524300562583376.




[I 2025-07-01 21:12:43,718] Trial 5 finished with value: 0.12306723700980646 and parameters: {'num_leaves': 288, 'learning_rate': 0.02653969087705869, 'feature_fraction': 0.8517616421508353, 'bagging_fraction': 0.5020571070619453, 'bagging_freq': 1, 'min_child_samples': 79, 'reg_alpha': 0.5130262174906366, 'reg_lambda': 0.880469307930579, 'scale_pos_weight': 24.846185943946665}. Best is trial 5 with value: 0.12306723700980646.




[I 2025-07-01 21:12:46,571] Trial 6 finished with value: 0.12336134950112729 and parameters: {'num_leaves': 56, 'learning_rate': 0.022446135381630886, 'feature_fraction': 0.7316644434621996, 'bagging_fraction': 0.9593671796058639, 'bagging_freq': 6, 'min_child_samples': 77, 'reg_alpha': 0.8347996725625697, 'reg_lambda': 0.592083233168873, 'scale_pos_weight': 14.976182445243433}. Best is trial 6 with value: 0.12336134950112729.




[I 2025-07-01 21:12:47,904] Trial 7 finished with value: 0.11780019141174194 and parameters: {'num_leaves': 262, 'learning_rate': 0.021344555956469627, 'feature_fraction': 0.6356930314676048, 'bagging_fraction': 0.42833728344144745, 'bagging_freq': 6, 'min_child_samples': 86, 'reg_alpha': 0.7249801453039771, 'reg_lambda': 0.13283660950831178, 'scale_pos_weight': 20.371023173901307}. Best is trial 6 with value: 0.12336134950112729.




[I 2025-07-01 21:12:58,066] Trial 8 finished with value: 0.1291905335716052 and parameters: {'num_leaves': 149, 'learning_rate': 0.1481489749543082, 'feature_fraction': 0.610511627677113, 'bagging_fraction': 0.9306516193878859, 'bagging_freq': 1, 'min_child_samples': 28, 'reg_alpha': 0.8776646074657214, 'reg_lambda': 0.6549316547924989, 'scale_pos_weight': 20.609583757520564}. Best is trial 8 with value: 0.1291905335716052.




[I 2025-07-01 21:13:02,846] Trial 9 finished with value: 0.12022033936699043 and parameters: {'num_leaves': 138, 'learning_rate': 0.015376585152231642, 'feature_fraction': 0.8031682330857768, 'bagging_fraction': 0.5310074869315744, 'bagging_freq': 3, 'min_child_samples': 27, 'reg_alpha': 0.030624913715211477, 'reg_lambda': 0.39291769558173284, 'scale_pos_weight': 20.20744705911165}. Best is trial 8 with value: 0.1291905335716052.
[I 2025-07-01 21:13:05,297] Trial 10 finished with value: 0.12593211534061194 and parameters: {'num_leaves': 40, 'learning_rate': 0.14405373167200466, 'feature_fraction': 0.9735359933286452, 'bagging_fraction': 0.7352990937408069, 'bagging_freq': 2, 'min_child_samples': 5, 'reg_alpha': 0.6299567438291256, 'reg_lambda': 0.7526844890131971, 'scale_pos_weight': 12.745165573922728}. Best is trial 8 with value: 0.1291905335716052.
[I 2025-07-01 21:13:07,216] Trial 11 finished with value: 0.12421496983305617 and parameters: {'num_leaves': 31, 'learning_rate': 0.1359



[I 2025-07-01 21:13:15,936] Trial 12 finished with value: 0.11152108641049982 and parameters: {'num_leaves': 187, 'learning_rate': 0.11113099178786913, 'feature_fraction': 0.47337760628945147, 'bagging_fraction': 0.622615131947246, 'bagging_freq': 2, 'min_child_samples': 9, 'reg_alpha': 0.6280319330942382, 'reg_lambda': 0.541604779553608, 'scale_pos_weight': 13.417024422865415}. Best is trial 8 with value: 0.1291905335716052.




[I 2025-07-01 21:13:20,235] Trial 13 finished with value: 0.12684265607683004 and parameters: {'num_leaves': 66, 'learning_rate': 0.08315324718363441, 'feature_fraction': 0.992379542345905, 'bagging_fraction': 0.8276446152161216, 'bagging_freq': 2, 'min_child_samples': 27, 'reg_alpha': 0.9451957724689493, 'reg_lambda': 0.789658127077991, 'scale_pos_weight': 21.162021058735323}. Best is trial 8 with value: 0.1291905335716052.




[I 2025-07-01 21:13:24,319] Trial 14 finished with value: 0.11442202307809432 and parameters: {'num_leaves': 73, 'learning_rate': 0.07923365531645654, 'feature_fraction': 0.5522011510041454, 'bagging_fraction': 0.8300019552082729, 'bagging_freq': 1, 'min_child_samples': 36, 'reg_alpha': 0.9363404804112866, 'reg_lambda': 0.8506516422007409, 'scale_pos_weight': 21.9477880521956}. Best is trial 8 with value: 0.1291905335716052.




[I 2025-07-01 21:13:30,863] Trial 15 finished with value: 0.13175920055320753 and parameters: {'num_leaves': 172, 'learning_rate': 0.2236809223267912, 'feature_fraction': 0.8879858671649725, 'bagging_fraction': 0.8577712582516892, 'bagging_freq': 3, 'min_child_samples': 43, 'reg_alpha': 0.832550680650737, 'reg_lambda': 0.6408886270325269, 'scale_pos_weight': 19.832776677465656}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:13:36,685] Trial 16 finished with value: 0.12933324422091844 and parameters: {'num_leaves': 176, 'learning_rate': 0.27736456187470887, 'feature_fraction': 0.8884903331951155, 'bagging_fraction': 0.8943654013670633, 'bagging_freq': 3, 'min_child_samples': 47, 'reg_alpha': 0.8083212083281642, 'reg_lambda': 0.6224753295362774, 'scale_pos_weight': 26.71245361308514}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:13:42,151] Trial 17 finished with value: 0.12193793171203246 and parameters: {'num_leaves': 190, 'learning_rate': 0.2667702463933974, 'feature_fraction': 0.9021725792506918, 'bagging_fraction': 0.6464704995309699, 'bagging_freq': 3, 'min_child_samples': 51, 'reg_alpha': 0.765290656582288, 'reg_lambda': 0.4535350030156611, 'scale_pos_weight': 26.180298526114303}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:13:48,131] Trial 18 finished with value: 0.11971319015546378 and parameters: {'num_leaves': 233, 'learning_rate': 0.2956038298572374, 'feature_fraction': 0.9170724684086791, 'bagging_fraction': 0.8784889088231159, 'bagging_freq': 3, 'min_child_samples': 44, 'reg_alpha': 0.5454396227725355, 'reg_lambda': 0.49625627439806747, 'scale_pos_weight': 23.047166589492235}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:13:54,763] Trial 19 finished with value: 0.12758584199126927 and parameters: {'num_leaves': 172, 'learning_rate': 0.19114425185097922, 'feature_fraction': 0.8830260537388191, 'bagging_fraction': 0.7791606441382737, 'bagging_freq': 5, 'min_child_samples': 63, 'reg_alpha': 0.002355388927721913, 'reg_lambda': 0.6125538798811161, 'scale_pos_weight': 26.97633259438836}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:13:58,299] Trial 20 finished with value: 0.1227612275683562 and parameters: {'num_leaves': 215, 'learning_rate': 0.06760943537868865, 'feature_fraction': 0.7862054316032887, 'bagging_fraction': 0.8934417832457816, 'bagging_freq': 3, 'min_child_samples': 65, 'reg_alpha': 0.22409657650268755, 'reg_lambda': 0.9884063365945137, 'scale_pos_weight': 18.83459443741335}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:14:06,408] Trial 21 finished with value: 0.12509351596746002 and parameters: {'num_leaves': 149, 'learning_rate': 0.187465918003274, 'feature_fraction': 0.6773776836965096, 'bagging_fraction': 0.9989756590217586, 'bagging_freq': 4, 'min_child_samples': 41, 'reg_alpha': 0.8214871395492026, 'reg_lambda': 0.6660451104538155, 'scale_pos_weight': 19.142100934458234}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:14:17,009] Trial 22 finished with value: 0.11754263812826912 and parameters: {'num_leaves': 163, 'learning_rate': 0.18388881904054225, 'feature_fraction': 0.5213909562985196, 'bagging_fraction': 0.932021184473939, 'bagging_freq': 2, 'min_child_samples': 20, 'reg_alpha': 0.9014573268052517, 'reg_lambda': 0.6469540604502169, 'scale_pos_weight': 22.839769392968297}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:14:20,617] Trial 23 finished with value: 0.12548582190183993 and parameters: {'num_leaves': 90, 'learning_rate': 0.12638546799089534, 'feature_fraction': 0.7042156536720361, 'bagging_fraction': 0.7901500020959515, 'bagging_freq': 4, 'min_child_samples': 51, 'reg_alpha': 0.7693609696788694, 'reg_lambda': 0.5559563670024418, 'scale_pos_weight': 24.938737060680428}. Best is trial 15 with value: 0.13175920055320753.




[I 2025-07-01 21:14:29,382] Trial 24 finished with value: 0.13324274815059392 and parameters: {'num_leaves': 135, 'learning_rate': 0.21821811431909405, 'feature_fraction': 0.7741340215640152, 'bagging_fraction': 0.9129704596153349, 'bagging_freq': 1, 'min_child_samples': 20, 'reg_alpha': 0.6545206488293345, 'reg_lambda': 0.4407500568045821, 'scale_pos_weight': 19.930140727734965}. Best is trial 24 with value: 0.13324274815059392.




[I 2025-07-01 21:14:37,917] Trial 25 finished with value: 0.12761856799956245 and parameters: {'num_leaves': 173, 'learning_rate': 0.22936590119294586, 'feature_fraction': 0.8444128841135567, 'bagging_fraction': 0.9986616269748468, 'bagging_freq': 3, 'min_child_samples': 19, 'reg_alpha': 0.6075451887849721, 'reg_lambda': 0.23463552242676317, 'scale_pos_weight': 17.581486888337103}. Best is trial 24 with value: 0.13324274815059392.




[I 2025-07-01 21:14:44,738] Trial 26 finished with value: 0.13353449972911027 and parameters: {'num_leaves': 127, 'learning_rate': 0.010813865097579247, 'feature_fraction': 0.9327128353673954, 'bagging_fraction': 0.831951295347284, 'bagging_freq': 5, 'min_child_samples': 16, 'reg_alpha': 0.6819974528456981, 'reg_lambda': 0.43855966129091994, 'scale_pos_weight': 16.17262823756808}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:14:49,902] Trial 27 finished with value: 0.12068188502999691 and parameters: {'num_leaves': 94, 'learning_rate': 0.010179260472656715, 'feature_fraction': 0.9330990281170304, 'bagging_fraction': 0.674742692253298, 'bagging_freq': 5, 'min_child_samples': 17, 'reg_alpha': 0.6886093358003047, 'reg_lambda': 0.4331037178731648, 'scale_pos_weight': 16.039976683052746}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:14:56,438] Trial 28 finished with value: 0.11676923010132152 and parameters: {'num_leaves': 130, 'learning_rate': 0.09621299446096584, 'feature_fraction': 0.9500602300065705, 'bagging_fraction': 0.8210896490787127, 'bagging_freq': 6, 'min_child_samples': 19, 'reg_alpha': 0.42566643104761687, 'reg_lambda': 0.35258518842534303, 'scale_pos_weight': 14.993693658374028}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:00,279] Trial 29 finished with value: 0.13073837117596118 and parameters: {'num_leaves': 122, 'learning_rate': 0.04146721748836437, 'feature_fraction': 0.758155193215177, 'bagging_fraction': 0.5897295586858624, 'bagging_freq': 5, 'min_child_samples': 36, 'reg_alpha': 0.5545986958490247, 'reg_lambda': 0.4902305153878933, 'scale_pos_weight': 19.261718096699827}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:08,969] Trial 30 finished with value: 0.12583419091146747 and parameters: {'num_leaves': 208, 'learning_rate': 0.010534686439685262, 'feature_fraction': 0.8633303466016299, 'bagging_fraction': 0.848148408949288, 'bagging_freq': 4, 'min_child_samples': 11, 'reg_alpha': 0.4594842034233349, 'reg_lambda': 0.21574262502089012, 'scale_pos_weight': 16.963967886267003}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:12,438] Trial 31 finished with value: 0.12114303930873253 and parameters: {'num_leaves': 124, 'learning_rate': 0.04545832386729145, 'feature_fraction': 0.759886202027164, 'bagging_fraction': 0.5816175746749366, 'bagging_freq': 5, 'min_child_samples': 38, 'reg_alpha': 0.5720616863139091, 'reg_lambda': 0.507340603505388, 'scale_pos_weight': 19.459518540052194}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:16,425] Trial 32 finished with value: 0.12283447246685411 and parameters: {'num_leaves': 110, 'learning_rate': 0.05706144293459485, 'feature_fraction': 0.8274246436834152, 'bagging_fraction': 0.5793686574141718, 'bagging_freq': 5, 'min_child_samples': 32, 'reg_alpha': 0.67188154134673, 'reg_lambda': 0.4493241682861572, 'scale_pos_weight': 18.330356942387787}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:19,735] Trial 33 finished with value: 0.11563783871919378 and parameters: {'num_leaves': 147, 'learning_rate': 0.0380457274761443, 'feature_fraction': 0.7611979344790529, 'bagging_fraction': 0.7604271907149208, 'bagging_freq': 7, 'min_child_samples': 57, 'reg_alpha': 0.7318792911575266, 'reg_lambda': 0.32948593968687934, 'scale_pos_weight': 21.256435995676853}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:23,952] Trial 34 finished with value: 0.11356450963584867 and parameters: {'num_leaves': 92, 'learning_rate': 0.014996768679023288, 'feature_fraction': 0.6682904904084511, 'bagging_fraction': 0.7084466752612468, 'bagging_freq': 6, 'min_child_samples': 33, 'reg_alpha': 0.41093232929410267, 'reg_lambda': 0.3975157698609456, 'scale_pos_weight': 17.90659438425601}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:30,281] Trial 35 finished with value: 0.1149950723975103 and parameters: {'num_leaves': 118, 'learning_rate': 0.045671720747400175, 'feature_fraction': 0.7863285682621627, 'bagging_fraction': 0.8046468559193573, 'bagging_freq': 5, 'min_child_samples': 23, 'reg_alpha': 0.5180365569192306, 'reg_lambda': 0.5010226249857203, 'scale_pos_weight': 16.26177540459251}. Best is trial 26 with value: 0.13353449972911027.
[I 2025-07-01 21:15:36,880] Trial 36 finished with value: 0.11694897426779427 and parameters: {'num_leaves': 135, 'learning_rate': 0.03326806294286152, 'feature_fraction': 0.7256554847393257, 'bagging_fraction': 0.9238664918344592, 'bagging_freq': 4, 'min_child_samples': 14, 'reg_alpha': 0.5977554713985634, 'reg_lambda': 0.3128777705577784, 'scale_pos_weight': 19.78164401350056}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:42,734] Trial 37 finished with value: 0.1156498496451203 and parameters: {'num_leaves': 159, 'learning_rate': 0.05904164232328862, 'feature_fraction': 0.9470046264798611, 'bagging_fraction': 0.8554282679526977, 'bagging_freq': 6, 'min_child_samples': 24, 'reg_alpha': 0.3218276045788143, 'reg_lambda': 0.7170935628728348, 'scale_pos_weight': 14.4783437291479}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:47,026] Trial 38 finished with value: 0.11634119949137363 and parameters: {'num_leaves': 106, 'learning_rate': 0.012221252981786824, 'feature_fraction': 0.8646782398582029, 'bagging_fraction': 0.6798842145116062, 'bagging_freq': 1, 'min_child_samples': 34, 'reg_alpha': 0.6608699054499017, 'reg_lambda': 0.14497181833719264, 'scale_pos_weight': 17.2654965263071}. Best is trial 26 with value: 0.13353449972911027.




[I 2025-07-01 21:15:54,881] Trial 39 finished with value: 0.13827551517777598 and parameters: {'num_leaves': 80, 'learning_rate': 0.21979530795145358, 'feature_fraction': 0.8174117537984181, 'bagging_fraction': 0.9629170289938864, 'bagging_freq': 4, 'min_child_samples': 43, 'reg_alpha': 0.7755279139054861, 'reg_lambda': 0.5791414013058541, 'scale_pos_weight': 18.82185801404129}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:02,190] Trial 40 finished with value: 0.12395191802703594 and parameters: {'num_leaves': 79, 'learning_rate': 0.21962798998282088, 'feature_fraction': 0.8238293575153618, 'bagging_fraction': 0.9642644117975453, 'bagging_freq': 4, 'min_child_samples': 59, 'reg_alpha': 0.7523108522851769, 'reg_lambda': 0.5881601774625178, 'scale_pos_weight': 18.340857251594194}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:09,445] Trial 41 finished with value: 0.13002292401458324 and parameters: {'num_leaves': 54, 'learning_rate': 0.16844885033101833, 'feature_fraction': 0.7737507087509721, 'bagging_fraction': 0.9136115390316578, 'bagging_freq': 5, 'min_child_samples': 45, 'reg_alpha': 0.6774383227622851, 'reg_lambda': 0.5453292387287378, 'scale_pos_weight': 19.03742485252407}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:15,133] Trial 42 finished with value: 0.1329677134843408 and parameters: {'num_leaves': 99, 'learning_rate': 0.23102792598067537, 'feature_fraction': 0.7363896535692609, 'bagging_fraction': 0.9447934142662138, 'bagging_freq': 4, 'min_child_samples': 39, 'reg_alpha': 0.8015020966435535, 'reg_lambda': 0.4280426686348711, 'scale_pos_weight': 21.613359522767844}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:21,319] Trial 43 finished with value: 0.11770463719536602 and parameters: {'num_leaves': 97, 'learning_rate': 0.22017667748802072, 'feature_fraction': 0.8021393636777348, 'bagging_fraction': 0.9623813339807163, 'bagging_freq': 4, 'min_child_samples': 40, 'reg_alpha': 0.7882196015395675, 'reg_lambda': 0.38828321949281097, 'scale_pos_weight': 21.973472552451952}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:26,282] Trial 44 finished with value: 0.11899031728064235 and parameters: {'num_leaves': 57, 'learning_rate': 0.23952821115969128, 'feature_fraction': 0.7177257383936412, 'bagging_fraction': 0.9521711689215272, 'bagging_freq': 4, 'min_child_samples': 29, 'reg_alpha': 0.8422908975000528, 'reg_lambda': 0.2993159442933919, 'scale_pos_weight': 20.184239482353703}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:33,055] Trial 45 finished with value: 0.12186780427287211 and parameters: {'num_leaves': 82, 'learning_rate': 0.16302173436287315, 'feature_fraction': 0.6716724673560656, 'bagging_fraction': 0.8681372474539217, 'bagging_freq': 4, 'min_child_samples': 48, 'reg_alpha': 0.8614093694909614, 'reg_lambda': 0.4293215259720072, 'scale_pos_weight': 20.9478498219499}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:37,891] Trial 46 finished with value: 0.12511007176729125 and parameters: {'num_leaves': 144, 'learning_rate': 0.09760375612108041, 'feature_fraction': 0.8418009573472778, 'bagging_fraction': 0.9065437460089594, 'bagging_freq': 2, 'min_child_samples': 58, 'reg_alpha': 0.9004889116326578, 'reg_lambda': 0.5685973455176594, 'scale_pos_weight': 22.187431789092575}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:39,518] Trial 47 finished with value: 0.12533412378400902 and parameters: {'num_leaves': 24, 'learning_rate': 0.12343981929047565, 'feature_fraction': 0.9166534705320608, 'bagging_fraction': 0.9828165252267198, 'bagging_freq': 3, 'min_child_samples': 68, 'reg_alpha': 0.7217717162967553, 'reg_lambda': 0.02541458889369963, 'scale_pos_weight': 19.96914872885565}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:47,227] Trial 48 finished with value: 0.1244698235168598 and parameters: {'num_leaves': 193, 'learning_rate': 0.02427145690252523, 'feature_fraction': 0.73891594087919, 'bagging_fraction': 0.9414437816587263, 'bagging_freq': 1, 'min_child_samples': 10, 'reg_alpha': 0.9628183598288632, 'reg_lambda': 0.6966240433672063, 'scale_pos_weight': 23.750440011475373}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:51,126] Trial 49 finished with value: 0.12787945620267122 and parameters: {'num_leaves': 106, 'learning_rate': 0.018342429814552936, 'feature_fraction': 0.8862270360574788, 'bagging_fraction': 0.8900487162568442, 'bagging_freq': 3, 'min_child_samples': 42, 'reg_alpha': 0.7993085749115163, 'reg_lambda': 0.8394616122431847, 'scale_pos_weight': 15.237092350588938}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:16:57,096] Trial 50 finished with value: 0.12395141233531826 and parameters: {'num_leaves': 67, 'learning_rate': 0.15381991834352682, 'feature_fraction': 0.6302700601565433, 'bagging_fraction': 0.8488565498770639, 'bagging_freq': 6, 'min_child_samples': 24, 'reg_alpha': 0.6473919830875827, 'reg_lambda': 0.37646086566655423, 'scale_pos_weight': 14.20516252968407}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:02,583] Trial 51 finished with value: 0.1247299964236741 and parameters: {'num_leaves': 122, 'learning_rate': 0.20246130139955285, 'feature_fraction': 0.7504185574230825, 'bagging_fraction': 0.5327715242531961, 'bagging_freq': 5, 'min_child_samples': 37, 'reg_alpha': 0.7101074442911313, 'reg_lambda': 0.5147874246951475, 'scale_pos_weight': 18.658908445709653}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:06,722] Trial 52 finished with value: 0.12037524640260382 and parameters: {'num_leaves': 161, 'learning_rate': 0.2575093661277941, 'feature_fraction': 0.8097357388998097, 'bagging_fraction': 0.4143420774870554, 'bagging_freq': 4, 'min_child_samples': 54, 'reg_alpha': 0.5605624969049732, 'reg_lambda': 0.45574089978082233, 'scale_pos_weight': 20.55933280453016}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:12,491] Trial 53 finished with value: 0.11736299104833543 and parameters: {'num_leaves': 114, 'learning_rate': 0.29564389810437225, 'feature_fraction': 0.6880529267859595, 'bagging_fraction': 0.9158703523575228, 'bagging_freq': 5, 'min_child_samples': 29, 'reg_alpha': 0.4927910333382096, 'reg_lambda': 0.46972497779464933, 'scale_pos_weight': 19.639474405421257}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:15,025] Trial 54 finished with value: 0.12641429755219255 and parameters: {'num_leaves': 135, 'learning_rate': 0.02875331245138226, 'feature_fraction': 0.787549653205446, 'bagging_fraction': 0.473537619462852, 'bagging_freq': 5, 'min_child_samples': 49, 'reg_alpha': 0.8960263907037032, 'reg_lambda': 0.6029825797626331, 'scale_pos_weight': 21.422405584329372}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:19,289] Trial 55 finished with value: 0.11687954137936545 and parameters: {'num_leaves': 125, 'learning_rate': 0.01844738356979352, 'feature_fraction': 0.6467625913044684, 'bagging_fraction': 0.973916056793443, 'bagging_freq': 4, 'min_child_samples': 38, 'reg_alpha': 0.7461280793583472, 'reg_lambda': 0.5284387706874472, 'scale_pos_weight': 17.769217807292762}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:31,231] Trial 56 finished with value: 0.12292951818242577 and parameters: {'num_leaves': 290, 'learning_rate': 0.18037363328219297, 'feature_fraction': 0.4001751459925161, 'bagging_fraction': 0.87695661248358, 'bagging_freq': 7, 'min_child_samples': 14, 'reg_alpha': 0.20592474625880808, 'reg_lambda': 0.4099696839137496, 'scale_pos_weight': 19.167495283392743}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:33,081] Trial 57 finished with value: 0.12047621456165265 and parameters: {'num_leaves': 154, 'learning_rate': 0.07377067939798458, 'feature_fraction': 0.9680102396913186, 'bagging_fraction': 0.6183583640742938, 'bagging_freq': 3, 'min_child_samples': 100, 'reg_alpha': 0.820667386700721, 'reg_lambda': 0.47402586774483557, 'scale_pos_weight': 20.49848590798116}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:40,342] Trial 58 finished with value: 0.12846276502157744 and parameters: {'num_leaves': 101, 'learning_rate': 0.1348724773355143, 'feature_fraction': 0.7042129902430391, 'bagging_fraction': 0.8272998918699065, 'bagging_freq': 2, 'min_child_samples': 43, 'reg_alpha': 0.7832105471598281, 'reg_lambda': 0.35149042389414437, 'scale_pos_weight': 23.921175728623716}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:44,425] Trial 59 finished with value: 0.11845929107034874 and parameters: {'num_leaves': 264, 'learning_rate': 0.11127003688899906, 'feature_fraction': 0.5921459163231017, 'bagging_fraction': 0.937612438490076, 'bagging_freq': 5, 'min_child_samples': 54, 'reg_alpha': 0.6224607201556567, 'reg_lambda': 0.6438138672990176, 'scale_pos_weight': 22.78397421557186}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:17:50,190] Trial 60 finished with value: 0.1209602599992591 and parameters: {'num_leaves': 44, 'learning_rate': 0.20192160184928284, 'feature_fraction': 0.867779604387973, 'bagging_fraction': 0.7724343001685413, 'bagging_freq': 1, 'min_child_samples': 35, 'reg_alpha': 0.9929587598594243, 'reg_lambda': 0.5726267704293666, 'scale_pos_weight': 16.577840603357867}. Best is trial 39 with value: 0.13827551517777598.
[I 2025-07-01 21:17:56,329] Trial 61 finished with value: 0.12752404340134094 and parameters: {'num_leaves': 52, 'learning_rate': 0.1676068455943058, 'feature_fraction': 0.7606877822675714, 'bagging_fraction': 0.9051318624604565, 'bagging_freq': 5, 'min_child_samples': 43, 'reg_alpha': 0.6771525076183367, 'reg_lambda': 0.5463556591614391, 'scale_pos_weight': 18.995295009364586}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:02,324] Trial 62 finished with value: 0.13036992438786793 and parameters: {'num_leaves': 83, 'learning_rate': 0.2571538931983936, 'feature_fraction': 0.7854180416059588, 'bagging_fraction': 0.9230709180948786, 'bagging_freq': 6, 'min_child_samples': 44, 'reg_alpha': 0.698339702202749, 'reg_lambda': 0.4854399146073902, 'scale_pos_weight': 18.213907747886587}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:08,008] Trial 63 finished with value: 0.11993936526070989 and parameters: {'num_leaves': 85, 'learning_rate': 0.2582704069411694, 'feature_fraction': 0.9058441597859745, 'bagging_fraction': 0.9469984798565492, 'bagging_freq': 6, 'min_child_samples': 46, 'reg_alpha': 0.5811963614803248, 'reg_lambda': 0.4823012595027151, 'scale_pos_weight': 17.330083576377913}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:14,656] Trial 64 finished with value: 0.13029954741029023 and parameters: {'num_leaves': 178, 'learning_rate': 0.23489368503000516, 'feature_fraction': 0.8308390739864997, 'bagging_fraction': 0.9843623623548878, 'bagging_freq': 7, 'min_child_samples': 30, 'reg_alpha': 0.7022681398853021, 'reg_lambda': 0.41870441773578193, 'scale_pos_weight': 18.259462152959035}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:21,497] Trial 65 finished with value: 0.12521925479774398 and parameters: {'num_leaves': 141, 'learning_rate': 0.21297648113613588, 'feature_fraction': 0.730110911609201, 'bagging_fraction': 0.863275918045925, 'bagging_freq': 6, 'min_child_samples': 40, 'reg_alpha': 0.5358115807117627, 'reg_lambda': 0.6784420079256769, 'scale_pos_weight': 19.391177295313355}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:27,231] Trial 66 finished with value: 0.12784176881657186 and parameters: {'num_leaves': 75, 'learning_rate': 0.29604196157169915, 'feature_fraction': 0.8060104984633332, 'bagging_fraction': 0.8893339643506211, 'bagging_freq': 6, 'min_child_samples': 5, 'reg_alpha': 0.6239100107874767, 'reg_lambda': 0.6346395184246223, 'scale_pos_weight': 15.691885370522852}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:33,868] Trial 67 finished with value: 0.12054872377051273 and parameters: {'num_leaves': 114, 'learning_rate': 0.2575364970285511, 'feature_fraction': 0.7865780525753108, 'bagging_fraction': 0.8051297356750179, 'bagging_freq': 3, 'min_child_samples': 26, 'reg_alpha': 0.858640249785771, 'reg_lambda': 0.26143646665620135, 'scale_pos_weight': 16.817193402775356}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:36,839] Trial 68 finished with value: 0.11742690032012186 and parameters: {'num_leaves': 66, 'learning_rate': 0.03610317030472079, 'feature_fraction': 0.9937627159815772, 'bagging_fraction': 0.9288815801227952, 'bagging_freq': 4, 'min_child_samples': 51, 'reg_alpha': 0.7648683379953832, 'reg_lambda': 0.724738541071723, 'scale_pos_weight': 20.956187188799817}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:42,982] Trial 69 finished with value: 0.12005398395066313 and parameters: {'num_leaves': 130, 'learning_rate': 0.14474649443100182, 'feature_fraction': 0.9346307802693323, 'bagging_fraction': 0.5389949502803506, 'bagging_freq': 2, 'min_child_samples': 22, 'reg_alpha': 0.7318755764452082, 'reg_lambda': 0.5036782412747169, 'scale_pos_weight': 21.552552751746333}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:51,390] Trial 70 finished with value: 0.1310741762971277 and parameters: {'num_leaves': 167, 'learning_rate': 0.187721241690938, 'feature_fraction': 0.8532206266416816, 'bagging_fraction': 0.7454611123730955, 'bagging_freq': 5, 'min_child_samples': 15, 'reg_alpha': 0.6512829791759013, 'reg_lambda': 0.3770752480099471, 'scale_pos_weight': 19.96060369894917}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:18:57,281] Trial 71 finished with value: 0.12025366346592278 and parameters: {'num_leaves': 170, 'learning_rate': 0.2685518599990892, 'feature_fraction': 0.8762442060343256, 'bagging_fraction': 0.7267698475023431, 'bagging_freq': 5, 'min_child_samples': 18, 'reg_alpha': 0.647510959788858, 'reg_lambda': 0.3600714526691738, 'scale_pos_weight': 19.96570923750524}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:06,617] Trial 72 finished with value: 0.1266205377158347 and parameters: {'num_leaves': 193, 'learning_rate': 0.18948805131875807, 'feature_fraction': 0.8446409945832024, 'bagging_fraction': 0.8463871428911149, 'bagging_freq': 5, 'min_child_samples': 14, 'reg_alpha': 0.5948409533930731, 'reg_lambda': 0.4385941877474394, 'scale_pos_weight': 18.48588588977017}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:13,018] Trial 73 finished with value: 0.12507736947391096 and parameters: {'num_leaves': 150, 'learning_rate': 0.23316606628395772, 'feature_fraction': 0.9003076467009375, 'bagging_fraction': 0.7430129376529157, 'bagging_freq': 4, 'min_child_samples': 7, 'reg_alpha': 0.6956395248824376, 'reg_lambda': 0.2892196904144431, 'scale_pos_weight': 19.472523569968207}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:19,445] Trial 74 finished with value: 0.13011039015675288 and parameters: {'num_leaves': 88, 'learning_rate': 0.20283054523599248, 'feature_fraction': 0.7484003598706623, 'bagging_fraction': 0.7993976738689268, 'bagging_freq': 6, 'min_child_samples': 32, 'reg_alpha': 0.8204108274749448, 'reg_lambda': 0.5268963406147964, 'scale_pos_weight': 17.98961107902577}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:24,992] Trial 75 finished with value: 0.119379746169438 and parameters: {'num_leaves': 181, 'learning_rate': 0.05166576177249112, 'feature_fraction': 0.7755710283930299, 'bagging_fraction': 0.6883617241706913, 'bagging_freq': 5, 'min_child_samples': 16, 'reg_alpha': 0.7562143200901178, 'reg_lambda': 0.39709902551171566, 'scale_pos_weight': 18.682400043775296}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:32,492] Trial 76 finished with value: 0.1241363789535868 and parameters: {'num_leaves': 99, 'learning_rate': 0.1763828527104659, 'feature_fraction': 0.8552999012689424, 'bagging_fraction': 0.6437483035345938, 'bagging_freq': 4, 'min_child_samples': 21, 'reg_alpha': 0.0883039644122382, 'reg_lambda': 0.5928829549866702, 'scale_pos_weight': 20.77200978858735}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:39,305] Trial 77 finished with value: 0.12034211331740234 and parameters: {'num_leaves': 155, 'learning_rate': 0.24301306264395733, 'feature_fraction': 0.8282121468997587, 'bagging_fraction': 0.7567127896962514, 'bagging_freq': 5, 'min_child_samples': 12, 'reg_alpha': 0.487269264422709, 'reg_lambda': 0.4520431302367894, 'scale_pos_weight': 22.301302815407563}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:44,631] Trial 78 finished with value: 0.1232815576842536 and parameters: {'num_leaves': 218, 'learning_rate': 0.014483057493269133, 'feature_fraction': 0.8015589126409871, 'bagging_fraction': 0.8795141985605517, 'bagging_freq': 6, 'min_child_samples': 26, 'reg_alpha': 0.6399229414918588, 'reg_lambda': 0.3302182312155072, 'scale_pos_weight': 20.247884500315156}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:45,948] Trial 79 finished with value: 0.12474052357039947 and parameters: {'num_leaves': 168, 'learning_rate': 0.15590092043837084, 'feature_fraction': 0.7436581257204902, 'bagging_fraction': 0.439573911676602, 'bagging_freq': 7, 'min_child_samples': 88, 'reg_alpha': 0.7919051476317088, 'reg_lambda': 0.4918151375137882, 'scale_pos_weight': 17.54465151573004}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:52,204] Trial 80 finished with value: 0.12114542466610874 and parameters: {'num_leaves': 130, 'learning_rate': 0.28072713443828157, 'feature_fraction': 0.7137945314577163, 'bagging_fraction': 0.8984914062095936, 'bagging_freq': 4, 'min_child_samples': 8, 'reg_alpha': 0.9291199982451286, 'reg_lambda': 0.37928715180367806, 'scale_pos_weight': 13.308673630182756}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:19:58,800] Trial 81 finished with value: 0.13574391624390122 and parameters: {'num_leaves': 185, 'learning_rate': 0.21983563400747116, 'feature_fraction': 0.8294971450283944, 'bagging_fraction': 0.9804718821698833, 'bagging_freq': 7, 'min_child_samples': 35, 'reg_alpha': 0.7082481268225561, 'reg_lambda': 0.4339451272752179, 'scale_pos_weight': 18.10289949140555}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:05,393] Trial 82 finished with value: 0.1278367570555498 and parameters: {'num_leaves': 183, 'learning_rate': 0.21435411248792338, 'feature_fraction': 0.7721795701261482, 'bagging_fraction': 0.9930235028351551, 'bagging_freq': 7, 'min_child_samples': 39, 'reg_alpha': 0.667166934104918, 'reg_lambda': 0.40884770613273846, 'scale_pos_weight': 18.921142063028412}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:12,616] Trial 83 finished with value: 0.12035676606926556 and parameters: {'num_leaves': 203, 'learning_rate': 0.19848893179378835, 'feature_fraction': 0.8198242612225543, 'bagging_fraction': 0.9528066514720375, 'bagging_freq': 7, 'min_child_samples': 35, 'reg_alpha': 0.7207189962265834, 'reg_lambda': 0.4688423503548253, 'scale_pos_weight': 18.151911610557146}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:19,092] Trial 84 finished with value: 0.12223643708021865 and parameters: {'num_leaves': 104, 'learning_rate': 0.22203604692456072, 'feature_fraction': 0.8410922423906205, 'bagging_fraction': 0.9634355450568013, 'bagging_freq': 6, 'min_child_samples': 47, 'reg_alpha': 0.5396980727526149, 'reg_lambda': 0.5636386210790475, 'scale_pos_weight': 19.844599387602916}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:24,594] Trial 85 finished with value: 0.1275450732988226 and parameters: {'num_leaves': 140, 'learning_rate': 0.24728100974662026, 'feature_fraction': 0.7978162972048776, 'bagging_fraction': 0.9275268835776975, 'bagging_freq': 6, 'min_child_samples': 44, 'reg_alpha': 0.8424740311948646, 'reg_lambda': 0.4247152757236721, 'scale_pos_weight': 19.360016436265276}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:29,513] Trial 86 finished with value: 0.12487111402621584 and parameters: {'num_leaves': 202, 'learning_rate': 0.09160005292975608, 'feature_fraction': 0.9291525511242935, 'bagging_fraction': 0.9144327046835126, 'bagging_freq': 5, 'min_child_samples': 36, 'reg_alpha': 0.6003734593724085, 'reg_lambda': 0.36242181315620325, 'scale_pos_weight': 21.49805035339642}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:37,526] Trial 87 finished with value: 0.11348289095806538 and parameters: {'num_leaves': 118, 'learning_rate': 0.180779103163521, 'feature_fraction': 0.4637281716629602, 'bagging_fraction': 0.9703626381521612, 'bagging_freq': 7, 'min_child_samples': 50, 'reg_alpha': 0.7372812777387665, 'reg_lambda': 0.6140386524488246, 'scale_pos_weight': 16.252379912622818}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:41,609] Trial 88 finished with value: 0.125190799205523 and parameters: {'num_leaves': 164, 'learning_rate': 0.06718780864034311, 'feature_fraction': 0.8968257423749556, 'bagging_fraction': 0.8385929765141834, 'bagging_freq': 4, 'min_child_samples': 41, 'reg_alpha': 0.4557121148981691, 'reg_lambda': 0.5295806630427151, 'scale_pos_weight': 17.11116414511801}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:45,028] Trial 89 finished with value: 0.12156916378962117 and parameters: {'num_leaves': 75, 'learning_rate': 0.04280246032518094, 'feature_fraction': 0.8580368847806557, 'bagging_fraction': 0.5687286820906761, 'bagging_freq': 2, 'min_child_samples': 16, 'reg_alpha': 0.6933501780233122, 'reg_lambda': 0.7778597868894812, 'scale_pos_weight': 20.25638528532735}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:50,450] Trial 90 finished with value: 0.1312131875995114 and parameters: {'num_leaves': 96, 'learning_rate': 0.283114216168691, 'feature_fraction': 0.9583692681511524, 'bagging_fraction': 0.9795813099555297, 'bagging_freq': 3, 'min_child_samples': 31, 'reg_alpha': 0.7768638988785412, 'reg_lambda': 0.4406791016565389, 'scale_pos_weight': 18.674380838215964}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:20:55,655] Trial 91 finished with value: 0.12964894714271497 and parameters: {'num_leaves': 67, 'learning_rate': 0.2764153841222869, 'feature_fraction': 0.9730601101832371, 'bagging_fraction': 0.9990747095602449, 'bagging_freq': 3, 'min_child_samples': 32, 'reg_alpha': 0.7708690981669369, 'reg_lambda': 0.45465180878890316, 'scale_pos_weight': 18.75930381386788}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:21:01,398] Trial 92 finished with value: 0.12985372627607167 and parameters: {'num_leaves': 96, 'learning_rate': 0.2338785092117094, 'feature_fraction': 0.9562073204016094, 'bagging_fraction': 0.9377240524000003, 'bagging_freq': 3, 'min_child_samples': 38, 'reg_alpha': 0.8680307932284563, 'reg_lambda': 0.489145086527257, 'scale_pos_weight': 17.899624424988918}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:21:07,397] Trial 93 finished with value: 0.12708501596173394 and parameters: {'num_leaves': 109, 'learning_rate': 0.2804927193520065, 'feature_fraction': 0.8745102978391194, 'bagging_fraction': 0.9782012212581133, 'bagging_freq': 4, 'min_child_samples': 27, 'reg_alpha': 0.7085652707682338, 'reg_lambda': 0.4276584576448222, 'scale_pos_weight': 17.53515189503575}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:21:13,373] Trial 94 finished with value: 0.12013053753041991 and parameters: {'num_leaves': 187, 'learning_rate': 0.20936599116623594, 'feature_fraction': 0.9254358132599371, 'bagging_fraction': 0.7173650461407729, 'bagging_freq': 3, 'min_child_samples': 33, 'reg_alpha': 0.7969099142325168, 'reg_lambda': 0.37934742714582254, 'scale_pos_weight': 19.603347214085897}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:21:19,358] Trial 95 finished with value: 0.12567142159709 and parameters: {'num_leaves': 92, 'learning_rate': 0.25248066447752626, 'feature_fraction': 0.7684988369485628, 'bagging_fraction': 0.9615973086968177, 'bagging_freq': 5, 'min_child_samples': 45, 'reg_alpha': 0.8282310281570758, 'reg_lambda': 0.32137494125757626, 'scale_pos_weight': 19.214236232339616}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:21:23,052] Trial 96 finished with value: 0.12176774377530097 and parameters: {'num_leaves': 80, 'learning_rate': 0.01296476721712918, 'feature_fraction': 0.8173720224113491, 'bagging_fraction': 0.950635417762929, 'bagging_freq': 1, 'min_child_samples': 41, 'reg_alpha': 0.657143517982916, 'reg_lambda': 0.5170826197675809, 'scale_pos_weight': 18.456070884506254}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:21:30,570] Trial 97 finished with value: 0.12953943438966029 and parameters: {'num_leaves': 127, 'learning_rate': 0.1909210069392417, 'feature_fraction': 0.9573921436922423, 'bagging_fraction': 0.9808691424062328, 'bagging_freq': 4, 'min_child_samples': 20, 'reg_alpha': 0.566534301935466, 'reg_lambda': 0.46432770655225125, 'scale_pos_weight': 16.531336607369283}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:21:39,190] Trial 98 finished with value: 0.12039671842783697 and parameters: {'num_leaves': 119, 'learning_rate': 0.16561336233501162, 'feature_fraction': 0.6890004778941474, 'bagging_fraction': 0.9152004387349746, 'bagging_freq': 2, 'min_child_samples': 24, 'reg_alpha': 0.7477183552806247, 'reg_lambda': 0.3438463684664984, 'scale_pos_weight': 20.010868634553482}. Best is trial 39 with value: 0.13827551517777598.




[I 2025-07-01 21:21:45,184] Trial 99 finished with value: 0.12459925198702074 and parameters: {'num_leaves': 112, 'learning_rate': 0.13179414894019087, 'feature_fraction': 0.7931108410261902, 'bagging_fraction': 0.8625404895248838, 'bagging_freq': 5, 'min_child_samples': 29, 'reg_alpha': 0.6188659745982864, 'reg_lambda': 0.5800966037449993, 'scale_pos_weight': 20.72822818960656}. Best is trial 39 with value: 0.13827551517777598.



Best LightGBM score: 0.13827551517777598
Best LightGBM params: {'num_leaves': 80, 'learning_rate': 0.21979530795145358, 'feature_fraction': 0.8174117537984181, 'bagging_fraction': 0.9629170289938864, 'bagging_freq': 4, 'min_child_samples': 43, 'reg_alpha': 0.7755279139054861, 'reg_lambda': 0.5791414013058541, 'scale_pos_weight': 18.82185801404129}

Average execution time LightGBM: 5.53s
Total optimization time LightGBM: 552.92s

Optimal thresholds (per fold) for best LightGBM trial: [0.20704729928377782, 0.13647176879474174, 0.09583739996836785, 0.002978797336177862, 0.145099758966053]
Mean threshold for best LightGBM trial: 0.11748700486982366


## Catboost

In [66]:
# CatBoost study
cat_study = optuna.create_study(
    study_name="catboost_optimization_age_b_1",
    direction="maximize",
    storage=db_dir.format('cat_study'),
    load_if_exists=True
)
cat_study.optimize(lambda trial: catboost_objective(trial, X_1, y_1, skf, ratio_negative_to_positive_b_1), n_trials=n_trials)

print(f"\nBest CatBoost score: {cat_study.best_value}")
print(f"Best CatBoost params: {cat_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in cat_study.trials]
print(f"\nAverage execution time CatBoost: {np.mean(execution_times):.2f}s")
print(f"Total optimization time CatBoost: {sum(execution_times):.2f}s")

thresholds = cat_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = cat_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best CatBoost trial: {thresholds}")
print(f"Mean threshold for best CatBoost trial: {mean_threshold}")

[I 2025-07-01 21:21:45,232] A new study created in RDB with name: catboost_optimization_age_b_1
[I 2025-07-01 21:21:51,345] Trial 0 finished with value: 0.151411944098984 and parameters: {'iterations': 925, 'learning_rate': 0.02544961434651239, 'depth': 8, 'l2_leaf_reg': 8.328232900841115, 'bagging_temperature': 0.6521954214050382, 'random_strength': 7.036475930013167, 'scale_pos_weight': 16.114441474271366}. Best is trial 0 with value: 0.151411944098984.
[I 2025-07-01 21:21:55,328] Trial 1 finished with value: 0.15436418138338298 and parameters: {'iterations': 372, 'learning_rate': 0.016990797213165553, 'depth': 7, 'l2_leaf_reg': 8.129463054003569, 'bagging_temperature': 0.43058878881313944, 'random_strength': 8.035742028504956, 'scale_pos_weight': 14.65483230709347}. Best is trial 1 with value: 0.15436418138338298.
[I 2025-07-01 21:21:58,588] Trial 2 finished with value: 0.15780078830376296 and parameters: {'iterations': 549, 'learning_rate': 0.0444945324884842, 'depth': 7, 'l2_leaf_


Best CatBoost score: 0.1631664990916303
Best CatBoost params: {'iterations': 289, 'learning_rate': 0.017570438392137358, 'depth': 5, 'l2_leaf_reg': 4.1422069039013945, 'bagging_temperature': 0.584124951553705, 'random_strength': 0.038448146342877514, 'scale_pos_weight': 19.07270027359853}

Average execution time CatBoost: 4.69s
Total optimization time CatBoost: 468.88s

Optimal thresholds (per fold) for best CatBoost trial: [0.6088363935948802, 0.5604662954965043, 0.5482290922945263, 0.6297078007279239, 0.5314413501457333]
Mean threshold for best CatBoost trial: 0.5757361864519136


In [67]:
# CatBoost study
cat_study = optuna.create_study(
    study_name="catboost_optimization_age_b_2",
    direction="maximize",
    storage=db_dir.format('cat_study'),
    load_if_exists=True
)
cat_study.optimize(lambda trial: catboost_objective(trial, X_2, y_2, skf, ratio_negative_to_positive_b_2), n_trials=n_trials)

print(f"\nBest CatBoost score: {cat_study.best_value}")
print(f"Best CatBoost params: {cat_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in cat_study.trials]
print(f"\nAverage execution time CatBoost: {np.mean(execution_times):.2f}s")
print(f"Total optimization time CatBoost: {sum(execution_times):.2f}s")

thresholds = cat_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = cat_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best CatBoost trial: {thresholds}")
print(f"Mean threshold for best CatBoost trial: {mean_threshold}")

[I 2025-07-01 21:29:38,194] A new study created in RDB with name: catboost_optimization_age_b_2
[I 2025-07-01 21:29:39,611] Trial 0 finished with value: 0.13552584542719676 and parameters: {'iterations': 940, 'learning_rate': 0.08906080018010061, 'depth': 8, 'l2_leaf_reg': 3.3080026642703078, 'bagging_temperature': 0.07565468883394633, 'random_strength': 5.149526057430088, 'scale_pos_weight': 20.0595459020457}. Best is trial 0 with value: 0.13552584542719676.
[I 2025-07-01 21:29:40,838] Trial 1 finished with value: 0.13596523565175897 and parameters: {'iterations': 367, 'learning_rate': 0.010912675091097543, 'depth': 5, 'l2_leaf_reg': 4.52964840859399, 'bagging_temperature': 0.31001988952353243, 'random_strength': 1.927966565084669, 'scale_pos_weight': 26.41691346765666}. Best is trial 1 with value: 0.13596523565175897.
[I 2025-07-01 21:29:42,212] Trial 2 finished with value: 0.1325589248312043 and parameters: {'iterations': 525, 'learning_rate': 0.05595824182507507, 'depth': 8, 'l2_le


Best CatBoost score: 0.15797597987727255
Best CatBoost params: {'iterations': 699, 'learning_rate': 0.07868461607441338, 'depth': 7, 'l2_leaf_reg': 9.63938710311811, 'bagging_temperature': 0.49506113817595054, 'random_strength': 0.7251009782304432, 'scale_pos_weight': 20.39226476767552}

Average execution time CatBoost: 0.96s
Total optimization time CatBoost: 95.97s

Optimal thresholds (per fold) for best CatBoost trial: [0.5335137159859573, 0.5169955449994765, 0.5567414823998357, 0.5168239654531086, 0.5607729781292473]
Mean threshold for best CatBoost trial: 0.536969537393525


## HistGradientBoosting

In [71]:
# HistGradientBoosting study
histgb_study = optuna.create_study(
    study_name="histgb_optimization_age_b_1",
    direction="maximize",
    storage=db_dir.format('histgb_study'),
    load_if_exists=True
)
histgb_study.optimize(lambda trial: histgb_objective(trial, X_1, y_1, skf), n_trials=n_trials)

print(f"\nBest HistGB score: {histgb_study.best_value}")
print(f"Best HistGB params: {histgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in histgb_study.trials]
print(f"\nAverage execution time HistGB: {np.mean(execution_times):.2f}s")
print(f"Total optimization time HistGB: {sum(execution_times):.2f}s")

thresholds = histgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = histgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best HistGB trial: {thresholds}")
print(f"Mean threshold for best HistGB trial: {mean_threshold}")

[I 2025-07-01 22:56:59,491] A new study created in RDB with name: histgb_optimization_age_b_1
[I 2025-07-01 22:57:00,239] Trial 0 finished with value: 0.16180670560687382 and parameters: {'max_iter': 231, 'learning_rate': 0.24424668172093783, 'max_depth': 3, 'min_samples_leaf': 23, 'l2_regularization': 0.7544680299396979, 'max_bins': 121}. Best is trial 0 with value: 0.16180670560687382.
[I 2025-07-01 22:57:01,306] Trial 1 finished with value: 0.15690360728713326 and parameters: {'max_iter': 200, 'learning_rate': 0.17514744302736057, 'max_depth': 8, 'min_samples_leaf': 26, 'l2_regularization': 0.16712287774578183, 'max_bins': 182}. Best is trial 0 with value: 0.16180670560687382.
[I 2025-07-01 22:57:04,384] Trial 2 finished with value: 0.16057210893693802 and parameters: {'max_iter': 159, 'learning_rate': 0.02558241028659679, 'max_depth': 5, 'min_samples_leaf': 94, 'l2_regularization': 0.99470985709467, 'max_bins': 207}. Best is trial 0 with value: 0.16180670560687382.
[I 2025-07-01 22


Best HistGB score: 0.1635457725323755
Best HistGB params: {'max_iter': 157, 'learning_rate': 0.09137833371402335, 'max_depth': 3, 'min_samples_leaf': 98, 'l2_regularization': 0.25018585110288427, 'max_bins': 173}

Average execution time HistGB: 1.90s
Total optimization time HistGB: 189.53s

Optimal thresholds (per fold) for best HistGB trial: [0.07437066181268026, 0.07778512063221418, 0.08680480075314145, 0.08206459212665583, 0.08030071354920651]
Mean threshold for best HistGB trial: 0.08026517777477964


In [72]:
# HistGradientBoosting study
histgb_study = optuna.create_study(
    study_name="histgb_optimization_age_b_2",
    direction="maximize",
    storage=db_dir.format('histgb_study'),
    load_if_exists=True
)
histgb_study.optimize(lambda trial: histgb_objective(trial, X_2, y_2, skf), n_trials=n_trials)

print(f"\nBest HistGB score: {histgb_study.best_value}")
print(f"Best HistGB params: {histgb_study.best_params}")

execution_times = [t.user_attrs.get('execution_time', 0) for t in histgb_study.trials]
print(f"\nAverage execution time HistGB: {np.mean(execution_times):.2f}s")
print(f"Total optimization time HistGB: {sum(execution_times):.2f}s")

thresholds = histgb_study.best_trial.user_attrs.get('optimal_thresholds', [])
mean_threshold = histgb_study.best_trial.user_attrs.get('threshold', None)
print(f"\nOptimal thresholds (per fold) for best HistGB trial: {thresholds}")
print(f"Mean threshold for best HistGB trial: {mean_threshold}")

[I 2025-07-01 23:00:13,256] A new study created in RDB with name: histgb_optimization_age_b_2
[I 2025-07-01 23:00:16,401] Trial 0 finished with value: 0.12401049216597874 and parameters: {'max_iter': 291, 'learning_rate': 0.0028702205188732696, 'max_depth': 6, 'min_samples_leaf': 19, 'l2_regularization': 0.04555690680234614, 'max_bins': 181}. Best is trial 0 with value: 0.12401049216597874.
[I 2025-07-01 23:00:16,959] Trial 1 finished with value: 0.1327257162116548 and parameters: {'max_iter': 383, 'learning_rate': 0.21540493409214456, 'max_depth': 6, 'min_samples_leaf': 73, 'l2_regularization': 0.6981161340313383, 'max_bins': 243}. Best is trial 1 with value: 0.1327257162116548.
[I 2025-07-01 23:00:20,609] Trial 2 finished with value: 0.1171148841737077 and parameters: {'max_iter': 472, 'learning_rate': 0.003704221446184845, 'max_depth': 7, 'min_samples_leaf': 94, 'l2_regularization': 0.6758497112645396, 'max_bins': 198}. Best is trial 1 with value: 0.1327257162116548.
[I 2025-07-01 2


Best HistGB score: 0.1452386253041233
Best HistGB params: {'max_iter': 323, 'learning_rate': 0.2053520596646573, 'max_depth': 4, 'min_samples_leaf': 25, 'l2_regularization': 0.25776573696908545, 'max_bins': 123}

Average execution time HistGB: 0.68s
Total optimization time HistGB: 67.87s

Optimal thresholds (per fold) for best HistGB trial: [0.07255081379937263, 0.08841334514404836, 0.06557626317399529, 0.055622585809771655, 0.06735443810320485]
Mean threshold for best HistGB trial: 0.06990348920607856


# Conclusion

In [73]:
xgb_study = optuna.load_study(
    study_name="xgboost_optimization_age_b_1",
    storage=db_dir.format('xgb_study')
)
xgb_study_b2 = optuna.load_study(
    study_name="xgboost_optimization_age_b_2",
    storage=db_dir.format('xgb_study')
)
rf_study = optuna.load_study(
    study_name="random_forest_optimization_age_b_1",
    storage=db_dir.format('rf_study')
)
rf_study_b2 = optuna.load_study(
    study_name="random_forest_optimization_age_b_2",
    storage=db_dir.format('rf_study')
)
lgb_study = optuna.load_study(
    study_name="lightgbm_optimization_age_b_1",
    storage=db_dir.format('lgb_study')
)
lgb_study_b2 = optuna.load_study(
    study_name="lightgbm_optimization_age_b_2",
    storage=db_dir.format('lgb_study')
)
cat_study = optuna.load_study(
    study_name="catboost_optimization_age_b_1",
    storage=db_dir.format('cat_study')
)
cat_study_b2 = optuna.load_study(
    study_name="catboost_optimization_age_b_2",
    storage=db_dir.format('cat_study')
)
histgb_study = optuna.load_study(
    study_name="histgb_optimization_age_b_1",
    storage=db_dir.format('histgb_study')
)
histgb_study_b2 = optuna.load_study(
    study_name="histgb_optimization_age_b_2",
    storage=db_dir.format('histgb_study')
)


In [74]:
print("XGBoost age_b_1 best score:", xgb_study.best_value)
print("XGBoost age_b_2 best score:", xgb_study_b2.best_value)

print("Random Forest age_b_1 best score:", rf_study.best_value)
print("Random Forest age_b_2 best score:", rf_study_b2.best_value)

print("LightGBM age_b_1 best score:", lgb_study.best_value)
print("LightGBM age_b_2 best score:", lgb_study_b2.best_value)

print("CatBoost age_b_1 best score:", cat_study.best_value)
print("CatBoost age_b_2 best score:", cat_study_b2.best_value)

print("HistGB age_b_1 best score:", histgb_study.best_value)
print("HistGB age_b_2 best score:", histgb_study_b2.best_value)

XGBoost age_b_1 best score: 0.16318799437591117
XGBoost age_b_2 best score: 0.15087501055621325
Random Forest age_b_1 best score: 0.15949585916468387
Random Forest age_b_2 best score: 0.1311699270046216
LightGBM age_b_1 best score: 0.15961913510127423
LightGBM age_b_2 best score: 0.13827551517777598
CatBoost age_b_1 best score: 0.1631664990916303
CatBoost age_b_2 best score: 0.15797597987727255
HistGB age_b_1 best score: 0.1635457725323755
HistGB age_b_2 best score: 0.1452386253041233
