In [2]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from cc5_preprocessor import Preprocessor

import joblib

np.random.seed(42)

In [3]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [4]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [5]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=0)

In [6]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [7]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [8]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Learning statistical parameters for Z-scoring...
Fit complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


In [9]:
print("Saving preprocessor and training columns...")

# Save the 'pre' object
joblib.dump(pre, 'cc3_preprocessor.pkl')

# Save the exact column order and names
joblib.dump(X_train_proc.columns, 'training_columns.pkl')

print("Done.")

Saving preprocessor and training columns...
Done.


## CatBoost with Optuna Tuning

In [10]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season']


In [14]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100,
        'random_state': 123
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False
    )

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [15]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-07 15:59:46,157] A new study created in memory with name: no-name-d99c51ce-92a4-4e0a-8cd3-113c5929436b



2. Starting Optuna study...


Best trial: 0. Best value: 0.520148:   1%|          | 1/100 [00:03<05:17,  3.21s/it]

[I 2025-11-07 15:59:49,369] Trial 0 finished with value: 0.5201482167670217 and parameters: {'learning_rate': 0.22100657303991095, 'depth': 9, 'l2_leaf_reg': 0.03702858950979994, 'subsample': 0.8, 'random_strength': 0.3684049402345006, 'bagging_temperature': 0.29539760006196136, 'border_count': 172, 'scale_pos_weight': 9.593083356167243}. Best is trial 0 with value: 0.5201482167670217.


Best trial: 1. Best value: 0.528785:   2%|▏         | 2/100 [00:05<04:44,  2.91s/it]

[I 2025-11-07 15:59:52,061] Trial 1 finished with value: 0.5287846481876333 and parameters: {'learning_rate': 0.021443406787079275, 'depth': 6, 'l2_leaf_reg': 0.021288614143126574, 'subsample': 1.0, 'random_strength': 4.433165834598366e-05, 'bagging_temperature': 0.636199092165564, 'border_count': 226, 'scale_pos_weight': 7.668908070635955}. Best is trial 1 with value: 0.5287846481876333.


Best trial: 2. Best value: 0.597867:   3%|▎         | 3/100 [00:10<05:51,  3.63s/it]

[I 2025-11-07 15:59:56,550] Trial 2 finished with value: 0.5978672170622635 and parameters: {'learning_rate': 0.0189630803376557, 'depth': 3, 'l2_leaf_reg': 1.4327644691960766, 'subsample': 0.7, 'random_strength': 1.1566769775192785e-07, 'bagging_temperature': 0.4026656809882977, 'border_count': 86, 'scale_pos_weight': 2.269644237702876}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:   4%|▍         | 4/100 [00:14<05:51,  3.66s/it]

[I 2025-11-07 16:00:00,251] Trial 3 finished with value: 0.5439286650931026 and parameters: {'learning_rate': 0.1492464753171525, 'depth': 10, 'l2_leaf_reg': 0.024759182627359155, 'subsample': 0.6, 'random_strength': 0.006037952005103673, 'bagging_temperature': 0.7207853303527829, 'border_count': 63, 'scale_pos_weight': 5.708620668368907}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:   5%|▌         | 5/100 [00:17<05:53,  3.72s/it]

[I 2025-11-07 16:00:04,076] Trial 4 finished with value: 0.5899648449984021 and parameters: {'learning_rate': 0.022067894781012863, 'depth': 4, 'l2_leaf_reg': 9.525084892384108, 'subsample': 0.9, 'random_strength': 0.04592522640789255, 'bagging_temperature': 0.2979053260271838, 'border_count': 33, 'scale_pos_weight': 2.832022937044676}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:   6%|▌         | 6/100 [00:18<04:09,  2.65s/it]

[I 2025-11-07 16:00:04,658] Trial 5 finished with value: 0.5495836487509462 and parameters: {'learning_rate': 0.29526115504427375, 'depth': 3, 'l2_leaf_reg': 0.0034032952891669055, 'subsample': 0.9, 'random_strength': 4.696911998303643e-08, 'bagging_temperature': 0.6683848538034187, 'border_count': 167, 'scale_pos_weight': 5.356203656806276}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:   7%|▋         | 7/100 [00:24<05:36,  3.62s/it]

[I 2025-11-07 16:00:10,274] Trial 6 finished with value: 0.5766978922716628 and parameters: {'learning_rate': 0.014584443444495936, 'depth': 4, 'l2_leaf_reg': 0.004955763583103931, 'subsample': 1.0, 'random_strength': 0.001074076187508268, 'bagging_temperature': 0.6789737894329233, 'border_count': 224, 'scale_pos_weight': 3.670781222557797}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:   8%|▊         | 8/100 [00:27<05:30,  3.59s/it]

[I 2025-11-07 16:00:13,797] Trial 7 finished with value: 0.5769230769230769 and parameters: {'learning_rate': 0.01950770717720563, 'depth': 5, 'l2_leaf_reg': 0.001262334319164819, 'subsample': 0.5, 'random_strength': 0.00510806865387711, 'bagging_temperature': 0.3768128151165996, 'border_count': 61, 'scale_pos_weight': 3.695858203241772}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:   9%|▉         | 9/100 [00:30<04:52,  3.22s/it]

[I 2025-11-07 16:00:16,199] Trial 8 finished with value: 0.5275240103068635 and parameters: {'learning_rate': 0.023985014413950845, 'depth': 6, 'l2_leaf_reg': 0.036580893635322125, 'subsample': 0.9, 'random_strength': 0.00388507370935026, 'bagging_temperature': 0.4989578292853464, 'border_count': 127, 'scale_pos_weight': 8.190236543969542}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  10%|█         | 10/100 [00:31<03:56,  2.63s/it]

[I 2025-11-07 16:00:17,506] Trial 9 finished with value: 0.5516366404465871 and parameters: {'learning_rate': 0.0782458417748223, 'depth': 4, 'l2_leaf_reg': 4.920177239851522, 'subsample': 0.8, 'random_strength': 0.0031564905325209207, 'bagging_temperature': 0.2425474369892905, 'border_count': 192, 'scale_pos_weight': 5.574922514986541}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  11%|█         | 11/100 [00:35<04:25,  2.99s/it]

[I 2025-11-07 16:00:21,304] Trial 10 finished with value: 0.5370796867802856 and parameters: {'learning_rate': 0.04276752409678639, 'depth': 8, 'l2_leaf_reg': 0.8016557947199427, 'subsample': 0.6, 'random_strength': 1.8408120617718176e-08, 'bagging_temperature': 0.04449774603417622, 'border_count': 107, 'scale_pos_weight': 1.1698000384835054}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  12%|█▏        | 12/100 [00:38<04:40,  3.19s/it]

[I 2025-11-07 16:00:24,957] Trial 11 finished with value: 0.5586145648312612 and parameters: {'learning_rate': 0.040570515225008624, 'depth': 3, 'l2_leaf_reg': 8.17049522079751, 'subsample': 0.7, 'random_strength': 1.1578188565020158e-06, 'bagging_temperature': 0.11779692083499871, 'border_count': 39, 'scale_pos_weight': 1.1917761868765098}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  13%|█▎        | 13/100 [00:44<05:41,  3.92s/it]

[I 2025-11-07 16:00:30,568] Trial 12 finished with value: 0.5910240202275601 and parameters: {'learning_rate': 0.010006637626347998, 'depth': 4, 'l2_leaf_reg': 0.9689476220863074, 'subsample': 0.7, 'random_strength': 0.9674384027514668, 'bagging_temperature': 0.9998805931128572, 'border_count': 94, 'scale_pos_weight': 2.837464825388501}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  14%|█▍        | 14/100 [00:50<06:40,  4.66s/it]

[I 2025-11-07 16:00:36,932] Trial 13 finished with value: 0.5963422599608099 and parameters: {'learning_rate': 0.01152822685134046, 'depth': 7, 'l2_leaf_reg': 0.49691706678543, 'subsample': 0.7, 'random_strength': 1.0975336642209474e-05, 'bagging_temperature': 0.9843025955825602, 'border_count': 97, 'scale_pos_weight': 2.7781692119581587}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  15%|█▌        | 15/100 [00:58<07:49,  5.52s/it]

[I 2025-11-07 16:00:44,444] Trial 14 finished with value: 0.5958927949878177 and parameters: {'learning_rate': 0.012701023149529818, 'depth': 7, 'l2_leaf_reg': 0.3901110729800285, 'subsample': 0.6, 'random_strength': 2.9250488803231725e-06, 'bagging_temperature': 0.9804681768802062, 'border_count': 88, 'scale_pos_weight': 2.3753283926845388}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  16%|█▌        | 16/100 [01:00<06:08,  4.38s/it]

[I 2025-11-07 16:00:46,186] Trial 15 finished with value: 0.5680891546615928 and parameters: {'learning_rate': 0.0685103986439431, 'depth': 7, 'l2_leaf_reg': 0.21480685546149728, 'subsample': 0.7, 'random_strength': 3.429704578548943e-07, 'bagging_temperature': 0.8342834749895356, 'border_count': 128, 'scale_pos_weight': 4.4395469089013515}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  17%|█▋        | 17/100 [01:05<06:19,  4.58s/it]

[I 2025-11-07 16:00:51,211] Trial 16 finished with value: 0.5889432117337345 and parameters: {'learning_rate': 0.0318710138064894, 'depth': 8, 'l2_leaf_reg': 2.02663180922996, 'subsample': 0.5, 'random_strength': 2.3462768838614196e-05, 'bagging_temperature': 0.4319383603902231, 'border_count': 75, 'scale_pos_weight': 2.0046315988974643}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  18%|█▊        | 18/100 [01:10<06:48,  4.98s/it]

[I 2025-11-07 16:00:57,143] Trial 17 finished with value: 0.5690329793180547 and parameters: {'learning_rate': 0.010494729161639257, 'depth': 5, 'l2_leaf_reg': 0.09928515358357504, 'subsample': 0.8, 'random_strength': 4.895259520046816e-06, 'bagging_temperature': 0.8044900106798851, 'border_count': 117, 'scale_pos_weight': 4.161522810194477}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  19%|█▉        | 19/100 [01:15<06:43,  4.98s/it]

[I 2025-11-07 16:01:02,123] Trial 18 finished with value: 0.5396432111000992 and parameters: {'learning_rate': 0.01611155502746077, 'depth': 8, 'l2_leaf_reg': 2.7528103316438077, 'subsample': 0.6, 'random_strength': 1.2359330952599867e-07, 'bagging_temperature': 0.5372192121391248, 'border_count': 163, 'scale_pos_weight': 6.716028449732972}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  20%|██        | 20/100 [01:20<06:22,  4.78s/it]

[I 2025-11-07 16:01:06,444] Trial 19 finished with value: 0.5692913385826772 and parameters: {'learning_rate': 0.10144325739502506, 'depth': 10, 'l2_leaf_reg': 0.1602729746734939, 'subsample': 0.7, 'random_strength': 0.00025724686121398693, 'bagging_temperature': 0.5588513171587017, 'border_count': 137, 'scale_pos_weight': 1.8351590381827472}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 2. Best value: 0.597867:  21%|██        | 21/100 [01:22<05:21,  4.07s/it]

[I 2025-11-07 16:01:08,853] Trial 20 finished with value: 0.5596184419713831 and parameters: {'learning_rate': 0.030171442035044124, 'depth': 5, 'l2_leaf_reg': 0.7072401187635777, 'subsample': 0.8, 'random_strength': 1.1396147416175056e-08, 'bagging_temperature': 0.1759419011174027, 'border_count': 99, 'scale_pos_weight': 4.873500845638436}. Best is trial 2 with value: 0.5978672170622635.


Best trial: 21. Best value: 0.60105:  22%|██▏       | 22/100 [01:28<06:06,  4.70s/it]

[I 2025-11-07 16:01:15,004] Trial 21 finished with value: 0.6010498687664042 and parameters: {'learning_rate': 0.013160243418169341, 'depth': 7, 'l2_leaf_reg': 0.3257414117657079, 'subsample': 0.6, 'random_strength': 3.7582502867577865e-06, 'bagging_temperature': 0.98722034596729, 'border_count': 86, 'scale_pos_weight': 2.8486370731804715}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  23%|██▎       | 23/100 [01:34<06:13,  4.85s/it]

[I 2025-11-07 16:01:20,229] Trial 22 finished with value: 0.5953905249679897 and parameters: {'learning_rate': 0.014943051974377031, 'depth': 7, 'l2_leaf_reg': 1.7854566529347462, 'subsample': 0.6, 'random_strength': 1.287729071525523e-05, 'bagging_temperature': 0.8549826662967753, 'border_count': 73, 'scale_pos_weight': 2.9588072794621567}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  24%|██▍       | 24/100 [01:39<06:22,  5.03s/it]

[I 2025-11-07 16:01:25,673] Trial 23 finished with value: 0.5836909871244635 and parameters: {'learning_rate': 0.012295553962831069, 'depth': 6, 'l2_leaf_reg': 0.13059201572923218, 'subsample': 0.5, 'random_strength': 4.8807511286584e-07, 'bagging_temperature': 0.9185662748786546, 'border_count': 51, 'scale_pos_weight': 3.3005461644373426}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  25%|██▌       | 25/100 [01:46<07:05,  5.68s/it]

[I 2025-11-07 16:01:32,857] Trial 24 finished with value: 0.5668494306199916 and parameters: {'learning_rate': 0.017020388356270566, 'depth': 9, 'l2_leaf_reg': 0.43313388485826293, 'subsample': 0.7, 'random_strength': 2.7435719048581486e-06, 'bagging_temperature': 0.778969443815146, 'border_count': 88, 'scale_pos_weight': 1.5193157728175493}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  26%|██▌       | 26/100 [01:49<06:05,  4.94s/it]

[I 2025-11-07 16:01:36,092] Trial 25 finished with value: 0.5973763874873865 and parameters: {'learning_rate': 0.028273721932985215, 'depth': 7, 'l2_leaf_reg': 0.2898784180210026, 'subsample': 0.6, 'random_strength': 0.00016017592361467986, 'bagging_temperature': 0.9256547567187755, 'border_count': 148, 'scale_pos_weight': 2.575066664117215}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  27%|██▋       | 27/100 [01:53<05:25,  4.46s/it]

[I 2025-11-07 16:01:39,435] Trial 26 finished with value: 0.5874635568513119 and parameters: {'learning_rate': 0.029545015522277, 'depth': 8, 'l2_leaf_reg': 0.07316032162566835, 'subsample': 0.6, 'random_strength': 0.00015320517772199547, 'bagging_temperature': 0.9092697878933689, 'border_count': 159, 'scale_pos_weight': 2.0797867631716205}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  28%|██▊       | 28/100 [01:57<05:12,  4.34s/it]

[I 2025-11-07 16:01:43,477] Trial 27 finished with value: 0.5806067816775728 and parameters: {'learning_rate': 0.04004137019553674, 'depth': 9, 'l2_leaf_reg': 0.2265135128095802, 'subsample': 0.5, 'random_strength': 2.3676745234813027e-07, 'bagging_temperature': 0.3749651274460396, 'border_count': 204, 'scale_pos_weight': 4.088064579758062}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  29%|██▉       | 29/100 [01:59<04:24,  3.73s/it]

[I 2025-11-07 16:01:45,784] Trial 28 finished with value: 0.5952800281789362 and parameters: {'learning_rate': 0.0541382642049124, 'depth': 6, 'l2_leaf_reg': 1.2313891294579629, 'subsample': 0.6, 'random_strength': 0.00032195532713521455, 'bagging_temperature': 0.7483001644263424, 'border_count': 150, 'scale_pos_weight': 2.256428504735812}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  30%|███       | 30/100 [02:01<03:38,  3.13s/it]

[I 2025-11-07 16:01:47,502] Trial 29 finished with value: 0.5138888888888888 and parameters: {'learning_rate': 0.026322665597845987, 'depth': 5, 'l2_leaf_reg': 0.06021871665164525, 'subsample': 0.5, 'random_strength': 6.74365788038182e-08, 'bagging_temperature': 0.5971649630905491, 'border_count': 197, 'scale_pos_weight': 9.214103494510226}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 21. Best value: 0.60105:  31%|███       | 31/100 [02:05<03:49,  3.33s/it]

[I 2025-11-07 16:01:51,313] Trial 30 finished with value: 0.5419258522020403 and parameters: {'learning_rate': 0.020018953645278743, 'depth': 7, 'l2_leaf_reg': 4.191619480764109, 'subsample': 0.6, 'random_strength': 7.188754198278013e-05, 'bagging_temperature': 0.8945149454335006, 'border_count': 179, 'scale_pos_weight': 6.360155414021668}. Best is trial 21 with value: 0.6010498687664042.


Best trial: 31. Best value: 0.601942:  32%|███▏      | 32/100 [02:11<04:43,  4.16s/it]

[I 2025-11-07 16:01:57,417] Trial 31 finished with value: 0.6019417475728155 and parameters: {'learning_rate': 0.012521488824550344, 'depth': 7, 'l2_leaf_reg': 0.37319814792803824, 'subsample': 0.7, 'random_strength': 1.1038850104000488e-05, 'bagging_temperature': 0.9536511771248262, 'border_count': 107, 'scale_pos_weight': 2.6189739604075983}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  33%|███▎      | 33/100 [02:15<04:48,  4.30s/it]

[I 2025-11-07 16:02:02,053] Trial 32 finished with value: 0.5852618757612668 and parameters: {'learning_rate': 0.01859401606490724, 'depth': 8, 'l2_leaf_reg': 0.3306716234544713, 'subsample': 0.7, 'random_strength': 1.0131109135360793e-06, 'bagging_temperature': 0.9185273000733956, 'border_count': 115, 'scale_pos_weight': 3.5145936031524956}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  34%|███▍      | 34/100 [02:22<05:37,  5.11s/it]

[I 2025-11-07 16:02:09,044] Trial 33 finished with value: 0.5975692099932478 and parameters: {'learning_rate': 0.01484783752315575, 'depth': 6, 'l2_leaf_reg': 1.3925102990258842, 'subsample': 0.7, 'random_strength': 4.288013011611147e-05, 'bagging_temperature': 0.4660590982288639, 'border_count': 142, 'scale_pos_weight': 2.545752194403538}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  35%|███▌      | 35/100 [02:31<06:47,  6.28s/it]

[I 2025-11-07 16:02:18,039] Trial 34 finished with value: 0.5815768930523029 and parameters: {'learning_rate': 0.013085620802507598, 'depth': 6, 'l2_leaf_reg': 1.4322254930168237, 'subsample': 0.8, 'random_strength': 3.231777572224776e-05, 'bagging_temperature': 0.445555215250005, 'border_count': 252, 'scale_pos_weight': 1.7325798357504985}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  36%|███▌      | 36/100 [02:32<04:58,  4.67s/it]

[I 2025-11-07 16:02:18,948] Trial 35 finished with value: 0.5616076150185088 and parameters: {'learning_rate': 0.1512716442376408, 'depth': 3, 'l2_leaf_reg': 0.6602867501922218, 'subsample': 0.7, 'random_strength': 6.698482229318661e-06, 'bagging_temperature': 0.29830470286156874, 'border_count': 73, 'scale_pos_weight': 4.886263711335738}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  37%|███▋      | 37/100 [02:39<05:31,  5.25s/it]

[I 2025-11-07 16:02:25,576] Trial 36 finished with value: 0.5889925973607982 and parameters: {'learning_rate': 0.014400952525258911, 'depth': 9, 'l2_leaf_reg': 0.011724111529921833, 'subsample': 0.8, 'random_strength': 0.049336396249354514, 'bagging_temperature': 0.35428078567555554, 'border_count': 130, 'scale_pos_weight': 3.3388499081555785}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  38%|███▊      | 38/100 [02:44<05:27,  5.29s/it]

[I 2025-11-07 16:02:30,949] Trial 37 finished with value: 0.5778873818331278 and parameters: {'learning_rate': 0.021592543950155623, 'depth': 5, 'l2_leaf_reg': 3.0961388862779633, 'subsample': 0.7, 'random_strength': 0.0008238690184127209, 'bagging_temperature': 0.4833879594072103, 'border_count': 112, 'scale_pos_weight': 1.4702024174568398}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  39%|███▉      | 39/100 [02:51<05:51,  5.76s/it]

[I 2025-11-07 16:02:37,789] Trial 38 finished with value: 0.5257142857142857 and parameters: {'learning_rate': 0.015449112558692565, 'depth': 6, 'l2_leaf_reg': 5.828697795007612, 'subsample': 0.9, 'random_strength': 1.4591042893395043e-06, 'bagging_temperature': 0.6337062938056605, 'border_count': 55, 'scale_pos_weight': 1.043381091200044}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  40%|████      | 40/100 [02:55<05:11,  5.20s/it]

[I 2025-11-07 16:02:41,691] Trial 39 finished with value: 0.5842696629213483 and parameters: {'learning_rate': 0.01824310256212012, 'depth': 4, 'l2_leaf_reg': 1.0199188769371417, 'subsample': 0.7, 'random_strength': 4.015193874661453e-05, 'bagging_temperature': 0.6941567195665941, 'border_count': 83, 'scale_pos_weight': 3.214906206580019}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  41%|████      | 41/100 [03:01<05:19,  5.42s/it]

[I 2025-11-07 16:02:47,616] Trial 40 finished with value: 0.5797619047619048 and parameters: {'learning_rate': 0.023253057007490825, 'depth': 9, 'l2_leaf_reg': 2.3915565749686873, 'subsample': 0.9, 'random_strength': 4.78518032667306e-08, 'bagging_temperature': 0.4346587415383053, 'border_count': 106, 'scale_pos_weight': 3.797055185939908}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  42%|████▏     | 42/100 [03:05<04:48,  4.97s/it]

[I 2025-11-07 16:02:51,535] Trial 41 finished with value: 0.596562184024267 and parameters: {'learning_rate': 0.026035591192059303, 'depth': 7, 'l2_leaf_reg': 0.264332724105326, 'subsample': 0.6, 'random_strength': 0.0008742858772491707, 'bagging_temperature': 0.9581337648819597, 'border_count': 150, 'scale_pos_weight': 2.5841806902903826}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  43%|████▎     | 43/100 [03:11<05:01,  5.29s/it]

[I 2025-11-07 16:02:57,587] Trial 42 finished with value: 0.5956864087641218 and parameters: {'learning_rate': 0.011620039900405407, 'depth': 7, 'l2_leaf_reg': 0.5406145644160141, 'subsample': 0.6, 'random_strength': 0.00013071597480270042, 'bagging_temperature': 0.8638317809207733, 'border_count': 179, 'scale_pos_weight': 2.4060466464665304}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  44%|████▍     | 44/100 [03:17<05:05,  5.45s/it]

[I 2025-11-07 16:03:03,415] Trial 43 finished with value: 0.5979915775834144 and parameters: {'learning_rate': 0.013899382897072316, 'depth': 8, 'l2_leaf_reg': 0.1728620157238352, 'subsample': 0.8, 'random_strength': 2.118912015170755e-05, 'bagging_temperature': 0.23767469657395607, 'border_count': 142, 'scale_pos_weight': 2.961303468723002}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  45%|████▌     | 45/100 [03:23<05:09,  5.63s/it]

[I 2025-11-07 16:03:09,461] Trial 44 finished with value: 0.5752038234467247 and parameters: {'learning_rate': 0.010147727125168784, 'depth': 8, 'l2_leaf_reg': 0.05645722985704061, 'subsample': 0.8, 'random_strength': 1.6532842802009674e-05, 'bagging_temperature': 0.23472489826311294, 'border_count': 139, 'scale_pos_weight': 4.450392132576456}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  46%|████▌     | 46/100 [03:28<05:01,  5.58s/it]

[I 2025-11-07 16:03:14,928] Trial 45 finished with value: 0.5973684210526315 and parameters: {'learning_rate': 0.013351826042558959, 'depth': 8, 'l2_leaf_reg': 0.02595477984078108, 'subsample': 1.0, 'random_strength': 6.09212014836888e-06, 'bagging_temperature': 0.0024273827992333796, 'border_count': 130, 'scale_pos_weight': 2.932938857090055}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  47%|████▋     | 47/100 [03:33<04:39,  5.27s/it]

[I 2025-11-07 16:03:19,465] Trial 46 finished with value: 0.5736040609137056 and parameters: {'learning_rate': 0.016516216043755815, 'depth': 3, 'l2_leaf_reg': 0.15565778269994424, 'subsample': 0.8, 'random_strength': 5.132903688658406e-05, 'bagging_temperature': 0.12193967214956436, 'border_count': 120, 'scale_pos_weight': 3.931973890886135}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  48%|████▊     | 48/100 [03:36<04:01,  4.64s/it]

[I 2025-11-07 16:03:22,636] Trial 47 finished with value: 0.5841979249800479 and parameters: {'learning_rate': 0.03439478579534494, 'depth': 6, 'l2_leaf_reg': 0.9120674314375067, 'subsample': 0.7, 'random_strength': 8.715369103928936e-07, 'bagging_temperature': 0.3250224741069968, 'border_count': 45, 'scale_pos_weight': 1.6025974193735417}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  49%|████▉     | 49/100 [03:45<05:05,  5.99s/it]

[I 2025-11-07 16:03:31,768] Trial 48 finished with value: 0.5908170155300473 and parameters: {'learning_rate': 0.011234959177397048, 'depth': 10, 'l2_leaf_reg': 0.10488941522827668, 'subsample': 0.8, 'random_strength': 2.60673448872429e-06, 'bagging_temperature': 0.22134676774028134, 'border_count': 102, 'scale_pos_weight': 3.062986435598627}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  50%|█████     | 50/100 [03:51<05:01,  6.03s/it]

[I 2025-11-07 16:03:37,889] Trial 49 finished with value: 0.592917123037605 and parameters: {'learning_rate': 0.021811812298523654, 'depth': 8, 'l2_leaf_reg': 1.5088467283450782, 'subsample': 0.9, 'random_strength': 0.012634829206446279, 'bagging_temperature': 0.41115895564195015, 'border_count': 67, 'scale_pos_weight': 2.167950248674382}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  51%|█████     | 51/100 [03:52<03:42,  4.54s/it]

[I 2025-11-07 16:03:38,956] Trial 50 finished with value: 0.5197183098591549 and parameters: {'learning_rate': 0.23995774225207767, 'depth': 6, 'l2_leaf_reg': 0.0013609771939221227, 'subsample': 0.7, 'random_strength': 1.422051230131415e-07, 'bagging_temperature': 0.27472526548351794, 'border_count': 85, 'scale_pos_weight': 7.308364955276348}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  52%|█████▏    | 52/100 [03:56<03:25,  4.27s/it]

[I 2025-11-07 16:03:42,613] Trial 51 finished with value: 0.5972730295976055 and parameters: {'learning_rate': 0.01939577879857855, 'depth': 7, 'l2_leaf_reg': 0.29883606300387283, 'subsample': 0.6, 'random_strength': 0.00044219127435773416, 'bagging_temperature': 0.9571433346531908, 'border_count': 145, 'scale_pos_weight': 2.6255230059966603}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  53%|█████▎    | 53/100 [04:02<03:39,  4.67s/it]

[I 2025-11-07 16:03:48,215] Trial 52 finished with value: 0.6002710027100271 and parameters: {'learning_rate': 0.013499184374684753, 'depth': 7, 'l2_leaf_reg': 0.203574289998433, 'subsample': 0.7, 'random_strength': 0.0019142891421769315, 'bagging_temperature': 0.4791698001314261, 'border_count': 157, 'scale_pos_weight': 2.5391733070071045}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  54%|█████▍    | 54/100 [04:08<03:54,  5.11s/it]

[I 2025-11-07 16:03:54,338] Trial 53 finished with value: 0.5847554038680318 and parameters: {'learning_rate': 0.014435858764709137, 'depth': 7, 'l2_leaf_reg': 0.19115494564062, 'subsample': 0.7, 'random_strength': 0.0025779472758079244, 'bagging_temperature': 0.4931462299960871, 'border_count': 159, 'scale_pos_weight': 1.9066987152820278}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  55%|█████▌    | 55/100 [04:16<04:30,  6.00s/it]

[I 2025-11-07 16:04:02,430] Trial 54 finished with value: 0.5860693464252839 and parameters: {'learning_rate': 0.013206900997658151, 'depth': 8, 'l2_leaf_reg': 0.5190648081522992, 'subsample': 0.7, 'random_strength': 0.014510471790887592, 'bagging_temperature': 0.5459438969975619, 'border_count': 172, 'scale_pos_weight': 3.4442550401691476}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  56%|█████▌    | 56/100 [04:23<04:46,  6.51s/it]

[I 2025-11-07 16:04:10,112] Trial 55 finished with value: 0.5573770491803278 and parameters: {'learning_rate': 0.010010471217951924, 'depth': 7, 'l2_leaf_reg': 0.08346868226997362, 'subsample': 0.8, 'random_strength': 0.08926728437870017, 'bagging_temperature': 0.6046100323554918, 'border_count': 137, 'scale_pos_weight': 1.302510542443494}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  57%|█████▋    | 57/100 [04:28<04:19,  6.04s/it]

[I 2025-11-07 16:04:15,071] Trial 56 finished with value: 0.5931687521136286 and parameters: {'learning_rate': 0.01687676088330627, 'depth': 8, 'l2_leaf_reg': 0.046018105183296416, 'subsample': 0.7, 'random_strength': 8.291036661065698e-05, 'bagging_temperature': 0.478278722139416, 'border_count': 124, 'scale_pos_weight': 2.722194539653639}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  58%|█████▊    | 58/100 [04:34<04:02,  5.78s/it]

[I 2025-11-07 16:04:20,239] Trial 57 finished with value: 0.5657608695652174 and parameters: {'learning_rate': 0.011730384510979122, 'depth': 4, 'l2_leaf_reg': 0.6900647038863974, 'subsample': 0.7, 'random_strength': 2.25021686384969e-05, 'bagging_temperature': 0.3911975827757432, 'border_count': 90, 'scale_pos_weight': 4.456681442518187}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  59%|█████▉    | 59/100 [04:39<03:55,  5.73s/it]

[I 2025-11-07 16:04:25,863] Trial 58 finished with value: 0.5905233380480905 and parameters: {'learning_rate': 0.014089466983228598, 'depth': 6, 'l2_leaf_reg': 0.13100223321095827, 'subsample': 0.8, 'random_strength': 9.621396140840007e-06, 'bagging_temperature': 0.18005615124879648, 'border_count': 109, 'scale_pos_weight': 2.1968916156574805}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  60%|██████    | 60/100 [04:45<03:48,  5.72s/it]

[I 2025-11-07 16:04:31,558] Trial 59 finished with value: 0.5913978494623656 and parameters: {'learning_rate': 0.012147759555954608, 'depth': 7, 'l2_leaf_reg': 0.40880636130611403, 'subsample': 0.8, 'random_strength': 0.0016027601086976936, 'bagging_temperature': 0.5145673468064336, 'border_count': 79, 'scale_pos_weight': 3.0915608270967856}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  61%|██████    | 61/100 [04:50<03:36,  5.55s/it]

[I 2025-11-07 16:04:36,712] Trial 60 finished with value: 0.5799168152109329 and parameters: {'learning_rate': 0.017627998386430126, 'depth': 7, 'l2_leaf_reg': 3.803839621047843, 'subsample': 0.7, 'random_strength': 7.058026951167665e-07, 'bagging_temperature': 0.33053981554990586, 'border_count': 96, 'scale_pos_weight': 3.6406000198173736}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  62%|██████▏   | 62/100 [04:54<03:15,  5.16s/it]

[I 2025-11-07 16:04:40,945] Trial 61 finished with value: 0.5966101694915255 and parameters: {'learning_rate': 0.02019031058074757, 'depth': 7, 'l2_leaf_reg': 0.21498004136061266, 'subsample': 0.6, 'random_strength': 0.00025045909907363604, 'bagging_temperature': 0.9563990150446064, 'border_count': 155, 'scale_pos_weight': 2.517032052470536}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  63%|██████▎   | 63/100 [05:00<03:15,  5.27s/it]

[I 2025-11-07 16:04:46,490] Trial 62 finished with value: 0.5845206006931074 and parameters: {'learning_rate': 0.025300990832187613, 'depth': 8, 'l2_leaf_reg': 0.28796783140441407, 'subsample': 0.6, 'random_strength': 2.5364759848963688e-08, 'bagging_temperature': 0.5841759355566534, 'border_count': 142, 'scale_pos_weight': 1.8882016231776202}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  64%|██████▍   | 64/100 [05:02<02:32,  4.23s/it]

[I 2025-11-07 16:04:48,292] Trial 63 finished with value: 0.5907036797934151 and parameters: {'learning_rate': 0.050018671243555975, 'depth': 6, 'l2_leaf_reg': 0.4122515421179195, 'subsample': 0.5, 'random_strength': 4.0276686015114405e-06, 'bagging_temperature': 0.7965117861901977, 'border_count': 167, 'scale_pos_weight': 2.7594287730420737}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  65%|██████▌   | 65/100 [05:07<02:42,  4.64s/it]

[I 2025-11-07 16:04:53,901] Trial 64 finished with value: 0.599250936329588 and parameters: {'learning_rate': 0.015734678945099938, 'depth': 4, 'l2_leaf_reg': 1.1850701262957584, 'subsample': 0.6, 'random_strength': 0.0001648779700832464, 'bagging_temperature': 0.8830910662212291, 'border_count': 175, 'scale_pos_weight': 2.3470927458738453}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  66%|██████▌   | 66/100 [05:12<02:43,  4.82s/it]

[I 2025-11-07 16:04:59,123] Trial 65 finished with value: 0.5978186775732788 and parameters: {'learning_rate': 0.015921066022636172, 'depth': 4, 'l2_leaf_reg': 1.0923491076004463, 'subsample': 0.7, 'random_strength': 1.1175906460959491e-05, 'bagging_temperature': 0.6491639786694859, 'border_count': 187, 'scale_pos_weight': 2.3534657998823967}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  67%|██████▋   | 67/100 [05:18<02:45,  5.02s/it]

[I 2025-11-07 16:05:04,604] Trial 66 finished with value: 0.5462226640159046 and parameters: {'learning_rate': 0.011181177053849146, 'depth': 3, 'l2_leaf_reg': 1.9721293347975488, 'subsample': 0.6, 'random_strength': 1.5606318881174898e-06, 'bagging_temperature': 0.9961742128692519, 'border_count': 187, 'scale_pos_weight': 5.892829656537619}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  68%|██████▊   | 68/100 [05:20<02:10,  4.08s/it]

[I 2025-11-07 16:05:06,513] Trial 67 finished with value: 0.5967121371108779 and parameters: {'learning_rate': 0.07084004497726115, 'depth': 4, 'l2_leaf_reg': 0.8151633588694953, 'subsample': 0.7, 'random_strength': 1.5249623782242647e-05, 'bagging_temperature': 0.8624623216934274, 'border_count': 210, 'scale_pos_weight': 2.1889937037188676}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  69%|██████▉   | 69/100 [05:28<02:44,  5.31s/it]

[I 2025-11-07 16:05:14,697] Trial 68 finished with value: 0.5834621329211747 and parameters: {'learning_rate': 0.01556407356726292, 'depth': 4, 'l2_leaf_reg': 1.0867662114444834, 'subsample': 0.7, 'random_strength': 3.7327401352731507e-07, 'bagging_temperature': 0.8254906421938536, 'border_count': 182, 'scale_pos_weight': 1.7008138968333943}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  70%|███████   | 70/100 [05:29<02:04,  4.14s/it]

[I 2025-11-07 16:05:16,089] Trial 69 finished with value: 0.5799256505576208 and parameters: {'learning_rate': 0.11052675967627346, 'depth': 3, 'l2_leaf_reg': 0.5737001226149588, 'subsample': 0.6, 'random_strength': 0.0004406951563168633, 'bagging_temperature': 0.7209486851247038, 'border_count': 218, 'scale_pos_weight': 1.42302877298516}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  71%|███████   | 71/100 [05:34<02:03,  4.27s/it]

[I 2025-11-07 16:05:20,679] Trial 70 finished with value: 0.5866499529927922 and parameters: {'learning_rate': 0.016005741564219297, 'depth': 3, 'l2_leaf_reg': 1.792887945007147, 'subsample': 0.5, 'random_strength': 0.009570010753266031, 'bagging_temperature': 0.6462106564364128, 'border_count': 195, 'scale_pos_weight': 2.9661550705025643}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  72%|███████▏  | 72/100 [05:41<02:24,  5.16s/it]

[I 2025-11-07 16:05:27,918] Trial 71 finished with value: 0.5964079972890546 and parameters: {'learning_rate': 0.013410516997603221, 'depth': 5, 'l2_leaf_reg': 1.2941300782109502, 'subsample': 0.7, 'random_strength': 2.5652795339669762e-05, 'bagging_temperature': 0.8918853266059424, 'border_count': 174, 'scale_pos_weight': 2.4228915287791577}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  73%|███████▎  | 73/100 [05:48<02:30,  5.57s/it]

[I 2025-11-07 16:05:34,441] Trial 72 finished with value: 0.5878057685286601 and parameters: {'learning_rate': 0.0148689000362202, 'depth': 5, 'l2_leaf_reg': 7.425339594355844, 'subsample': 0.7, 'random_strength': 5.7191782828895926e-05, 'bagging_temperature': 0.4561906671552802, 'border_count': 167, 'scale_pos_weight': 1.9842521669226216}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  74%|███████▍  | 74/100 [05:54<02:33,  5.91s/it]

[I 2025-11-07 16:05:41,154] Trial 73 finished with value: 0.5980861244019139 and parameters: {'learning_rate': 0.012483362551818658, 'depth': 4, 'l2_leaf_reg': 2.726171086056414, 'subsample': 0.7, 'random_strength': 2.0738625472036453e-06, 'bagging_temperature': 0.4095867415158335, 'border_count': 188, 'scale_pos_weight': 2.3337341862351613}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  75%|███████▌  | 75/100 [06:00<02:27,  5.90s/it]

[I 2025-11-07 16:05:47,023] Trial 74 finished with value: 0.585635359116022 and parameters: {'learning_rate': 0.012843241762680778, 'depth': 4, 'l2_leaf_reg': 2.371842258061805, 'subsample': 0.7, 'random_strength': 9.808290125649923e-06, 'bagging_temperature': 0.4115028435696402, 'border_count': 202, 'scale_pos_weight': 3.1961938227619227}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  76%|███████▌  | 76/100 [06:05<02:15,  5.65s/it]

[I 2025-11-07 16:05:52,099] Trial 75 finished with value: 0.519516092216389 and parameters: {'learning_rate': 0.010810466468676049, 'depth': 4, 'l2_leaf_reg': 3.5986719411040524, 'subsample': 0.8, 'random_strength': 2.9511779729091344e-06, 'bagging_temperature': 0.946292290025761, 'border_count': 185, 'scale_pos_weight': 8.868457988129439}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  77%|███████▋  | 77/100 [06:10<02:02,  5.32s/it]

[I 2025-11-07 16:05:56,656] Trial 76 finished with value: 0.5998632010943913 and parameters: {'learning_rate': 0.018158607677576166, 'depth': 3, 'l2_leaf_reg': 5.9698820016594745, 'subsample': 0.6, 'random_strength': 2.1380824202440337e-06, 'bagging_temperature': 0.761066183264132, 'border_count': 191, 'scale_pos_weight': 2.317531252860484}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  78%|███████▊  | 78/100 [06:14<01:51,  5.06s/it]

[I 2025-11-07 16:06:01,092] Trial 77 finished with value: 0.5158150851581509 and parameters: {'learning_rate': 0.01791927641652563, 'depth': 3, 'l2_leaf_reg': 5.623143558457447, 'subsample': 0.6, 'random_strength': 5.734181402597984e-07, 'bagging_temperature': 0.7631731446314786, 'border_count': 60, 'scale_pos_weight': 1.0030879545706253}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  79%|███████▉  | 79/100 [06:19<01:40,  4.80s/it]

[I 2025-11-07 16:06:05,279] Trial 78 finished with value: 0.5944744363289933 and parameters: {'learning_rate': 0.023460701508906497, 'depth': 3, 'l2_leaf_reg': 9.785691126853955, 'subsample': 0.6, 'random_strength': 1.9750027777774356e-06, 'bagging_temperature': 0.8952147473385903, 'border_count': 240, 'scale_pos_weight': 2.8961791053610106}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  80%|████████  | 80/100 [06:23<01:34,  4.72s/it]

[I 2025-11-07 16:06:09,810] Trial 79 finished with value: 0.5805506736965437 and parameters: {'learning_rate': 0.012183512708369863, 'depth': 3, 'l2_leaf_reg': 6.9497648182698475, 'subsample': 0.6, 'random_strength': 2.2232642845828857e-07, 'bagging_temperature': 0.8278089682449257, 'border_count': 206, 'scale_pos_weight': 3.4732933114362985}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  81%|████████  | 81/100 [06:28<01:28,  4.67s/it]

[I 2025-11-07 16:06:14,371] Trial 80 finished with value: 0.5898226676946801 and parameters: {'learning_rate': 0.020715921786234336, 'depth': 3, 'l2_leaf_reg': 4.660767711436011, 'subsample': 0.6, 'random_strength': 4.585566075106895e-06, 'bagging_temperature': 0.3586426192483846, 'border_count': 191, 'scale_pos_weight': 1.7044154381197993}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  82%|████████▏ | 82/100 [06:34<01:30,  5.01s/it]

[I 2025-11-07 16:06:20,168] Trial 81 finished with value: 0.5993220338983051 and parameters: {'learning_rate': 0.01687242215005263, 'depth': 4, 'l2_leaf_reg': 2.3670650332003946, 'subsample': 0.7, 'random_strength': 7.483935755618967e-06, 'bagging_temperature': 0.6722821153729087, 'border_count': 190, 'scale_pos_weight': 2.4000910861370057}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  83%|████████▎ | 83/100 [06:39<01:26,  5.10s/it]

[I 2025-11-07 16:06:25,483] Trial 82 finished with value: 0.5964912280701754 and parameters: {'learning_rate': 0.018424508137019516, 'depth': 4, 'l2_leaf_reg': 2.8040596955155808, 'subsample': 0.7, 'random_strength': 6.94192866307464e-06, 'bagging_temperature': 0.9789302862918663, 'border_count': 177, 'scale_pos_weight': 2.4160304764695644}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  84%|████████▍ | 84/100 [06:44<01:21,  5.07s/it]

[I 2025-11-07 16:06:30,471] Trial 83 finished with value: 0.5926976249556895 and parameters: {'learning_rate': 0.014070669562300252, 'depth': 4, 'l2_leaf_reg': 0.17707852243895647, 'subsample': 0.6, 'random_strength': 3.6483221169645487e-06, 'bagging_temperature': 0.5193413707562303, 'border_count': 199, 'scale_pos_weight': 2.098042523175277}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  85%|████████▌ | 85/100 [06:51<01:25,  5.71s/it]

[I 2025-11-07 16:06:37,667] Trial 84 finished with value: 0.5867253392005867 and parameters: {'learning_rate': 0.012701271195685822, 'depth': 5, 'l2_leaf_reg': 3.31428343724024, 'subsample': 0.8, 'random_strength': 1.1047757608437636e-06, 'bagging_temperature': 0.7440715141624403, 'border_count': 214, 'scale_pos_weight': 1.9884105989249243}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  86%|████████▌ | 86/100 [06:55<01:14,  5.31s/it]

[I 2025-11-07 16:06:42,058] Trial 85 finished with value: 0.5886137021550337 and parameters: {'learning_rate': 0.016977092763254344, 'depth': 3, 'l2_leaf_reg': 2.38020809846513, 'subsample': 0.5, 'random_strength': 1.7408881505482056e-05, 'bagging_temperature': 0.8670782432870519, 'border_count': 229, 'scale_pos_weight': 2.7841056504662265}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  87%|████████▋ | 87/100 [07:01<01:10,  5.42s/it]

[I 2025-11-07 16:06:47,724] Trial 86 finished with value: 0.589247311827957 and parameters: {'learning_rate': 0.013774245133197244, 'depth': 4, 'l2_leaf_reg': 1.5675416832942022, 'subsample': 0.7, 'random_strength': 0.00010720216306496514, 'bagging_temperature': 0.6984737157655558, 'border_count': 163, 'scale_pos_weight': 3.171017909776791}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  88%|████████▊ | 88/100 [07:08<01:10,  5.85s/it]

[I 2025-11-07 16:06:54,598] Trial 87 finished with value: 0.6003963011889035 and parameters: {'learning_rate': 0.01088542031321359, 'depth': 5, 'l2_leaf_reg': 0.13156765403292192, 'subsample': 0.7, 'random_strength': 1.9832497874823054e-06, 'bagging_temperature': 0.9332422726334167, 'border_count': 155, 'scale_pos_weight': 2.617539154460182}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  89%|████████▉ | 89/100 [07:14<01:03,  5.80s/it]

[I 2025-11-07 16:07:00,258] Trial 88 finished with value: 0.5702857142857143 and parameters: {'learning_rate': 0.011034744292646413, 'depth': 5, 'l2_leaf_reg': 0.11773376024621844, 'subsample': 0.7, 'random_strength': 6.872883599669556e-06, 'bagging_temperature': 0.9786193808915972, 'border_count': 154, 'scale_pos_weight': 3.8923248637927546}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  90%|█████████ | 90/100 [07:21<01:01,  6.19s/it]

[I 2025-11-07 16:07:07,371] Trial 89 finished with value: 0.5967849966510381 and parameters: {'learning_rate': 0.01077534712729177, 'depth': 5, 'l2_leaf_reg': 0.0716862739404483, 'subsample': 0.6, 'random_strength': 2.111922020919815e-06, 'bagging_temperature': 0.9421111985555791, 'border_count': 193, 'scale_pos_weight': 2.5416298038293075}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  91%|█████████ | 91/100 [07:26<00:54,  6.00s/it]

[I 2025-11-07 16:07:12,931] Trial 90 finished with value: 0.5713039686000873 and parameters: {'learning_rate': 0.012232955582615956, 'depth': 4, 'l2_leaf_reg': 0.16198358875172267, 'subsample': 0.7, 'random_strength': 3.260422240901053e-05, 'bagging_temperature': 0.9312660004122063, 'border_count': 169, 'scale_pos_weight': 1.2531883354269235}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  92%|█████████▏| 92/100 [07:31<00:44,  5.50s/it]

[I 2025-11-07 16:07:17,273] Trial 91 finished with value: 0.5906300484652666 and parameters: {'learning_rate': 0.01534914652269292, 'depth': 3, 'l2_leaf_reg': 0.21879525766817556, 'subsample': 0.7, 'random_strength': 7.098904249266142e-08, 'bagging_temperature': 0.8435640851401509, 'border_count': 68, 'scale_pos_weight': 2.741269751583074}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  93%|█████████▎| 93/100 [07:35<00:36,  5.17s/it]

[I 2025-11-07 16:07:21,680] Trial 92 finished with value: 0.5987738419618529 and parameters: {'learning_rate': 0.019096794858437172, 'depth': 3, 'l2_leaf_reg': 0.3648539821651878, 'subsample': 0.8, 'random_strength': 1.658717621177073e-06, 'bagging_temperature': 0.8798986911894618, 'border_count': 92, 'scale_pos_weight': 2.3549464503222293}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  94%|█████████▍| 94/100 [07:40<00:30,  5.03s/it]

[I 2025-11-07 16:07:26,381] Trial 93 finished with value: 0.5926966292134831 and parameters: {'learning_rate': 0.01905322555370998, 'depth': 7, 'l2_leaf_reg': 0.3666070260025272, 'subsample': 0.8, 'random_strength': 1.0330627185869023e-06, 'bagging_temperature': 0.8889493090390787, 'border_count': 161, 'scale_pos_weight': 2.305283565358069}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  95%|█████████▌| 95/100 [07:44<00:24,  4.85s/it]

[I 2025-11-07 16:07:30,794] Trial 94 finished with value: 0.5827814569536424 and parameters: {'learning_rate': 0.013784775178097472, 'depth': 3, 'l2_leaf_reg': 0.13986892663919465, 'subsample': 0.8, 'random_strength': 4.624508215320622e-06, 'bagging_temperature': 0.913302408572559, 'border_count': 93, 'scale_pos_weight': 3.281157164317687}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  96%|█████████▌| 96/100 [07:50<00:20,  5.09s/it]

[I 2025-11-07 16:07:36,463] Trial 95 finished with value: 0.5858279651383099 and parameters: {'learning_rate': 0.01168260021494267, 'depth': 4, 'l2_leaf_reg': 0.09194276563619387, 'subsample': 0.8, 'random_strength': 1.366629586187286e-06, 'bagging_temperature': 0.08678777556908163, 'border_count': 102, 'scale_pos_weight': 1.8056428195662453}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  97%|█████████▋| 97/100 [07:53<00:13,  4.63s/it]

[I 2025-11-07 16:07:40,010] Trial 96 finished with value: 0.5768098881695115 and parameters: {'learning_rate': 0.01670097683293008, 'depth': 7, 'l2_leaf_reg': 0.32581038340459917, 'subsample': 0.8, 'random_strength': 2.1183575720982907e-06, 'bagging_temperature': 0.8770018719476191, 'border_count': 80, 'scale_pos_weight': 3.614055806974865}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  98%|█████████▊| 98/100 [07:58<00:09,  4.71s/it]

[I 2025-11-07 16:07:44,903] Trial 97 finished with value: 0.5949729557747375 and parameters: {'learning_rate': 0.014939954599894641, 'depth': 5, 'l2_leaf_reg': 0.24994828910011718, 'subsample': 0.8, 'random_strength': 5.965328102514435e-07, 'bagging_temperature': 0.781338609848673, 'border_count': 133, 'scale_pos_weight': 2.896429978644985}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942:  99%|█████████▉| 99/100 [08:07<00:05,  5.89s/it]

[I 2025-11-07 16:07:53,548] Trial 98 finished with value: 0.5298044825941821 and parameters: {'learning_rate': 0.01017093452278366, 'depth': 9, 'l2_leaf_reg': 0.6099119206368506, 'subsample': 0.9, 'random_strength': 7.969719373424491e-06, 'bagging_temperature': 0.966347529585297, 'border_count': 175, 'scale_pos_weight': 9.957257241450511}. Best is trial 31 with value: 0.6019417475728155.


Best trial: 31. Best value: 0.601942: 100%|██████████| 100/100 [08:15<00:00,  4.95s/it]

[I 2025-11-07 16:08:01,212] Trial 99 finished with value: 0.5790118415679869 and parameters: {'learning_rate': 0.012418886782147183, 'depth': 6, 'l2_leaf_reg': 0.49815347670057886, 'subsample': 0.7, 'random_strength': 2.888320885158491e-07, 'bagging_temperature': 0.9974666855613765, 'border_count': 151, 'scale_pos_weight': 1.5385975205590314}. Best is trial 31 with value: 0.6019417475728155.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.6019
  Best Hyperparameters:
    learning_rate: 0.012521488824550344
    depth: 7
    l2_leaf_reg: 0.37319814792803824
    subsample: 0.7
    random_strength: 1.1038850104000488e-05
    bagging_temperature: 0.9536511771248262
    border_count: 107
    scale_pos_weight: 2.6189739604075983





In [24]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 2000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 100, # Keep early stopping
    'random_state': 123
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.012521488824550344, 'depth': 7, 'l2_leaf_reg': 0.37319814792803824, 'subsample': 0.7, 'random_strength': 1.1038850104000488e-05, 'bagging_temperature': 0.9536511771248262, 'border_count': 107, 'scale_pos_weight': 2.6189739604075983}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.6019

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.91      0.80      0.85      4165
   Class 1.0       0.51      0.73      0.60      1235

    accuracy                           0.78      5400
   macro avg       0.71      0.76      0.72      5400
weighted avg       0.82      0.78      0.79      5400



In [25]:
print("Saving best_model...")

# Use the model's F1 score in the name
best_model.save_model("catboost_mod_f1_0.6019.cbm")

print("Done.")

Saving best_model...
Done.


In [17]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 136

Top 10 Most Important Features:
                        feature  importance
135  recovery_feasibility_score    6.169598
44              liab_x_multicar    5.522465
53               liab_prct_sqrt    3.782572
58     is_multi_vehicle_unclear    3.758837
45         liab_x_highrisk_site    3.383531
54                liab_prct_log    3.164801
52              liab_prct_cubed    3.108423
51            liab_prct_squared    3.107916
134           in_network_repair    2.934785
127                 recent_move    2.717762

Bottom 10 Least Important Features:
                   feature  importance
49       police_x_multicar         0.0
94            high_mileage         0.0
96       very_high_mileage         0.0
97        frequent_claimer         0.0
100  very_frequent_claimer         0.0
101           large_payout         0.0
66         evidence_strong         0.0
103           small_payout         0.0
104      very_large_payout         0.0


In [18]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            1
2       4655051            0
3       6728725            1
4       9848460            1


In [None]:
# prediction.to_csv("results/catboost_6034_prediction.csv", index=False)