In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from f1_preprocessor import Preprocessor

import joblib

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor()

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=0)

In [5]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [6]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [7]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting CatBoost Preprocessor (stateless)...
Fit complete.
Transforming data for CatBoost...
Applying mandatory .astype(str) to: ['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'policy_report_filed_ind', 'witness_present_ind']
Dropping helper datetime columns: ['claim_date']
Transform complete.
Transforming data for CatBoost...
Applying mandatory .astype(str) to: ['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'policy_report_filed_ind', 'witness_present_ind']
Dropping helper datetime columns: ['claim_date']
Transform complete.


In [8]:
print("Saving preprocessor and training columns...")

# Save the 'pre' object
joblib.dump(pre, 'cc3_preprocessor.pkl')

# Save the exact column order and names
joblib.dump(X_train_proc.columns, 'training_columns.pkl')

print("Done.")

Saving preprocessor and training columns...
Done.


## CatBoost with Optuna Tuning

In [9]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
CAT_FEATURES = pre.cat_features_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'policy_report_filed_ind', 'witness_present_ind']


In [11]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100,
        'random_state': 123
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False
    )

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [12]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-10 12:49:08,895] A new study created in memory with name: no-name-127f4cdf-286c-40e8-83e3-a85bd5322b00



2. Starting Optuna study...


Best trial: 0. Best value: 0.541975:   1%|          | 1/100 [00:03<05:19,  3.23s/it]

[I 2025-11-10 12:49:12,132] Trial 0 finished with value: 0.5419749936208217 and parameters: {'learning_rate': 0.05107592729130285, 'depth': 9, 'l2_leaf_reg': 0.0012021005496685258, 'subsample': 0.7, 'random_strength': 3.963946965037201e-05, 'bagging_temperature': 0.844561474638459, 'border_count': 214, 'scale_pos_weight': 7.262763324169601}. Best is trial 0 with value: 0.5419749936208217.


Best trial: 0. Best value: 0.541975:   2%|▏         | 2/100 [00:07<06:04,  3.71s/it]

[I 2025-11-10 12:49:16,186] Trial 1 finished with value: 0.5190045248868779 and parameters: {'learning_rate': 0.012740194632919848, 'depth': 3, 'l2_leaf_reg': 0.29976191497149707, 'subsample': 0.5, 'random_strength': 4.027178141990003e-08, 'bagging_temperature': 0.5683713160443205, 'border_count': 107, 'scale_pos_weight': 8.836743400471578}. Best is trial 0 with value: 0.5419749936208217.


Best trial: 2. Best value: 0.585873:   3%|▎         | 3/100 [00:10<05:16,  3.27s/it]

[I 2025-11-10 12:49:18,916] Trial 2 finished with value: 0.5858730727859448 and parameters: {'learning_rate': 0.0632963892609435, 'depth': 9, 'l2_leaf_reg': 0.0022579388074606926, 'subsample': 1.0, 'random_strength': 6.691967400711291e-06, 'bagging_temperature': 0.7094744789659676, 'border_count': 248, 'scale_pos_weight': 2.187236060991182}. Best is trial 2 with value: 0.5858730727859448.


Best trial: 2. Best value: 0.585873:   4%|▍         | 4/100 [00:13<05:05,  3.18s/it]

[I 2025-11-10 12:49:21,961] Trial 3 finished with value: 0.5236329935125116 and parameters: {'learning_rate': 0.041951234397549365, 'depth': 8, 'l2_leaf_reg': 4.418792830234185, 'subsample': 0.8, 'random_strength': 0.0008695942978770521, 'bagging_temperature': 0.7603260872115929, 'border_count': 210, 'scale_pos_weight': 9.712064493167496}. Best is trial 2 with value: 0.5858730727859448.


Best trial: 2. Best value: 0.585873:   5%|▌         | 5/100 [00:14<04:15,  2.69s/it]

[I 2025-11-10 12:49:23,795] Trial 4 finished with value: 0.5155596841616349 and parameters: {'learning_rate': 0.13991850579917736, 'depth': 8, 'l2_leaf_reg': 0.0017189562451456785, 'subsample': 0.9, 'random_strength': 1.4353142220086624e-06, 'bagging_temperature': 0.5808164309339082, 'border_count': 73, 'scale_pos_weight': 1.204142445620921}. Best is trial 2 with value: 0.5858730727859448.


Best trial: 2. Best value: 0.585873:   6%|▌         | 6/100 [00:16<03:51,  2.46s/it]

[I 2025-11-10 12:49:25,797] Trial 5 finished with value: 0.5166666666666667 and parameters: {'learning_rate': 0.03573174399041903, 'depth': 3, 'l2_leaf_reg': 2.021998326394779, 'subsample': 0.8, 'random_strength': 3.1813202908239835e-06, 'bagging_temperature': 0.1702672524037927, 'border_count': 223, 'scale_pos_weight': 8.940673600032607}. Best is trial 2 with value: 0.5858730727859448.


Best trial: 2. Best value: 0.585873:   7%|▋         | 7/100 [00:18<03:31,  2.27s/it]

[I 2025-11-10 12:49:27,690] Trial 6 finished with value: 0.5241574357765731 and parameters: {'learning_rate': 0.048937230337441476, 'depth': 8, 'l2_leaf_reg': 0.001550129008043396, 'subsample': 0.7, 'random_strength': 0.04495542356857389, 'bagging_temperature': 0.24950094276347057, 'border_count': 44, 'scale_pos_weight': 8.323421252196438}. Best is trial 2 with value: 0.5858730727859448.


Best trial: 2. Best value: 0.585873:   8%|▊         | 8/100 [00:23<04:44,  3.10s/it]

[I 2025-11-10 12:49:32,545] Trial 7 finished with value: 0.583125 and parameters: {'learning_rate': 0.014386602217637769, 'depth': 9, 'l2_leaf_reg': 0.0025396512024690484, 'subsample': 0.5, 'random_strength': 1.0293499000010525e-05, 'bagging_temperature': 0.9520985342418637, 'border_count': 93, 'scale_pos_weight': 3.4222344271061877}. Best is trial 2 with value: 0.5858730727859448.


Best trial: 8. Best value: 0.593693:   9%|▉         | 9/100 [00:29<05:46,  3.81s/it]

[I 2025-11-10 12:49:37,933] Trial 8 finished with value: 0.5936933671620153 and parameters: {'learning_rate': 0.01580328295065854, 'depth': 7, 'l2_leaf_reg': 0.5414586498805348, 'subsample': 0.7, 'random_strength': 0.08432231619209367, 'bagging_temperature': 0.25754955699586624, 'border_count': 201, 'scale_pos_weight': 2.1118003091121684}. Best is trial 8 with value: 0.5936933671620153.


Best trial: 8. Best value: 0.593693:  10%|█         | 10/100 [00:38<08:33,  5.70s/it]

[I 2025-11-10 12:49:47,870] Trial 9 finished with value: 0.5776487663280117 and parameters: {'learning_rate': 0.013253503018126429, 'depth': 9, 'l2_leaf_reg': 6.798492110129506, 'subsample': 0.7, 'random_strength': 0.000198025138563378, 'bagging_temperature': 0.16909343417143474, 'border_count': 182, 'scale_pos_weight': 3.961189846632641}. Best is trial 8 with value: 0.5936933671620153.


Best trial: 8. Best value: 0.593693:  11%|█         | 11/100 [00:39<06:16,  4.23s/it]

[I 2025-11-10 12:49:48,762] Trial 10 finished with value: 0.5497166409067491 and parameters: {'learning_rate': 0.2874599273925299, 'depth': 5, 'l2_leaf_reg': 0.03745108189581725, 'subsample': 0.6, 'random_strength': 0.7346133893544622, 'bagging_temperature': 0.3579532469697796, 'border_count': 183, 'scale_pos_weight': 5.380468000759617}. Best is trial 8 with value: 0.5936933671620153.


Best trial: 8. Best value: 0.593693:  12%|█▏        | 12/100 [00:40<04:48,  3.28s/it]

[I 2025-11-10 12:49:49,874] Trial 11 finished with value: 0.5299625468164794 and parameters: {'learning_rate': 0.10564355186017164, 'depth': 6, 'l2_leaf_reg': 0.03157927392506408, 'subsample': 1.0, 'random_strength': 0.019762093425813625, 'bagging_temperature': 0.033691911260374185, 'border_count': 243, 'scale_pos_weight': 1.1580822004193045}. Best is trial 8 with value: 0.5936933671620153.


Best trial: 8. Best value: 0.593693:  13%|█▎        | 13/100 [00:47<06:07,  4.22s/it]

[I 2025-11-10 12:49:56,266] Trial 12 finished with value: 0.5918367346938775 and parameters: {'learning_rate': 0.02280743712800788, 'depth': 10, 'l2_leaf_reg': 0.39316507072817286, 'subsample': 1.0, 'random_strength': 8.346688837908903e-08, 'bagging_temperature': 0.42409240894669376, 'border_count': 254, 'scale_pos_weight': 2.991387349606504}. Best is trial 8 with value: 0.5936933671620153.


Best trial: 8. Best value: 0.593693:  14%|█▍        | 14/100 [00:53<06:41,  4.67s/it]

[I 2025-11-10 12:50:01,951] Trial 13 finished with value: 0.5920303605313093 and parameters: {'learning_rate': 0.022288406020056373, 'depth': 10, 'l2_leaf_reg': 0.4956313088226169, 'subsample': 0.9, 'random_strength': 1.3289202458157978e-08, 'bagging_temperature': 0.3887596991544673, 'border_count': 150, 'scale_pos_weight': 3.251875570790455}. Best is trial 8 with value: 0.5936933671620153.


Best trial: 8. Best value: 0.593693:  15%|█▌        | 15/100 [00:55<05:52,  4.14s/it]

[I 2025-11-10 12:50:04,887] Trial 14 finished with value: 0.5589383294301327 and parameters: {'learning_rate': 0.023301342682839947, 'depth': 6, 'l2_leaf_reg': 0.7645891366299457, 'subsample': 0.9, 'random_strength': 0.0020504618228746217, 'bagging_temperature': 0.33685534478059265, 'border_count': 136, 'scale_pos_weight': 5.129839044916801}. Best is trial 8 with value: 0.5936933671620153.


Best trial: 8. Best value: 0.593693:  16%|█▌        | 16/100 [00:59<05:27,  3.90s/it]

[I 2025-11-10 12:50:08,215] Trial 15 finished with value: 0.5679423183582918 and parameters: {'learning_rate': 0.02150552216090957, 'depth': 7, 'l2_leaf_reg': 0.10516545547048056, 'subsample': 0.9, 'random_strength': 1.1434535532428458e-08, 'bagging_temperature': 0.48473973453334834, 'border_count': 155, 'scale_pos_weight': 4.2836243220618915}. Best is trial 8 with value: 0.5936933671620153.


Best trial: 16. Best value: 0.596277:  17%|█▋        | 17/100 [01:05<06:24,  4.63s/it]

[I 2025-11-10 12:50:14,539] Trial 16 finished with value: 0.5962774957698815 and parameters: {'learning_rate': 0.010231955243992197, 'depth': 5, 'l2_leaf_reg': 1.2303450104057587, 'subsample': 0.6, 'random_strength': 0.8176409094848471, 'bagging_temperature': 0.023854373343721452, 'border_count': 146, 'scale_pos_weight': 2.3371554720648295}. Best is trial 16 with value: 0.5962774957698815.


Best trial: 16. Best value: 0.596277:  18%|█▊        | 18/100 [01:12<07:14,  5.30s/it]

[I 2025-11-10 12:50:21,387] Trial 17 finished with value: 0.5386485172581429 and parameters: {'learning_rate': 0.01245524186114878, 'depth': 5, 'l2_leaf_reg': 1.6438187387640208, 'subsample': 0.6, 'random_strength': 0.9631343361485104, 'bagging_temperature': 0.060655092781325914, 'border_count': 179, 'scale_pos_weight': 6.767304184947286}. Best is trial 16 with value: 0.5962774957698815.


Best trial: 16. Best value: 0.596277:  19%|█▉        | 19/100 [01:17<06:53,  5.10s/it]

[I 2025-11-10 12:50:26,032] Trial 18 finished with value: 0.5954198473282443 and parameters: {'learning_rate': 0.017624537425752557, 'depth': 4, 'l2_leaf_reg': 0.12877044244206812, 'subsample': 0.6, 'random_strength': 0.07769291823327928, 'bagging_temperature': 0.16997048306727847, 'border_count': 123, 'scale_pos_weight': 2.2103061832794593}. Best is trial 16 with value: 0.5962774957698815.


Best trial: 16. Best value: 0.596277:  20%|██        | 20/100 [01:20<06:01,  4.52s/it]

[I 2025-11-10 12:50:29,201] Trial 19 finished with value: 0.5954415954415955 and parameters: {'learning_rate': 0.028643981437824203, 'depth': 4, 'l2_leaf_reg': 0.11815696943487158, 'subsample': 0.6, 'random_strength': 0.005061844550819977, 'bagging_temperature': 0.1052520957423016, 'border_count': 122, 'scale_pos_weight': 2.0892863803446367}. Best is trial 16 with value: 0.5962774957698815.


Best trial: 16. Best value: 0.596277:  21%|██        | 21/100 [01:22<04:56,  3.75s/it]

[I 2025-11-10 12:50:31,150] Trial 20 finished with value: 0.5421745490004876 and parameters: {'learning_rate': 0.032701429259875896, 'depth': 4, 'l2_leaf_reg': 0.008182124198386743, 'subsample': 0.5, 'random_strength': 0.0028288380320890863, 'bagging_temperature': 0.0036661358331931994, 'border_count': 77, 'scale_pos_weight': 6.484260312450623}. Best is trial 16 with value: 0.5962774957698815.


Best trial: 21. Best value: 0.597096:  22%|██▏       | 22/100 [01:28<05:39,  4.36s/it]

[I 2025-11-10 12:50:36,929] Trial 21 finished with value: 0.5970955758189801 and parameters: {'learning_rate': 0.010624529055322211, 'depth': 4, 'l2_leaf_reg': 0.11422565803249576, 'subsample': 0.6, 'random_strength': 0.14419221445591435, 'bagging_temperature': 0.11388681645132899, 'border_count': 125, 'scale_pos_weight': 2.3449605958056923}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  23%|██▎       | 23/100 [01:33<05:51,  4.57s/it]

[I 2025-11-10 12:50:41,994] Trial 22 finished with value: 0.5970636215334421 and parameters: {'learning_rate': 0.01096230661981309, 'depth': 4, 'l2_leaf_reg': 0.03690021171138303, 'subsample': 0.6, 'random_strength': 0.009481536794220403, 'bagging_temperature': 0.09998288845783262, 'border_count': 117, 'scale_pos_weight': 2.5690890275748317}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  24%|██▍       | 24/100 [01:39<06:30,  5.14s/it]

[I 2025-11-10 12:50:48,454] Trial 23 finished with value: 0.5684210526315789 and parameters: {'learning_rate': 0.010109782303884639, 'depth': 5, 'l2_leaf_reg': 0.029959459926650136, 'subsample': 0.6, 'random_strength': 0.2825968174850314, 'bagging_temperature': 0.2516750167707685, 'border_count': 164, 'scale_pos_weight': 4.559419896704128}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  25%|██▌       | 25/100 [01:44<06:15,  5.00s/it]

[I 2025-11-10 12:50:53,144] Trial 24 finished with value: 0.5925215723873442 and parameters: {'learning_rate': 0.01018437597329506, 'depth': 4, 'l2_leaf_reg': 0.012337292141422814, 'subsample': 0.5, 'random_strength': 0.01458285187593954, 'bagging_temperature': 0.128051257177291, 'border_count': 132, 'scale_pos_weight': 2.7552301142545024}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  26%|██▌       | 26/100 [01:48<05:42,  4.63s/it]

[I 2025-11-10 12:50:56,906] Trial 25 finished with value: 0.5720164609053497 and parameters: {'learning_rate': 0.010042029953790088, 'depth': 3, 'l2_leaf_reg': 0.012916589277837842, 'subsample': 0.6, 'random_strength': 0.265865474423289, 'bagging_temperature': 0.0826320287425132, 'border_count': 104, 'scale_pos_weight': 1.5051274688662595}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  27%|██▋       | 27/100 [01:51<05:09,  4.24s/it]

[I 2025-11-10 12:51:00,222] Trial 26 finished with value: 0.572644721906924 and parameters: {'learning_rate': 0.017008881776083378, 'depth': 5, 'l2_leaf_reg': 0.052092911464812666, 'subsample': 0.5, 'random_strength': 0.0005256422994798235, 'bagging_temperature': 0.00730695267605766, 'border_count': 54, 'scale_pos_weight': 3.750018267338985}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  28%|██▊       | 28/100 [01:53<04:15,  3.54s/it]

[I 2025-11-10 12:51:02,154] Trial 27 finished with value: 0.593607305936073 and parameters: {'learning_rate': 0.07735605494988969, 'depth': 6, 'l2_leaf_reg': 0.21126870334901962, 'subsample': 0.7, 'random_strength': 0.01181547556147687, 'bagging_temperature': 0.22841685827831285, 'border_count': 112, 'scale_pos_weight': 2.7212853192642537}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  29%|██▉       | 29/100 [01:58<04:37,  3.91s/it]

[I 2025-11-10 12:51:06,932] Trial 28 finished with value: 0.5868123293015997 and parameters: {'learning_rate': 0.018851066220176776, 'depth': 4, 'l2_leaf_reg': 1.1620815254572336, 'subsample': 0.6, 'random_strength': 0.151744835093294, 'bagging_temperature': 0.11753478331549605, 'border_count': 84, 'scale_pos_weight': 1.6773720823174163}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  30%|███       | 30/100 [01:58<03:27,  2.97s/it]

[I 2025-11-10 12:51:07,693] Trial 29 finished with value: 0.5536403395935169 and parameters: {'learning_rate': 0.22990817306472605, 'depth': 5, 'l2_leaf_reg': 0.0068543013507553, 'subsample': 0.8, 'random_strength': 7.8845688590475e-05, 'bagging_temperature': 0.31513351223679065, 'border_count': 161, 'scale_pos_weight': 4.730460642344372}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 21. Best value: 0.597096:  31%|███       | 31/100 [02:00<03:08,  2.73s/it]

[I 2025-11-10 12:51:09,859] Trial 30 finished with value: 0.5437947199605231 and parameters: {'learning_rate': 0.02782579465201925, 'depth': 3, 'l2_leaf_reg': 0.05866484988926208, 'subsample': 0.7, 'random_strength': 0.03966487855364433, 'bagging_temperature': 0.20304564053652363, 'border_count': 142, 'scale_pos_weight': 6.036986128960901}. Best is trial 21 with value: 0.5970955758189801.


Best trial: 31. Best value: 0.597987:  32%|███▏      | 32/100 [02:06<04:02,  3.57s/it]

[I 2025-11-10 12:51:15,381] Trial 31 finished with value: 0.5979865771812081 and parameters: {'learning_rate': 0.012047700660521014, 'depth': 4, 'l2_leaf_reg': 0.16849850288853563, 'subsample': 0.6, 'random_strength': 0.005304922310998841, 'bagging_temperature': 0.10072630322169301, 'border_count': 120, 'scale_pos_weight': 2.397370994699089}. Best is trial 31 with value: 0.5979865771812081.


Best trial: 31. Best value: 0.597987:  33%|███▎      | 33/100 [02:11<04:30,  4.04s/it]

[I 2025-11-10 12:51:20,515] Trial 32 finished with value: 0.5970731707317073 and parameters: {'learning_rate': 0.012186813851861532, 'depth': 4, 'l2_leaf_reg': 0.3051395598386131, 'subsample': 0.6, 'random_strength': 0.37582693435471937, 'bagging_temperature': 0.07309910443780486, 'border_count': 123, 'scale_pos_weight': 2.6375272639938787}. Best is trial 31 with value: 0.5979865771812081.


Best trial: 31. Best value: 0.597987:  34%|███▍      | 34/100 [02:15<04:23,  4.00s/it]

[I 2025-11-10 12:51:24,425] Trial 33 finished with value: 0.5785317344252705 and parameters: {'learning_rate': 0.012853703563107801, 'depth': 3, 'l2_leaf_reg': 0.2163104475338262, 'subsample': 0.5, 'random_strength': 0.0074235788442825286, 'bagging_temperature': 0.11484750149380096, 'border_count': 99, 'scale_pos_weight': 3.4404187591710085}. Best is trial 31 with value: 0.5979865771812081.


Best trial: 31. Best value: 0.597987:  35%|███▌      | 35/100 [02:20<04:42,  4.35s/it]

[I 2025-11-10 12:51:29,588] Trial 34 finished with value: 0.5872832369942197 and parameters: {'learning_rate': 0.012825174905640844, 'depth': 4, 'l2_leaf_reg': 0.19722363452584732, 'subsample': 0.6, 'random_strength': 0.0010911650616198393, 'bagging_temperature': 0.6586694615390578, 'border_count': 117, 'scale_pos_weight': 1.7442969789804195}. Best is trial 31 with value: 0.5979865771812081.


Best trial: 31. Best value: 0.597987:  36%|███▌      | 36/100 [02:24<04:30,  4.23s/it]

[I 2025-11-10 12:51:33,527] Trial 35 finished with value: 0.5932311621966795 and parameters: {'learning_rate': 0.015521593315135385, 'depth': 3, 'l2_leaf_reg': 0.023280898666627477, 'subsample': 0.5, 'random_strength': 0.0003065927516987398, 'bagging_temperature': 0.27610450398144337, 'border_count': 131, 'scale_pos_weight': 2.71820154437489}. Best is trial 31 with value: 0.5979865771812081.


Best trial: 31. Best value: 0.597987:  37%|███▋      | 37/100 [02:28<04:26,  4.22s/it]

[I 2025-11-10 12:51:37,744] Trial 36 finished with value: 0.5714285714285714 and parameters: {'learning_rate': 0.01831588655857309, 'depth': 4, 'l2_leaf_reg': 0.07475559234955231, 'subsample': 0.7, 'random_strength': 0.2508297472418231, 'bagging_temperature': 0.1812093661018045, 'border_count': 64, 'scale_pos_weight': 3.9565505302217674}. Best is trial 31 with value: 0.5979865771812081.


Best trial: 31. Best value: 0.597987:  38%|███▊      | 38/100 [02:32<04:14,  4.11s/it]

[I 2025-11-10 12:51:41,586] Trial 37 finished with value: 0.5560053981106613 and parameters: {'learning_rate': 0.011931906588335048, 'depth': 3, 'l2_leaf_reg': 0.3204221160987537, 'subsample': 0.7, 'random_strength': 0.030834223800227104, 'bagging_temperature': 0.06141923707315339, 'border_count': 88, 'scale_pos_weight': 1.2264184880974778}. Best is trial 31 with value: 0.5979865771812081.


Best trial: 31. Best value: 0.597987:  39%|███▉      | 39/100 [02:33<03:13,  3.18s/it]

[I 2025-11-10 12:51:42,596] Trial 38 finished with value: 0.527784236224134 and parameters: {'learning_rate': 0.062402780657778475, 'depth': 4, 'l2_leaf_reg': 0.07611814317354523, 'subsample': 0.6, 'random_strength': 2.6366301861436124e-05, 'bagging_temperature': 0.46227306459350187, 'border_count': 106, 'scale_pos_weight': 7.738848158188499}. Best is trial 31 with value: 0.5979865771812081.


Best trial: 39. Best value: 0.602187:  40%|████      | 40/100 [02:36<02:59,  3.00s/it]

[I 2025-11-10 12:51:45,167] Trial 39 finished with value: 0.6021872863978127 and parameters: {'learning_rate': 0.042059298117722826, 'depth': 6, 'l2_leaf_reg': 2.6676457775670617, 'subsample': 0.5, 'random_strength': 0.07041736275037083, 'bagging_temperature': 0.8299484798553904, 'border_count': 35, 'scale_pos_weight': 2.3543968662300707}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  41%|████      | 41/100 [02:38<02:43,  2.77s/it]

[I 2025-11-10 12:51:47,392] Trial 40 finished with value: 0.591304347826087 and parameters: {'learning_rate': 0.04853130611505058, 'depth': 7, 'l2_leaf_reg': 3.5262196770250154, 'subsample': 0.5, 'random_strength': 0.09336399311562174, 'bagging_temperature': 0.8181343859691468, 'border_count': 33, 'scale_pos_weight': 3.155733640060661}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  42%|████▏     | 42/100 [02:39<02:14,  2.31s/it]

[I 2025-11-10 12:51:48,642] Trial 41 finished with value: 0.5924170616113744 and parameters: {'learning_rate': 0.17035015488932775, 'depth': 6, 'l2_leaf_reg': 8.777800191153693, 'subsample': 0.5, 'random_strength': 0.33870539512212755, 'bagging_temperature': 0.9778928265223191, 'border_count': 65, 'scale_pos_weight': 2.4157009629441095}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  43%|████▎     | 43/100 [02:44<02:47,  2.93s/it]

[I 2025-11-10 12:51:53,031] Trial 42 finished with value: 0.5878810688746707 and parameters: {'learning_rate': 0.014608705080241749, 'depth': 5, 'l2_leaf_reg': 0.7252644451654455, 'subsample': 0.6, 'random_strength': 0.0470608596396158, 'bagging_temperature': 0.586141751056507, 'border_count': 170, 'scale_pos_weight': 1.8564564525074676}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  44%|████▍     | 44/100 [02:46<02:33,  2.74s/it]

[I 2025-11-10 12:51:55,331] Trial 43 finished with value: 0.5746658919233004 and parameters: {'learning_rate': 0.04253805623251794, 'depth': 4, 'l2_leaf_reg': 2.4247931652765096, 'subsample': 0.5, 'random_strength': 0.0036623490097860015, 'bagging_temperature': 0.6933974530516926, 'border_count': 98, 'scale_pos_weight': 3.606044844341053}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  45%|████▌     | 45/100 [02:48<02:15,  2.47s/it]

[I 2025-11-10 12:51:57,154] Trial 44 finished with value: 0.5937190082644628 and parameters: {'learning_rate': 0.09557579072826297, 'depth': 8, 'l2_leaf_reg': 0.16748767657336097, 'subsample': 0.7, 'random_strength': 0.019099975909749812, 'bagging_temperature': 0.9026986470092243, 'border_count': 194, 'scale_pos_weight': 2.6393953631457103}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  46%|████▌     | 46/100 [02:52<02:44,  3.05s/it]

[I 2025-11-10 12:52:01,577] Trial 45 finished with value: 0.5702091335894153 and parameters: {'learning_rate': 0.011275963841693333, 'depth': 3, 'l2_leaf_reg': 0.046442677669336636, 'subsample': 0.6, 'random_strength': 0.12189827834467322, 'bagging_temperature': 0.5534883100070611, 'border_count': 127, 'scale_pos_weight': 1.3857505810093458}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  47%|████▋     | 47/100 [02:57<03:05,  3.49s/it]

[I 2025-11-10 12:52:06,093] Trial 46 finished with value: 0.5904522613065326 and parameters: {'learning_rate': 0.014724377944036015, 'depth': 6, 'l2_leaf_reg': 0.019337733351908006, 'subsample': 0.5, 'random_strength': 3.7398505668781403e-07, 'bagging_temperature': 0.8149141125559747, 'border_count': 140, 'scale_pos_weight': 3.0246146858031246}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  48%|████▊     | 48/100 [02:59<02:48,  3.24s/it]

[I 2025-11-10 12:52:08,736] Trial 47 finished with value: 0.515796549406229 and parameters: {'learning_rate': 0.019816961660650022, 'depth': 5, 'l2_leaf_reg': 0.003910703920009335, 'subsample': 0.8, 'random_strength': 0.0072950213852472235, 'bagging_temperature': 0.14985363282570693, 'border_count': 114, 'scale_pos_weight': 9.719784942719524}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  49%|████▉     | 49/100 [03:02<02:42,  3.20s/it]

[I 2025-11-10 12:52:11,831] Trial 48 finished with value: 0.5952983725135624 and parameters: {'learning_rate': 0.02756204490311963, 'depth': 4, 'l2_leaf_reg': 0.33357969180098507, 'subsample': 0.6, 'random_strength': 0.0014962888295962245, 'bagging_temperature': 0.29413290573647466, 'border_count': 44, 'scale_pos_weight': 2.011318943643012}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  50%|█████     | 50/100 [03:10<03:48,  4.57s/it]

[I 2025-11-10 12:52:19,623] Trial 49 finished with value: 0.5710251341428975 and parameters: {'learning_rate': 0.011605072114166783, 'depth': 7, 'l2_leaf_reg': 4.212633028768864, 'subsample': 0.7, 'random_strength': 0.37526623925196695, 'bagging_temperature': 0.20615923014216533, 'border_count': 237, 'scale_pos_weight': 4.040135611129683}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  51%|█████     | 51/100 [03:11<02:50,  3.48s/it]

[I 2025-11-10 12:52:20,549] Trial 50 finished with value: 0.6000659413122321 and parameters: {'learning_rate': 0.13108337589058103, 'depth': 3, 'l2_leaf_reg': 0.6045188321782928, 'subsample': 0.6, 'random_strength': 0.04889399047513341, 'bagging_temperature': 0.37423746094513166, 'border_count': 222, 'scale_pos_weight': 2.5214557512783866}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  52%|█████▏    | 52/100 [03:12<02:11,  2.73s/it]

[I 2025-11-10 12:52:21,528] Trial 51 finished with value: 0.5854108956602031 and parameters: {'learning_rate': 0.11542543163135865, 'depth': 3, 'l2_leaf_reg': 0.5249286237484352, 'subsample': 0.6, 'random_strength': 0.0640255856455096, 'bagging_temperature': 0.06684468846064502, 'border_count': 203, 'scale_pos_weight': 3.042275282038708}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  53%|█████▎    | 53/100 [03:14<01:49,  2.33s/it]

[I 2025-11-10 12:52:22,928] Trial 52 finished with value: 0.5923653715064758 and parameters: {'learning_rate': 0.07148463074018924, 'depth': 4, 'l2_leaf_reg': 0.8030175301354137, 'subsample': 0.6, 'random_strength': 0.025929317235209158, 'bagging_temperature': 0.4152546024738254, 'border_count': 236, 'scale_pos_weight': 2.3180952149064113}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  54%|█████▍    | 54/100 [03:14<01:26,  1.87s/it]

[I 2025-11-10 12:52:23,728] Trial 53 finished with value: 0.5361980382998599 and parameters: {'learning_rate': 0.18371837521209733, 'depth': 3, 'l2_leaf_reg': 0.14218040064146184, 'subsample': 0.6, 'random_strength': 0.1743751917354261, 'bagging_temperature': 0.3736891233468691, 'border_count': 224, 'scale_pos_weight': 1.1128026032627893}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  55%|█████▌    | 55/100 [03:15<01:08,  1.53s/it]

[I 2025-11-10 12:52:24,448] Trial 54 finished with value: 0.594078947368421 and parameters: {'learning_rate': 0.29941326287267633, 'depth': 4, 'l2_leaf_reg': 0.09099446663172557, 'subsample': 0.5, 'random_strength': 0.6018854706260716, 'bagging_temperature': 0.6377455064307066, 'border_count': 154, 'scale_pos_weight': 2.469507568900789}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  56%|█████▌    | 56/100 [03:19<01:41,  2.30s/it]

[I 2025-11-10 12:52:28,557] Trial 55 finished with value: 0.5796499555028182 and parameters: {'learning_rate': 0.014238334634408267, 'depth': 3, 'l2_leaf_reg': 0.25655802808761524, 'subsample': 0.6, 'random_strength': 0.010642747613403482, 'bagging_temperature': 0.039099245621732844, 'border_count': 220, 'scale_pos_weight': 3.328072834859626}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  57%|█████▋    | 57/100 [03:20<01:22,  1.93s/it]

[I 2025-11-10 12:52:29,616] Trial 56 finished with value: 0.5908096280087527 and parameters: {'learning_rate': 0.13901974475781392, 'depth': 5, 'l2_leaf_reg': 2.1320835362202124, 'subsample': 0.8, 'random_strength': 0.056644533481041474, 'bagging_temperature': 0.5321816361965981, 'border_count': 91, 'scale_pos_weight': 1.9881944695337248}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  58%|█████▊    | 58/100 [03:22<01:13,  1.74s/it]

[I 2025-11-10 12:52:30,918] Trial 57 finished with value: 0.5713131313131313 and parameters: {'learning_rate': 0.0895982584279447, 'depth': 4, 'l2_leaf_reg': 1.237609315322903, 'subsample': 0.5, 'random_strength': 0.5576573112314633, 'bagging_temperature': 0.14126206533475394, 'border_count': 80, 'scale_pos_weight': 1.5343847667411965}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  59%|█████▉    | 59/100 [03:25<01:28,  2.16s/it]

[I 2025-11-10 12:52:34,061] Trial 58 finished with value: 0.598895745371874 and parameters: {'learning_rate': 0.036301142196457546, 'depth': 8, 'l2_leaf_reg': 0.547961013255802, 'subsample': 0.7, 'random_strength': 0.10291343019237426, 'bagging_temperature': 0.7559323629784764, 'border_count': 122, 'scale_pos_weight': 2.881730731847708}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  60%|██████    | 60/100 [03:28<01:42,  2.57s/it]

[I 2025-11-10 12:52:37,593] Trial 59 finished with value: 0.5928571428571429 and parameters: {'learning_rate': 0.03480888789349595, 'depth': 8, 'l2_leaf_reg': 0.46927458381874226, 'subsample': 0.7, 'random_strength': 0.12074290056241534, 'bagging_temperature': 0.7700515458183415, 'border_count': 172, 'scale_pos_weight': 2.9273718271929035}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  61%|██████    | 61/100 [03:31<01:47,  2.75s/it]

[I 2025-11-10 12:52:40,770] Trial 60 finished with value: 0.5251162790697674 and parameters: {'learning_rate': 0.04095069906965769, 'depth': 9, 'l2_leaf_reg': 0.6527081200548246, 'subsample': 0.7, 'random_strength': 0.9553023930954009, 'bagging_temperature': 0.9033489229607605, 'border_count': 148, 'scale_pos_weight': 8.778222052534485}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  62%|██████▏   | 62/100 [03:34<01:43,  2.73s/it]

[I 2025-11-10 12:52:43,458] Trial 61 finished with value: 0.5961670088980151 and parameters: {'learning_rate': 0.05574374368093045, 'depth': 8, 'l2_leaf_reg': 0.9866832600517158, 'subsample': 0.6, 'random_strength': 0.024537485035621573, 'bagging_temperature': 0.7282562466540771, 'border_count': 119, 'scale_pos_weight': 2.5188970538654334}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  63%|██████▎   | 63/100 [03:39<02:01,  3.29s/it]

[I 2025-11-10 12:52:48,035] Trial 62 finished with value: 0.5886627906976745 and parameters: {'learning_rate': 0.024506765052913108, 'depth': 9, 'l2_leaf_reg': 0.4154245342133487, 'subsample': 0.6, 'random_strength': 0.1904331853862223, 'bagging_temperature': 0.8774798146988851, 'border_count': 137, 'scale_pos_weight': 2.150751300330543}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  64%|██████▍   | 64/100 [03:43<02:13,  3.71s/it]

[I 2025-11-10 12:52:52,748] Trial 63 finished with value: 0.5797356828193833 and parameters: {'learning_rate': 0.016253677481953048, 'depth': 7, 'l2_leaf_reg': 0.2613584762034199, 'subsample': 0.7, 'random_strength': 0.07337326382214306, 'bagging_temperature': 0.7748472375729489, 'border_count': 110, 'scale_pos_weight': 3.706770761656908}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  65%|██████▌   | 65/100 [03:50<02:36,  4.48s/it]

[I 2025-11-10 12:52:59,001] Trial 64 finished with value: 0.585982382229031 and parameters: {'learning_rate': 0.011225120790785118, 'depth': 5, 'l2_leaf_reg': 0.13104760619469885, 'subsample': 0.6, 'random_strength': 0.004997362254260397, 'bagging_temperature': 0.07579818291499826, 'border_count': 125, 'scale_pos_weight': 1.7844323908672535}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  66%|██████▌   | 66/100 [03:52<02:13,  3.92s/it]

[I 2025-11-10 12:53:01,610] Trial 65 finished with value: 0.5833333333333334 and parameters: {'learning_rate': 0.0383293284271532, 'depth': 6, 'l2_leaf_reg': 5.57462852463198, 'subsample': 0.6, 'random_strength': 0.0006229454450295915, 'bagging_temperature': 0.09786412744473909, 'border_count': 104, 'scale_pos_weight': 3.3427648823825042}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  67%|██████▋   | 67/100 [03:57<02:18,  4.19s/it]

[I 2025-11-10 12:53:06,448] Trial 66 finished with value: 0.5677966101694916 and parameters: {'learning_rate': 0.04711591348138802, 'depth': 10, 'l2_leaf_reg': 1.4456665940670252, 'subsample': 0.7, 'random_strength': 0.44594713533069724, 'bagging_temperature': 0.2002545724363674, 'border_count': 192, 'scale_pos_weight': 4.481937384449949}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  68%|██████▊   | 68/100 [04:01<02:08,  4.03s/it]

[I 2025-11-10 12:53:10,096] Trial 67 finished with value: 0.5890886155786818 and parameters: {'learning_rate': 0.0319735890091928, 'depth': 4, 'l2_leaf_reg': 2.690240064516003, 'subsample': 0.5, 'random_strength': 0.013427009906635982, 'bagging_temperature': 0.23923683802839757, 'border_count': 132, 'scale_pos_weight': 2.892133648514583}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  69%|██████▉   | 69/100 [04:06<02:14,  4.34s/it]

[I 2025-11-10 12:53:15,151] Trial 68 finished with value: 0.5915395284327323 and parameters: {'learning_rate': 0.02121022711413387, 'depth': 5, 'l2_leaf_reg': 0.17200333911166807, 'subsample': 0.6, 'random_strength': 0.0367393414797842, 'bagging_temperature': 0.04079160533209836, 'border_count': 119, 'scale_pos_weight': 2.266248012680325}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  70%|███████   | 70/100 [04:11<02:15,  4.52s/it]

[I 2025-11-10 12:53:20,101] Trial 69 finished with value: 0.559229367352252 and parameters: {'learning_rate': 0.01351162556484799, 'depth': 3, 'l2_leaf_reg': 0.03866646508565388, 'subsample': 0.6, 'random_strength': 0.0027988133714191353, 'bagging_temperature': 0.146129175565207, 'border_count': 160, 'scale_pos_weight': 4.983584736337842}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  71%|███████   | 71/100 [04:13<01:48,  3.75s/it]

[I 2025-11-10 12:53:22,057] Trial 70 finished with value: 0.5931388265469701 and parameters: {'learning_rate': 0.060278306246772675, 'depth': 4, 'l2_leaf_reg': 0.06739316715776238, 'subsample': 0.7, 'random_strength': 0.09659393060589251, 'bagging_temperature': 0.3372722206084636, 'border_count': 97, 'scale_pos_weight': 2.7653726797269886}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  72%|███████▏  | 72/100 [04:20<02:11,  4.69s/it]

[I 2025-11-10 12:53:28,948] Trial 71 finished with value: 0.5960836375705277 and parameters: {'learning_rate': 0.010729967279506671, 'depth': 5, 'l2_leaf_reg': 0.6091778376949429, 'subsample': 0.6, 'random_strength': 0.9967502279301782, 'bagging_temperature': 0.01350742599234532, 'border_count': 72, 'scale_pos_weight': 2.445466455365054}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  73%|███████▎  | 73/100 [04:26<02:22,  5.29s/it]

[I 2025-11-10 12:53:35,624] Trial 72 finished with value: 0.5912596401028277 and parameters: {'learning_rate': 0.012227208640886644, 'depth': 6, 'l2_leaf_reg': 0.0963101668154376, 'subsample': 0.6, 'random_strength': 0.16106841328461385, 'bagging_temperature': 0.09126414089119862, 'border_count': 142, 'scale_pos_weight': 1.9943690100475766}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  74%|███████▍  | 74/100 [04:33<02:32,  5.86s/it]

[I 2025-11-10 12:53:42,808] Trial 73 finished with value: 0.5816485225505443 and parameters: {'learning_rate': 0.010257827998995234, 'depth': 5, 'l2_leaf_reg': 0.9353436836474867, 'subsample': 0.6, 'random_strength': 0.24489097686226594, 'bagging_temperature': 0.030913218568718356, 'border_count': 255, 'scale_pos_weight': 1.7066690470046946}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  75%|███████▌  | 75/100 [04:39<02:24,  5.77s/it]

[I 2025-11-10 12:53:48,384] Trial 74 finished with value: 0.5824476161554814 and parameters: {'learning_rate': 0.012828618337087998, 'depth': 4, 'l2_leaf_reg': 1.695055999650611, 'subsample': 0.5, 'random_strength': 0.04709525068211385, 'bagging_temperature': 0.12012465669240924, 'border_count': 129, 'scale_pos_weight': 3.1910834225215012}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  76%|███████▌  | 76/100 [04:44<02:14,  5.59s/it]

[I 2025-11-10 12:53:53,551] Trial 75 finished with value: 0.5170731707317073 and parameters: {'learning_rate': 0.016988053723335877, 'depth': 4, 'l2_leaf_reg': 0.35508133066617753, 'subsample': 0.6, 'random_strength': 0.4120968980615808, 'bagging_temperature': 0.841139053399403, 'border_count': 110, 'scale_pos_weight': 1.025088870281044}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  77%|███████▋  | 77/100 [04:45<01:36,  4.21s/it]

[I 2025-11-10 12:53:54,526] Trial 76 finished with value: 0.5894599869876382 and parameters: {'learning_rate': 0.24345461189635706, 'depth': 5, 'l2_leaf_reg': 1.59430578595903, 'subsample': 0.8, 'random_strength': 5.182328030420745e-06, 'bagging_temperature': 0.005220596515353643, 'border_count': 145, 'scale_pos_weight': 2.633245324471605}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  78%|███████▊  | 78/100 [04:50<01:34,  4.30s/it]

[I 2025-11-10 12:53:59,031] Trial 77 finished with value: 0.5679862306368331 and parameters: {'learning_rate': 0.01115738710211048, 'depth': 3, 'l2_leaf_reg': 0.24592399217470498, 'subsample': 0.6, 'random_strength': 0.009411223784990168, 'bagging_temperature': 0.1593132942506986, 'border_count': 135, 'scale_pos_weight': 1.3549758156357514}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  79%|███████▉  | 79/100 [04:54<01:28,  4.22s/it]

[I 2025-11-10 12:54:03,065] Trial 78 finished with value: 0.5513211382113821 and parameters: {'learning_rate': 0.014183557974998446, 'depth': 6, 'l2_leaf_reg': 2.9686653719581924, 'subsample': 0.5, 'random_strength': 0.019634992821142956, 'bagging_temperature': 0.4665996663093446, 'border_count': 123, 'scale_pos_weight': 5.642516546070142}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  80%|████████  | 80/100 [04:58<01:27,  4.38s/it]

[I 2025-11-10 12:54:07,812] Trial 79 finished with value: 0.5686274509803921 and parameters: {'learning_rate': 0.012166446106948964, 'depth': 4, 'l2_leaf_reg': 1.1340799546732792, 'subsample': 0.7, 'random_strength': 0.5888009922895249, 'bagging_temperature': 0.6020871741953111, 'border_count': 32, 'scale_pos_weight': 4.18783784276798}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  81%|████████  | 81/100 [05:04<01:27,  4.61s/it]

[I 2025-11-10 12:54:12,958] Trial 80 finished with value: 0.5803806734992679 and parameters: {'learning_rate': 0.015308173917050028, 'depth': 6, 'l2_leaf_reg': 0.4442801714700918, 'subsample': 1.0, 'random_strength': 0.11532596674985655, 'bagging_temperature': 0.05306742215386939, 'border_count': 46, 'scale_pos_weight': 3.617477588261032}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  82%|████████▏ | 82/100 [05:06<01:08,  3.82s/it]

[I 2025-11-10 12:54:14,950] Trial 81 finished with value: 0.5867905741785596 and parameters: {'learning_rate': 0.1306796082228735, 'depth': 8, 'l2_leaf_reg': 0.9611640279236324, 'subsample': 0.6, 'random_strength': 0.025016313855975848, 'bagging_temperature': 0.7198492799472335, 'border_count': 118, 'scale_pos_weight': 2.569825223167334}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  83%|████████▎ | 83/100 [05:08<00:57,  3.37s/it]

[I 2025-11-10 12:54:17,251] Trial 82 finished with value: 0.5922836287799792 and parameters: {'learning_rate': 0.07027651383140078, 'depth': 8, 'l2_leaf_reg': 0.7973936528430411, 'subsample': 0.6, 'random_strength': 0.24162325730683926, 'bagging_temperature': 0.741812094441876, 'border_count': 114, 'scale_pos_weight': 2.3594966534749493}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  84%|████████▍ | 84/100 [05:10<00:48,  3.02s/it]

[I 2025-11-10 12:54:19,461] Trial 83 finished with value: 0.5925663716814159 and parameters: {'learning_rate': 0.05425881774021614, 'depth': 8, 'l2_leaf_reg': 1.000701426726009, 'subsample': 0.6, 'random_strength': 0.0346426877816033, 'bagging_temperature': 0.6684999630071403, 'border_count': 122, 'scale_pos_weight': 2.1570708777655807}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  85%|████████▌ | 85/100 [05:17<01:05,  4.34s/it]

[I 2025-11-10 12:54:26,869] Trial 84 finished with value: 0.5981187155368148 and parameters: {'learning_rate': 0.010085056995836698, 'depth': 7, 'l2_leaf_reg': 1.980140250232036, 'subsample': 0.9, 'random_strength': 0.06869814351178989, 'bagging_temperature': 0.8227721941550625, 'border_count': 101, 'scale_pos_weight': 2.831376886531846}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  86%|████████▌ | 86/100 [05:25<01:15,  5.42s/it]

[I 2025-11-10 12:54:34,831] Trial 85 finished with value: 0.5947503201024328 and parameters: {'learning_rate': 0.010056746615006914, 'depth': 7, 'l2_leaf_reg': 2.039633814894341, 'subsample': 0.9, 'random_strength': 0.07907171770699037, 'bagging_temperature': 0.9385405866577544, 'border_count': 102, 'scale_pos_weight': 2.9202952442803176}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  87%|████████▋ | 87/100 [05:33<01:18,  6.03s/it]

[I 2025-11-10 12:54:42,266] Trial 86 finished with value: 0.5882352941176471 and parameters: {'learning_rate': 0.012940212897674127, 'depth': 7, 'l2_leaf_reg': 3.897293113655453, 'subsample': 0.8, 'random_strength': 9.653479626905678e-05, 'bagging_temperature': 0.8019430018020322, 'border_count': 60, 'scale_pos_weight': 1.8978371329976735}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  88%|████████▊ | 88/100 [05:40<01:16,  6.40s/it]

[I 2025-11-10 12:54:49,529] Trial 87 finished with value: 0.5834614202274824 and parameters: {'learning_rate': 0.011012326115708498, 'depth': 5, 'l2_leaf_reg': 5.566140827176809, 'subsample': 0.9, 'random_strength': 0.3114032648723943, 'bagging_temperature': 0.8571508304004474, 'border_count': 151, 'scale_pos_weight': 3.0946775472004355}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  89%|████████▉ | 89/100 [05:41<00:51,  4.71s/it]

[I 2025-11-10 12:54:50,311] Trial 88 finished with value: 0.5788083357792779 and parameters: {'learning_rate': 0.1663613020325421, 'depth': 3, 'l2_leaf_reg': 0.184172315333629, 'subsample': 0.5, 'random_strength': 0.12605751253131695, 'bagging_temperature': 0.18075321689325377, 'border_count': 91, 'scale_pos_weight': 3.465925461757885}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  90%|█████████ | 90/100 [05:47<00:51,  5.13s/it]

[I 2025-11-10 12:54:56,424] Trial 89 finished with value: 0.5826262626262626 and parameters: {'learning_rate': 0.01384414548151977, 'depth': 4, 'l2_leaf_reg': 0.603458163756063, 'subsample': 1.0, 'random_strength': 0.005682294679581257, 'bagging_temperature': 0.9940792149907298, 'border_count': 84, 'scale_pos_weight': 1.5652655267252096}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  91%|█████████ | 91/100 [05:52<00:45,  5.04s/it]

[I 2025-11-10 12:55:01,241] Trial 90 finished with value: 0.5962854349951124 and parameters: {'learning_rate': 0.01977735825837174, 'depth': 7, 'l2_leaf_reg': 3.0476810361314244, 'subsample': 0.7, 'random_strength': 1.5550454089115569e-06, 'bagging_temperature': 0.09693455758583297, 'border_count': 109, 'scale_pos_weight': 2.8354431196899474}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  92%|█████████▏| 92/100 [05:59<00:44,  5.58s/it]

[I 2025-11-10 12:55:08,095] Trial 91 finished with value: 0.5962974991880481 and parameters: {'learning_rate': 0.011856422808764281, 'depth': 7, 'l2_leaf_reg': 1.315629498358712, 'subsample': 0.7, 'random_strength': 3.289399211691694e-07, 'bagging_temperature': 0.10093207056780691, 'border_count': 107, 'scale_pos_weight': 2.862132919779668}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  93%|█████████▎| 93/100 [06:03<00:36,  5.19s/it]

[I 2025-11-10 12:55:12,365] Trial 92 finished with value: 0.592282958199357 and parameters: {'learning_rate': 0.019813740487575247, 'depth': 7, 'l2_leaf_reg': 3.123837624034562, 'subsample': 0.7, 'random_strength': 4.972852916532296e-07, 'bagging_temperature': 0.1087931326793685, 'border_count': 109, 'scale_pos_weight': 2.849235585148689}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  94%|█████████▍| 94/100 [06:05<00:25,  4.27s/it]

[I 2025-11-10 12:55:14,499] Trial 93 finished with value: 0.5722428042177259 and parameters: {'learning_rate': 0.04402606217901017, 'depth': 7, 'l2_leaf_reg': 2.0555920311785316, 'subsample': 0.7, 'random_strength': 6.47140101482796e-08, 'bagging_temperature': 0.0875269826824302, 'border_count': 95, 'scale_pos_weight': 3.874609708446573}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  95%|█████████▌| 95/100 [06:09<00:20,  4.12s/it]

[I 2025-11-10 12:55:18,275] Trial 94 finished with value: 0.5945767195767195 and parameters: {'learning_rate': 0.024236980586211286, 'depth': 7, 'l2_leaf_reg': 1.3807158078707464, 'subsample': 0.7, 'random_strength': 2.69527069001606e-07, 'bagging_temperature': 0.13330189601467318, 'border_count': 104, 'scale_pos_weight': 2.7214848677962493}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  96%|█████████▌| 96/100 [06:15<00:18,  4.60s/it]

[I 2025-11-10 12:55:23,970] Trial 95 finished with value: 0.5875077303648732 and parameters: {'learning_rate': 0.01652511301353603, 'depth': 8, 'l2_leaf_reg': 5.552730663067303, 'subsample': 0.8, 'random_strength': 8.890610082894512e-07, 'bagging_temperature': 0.7892489606149957, 'border_count': 113, 'scale_pos_weight': 3.230442045723411}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  97%|█████████▋| 97/100 [06:17<00:12,  4.04s/it]

[I 2025-11-10 12:55:26,716] Trial 96 finished with value: 0.5933216168717047 and parameters: {'learning_rate': 0.03134962678714193, 'depth': 7, 'l2_leaf_reg': 0.1458539456644234, 'subsample': 0.7, 'random_strength': 2.1044149283339682e-06, 'bagging_temperature': 0.2129806202951865, 'border_count': 128, 'scale_pos_weight': 2.257136830488093}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  98%|█████████▊| 98/100 [06:23<00:08,  4.43s/it]

[I 2025-11-10 12:55:32,063] Trial 97 finished with value: 0.5976391231028668 and parameters: {'learning_rate': 0.018022536068975113, 'depth': 7, 'l2_leaf_reg': 2.4961654148918226, 'subsample': 0.7, 'random_strength': 1.2640078144978489e-05, 'bagging_temperature': 0.05558101201670897, 'border_count': 116, 'scale_pos_weight': 2.524851995252126}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187:  99%|█████████▉| 99/100 [06:31<00:05,  5.55s/it]

[I 2025-11-10 12:55:40,231] Trial 98 finished with value: 0.5945945945945946 and parameters: {'learning_rate': 0.011722651131210722, 'depth': 8, 'l2_leaf_reg': 0.3035834498158361, 'subsample': 0.7, 'random_strength': 3.0244426454653993e-05, 'bagging_temperature': 0.06724048934367913, 'border_count': 137, 'scale_pos_weight': 2.528068824282113}. Best is trial 39 with value: 0.6021872863978127.


Best trial: 39. Best value: 0.602187: 100%|██████████| 100/100 [06:37<00:00,  3.98s/it]

[I 2025-11-10 12:55:46,766] Trial 99 finished with value: 0.5860148514851485 and parameters: {'learning_rate': 0.01056762156000304, 'depth': 9, 'l2_leaf_reg': 0.11116318860041133, 'subsample': 0.8, 'random_strength': 1.397799542205658e-05, 'bagging_temperature': 0.27360809514225526, 'border_count': 116, 'scale_pos_weight': 3.4583098585874525}. Best is trial 39 with value: 0.6021872863978127.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.6022
  Best Hyperparameters:
    learning_rate: 0.042059298117722826
    depth: 6
    l2_leaf_reg: 2.6676457775670617
    subsample: 0.5
    random_strength: 0.07041736275037083
    bagging_temperature: 0.8299484798553904
    border_count: 35
    scale_pos_weight: 2.3543968662300707





In [13]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 2000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 100, # Keep early stopping
    'random_state': 123
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.042059298117722826, 'depth': 6, 'l2_leaf_reg': 2.6676457775670617, 'subsample': 0.5, 'random_strength': 0.07041736275037083, 'bagging_temperature': 0.8299484798553904, 'border_count': 35, 'scale_pos_weight': 2.3543968662300707}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.6022

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.90      0.81      0.85      4165
   Class 1.0       0.52      0.71      0.60      1235

    accuracy                           0.78      5400
   macro avg       0.71      0.76      0.73      5400
weighted avg       0.82      0.78      0.79      5400



In [14]:
print("Saving best_model...")

# Use the model's F1 score in the name
best_model.save_model("catboost_mod_f1_0.6022.cbm")

print("Done.")

Saving best_model...
Done.


In [15]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 83

Top 10 Most Important Features:
                     feature  importance
14                 liab_prct   23.422263
49     recovery_case_clarity    6.895557
52      highrisk_site_binary    3.970960
46             is_single_car    3.845630
13       witness_present_ind    2.992513
45  is_multi_vehicle_unclear    2.951461
25       in_network_bodyshop    2.888566
53            high_risk_site    2.612414
48       has_recovery_target    2.430656
22            vehicle_weight    2.138397

Bottom 10 Least Important Features:
              feature  importance
66     veteran_driver    0.052804
43      police_binary    0.047522
62  middle_age_driver    0.035613
36          is_friday    0.028452
59       young_driver    0.023122
38         claim_hour    0.000000
64      novice_driver    0.000000
60      senior_driver    0.000000
39          rush_hour    0.000000
40         late_night    0.000000

Keeping top 66 features and removing bottom 100.


In [16]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data for CatBoost...
Applying mandatory .astype(str) to: ['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'policy_report_filed_ind', 'witness_present_ind']
Dropping helper datetime columns: ['claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            0
2       4655051            0
3       6728725            1
4       9848460            1


In [17]:
prediction.to_csv("results/catboost_6022_prediction.csv", index=False)