In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from cc5_preprocessor import Preprocessor

import joblib

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=0)

In [5]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [6]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [7]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Learning statistical parameters for Z-scoring...
Fit complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


In [8]:
print("Saving preprocessor and training columns...")

# Save the 'pre' object
joblib.dump(pre, 'cc3_preprocessor.pkl')

# Save the exact column order and names
joblib.dump(X_train_proc.columns, 'training_columns.pkl')

print("Done.")

Saving preprocessor and training columns...
Done.


## CatBoost with Optuna Tuning

In [9]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season']


In [11]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100,
        'random_state': 123
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False
    )

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [13]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-09 13:55:35,151] A new study created in memory with name: no-name-ad2394d0-113c-4af9-a6f2-622a76c737a1



2. Starting Optuna study...


Best trial: 0. Best value: 0.513761:   1%|          | 1/100 [00:03<05:01,  3.04s/it]

[I 2025-11-09 13:55:38,193] Trial 0 finished with value: 0.5137614678899083 and parameters: {'learning_rate': 0.04399414140994055, 'depth': 8, 'l2_leaf_reg': 0.01752493467327922, 'subsample': 0.9, 'random_strength': 0.0014595330664878767, 'bagging_temperature': 0.8597467653415687, 'border_count': 51, 'scale_pos_weight': 1.0899928448611056}. Best is trial 0 with value: 0.5137614678899083.


Best trial: 1. Best value: 0.531682:   2%|▏         | 2/100 [00:07<05:53,  3.60s/it]

[I 2025-11-09 13:55:42,189] Trial 1 finished with value: 0.531681753215817 and parameters: {'learning_rate': 0.010870544818371738, 'depth': 5, 'l2_leaf_reg': 0.00328829891656304, 'subsample': 0.6, 'random_strength': 0.00098174306807767, 'bagging_temperature': 0.2728542364568849, 'border_count': 107, 'scale_pos_weight': 7.228042727599194}. Best is trial 1 with value: 0.531681753215817.


Best trial: 2. Best value: 0.596801:   3%|▎         | 3/100 [00:08<04:07,  2.55s/it]

[I 2025-11-09 13:55:43,483] Trial 2 finished with value: 0.5968005223636957 and parameters: {'learning_rate': 0.11992901611654633, 'depth': 6, 'l2_leaf_reg': 1.0245064630564518, 'subsample': 0.8, 'random_strength': 0.0020419248778626295, 'bagging_temperature': 0.8797840859730776, 'border_count': 207, 'scale_pos_weight': 2.652379424622503}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:   4%|▍         | 4/100 [00:12<05:10,  3.24s/it]

[I 2025-11-09 13:55:47,778] Trial 3 finished with value: 0.5499190501888829 and parameters: {'learning_rate': 0.06908734142193082, 'depth': 10, 'l2_leaf_reg': 0.04530716941182683, 'subsample': 0.7, 'random_strength': 0.012959741602483857, 'bagging_temperature': 0.4821040490374522, 'border_count': 102, 'scale_pos_weight': 5.919457916869646}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:   5%|▌         | 5/100 [00:15<04:46,  3.01s/it]

[I 2025-11-09 13:55:50,388] Trial 4 finished with value: 0.5339735894357743 and parameters: {'learning_rate': 0.026302378490573503, 'depth': 7, 'l2_leaf_reg': 0.04463846597011004, 'subsample': 0.8, 'random_strength': 2.279091731456199e-08, 'bagging_temperature': 0.11825444616180147, 'border_count': 224, 'scale_pos_weight': 7.645484290921883}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:   6%|▌         | 6/100 [00:17<04:11,  2.68s/it]

[I 2025-11-09 13:55:52,427] Trial 5 finished with value: 0.5477050025786487 and parameters: {'learning_rate': 0.07645461260956132, 'depth': 8, 'l2_leaf_reg': 0.15196667679809917, 'subsample': 0.7, 'random_strength': 9.257305584139078e-07, 'bagging_temperature': 0.7768819052747699, 'border_count': 34, 'scale_pos_weight': 5.60374581117585}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:   7%|▋         | 7/100 [00:20<04:17,  2.77s/it]

[I 2025-11-09 13:55:55,388] Trial 6 finished with value: 0.5706634930080333 and parameters: {'learning_rate': 0.22081496263625783, 'depth': 9, 'l2_leaf_reg': 0.0015878623777449263, 'subsample': 0.8, 'random_strength': 1.738012195928883e-07, 'bagging_temperature': 0.4454147427202855, 'border_count': 244, 'scale_pos_weight': 3.4172805725260496}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:   8%|▊         | 8/100 [00:22<03:53,  2.54s/it]

[I 2025-11-09 13:55:57,436] Trial 7 finished with value: 0.5772432932469935 and parameters: {'learning_rate': 0.09726688183606039, 'depth': 8, 'l2_leaf_reg': 0.013189106674449535, 'subsample': 0.7, 'random_strength': 2.241320787211984e-06, 'bagging_temperature': 0.6158867493927276, 'border_count': 192, 'scale_pos_weight': 3.1692322715988914}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:   9%|▉         | 9/100 [00:25<04:22,  2.88s/it]

[I 2025-11-09 13:56:01,058] Trial 8 finished with value: 0.5623402442487929 and parameters: {'learning_rate': 0.04981274556216114, 'depth': 9, 'l2_leaf_reg': 0.0028697314586173532, 'subsample': 0.9, 'random_strength': 1.7090674174506762e-07, 'bagging_temperature': 0.5457562048334181, 'border_count': 158, 'scale_pos_weight': 4.952099590785759}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  10%|█         | 10/100 [00:27<03:44,  2.49s/it]

[I 2025-11-09 13:56:02,673] Trial 9 finished with value: 0.5202626216889291 and parameters: {'learning_rate': 0.04308284805158615, 'depth': 4, 'l2_leaf_reg': 0.0030711947716400107, 'subsample': 1.0, 'random_strength': 0.00035689284827310214, 'bagging_temperature': 0.8828420658820446, 'border_count': 151, 'scale_pos_weight': 9.252192700192774}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  11%|█         | 11/100 [00:28<03:02,  2.05s/it]

[I 2025-11-09 13:56:03,724] Trial 10 finished with value: 0.5340751043115438 and parameters: {'learning_rate': 0.24465446263558271, 'depth': 3, 'l2_leaf_reg': 3.403362694910058, 'subsample': 0.5, 'random_strength': 0.5676677315657829, 'bagging_temperature': 0.9708488493893473, 'border_count': 190, 'scale_pos_weight': 1.0605350017466968}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  12%|█▏        | 12/100 [00:29<02:40,  1.82s/it]

[I 2025-11-09 13:56:05,026] Trial 11 finished with value: 0.588011417697431 and parameters: {'learning_rate': 0.13322385796141278, 'depth': 6, 'l2_leaf_reg': 0.952966166213059, 'subsample': 0.6, 'random_strength': 6.606254218055755e-06, 'bagging_temperature': 0.673991331757474, 'border_count': 207, 'scale_pos_weight': 2.828044890792338}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  13%|█▎        | 13/100 [00:31<02:25,  1.67s/it]

[I 2025-11-09 13:56:06,336] Trial 12 finished with value: 0.5931876606683805 and parameters: {'learning_rate': 0.13815781449305106, 'depth': 6, 'l2_leaf_reg': 1.4009971895486402, 'subsample': 0.5, 'random_strength': 2.690884718960373e-05, 'bagging_temperature': 0.7135061106633555, 'border_count': 214, 'scale_pos_weight': 2.773157818430909}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  14%|█▍        | 14/100 [00:32<02:17,  1.60s/it]

[I 2025-11-09 13:56:07,787] Trial 13 finished with value: 0.586218487394958 and parameters: {'learning_rate': 0.15467204863264009, 'depth': 6, 'l2_leaf_reg': 0.742242421822503, 'subsample': 0.5, 'random_strength': 3.173816748176424e-05, 'bagging_temperature': 0.7232127753440294, 'border_count': 249, 'scale_pos_weight': 2.472502457409208}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  15%|█▌        | 15/100 [00:34<02:10,  1.53s/it]

[I 2025-11-09 13:56:09,152] Trial 14 finished with value: 0.5677665666757568 and parameters: {'learning_rate': 0.13215926839676498, 'depth': 5, 'l2_leaf_reg': 9.641333171377793, 'subsample': 0.6, 'random_strength': 0.02791814151478171, 'bagging_temperature': 0.9611867519268492, 'border_count': 177, 'scale_pos_weight': 4.223153243013291}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  16%|█▌        | 16/100 [00:35<02:01,  1.44s/it]

[I 2025-11-09 13:56:10,396] Trial 15 finished with value: 0.5849602313810557 and parameters: {'learning_rate': 0.2888901306009943, 'depth': 5, 'l2_leaf_reg': 0.5861841988677673, 'subsample': 0.9, 'random_strength': 0.00012843915013853585, 'bagging_temperature': 0.788624603364698, 'border_count': 217, 'scale_pos_weight': 1.987370711491197}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  17%|█▋        | 17/100 [00:40<03:32,  2.56s/it]

[I 2025-11-09 13:56:15,537] Trial 16 finished with value: 0.575954738330976 and parameters: {'learning_rate': 0.026662552397644364, 'depth': 7, 'l2_leaf_reg': 2.506387262906843, 'subsample': 0.8, 'random_strength': 0.009050306794461381, 'bagging_temperature': 0.36137734363231255, 'border_count': 134, 'scale_pos_weight': 4.202977193316692}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  18%|█▊        | 18/100 [00:41<02:56,  2.15s/it]

[I 2025-11-09 13:56:16,751] Trial 17 finished with value: 0.5955137481910275 and parameters: {'learning_rate': 0.16818373928449884, 'depth': 3, 'l2_leaf_reg': 0.23399883400980062, 'subsample': 1.0, 'random_strength': 0.3384414930492306, 'bagging_temperature': 0.5997696938164465, 'border_count': 232, 'scale_pos_weight': 1.998168899919262}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  19%|█▉        | 19/100 [00:42<02:26,  1.81s/it]

[I 2025-11-09 13:56:17,775] Trial 18 finished with value: 0.5720657931419013 and parameters: {'learning_rate': 0.19590330139825896, 'depth': 3, 'l2_leaf_reg': 0.23344077595339038, 'subsample': 1.0, 'random_strength': 0.5798045241038849, 'bagging_temperature': 0.07726865997134935, 'border_count': 243, 'scale_pos_weight': 3.948616114722163}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  20%|██        | 20/100 [00:45<02:44,  2.06s/it]

[I 2025-11-09 13:56:20,417] Trial 19 finished with value: 0.5960644007155635 and parameters: {'learning_rate': 0.09983101476435818, 'depth': 4, 'l2_leaf_reg': 0.3122115706011686, 'subsample': 1.0, 'random_strength': 0.10680292163179363, 'bagging_temperature': 0.23845552163316303, 'border_count': 173, 'scale_pos_weight': 2.113622839423341}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  21%|██        | 21/100 [00:47<02:57,  2.24s/it]

[I 2025-11-09 13:56:23,082] Trial 20 finished with value: 0.5407788390889052 and parameters: {'learning_rate': 0.07869442272722732, 'depth': 4, 'l2_leaf_reg': 8.522962570903124, 'subsample': 0.9, 'random_strength': 0.07194494248186956, 'bagging_temperature': 0.20087646462430053, 'border_count': 127, 'scale_pos_weight': 6.513457611342687}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  22%|██▏       | 22/100 [00:50<02:52,  2.22s/it]

[I 2025-11-09 13:56:25,234] Trial 21 finished with value: 0.5872720506140677 and parameters: {'learning_rate': 0.10255121476261853, 'depth': 4, 'l2_leaf_reg': 0.33246835853921275, 'subsample': 1.0, 'random_strength': 0.14973965387317326, 'bagging_temperature': 0.37009172835916515, 'border_count': 171, 'scale_pos_weight': 1.8888365828848412}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  23%|██▎       | 23/100 [00:51<02:21,  1.84s/it]

[I 2025-11-09 13:56:26,211] Trial 22 finished with value: 0.5866564417177914 and parameters: {'learning_rate': 0.18006477667081347, 'depth': 3, 'l2_leaf_reg': 0.07970433505268168, 'subsample': 1.0, 'random_strength': 0.0033281306974641085, 'bagging_temperature': 0.5748056854470904, 'border_count': 231, 'scale_pos_weight': 1.717296003717625}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  24%|██▍       | 24/100 [00:52<02:14,  1.78s/it]

[I 2025-11-09 13:56:27,826] Trial 23 finished with value: 0.5963136971449223 and parameters: {'learning_rate': 0.103790799616444, 'depth': 4, 'l2_leaf_reg': 0.35174847471749815, 'subsample': 0.9, 'random_strength': 0.13025167185231276, 'bagging_temperature': 0.2364551321823718, 'border_count': 196, 'scale_pos_weight': 2.0468334287378234}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  25%|██▌       | 25/100 [00:53<02:01,  1.62s/it]

[I 2025-11-09 13:56:29,075] Trial 24 finished with value: 0.5593709043250328 and parameters: {'learning_rate': 0.10245516453345319, 'depth': 4, 'l2_leaf_reg': 0.39615359953877105, 'subsample': 0.9, 'random_strength': 0.059259791574866064, 'bagging_temperature': 0.2478918668733181, 'border_count': 195, 'scale_pos_weight': 4.900855646571122}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  26%|██▌       | 26/100 [00:55<02:07,  1.72s/it]

[I 2025-11-09 13:56:31,043] Trial 25 finished with value: 0.5782493368700266 and parameters: {'learning_rate': 0.06071046709314411, 'depth': 5, 'l2_leaf_reg': 1.949079981737, 'subsample': 0.8, 'random_strength': 0.006090618557566358, 'bagging_temperature': 0.15077991762300322, 'border_count': 168, 'scale_pos_weight': 3.5302202408866634}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  27%|██▋       | 27/100 [00:57<01:57,  1.61s/it]

[I 2025-11-09 13:56:32,394] Trial 26 finished with value: 0.5904379806084922 and parameters: {'learning_rate': 0.10179935173146461, 'depth': 4, 'l2_leaf_reg': 0.12089172451524237, 'subsample': 0.9, 'random_strength': 0.03639175583644051, 'bagging_temperature': 0.01904473981618443, 'border_count': 198, 'scale_pos_weight': 2.480423555672806}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  28%|██▊       | 28/100 [01:00<02:36,  2.17s/it]

[I 2025-11-09 13:56:35,860] Trial 27 finished with value: 0.532516493873704 and parameters: {'learning_rate': 0.025688610587782566, 'depth': 5, 'l2_leaf_reg': 3.735114102614618, 'subsample': 0.8, 'random_strength': 0.13992692721768238, 'bagging_temperature': 0.3269259104316284, 'border_count': 179, 'scale_pos_weight': 1.0694831158241154}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  29%|██▉       | 29/100 [01:02<02:22,  2.00s/it]

[I 2025-11-09 13:56:37,483] Trial 28 finished with value: 0.5133139404788544 and parameters: {'learning_rate': 0.05564384421100505, 'depth': 6, 'l2_leaf_reg': 0.5011251462159538, 'subsample': 0.9, 'random_strength': 0.0005754917438501037, 'bagging_temperature': 0.42457295033712805, 'border_count': 114, 'scale_pos_weight': 9.694585152452941}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  30%|███       | 30/100 [01:05<02:46,  2.37s/it]

[I 2025-11-09 13:56:40,717] Trial 29 finished with value: 0.5802371541501976 and parameters: {'learning_rate': 0.03977301147770645, 'depth': 7, 'l2_leaf_reg': 0.02305974908210485, 'subsample': 1.0, 'random_strength': 0.0030990746576061387, 'bagging_temperature': 0.2121388528818421, 'border_count': 82, 'scale_pos_weight': 1.7345062527440906}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 2. Best value: 0.596801:  31%|███       | 31/100 [01:07<02:34,  2.23s/it]

[I 2025-11-09 13:56:42,629] Trial 30 finished with value: 0.5606583488186886 and parameters: {'learning_rate': 0.08545314272265639, 'depth': 4, 'l2_leaf_reg': 1.1821914856886164, 'subsample': 0.9, 'random_strength': 0.01785293455160446, 'bagging_temperature': 0.30906243657457366, 'border_count': 141, 'scale_pos_weight': 4.977277495454207}. Best is trial 2 with value: 0.5968005223636957.


Best trial: 31. Best value: 0.602044:  32%|███▏      | 32/100 [01:08<02:11,  1.93s/it]

[I 2025-11-09 13:56:43,843] Trial 31 finished with value: 0.6020444131124427 and parameters: {'learning_rate': 0.15804097887036375, 'depth': 3, 'l2_leaf_reg': 0.22946926051906394, 'subsample': 1.0, 'random_strength': 0.8852579089082552, 'bagging_temperature': 0.8538903062862053, 'border_count': 207, 'scale_pos_weight': 2.1219229945474476}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  33%|███▎      | 33/100 [01:10<02:01,  1.82s/it]

[I 2025-11-09 13:56:45,393] Trial 32 finished with value: 0.5848981222532961 and parameters: {'learning_rate': 0.12085629962246607, 'depth': 3, 'l2_leaf_reg': 0.18417192860992768, 'subsample': 1.0, 'random_strength': 0.851488448383831, 'bagging_temperature': 0.899009830840009, 'border_count': 204, 'scale_pos_weight': 1.5669960214169059}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  34%|███▍      | 34/100 [01:16<03:30,  3.19s/it]

[I 2025-11-09 13:56:51,786] Trial 33 finished with value: 0.5925163062135256 and parameters: {'learning_rate': 0.013221658209464292, 'depth': 5, 'l2_leaf_reg': 0.08147793331803566, 'subsample': 0.9, 'random_strength': 0.0014443008507338078, 'bagging_temperature': 0.7978687510481237, 'border_count': 161, 'scale_pos_weight': 2.330424161079075}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  35%|███▌      | 35/100 [01:18<03:10,  2.93s/it]

[I 2025-11-09 13:56:54,112] Trial 34 finished with value: 0.5897435897435898 and parameters: {'learning_rate': 0.06338553994362688, 'depth': 3, 'l2_leaf_reg': 0.036318261457686046, 'subsample': 1.0, 'random_strength': 0.14991634233988035, 'bagging_temperature': 0.85548082942981, 'border_count': 183, 'scale_pos_weight': 3.0497178778417577}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  36%|███▌      | 36/100 [01:19<02:30,  2.35s/it]

[I 2025-11-09 13:56:55,110] Trial 35 finished with value: 0.5729166666666666 and parameters: {'learning_rate': 0.11526504690605681, 'depth': 4, 'l2_leaf_reg': 0.01193449178475579, 'subsample': 0.8, 'random_strength': 0.22538293764957865, 'bagging_temperature': 0.9966120973564259, 'border_count': 221, 'scale_pos_weight': 3.577332850553567}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  37%|███▋      | 37/100 [01:21<02:11,  2.08s/it]

[I 2025-11-09 13:56:56,575] Trial 36 finished with value: 0.5260223048327137 and parameters: {'learning_rate': 0.08049060427919937, 'depth': 5, 'l2_leaf_reg': 0.2803078172228021, 'subsample': 0.9, 'random_strength': 0.9496976890980392, 'bagging_temperature': 0.13724677909567917, 'border_count': 74, 'scale_pos_weight': 8.027343650469845}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  38%|███▊      | 38/100 [01:25<02:51,  2.76s/it]

[I 2025-11-09 13:57:00,925] Trial 37 finished with value: 0.5318699873364289 and parameters: {'learning_rate': 0.2187960696739805, 'depth': 10, 'l2_leaf_reg': 0.11690924505647654, 'subsample': 1.0, 'random_strength': 0.051907908487565255, 'bagging_temperature': 0.5053980497868044, 'border_count': 229, 'scale_pos_weight': 1.438278715092532}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  39%|███▉      | 39/100 [01:28<02:53,  2.85s/it]

[I 2025-11-09 13:57:03,966] Trial 38 finished with value: 0.589804994868286 and parameters: {'learning_rate': 0.15449896805191812, 'depth': 9, 'l2_leaf_reg': 0.6963403325731083, 'subsample': 0.7, 'random_strength': 0.00020279187165038153, 'bagging_temperature': 0.01232254435520802, 'border_count': 207, 'scale_pos_weight': 2.444026294185959}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  40%|████      | 40/100 [01:29<02:15,  2.26s/it]

[I 2025-11-09 13:57:04,858] Trial 39 finished with value: 0.5371819960861057 and parameters: {'learning_rate': 0.2759591687584365, 'depth': 4, 'l2_leaf_reg': 0.05618567137204364, 'subsample': 0.8, 'random_strength': 0.011530562417653323, 'bagging_temperature': 0.659675214953243, 'border_count': 158, 'scale_pos_weight': 6.214234353892359}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  41%|████      | 41/100 [01:31<02:06,  2.14s/it]

[I 2025-11-09 13:57:06,707] Trial 40 finished with value: 0.5770114942528736 and parameters: {'learning_rate': 0.07088570650012202, 'depth': 7, 'l2_leaf_reg': 0.43489450371325167, 'subsample': 0.9, 'random_strength': 1.2282563786363745e-08, 'bagging_temperature': 0.4209941763235813, 'border_count': 186, 'scale_pos_weight': 3.7634051598981744}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  42%|████▏     | 42/100 [01:32<01:49,  1.89s/it]

[I 2025-11-09 13:57:08,032] Trial 41 finished with value: 0.5743243243243243 and parameters: {'learning_rate': 0.17168277081137479, 'depth': 3, 'l2_leaf_reg': 0.20508693364040723, 'subsample': 1.0, 'random_strength': 0.27859486355870633, 'bagging_temperature': 0.8324265985786765, 'border_count': 235, 'scale_pos_weight': 1.3817948539759621}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  43%|████▎     | 43/100 [01:33<01:30,  1.59s/it]

[I 2025-11-09 13:57:08,927] Trial 42 finished with value: 0.5928292509762159 and parameters: {'learning_rate': 0.19766533474196066, 'depth': 3, 'l2_leaf_reg': 0.15703475111972212, 'subsample': 1.0, 'random_strength': 0.2537204897462575, 'bagging_temperature': 0.9229774727242097, 'border_count': 211, 'scale_pos_weight': 2.0835366875895684}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  44%|████▍     | 44/100 [01:34<01:20,  1.44s/it]

[I 2025-11-09 13:57:10,009] Trial 43 finished with value: 0.5843446601941747 and parameters: {'learning_rate': 0.15515978259194088, 'depth': 3, 'l2_leaf_reg': 0.997150957052875, 'subsample': 1.0, 'random_strength': 0.4122715825354162, 'bagging_temperature': 0.7474870756201748, 'border_count': 239, 'scale_pos_weight': 3.2435931502832442}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  45%|████▌     | 45/100 [01:36<01:22,  1.50s/it]

[I 2025-11-09 13:57:11,642] Trial 44 finished with value: 0.5916230366492147 and parameters: {'learning_rate': 0.11942086939020895, 'depth': 4, 'l2_leaf_reg': 1.6125816272776599, 'subsample': 1.0, 'random_strength': 0.07885408484917672, 'bagging_temperature': 0.5812684212807471, 'border_count': 222, 'scale_pos_weight': 2.738958047487846}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  46%|████▌     | 46/100 [01:37<01:19,  1.48s/it]

[I 2025-11-09 13:57:13,070] Trial 45 finished with value: 0.597420704078076 and parameters: {'learning_rate': 0.09365703018375914, 'depth': 3, 'l2_leaf_reg': 0.2821527648333196, 'subsample': 0.9, 'random_strength': 0.01959313809372556, 'bagging_temperature': 0.48472509267710184, 'border_count': 193, 'scale_pos_weight': 2.2016124513376756}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  47%|████▋     | 47/100 [01:40<01:31,  1.72s/it]

[I 2025-11-09 13:57:15,372] Trial 46 finished with value: 0.5920554854981085 and parameters: {'learning_rate': 0.08479797133551613, 'depth': 8, 'l2_leaf_reg': 0.3695823503101011, 'subsample': 0.8, 'random_strength': 0.02571762480562662, 'bagging_temperature': 0.4611636618168361, 'border_count': 195, 'scale_pos_weight': 3.02680023925335}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  48%|████▊     | 48/100 [01:42<01:32,  1.78s/it]

[I 2025-11-09 13:57:17,291] Trial 47 finished with value: 0.5288232491662697 and parameters: {'learning_rate': 0.09167797459250313, 'depth': 4, 'l2_leaf_reg': 0.7794953462267145, 'subsample': 0.7, 'random_strength': 0.003281466430182408, 'bagging_temperature': 0.2426949081038722, 'border_count': 170, 'scale_pos_weight': 1.0377787207510512}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  49%|████▉     | 49/100 [01:43<01:21,  1.59s/it]

[I 2025-11-09 13:57:18,428] Trial 48 finished with value: 0.5648524235039264 and parameters: {'learning_rate': 0.13982542639033185, 'depth': 3, 'l2_leaf_reg': 0.006198064739895732, 'subsample': 0.9, 'random_strength': 0.0007320546179376013, 'bagging_temperature': 0.6508067651836633, 'border_count': 201, 'scale_pos_weight': 4.603903261431625}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  50%|█████     | 50/100 [01:44<01:16,  1.53s/it]

[I 2025-11-09 13:57:19,810] Trial 49 finished with value: 0.5979882067291016 and parameters: {'learning_rate': 0.10861340553625137, 'depth': 5, 'l2_leaf_reg': 5.377501653423105, 'subsample': 0.8, 'random_strength': 0.0063951876223146764, 'bagging_temperature': 0.5276453450638597, 'border_count': 186, 'scale_pos_weight': 2.2576094340011434}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  51%|█████     | 51/100 [01:46<01:25,  1.75s/it]

[I 2025-11-09 13:57:22,075] Trial 50 finished with value: 0.5930807248764415 and parameters: {'learning_rate': 0.05007629543576406, 'depth': 6, 'l2_leaf_reg': 1.9195953879840537, 'subsample': 0.8, 'random_strength': 0.004872036767744095, 'bagging_temperature': 0.5443512737599332, 'border_count': 32, 'scale_pos_weight': 2.7129684271508228}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  52%|█████▏    | 52/100 [01:48<01:17,  1.62s/it]

[I 2025-11-09 13:57:23,407] Trial 51 finished with value: 0.5949640287769784 and parameters: {'learning_rate': 0.10748020120462976, 'depth': 5, 'l2_leaf_reg': 5.47870647818789, 'subsample': 0.7, 'random_strength': 0.01841416961552037, 'bagging_temperature': 0.07433777699035321, 'border_count': 188, 'scale_pos_weight': 2.0775421491525905}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  53%|█████▎    | 53/100 [01:49<01:12,  1.55s/it]

[I 2025-11-09 13:57:24,796] Trial 52 finished with value: 0.5930470347648262 and parameters: {'learning_rate': 0.12751701618126804, 'depth': 6, 'l2_leaf_reg': 5.247877119416355, 'subsample': 0.8, 'random_strength': 0.0014643729438714019, 'bagging_temperature': 0.38829433671270264, 'border_count': 176, 'scale_pos_weight': 2.3488158871921736}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  54%|█████▍    | 54/100 [01:51<01:14,  1.61s/it]

[I 2025-11-09 13:57:26,539] Trial 53 finished with value: 0.5750209555741828 and parameters: {'learning_rate': 0.0949565050622955, 'depth': 4, 'l2_leaf_reg': 0.562247828971531, 'subsample': 0.9, 'random_strength': 0.09249520986219258, 'bagging_temperature': 0.2949515928841109, 'border_count': 147, 'scale_pos_weight': 1.3975940015951873}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  55%|█████▌    | 55/100 [01:53<01:14,  1.66s/it]

[I 2025-11-09 13:57:28,327] Trial 54 finished with value: 0.5981045981045982 and parameters: {'learning_rate': 0.07074416321551621, 'depth': 5, 'l2_leaf_reg': 0.2713803409450355, 'subsample': 0.7, 'random_strength': 0.00916059832900291, 'bagging_temperature': 0.49304593388073725, 'border_count': 255, 'scale_pos_weight': 2.1991998613480703}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  56%|█████▌    | 56/100 [01:54<01:14,  1.70s/it]

[I 2025-11-09 13:57:30,099] Trial 55 finished with value: 0.5861325115562404 and parameters: {'learning_rate': 0.06979929724374807, 'depth': 5, 'l2_leaf_reg': 0.1416433031933851, 'subsample': 0.6, 'random_strength': 0.010200744282660589, 'bagging_temperature': 0.515070379999202, 'border_count': 251, 'scale_pos_weight': 3.1543652864311174}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  57%|█████▋    | 57/100 [01:56<01:06,  1.54s/it]

[I 2025-11-09 13:57:31,285] Trial 56 finished with value: 0.5865324103209566 and parameters: {'learning_rate': 0.13684885817200565, 'depth': 6, 'l2_leaf_reg': 0.001149927204401284, 'subsample': 0.7, 'random_strength': 7.408951753775968e-05, 'bagging_temperature': 0.710507495192193, 'border_count': 216, 'scale_pos_weight': 2.8199436269466167}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  58%|█████▊    | 58/100 [01:58<01:08,  1.64s/it]

[I 2025-11-09 13:57:33,156] Trial 57 finished with value: 0.5879204892966361 and parameters: {'learning_rate': 0.07096573184457224, 'depth': 5, 'l2_leaf_reg': 2.80134736469021, 'subsample': 0.7, 'random_strength': 7.522506334883837e-08, 'bagging_temperature': 0.6249421326539089, 'border_count': 255, 'scale_pos_weight': 1.7901565513075597}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  59%|█████▉    | 59/100 [02:00<01:18,  1.91s/it]

[I 2025-11-09 13:57:35,707] Trial 58 finished with value: 0.5747583854462763 and parameters: {'learning_rate': 0.04395309940110025, 'depth': 6, 'l2_leaf_reg': 0.06670738333348836, 'subsample': 0.7, 'random_strength': 0.002007621711592859, 'bagging_temperature': 0.47355248231417957, 'border_count': 226, 'scale_pos_weight': 4.199385794774076}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  60%|██████    | 60/100 [02:01<01:07,  1.68s/it]

[I 2025-11-09 13:57:36,848] Trial 59 finished with value: 0.5224018475750577 and parameters: {'learning_rate': 0.11272903134026162, 'depth': 5, 'l2_leaf_reg': 0.2641847579601091, 'subsample': 0.8, 'random_strength': 0.00033839757511120296, 'bagging_temperature': 0.5363304591962308, 'border_count': 211, 'scale_pos_weight': 8.424965656101197}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  61%|██████    | 61/100 [02:04<01:15,  1.94s/it]

[I 2025-11-09 13:57:39,382] Trial 60 finished with value: 0.5536261491317671 and parameters: {'learning_rate': 0.03636135006174943, 'depth': 3, 'l2_leaf_reg': 0.09942499819822168, 'subsample': 0.6, 'random_strength': 0.009095669904406434, 'bagging_temperature': 0.9378104375043633, 'border_count': 191, 'scale_pos_weight': 5.571128259649739}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  62%|██████▏   | 62/100 [02:05<01:09,  1.83s/it]

[I 2025-11-09 13:57:40,951] Trial 61 finished with value: 0.5913461538461539 and parameters: {'learning_rate': 0.09417189175954994, 'depth': 4, 'l2_leaf_reg': 0.38680457712468047, 'subsample': 0.8, 'random_strength': 0.03178355321567478, 'bagging_temperature': 0.1964603191274492, 'border_count': 199, 'scale_pos_weight': 2.2799270077589813}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  63%|██████▎   | 63/100 [02:08<01:16,  2.06s/it]

[I 2025-11-09 13:57:43,543] Trial 62 finished with value: 0.5922190201729106 and parameters: {'learning_rate': 0.062455480912047355, 'depth': 4, 'l2_leaf_reg': 0.2749956175242949, 'subsample': 0.9, 'random_strength': 0.038123015380048156, 'bagging_temperature': 0.4054072552741255, 'border_count': 163, 'scale_pos_weight': 2.0956418157638477}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  64%|██████▍   | 64/100 [02:10<01:10,  1.97s/it]

[I 2025-11-09 13:57:45,309] Trial 63 finished with value: 0.5950960901259112 and parameters: {'learning_rate': 0.07875064082213543, 'depth': 3, 'l2_leaf_reg': 0.8888455940695418, 'subsample': 0.9, 'random_strength': 0.11739210846264021, 'bagging_temperature': 0.1702388084759569, 'border_count': 172, 'scale_pos_weight': 2.5743285744362785}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  65%|██████▌   | 65/100 [02:11<01:00,  1.73s/it]

[I 2025-11-09 13:57:46,465] Trial 64 finished with value: 0.5694799658994032 and parameters: {'learning_rate': 0.14523311317012433, 'depth': 5, 'l2_leaf_reg': 0.19228612516708884, 'subsample': 0.7, 'random_strength': 0.005124751853566554, 'bagging_temperature': 0.36714018181761343, 'border_count': 152, 'scale_pos_weight': 1.3553048385280242}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  66%|██████▌   | 66/100 [02:12<00:57,  1.71s/it]

[I 2025-11-09 13:57:48,124] Trial 65 finished with value: 0.5890826383623957 and parameters: {'learning_rate': 0.08851962130512615, 'depth': 4, 'l2_leaf_reg': 0.6127007261913323, 'subsample': 0.8, 'random_strength': 0.5669313889477628, 'bagging_temperature': 0.2664419690990952, 'border_count': 186, 'scale_pos_weight': 1.788405359289742}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  67%|██████▋   | 67/100 [02:14<00:51,  1.57s/it]

[I 2025-11-09 13:57:49,392] Trial 66 finished with value: 0.5797872340425532 and parameters: {'learning_rate': 0.11126290247563415, 'depth': 5, 'l2_leaf_reg': 1.1110867225186738, 'subsample': 0.9, 'random_strength': 6.952125692061186e-06, 'bagging_temperature': 0.3318559234258252, 'border_count': 180, 'scale_pos_weight': 3.478828956070549}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  68%|██████▊   | 68/100 [02:16<00:52,  1.63s/it]

[I 2025-11-09 13:57:51,162] Trial 67 finished with value: 0.5923295454545454 and parameters: {'learning_rate': 0.10105801808456502, 'depth': 7, 'l2_leaf_reg': 0.4819532823117484, 'subsample': 0.8, 'random_strength': 0.06002733348025075, 'bagging_temperature': 0.10888952579599215, 'border_count': 243, 'scale_pos_weight': 2.1866316872648466}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  69%|██████▉   | 69/100 [02:17<00:47,  1.54s/it]

[I 2025-11-09 13:57:52,478] Trial 68 finished with value: 0.5784274990122481 and parameters: {'learning_rate': 0.1907910070241315, 'depth': 6, 'l2_leaf_reg': 0.30363684641627847, 'subsample': 1.0, 'random_strength': 0.2180474228782231, 'bagging_temperature': 0.830360203211886, 'border_count': 204, 'scale_pos_weight': 1.6304591241978508}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  70%|███████   | 70/100 [02:18<00:43,  1.46s/it]

[I 2025-11-09 13:57:53,759] Trial 69 finished with value: 0.5932481153720092 and parameters: {'learning_rate': 0.12498651952007392, 'depth': 3, 'l2_leaf_reg': 0.042787720588457666, 'subsample': 1.0, 'random_strength': 0.016398185511223157, 'bagging_temperature': 0.8673973394824994, 'border_count': 218, 'scale_pos_weight': 2.6410380061859318}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  71%|███████   | 71/100 [02:24<01:21,  2.79s/it]

[I 2025-11-09 13:57:59,664] Trial 70 finished with value: 0.5661538461538461 and parameters: {'learning_rate': 0.012242038270742954, 'depth': 4, 'l2_leaf_reg': 7.325760213531859, 'subsample': 0.9, 'random_strength': 0.006354760113556712, 'bagging_temperature': 0.44422206603370323, 'border_count': 130, 'scale_pos_weight': 1.243631436697211}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  72%|███████▏  | 72/100 [02:25<01:03,  2.25s/it]

[I 2025-11-09 13:58:00,646] Trial 71 finished with value: 0.5383684337932704 and parameters: {'learning_rate': 0.1657535968747515, 'depth': 3, 'l2_leaf_reg': 0.21415736972844365, 'subsample': 1.0, 'random_strength': 0.11294594538735159, 'bagging_temperature': 0.6294697221178797, 'border_count': 231, 'scale_pos_weight': 6.974881586480135}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  73%|███████▎  | 73/100 [02:26<00:51,  1.91s/it]

[I 2025-11-09 13:58:01,751] Trial 72 finished with value: 0.5879716100112066 and parameters: {'learning_rate': 0.15561866861750456, 'depth': 3, 'l2_leaf_reg': 0.09833833559578095, 'subsample': 1.0, 'random_strength': 0.36115499600153655, 'bagging_temperature': 0.5820145489160385, 'border_count': 245, 'scale_pos_weight': 1.842850355801414}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  74%|███████▍  | 74/100 [02:27<00:41,  1.59s/it]

[I 2025-11-09 13:58:02,600] Trial 73 finished with value: 0.5859473023839398 and parameters: {'learning_rate': 0.22346829619850794, 'depth': 3, 'l2_leaf_reg': 0.15419062143427323, 'subsample': 1.0, 'random_strength': 0.9742148368888556, 'bagging_temperature': 0.7580833638895457, 'border_count': 209, 'scale_pos_weight': 2.9488782548579096}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  75%|███████▌  | 75/100 [02:29<00:43,  1.72s/it]

[I 2025-11-09 13:58:04,638] Trial 74 finished with value: 0.5991129307403616 and parameters: {'learning_rate': 0.05785779398576467, 'depth': 4, 'l2_leaf_reg': 0.33605488931257466, 'subsample': 1.0, 'random_strength': 0.4738035204357345, 'bagging_temperature': 0.6000495164836376, 'border_count': 235, 'scale_pos_weight': 2.3769661710317744}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  76%|███████▌  | 76/100 [02:32<00:47,  1.98s/it]

[I 2025-11-09 13:58:07,208] Trial 75 finished with value: 0.5959019146792073 and parameters: {'learning_rate': 0.05675358287969091, 'depth': 4, 'l2_leaf_reg': 1.2925782407453568, 'subsample': 0.9, 'random_strength': 0.6043735697942819, 'bagging_temperature': 0.6910896346233276, 'border_count': 237, 'scale_pos_weight': 2.470303019752966}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  77%|███████▋  | 77/100 [02:33<00:43,  1.87s/it]

[I 2025-11-09 13:58:08,825] Trial 76 finished with value: 0.5848428835489834 and parameters: {'learning_rate': 0.07585318739414906, 'depth': 4, 'l2_leaf_reg': 0.3803163334758641, 'subsample': 1.0, 'random_strength': 0.03645980268367839, 'bagging_temperature': 0.8985038867665879, 'border_count': 192, 'scale_pos_weight': 1.9201182812939717}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  78%|███████▊  | 78/100 [02:35<00:42,  1.94s/it]

[I 2025-11-09 13:58:10,930] Trial 77 finished with value: 0.5817961899002116 and parameters: {'learning_rate': 0.05272085737148034, 'depth': 5, 'l2_leaf_reg': 0.7207047644061023, 'subsample': 0.7, 'random_strength': 0.1881828905929442, 'bagging_temperature': 0.5312746316894498, 'border_count': 199, 'scale_pos_weight': 3.32042358256098}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  79%|███████▉  | 79/100 [02:37<00:39,  1.86s/it]

[I 2025-11-09 13:58:12,597] Trial 78 finished with value: 0.5742857142857143 and parameters: {'learning_rate': 0.06505148713195301, 'depth': 4, 'l2_leaf_reg': 0.5030204126781747, 'subsample': 1.0, 'random_strength': 0.45216076565769037, 'bagging_temperature': 0.49443965976662185, 'border_count': 43, 'scale_pos_weight': 3.7893226440862406}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  80%|████████  | 80/100 [02:38<00:35,  1.75s/it]

[I 2025-11-09 13:58:14,104] Trial 79 finished with value: 0.5820063694267515 and parameters: {'learning_rate': 0.08423641522294377, 'depth': 5, 'l2_leaf_reg': 0.3247663367464556, 'subsample': 0.8, 'random_strength': 0.019338215140840206, 'bagging_temperature': 0.8138664149622752, 'border_count': 225, 'scale_pos_weight': 1.6119203988003208}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  81%|████████  | 81/100 [02:40<00:32,  1.72s/it]

[I 2025-11-09 13:58:15,742] Trial 80 finished with value: 0.5921555015619576 and parameters: {'learning_rate': 0.07530993423059222, 'depth': 4, 'l2_leaf_reg': 0.130372201548914, 'subsample': 0.9, 'random_strength': 0.05135535112831793, 'bagging_temperature': 0.4514606654297682, 'border_count': 182, 'scale_pos_weight': 2.259687216033822}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  82%|████████▏ | 82/100 [02:42<00:33,  1.85s/it]

[I 2025-11-09 13:58:17,897] Trial 81 finished with value: 0.59251968503937 and parameters: {'learning_rate': 0.056726965415728865, 'depth': 4, 'l2_leaf_reg': 1.6309949000594517, 'subsample': 0.9, 'random_strength': 0.46610727890460135, 'bagging_temperature': 0.6012183645556097, 'border_count': 237, 'scale_pos_weight': 2.6281840472056865}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  83%|████████▎ | 83/100 [02:46<00:38,  2.28s/it]

[I 2025-11-09 13:58:21,198] Trial 82 finished with value: 0.5958583834335337 and parameters: {'learning_rate': 0.03614341708717438, 'depth': 4, 'l2_leaf_reg': 3.8107353842068292, 'subsample': 0.9, 'random_strength': 0.16300190601069262, 'bagging_temperature': 0.6929752090312457, 'border_count': 239, 'scale_pos_weight': 2.52596775821937}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  84%|████████▍ | 84/100 [02:48<00:38,  2.42s/it]

[I 2025-11-09 13:58:23,923] Trial 83 finished with value: 0.5930599369085173 and parameters: {'learning_rate': 0.04701942082107072, 'depth': 5, 'l2_leaf_reg': 1.2028967390866556, 'subsample': 1.0, 'random_strength': 0.8990568853592926, 'bagging_temperature': 0.5638939637858129, 'border_count': 249, 'scale_pos_weight': 2.9594484909414596}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  85%|████████▌ | 85/100 [02:50<00:34,  2.27s/it]

[I 2025-11-09 13:58:25,858] Trial 84 finished with value: 0.5898181818181818 and parameters: {'learning_rate': 0.05821812201979598, 'depth': 4, 'l2_leaf_reg': 0.17126914044316813, 'subsample': 0.9, 'random_strength': 0.08615475500234508, 'bagging_temperature': 0.6761710003609371, 'border_count': 212, 'scale_pos_weight': 1.9805517299437851}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  86%|████████▌ | 86/100 [02:52<00:29,  2.10s/it]

[I 2025-11-09 13:58:27,569] Trial 85 finished with value: 0.5982905982905983 and parameters: {'learning_rate': 0.10051448023683583, 'depth': 4, 'l2_leaf_reg': 2.37601112329896, 'subsample': 0.8, 'random_strength': 0.29086202921110954, 'bagging_temperature': 0.7756590753054271, 'border_count': 222, 'scale_pos_weight': 2.3144921363275985}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  87%|████████▋ | 87/100 [02:54<00:26,  2.02s/it]

[I 2025-11-09 13:58:29,399] Trial 86 finished with value: 0.5596816976127321 and parameters: {'learning_rate': 0.10454814226819112, 'depth': 3, 'l2_leaf_reg': 2.969749064643583, 'subsample': 0.8, 'random_strength': 0.0021850383895062717, 'bagging_temperature': 0.9774504399313779, 'border_count': 219, 'scale_pos_weight': 1.2176627037309515}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  88%|████████▊ | 88/100 [02:55<00:23,  1.93s/it]

[I 2025-11-09 13:58:31,130] Trial 87 finished with value: 0.5819506272764063 and parameters: {'learning_rate': 0.12311951589476783, 'depth': 7, 'l2_leaf_reg': 4.7330268441978856, 'subsample': 0.8, 'random_strength': 0.24511434465289336, 'bagging_temperature': 0.7491674846079589, 'border_count': 195, 'scale_pos_weight': 1.564248574559313}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  89%|████████▉ | 89/100 [02:57<00:21,  1.93s/it]

[I 2025-11-09 13:58:33,057] Trial 88 finished with value: 0.5996545768566494 and parameters: {'learning_rate': 0.09493579587022292, 'depth': 6, 'l2_leaf_reg': 7.608917118495796, 'subsample': 0.8, 'random_strength': 0.3149539665246895, 'bagging_temperature': 0.2272346059854137, 'border_count': 204, 'scale_pos_weight': 2.3072932152521997}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  90%|█████████ | 90/100 [02:59<00:17,  1.74s/it]

[I 2025-11-09 13:58:34,341] Trial 89 finished with value: 0.546425845840753 and parameters: {'learning_rate': 0.09501064771814682, 'depth': 6, 'l2_leaf_reg': 7.877723075862087, 'subsample': 0.8, 'random_strength': 0.0009299109662407923, 'bagging_temperature': 0.9215952306524835, 'border_count': 206, 'scale_pos_weight': 5.261600832064204}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  91%|█████████ | 91/100 [03:00<00:14,  1.66s/it]

[I 2025-11-09 13:58:35,833] Trial 90 finished with value: 0.5911298154742635 and parameters: {'learning_rate': 0.1317772481665065, 'depth': 6, 'l2_leaf_reg': 2.304821711574925, 'subsample': 0.8, 'random_strength': 0.6514995961352182, 'bagging_temperature': 0.86160046102646, 'border_count': 223, 'scale_pos_weight': 2.8284280515156555}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  92%|█████████▏| 92/100 [03:02<00:13,  1.73s/it]

[I 2025-11-09 13:58:37,709] Trial 91 finished with value: 0.5947275922671353 and parameters: {'learning_rate': 0.11083870029796847, 'depth': 6, 'l2_leaf_reg': 5.96374141239219, 'subsample': 0.8, 'random_strength': 0.13851855698542148, 'bagging_temperature': 0.23405375594526281, 'border_count': 204, 'scale_pos_weight': 2.2782032471283924}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  93%|█████████▎| 93/100 [03:04<00:12,  1.80s/it]

[I 2025-11-09 13:58:39,694] Trial 92 finished with value: 0.5944005743000718 and parameters: {'learning_rate': 0.0852135844069856, 'depth': 6, 'l2_leaf_reg': 3.4694044944136406, 'subsample': 0.7, 'random_strength': 0.27059184872948433, 'bagging_temperature': 0.2865095892663278, 'border_count': 213, 'scale_pos_weight': 2.083156814628869}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  94%|█████████▍| 94/100 [03:06<00:10,  1.71s/it]

[I 2025-11-09 13:58:41,197] Trial 93 finished with value: 0.5957592339261286 and parameters: {'learning_rate': 0.10099736779471534, 'depth': 5, 'l2_leaf_reg': 9.292152102260074, 'subsample': 0.8, 'random_strength': 0.02411753898586732, 'bagging_temperature': 0.3255198331011106, 'border_count': 176, 'scale_pos_weight': 2.3715768709561313}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  95%|█████████▌| 95/100 [03:08<00:09,  1.94s/it]

[I 2025-11-09 13:58:43,653] Trial 94 finished with value: 0.5828571428571429 and parameters: {'learning_rate': 0.06726057486684367, 'depth': 7, 'l2_leaf_reg': 0.9294622119858504, 'subsample': 0.5, 'random_strength': 0.012250562558589669, 'bagging_temperature': 0.22056325323382492, 'border_count': 165, 'scale_pos_weight': 1.9091968544467437}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  96%|█████████▌| 96/100 [03:10<00:07,  1.84s/it]

[I 2025-11-09 13:58:45,261] Trial 95 finished with value: 0.5906515580736544 and parameters: {'learning_rate': 0.0908441383913417, 'depth': 5, 'l2_leaf_reg': 0.2448932466588933, 'subsample': 0.7, 'random_strength': 0.04804585565731807, 'bagging_temperature': 0.19367673789428552, 'border_count': 227, 'scale_pos_weight': 2.146129584926778}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  97%|█████████▋| 97/100 [03:11<00:04,  1.66s/it]

[I 2025-11-09 13:58:46,521] Trial 96 finished with value: 0.582547886895713 and parameters: {'learning_rate': 0.12006694947538783, 'depth': 3, 'l2_leaf_reg': 4.479346333401534, 'subsample': 0.8, 'random_strength': 0.3280891050974492, 'bagging_temperature': 0.8394137848935163, 'border_count': 187, 'scale_pos_weight': 3.185945767552493}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  98%|█████████▊| 98/100 [03:12<00:03,  1.60s/it]

[I 2025-11-09 13:58:47,979] Trial 97 finished with value: 0.575487012987013 and parameters: {'learning_rate': 0.0766161856967292, 'depth': 4, 'l2_leaf_reg': 0.4443869759374218, 'subsample': 0.8, 'random_strength': 0.008208450300138078, 'bagging_temperature': 0.7909338589023995, 'border_count': 255, 'scale_pos_weight': 1.5178163249815055}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044:  99%|█████████▉| 99/100 [03:13<00:01,  1.46s/it]

[I 2025-11-09 13:58:49,115] Trial 98 finished with value: 0.589873417721519 and parameters: {'learning_rate': 0.14824169356379488, 'depth': 5, 'l2_leaf_reg': 2.269428706482178, 'subsample': 1.0, 'random_strength': 0.07933371016480331, 'bagging_temperature': 0.25918917438730416, 'border_count': 195, 'scale_pos_weight': 2.8475789519257306}. Best is trial 31 with value: 0.6020444131124427.


Best trial: 31. Best value: 0.602044: 100%|██████████| 100/100 [03:15<00:00,  1.95s/it]

[I 2025-11-09 13:58:50,296] Trial 99 finished with value: 0.5979933110367893 and parameters: {'learning_rate': 0.10805861885699221, 'depth': 4, 'l2_leaf_reg': 6.931946231901031, 'subsample': 0.7, 'random_strength': 0.17322136286018303, 'bagging_temperature': 0.8865748038506424, 'border_count': 202, 'scale_pos_weight': 2.479648321568238}. Best is trial 31 with value: 0.6020444131124427.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.6020
  Best Hyperparameters:
    learning_rate: 0.15804097887036375
    depth: 3
    l2_leaf_reg: 0.22946926051906394
    subsample: 1.0
    random_strength: 0.8852579089082552
    bagging_temperature: 0.8538903062862053
    border_count: 207
    scale_pos_weight: 2.1219229945474476





In [14]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 2000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 100, # Keep early stopping
    'random_state': 123
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.15804097887036375, 'depth': 3, 'l2_leaf_reg': 0.22946926051906394, 'subsample': 1.0, 'random_strength': 0.8852579089082552, 'bagging_temperature': 0.8538903062862053, 'border_count': 207, 'scale_pos_weight': 2.1219229945474476}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.6020

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.90      0.82      0.86      4165
   Class 1.0       0.53      0.69      0.60      1235

    accuracy                           0.79      5400
   macro avg       0.72      0.76      0.73      5400
weighted avg       0.82      0.79      0.80      5400



In [15]:
print("Saving best_model...")

# Use the model's F1 score in the name
best_model.save_model("catboost_mod_f1_0.6020.cbm")

print("Done.")

Saving best_model...
Done.


In [16]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 161

Top 10 Most Important Features:
                        feature  importance
58            liab_prct_squared   14.854113
152  recovery_feasibility_score   10.996216
62                 liab_inverse    8.988131
61                liab_prct_log    6.934850
60               liab_prct_sqrt    5.346997
63         liab_inverse_squared    5.229173
51           witness_x_multicar    4.556612
46                liab_x_police    3.137194
53          multicar_x_highrisk    2.836996
59              liab_prct_cubed    2.705797

Bottom 10 Least Important Features:
                feature  importance
107   very_high_mileage         0.0
97    mid_price_vehicle         0.0
105        high_mileage         0.0
104  is_compact_vehicle         0.0
42        police_binary         0.0
102   is_medium_vehicle         0.0
101       medium_weight         0.0
100       light_vehicle         0.0
98      economy_vehicle         0.0
160     annual_income_z       

In [17]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            0
2       4655051            0
3       6728725            1
4       9848460            1


In [18]:
prediction.to_csv("results/catboost_6020_prediction.csv", index=False)