In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from cc3_preprocessor import Preprocessor

import joblib

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=0)

In [5]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [6]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [7]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Fit complete.
Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


In [8]:
print("Saving preprocessor and training columns...")

# Save the 'pre' object
joblib.dump(pre, 'cc3_preprocessor.pkl')

# Save the exact column order and names
joblib.dump(X_train_proc.columns, 'training_columns.pkl')

print("Done.")

Saving preprocessor and training columns...
Done.


## CatBoost with Optuna Tuning

In [9]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season']


In [11]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100,
        'random_state': 42
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False
    )

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [12]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-07 11:14:58,418] A new study created in memory with name: no-name-364a0fd2-32e5-4414-966c-3af07ef260b4



2. Starting Optuna study...


Best trial: 0. Best value: 0.54098:   1%|          | 1/100 [00:02<03:46,  2.29s/it]

[I 2025-11-07 11:15:00,716] Trial 0 finished with value: 0.540979571745016 and parameters: {'learning_rate': 0.02485482372534955, 'depth': 5, 'l2_leaf_reg': 0.02103275816497219, 'subsample': 0.9, 'random_strength': 0.019497543532053744, 'bagging_temperature': 0.30798298383762224, 'border_count': 250, 'scale_pos_weight': 6.23023418473191}. Best is trial 0 with value: 0.540979571745016.


Best trial: 0. Best value: 0.54098:   2%|▏         | 2/100 [00:05<04:36,  2.83s/it]

[I 2025-11-07 11:15:03,918] Trial 1 finished with value: 0.5332366449117718 and parameters: {'learning_rate': 0.01949762381261014, 'depth': 7, 'l2_leaf_reg': 1.5229462360823927, 'subsample': 0.5, 'random_strength': 3.016784344175113e-08, 'bagging_temperature': 0.3805006927636555, 'border_count': 117, 'scale_pos_weight': 7.143981626222278}. Best is trial 0 with value: 0.540979571745016.


Best trial: 0. Best value: 0.54098:   3%|▎         | 3/100 [00:06<03:29,  2.16s/it]

[I 2025-11-07 11:15:05,277] Trial 2 finished with value: 0.5267707082833133 and parameters: {'learning_rate': 0.15689929828049576, 'depth': 7, 'l2_leaf_reg': 0.0023184613407175626, 'subsample': 0.9, 'random_strength': 1.9302964576173697e-08, 'bagging_temperature': 0.7473287274591744, 'border_count': 126, 'scale_pos_weight': 7.145697006331965}. Best is trial 0 with value: 0.540979571745016.


Best trial: 0. Best value: 0.54098:   4%|▍         | 4/100 [00:16<07:55,  4.95s/it]

[I 2025-11-07 11:15:14,505] Trial 3 finished with value: 0.5382173382173382 and parameters: {'learning_rate': 0.011028405028481464, 'depth': 10, 'l2_leaf_reg': 0.03064146745386029, 'subsample': 0.6, 'random_strength': 0.5615356324583837, 'bagging_temperature': 0.03229659585360667, 'border_count': 242, 'scale_pos_weight': 8.998696700874152}. Best is trial 0 with value: 0.540979571745016.


Best trial: 4. Best value: 0.558707:   5%|▌         | 5/100 [00:17<05:38,  3.57s/it]

[I 2025-11-07 11:15:15,620] Trial 4 finished with value: 0.558706599522926 and parameters: {'learning_rate': 0.130714854321783, 'depth': 6, 'l2_leaf_reg': 0.13135356927503952, 'subsample': 0.7, 'random_strength': 6.926018146779139e-06, 'bagging_temperature': 0.2555623206074389, 'border_count': 44, 'scale_pos_weight': 4.735589670801926}. Best is trial 4 with value: 0.558706599522926.


Best trial: 4. Best value: 0.558707:   6%|▌         | 6/100 [00:18<04:08,  2.64s/it]

[I 2025-11-07 11:15:16,471] Trial 5 finished with value: 0.5363614399613433 and parameters: {'learning_rate': 0.27538187930413444, 'depth': 5, 'l2_leaf_reg': 0.4199436783828399, 'subsample': 1.0, 'random_strength': 0.7833183251901533, 'bagging_temperature': 0.8837777305383209, 'border_count': 137, 'scale_pos_weight': 6.584149253733651}. Best is trial 4 with value: 0.558706599522926.


Best trial: 6. Best value: 0.598336:   7%|▋         | 7/100 [00:21<04:27,  2.88s/it]

[I 2025-11-07 11:15:19,832] Trial 6 finished with value: 0.5983361064891847 and parameters: {'learning_rate': 0.03319699996561389, 'depth': 8, 'l2_leaf_reg': 1.5195170335147516, 'subsample': 0.6, 'random_strength': 4.371210767413567e-06, 'bagging_temperature': 0.06342959169584128, 'border_count': 233, 'scale_pos_weight': 2.677716673060539}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:   8%|▊         | 8/100 [00:23<04:05,  2.67s/it]

[I 2025-11-07 11:15:22,065] Trial 7 finished with value: 0.5832572298325723 and parameters: {'learning_rate': 0.06052643819273355, 'depth': 6, 'l2_leaf_reg': 0.9835797075807072, 'subsample': 0.5, 'random_strength': 0.00011932307090422826, 'bagging_temperature': 0.912821500615557, 'border_count': 101, 'scale_pos_weight': 3.527404242644011}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:   9%|▉         | 9/100 [00:28<04:53,  3.23s/it]

[I 2025-11-07 11:15:26,521] Trial 8 finished with value: 0.5654421768707483 and parameters: {'learning_rate': 0.02096341274853782, 'depth': 8, 'l2_leaf_reg': 1.1437134367203956, 'subsample': 0.8, 'random_strength': 0.0006687767216584444, 'bagging_temperature': 0.26016831512178373, 'border_count': 229, 'scale_pos_weight': 5.070619177801692}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:  10%|█         | 10/100 [00:28<03:44,  2.49s/it]

[I 2025-11-07 11:15:27,356] Trial 9 finished with value: 0.5337821591728781 and parameters: {'learning_rate': 0.24433822099146552, 'depth': 5, 'l2_leaf_reg': 6.852547308643222, 'subsample': 0.7, 'random_strength': 9.456917604614853e-06, 'bagging_temperature': 0.6341479574286454, 'border_count': 238, 'scale_pos_weight': 6.875850299639351}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:  11%|█         | 11/100 [00:31<03:31,  2.37s/it]

[I 2025-11-07 11:15:29,468] Trial 10 finished with value: 0.5830670926517572 and parameters: {'learning_rate': 0.05462101042929931, 'depth': 3, 'l2_leaf_reg': 6.074508900834217, 'subsample': 0.6, 'random_strength': 6.284736543387426e-07, 'bagging_temperature': 0.03399247245376474, 'border_count': 190, 'scale_pos_weight': 1.5770444307959588}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:  12%|█▏        | 12/100 [00:35<04:14,  2.90s/it]

[I 2025-11-07 11:15:33,555] Trial 11 finished with value: 0.586362062997577 and parameters: {'learning_rate': 0.06439138548555229, 'depth': 10, 'l2_leaf_reg': 0.401609059531969, 'subsample': 0.5, 'random_strength': 0.00033619457728453306, 'bagging_temperature': 0.9673194317244262, 'border_count': 82, 'scale_pos_weight': 2.674771847731421}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:  13%|█▎        | 13/100 [00:40<05:03,  3.49s/it]

[I 2025-11-07 11:15:38,426] Trial 12 finished with value: 0.4985279685966634 and parameters: {'learning_rate': 0.04847123553136911, 'depth': 10, 'l2_leaf_reg': 0.22938810178626787, 'subsample': 0.6, 'random_strength': 0.002320148886091207, 'bagging_temperature': 0.5156229574337459, 'border_count': 64, 'scale_pos_weight': 1.0771669174872152}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:  14%|█▍        | 14/100 [00:42<04:41,  3.28s/it]

[I 2025-11-07 11:15:41,197] Trial 13 finished with value: 0.580814717477004 and parameters: {'learning_rate': 0.07983451804535659, 'depth': 9, 'l2_leaf_reg': 0.025130937457741045, 'subsample': 0.5, 'random_strength': 2.9725474821091677e-06, 'bagging_temperature': 0.9991326340250155, 'border_count': 180, 'scale_pos_weight': 2.947552549157342}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:  15%|█▌        | 15/100 [00:47<05:09,  3.64s/it]

[I 2025-11-07 11:15:45,675] Trial 14 finished with value: 0.5978225008248103 and parameters: {'learning_rate': 0.03437405662224441, 'depth': 9, 'l2_leaf_reg': 2.9721485898983837, 'subsample': 0.6, 'random_strength': 0.0002127673311043226, 'bagging_temperature': 0.7024281019700767, 'border_count': 81, 'scale_pos_weight': 2.796199420349436}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:  16%|█▌        | 16/100 [00:50<05:07,  3.66s/it]

[I 2025-11-07 11:15:49,380] Trial 15 finished with value: 0.5796329188869154 and parameters: {'learning_rate': 0.03334611579567011, 'depth': 8, 'l2_leaf_reg': 3.6024698286355523, 'subsample': 0.7, 'random_strength': 0.008747563702641134, 'bagging_temperature': 0.7323999026648224, 'border_count': 170, 'scale_pos_weight': 3.7996796996666293}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 6. Best value: 0.598336:  17%|█▋        | 17/100 [01:02<08:24,  6.08s/it]

[I 2025-11-07 11:16:01,093] Trial 16 finished with value: 0.5787370103916867 and parameters: {'learning_rate': 0.010527538819439795, 'depth': 8, 'l2_leaf_reg': 2.345801427462217, 'subsample': 0.6, 'random_strength': 2.426063170320564e-07, 'bagging_temperature': 0.5075375290511999, 'border_count': 210, 'scale_pos_weight': 1.6596015201656744}. Best is trial 6 with value: 0.5983361064891847.


Best trial: 17. Best value: 0.598827:  18%|█▊        | 18/100 [01:07<07:49,  5.72s/it]

[I 2025-11-07 11:16:05,988] Trial 17 finished with value: 0.598827181786823 and parameters: {'learning_rate': 0.03675870503698871, 'depth': 9, 'l2_leaf_reg': 8.985833105936887, 'subsample': 0.8, 'random_strength': 3.267120169373821e-05, 'bagging_temperature': 0.155515627738766, 'border_count': 33, 'scale_pos_weight': 2.492128757541074}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  19%|█▉        | 19/100 [01:12<07:15,  5.37s/it]

[I 2025-11-07 11:16:10,537] Trial 18 finished with value: 0.5695253955037469 and parameters: {'learning_rate': 0.03515203807907126, 'depth': 9, 'l2_leaf_reg': 9.021127492852406, 'subsample': 0.8, 'random_strength': 2.7228594951637223e-05, 'bagging_temperature': 0.13073235237938374, 'border_count': 35, 'scale_pos_weight': 4.5586349809848326}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  20%|██        | 20/100 [01:16<06:42,  5.03s/it]

[I 2025-11-07 11:16:14,767] Trial 19 finished with value: 0.5867530597552196 and parameters: {'learning_rate': 0.014236080899949792, 'depth': 8, 'l2_leaf_reg': 0.0011134337771189285, 'subsample': 0.9, 'random_strength': 5.745185955460369e-07, 'bagging_temperature': 0.17595811264977035, 'border_count': 157, 'scale_pos_weight': 2.226960865858942}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  21%|██        | 21/100 [01:17<04:59,  3.79s/it]

[I 2025-11-07 11:16:15,663] Trial 20 finished with value: 0.5202391904323828 and parameters: {'learning_rate': 0.10423624961117098, 'depth': 3, 'l2_leaf_reg': 0.7714362194153199, 'subsample': 0.8, 'random_strength': 3.8056779540767166e-05, 'bagging_temperature': 0.14906617564977953, 'border_count': 211, 'scale_pos_weight': 8.402492096371498}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  22%|██▏       | 22/100 [01:21<05:04,  3.90s/it]

[I 2025-11-07 11:16:19,836] Trial 21 finished with value: 0.5832097300504301 and parameters: {'learning_rate': 0.036952781370953564, 'depth': 9, 'l2_leaf_reg': 2.8012439358744885, 'subsample': 0.7, 'random_strength': 0.0001595832602709167, 'bagging_temperature': 0.38451357364099004, 'border_count': 41, 'scale_pos_weight': 3.913732703688784}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  23%|██▎       | 23/100 [01:26<05:26,  4.25s/it]

[I 2025-11-07 11:16:24,883] Trial 22 finished with value: 0.5895840157222404 and parameters: {'learning_rate': 0.028911188364674337, 'depth': 9, 'l2_leaf_reg': 2.751436388381239, 'subsample': 0.6, 'random_strength': 2.6332167898128495e-06, 'bagging_temperature': 0.6417555175172338, 'border_count': 72, 'scale_pos_weight': 2.8877991829758347}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  24%|██▍       | 24/100 [01:31<05:42,  4.51s/it]

[I 2025-11-07 11:16:29,999] Trial 23 finished with value: 0.5884923525127458 and parameters: {'learning_rate': 0.041664846463409996, 'depth': 9, 'l2_leaf_reg': 7.9355740496980385, 'subsample': 0.8, 'random_strength': 0.0009731540269992757, 'bagging_temperature': 0.014074518111129114, 'border_count': 99, 'scale_pos_weight': 2.155349775590881}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  25%|██▌       | 25/100 [01:36<05:45,  4.61s/it]

[I 2025-11-07 11:16:34,847] Trial 24 finished with value: 0.5363075462743236 and parameters: {'learning_rate': 0.017961810399660105, 'depth': 7, 'l2_leaf_reg': 0.07738750679913821, 'subsample': 0.7, 'random_strength': 3.187043448219196e-05, 'bagging_temperature': 0.7857017460344655, 'border_count': 58, 'scale_pos_weight': 1.0676572512366898}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  26%|██▌       | 26/100 [01:40<05:22,  4.36s/it]

[I 2025-11-07 11:16:38,611] Trial 25 finished with value: 0.5849289684990735 and parameters: {'learning_rate': 0.024680144372660695, 'depth': 8, 'l2_leaf_reg': 0.5555291436050646, 'subsample': 1.0, 'random_strength': 0.004688410217445917, 'bagging_temperature': 0.10261334089489092, 'border_count': 94, 'scale_pos_weight': 3.385692681976344}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  27%|██▋       | 27/100 [01:44<05:12,  4.28s/it]

[I 2025-11-07 11:16:42,730] Trial 26 finished with value: 0.5512920908379013 and parameters: {'learning_rate': 0.08592050004915999, 'depth': 10, 'l2_leaf_reg': 4.410572224791136, 'subsample': 0.6, 'random_strength': 0.09899097906806407, 'bagging_temperature': 0.6274650410772155, 'border_count': 52, 'scale_pos_weight': 5.952880755426287}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  28%|██▊       | 28/100 [01:47<04:52,  4.06s/it]

[I 2025-11-07 11:16:46,251] Trial 27 finished with value: 0.5745226560273582 and parameters: {'learning_rate': 0.04276176403356708, 'depth': 9, 'l2_leaf_reg': 1.4938949155449142, 'subsample': 0.7, 'random_strength': 1.4677635571992685e-07, 'bagging_temperature': 0.3591549676684501, 'border_count': 32, 'scale_pos_weight': 4.246757274067985}. Best is trial 17 with value: 0.598827181786823.


Best trial: 17. Best value: 0.598827:  29%|██▉       | 29/100 [01:52<05:09,  4.35s/it]

[I 2025-11-07 11:16:51,301] Trial 28 finished with value: 0.5843960990247562 and parameters: {'learning_rate': 0.014888444612294651, 'depth': 7, 'l2_leaf_reg': 0.008836135914828592, 'subsample': 0.8, 'random_strength': 1.7893278547194187e-06, 'bagging_temperature': 0.45617084130782365, 'border_count': 116, 'scale_pos_weight': 1.980762616781567}. Best is trial 17 with value: 0.598827181786823.


Best trial: 29. Best value: 0.600199:  30%|███       | 30/100 [01:57<05:09,  4.41s/it]

[I 2025-11-07 11:16:55,856] Trial 29 finished with value: 0.6001987413050679 and parameters: {'learning_rate': 0.02606147390348148, 'depth': 8, 'l2_leaf_reg': 2.0969273474161754, 'subsample': 0.9, 'random_strength': 0.04387016749261832, 'bagging_temperature': 0.22609000036822002, 'border_count': 151, 'scale_pos_weight': 2.774317452724905}. Best is trial 29 with value: 0.6001987413050679.


Best trial: 29. Best value: 0.600199:  31%|███       | 31/100 [02:00<04:27,  3.88s/it]

[I 2025-11-07 11:16:58,498] Trial 30 finished with value: 0.551954732510288 and parameters: {'learning_rate': 0.024478542147846696, 'depth': 6, 'l2_leaf_reg': 0.21337228424177138, 'subsample': 0.9, 'random_strength': 0.03865709567036796, 'bagging_temperature': 0.2291515775126956, 'border_count': 252, 'scale_pos_weight': 5.426943361180623}. Best is trial 29 with value: 0.6001987413050679.


Best trial: 31. Best value: 0.603374:  32%|███▏      | 32/100 [02:04<04:29,  3.96s/it]

[I 2025-11-07 11:17:02,636] Trial 31 finished with value: 0.603374131657294 and parameters: {'learning_rate': 0.029306960143738885, 'depth': 8, 'l2_leaf_reg': 2.160435718308443, 'subsample': 0.9, 'random_strength': 0.08003284959085531, 'bagging_temperature': 0.08294170454932412, 'border_count': 147, 'scale_pos_weight': 2.8549710208640757}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  33%|███▎      | 33/100 [02:08<04:36,  4.13s/it]

[I 2025-11-07 11:17:07,174] Trial 32 finished with value: 0.5908806995627732 and parameters: {'learning_rate': 0.02820690122908168, 'depth': 8, 'l2_leaf_reg': 2.183217121786536, 'subsample': 0.9, 'random_strength': 0.14983913649438513, 'bagging_temperature': 0.09476967174526646, 'border_count': 148, 'scale_pos_weight': 3.31978880136559}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  34%|███▍      | 34/100 [02:13<04:45,  4.33s/it]

[I 2025-11-07 11:17:11,952] Trial 33 finished with value: 0.5982324949014276 and parameters: {'learning_rate': 0.02028039341470798, 'depth': 7, 'l2_leaf_reg': 1.5679574464528268, 'subsample': 1.0, 'random_strength': 0.03980231196594959, 'bagging_temperature': 0.2078572951543005, 'border_count': 164, 'scale_pos_weight': 2.5534477529735495}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  35%|███▌      | 35/100 [02:20<05:42,  5.26s/it]

[I 2025-11-07 11:17:19,399] Trial 34 finished with value: 0.5814138204924544 and parameters: {'learning_rate': 0.027315855700682647, 'depth': 8, 'l2_leaf_reg': 5.417169820134765, 'subsample': 0.9, 'random_strength': 0.1667837622664848, 'bagging_temperature': 0.0719855835211698, 'border_count': 130, 'scale_pos_weight': 1.7119266111465716}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  36%|███▌      | 36/100 [02:24<05:04,  4.76s/it]

[I 2025-11-07 11:17:22,999] Trial 35 finished with value: 0.5237655745269958 and parameters: {'learning_rate': 0.015506082319601783, 'depth': 7, 'l2_leaf_reg': 0.06991603357110632, 'subsample': 0.9, 'random_strength': 0.010572666724990881, 'bagging_temperature': 0.32733291932771824, 'border_count': 193, 'scale_pos_weight': 9.987439598517687}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  37%|███▋      | 37/100 [02:27<04:30,  4.29s/it]

[I 2025-11-07 11:17:26,185] Trial 36 finished with value: 0.5680044593088072 and parameters: {'learning_rate': 0.022211144594356625, 'depth': 7, 'l2_leaf_reg': 9.94686684475546, 'subsample': 1.0, 'random_strength': 9.175521963953124e-06, 'bagging_temperature': 0.3000309474053505, 'border_count': 146, 'scale_pos_weight': 4.144887865204626}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  38%|███▊      | 38/100 [02:29<03:43,  3.61s/it]

[I 2025-11-07 11:17:28,210] Trial 37 finished with value: 0.5860182370820669 and parameters: {'learning_rate': 0.04325997654457915, 'depth': 4, 'l2_leaf_reg': 0.6755843873810252, 'subsample': 0.8, 'random_strength': 0.3101091244090221, 'bagging_temperature': 0.17226887112183148, 'border_count': 117, 'scale_pos_weight': 3.25267338809679}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  39%|███▉      | 39/100 [02:31<03:06,  3.06s/it]

[I 2025-11-07 11:17:29,977] Trial 38 finished with value: 0.5930232558139535 and parameters: {'learning_rate': 0.05216882381217829, 'depth': 6, 'l2_leaf_reg': 0.00804204753601715, 'subsample': 0.9, 'random_strength': 0.03869346580846738, 'bagging_temperature': 0.002947868768127776, 'border_count': 218, 'scale_pos_weight': 2.3602504434772946}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  40%|████      | 40/100 [02:34<03:01,  3.02s/it]

[I 2025-11-07 11:17:32,901] Trial 39 finished with value: 0.5670019026909486 and parameters: {'learning_rate': 0.0730493673174901, 'depth': 8, 'l2_leaf_reg': 1.6042313854501675, 'subsample': 0.9, 'random_strength': 0.0017441714273245534, 'bagging_temperature': 0.06710593271887352, 'border_count': 126, 'scale_pos_weight': 4.684449767265772}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  41%|████      | 41/100 [02:43<04:38,  4.73s/it]

[I 2025-11-07 11:17:41,625] Trial 40 finished with value: 0.5547119249665029 and parameters: {'learning_rate': 0.018417645464167527, 'depth': 10, 'l2_leaf_reg': 0.3258414691473025, 'subsample': 1.0, 'random_strength': 4.407585798074645e-08, 'bagging_temperature': 0.2822092939342255, 'border_count': 191, 'scale_pos_weight': 1.343089184693957}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  42%|████▏     | 42/100 [02:46<04:05,  4.24s/it]

[I 2025-11-07 11:17:44,715] Trial 41 finished with value: 0.5975526852481305 and parameters: {'learning_rate': 0.030326617835575033, 'depth': 7, 'l2_leaf_reg': 1.1248827340096381, 'subsample': 1.0, 'random_strength': 0.03054024719148601, 'bagging_temperature': 0.18636826008423893, 'border_count': 160, 'scale_pos_weight': 2.5009025670264924}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  43%|████▎     | 43/100 [02:51<04:25,  4.65s/it]

[I 2025-11-07 11:17:50,331] Trial 42 finished with value: 0.5910112359550562 and parameters: {'learning_rate': 0.021483673389667862, 'depth': 8, 'l2_leaf_reg': 1.708070290302072, 'subsample': 1.0, 'random_strength': 0.7762582977715998, 'bagging_temperature': 0.22494398581016375, 'border_count': 171, 'scale_pos_weight': 1.9673960373701844}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  44%|████▍     | 44/100 [02:56<04:13,  4.53s/it]

[I 2025-11-07 11:17:54,568] Trial 43 finished with value: 0.5905830994699096 and parameters: {'learning_rate': 0.019000584038077233, 'depth': 6, 'l2_leaf_reg': 4.583315416430868, 'subsample': 1.0, 'random_strength': 6.418678630411333e-05, 'bagging_temperature': 0.21927990988094503, 'border_count': 140, 'scale_pos_weight': 3.1330639492903356}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  45%|████▌     | 45/100 [03:02<04:36,  5.02s/it]

[I 2025-11-07 11:18:00,742] Trial 44 finished with value: 0.5831842576028623 and parameters: {'learning_rate': 0.013051703965703232, 'depth': 8, 'l2_leaf_reg': 0.9060219725374657, 'subsample': 0.8, 'random_strength': 0.0818611309027063, 'bagging_temperature': 0.12616545885247346, 'border_count': 159, 'scale_pos_weight': 3.671245940233788}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  46%|████▌     | 46/100 [03:04<03:47,  4.21s/it]

[I 2025-11-07 11:18:03,076] Trial 45 finished with value: 0.598404255319149 and parameters: {'learning_rate': 0.039267376801633606, 'depth': 7, 'l2_leaf_reg': 0.12454168869677058, 'subsample': 0.9, 'random_strength': 0.011378793464880037, 'bagging_temperature': 0.4178162787968074, 'border_count': 107, 'scale_pos_weight': 2.6129931479723227}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  47%|████▋     | 47/100 [03:08<03:34,  4.05s/it]

[I 2025-11-07 11:18:06,747] Trial 46 finished with value: 0.5423473234481025 and parameters: {'learning_rate': 0.05883100929797357, 'depth': 9, 'l2_leaf_reg': 0.03897317418478781, 'subsample': 0.9, 'random_strength': 0.010312333699998271, 'bagging_temperature': 0.4338391554769602, 'border_count': 105, 'scale_pos_weight': 8.133417247984312}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  48%|████▊     | 48/100 [03:10<03:03,  3.53s/it]

[I 2025-11-07 11:18:09,047] Trial 47 finished with value: 0.5542359461599367 and parameters: {'learning_rate': 0.047092888210336734, 'depth': 8, 'l2_leaf_reg': 0.014297016137168066, 'subsample': 0.9, 'random_strength': 0.0004779683429497332, 'bagging_temperature': 0.07685230487207806, 'border_count': 237, 'scale_pos_weight': 5.052206323914872}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  49%|████▉     | 49/100 [03:21<04:53,  5.76s/it]

[I 2025-11-07 11:18:20,011] Trial 48 finished with value: 0.583804143126177 and parameters: {'learning_rate': 0.1994149720311645, 'depth': 7, 'l2_leaf_reg': 0.27266256967212665, 'subsample': 0.8, 'random_strength': 0.32219495329082537, 'bagging_temperature': 0.5855118006656115, 'border_count': 180, 'scale_pos_weight': 2.8837754747009203}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  50%|█████     | 50/100 [03:27<04:47,  5.74s/it]

[I 2025-11-07 11:18:25,714] Trial 49 finished with value: 0.5614489003880984 and parameters: {'learning_rate': 0.04025140944658604, 'depth': 9, 'l2_leaf_reg': 0.15381067016853758, 'subsample': 0.5, 'random_strength': 4.1518127909247175e-06, 'bagging_temperature': 0.051228672860356705, 'border_count': 80, 'scale_pos_weight': 1.4372088539546526}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  51%|█████     | 51/100 [03:30<04:05,  5.01s/it]

[I 2025-11-07 11:18:29,005] Trial 50 finished with value: 0.5868081880212282 and parameters: {'learning_rate': 0.03360689735706318, 'depth': 6, 'l2_leaf_reg': 0.4365859025122119, 'subsample': 0.9, 'random_strength': 1.873180277027862e-05, 'bagging_temperature': 0.3424946978570822, 'border_count': 91, 'scale_pos_weight': 1.8774547955051148}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  52%|█████▏    | 52/100 [03:35<04:00,  5.01s/it]

[I 2025-11-07 11:18:34,028] Trial 51 finished with value: 0.593972231628852 and parameters: {'learning_rate': 0.02354637789817348, 'depth': 7, 'l2_leaf_reg': 3.582530897073982, 'subsample': 1.0, 'random_strength': 0.018473021148768937, 'bagging_temperature': 0.25402480627609103, 'border_count': 133, 'scale_pos_weight': 2.5533233178279455}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  53%|█████▎    | 53/100 [03:39<03:34,  4.57s/it]

[I 2025-11-07 11:18:37,561] Trial 52 finished with value: 0.597623089983022 and parameters: {'learning_rate': 0.0308734391339343, 'depth': 7, 'l2_leaf_reg': 1.4550144338251902, 'subsample': 0.8, 'random_strength': 0.004723718993544308, 'bagging_temperature': 0.13815216323525376, 'border_count': 108, 'scale_pos_weight': 2.501411534374488}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  54%|█████▍    | 54/100 [03:42<03:13,  4.21s/it]

[I 2025-11-07 11:18:40,933] Trial 53 finished with value: 0.5804341361879275 and parameters: {'learning_rate': 0.03725139978537528, 'depth': 8, 'l2_leaf_reg': 5.879064289725326, 'subsample': 1.0, 'random_strength': 0.049140375195587366, 'bagging_temperature': 0.4133648946285068, 'border_count': 204, 'scale_pos_weight': 3.542688920728038}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  55%|█████▌    | 55/100 [03:45<02:51,  3.81s/it]

[I 2025-11-07 11:18:43,827] Trial 54 finished with value: 0.5881608103830326 and parameters: {'learning_rate': 0.048314586592853585, 'depth': 8, 'l2_leaf_reg': 2.0904625068857383, 'subsample': 0.9, 'random_strength': 0.408244744132052, 'bagging_temperature': 0.18268065885757512, 'border_count': 151, 'scale_pos_weight': 3.0763557970730857}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  56%|█████▌    | 56/100 [03:49<02:45,  3.75s/it]

[I 2025-11-07 11:18:47,439] Trial 55 finished with value: 0.5707940033314826 and parameters: {'learning_rate': 0.016585335366049742, 'depth': 5, 'l2_leaf_reg': 0.6004820613201364, 'subsample': 0.8, 'random_strength': 0.01792253833617853, 'bagging_temperature': 0.2769826214735019, 'border_count': 169, 'scale_pos_weight': 4.175560597406188}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  57%|█████▋    | 57/100 [03:55<03:14,  4.53s/it]

[I 2025-11-07 11:18:53,785] Trial 56 finished with value: 0.5970042796005706 and parameters: {'learning_rate': 0.02685109285865009, 'depth': 9, 'l2_leaf_reg': 3.4747186706283304, 'subsample': 0.7, 'random_strength': 0.0024718965061171164, 'bagging_temperature': 0.034983443128303704, 'border_count': 180, 'scale_pos_weight': 2.283907074120068}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  58%|█████▊    | 58/100 [04:02<03:44,  5.34s/it]

[I 2025-11-07 11:19:01,017] Trial 57 finished with value: 0.5956175298804781 and parameters: {'learning_rate': 0.01274343628881416, 'depth': 7, 'l2_leaf_reg': 1.2828883650041771, 'subsample': 0.9, 'random_strength': 1.2322180980270748e-06, 'bagging_temperature': 0.1095251966040919, 'border_count': 139, 'scale_pos_weight': 2.7671283747736894}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  59%|█████▉    | 59/100 [04:04<02:52,  4.21s/it]

[I 2025-11-07 11:19:02,572] Trial 58 finished with value: 0.5746994848311391 and parameters: {'learning_rate': 0.06673294316351429, 'depth': 6, 'l2_leaf_reg': 0.9310165020465162, 'subsample': 1.0, 'random_strength': 1.4744205249388945e-05, 'bagging_temperature': 0.5539760751535098, 'border_count': 121, 'scale_pos_weight': 3.839316594303064}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  60%|██████    | 60/100 [04:12<03:35,  5.38s/it]

[I 2025-11-07 11:19:10,698] Trial 59 finished with value: 0.5805354866128347 and parameters: {'learning_rate': 0.020210433810324002, 'depth': 8, 'l2_leaf_reg': 7.494045569701478, 'subsample': 0.9, 'random_strength': 5.101949683755393e-05, 'bagging_temperature': 0.16263846103720495, 'border_count': 65, 'scale_pos_weight': 1.4115061646332667}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  61%|██████    | 61/100 [04:17<03:23,  5.22s/it]

[I 2025-11-07 11:19:15,548] Trial 60 finished with value: 0.5815602836879432 and parameters: {'learning_rate': 0.03796490632438357, 'depth': 10, 'l2_leaf_reg': 0.040381327543700864, 'subsample': 0.8, 'random_strength': 0.06224208842946307, 'bagging_temperature': 0.4730311989190055, 'border_count': 228, 'scale_pos_weight': 2.105731774366991}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  62%|██████▏   | 62/100 [04:22<03:16,  5.16s/it]

[I 2025-11-07 11:19:20,570] Trial 61 finished with value: 0.5970548862115127 and parameters: {'learning_rate': 0.03250270797299149, 'depth': 9, 'l2_leaf_reg': 2.6819951396042487, 'subsample': 0.6, 'random_strength': 0.0001357230031681271, 'bagging_temperature': 0.2012556496280623, 'border_count': 84, 'scale_pos_weight': 2.7497101112813795}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  63%|██████▎   | 63/100 [04:27<03:17,  5.33s/it]

[I 2025-11-07 11:19:26,300] Trial 62 finished with value: 0.5940912010276173 and parameters: {'learning_rate': 0.02581108683563507, 'depth': 9, 'l2_leaf_reg': 1.9929193507474416, 'subsample': 0.5, 'random_strength': 0.0002883840622602424, 'bagging_temperature': 0.821150453605943, 'border_count': 54, 'scale_pos_weight': 3.0055970624328006}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  64%|██████▍   | 64/100 [04:34<03:23,  5.65s/it]

[I 2025-11-07 11:19:32,700] Trial 63 finished with value: 0.597623089983022 and parameters: {'learning_rate': 0.03273086200698259, 'depth': 9, 'l2_leaf_reg': 4.026425157936045, 'subsample': 0.6, 'random_strength': 0.2028264591479798, 'bagging_temperature': 0.7021270678891739, 'border_count': 69, 'scale_pos_weight': 2.6556229616520164}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  65%|██████▌   | 65/100 [04:36<02:47,  4.77s/it]

[I 2025-11-07 11:19:35,422] Trial 64 finished with value: 0.5853221957040573 and parameters: {'learning_rate': 0.04479266508720521, 'depth': 8, 'l2_leaf_reg': 2.8543296265550064, 'subsample': 0.6, 'random_strength': 0.0060946490968076415, 'bagging_temperature': 0.8706739098154105, 'border_count': 113, 'scale_pos_weight': 3.4880890013950245}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  66%|██████▌   | 66/100 [04:43<03:02,  5.37s/it]

[I 2025-11-07 11:19:42,189] Trial 65 finished with value: 0.588917059036581 and parameters: {'learning_rate': 0.0369174300069927, 'depth': 10, 'l2_leaf_reg': 5.500085906469829, 'subsample': 0.7, 'random_strength': 0.0008663271087456384, 'bagging_temperature': 0.24915279602262008, 'border_count': 49, 'scale_pos_weight': 2.230184049512744}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  67%|██████▋   | 67/100 [04:46<02:27,  4.47s/it]

[I 2025-11-07 11:19:44,563] Trial 66 finished with value: 0.5807962529274004 and parameters: {'learning_rate': 0.02934766297574005, 'depth': 7, 'l2_leaf_reg': 0.0037468787624516556, 'subsample': 0.6, 'random_strength': 0.00024078591161325004, 'bagging_temperature': 0.6651233983358523, 'border_count': 39, 'scale_pos_weight': 1.7530217528653438}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  68%|██████▊   | 68/100 [04:48<02:05,  3.92s/it]

[I 2025-11-07 11:19:47,182] Trial 67 finished with value: 0.5339521250610649 and parameters: {'learning_rate': 0.05456601860128829, 'depth': 8, 'l2_leaf_reg': 7.671974202606831, 'subsample': 0.7, 'random_strength': 5.174937562584469e-06, 'bagging_temperature': 0.14079958903841017, 'border_count': 78, 'scale_pos_weight': 7.327214315927473}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  69%|██████▉   | 69/100 [04:54<02:19,  4.50s/it]

[I 2025-11-07 11:19:53,027] Trial 68 finished with value: 0.5693430656934306 and parameters: {'learning_rate': 0.024135515920676153, 'depth': 9, 'l2_leaf_reg': 0.8238964256431621, 'subsample': 0.5, 'random_strength': 0.992543072602495, 'bagging_temperature': 0.3814557966861986, 'border_count': 153, 'scale_pos_weight': 4.390591389679657}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  70%|███████   | 70/100 [04:58<02:06,  4.20s/it]

[I 2025-11-07 11:19:56,542] Trial 69 finished with value: 0.5746527777777778 and parameters: {'learning_rate': 0.022035083846103358, 'depth': 7, 'l2_leaf_reg': 0.4929797307941673, 'subsample': 0.9, 'random_strength': 6.561480543803204e-07, 'bagging_temperature': 0.31209549425312005, 'border_count': 164, 'scale_pos_weight': 3.9190758992662587}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  71%|███████   | 71/100 [05:01<01:59,  4.11s/it]

[I 2025-11-07 11:20:00,425] Trial 70 finished with value: 0.5851364063969896 and parameters: {'learning_rate': 0.04047830350260841, 'depth': 9, 'l2_leaf_reg': 0.12181816161404643, 'subsample': 0.7, 'random_strength': 0.10441637162358779, 'bagging_temperature': 0.03594451947208361, 'border_count': 246, 'scale_pos_weight': 3.3456531447252003}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  72%|███████▏  | 72/100 [05:05<01:48,  3.89s/it]

[I 2025-11-07 11:20:03,803] Trial 71 finished with value: 0.589316093909493 and parameters: {'learning_rate': 0.030859949985666556, 'depth': 7, 'l2_leaf_reg': 1.3044971076468943, 'subsample': 0.8, 'random_strength': 0.002960338008474618, 'bagging_temperature': 0.10307418675798666, 'border_count': 106, 'scale_pos_weight': 2.518488841612174}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  73%|███████▎  | 73/100 [05:08<01:41,  3.75s/it]

[I 2025-11-07 11:20:07,233] Trial 72 finished with value: 0.592871830020562 and parameters: {'learning_rate': 0.027405910440148257, 'depth': 7, 'l2_leaf_reg': 2.835580839309065, 'subsample': 0.8, 'random_strength': 0.025512912083078956, 'bagging_temperature': 0.1452846523807952, 'border_count': 92, 'scale_pos_weight': 2.4177474691886784}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  74%|███████▍  | 74/100 [05:12<01:34,  3.63s/it]

[I 2025-11-07 11:20:10,570] Trial 73 finished with value: 0.5935691318327975 and parameters: {'learning_rate': 0.034839971644681555, 'depth': 8, 'l2_leaf_reg': 1.6430682796070528, 'subsample': 0.8, 'random_strength': 0.001355159621040878, 'bagging_temperature': 0.08291953277117142, 'border_count': 110, 'scale_pos_weight': 3.034034091695573}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  75%|███████▌  | 75/100 [05:16<01:33,  3.72s/it]

[I 2025-11-07 11:20:14,514] Trial 74 finished with value: 0.5560932688077431 and parameters: {'learning_rate': 0.03082202112743882, 'depth': 6, 'l2_leaf_reg': 2.151298447026268, 'subsample': 0.9, 'random_strength': 0.006710854657193066, 'bagging_temperature': 0.2032664408914259, 'border_count': 124, 'scale_pos_weight': 1.2464111735661796}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  76%|███████▌  | 76/100 [05:20<01:34,  3.93s/it]

[I 2025-11-07 11:20:18,919] Trial 75 finished with value: 0.5904088050314465 and parameters: {'learning_rate': 0.025472461060531557, 'depth': 7, 'l2_leaf_reg': 4.902318000381262, 'subsample': 0.6, 'random_strength': 0.013691825387126514, 'bagging_temperature': 0.11637515939147883, 'border_count': 131, 'scale_pos_weight': 1.7135098574658068}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  77%|███████▋  | 77/100 [05:27<01:53,  4.95s/it]

[I 2025-11-07 11:20:26,267] Trial 76 finished with value: 0.5925110132158591 and parameters: {'learning_rate': 0.01737256416101628, 'depth': 8, 'l2_leaf_reg': 1.117767302052192, 'subsample': 0.8, 'random_strength': 0.00391179957002692, 'bagging_temperature': 0.15870724161805866, 'border_count': 141, 'scale_pos_weight': 2.138165853955358}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  78%|███████▊  | 78/100 [05:31<01:42,  4.65s/it]

[I 2025-11-07 11:20:30,205] Trial 77 finished with value: 0.599408478475189 and parameters: {'learning_rate': 0.020792180312367913, 'depth': 7, 'l2_leaf_reg': 0.7483181647760982, 'subsample': 0.9, 'random_strength': 0.0005464579757907919, 'bagging_temperature': 0.012451843691807879, 'border_count': 100, 'scale_pos_weight': 2.7481206110243037}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  79%|███████▉  | 79/100 [05:37<01:41,  4.86s/it]

[I 2025-11-07 11:20:35,548] Trial 78 finished with value: 0.5978623914495658 and parameters: {'learning_rate': 0.01990868133565183, 'depth': 8, 'l2_leaf_reg': 0.34398653803243, 'subsample': 0.9, 'random_strength': 7.567510326525242e-05, 'bagging_temperature': 0.05678319129244061, 'border_count': 87, 'scale_pos_weight': 2.8290786429900905}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  80%|████████  | 80/100 [05:41<01:36,  4.80s/it]

[I 2025-11-07 11:20:40,231] Trial 79 finished with value: 0.5942713251495121 and parameters: {'learning_rate': 0.019106299902899148, 'depth': 8, 'l2_leaf_reg': 0.15496827890650752, 'subsample': 0.9, 'random_strength': 7.955545918048833e-05, 'bagging_temperature': 0.011355208946409458, 'border_count': 100, 'scale_pos_weight': 3.2500137141248855}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  81%|████████  | 81/100 [05:46<01:31,  4.83s/it]

[I 2025-11-07 11:20:45,134] Trial 80 finished with value: 0.5941696691778579 and parameters: {'learning_rate': 0.01627099702290112, 'depth': 7, 'l2_leaf_reg': 0.31778786324673153, 'subsample': 0.9, 'random_strength': 0.0005634492060039038, 'bagging_temperature': 0.05074279601971163, 'border_count': 200, 'scale_pos_weight': 2.8111226648045764}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  82%|████████▏ | 82/100 [05:52<01:31,  5.07s/it]

[I 2025-11-07 11:20:50,753] Trial 81 finished with value: 0.5811175337186898 and parameters: {'learning_rate': 0.02058326219374851, 'depth': 8, 'l2_leaf_reg': 0.07591401792274441, 'subsample': 0.9, 'random_strength': 2.633683786761398e-05, 'bagging_temperature': 0.0004574193009630814, 'border_count': 85, 'scale_pos_weight': 1.964954938988119}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  83%|████████▎ | 83/100 [05:58<01:29,  5.29s/it]

[I 2025-11-07 11:20:56,560] Trial 82 finished with value: 0.5862595419847328 and parameters: {'learning_rate': 0.022626684506422377, 'depth': 9, 'l2_leaf_reg': 0.711782145904611, 'subsample': 0.9, 'random_strength': 0.0001533347490978396, 'bagging_temperature': 0.07528928507262664, 'border_count': 96, 'scale_pos_weight': 3.5753369369129495}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  84%|████████▍ | 84/100 [06:03<01:25,  5.34s/it]

[I 2025-11-07 11:21:02,017] Trial 83 finished with value: 0.5942408376963351 and parameters: {'learning_rate': 0.014325407446644971, 'depth': 8, 'l2_leaf_reg': 0.24651050365074986, 'subsample': 1.0, 'random_strength': 4.8181132709935934e-05, 'bagging_temperature': 0.05086792424408132, 'border_count': 72, 'scale_pos_weight': 2.8904118812200843}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  85%|████████▌ | 85/100 [06:08<01:16,  5.11s/it]

[I 2025-11-07 11:21:06,598] Trial 84 finished with value: 0.5975531229877656 and parameters: {'learning_rate': 0.02788599565216594, 'depth': 8, 'l2_leaf_reg': 0.6009765073182652, 'subsample': 0.9, 'random_strength': 1.1960575060367303e-05, 'bagging_temperature': 0.1038318408338457, 'border_count': 86, 'scale_pos_weight': 3.1823710821884434}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  86%|████████▌ | 86/100 [06:09<00:57,  4.10s/it]

[I 2025-11-07 11:21:08,331] Trial 85 finished with value: 0.5991735537190083 and parameters: {'learning_rate': 0.051021357193517726, 'depth': 4, 'l2_leaf_reg': 0.418520395934916, 'subsample': 1.0, 'random_strength': 8.953656309823138e-05, 'bagging_temperature': 0.5302331940016511, 'border_count': 146, 'scale_pos_weight': 2.285263533585787}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  87%|████████▋ | 87/100 [06:15<00:59,  4.55s/it]

[I 2025-11-07 11:21:13,933] Trial 86 finished with value: 0.6017699115044248 and parameters: {'learning_rate': 0.012036810885350694, 'depth': 4, 'l2_leaf_reg': 0.3669991216478325, 'subsample': 1.0, 'random_strength': 0.00010314606230975998, 'bagging_temperature': 0.5621684385142823, 'border_count': 146, 'scale_pos_weight': 2.3832361328269895}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  88%|████████▊ | 88/100 [06:21<00:58,  4.88s/it]

[I 2025-11-07 11:21:19,585] Trial 87 finished with value: 0.5934759733426868 and parameters: {'learning_rate': 0.012728513443289567, 'depth': 4, 'l2_leaf_reg': 0.19744339855877135, 'subsample': 1.0, 'random_strength': 0.00011175926433781193, 'bagging_temperature': 0.5301406710514701, 'border_count': 146, 'scale_pos_weight': 2.19433517639001}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  89%|████████▉ | 89/100 [06:22<00:43,  3.95s/it]

[I 2025-11-07 11:21:21,355] Trial 88 finished with value: 0.6024340770791075 and parameters: {'learning_rate': 0.05156315703918682, 'depth': 3, 'l2_leaf_reg': 0.43004370449483453, 'subsample': 1.0, 'random_strength': 0.0003492224832164662, 'bagging_temperature': 0.5822159374514119, 'border_count': 155, 'scale_pos_weight': 2.4051104923161866}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  90%|█████████ | 90/100 [06:27<00:40,  4.06s/it]

[I 2025-11-07 11:21:25,682] Trial 89 finished with value: 0.5827709408495435 and parameters: {'learning_rate': 0.01071256745859817, 'depth': 3, 'l2_leaf_reg': 0.4292108253358264, 'subsample': 1.0, 'random_strength': 0.00040773512288359945, 'bagging_temperature': 0.587753018932205, 'border_count': 135, 'scale_pos_weight': 1.592444356680236}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  91%|█████████ | 91/100 [06:29<00:30,  3.44s/it]

[I 2025-11-07 11:21:27,679] Trial 90 finished with value: 0.5876598946576373 and parameters: {'learning_rate': 0.061384424524094955, 'depth': 4, 'l2_leaf_reg': 0.49307454124787337, 'subsample': 1.0, 'random_strength': 0.00020172265575539344, 'bagging_temperature': 0.4945565743423507, 'border_count': 173, 'scale_pos_weight': 1.857101759128327}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  92%|█████████▏| 92/100 [06:30<00:23,  2.93s/it]

[I 2025-11-07 11:21:29,411] Trial 91 finished with value: 0.6001362397820164 and parameters: {'learning_rate': 0.0500555737590809, 'depth': 4, 'l2_leaf_reg': 1.046846940000433, 'subsample': 1.0, 'random_strength': 3.550441839121201e-05, 'bagging_temperature': 0.5995682670650535, 'border_count': 152, 'scale_pos_weight': 2.3807315618074383}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  93%|█████████▎| 93/100 [06:33<00:18,  2.66s/it]

[I 2025-11-07 11:21:31,454] Trial 92 finished with value: 0.6007450050795801 and parameters: {'learning_rate': 0.052181180739940554, 'depth': 4, 'l2_leaf_reg': 1.0682555558816424, 'subsample': 1.0, 'random_strength': 7.463119235586606e-06, 'bagging_temperature': 0.5956362756768624, 'border_count': 153, 'scale_pos_weight': 2.3995333875603175}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  94%|█████████▍| 94/100 [06:35<00:14,  2.48s/it]

[I 2025-11-07 11:21:33,508] Trial 93 finished with value: 0.5971596813301004 and parameters: {'learning_rate': 0.05031389805057703, 'depth': 4, 'l2_leaf_reg': 1.016637236286594, 'subsample': 1.0, 'random_strength': 2.3862663958001673e-05, 'bagging_temperature': 0.6025790149088752, 'border_count': 151, 'scale_pos_weight': 2.288586233198554}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  95%|█████████▌| 95/100 [06:36<00:11,  2.28s/it]

[I 2025-11-07 11:21:35,329] Trial 94 finished with value: 0.5954198473282443 and parameters: {'learning_rate': 0.07824499061812135, 'depth': 5, 'l2_leaf_reg': 0.7904123683924253, 'subsample': 1.0, 'random_strength': 6.098581977972052e-06, 'bagging_temperature': 0.5470006074550438, 'border_count': 159, 'scale_pos_weight': 2.0637725251813825}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  96%|█████████▌| 96/100 [06:38<00:08,  2.14s/it]

[I 2025-11-07 11:21:37,150] Trial 95 finished with value: 0.5981735159817352 and parameters: {'learning_rate': 0.05802671596516084, 'depth': 4, 'l2_leaf_reg': 0.6520217207332349, 'subsample': 1.0, 'random_strength': 4.2820966243426965e-05, 'bagging_temperature': 0.6678751331494746, 'border_count': 154, 'scale_pos_weight': 2.658203780352771}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  97%|█████████▋| 97/100 [06:40<00:06,  2.09s/it]

[I 2025-11-07 11:21:39,111] Trial 96 finished with value: 0.6006066734074823 and parameters: {'learning_rate': 0.04631240637779152, 'depth': 3, 'l2_leaf_reg': 0.09673315928924478, 'subsample': 1.0, 'random_strength': 1.0789966282641285e-05, 'bagging_temperature': 0.5725621983344348, 'border_count': 146, 'scale_pos_weight': 2.432987319289878}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  98%|█████████▊| 98/100 [06:42<00:04,  2.04s/it]

[I 2025-11-07 11:21:41,031] Trial 97 finished with value: 0.5276190476190477 and parameters: {'learning_rate': 0.04398026324175591, 'depth': 3, 'l2_leaf_reg': 0.1016081650051483, 'subsample': 1.0, 'random_strength': 1.1285856211410941e-05, 'bagging_temperature': 0.5611047303211386, 'border_count': 143, 'scale_pos_weight': 1.0305218555246203}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374:  99%|█████████▉| 99/100 [06:44<00:01,  1.92s/it]

[I 2025-11-07 11:21:42,690] Trial 98 finished with value: 0.6021947873799726 and parameters: {'learning_rate': 0.0669330753109842, 'depth': 3, 'l2_leaf_reg': 0.37713955210755007, 'subsample': 1.0, 'random_strength': 3.4302947173046877e-06, 'bagging_temperature': 0.625954592886709, 'border_count': 164, 'scale_pos_weight': 2.321232732756329}. Best is trial 31 with value: 0.603374131657294.


Best trial: 31. Best value: 0.603374: 100%|██████████| 100/100 [06:45<00:00,  4.05s/it]

[I 2025-11-07 11:21:43,648] Trial 99 finished with value: 0.5450941526263627 and parameters: {'learning_rate': 0.09539964815175042, 'depth': 3, 'l2_leaf_reg': 0.19277899252389472, 'subsample': 1.0, 'random_strength': 2.8994150357351e-06, 'bagging_temperature': 0.6146467575776378, 'border_count': 164, 'scale_pos_weight': 6.052456353967993}. Best is trial 31 with value: 0.603374131657294.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.6034
  Best Hyperparameters:
    learning_rate: 0.029306960143738885
    depth: 8
    l2_leaf_reg: 2.160435718308443
    subsample: 0.9
    random_strength: 0.08003284959085531
    bagging_temperature: 0.08294170454932412
    border_count: 147
    scale_pos_weight: 2.8549710208640757





In [13]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 1000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 50 # Keep early stopping
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.029306960143738885, 'depth': 8, 'l2_leaf_reg': 2.160435718308443, 'subsample': 0.9, 'random_strength': 0.08003284959085531, 'bagging_temperature': 0.08294170454932412, 'border_count': 147, 'scale_pos_weight': 2.8549710208640757}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.5973

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.91      0.78      0.84      4165
   Class 1.0       0.50      0.74      0.60      1235

    accuracy                           0.77      5400
   macro avg       0.71      0.76      0.72      5400
weighted avg       0.82      0.77      0.78      5400



In [16]:
print("Saving best_model...")

# Use the model's F1 score in the name
best_model.save_model("catboost_mod_f1_0.6034.cbm")

print("Done.")

Saving best_model...
Done.


In [17]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 136

Top 10 Most Important Features:
                        feature  importance
135  recovery_feasibility_score    6.169598
44              liab_x_multicar    5.522465
53               liab_prct_sqrt    3.782572
58     is_multi_vehicle_unclear    3.758837
45         liab_x_highrisk_site    3.383531
54                liab_prct_log    3.164801
52              liab_prct_cubed    3.108423
51            liab_prct_squared    3.107916
134           in_network_repair    2.934785
127                 recent_move    2.717762

Bottom 10 Least Important Features:
                   feature  importance
49       police_x_multicar         0.0
94            high_mileage         0.0
96       very_high_mileage         0.0
97        frequent_claimer         0.0
100  very_frequent_claimer         0.0
101           large_payout         0.0
66         evidence_strong         0.0
103           small_payout         0.0
104      very_large_payout         0.0


In [18]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            1
2       4655051            0
3       6728725            1
4       9848460            1


In [19]:
prediction.to_csv("results/catboost_6034_prediction.csv", index=False)