In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split, StratifiedKFold, cross_val_score

from cc5_preprocessor import Preprocessor

import joblib

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)

In [5]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Learning statistical parameters for Z-scoring...
Fit complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


In [6]:
pre.fit(X, y)
X_proc = pre.transform(X)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Learning statistical parameters for Z-scoring...
Fit complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


## CatBoost with Optuna Tuning

In [7]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season']


In [9]:


def objective(trial: optuna.trial.Trial) -> float:
    
    # 1. Define hyperparameters from Optuna
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        # --- THIS IS THE FIX ---
        # Add the static parameters (including cat_features) here
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'random_state': 42,
        'cat_features': CAT_FEATURES  # <-- Pass cat_features at initialization
    }

    # 2. Set up the model
    # The model now knows about cat_features from the start
    model = CatBoostClassifier(**params)
    
    # 3. Configure CV Splitter
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42) # 3-fold for speed

    # 4. Run CV on the TRAINING data
    # We no longer need 'fit_params' because the model already has the info
    f1_scores = cross_val_score(
        model,
        X_proc,
        y,
        cv=skf,
        scoring='f1'
    )
    
    # 5. Return the mean F1 from the folds
    return np.mean(f1_scores)

In [10]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-13 22:11:04,805] A new study created in memory with name: no-name-a3c13103-fb15-46d8-89ff-1c417739c5ef



2. Starting Optuna study...


Best trial: 0. Best value: 0.549104:   1%|          | 1/100 [00:34<57:11, 34.66s/it]

[I 2025-11-13 22:11:39,474] Trial 0 finished with value: 0.5491044579495765 and parameters: {'learning_rate': 0.08174445497072659, 'depth': 7, 'l2_leaf_reg': 0.6912461495176748, 'subsample': 0.8, 'random_strength': 0.027833425192154176, 'bagging_temperature': 0.38846852930913056, 'border_count': 176, 'scale_pos_weight': 4.823080042929025}. Best is trial 0 with value: 0.5491044579495765.


Best trial: 0. Best value: 0.549104:   2%|▏         | 2/100 [02:34<2:18:55, 85.06s/it]

[I 2025-11-13 22:13:39,812] Trial 1 finished with value: 0.5326156686199699 and parameters: {'learning_rate': 0.02533245899261147, 'depth': 10, 'l2_leaf_reg': 0.6045013549775682, 'subsample': 0.9, 'random_strength': 2.80816514032516e-06, 'bagging_temperature': 0.7709404643299286, 'border_count': 168, 'scale_pos_weight': 2.8059418949740724}. Best is trial 0 with value: 0.5491044579495765.


Best trial: 0. Best value: 0.549104:   3%|▎         | 3/100 [04:48<2:53:25, 107.27s/it]

[I 2025-11-13 22:15:53,515] Trial 2 finished with value: 0.5271975996745618 and parameters: {'learning_rate': 0.016495198479818587, 'depth': 10, 'l2_leaf_reg': 0.0015499059902761517, 'subsample': 0.7, 'random_strength': 0.2114779093725347, 'bagging_temperature': 0.5795949215655088, 'border_count': 161, 'scale_pos_weight': 7.265768209015592}. Best is trial 0 with value: 0.5491044579495765.


Best trial: 3. Best value: 0.558995:   4%|▍         | 4/100 [05:11<1:58:01, 73.77s/it] 

[I 2025-11-13 22:16:15,927] Trial 3 finished with value: 0.558994826041968 and parameters: {'learning_rate': 0.056137093803637544, 'depth': 5, 'l2_leaf_reg': 2.412307537208863, 'subsample': 0.9, 'random_strength': 1.5084327934875838e-05, 'bagging_temperature': 0.27100507283468367, 'border_count': 80, 'scale_pos_weight': 7.846137901641568}. Best is trial 3 with value: 0.558994826041968.


Best trial: 3. Best value: 0.558995:   5%|▌         | 5/100 [05:56<1:40:38, 63.56s/it]

[I 2025-11-13 22:17:01,381] Trial 4 finished with value: 0.5549213182437739 and parameters: {'learning_rate': 0.01205562046020276, 'depth': 8, 'l2_leaf_reg': 0.017356713239750115, 'subsample': 0.8, 'random_strength': 0.006753730332130862, 'bagging_temperature': 0.4238289643868073, 'border_count': 55, 'scale_pos_weight': 2.086059529742396}. Best is trial 3 with value: 0.558994826041968.


Best trial: 3. Best value: 0.558995:   6%|▌         | 6/100 [06:15<1:15:46, 48.37s/it]

[I 2025-11-13 22:17:20,273] Trial 5 finished with value: 0.5244133641395017 and parameters: {'learning_rate': 0.2025982388909172, 'depth': 5, 'l2_leaf_reg': 0.007004868483988883, 'subsample': 0.6, 'random_strength': 0.09452137589089166, 'bagging_temperature': 0.25044769162492486, 'border_count': 105, 'scale_pos_weight': 6.475010329048991}. Best is trial 3 with value: 0.558994826041968.


Best trial: 3. Best value: 0.558995:   7%|▋         | 7/100 [06:33<59:38, 38.47s/it]  

[I 2025-11-13 22:17:38,367] Trial 6 finished with value: 0.5572216109195947 and parameters: {'learning_rate': 0.014286214060552539, 'depth': 5, 'l2_leaf_reg': 0.059596210686239194, 'subsample': 0.5, 'random_strength': 2.5418278090170563e-07, 'bagging_temperature': 0.6366137655106486, 'border_count': 41, 'scale_pos_weight': 5.987870257506707}. Best is trial 3 with value: 0.558994826041968.


Best trial: 3. Best value: 0.558995:   8%|▊         | 8/100 [06:46<46:41, 30.46s/it]

[I 2025-11-13 22:17:51,654] Trial 7 finished with value: 0.5192494231101724 and parameters: {'learning_rate': 0.011454464287015504, 'depth': 3, 'l2_leaf_reg': 0.11251814969502985, 'subsample': 0.5, 'random_strength': 2.243140551897201e-08, 'bagging_temperature': 0.40075074482635953, 'border_count': 194, 'scale_pos_weight': 9.505389430159635}. Best is trial 3 with value: 0.558994826041968.


Best trial: 3. Best value: 0.558995:   9%|▉         | 9/100 [07:07<41:22, 27.28s/it]

[I 2025-11-13 22:18:11,953] Trial 8 finished with value: 0.5578885758785148 and parameters: {'learning_rate': 0.07673036348874386, 'depth': 5, 'l2_leaf_reg': 0.36269174080662253, 'subsample': 0.8, 'random_strength': 2.193841583198333e-08, 'bagging_temperature': 0.4986260890737888, 'border_count': 219, 'scale_pos_weight': 9.879022367663723}. Best is trial 3 with value: 0.558994826041968.


Best trial: 3. Best value: 0.558995:  10%|█         | 10/100 [07:19<33:58, 22.65s/it]

[I 2025-11-13 22:18:24,237] Trial 9 finished with value: 0.55229848880108 and parameters: {'learning_rate': 0.203180260143334, 'depth': 3, 'l2_leaf_reg': 0.0075786675006944545, 'subsample': 0.9, 'random_strength': 2.44932813522867e-06, 'bagging_temperature': 0.2693756071494249, 'border_count': 49, 'scale_pos_weight': 2.8561208424443527}. Best is trial 3 with value: 0.558994826041968.


Best trial: 10. Best value: 0.575181:  11%|█         | 11/100 [08:05<44:14, 29.83s/it]

[I 2025-11-13 22:19:10,346] Trial 10 finished with value: 0.5751811961290393 and parameters: {'learning_rate': 0.038467600843219145, 'depth': 8, 'l2_leaf_reg': 9.567424367982381, 'subsample': 1.0, 'random_strength': 0.00024701445844950486, 'bagging_temperature': 0.02824232439643981, 'border_count': 107, 'scale_pos_weight': 8.1619041583046}. Best is trial 10 with value: 0.5751811961290393.


Best trial: 10. Best value: 0.575181:  12%|█▏        | 12/100 [08:52<51:11, 34.90s/it]

[I 2025-11-13 22:19:56,835] Trial 11 finished with value: 0.5721122643407591 and parameters: {'learning_rate': 0.038437458326822244, 'depth': 8, 'l2_leaf_reg': 9.041361822327573, 'subsample': 1.0, 'random_strength': 0.00038221364822897485, 'bagging_temperature': 0.029884335179956003, 'border_count': 107, 'scale_pos_weight': 8.421005223907647}. Best is trial 10 with value: 0.5751811961290393.


Best trial: 10. Best value: 0.575181:  13%|█▎        | 13/100 [09:40<56:21, 38.87s/it]

[I 2025-11-13 22:20:44,838] Trial 12 finished with value: 0.5745729100712916 and parameters: {'learning_rate': 0.031195764096048524, 'depth': 8, 'l2_leaf_reg': 6.28631454689286, 'subsample': 1.0, 'random_strength': 0.0005426420817301859, 'bagging_temperature': 0.001703904900923224, 'border_count': 122, 'scale_pos_weight': 8.258344056067328}. Best is trial 10 with value: 0.5751811961290393.


Best trial: 13. Best value: 0.580419:  14%|█▍        | 14/100 [10:27<59:30, 41.52s/it]

[I 2025-11-13 22:21:32,475] Trial 13 finished with value: 0.5804185023972251 and parameters: {'learning_rate': 0.029754763806535033, 'depth': 8, 'l2_leaf_reg': 9.954555656591051, 'subsample': 1.0, 'random_strength': 0.0007158849776481871, 'bagging_temperature': 0.05947912316035442, 'border_count': 124, 'scale_pos_weight': 4.842029044445516}. Best is trial 13 with value: 0.5804185023972251.


Best trial: 13. Best value: 0.580419:  15%|█▌        | 15/100 [11:45<1:14:13, 52.39s/it]

[I 2025-11-13 22:22:50,062] Trial 14 finished with value: 0.5756391869049797 and parameters: {'learning_rate': 0.024589511879165176, 'depth': 9, 'l2_leaf_reg': 2.200640756711841, 'subsample': 1.0, 'random_strength': 0.002627301173852562, 'bagging_temperature': 0.9887209006064175, 'border_count': 130, 'scale_pos_weight': 4.386439232937925}. Best is trial 13 with value: 0.5804185023972251.


Best trial: 13. Best value: 0.580419:  16%|█▌        | 16/100 [13:05<1:25:00, 60.71s/it]

[I 2025-11-13 22:24:10,110] Trial 15 finished with value: 0.5790427848561897 and parameters: {'learning_rate': 0.02064240805040915, 'depth': 9, 'l2_leaf_reg': 2.058094203211672, 'subsample': 1.0, 'random_strength': 0.005373455758984872, 'bagging_temperature': 0.8915279569544845, 'border_count': 136, 'scale_pos_weight': 3.9922537743235145}. Best is trial 13 with value: 0.5804185023972251.


Best trial: 16. Best value: 0.581637:  17%|█▋        | 17/100 [14:25<1:32:10, 66.64s/it]

[I 2025-11-13 22:25:30,513] Trial 16 finished with value: 0.5816369347571868 and parameters: {'learning_rate': 0.019810886386758486, 'depth': 9, 'l2_leaf_reg': 2.586834035875002, 'subsample': 0.9, 'random_strength': 0.5523682887537945, 'bagging_temperature': 0.9880261882023559, 'border_count': 146, 'scale_pos_weight': 3.9577130892473513}. Best is trial 16 with value: 0.5816369347571868.


Best trial: 16. Best value: 0.581637:  18%|█▊        | 18/100 [15:07<1:20:49, 59.14s/it]

[I 2025-11-13 22:26:12,199] Trial 17 finished with value: 0.5478894521286533 and parameters: {'learning_rate': 0.0530539759606351, 'depth': 7, 'l2_leaf_reg': 0.18047039300977036, 'subsample': 0.9, 'random_strength': 0.9400174811920475, 'bagging_temperature': 0.7777777287625005, 'border_count': 242, 'scale_pos_weight': 3.6854491033369463}. Best is trial 16 with value: 0.5816369347571868.


Best trial: 16. Best value: 0.581637:  19%|█▉        | 19/100 [16:31<1:29:48, 66.53s/it]

[I 2025-11-13 22:27:35,949] Trial 18 finished with value: 0.5085504115823883 and parameters: {'learning_rate': 0.12224419807799032, 'depth': 9, 'l2_leaf_reg': 3.131290232710133, 'subsample': 0.7, 'random_strength': 3.190925083758141e-05, 'bagging_temperature': 0.15974812831904117, 'border_count': 79, 'scale_pos_weight': 1.5014111765584}. Best is trial 16 with value: 0.5816369347571868.


Best trial: 16. Best value: 0.581637:  20%|██        | 20/100 [17:05<1:15:47, 56.84s/it]

[I 2025-11-13 22:28:10,210] Trial 19 finished with value: 0.5738231243613175 and parameters: {'learning_rate': 0.021928960915181685, 'depth': 6, 'l2_leaf_reg': 1.0221018696018744, 'subsample': 0.9, 'random_strength': 0.5182221578425753, 'bagging_temperature': 0.6841769372099684, 'border_count': 201, 'scale_pos_weight': 5.100439624779213}. Best is trial 16 with value: 0.5816369347571868.


Best trial: 20. Best value: 0.585308:  21%|██        | 21/100 [19:01<1:38:20, 74.69s/it]

[I 2025-11-13 22:30:06,526] Trial 20 finished with value: 0.585307606425784 and parameters: {'learning_rate': 0.016511738724554616, 'depth': 10, 'l2_leaf_reg': 4.742756426184651, 'subsample': 0.8, 'random_strength': 0.08798402796927275, 'bagging_temperature': 0.1397861058541673, 'border_count': 152, 'scale_pos_weight': 5.5956129902023575}. Best is trial 20 with value: 0.585307606425784.


Best trial: 20. Best value: 0.585308:  22%|██▏       | 22/100 [21:02<1:55:06, 88.54s/it]

[I 2025-11-13 22:32:07,361] Trial 21 finished with value: 0.5847600633840834 and parameters: {'learning_rate': 0.010191097176568492, 'depth': 10, 'l2_leaf_reg': 3.9382098556539793, 'subsample': 0.8, 'random_strength': 0.03885345660634936, 'bagging_temperature': 0.1391035311283989, 'border_count': 143, 'scale_pos_weight': 5.492069964896565}. Best is trial 20 with value: 0.585307606425784.


Best trial: 22. Best value: 0.586491:  23%|██▎       | 23/100 [23:00<2:04:58, 97.39s/it]

[I 2025-11-13 22:34:05,386] Trial 22 finished with value: 0.586491430406651 and parameters: {'learning_rate': 0.01643543798751812, 'depth': 10, 'l2_leaf_reg': 3.40278688423811, 'subsample': 0.7, 'random_strength': 0.036528836818022346, 'bagging_temperature': 0.1841511642833141, 'border_count': 153, 'scale_pos_weight': 6.011070569085479}. Best is trial 22 with value: 0.586491430406651.


Best trial: 22. Best value: 0.586491:  24%|██▍       | 24/100 [24:55<2:10:05, 102.71s/it]

[I 2025-11-13 22:36:00,508] Trial 23 finished with value: 0.5847456966243134 and parameters: {'learning_rate': 0.014531750783940086, 'depth': 10, 'l2_leaf_reg': 1.179262610199538, 'subsample': 0.7, 'random_strength': 0.03866706683078779, 'bagging_temperature': 0.17331903110035138, 'border_count': 149, 'scale_pos_weight': 6.533968013833295}. Best is trial 22 with value: 0.586491430406651.


Best trial: 22. Best value: 0.586491:  25%|██▌       | 25/100 [27:05<2:18:42, 110.96s/it]

[I 2025-11-13 22:38:10,726] Trial 24 finished with value: 0.5820180381098182 and parameters: {'learning_rate': 0.011521738068797213, 'depth': 10, 'l2_leaf_reg': 4.214533632252798, 'subsample': 0.6, 'random_strength': 0.018800830928730364, 'bagging_temperature': 0.14532866806082884, 'border_count': 187, 'scale_pos_weight': 5.787105674642098}. Best is trial 22 with value: 0.586491430406651.


Best trial: 22. Best value: 0.586491:  26%|██▌       | 26/100 [29:26<2:27:42, 119.76s/it]

[I 2025-11-13 22:40:31,001] Trial 25 finished with value: 0.5753454309144582 and parameters: {'learning_rate': 0.014983963766465916, 'depth': 10, 'l2_leaf_reg': 0.26979296036479855, 'subsample': 0.6, 'random_strength': 0.11965774289511912, 'bagging_temperature': 0.1129481506359214, 'border_count': 159, 'scale_pos_weight': 6.9996788620911925}. Best is trial 22 with value: 0.586491430406651.


Best trial: 22. Best value: 0.586491:  27%|██▋       | 27/100 [30:53<2:13:53, 110.04s/it]

[I 2025-11-13 22:41:58,384] Trial 26 finished with value: 0.5747361785178486 and parameters: {'learning_rate': 0.011181444614939598, 'depth': 9, 'l2_leaf_reg': 0.050282619573732194, 'subsample': 0.8, 'random_strength': 0.003580970076558894, 'bagging_temperature': 0.31594404464131315, 'border_count': 230, 'scale_pos_weight': 5.605142103202693}. Best is trial 22 with value: 0.586491430406651.


Best trial: 27. Best value: 0.587068:  28%|██▊       | 28/100 [33:01<2:18:35, 115.49s/it]

[I 2025-11-13 22:44:06,576] Trial 27 finished with value: 0.5870681371115231 and parameters: {'learning_rate': 0.010108675541615446, 'depth': 10, 'l2_leaf_reg': 1.3737947625051752, 'subsample': 0.7, 'random_strength': 0.018005960012791877, 'bagging_temperature': 0.20801718292881977, 'border_count': 213, 'scale_pos_weight': 6.387936847134593}. Best is trial 27 with value: 0.5870681371115231.


Best trial: 27. Best value: 0.587068:  29%|██▉       | 29/100 [34:22<2:04:09, 104.92s/it]

[I 2025-11-13 22:45:26,835] Trial 28 finished with value: 0.5786754432469127 and parameters: {'learning_rate': 0.01742861730438132, 'depth': 9, 'l2_leaf_reg': 1.0305122428755962, 'subsample': 0.7, 'random_strength': 0.0015111487349191524, 'bagging_temperature': 0.2104311516052596, 'border_count': 218, 'scale_pos_weight': 7.4626233864119}. Best is trial 27 with value: 0.5870681371115231.


Best trial: 27. Best value: 0.587068:  30%|███       | 30/100 [34:56<1:37:35, 83.64s/it] 

[I 2025-11-13 22:46:00,834] Trial 29 finished with value: 0.5228288789947421 and parameters: {'learning_rate': 0.2801468523751709, 'depth': 7, 'l2_leaf_reg': 0.4974992190416303, 'subsample': 0.6, 'random_strength': 0.016788231566679895, 'bagging_temperature': 0.33954370954165947, 'border_count': 177, 'scale_pos_weight': 6.404843820390924}. Best is trial 27 with value: 0.5870681371115231.


Best trial: 27. Best value: 0.587068:  31%|███       | 31/100 [35:22<1:16:27, 66.48s/it]

[I 2025-11-13 22:46:27,268] Trial 30 finished with value: 0.5628419584812208 and parameters: {'learning_rate': 0.03861711346385552, 'depth': 6, 'l2_leaf_reg': 1.545639093860586, 'subsample': 0.7, 'random_strength': 0.010817052312805876, 'bagging_temperature': 0.4676544331599498, 'border_count': 250, 'scale_pos_weight': 8.817366788495963}. Best is trial 27 with value: 0.5870681371115231.


Best trial: 31. Best value: 0.587487:  32%|███▏      | 32/100 [37:15<1:31:13, 80.49s/it]

[I 2025-11-13 22:48:20,459] Trial 31 finished with value: 0.5874874353263327 and parameters: {'learning_rate': 0.010144684606030392, 'depth': 10, 'l2_leaf_reg': 4.589296075602454, 'subsample': 0.8, 'random_strength': 0.061592414661896286, 'bagging_temperature': 0.09449745540994164, 'border_count': 180, 'scale_pos_weight': 4.986913547663335}. Best is trial 31 with value: 0.5874874353263327.


Best trial: 31. Best value: 0.587487:  33%|███▎      | 33/100 [39:08<1:40:40, 90.16s/it]

[I 2025-11-13 22:50:13,185] Trial 32 finished with value: 0.5826187338226204 and parameters: {'learning_rate': 0.013649393115747106, 'depth': 10, 'l2_leaf_reg': 5.052804960497375, 'subsample': 0.8, 'random_strength': 0.11372551658330625, 'bagging_temperature': 0.09173391077033406, 'border_count': 177, 'scale_pos_weight': 5.019028819546164}. Best is trial 31 with value: 0.5874874353263327.


Best trial: 31. Best value: 0.587487:  34%|███▍      | 34/100 [41:05<1:47:55, 98.11s/it]

[I 2025-11-13 22:52:09,832] Trial 33 finished with value: 0.5815095909290453 and parameters: {'learning_rate': 0.017772737805896745, 'depth': 10, 'l2_leaf_reg': 0.731537888865694, 'subsample': 0.7, 'random_strength': 0.24732583617354614, 'bagging_temperature': 0.19949757866786302, 'border_count': 206, 'scale_pos_weight': 6.851960570279866}. Best is trial 31 with value: 0.5874874353263327.


Best trial: 31. Best value: 0.587487:  35%|███▌      | 35/100 [43:05<1:53:30, 104.78s/it]

[I 2025-11-13 22:54:10,163] Trial 34 finished with value: 0.5873010939916565 and parameters: {'learning_rate': 0.010182738381104077, 'depth': 10, 'l2_leaf_reg': 5.298251826030631, 'subsample': 0.8, 'random_strength': 0.04732772307632099, 'bagging_temperature': 0.33351931444496613, 'border_count': 165, 'scale_pos_weight': 3.1541173893105587}. Best is trial 31 with value: 0.5874874353263327.


Best trial: 31. Best value: 0.587487:  36%|███▌      | 36/100 [44:22<1:43:00, 96.57s/it] 

[I 2025-11-13 22:55:27,579] Trial 35 finished with value: 0.5805782895140584 and parameters: {'learning_rate': 0.010559468819976223, 'depth': 9, 'l2_leaf_reg': 0.6753113094825463, 'subsample': 0.7, 'random_strength': 0.039335065597664406, 'bagging_temperature': 0.3111249395159663, 'border_count': 170, 'scale_pos_weight': 3.150115676155926}. Best is trial 31 with value: 0.5874874353263327.


Best trial: 31. Best value: 0.587487:  37%|███▋      | 37/100 [46:17<1:47:11, 102.09s/it]

[I 2025-11-13 22:57:22,541] Trial 36 finished with value: 0.5359808523759043 and parameters: {'learning_rate': 0.012852724303221083, 'depth': 10, 'l2_leaf_reg': 1.4329300111315593, 'subsample': 0.8, 'random_strength': 6.726166242310582e-05, 'bagging_temperature': 0.34994329965788723, 'border_count': 216, 'scale_pos_weight': 1.7654845417747955}. Best is trial 31 with value: 0.5874874353263327.


Best trial: 37. Best value: 0.590004:  38%|███▊      | 38/100 [46:33<1:18:46, 76.23s/it] 

[I 2025-11-13 22:57:38,429] Trial 37 finished with value: 0.5900040130760483 and parameters: {'learning_rate': 0.010091709697363019, 'depth': 4, 'l2_leaf_reg': 6.972872057755589, 'subsample': 0.6, 'random_strength': 0.312512814449532, 'bagging_temperature': 0.2357069108767939, 'border_count': 186, 'scale_pos_weight': 2.347444440265436}. Best is trial 37 with value: 0.5900040130760483.


Best trial: 37. Best value: 0.590004:  39%|███▉      | 39/100 [46:48<58:53, 57.92s/it]  

[I 2025-11-13 22:57:53,632] Trial 38 finished with value: 0.5078699132082823 and parameters: {'learning_rate': 0.010135528533501754, 'depth': 4, 'l2_leaf_reg': 0.0018713191467869179, 'subsample': 0.6, 'random_strength': 0.2766340737698163, 'bagging_temperature': 0.2592950684402045, 'border_count': 188, 'scale_pos_weight': 1.0059828686960695}. Best is trial 37 with value: 0.5900040130760483.


Best trial: 39. Best value: 0.590294:  40%|████      | 40/100 [47:04<45:08, 45.14s/it]

[I 2025-11-13 22:58:08,944] Trial 39 finished with value: 0.5902944111035784 and parameters: {'learning_rate': 0.013089107788657918, 'depth': 4, 'l2_leaf_reg': 7.236363760941275, 'subsample': 0.5, 'random_strength': 0.008740345717753858, 'bagging_temperature': 0.5529980602387732, 'border_count': 168, 'scale_pos_weight': 2.4047075956515926}. Best is trial 39 with value: 0.5902944111035784.


Best trial: 39. Best value: 0.590294:  41%|████      | 41/100 [47:19<35:32, 36.15s/it]

[I 2025-11-13 22:58:24,118] Trial 40 finished with value: 0.5895568190645092 and parameters: {'learning_rate': 0.013116494239849001, 'depth': 4, 'l2_leaf_reg': 7.055325560063257, 'subsample': 0.5, 'random_strength': 0.19427049656869627, 'bagging_temperature': 0.5764074880524216, 'border_count': 167, 'scale_pos_weight': 2.1971069180656433}. Best is trial 39 with value: 0.5902944111035784.


Best trial: 39. Best value: 0.590294:  42%|████▏     | 42/100 [47:34<28:59, 29.99s/it]

[I 2025-11-13 22:58:39,725] Trial 41 finished with value: 0.589038085443106 and parameters: {'learning_rate': 0.013258822165549857, 'depth': 4, 'l2_leaf_reg': 6.534289991281722, 'subsample': 0.5, 'random_strength': 0.24108025366275307, 'bagging_temperature': 0.5759347502314531, 'border_count': 170, 'scale_pos_weight': 2.359764906131365}. Best is trial 39 with value: 0.5902944111035784.


Best trial: 42. Best value: 0.59197:  43%|████▎     | 43/100 [47:50<24:17, 25.57s/it] 

[I 2025-11-13 22:58:54,980] Trial 42 finished with value: 0.5919700417749852 and parameters: {'learning_rate': 0.013271255808543568, 'depth': 4, 'l2_leaf_reg': 7.464687797940376, 'subsample': 0.5, 'random_strength': 0.9565203657402667, 'bagging_temperature': 0.5750962256972073, 'border_count': 183, 'scale_pos_weight': 2.302617649078191}. Best is trial 42 with value: 0.5919700417749852.


Best trial: 42. Best value: 0.59197:  44%|████▍     | 44/100 [48:05<20:57, 22.46s/it]

[I 2025-11-13 22:59:10,191] Trial 43 finished with value: 0.5918912781685178 and parameters: {'learning_rate': 0.014194177621806544, 'depth': 4, 'l2_leaf_reg': 7.441969321202613, 'subsample': 0.5, 'random_strength': 0.985791866531758, 'bagging_temperature': 0.5749720465113924, 'border_count': 192, 'scale_pos_weight': 2.3090097009281645}. Best is trial 42 with value: 0.5919700417749852.


Best trial: 42. Best value: 0.59197:  45%|████▌     | 45/100 [48:20<18:40, 20.37s/it]

[I 2025-11-13 22:59:25,674] Trial 44 finished with value: 0.5912105767342463 and parameters: {'learning_rate': 0.02563367395019175, 'depth': 4, 'l2_leaf_reg': 7.715422502085045, 'subsample': 0.5, 'random_strength': 0.8231790931590394, 'bagging_temperature': 0.580293257931619, 'border_count': 197, 'scale_pos_weight': 2.372351818124157}. Best is trial 42 with value: 0.5919700417749852.


Best trial: 45. Best value: 0.592847:  46%|████▌     | 46/100 [48:33<16:11, 17.98s/it]

[I 2025-11-13 22:59:38,101] Trial 45 finished with value: 0.5928473276261367 and parameters: {'learning_rate': 0.025785429450793963, 'depth': 3, 'l2_leaf_reg': 8.580216612209396, 'subsample': 0.5, 'random_strength': 0.7780915964264055, 'bagging_temperature': 0.6367266487953196, 'border_count': 196, 'scale_pos_weight': 2.741782749938669}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  47%|████▋     | 47/100 [48:45<14:24, 16.32s/it]

[I 2025-11-13 22:59:50,526] Trial 46 finished with value: 0.5894074501895167 and parameters: {'learning_rate': 0.030790721210021292, 'depth': 3, 'l2_leaf_reg': 9.324721142572276, 'subsample': 0.5, 'random_strength': 0.83855738608217, 'bagging_temperature': 0.6780970053819962, 'border_count': 198, 'scale_pos_weight': 2.71211437174767}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  48%|████▊     | 48/100 [48:58<13:07, 15.15s/it]

[I 2025-11-13 23:00:02,955] Trial 47 finished with value: 0.5656701837742728 and parameters: {'learning_rate': 0.02680865428834933, 'depth': 3, 'l2_leaf_reg': 2.3283184116708595, 'subsample': 0.5, 'random_strength': 4.978305641281871e-06, 'bagging_temperature': 0.5379044596644132, 'border_count': 235, 'scale_pos_weight': 1.454581516387874}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  49%|████▉     | 49/100 [49:17<13:51, 16.30s/it]

[I 2025-11-13 23:00:21,932] Trial 48 finished with value: 0.5732848324514991 and parameters: {'learning_rate': 0.05197904924485519, 'depth': 5, 'l2_leaf_reg': 0.03282427779576763, 'subsample': 0.5, 'random_strength': 0.8965893171443043, 'bagging_temperature': 0.6276691112533954, 'border_count': 207, 'scale_pos_weight': 3.3268322754274893}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  50%|█████     | 50/100 [49:32<13:22, 16.05s/it]

[I 2025-11-13 23:00:37,414] Trial 49 finished with value: 0.5899660144995261 and parameters: {'learning_rate': 0.023004724769111088, 'depth': 4, 'l2_leaf_reg': 2.0591215525891107, 'subsample': 0.5, 'random_strength': 0.44071160706400836, 'bagging_temperature': 0.45764953215917814, 'border_count': 196, 'scale_pos_weight': 2.7572419504223746}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  51%|█████     | 51/100 [49:44<12:12, 14.95s/it]

[I 2025-11-13 23:00:49,782] Trial 50 finished with value: 0.5812471358028516 and parameters: {'learning_rate': 0.03562476834958329, 'depth': 3, 'l2_leaf_reg': 7.047985044768958, 'subsample': 0.5, 'random_strength': 0.1398033555425967, 'bagging_temperature': 0.7559975718581178, 'border_count': 226, 'scale_pos_weight': 1.8087903985842286}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  52%|█████▏    | 52/100 [50:00<12:07, 15.16s/it]

[I 2025-11-13 23:01:05,421] Trial 51 finished with value: 0.5897776185689771 and parameters: {'learning_rate': 0.019608869073787318, 'depth': 4, 'l2_leaf_reg': 7.352763313490513, 'subsample': 0.6, 'random_strength': 0.3675049438001504, 'bagging_temperature': 0.522409534298728, 'border_count': 188, 'scale_pos_weight': 2.419424505896612}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  53%|█████▎    | 53/100 [50:15<11:55, 15.22s/it]

[I 2025-11-13 23:01:20,788] Trial 52 finished with value: 0.5809067136387059 and parameters: {'learning_rate': 0.045760911443577905, 'depth': 4, 'l2_leaf_reg': 3.23015118968267, 'subsample': 0.5, 'random_strength': 0.9644649658852676, 'bagging_temperature': 0.6250557288987673, 'border_count': 193, 'scale_pos_weight': 1.9430267697355228}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  54%|█████▍    | 54/100 [50:34<12:32, 16.35s/it]

[I 2025-11-13 23:01:39,788] Trial 53 finished with value: 0.5590323152239307 and parameters: {'learning_rate': 0.015528222198894036, 'depth': 5, 'l2_leaf_reg': 8.55903353512847, 'subsample': 0.5, 'random_strength': 0.37211606743011927, 'bagging_temperature': 0.7339472154455066, 'border_count': 180, 'scale_pos_weight': 1.4060978524872871}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  55%|█████▌    | 55/100 [50:54<12:54, 17.21s/it]

[I 2025-11-13 23:01:58,983] Trial 54 finished with value: 0.5732015031974805 and parameters: {'learning_rate': 0.06743938027272295, 'depth': 5, 'l2_leaf_reg': 9.838244329352424, 'subsample': 0.6, 'random_strength': 4.947111689722703e-07, 'bagging_temperature': 0.6726959811416477, 'border_count': 205, 'scale_pos_weight': 3.452009221064289}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  56%|█████▌    | 56/100 [51:06<11:34, 15.78s/it]

[I 2025-11-13 23:02:11,434] Trial 55 finished with value: 0.5705839685819484 and parameters: {'learning_rate': 0.11245287792225221, 'depth': 3, 'l2_leaf_reg': 3.1398168093251435, 'subsample': 0.5, 'random_strength': 0.08426187340824005, 'bagging_temperature': 0.434005162646477, 'border_count': 160, 'scale_pos_weight': 2.525204978901156}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  57%|█████▋    | 57/100 [51:22<11:15, 15.70s/it]

[I 2025-11-13 23:02:26,963] Trial 56 finished with value: 0.5260195309362633 and parameters: {'learning_rate': 0.02664701090662591, 'depth': 4, 'l2_leaf_reg': 5.349024090665809, 'subsample': 0.6, 'random_strength': 0.4986620204672978, 'bagging_temperature': 0.4922607140140459, 'border_count': 186, 'scale_pos_weight': 1.1028055852734204}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  58%|█████▊    | 58/100 [51:33<10:09, 14.51s/it]

[I 2025-11-13 23:02:38,692] Trial 57 finished with value: 0.5727223633314752 and parameters: {'learning_rate': 0.019352272899943776, 'depth': 3, 'l2_leaf_reg': 0.009706140530481817, 'subsample': 0.5, 'random_strength': 0.007989066965172459, 'bagging_temperature': 0.8351943773278048, 'border_count': 223, 'scale_pos_weight': 4.0730580680387085}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  59%|█████▉    | 59/100 [51:49<10:05, 14.76s/it]

[I 2025-11-13 23:02:54,020] Trial 58 finished with value: 0.5908370829056463 and parameters: {'learning_rate': 0.011891093888264488, 'depth': 4, 'l2_leaf_reg': 2.8199875354386505, 'subsample': 0.5, 'random_strength': 0.15348119395117688, 'bagging_temperature': 0.5412171223991011, 'border_count': 212, 'scale_pos_weight': 2.127900362008467}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  60%|██████    | 60/100 [52:08<10:42, 16.07s/it]

[I 2025-11-13 23:03:13,163] Trial 59 finished with value: 0.5878670137697136 and parameters: {'learning_rate': 0.01203940789955262, 'depth': 5, 'l2_leaf_reg': 2.840189237848929, 'subsample': 0.5, 'random_strength': 0.17137655463133608, 'bagging_temperature': 0.556973436977072, 'border_count': 237, 'scale_pos_weight': 2.9003628087387874}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  61%|██████    | 61/100 [52:20<09:41, 14.92s/it]

[I 2025-11-13 23:03:25,385] Trial 60 finished with value: 0.5883195879478264 and parameters: {'learning_rate': 0.018698527016668243, 'depth': 3, 'l2_leaf_reg': 3.962126892557807, 'subsample': 0.5, 'random_strength': 0.991820378472915, 'bagging_temperature': 0.39859704226921466, 'border_count': 211, 'scale_pos_weight': 2.072131161202541}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  62%|██████▏   | 62/100 [52:35<09:30, 15.02s/it]

[I 2025-11-13 23:03:40,648] Trial 61 finished with value: 0.5797239127360166 and parameters: {'learning_rate': 0.012117340317316292, 'depth': 4, 'l2_leaf_reg': 6.0790868576247075, 'subsample': 0.5, 'random_strength': 0.49153932508784365, 'bagging_temperature': 0.6076422818161529, 'border_count': 198, 'scale_pos_weight': 1.6976330830516757}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  63%|██████▎   | 63/100 [52:51<09:21, 15.19s/it]

[I 2025-11-13 23:03:56,220] Trial 62 finished with value: 0.5798782782476203 and parameters: {'learning_rate': 0.014792308861844895, 'depth': 4, 'l2_leaf_reg': 9.869920154751009, 'subsample': 0.6, 'random_strength': 0.17681298347687083, 'bagging_temperature': 0.7115457717195713, 'border_count': 182, 'scale_pos_weight': 3.6564245394856822}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  64%|██████▍   | 64/100 [53:07<09:11, 15.31s/it]

[I 2025-11-13 23:04:11,826] Trial 63 finished with value: 0.5739907451744752 and parameters: {'learning_rate': 0.023526953427944322, 'depth': 4, 'l2_leaf_reg': 1.9961131413255746, 'subsample': 0.6, 'random_strength': 0.09519627045902682, 'bagging_temperature': 0.5148793635588629, 'border_count': 192, 'scale_pos_weight': 4.4230932384344355}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  65%|██████▌   | 65/100 [53:31<10:27, 17.92s/it]

[I 2025-11-13 23:04:35,846] Trial 64 finished with value: 0.5905468382411592 and parameters: {'learning_rate': 0.016123733676508295, 'depth': 6, 'l2_leaf_reg': 3.8375120854219547, 'subsample': 0.5, 'random_strength': 0.5122597493080941, 'bagging_temperature': 0.586965723140727, 'border_count': 172, 'scale_pos_weight': 2.2370234455869777}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  66%|██████▌   | 66/100 [53:54<11:09, 19.70s/it]

[I 2025-11-13 23:04:59,706] Trial 65 finished with value: 0.5856477249932134 and parameters: {'learning_rate': 0.021311294656246916, 'depth': 6, 'l2_leaf_reg': 3.825512573624525, 'subsample': 0.5, 'random_strength': 0.00020971342519825265, 'bagging_temperature': 0.6034254590485435, 'border_count': 172, 'scale_pos_weight': 2.8979083595843163}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  67%|██████▋   | 67/100 [54:12<10:34, 19.21s/it]

[I 2025-11-13 23:05:17,775] Trial 66 finished with value: 0.5874969478261977 and parameters: {'learning_rate': 0.01727805219201792, 'depth': 5, 'l2_leaf_reg': 0.10703914613430045, 'subsample': 0.5, 'random_strength': 0.5776240474905178, 'bagging_temperature': 0.6611352866594019, 'border_count': 32, 'scale_pos_weight': 2.0622521301064065}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  68%|██████▊   | 68/100 [54:36<10:57, 20.55s/it]

[I 2025-11-13 23:05:41,445] Trial 67 finished with value: 0.5522885603757697 and parameters: {'learning_rate': 0.015542371745493465, 'depth': 6, 'l2_leaf_reg': 4.853295189529996, 'subsample': 0.5, 'random_strength': 0.0016438214720008988, 'bagging_temperature': 0.5527300296307285, 'border_count': 139, 'scale_pos_weight': 1.305290824740259}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  69%|██████▉   | 69/100 [54:55<10:19, 19.97s/it]

[I 2025-11-13 23:06:00,066] Trial 68 finished with value: 0.5913289973424404 and parameters: {'learning_rate': 0.014032996478557328, 'depth': 5, 'l2_leaf_reg': 2.0344574500806325, 'subsample': 0.5, 'random_strength': 0.6347384907707038, 'bagging_temperature': 0.4713596028247006, 'border_count': 156, 'scale_pos_weight': 2.530833775377915}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  70%|███████   | 70/100 [55:14<09:49, 19.64s/it]

[I 2025-11-13 23:06:18,920] Trial 69 finished with value: 0.584079296678972 and parameters: {'learning_rate': 0.033884493086881, 'depth': 5, 'l2_leaf_reg': 1.7220887364172575, 'subsample': 0.5, 'random_strength': 0.07814523567404882, 'bagging_temperature': 0.4909838597945016, 'border_count': 154, 'scale_pos_weight': 3.0560613905690146}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  71%|███████   | 71/100 [55:38<10:13, 21.17s/it]

[I 2025-11-13 23:06:43,655] Trial 70 finished with value: 0.5838078110972104 and parameters: {'learning_rate': 0.026486017628812742, 'depth': 6, 'l2_leaf_reg': 0.17364814757842656, 'subsample': 0.5, 'random_strength': 0.5554597532023199, 'bagging_temperature': 0.6487173090094873, 'border_count': 160, 'scale_pos_weight': 2.686566286516627}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  72%|███████▏  | 72/100 [55:58<09:43, 20.83s/it]

[I 2025-11-13 23:07:03,710] Trial 71 finished with value: 0.5766524056999859 and parameters: {'learning_rate': 0.013801923665889948, 'depth': 5, 'l2_leaf_reg': 2.5611467355027724, 'subsample': 0.5, 'random_strength': 1.1280989871749094e-08, 'bagging_temperature': 0.5863160052160775, 'border_count': 202, 'scale_pos_weight': 1.6589610581205259}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 45. Best value: 0.592847:  73%|███████▎  | 73/100 [56:11<08:17, 18.43s/it]

[I 2025-11-13 23:07:16,532] Trial 72 finished with value: 0.5906640604351495 and parameters: {'learning_rate': 0.016819919149967607, 'depth': 3, 'l2_leaf_reg': 3.8004046358645103, 'subsample': 0.5, 'random_strength': 0.2578577058287856, 'bagging_temperature': 0.4589586830331326, 'border_count': 132, 'scale_pos_weight': 2.1825200203057578}. Best is trial 45 with value: 0.5928473276261367.


Best trial: 73. Best value: 0.593243:  74%|███████▍  | 74/100 [56:23<07:11, 16.58s/it]

[I 2025-11-13 23:07:28,809] Trial 73 finished with value: 0.5932432825146656 and parameters: {'learning_rate': 0.011534686245369552, 'depth': 3, 'l2_leaf_reg': 0.8614046888950311, 'subsample': 0.5, 'random_strength': 0.26413074233650147, 'bagging_temperature': 0.439332211636343, 'border_count': 111, 'scale_pos_weight': 2.1249053873597066}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 73. Best value: 0.593243:  75%|███████▌  | 75/100 [56:36<06:21, 15.26s/it]

[I 2025-11-13 23:07:40,997] Trial 74 finished with value: 0.5910048977765299 and parameters: {'learning_rate': 0.011759415013449096, 'depth': 3, 'l2_leaf_reg': 0.9525254686302915, 'subsample': 0.5, 'random_strength': 0.24433082018168195, 'bagging_temperature': 0.37639061646087923, 'border_count': 121, 'scale_pos_weight': 2.63163519987216}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 73. Best value: 0.593243:  76%|███████▌  | 76/100 [56:48<05:47, 14.50s/it]

[I 2025-11-13 23:07:53,697] Trial 75 finished with value: 0.59057320186794 and parameters: {'learning_rate': 0.011338437727312007, 'depth': 3, 'l2_leaf_reg': 0.4705873441825746, 'subsample': 0.6, 'random_strength': 0.023597994445242902, 'bagging_temperature': 0.4289402010874322, 'border_count': 97, 'scale_pos_weight': 2.561231313256065}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 73. Best value: 0.593243:  77%|███████▋  | 77/100 [57:01<05:19, 13.90s/it]

[I 2025-11-13 23:08:06,221] Trial 76 finished with value: 0.5893287349291518 and parameters: {'learning_rate': 0.011844984034873367, 'depth': 3, 'l2_leaf_reg': 1.0531618468853468, 'subsample': 0.5, 'random_strength': 0.15021487263049174, 'bagging_temperature': 0.3786198876543443, 'border_count': 95, 'scale_pos_weight': 1.9045713223578828}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 73. Best value: 0.593243:  78%|███████▊  | 78/100 [57:13<04:54, 13.40s/it]

[I 2025-11-13 23:08:18,446] Trial 77 finished with value: 0.5824863100820908 and parameters: {'learning_rate': 0.014227655953221952, 'depth': 3, 'l2_leaf_reg': 0.7847541411847597, 'subsample': 0.5, 'random_strength': 0.0597415306664187, 'bagging_temperature': 0.36611747491611535, 'border_count': 118, 'scale_pos_weight': 3.447420497995835}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 73. Best value: 0.593243:  79%|███████▉  | 79/100 [57:29<04:54, 14.04s/it]

[I 2025-11-13 23:08:33,975] Trial 78 finished with value: 0.5497483799276806 and parameters: {'learning_rate': 0.012429809968166866, 'depth': 4, 'l2_leaf_reg': 1.2914879427259724, 'subsample': 0.6, 'random_strength': 0.6929033020889108, 'bagging_temperature': 0.49794568364537317, 'border_count': 111, 'scale_pos_weight': 1.2556829990484513}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 73. Best value: 0.593243:  80%|████████  | 80/100 [57:40<04:26, 13.34s/it]

[I 2025-11-13 23:08:45,670] Trial 79 finished with value: 0.5860118928023885 and parameters: {'learning_rate': 0.01126228078525574, 'depth': 3, 'l2_leaf_reg': 0.36370415418300006, 'subsample': 0.5, 'random_strength': 0.30446164529996217, 'bagging_temperature': 0.4221089001918332, 'border_count': 84, 'scale_pos_weight': 3.0903998077103476}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 73. Best value: 0.593243:  81%|████████  | 81/100 [57:53<04:07, 13.04s/it]

[I 2025-11-13 23:08:58,013] Trial 80 finished with value: 0.5694997307492556 and parameters: {'learning_rate': 0.041626231208317745, 'depth': 3, 'l2_leaf_reg': 0.7982917603977238, 'subsample': 0.6, 'random_strength': 0.15572335835228093, 'bagging_temperature': 0.3017191583515678, 'border_count': 125, 'scale_pos_weight': 1.5944199900349565}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 73. Best value: 0.593243:  82%|████████▏ | 82/100 [58:05<03:49, 12.75s/it]

[I 2025-11-13 23:09:10,105] Trial 81 finished with value: 0.5891028270465453 and parameters: {'learning_rate': 0.017610958788478865, 'depth': 3, 'l2_leaf_reg': 1.730847516560938, 'subsample': 0.5, 'random_strength': 0.2583913819900506, 'bagging_temperature': 0.46232027428520717, 'border_count': 114, 'scale_pos_weight': 2.004792584377607}. Best is trial 73 with value: 0.5932432825146656.


Best trial: 82. Best value: 0.593718:  83%|████████▎ | 83/100 [58:17<03:33, 12.54s/it]

[I 2025-11-13 23:09:22,153] Trial 82 finished with value: 0.5937182473977077 and parameters: {'learning_rate': 0.0146056237215288, 'depth': 3, 'l2_leaf_reg': 5.399340044266502, 'subsample': 0.5, 'random_strength': 0.3376358733478773, 'bagging_temperature': 0.4663247387696527, 'border_count': 135, 'scale_pos_weight': 2.2636440427560536}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  84%|████████▍ | 84/100 [58:32<03:33, 13.32s/it]

[I 2025-11-13 23:09:37,295] Trial 83 finished with value: 0.5923263133875445 and parameters: {'learning_rate': 0.014836786100734342, 'depth': 4, 'l2_leaf_reg': 5.555503431479726, 'subsample': 0.5, 'random_strength': 0.9056421004239095, 'bagging_temperature': 0.5285992030060938, 'border_count': 142, 'scale_pos_weight': 2.5763502544532613}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  85%|████████▌ | 85/100 [58:44<03:14, 12.94s/it]

[I 2025-11-13 23:09:49,358] Trial 84 finished with value: 0.5915279896784041 and parameters: {'learning_rate': 0.014471039720456488, 'depth': 3, 'l2_leaf_reg': 5.79986260892727, 'subsample': 0.5, 'random_strength': 0.6751310816159918, 'bagging_temperature': 0.519720952911783, 'border_count': 140, 'scale_pos_weight': 2.66894726376927}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  86%|████████▌ | 86/100 [58:59<03:10, 13.59s/it]

[I 2025-11-13 23:10:04,438] Trial 85 finished with value: 0.583557414682012 and parameters: {'learning_rate': 0.014499658210486302, 'depth': 4, 'l2_leaf_reg': 5.356318140812045, 'subsample': 0.5, 'random_strength': 0.6091698047468165, 'bagging_temperature': 0.5207849356943378, 'border_count': 147, 'scale_pos_weight': 3.22180117935675}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  87%|████████▋ | 87/100 [59:11<02:51, 13.19s/it]

[I 2025-11-13 23:10:16,696] Trial 86 finished with value: 0.5874591309958178 and parameters: {'learning_rate': 0.01879282927881638, 'depth': 3, 'l2_leaf_reg': 7.838305132809922, 'subsample': 0.5, 'random_strength': 0.8262506575938443, 'bagging_temperature': 0.7046497720665268, 'border_count': 135, 'scale_pos_weight': 2.947172697048448}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  88%|████████▊ | 88/100 [59:27<02:45, 13.77s/it]

[I 2025-11-13 23:10:31,841] Trial 87 finished with value: 0.5795930463740081 and parameters: {'learning_rate': 0.021119543751650063, 'depth': 4, 'l2_leaf_reg': 4.758911348810033, 'subsample': 0.5, 'random_strength': 0.42409793373121724, 'bagging_temperature': 0.480853048435359, 'border_count': 140, 'scale_pos_weight': 3.7697684920741232}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  89%|████████▉ | 89/100 [59:41<02:35, 14.13s/it]

[I 2025-11-13 23:10:46,792] Trial 88 finished with value: 0.5915687385842139 and parameters: {'learning_rate': 0.013578765884338035, 'depth': 4, 'l2_leaf_reg': 5.932768011122378, 'subsample': 0.5, 'random_strength': 0.9073549570753967, 'bagging_temperature': 0.6156938262264471, 'border_count': 164, 'scale_pos_weight': 2.5110007948472686}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  90%|█████████ | 90/100 [59:54<02:15, 13.53s/it]

[I 2025-11-13 23:10:58,924] Trial 89 finished with value: 0.5910332294447734 and parameters: {'learning_rate': 0.013330196996433032, 'depth': 3, 'l2_leaf_reg': 6.049129158728478, 'subsample': 0.5, 'random_strength': 0.9866012348356752, 'bagging_temperature': 0.6162009712427262, 'border_count': 152, 'scale_pos_weight': 2.7833429001106587}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  91%|█████████ | 91/100 [1:00:06<01:58, 13.17s/it]

[I 2025-11-13 23:11:11,255] Trial 90 finished with value: 0.5919363768687081 and parameters: {'learning_rate': 0.015538354860891694, 'depth': 3, 'l2_leaf_reg': 5.8066546084682775, 'subsample': 0.5, 'random_strength': 0.3353775928859485, 'bagging_temperature': 0.44084936682144654, 'border_count': 156, 'scale_pos_weight': 2.560172624867333}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  92%|█████████▏| 92/100 [1:00:18<01:42, 12.87s/it]

[I 2025-11-13 23:11:23,435] Trial 91 finished with value: 0.5909420444732679 and parameters: {'learning_rate': 0.014935169574084882, 'depth': 3, 'l2_leaf_reg': 5.959408654991819, 'subsample': 0.5, 'random_strength': 0.37483649227553456, 'bagging_temperature': 0.43969591273156877, 'border_count': 144, 'scale_pos_weight': 2.557429774600411}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  93%|█████████▎| 93/100 [1:00:30<01:28, 12.67s/it]

[I 2025-11-13 23:11:35,630] Trial 92 finished with value: 0.5928757911740101 and parameters: {'learning_rate': 0.01584682145340913, 'depth': 3, 'l2_leaf_reg': 7.96216320495846, 'subsample': 0.5, 'random_strength': 0.6251370669788568, 'bagging_temperature': 0.4057139916037779, 'border_count': 130, 'scale_pos_weight': 2.421619841573521}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  94%|█████████▍| 94/100 [1:00:43<01:15, 12.54s/it]

[I 2025-11-13 23:11:47,874] Trial 93 finished with value: 0.586408539571286 and parameters: {'learning_rate': 0.016025026738172844, 'depth': 3, 'l2_leaf_reg': 8.204299376531738, 'subsample': 0.5, 'random_strength': 0.1087351651550102, 'bagging_temperature': 0.5208563019198866, 'border_count': 128, 'scale_pos_weight': 1.8695412835553853}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  95%|█████████▌| 95/100 [1:00:55<01:01, 12.38s/it]

[I 2025-11-13 23:11:59,863] Trial 94 finished with value: 0.5932342695663224 and parameters: {'learning_rate': 0.010841423645444387, 'depth': 3, 'l2_leaf_reg': 4.838138643702556, 'subsample': 0.5, 'random_strength': 0.35406663550003115, 'bagging_temperature': 0.5589433836167238, 'border_count': 164, 'scale_pos_weight': 2.2988590280021537}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  96%|█████████▌| 96/100 [1:01:07<00:49, 12.37s/it]

[I 2025-11-13 23:12:12,228] Trial 95 finished with value: 0.5925458530302179 and parameters: {'learning_rate': 0.011063106420656008, 'depth': 3, 'l2_leaf_reg': 9.976282749666913, 'subsample': 0.5, 'random_strength': 0.3534859671145761, 'bagging_temperature': 0.6366943270519934, 'border_count': 165, 'scale_pos_weight': 2.350873910286636}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  97%|█████████▋| 97/100 [1:01:19<00:36, 12.31s/it]

[I 2025-11-13 23:12:24,400] Trial 96 finished with value: 0.5934580800404573 and parameters: {'learning_rate': 0.011183112376410573, 'depth': 3, 'l2_leaf_reg': 4.504957042687911, 'subsample': 0.6, 'random_strength': 0.06154071432261156, 'bagging_temperature': 0.412221652842261, 'border_count': 103, 'scale_pos_weight': 2.2211964524125825}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  98%|█████████▊| 98/100 [1:01:32<00:24, 12.37s/it]

[I 2025-11-13 23:12:36,901] Trial 97 finished with value: 0.5759571912949183 and parameters: {'learning_rate': 0.010792785073826429, 'depth': 3, 'l2_leaf_reg': 4.515891591654092, 'subsample': 0.6, 'random_strength': 0.05843989064174885, 'bagging_temperature': 0.2875937780278376, 'border_count': 106, 'scale_pos_weight': 1.5748536470838597}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718:  99%|█████████▉| 99/100 [1:01:44<00:12, 12.33s/it]

[I 2025-11-13 23:12:49,148] Trial 98 finished with value: 0.5908427909422572 and parameters: {'learning_rate': 0.010874779924323705, 'depth': 3, 'l2_leaf_reg': 9.683893934963763, 'subsample': 0.6, 'random_strength': 0.20682254620079887, 'bagging_temperature': 0.4052544869919372, 'border_count': 96, 'scale_pos_weight': 2.313413991257198}. Best is trial 82 with value: 0.5937182473977077.


Best trial: 82. Best value: 0.593718: 100%|██████████| 100/100 [1:01:56<00:00, 37.16s/it]

[I 2025-11-13 23:13:01,011] Trial 99 finished with value: 0.5845913474839703 and parameters: {'learning_rate': 0.012992137985040772, 'depth': 3, 'l2_leaf_reg': 3.575191698052905, 'subsample': 0.5, 'random_strength': 0.03129170062505502, 'bagging_temperature': 0.4094274372243147, 'border_count': 77, 'scale_pos_weight': 1.7797178386183823}. Best is trial 82 with value: 0.5937182473977077.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.5937
  Best Hyperparameters:
    learning_rate: 0.0146056237215288
    depth: 3
    l2_leaf_reg: 5.399340044266502
    subsample: 0.5
    random_strength: 0.3376358733478773
    bagging_temperature: 0.4663247387696527
    border_count: 135
    scale_pos_weight: 2.2636440427560536





In [11]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 2000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 100, # Keep early stopping
    'random_state': 123
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.0146056237215288, 'depth': 3, 'l2_leaf_reg': 5.399340044266502, 'subsample': 0.5, 'random_strength': 0.3376358733478773, 'bagging_temperature': 0.4663247387696527, 'border_count': 135, 'scale_pos_weight': 2.2636440427560536}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.5782

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.89      0.80      0.85      4165
   Class 1.0       0.50      0.68      0.58      1235

    accuracy                           0.77      5400
   macro avg       0.70      0.74      0.71      5400
weighted avg       0.80      0.77      0.78      5400



In [12]:
print("Saving best_model...")

# Use the model's F1 score in the name
best_model.save_model("catboost_mod_f1_0.5937_cv.cbm")

print("Done.")

Saving best_model...
Done.


In [14]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 161

Top 10 Most Important Features:
                        feature  importance
152  recovery_feasibility_score    9.306899
59              liab_prct_cubed    6.711657
53          multicar_x_highrisk    6.621012
58            liab_prct_squared    6.408799
60               liab_prct_sqrt    6.031098
47              liab_x_multicar    5.400718
13                    liab_prct    5.391125
62                 liab_inverse    5.281119
61                liab_prct_log    5.067079
67     is_multi_vehicle_unclear    4.585798

Bottom 10 Least Important Features:
                     feature  importance
57   witness_police_multicar         0.0
111       first_time_claimer         0.0
76      evidence_very_strong         0.0
99             heavy_vehicle         0.0
50          witness_x_police         0.0
42             police_binary         0.0
133              high_income         0.0
110         moderate_claimer         0.0
135               low

In [15]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            1
2       4655051            0
3       6728725            1
4       9848460            1


In [16]:
prediction.to_csv("results/catboost_5937_cv_prediction.csv", index=False)