In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split, StratifiedKFold, cross_val_score

from cc5_preprocessor import Preprocessor

import joblib

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)

In [5]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Learning statistical parameters for Z-scoring...
Fit complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


In [6]:
pre.fit(X, y)
X_proc = pre.transform(X)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Learning statistical parameters for Z-scoring...
Fit complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


## CatBoost with Optuna Tuning

In [7]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season']


In [9]:


def objective(trial: optuna.trial.Trial) -> float:
    
    # 1. Define hyperparameters from Optuna
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        # --- THIS IS THE FIX ---
        # Add the static parameters (including cat_features) here
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'random_state': 42,
        'cat_features': CAT_FEATURES  # <-- Pass cat_features at initialization
    }

    # 2. Set up the model
    # The model now knows about cat_features from the start
    model = CatBoostClassifier(**params)
    
    # 3. Configure CV Splitter
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42) # 3-fold for speed

    # 4. Run CV on the TRAINING data
    # We no longer need 'fit_params' because the model already has the info
    f1_scores = cross_val_score(
        model,
        X_proc,
        y,
        cv=skf,
        scoring='f1'
    )
    
    # 5. Return the mean F1 from the folds
    return np.mean(f1_scores)

In [10]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-12 10:08:43,963] A new study created in memory with name: no-name-58900435-52e2-4174-be36-6c897c75d257



2. Starting Optuna study...


Best trial: 0. Best value: 0.478358:   1%|          | 1/100 [01:09<1:54:46, 69.56s/it]

[I 2025-11-12 10:09:53,527] Trial 0 finished with value: 0.4783576975191634 and parameters: {'learning_rate': 0.1979972206019883, 'depth': 9, 'l2_leaf_reg': 0.18697203111532762, 'subsample': 0.6, 'random_strength': 6.678003128126955e-06, 'bagging_temperature': 0.6468636251717107, 'border_count': 144, 'scale_pos_weight': 1.2195794316456112}. Best is trial 0 with value: 0.4783576975191634.


Best trial: 1. Best value: 0.573883:   2%|▏         | 2/100 [01:26<1:02:58, 38.56s/it]

[I 2025-11-12 10:10:10,388] Trial 1 finished with value: 0.5738827055797331 and parameters: {'learning_rate': 0.047359397337611624, 'depth': 4, 'l2_leaf_reg': 7.106405057753178, 'subsample': 0.6, 'random_strength': 3.571578397797617e-05, 'bagging_temperature': 0.41201235794309554, 'border_count': 245, 'scale_pos_weight': 4.588995406870287}. Best is trial 1 with value: 0.5738827055797331.


Best trial: 1. Best value: 0.573883:   3%|▎         | 3/100 [01:52<52:46, 32.65s/it]  

[I 2025-11-12 10:10:36,002] Trial 2 finished with value: 0.5675906224350532 and parameters: {'learning_rate': 0.05414790149653804, 'depth': 6, 'l2_leaf_reg': 1.9762174828578756, 'subsample': 0.7, 'random_strength': 5.919768988806869e-07, 'bagging_temperature': 0.5416036989770739, 'border_count': 84, 'scale_pos_weight': 3.4552677792715496}. Best is trial 1 with value: 0.5738827055797331.


Best trial: 1. Best value: 0.573883:   4%|▍         | 4/100 [02:12<44:23, 27.75s/it]

[I 2025-11-12 10:10:56,235] Trial 3 finished with value: 0.5338405817436578 and parameters: {'learning_rate': 0.1825607509555118, 'depth': 5, 'l2_leaf_reg': 0.20619731844205696, 'subsample': 0.8, 'random_strength': 0.0019996668336515807, 'bagging_temperature': 0.11875661980128593, 'border_count': 147, 'scale_pos_weight': 5.659740409021578}. Best is trial 1 with value: 0.5738827055797331.


Best trial: 1. Best value: 0.573883:   5%|▌         | 5/100 [02:45<46:50, 29.59s/it]

[I 2025-11-12 10:11:29,087] Trial 4 finished with value: 0.5503964562583811 and parameters: {'learning_rate': 0.03777658033618974, 'depth': 7, 'l2_leaf_reg': 0.002524982012017806, 'subsample': 0.7, 'random_strength': 1.71847760411685e-05, 'bagging_temperature': 0.42086501512565866, 'border_count': 74, 'scale_pos_weight': 7.5481745649401315}. Best is trial 1 with value: 0.5738827055797331.


Best trial: 1. Best value: 0.573883:   6%|▌         | 6/100 [02:58<37:36, 24.00s/it]

[I 2025-11-12 10:11:42,247] Trial 5 finished with value: 0.5522707797177705 and parameters: {'learning_rate': 0.2087531466790608, 'depth': 3, 'l2_leaf_reg': 0.37748212034354117, 'subsample': 0.7, 'random_strength': 0.0026149740690000573, 'bagging_temperature': 0.4406425021908983, 'border_count': 162, 'scale_pos_weight': 7.188163010676252}. Best is trial 1 with value: 0.5738827055797331.


Best trial: 1. Best value: 0.573883:   7%|▋         | 7/100 [03:23<37:47, 24.38s/it]

[I 2025-11-12 10:12:07,414] Trial 6 finished with value: 0.5450149147079534 and parameters: {'learning_rate': 0.010843273255260424, 'depth': 6, 'l2_leaf_reg': 0.029870777024379187, 'subsample': 0.9, 'random_strength': 0.118152464310231, 'bagging_temperature': 0.09430247926522628, 'border_count': 135, 'scale_pos_weight': 8.686457953584089}. Best is trial 1 with value: 0.5738827055797331.


Best trial: 1. Best value: 0.573883:   8%|▊         | 8/100 [03:38<33:01, 21.54s/it]

[I 2025-11-12 10:12:22,852] Trial 7 finished with value: 0.5468717840650383 and parameters: {'learning_rate': 0.14028246470041097, 'depth': 4, 'l2_leaf_reg': 0.018599411922249683, 'subsample': 0.9, 'random_strength': 3.274549510748025e-07, 'bagging_temperature': 0.3642670837409886, 'border_count': 178, 'scale_pos_weight': 5.780521673938525}. Best is trial 1 with value: 0.5738827055797331.


Best trial: 8. Best value: 0.579009:   9%|▉         | 9/100 [04:49<56:00, 36.93s/it]

[I 2025-11-12 10:13:33,635] Trial 8 finished with value: 0.5790094314126616 and parameters: {'learning_rate': 0.0323673193722595, 'depth': 9, 'l2_leaf_reg': 7.935800929987702, 'subsample': 1.0, 'random_strength': 0.04496326315185521, 'bagging_temperature': 0.8175738563837394, 'border_count': 41, 'scale_pos_weight': 7.186958235291471}. Best is trial 8 with value: 0.5790094314126616.


Best trial: 9. Best value: 0.58312:  10%|█         | 10/100 [05:04<45:12, 30.14s/it]

[I 2025-11-12 10:13:48,561] Trial 9 finished with value: 0.5831204424990215 and parameters: {'learning_rate': 0.030349157879906795, 'depth': 4, 'l2_leaf_reg': 0.0072987857193819675, 'subsample': 0.9, 'random_strength': 2.1482438069078962e-08, 'bagging_temperature': 0.5674257896520676, 'border_count': 98, 'scale_pos_weight': 3.1541439715324624}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  11%|█         | 11/100 [05:16<36:13, 24.42s/it]

[I 2025-11-12 10:14:00,006] Trial 10 finished with value: 0.5760493728007909 and parameters: {'learning_rate': 0.015367927385152367, 'depth': 3, 'l2_leaf_reg': 0.001047631180715527, 'subsample': 1.0, 'random_strength': 1.0650470616556279e-08, 'bagging_temperature': 0.9795235430777316, 'border_count': 94, 'scale_pos_weight': 1.6097533383696754}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  12%|█▏        | 12/100 [06:57<1:10:17, 47.92s/it]

[I 2025-11-12 10:15:41,685] Trial 11 finished with value: 0.4875273977697425 and parameters: {'learning_rate': 0.025602057185755842, 'depth': 10, 'l2_leaf_reg': 0.008835670077498435, 'subsample': 1.0, 'random_strength': 0.21611526043282905, 'bagging_temperature': 0.8168857386576971, 'border_count': 41, 'scale_pos_weight': 3.1993102708357855}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  13%|█▎        | 13/100 [07:40<1:07:03, 46.24s/it]

[I 2025-11-12 10:16:24,065] Trial 12 finished with value: 0.5740182108307788 and parameters: {'learning_rate': 0.023704737355947546, 'depth': 8, 'l2_leaf_reg': 1.2878197326264214, 'subsample': 0.9, 'random_strength': 0.0007975194977697433, 'bagging_temperature': 0.749439926852676, 'border_count': 35, 'scale_pos_weight': 9.540149856127222}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  14%|█▍        | 14/100 [09:10<1:25:35, 59.72s/it]

[I 2025-11-12 10:17:54,918] Trial 13 finished with value: 0.5033539911935616 and parameters: {'learning_rate': 0.09692133147488535, 'depth': 10, 'l2_leaf_reg': 0.05577562875940612, 'subsample': 1.0, 'random_strength': 1.0373352872153193e-08, 'bagging_temperature': 0.9943857103809699, 'border_count': 114, 'scale_pos_weight': 3.320734375428821}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  15%|█▌        | 15/100 [09:59<1:19:44, 56.29s/it]

[I 2025-11-12 10:18:43,254] Trial 14 finished with value: 0.5190825112961465 and parameters: {'learning_rate': 0.08700883294005686, 'depth': 8, 'l2_leaf_reg': 0.007459852553663662, 'subsample': 0.8, 'random_strength': 0.014379797671559271, 'bagging_temperature': 0.7551927790356348, 'border_count': 60, 'scale_pos_weight': 6.757093222969395}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  16%|█▌        | 16/100 [10:32<1:09:06, 49.36s/it]

[I 2025-11-12 10:19:16,538] Trial 15 finished with value: 0.5802076176437915 and parameters: {'learning_rate': 0.026993950599653233, 'depth': 7, 'l2_leaf_reg': 9.008615087652506, 'subsample': 0.5, 'random_strength': 0.4997181382837521, 'bagging_temperature': 0.600435493023613, 'border_count': 199, 'scale_pos_weight': 4.485604538086864}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  17%|█▋        | 17/100 [10:53<56:17, 40.70s/it]  

[I 2025-11-12 10:19:37,081] Trial 16 finished with value: 0.5704441283387935 and parameters: {'learning_rate': 0.017870099891761037, 'depth': 5, 'l2_leaf_reg': 0.5332681648279493, 'subsample': 0.5, 'random_strength': 0.5624409258501686, 'bagging_temperature': 0.23004796057637517, 'border_count': 205, 'scale_pos_weight': 4.606220881031577}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  18%|█▊        | 18/100 [11:28<53:34, 39.20s/it]

[I 2025-11-12 10:20:12,783] Trial 17 finished with value: 0.49433855025291074 and parameters: {'learning_rate': 0.08482505879034628, 'depth': 7, 'l2_leaf_reg': 0.004554846920814696, 'subsample': 0.5, 'random_strength': 0.00023017483744172836, 'bagging_temperature': 0.5927842189720804, 'border_count': 206, 'scale_pos_weight': 2.4063992446843865}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 9. Best value: 0.58312:  19%|█▉        | 19/100 [11:49<45:14, 33.52s/it]

[I 2025-11-12 10:20:33,065] Trial 18 finished with value: 0.571443725722011 and parameters: {'learning_rate': 0.010335005962613053, 'depth': 5, 'l2_leaf_reg': 0.07291882112334826, 'subsample': 0.6, 'random_strength': 9.30635219376632e-07, 'bagging_temperature': 0.27925024597845144, 'border_count': 236, 'scale_pos_weight': 4.561503509464297}. Best is trial 9 with value: 0.5831204424990215.


Best trial: 19. Best value: 0.588967:  20%|██        | 20/100 [12:05<38:00, 28.50s/it]

[I 2025-11-12 10:20:49,884] Trial 19 finished with value: 0.5889666970299015 and parameters: {'learning_rate': 0.019385126792096998, 'depth': 4, 'l2_leaf_reg': 2.3009793288383413, 'subsample': 0.8, 'random_strength': 9.276763101027594e-08, 'bagging_temperature': 0.6602739461164272, 'border_count': 192, 'scale_pos_weight': 2.536999040713331}. Best is trial 19 with value: 0.5889666970299015.


Best trial: 20. Best value: 0.589072:  21%|██        | 21/100 [12:21<32:27, 24.65s/it]

[I 2025-11-12 10:21:05,546] Trial 20 finished with value: 0.5890723106551565 and parameters: {'learning_rate': 0.017217186353929746, 'depth': 4, 'l2_leaf_reg': 1.203907531894403, 'subsample': 0.8, 'random_strength': 8.090514556902086e-08, 'bagging_temperature': 0.6816713032895765, 'border_count': 111, 'scale_pos_weight': 2.3194656626236996}. Best is trial 20 with value: 0.5890723106551565.


Best trial: 20. Best value: 0.589072:  22%|██▏       | 22/100 [12:37<28:27, 21.89s/it]

[I 2025-11-12 10:21:21,005] Trial 21 finished with value: 0.5861296004527294 and parameters: {'learning_rate': 0.015490947939697616, 'depth': 4, 'l2_leaf_reg': 2.1335751270598804, 'subsample': 0.8, 'random_strength': 7.522722057241058e-08, 'bagging_temperature': 0.6721837978465394, 'border_count': 114, 'scale_pos_weight': 2.1877875550527492}. Best is trial 20 with value: 0.5890723106551565.


Best trial: 20. Best value: 0.589072:  23%|██▎       | 23/100 [12:49<24:27, 19.06s/it]

[I 2025-11-12 10:21:33,472] Trial 22 finished with value: 0.5882413788507824 and parameters: {'learning_rate': 0.017046985156175868, 'depth': 3, 'l2_leaf_reg': 2.0023657726599935, 'subsample': 0.8, 'random_strength': 8.769412878297096e-08, 'bagging_temperature': 0.7131674435115389, 'border_count': 121, 'scale_pos_weight': 1.9589908552746436}. Best is trial 20 with value: 0.5890723106551565.


Best trial: 23. Best value: 0.591231:  24%|██▍       | 24/100 [13:01<21:35, 17.05s/it]

[I 2025-11-12 10:21:45,834] Trial 23 finished with value: 0.5912313598727188 and parameters: {'learning_rate': 0.01698610378496628, 'depth': 3, 'l2_leaf_reg': 3.376967881895214, 'subsample': 0.8, 'random_strength': 3.3180642358254912e-06, 'bagging_temperature': 0.887823137052201, 'border_count': 121, 'scale_pos_weight': 2.305486072660985}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  25%|██▌       | 25/100 [13:14<19:41, 15.75s/it]

[I 2025-11-12 10:21:58,539] Trial 24 finished with value: 0.5333088427410844 and parameters: {'learning_rate': 0.02084780136708166, 'depth': 3, 'l2_leaf_reg': 0.8154433576036507, 'subsample': 0.8, 'random_strength': 2.6624200100802776e-06, 'bagging_temperature': 0.8292296926356177, 'border_count': 174, 'scale_pos_weight': 1.13099340758122}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  26%|██▌       | 26/100 [13:33<20:43, 16.80s/it]

[I 2025-11-12 10:22:17,787] Trial 25 finished with value: 0.5890585060440592 and parameters: {'learning_rate': 0.012742715325919383, 'depth': 5, 'l2_leaf_reg': 4.051945413229491, 'subsample': 0.7, 'random_strength': 1.149090978952854e-07, 'bagging_temperature': 0.8987606457310995, 'border_count': 131, 'scale_pos_weight': 2.654017109946521}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  27%|██▋       | 27/100 [13:52<21:15, 17.47s/it]

[I 2025-11-12 10:22:36,832] Trial 26 finished with value: 0.5779447496825525 and parameters: {'learning_rate': 0.014010526898338952, 'depth': 5, 'l2_leaf_reg': 4.145275013655899, 'subsample': 0.7, 'random_strength': 3.4261663681517564e-06, 'bagging_temperature': 0.9208145406456076, 'border_count': 124, 'scale_pos_weight': 3.902382045590831}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  28%|██▊       | 28/100 [14:17<23:29, 19.58s/it]

[I 2025-11-12 10:23:01,323] Trial 27 finished with value: 0.5891540996869744 and parameters: {'learning_rate': 0.012477419334352048, 'depth': 6, 'l2_leaf_reg': 4.2214410904102335, 'subsample': 0.7, 'random_strength': 2.1280514998255633e-07, 'bagging_temperature': 0.8986262277053021, 'border_count': 103, 'scale_pos_weight': 2.6374165231523845}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  29%|██▉       | 29/100 [14:40<24:32, 20.73s/it]

[I 2025-11-12 10:23:24,752] Trial 28 finished with value: 0.5520617767223817 and parameters: {'learning_rate': 0.03954144617223863, 'depth': 6, 'l2_leaf_reg': 0.9347595137431001, 'subsample': 0.6, 'random_strength': 1.4501790979769223e-06, 'bagging_temperature': 0.8643449732858021, 'border_count': 64, 'scale_pos_weight': 1.665833136081925}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  30%|███       | 30/100 [14:53<21:25, 18.36s/it]

[I 2025-11-12 10:23:37,566] Trial 29 finished with value: 0.5067622745237923 and parameters: {'learning_rate': 0.012053147058466456, 'depth': 3, 'l2_leaf_reg': 0.23502922715374236, 'subsample': 0.9, 'random_strength': 8.938682937034273e-06, 'bagging_temperature': 0.9539763049777458, 'border_count': 150, 'scale_pos_weight': 1.020828499281305}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  31%|███       | 31/100 [15:09<20:10, 17.54s/it]

[I 2025-11-12 10:23:53,201] Trial 30 finished with value: 0.5348209466242122 and parameters: {'learning_rate': 0.27949822386150147, 'depth': 4, 'l2_leaf_reg': 5.066939237407143, 'subsample': 0.7, 'random_strength': 9.081880471098139e-05, 'bagging_temperature': 0.7691598277575927, 'border_count': 99, 'scale_pos_weight': 3.718848317583315}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  32%|███▏      | 32/100 [15:27<20:15, 17.87s/it]

[I 2025-11-12 10:24:11,837] Trial 31 finished with value: 0.5883912560331818 and parameters: {'learning_rate': 0.013135208921079576, 'depth': 5, 'l2_leaf_reg': 3.7586659797807878, 'subsample': 0.7, 'random_strength': 2.2980331925536492e-07, 'bagging_temperature': 0.8790541939580062, 'border_count': 132, 'scale_pos_weight': 2.6839327806576567}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  33%|███▎      | 33/100 [15:53<22:23, 20.05s/it]

[I 2025-11-12 10:24:36,980] Trial 32 finished with value: 0.5845534133794087 and parameters: {'learning_rate': 0.020637000981754423, 'depth': 6, 'l2_leaf_reg': 3.3304019309049364, 'subsample': 0.6, 'random_strength': 4.246224914645629e-08, 'bagging_temperature': 0.9028510861735307, 'border_count': 109, 'scale_pos_weight': 2.562091144968248}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  34%|███▍      | 34/100 [16:07<20:18, 18.47s/it]

[I 2025-11-12 10:24:51,752] Trial 33 finished with value: 0.5752342777209197 and parameters: {'learning_rate': 0.012432720160762564, 'depth': 4, 'l2_leaf_reg': 1.07180566139926, 'subsample': 0.7, 'random_strength': 2.1968509974357223e-07, 'bagging_temperature': 0.9051992181536965, 'border_count': 83, 'scale_pos_weight': 4.074127599770839}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  35%|███▌      | 35/100 [16:26<20:09, 18.60s/it]

[I 2025-11-12 10:25:10,678] Trial 34 finished with value: 0.5874719341317552 and parameters: {'learning_rate': 0.010531422795313617, 'depth': 5, 'l2_leaf_reg': 5.4934214544463895, 'subsample': 0.8, 'random_strength': 5.364933542408276e-07, 'bagging_temperature': 0.5134389008666167, 'border_count': 141, 'scale_pos_weight': 2.907232494711061}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  36%|███▌      | 36/100 [16:51<21:55, 20.55s/it]

[I 2025-11-12 10:25:35,782] Trial 35 finished with value: 0.527116026021317 and parameters: {'learning_rate': 0.06531169843248573, 'depth': 6, 'l2_leaf_reg': 0.48067338972828216, 'subsample': 0.6, 'random_strength': 3.8019181309360784e-05, 'bagging_temperature': 0.8027713266508604, 'border_count': 162, 'scale_pos_weight': 1.6898168685217367}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  37%|███▋      | 37/100 [17:13<21:55, 20.89s/it]

[I 2025-11-12 10:25:57,440] Trial 36 finished with value: 0.5644382090915855 and parameters: {'learning_rate': 0.015125672052550247, 'depth': 5, 'l2_leaf_reg': 9.778019048842907, 'subsample': 0.7, 'random_strength': 3.1033440960957358e-06, 'bagging_temperature': 0.6776423589866373, 'border_count': 81, 'scale_pos_weight': 5.219464086073998}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  38%|███▊      | 38/100 [17:27<19:27, 18.84s/it]

[I 2025-11-12 10:26:11,495] Trial 37 finished with value: 0.5652071287121301 and parameters: {'learning_rate': 0.0458295152968045, 'depth': 3, 'l2_leaf_reg': 2.7861044434150823, 'subsample': 0.8, 'random_strength': 3.306161631449007e-08, 'bagging_temperature': 0.9389473775854277, 'border_count': 152, 'scale_pos_weight': 5.187772144958252}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  39%|███▉      | 39/100 [17:43<18:15, 17.96s/it]

[I 2025-11-12 10:26:27,420] Trial 38 finished with value: 0.5848344781184652 and parameters: {'learning_rate': 0.022889926218255755, 'depth': 4, 'l2_leaf_reg': 1.4651553503644008, 'subsample': 0.7, 'random_strength': 1.984709237306061e-07, 'bagging_temperature': 0.8530697064225865, 'border_count': 124, 'scale_pos_weight': 2.023470253526159}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  40%|████      | 40/100 [18:11<21:04, 21.08s/it]

[I 2025-11-12 10:26:55,778] Trial 39 finished with value: 0.57976656659875 and parameters: {'learning_rate': 0.010134828109003556, 'depth': 6, 'l2_leaf_reg': 0.31617413751036777, 'subsample': 0.7, 'random_strength': 2.0735363047046365e-05, 'bagging_temperature': 0.7263268774513565, 'border_count': 107, 'scale_pos_weight': 3.652134465271423}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  41%|████      | 41/100 [18:43<23:57, 24.36s/it]

[I 2025-11-12 10:27:27,797] Trial 40 finished with value: 0.5226222165662786 and parameters: {'learning_rate': 0.03488503833687245, 'depth': 7, 'l2_leaf_reg': 0.1268821701686469, 'subsample': 0.8, 'random_strength': 1.2970292700291647e-06, 'bagging_temperature': 0.4548298824031854, 'border_count': 67, 'scale_pos_weight': 1.485526642572458}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  42%|████▏     | 42/100 [19:00<21:24, 22.14s/it]

[I 2025-11-12 10:27:44,765] Trial 41 finished with value: 0.5899013384179438 and parameters: {'learning_rate': 0.01724360894596843, 'depth': 4, 'l2_leaf_reg': 1.8773721328526622, 'subsample': 0.8, 'random_strength': 1.0596126814275699e-07, 'bagging_temperature': 0.6402166065670809, 'border_count': 255, 'scale_pos_weight': 2.837626556145625}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  43%|████▎     | 43/100 [19:17<19:30, 20.54s/it]

[I 2025-11-12 10:28:01,553] Trial 42 finished with value: 0.5863062245180078 and parameters: {'learning_rate': 0.01737149914516329, 'depth': 4, 'l2_leaf_reg': 0.7294827954389431, 'subsample': 0.9, 'random_strength': 4.450987061364409e-07, 'bagging_temperature': 0.02246871380004234, 'border_count': 133, 'scale_pos_weight': 2.8132580184414326}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  44%|████▍     | 44/100 [19:30<17:01, 18.24s/it]

[I 2025-11-12 10:28:14,432] Trial 43 finished with value: 0.5879445687715039 and parameters: {'learning_rate': 0.012890904829777857, 'depth': 3, 'l2_leaf_reg': 5.621480170330246, 'subsample': 0.8, 'random_strength': 1.0708321629848783e-07, 'bagging_temperature': 0.7876366764430255, 'border_count': 251, 'scale_pos_weight': 2.13893567586701}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  45%|████▌     | 45/100 [19:50<17:08, 18.71s/it]

[I 2025-11-12 10:28:34,235] Trial 44 finished with value: 0.5813336008324121 and parameters: {'learning_rate': 0.029261488833368394, 'depth': 5, 'l2_leaf_reg': 1.5691662392937702, 'subsample': 0.7, 'random_strength': 2.4470255392903193e-08, 'bagging_temperature': 0.6266427502787019, 'border_count': 232, 'scale_pos_weight': 3.0792696852291477}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  46%|████▌     | 46/100 [20:08<16:41, 18.54s/it]

[I 2025-11-12 10:28:52,388] Trial 45 finished with value: 0.5519739901508695 and parameters: {'learning_rate': 0.015384485399753853, 'depth': 4, 'l2_leaf_reg': 3.4157856870564305, 'subsample': 0.9, 'random_strength': 6.302660927997439e-06, 'bagging_temperature': 0.5458929230854184, 'border_count': 98, 'scale_pos_weight': 6.265259248580149}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  47%|████▋     | 47/100 [20:23<15:34, 17.63s/it]

[I 2025-11-12 10:29:07,881] Trial 46 finished with value: 0.5378431939406442 and parameters: {'learning_rate': 0.022694908962718283, 'depth': 4, 'l2_leaf_reg': 5.932557535269641, 'subsample': 0.8, 'random_strength': 5.947901131745537e-07, 'bagging_temperature': 0.3498956804905604, 'border_count': 143, 'scale_pos_weight': 8.203333371436447}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  48%|████▊     | 48/100 [20:41<15:21, 17.72s/it]

[I 2025-11-12 10:29:25,801] Trial 47 finished with value: 0.5836866005446154 and parameters: {'learning_rate': 0.011769737513707574, 'depth': 5, 'l2_leaf_reg': 1.6481298398174542, 'subsample': 0.7, 'random_strength': 3.475517043664105e-08, 'bagging_temperature': 0.7085297686133182, 'border_count': 89, 'scale_pos_weight': 3.267326551972428}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  49%|████▉     | 49/100 [20:53<13:29, 15.87s/it]

[I 2025-11-12 10:29:37,361] Trial 48 finished with value: 0.5655037074097569 and parameters: {'learning_rate': 0.01878071663208612, 'depth': 3, 'l2_leaf_reg': 0.5493864462056359, 'subsample': 0.8, 'random_strength': 1.0161362090661894e-08, 'bagging_temperature': 0.9968841929126011, 'border_count': 53, 'scale_pos_weight': 1.4401804900523278}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  50%|█████     | 50/100 [21:18<15:34, 18.69s/it]

[I 2025-11-12 10:30:02,647] Trial 49 finished with value: 0.5796644025185623 and parameters: {'learning_rate': 0.02601521055969337, 'depth': 6, 'l2_leaf_reg': 6.718529105094235, 'subsample': 0.9, 'random_strength': 2.5571301836036084e-07, 'bagging_temperature': 0.48598210709871736, 'border_count': 105, 'scale_pos_weight': 2.3011255463166735}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  51%|█████     | 51/100 [21:31<13:46, 16.86s/it]

[I 2025-11-12 10:30:15,228] Trial 50 finished with value: 0.5679518775229586 and parameters: {'learning_rate': 0.14828851657252354, 'depth': 3, 'l2_leaf_reg': 2.7769234304537913, 'subsample': 0.7, 'random_strength': 5.977685461079201e-08, 'bagging_temperature': 0.8566889152988175, 'border_count': 161, 'scale_pos_weight': 4.188311878519139}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  52%|█████▏    | 52/100 [21:48<13:29, 16.87s/it]

[I 2025-11-12 10:30:32,108] Trial 51 finished with value: 0.5877946776491423 and parameters: {'learning_rate': 0.020023830464814107, 'depth': 4, 'l2_leaf_reg': 2.8088254440217555, 'subsample': 0.8, 'random_strength': 1.1758769494516872e-07, 'bagging_temperature': 0.6399582194505743, 'border_count': 220, 'scale_pos_weight': 2.7787045619814466}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  53%|█████▎    | 53/100 [22:05<13:19, 17.01s/it]

[I 2025-11-12 10:30:49,459] Trial 52 finished with value: 0.5786099547970053 and parameters: {'learning_rate': 0.014154376754958616, 'depth': 4, 'l2_leaf_reg': 2.1041632286008283, 'subsample': 0.8, 'random_strength': 1.9202505265059414e-08, 'bagging_temperature': 0.6760281769958028, 'border_count': 189, 'scale_pos_weight': 3.480859064788303}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  54%|█████▍    | 54/100 [22:21<12:45, 16.65s/it]

[I 2025-11-12 10:31:05,255] Trial 53 finished with value: 0.5899844588154056 and parameters: {'learning_rate': 0.01684297398033219, 'depth': 4, 'l2_leaf_reg': 1.0774512765047999, 'subsample': 0.8, 'random_strength': 1.2947808308836338e-07, 'bagging_temperature': 0.7489590904469581, 'border_count': 121, 'scale_pos_weight': 2.5122508779197954}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  55%|█████▌    | 55/100 [22:41<13:16, 17.70s/it]

[I 2025-11-12 10:31:25,426] Trial 54 finished with value: 0.5788462877597923 and parameters: {'learning_rate': 0.017731277234168483, 'depth': 5, 'l2_leaf_reg': 1.1111330813398297, 'subsample': 0.9, 'random_strength': 1.3227404731204926e-06, 'bagging_temperature': 0.7495792287788556, 'border_count': 120, 'scale_pos_weight': 1.9032524109228373}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  56%|█████▌    | 56/100 [22:57<12:31, 17.07s/it]

[I 2025-11-12 10:31:41,013] Trial 55 finished with value: 0.5899716749528048 and parameters: {'learning_rate': 0.015992036419334957, 'depth': 4, 'l2_leaf_reg': 0.6372496061720846, 'subsample': 0.8, 'random_strength': 1.3076122670275382e-07, 'bagging_temperature': 0.9540623903591333, 'border_count': 131, 'scale_pos_weight': 2.4164401203121937}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  57%|█████▋    | 57/100 [23:42<18:25, 25.70s/it]

[I 2025-11-12 10:32:26,854] Trial 56 finished with value: 0.5701080880614248 and parameters: {'learning_rate': 0.016432615229489102, 'depth': 8, 'l2_leaf_reg': 0.3207587041968969, 'subsample': 0.8, 'random_strength': 6.365822430374429e-07, 'bagging_temperature': 0.9595774489282445, 'border_count': 139, 'scale_pos_weight': 2.391426277990006}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  58%|█████▊    | 58/100 [23:56<15:21, 21.93s/it]

[I 2025-11-12 10:32:40,000] Trial 57 finished with value: 0.55513710200204 and parameters: {'learning_rate': 0.023903400419305595, 'depth': 3, 'l2_leaf_reg': 0.6220603351978283, 'subsample': 0.9, 'random_strength': 1.7752631690831793e-07, 'bagging_temperature': 0.5933312010713793, 'border_count': 174, 'scale_pos_weight': 1.3304772069687245}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  59%|█████▉    | 59/100 [24:09<13:11, 19.30s/it]

[I 2025-11-12 10:32:53,152] Trial 58 finished with value: 0.5827184368414643 and parameters: {'learning_rate': 0.029018552997987026, 'depth': 3, 'l2_leaf_reg': 0.15910073105699984, 'subsample': 0.8, 'random_strength': 5.237633579978583e-08, 'bagging_temperature': 0.8126557695862893, 'border_count': 113, 'scale_pos_weight': 3.1993820806785633}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  60%|██████    | 60/100 [25:25<24:21, 36.53s/it]

[I 2025-11-12 10:34:09,883] Trial 59 finished with value: 0.5721975569878861 and parameters: {'learning_rate': 0.01440064566910929, 'depth': 9, 'l2_leaf_reg': 1.3378045537176833, 'subsample': 0.8, 'random_strength': 0.004156386834733601, 'bagging_temperature': 0.7154457051446899, 'border_count': 156, 'scale_pos_weight': 9.612410864378681}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  61%|██████    | 61/100 [25:44<20:17, 31.21s/it]

[I 2025-11-12 10:34:28,689] Trial 60 finished with value: 0.5838340278177475 and parameters: {'learning_rate': 0.01134882783664049, 'depth': 4, 'l2_leaf_reg': 0.44536361595225615, 'subsample': 0.8, 'random_strength': 1.904081319278573e-08, 'bagging_temperature': 0.8354516931175164, 'border_count': 74, 'scale_pos_weight': 1.848527470305213}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  62%|██████▏   | 62/100 [26:03<17:29, 27.63s/it]

[I 2025-11-12 10:34:47,954] Trial 61 finished with value: 0.588759303262203 and parameters: {'learning_rate': 0.013285342989144517, 'depth': 5, 'l2_leaf_reg': 3.9641207534030785, 'subsample': 0.7, 'random_strength': 1.3056041586930424e-07, 'bagging_temperature': 0.887588701118805, 'border_count': 130, 'scale_pos_weight': 2.397294254316175}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  63%|██████▎   | 63/100 [26:19<14:52, 24.11s/it]

[I 2025-11-12 10:35:03,870] Trial 62 finished with value: 0.589508325194553 and parameters: {'learning_rate': 0.015913516023026653, 'depth': 4, 'l2_leaf_reg': 0.8091905367850111, 'subsample': 0.8, 'random_strength': 5.918966556392666e-08, 'bagging_temperature': 0.9414495519447954, 'border_count': 117, 'scale_pos_weight': 2.7028436890670964}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  64%|██████▍   | 64/100 [26:36<13:11, 21.99s/it]

[I 2025-11-12 10:35:20,888] Trial 63 finished with value: 0.5848888980495305 and parameters: {'learning_rate': 0.021460192329147754, 'depth': 4, 'l2_leaf_reg': 0.8259324428668562, 'subsample': 0.8, 'random_strength': 3.480870918107306e-07, 'bagging_temperature': 0.9649680315934832, 'border_count': 118, 'scale_pos_weight': 3.0930755394914993}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  65%|██████▌   | 65/100 [26:53<11:53, 20.38s/it]

[I 2025-11-12 10:35:37,517] Trial 64 finished with value: 0.5792608084387701 and parameters: {'learning_rate': 0.0161600636959618, 'depth': 4, 'l2_leaf_reg': 0.03351420497721953, 'subsample': 0.8, 'random_strength': 6.257157734659142e-08, 'bagging_temperature': 0.9292800047005336, 'border_count': 90, 'scale_pos_weight': 3.5919603620725344}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  66%|██████▌   | 66/100 [27:06<10:20, 18.25s/it]

[I 2025-11-12 10:35:50,791] Trial 65 finished with value: 0.588584081189731 and parameters: {'learning_rate': 0.018667899422865397, 'depth': 3, 'l2_leaf_reg': 0.25913681652945203, 'subsample': 0.8, 'random_strength': 2.055137420963775e-06, 'bagging_temperature': 0.786173764361666, 'border_count': 102, 'scale_pos_weight': 2.1650452759535286}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  67%|██████▋   | 67/100 [27:24<09:54, 18.00s/it]

[I 2025-11-12 10:36:08,216] Trial 66 finished with value: 0.5856199108524553 and parameters: {'learning_rate': 0.025065644629734686, 'depth': 4, 'l2_leaf_reg': 1.8833008667074096, 'subsample': 0.9, 'random_strength': 7.42148153484108e-07, 'bagging_temperature': 0.9911935569295419, 'border_count': 113, 'scale_pos_weight': 2.920876830340561}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  68%|██████▊   | 68/100 [27:37<08:52, 16.65s/it]

[I 2025-11-12 10:36:21,728] Trial 67 finished with value: 0.5822573916463378 and parameters: {'learning_rate': 0.011582936364515479, 'depth': 3, 'l2_leaf_reg': 0.9599720776733696, 'subsample': 0.8, 'random_strength': 1.6165401592177143e-08, 'bagging_temperature': 0.940406707881457, 'border_count': 127, 'scale_pos_weight': 1.7359980843796066}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  69%|██████▉   | 69/100 [27:53<08:31, 16.48s/it]

[I 2025-11-12 10:36:37,816] Trial 68 finished with value: 0.5753441468713784 and parameters: {'learning_rate': 0.05925760826815591, 'depth': 4, 'l2_leaf_reg': 1.2169512812849943, 'subsample': 0.9, 'random_strength': 6.9037943342504954e-06, 'bagging_temperature': 0.8430004323277109, 'border_count': 145, 'scale_pos_weight': 2.580306346372046}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  70%|███████   | 70/100 [28:14<08:48, 17.63s/it]

[I 2025-11-12 10:36:58,108] Trial 69 finished with value: 0.5765031584612431 and parameters: {'learning_rate': 0.017097085136850047, 'depth': 5, 'l2_leaf_reg': 0.6488211087527305, 'subsample': 0.8, 'random_strength': 3.2982690134906245e-07, 'bagging_temperature': 0.8843291780346509, 'border_count': 94, 'scale_pos_weight': 3.8890264335292453}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  71%|███████   | 71/100 [29:08<13:48, 28.56s/it]

[I 2025-11-12 10:37:52,192] Trial 70 finished with value: 0.5845881708547185 and parameters: {'learning_rate': 0.01423923954232697, 'depth': 8, 'l2_leaf_reg': 2.1101597863137207, 'subsample': 0.8, 'random_strength': 2.972130181259298e-08, 'bagging_temperature': 0.7667927511580792, 'border_count': 135, 'scale_pos_weight': 4.343627110178038}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  72%|███████▏  | 72/100 [29:30<12:24, 26.59s/it]

[I 2025-11-12 10:38:14,181] Trial 71 finished with value: 0.588441879388706 and parameters: {'learning_rate': 0.013165600262595539, 'depth': 5, 'l2_leaf_reg': 7.874581605135835, 'subsample': 0.7, 'random_strength': 8.257621074917021e-08, 'bagging_temperature': 0.9123245783017112, 'border_count': 118, 'scale_pos_weight': 2.6869052103863944}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  73%|███████▎  | 73/100 [29:57<12:06, 26.92s/it]

[I 2025-11-12 10:38:41,875] Trial 72 finished with value: 0.5801192884366104 and parameters: {'learning_rate': 0.019532802068954566, 'depth': 6, 'l2_leaf_reg': 4.191648213605072, 'subsample': 0.7, 'random_strength': 1.4825932418459533e-07, 'bagging_temperature': 0.8694737734395757, 'border_count': 108, 'scale_pos_weight': 2.077143625033112}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  74%|███████▍  | 74/100 [30:13<10:12, 23.55s/it]

[I 2025-11-12 10:38:57,569] Trial 73 finished with value: 0.5809183038002076 and parameters: {'learning_rate': 0.015308974369749855, 'depth': 4, 'l2_leaf_reg': 2.6966424147944683, 'subsample': 0.7, 'random_strength': 4.766510976211002e-08, 'bagging_temperature': 0.9676700642006008, 'border_count': 138, 'scale_pos_weight': 3.42211066900816}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  75%|███████▌  | 75/100 [30:35<09:35, 23.00s/it]

[I 2025-11-12 10:39:19,278] Trial 74 finished with value: 0.5850039756867734 and parameters: {'learning_rate': 0.011000603880056442, 'depth': 5, 'l2_leaf_reg': 0.0010084862396239178, 'subsample': 0.8, 'random_strength': 0.0002271034968539119, 'bagging_temperature': 0.909614924330063, 'border_count': 125, 'scale_pos_weight': 2.8866086728554525}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  76%|███████▌  | 76/100 [30:52<08:33, 21.39s/it]

[I 2025-11-12 10:39:36,914] Trial 75 finished with value: 0.5412963456276553 and parameters: {'learning_rate': 0.012148458191350455, 'depth': 4, 'l2_leaf_reg': 1.5929123912026228, 'subsample': 0.6, 'random_strength': 3.546871524443729e-07, 'bagging_temperature': 0.821033877122354, 'border_count': 129, 'scale_pos_weight': 1.2018794405475626}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 23. Best value: 0.591231:  77%|███████▋  | 77/100 [31:12<07:58, 20.81s/it]

[I 2025-11-12 10:39:56,380] Trial 76 finished with value: 0.5344535503326328 and parameters: {'learning_rate': 0.015992221573388678, 'depth': 5, 'l2_leaf_reg': 0.39201978873167426, 'subsample': 0.7, 'random_strength': 1.2534815215587327e-07, 'bagging_temperature': 0.6238785764037298, 'border_count': 114, 'scale_pos_weight': 9.939933084985547}. Best is trial 23 with value: 0.5912313598727188.


Best trial: 77. Best value: 0.592151:  78%|███████▊  | 78/100 [31:30<07:21, 20.05s/it]

[I 2025-11-12 10:40:14,649] Trial 77 finished with value: 0.5921506942035311 and parameters: {'learning_rate': 0.010187778992423046, 'depth': 4, 'l2_leaf_reg': 4.7768640731726455, 'subsample': 0.8, 'random_strength': 7.470999613751303e-08, 'bagging_temperature': 0.5691931351789441, 'border_count': 103, 'scale_pos_weight': 2.3973369442371877}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  79%|███████▉  | 79/100 [31:45<06:28, 18.50s/it]

[I 2025-11-12 10:40:29,543] Trial 78 finished with value: 0.592009226816158 and parameters: {'learning_rate': 0.010395169580796245, 'depth': 3, 'l2_leaf_reg': 4.754784170012402, 'subsample': 0.8, 'random_strength': 1.4656222511566141e-08, 'bagging_temperature': 0.566648441047734, 'border_count': 79, 'scale_pos_weight': 2.3771722513829165}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  80%|████████  | 80/100 [32:00<05:47, 17.35s/it]

[I 2025-11-12 10:40:44,207] Trial 79 finished with value: 0.576114438689468 and parameters: {'learning_rate': 0.010182520302360153, 'depth': 3, 'l2_leaf_reg': 4.655963234149962, 'subsample': 0.8, 'random_strength': 1.498831800798089e-08, 'bagging_temperature': 0.5279992596660499, 'border_count': 74, 'scale_pos_weight': 1.597237963107303}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  81%|████████  | 81/100 [32:12<04:59, 15.75s/it]

[I 2025-11-12 10:40:56,234] Trial 80 finished with value: 0.5903765438791888 and parameters: {'learning_rate': 0.011010366968846427, 'depth': 3, 'l2_leaf_reg': 6.064615984112751, 'subsample': 0.8, 'random_strength': 2.6439520880946627e-08, 'bagging_temperature': 0.5622732284296622, 'border_count': 82, 'scale_pos_weight': 2.4815167970585676}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  82%|████████▏ | 82/100 [32:24<04:22, 14.59s/it]

[I 2025-11-12 10:41:08,094] Trial 81 finished with value: 0.5899716710621514 and parameters: {'learning_rate': 0.011046017109904846, 'depth': 3, 'l2_leaf_reg': 7.9188804756130775, 'subsample': 0.8, 'random_strength': 3.3204023801792165e-08, 'bagging_temperature': 0.5677345877877011, 'border_count': 78, 'scale_pos_weight': 2.4416424228379126}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  83%|████████▎ | 83/100 [32:37<04:02, 14.27s/it]

[I 2025-11-12 10:41:21,627] Trial 82 finished with value: 0.5902619242029971 and parameters: {'learning_rate': 0.010148955797642936, 'depth': 3, 'l2_leaf_reg': 8.079871112702335, 'subsample': 0.8, 'random_strength': 3.437867428541621e-08, 'bagging_temperature': 0.5770608409120049, 'border_count': 54, 'scale_pos_weight': 2.4153543534340893}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  84%|████████▍ | 84/100 [32:52<03:49, 14.34s/it]

[I 2025-11-12 10:41:36,130] Trial 83 finished with value: 0.5907752229062073 and parameters: {'learning_rate': 0.010900790710692746, 'depth': 3, 'l2_leaf_reg': 7.745975253652325, 'subsample': 0.8, 'random_strength': 2.7896888890035687e-08, 'bagging_temperature': 0.5693111495036078, 'border_count': 33, 'scale_pos_weight': 2.2965245349617085}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  85%|████████▌ | 85/100 [33:05<03:30, 14.01s/it]

[I 2025-11-12 10:41:49,370] Trial 84 finished with value: 0.5901181793109339 and parameters: {'learning_rate': 0.01080253432951266, 'depth': 3, 'l2_leaf_reg': 9.999841272360154, 'subsample': 0.8, 'random_strength': 1.1065932920048032e-08, 'bagging_temperature': 0.5664390526927717, 'border_count': 52, 'scale_pos_weight': 1.9362515768551527}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  86%|████████▌ | 86/100 [33:18<03:11, 13.68s/it]

[I 2025-11-12 10:42:02,296] Trial 85 finished with value: 0.5895111735472511 and parameters: {'learning_rate': 0.010003457912608354, 'depth': 3, 'l2_leaf_reg': 8.984901755490123, 'subsample': 0.8, 'random_strength': 1.1929451065325434e-08, 'bagging_temperature': 0.4909285198252763, 'border_count': 47, 'scale_pos_weight': 1.9152453958348707}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  87%|████████▋ | 87/100 [33:32<03:00, 13.89s/it]

[I 2025-11-12 10:42:16,660] Trial 86 finished with value: 0.591889535117112 and parameters: {'learning_rate': 0.010815791733297015, 'depth': 3, 'l2_leaf_reg': 9.82844984619115, 'subsample': 0.8, 'random_strength': 2.972201972516655e-08, 'bagging_temperature': 0.568591035119959, 'border_count': 33, 'scale_pos_weight': 2.1725563864062973}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  88%|████████▊ | 88/100 [33:46<02:47, 13.99s/it]

[I 2025-11-12 10:42:30,871] Trial 87 finished with value: 0.5049600638022643 and parameters: {'learning_rate': 0.011230156181935435, 'depth': 3, 'l2_leaf_reg': 6.374509805053506, 'subsample': 0.9, 'random_strength': 2.3825343867222924e-08, 'bagging_temperature': 0.5614136765624094, 'border_count': 34, 'scale_pos_weight': 1.0007408838757992}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  89%|████████▉ | 89/100 [34:01<02:35, 14.09s/it]

[I 2025-11-12 10:42:45,209] Trial 88 finished with value: 0.5898384361711994 and parameters: {'learning_rate': 0.012180852274775455, 'depth': 3, 'l2_leaf_reg': 7.311680526204383, 'subsample': 0.8, 'random_strength': 1.4125047058816448e-08, 'bagging_temperature': 0.4612424244105744, 'border_count': 45, 'scale_pos_weight': 2.239748758178671}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  90%|█████████ | 90/100 [34:14<02:17, 13.72s/it]

[I 2025-11-12 10:42:58,048] Trial 89 finished with value: 0.5662376962008839 and parameters: {'learning_rate': 0.010795333822352406, 'depth': 3, 'l2_leaf_reg': 5.121335220003513, 'subsample': 0.8, 'random_strength': 1.019281791615871e-08, 'bagging_temperature': 0.4012857176736605, 'border_count': 55, 'scale_pos_weight': 1.4416282687336748}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  91%|█████████ | 91/100 [34:26<01:59, 13.30s/it]

[I 2025-11-12 10:43:10,366] Trial 90 finished with value: 0.5839076999382343 and parameters: {'learning_rate': 0.013532609222066559, 'depth': 3, 'l2_leaf_reg': 9.292662684956088, 'subsample': 0.8, 'random_strength': 3.824076531284655e-08, 'bagging_temperature': 0.5719454504907007, 'border_count': 40, 'scale_pos_weight': 1.7980455608696566}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  92%|█████████▏| 92/100 [34:41<01:50, 13.76s/it]

[I 2025-11-12 10:43:25,207] Trial 91 finished with value: 0.5909766210151969 and parameters: {'learning_rate': 0.012345137320459651, 'depth': 3, 'l2_leaf_reg': 6.460506739194542, 'subsample': 0.8, 'random_strength': 2.4169326095134007e-08, 'bagging_temperature': 0.6140758085187425, 'border_count': 32, 'scale_pos_weight': 2.079276580100468}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  93%|█████████▎| 93/100 [34:55<01:36, 13.81s/it]

[I 2025-11-12 10:43:39,147] Trial 92 finished with value: 0.5915297436227546 and parameters: {'learning_rate': 0.010006104881551752, 'depth': 3, 'l2_leaf_reg': 6.41133245483916, 'subsample': 0.8, 'random_strength': 2.363611477227039e-08, 'bagging_temperature': 0.6124855378428941, 'border_count': 32, 'scale_pos_weight': 2.0692143643321255}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  94%|█████████▍| 94/100 [35:07<01:20, 13.49s/it]

[I 2025-11-12 10:43:51,867] Trial 93 finished with value: 0.5920350934863033 and parameters: {'learning_rate': 0.01020149705643866, 'depth': 3, 'l2_leaf_reg': 6.210078907109561, 'subsample': 0.8, 'random_strength': 2.4012618790121292e-08, 'bagging_temperature': 0.5221727764958304, 'border_count': 38, 'scale_pos_weight': 2.103772764381711}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  95%|█████████▌| 95/100 [35:21<01:07, 13.53s/it]

[I 2025-11-12 10:44:05,485] Trial 94 finished with value: 0.5897704546911018 and parameters: {'learning_rate': 0.012438536484065477, 'depth': 3, 'l2_leaf_reg': 5.727375083127486, 'subsample': 0.8, 'random_strength': 2.436754928297854e-08, 'bagging_temperature': 0.6040037607200547, 'border_count': 38, 'scale_pos_weight': 2.139111693035857}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  96%|█████████▌| 96/100 [35:34<00:53, 13.27s/it]

[I 2025-11-12 10:44:18,153] Trial 95 finished with value: 0.5749803188695556 and parameters: {'learning_rate': 0.010177227326556092, 'depth': 3, 'l2_leaf_reg': 3.23565149002755, 'subsample': 0.8, 'random_strength': 2.4033546886057406e-08, 'bagging_temperature': 0.5201496200206162, 'border_count': 62, 'scale_pos_weight': 1.6122120377949187}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  97%|█████████▋| 97/100 [35:47<00:39, 13.33s/it]

[I 2025-11-12 10:44:31,627] Trial 96 finished with value: 0.5912740338466372 and parameters: {'learning_rate': 0.011804434107478577, 'depth': 3, 'l2_leaf_reg': 6.29766075999241, 'subsample': 0.8, 'random_strength': 5.1169369299874374e-08, 'bagging_temperature': 0.6140385237462618, 'border_count': 34, 'scale_pos_weight': 2.244079509919057}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  98%|█████████▊| 98/100 [36:02<00:27, 13.87s/it]

[I 2025-11-12 10:44:46,750] Trial 97 finished with value: 0.587752278481268 and parameters: {'learning_rate': 0.011720156965360239, 'depth': 3, 'l2_leaf_reg': 6.836489660363154, 'subsample': 0.8, 'random_strength': 5.225542608283866e-08, 'bagging_temperature': 0.6565914774868797, 'border_count': 32, 'scale_pos_weight': 3.010175180611438}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151:  99%|█████████▉| 99/100 [36:16<00:13, 13.76s/it]

[I 2025-11-12 10:45:00,243] Trial 98 finished with value: 0.5502452429842166 and parameters: {'learning_rate': 0.01432146887622977, 'depth': 3, 'l2_leaf_reg': 4.901361719929764, 'subsample': 0.9, 'random_strength': 7.756657129215044e-08, 'bagging_temperature': 0.6054161131109497, 'border_count': 45, 'scale_pos_weight': 1.269444052136957}. Best is trial 77 with value: 0.5921506942035311.


Best trial: 77. Best value: 0.592151: 100%|██████████| 100/100 [36:29<00:00, 21.90s/it]

[I 2025-11-12 10:45:13,581] Trial 99 finished with value: 0.5917327188203472 and parameters: {'learning_rate': 0.012951213885885838, 'depth': 3, 'l2_leaf_reg': 3.742858121642672, 'subsample': 0.8, 'random_strength': 1.7260125946577633e-08, 'bagging_temperature': 0.5428827851299092, 'border_count': 67, 'scale_pos_weight': 2.135399749502426}. Best is trial 77 with value: 0.5921506942035311.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.5922
  Best Hyperparameters:
    learning_rate: 0.010187778992423046
    depth: 4
    l2_leaf_reg: 4.7768640731726455
    subsample: 0.8
    random_strength: 7.470999613751303e-08
    bagging_temperature: 0.5691931351789441
    border_count: 103
    scale_pos_weight: 2.3973369442371877





In [11]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 2000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 100, # Keep early stopping
    'random_state': 123
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.010187778992423046, 'depth': 4, 'l2_leaf_reg': 4.7768640731726455, 'subsample': 0.8, 'random_strength': 7.470999613751303e-08, 'bagging_temperature': 0.5691931351789441, 'border_count': 103, 'scale_pos_weight': 2.3973369442371877}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.5830

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.90      0.80      0.84      4165
   Class 1.0       0.50      0.70      0.58      1235

    accuracy                           0.77      5400
   macro avg       0.70      0.75      0.71      5400
weighted avg       0.81      0.77      0.78      5400



In [15]:
print("Saving best_model...")

# Use the model's F1 score in the name
best_model.save_model("catboost_mod_f1_0.6020.cbm")

print("Done.")

Saving best_model...
Done.


In [16]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 161

Top 10 Most Important Features:
                        feature  importance
58            liab_prct_squared   14.854113
152  recovery_feasibility_score   10.996216
62                 liab_inverse    8.988131
61                liab_prct_log    6.934850
60               liab_prct_sqrt    5.346997
63         liab_inverse_squared    5.229173
51           witness_x_multicar    4.556612
46                liab_x_police    3.137194
53          multicar_x_highrisk    2.836996
59              liab_prct_cubed    2.705797

Bottom 10 Least Important Features:
                feature  importance
107   very_high_mileage         0.0
97    mid_price_vehicle         0.0
105        high_mileage         0.0
104  is_compact_vehicle         0.0
42        police_binary         0.0
102   is_medium_vehicle         0.0
101       medium_weight         0.0
100       light_vehicle         0.0
98      economy_vehicle         0.0
160     annual_income_z       

In [17]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            0
2       4655051            0
3       6728725            1
4       9848460            1


In [18]:
prediction.to_csv("results/catboost_6020_prediction.csv", index=False)