In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from cc3_preprocessor import Preprocessor

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=0)

In [5]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [6]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [7]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Fit complete.
Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


## Vanilla CatBoost Model (Default Parameters)

In [8]:
cb_clf = cb.CatBoostClassifier(
    objective='Logloss',
    random_state=42,
    thread_count=-1
)

In [9]:
cat_feature_names = pre.cat_for_encoding_
cb_clf.fit(X_train_proc, y_train, 
           cat_features=cat_feature_names,
           verbose=False)

<catboost.core.CatBoostClassifier at 0x118200440>

In [10]:
test_probabilities = cb_clf.predict_proba(X_test_proc)[:, 1]

test_classes = cb_clf.predict(X_test_proc)

print(f"Accuracy: {accuracy_score(y_test, test_classes)}")
print(f"F1 Score: {f1_score(y_test, test_classes)}")
print(f"ROC AUC Score: {roc_auc_score(y_test, test_probabilities)}") # Use probabilities
print(f"PR AUC (Average Precision): {average_precision_score(y_test, test_probabilities)}") # Use probabilities
print(f"Precision: {precision_score(y_test, test_classes)}")
print(f"Recall: {recall_score(y_test, test_classes)}")

Accuracy: 0.8137037037037037
F1 Score: 0.5204957102001907
ROC AUC Score: 0.8388279036310881
PR AUC (Average Precision): 0.6048398830102898
Precision: 0.6326767091541136
Recall: 0.4421052631578947


## CatBoost with Optuna Tuning

In [11]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season']


In [13]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)

    pruning_cb = CatBoostPruningCallback(trial, "Logloss")
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False,
        callbacks=[pruning_cb]
    )

    if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [14]:
print("\n2. Starting Optuna study...")

MAX_ITERATIONS = 1000

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.HyperbandPruner( 
        min_resource=1,
        max_resource=MAX_ITERATIONS, # Tells Hyperband the max iterations
        reduction_factor=3
    )
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-06 17:28:46,930] A new study created in memory with name: no-name-c65866c3-10e4-443c-b60a-17dfcf4a7786



2. Starting Optuna study...


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:28:53,432] Trial 0 finished with value: 0.5517786561264822 and parameters: {'learning_rate': 0.021556678012661715, 'depth': 10, 'l2_leaf_reg': 0.001072370556175943, 'subsample': 0.7, 'random_strength': 1.0655225386985244e-06, 'bagging_temperature': 0.7608744448647068, 'border_count': 86, 'scale_pos_weight': 9.30150988805618}. Best is trial 0 with value: 0.5517786561264822.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:28:55,825] Trial 1 finished with value: 0.5801413164745258 and parameters: {'learning_rate': 0.07443760277537546, 'depth': 8, 'l2_leaf_reg': 0.007852376895872948, 'subsample': 0.6, 'random_strength': 0.15172839951740802, 'bagging_temperature': 0.8976514683878873, 'border_count': 118, 'scale_pos_weight': 1.9897478412073917}. Best is trial 1 with value: 0.5801413164745258.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:28:56,506] Trial 2 finished with value: 0.5796443435489973 and parameters: {'learning_rate': 0.016374740538347133, 'depth': 4, 'l2_leaf_reg': 0.007698024670133893, 'subsample': 0.9, 'random_strength': 0.00038121167033367556, 'bagging_temperature': 0.9832964453334078, 'border_count': 84, 'scale_pos_weight': 1.7523570401787558}. Best is trial 1 with value: 0.5801413164745258.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:01,498] Trial 3 finished with value: 0.5814241486068111 and parameters: {'learning_rate': 0.04033000190691903, 'depth': 10, 'l2_leaf_reg': 0.1534504877335671, 'subsample': 0.6, 'random_strength': 9.439392983037564e-06, 'bagging_temperature': 0.9516862613318826, 'border_count': 231, 'scale_pos_weight': 3.6743436044830413}. Best is trial 3 with value: 0.5814241486068111.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:06,467] Trial 4 finished with value: 0.5841346153846154 and parameters: {'learning_rate': 0.011080173829451685, 'depth': 3, 'l2_leaf_reg': 0.1992589086234483, 'subsample': 0.9, 'random_strength': 8.612362904373863e-06, 'bagging_temperature': 0.10229486690063017, 'border_count': 42, 'scale_pos_weight': 1.564721081175226}. Best is trial 4 with value: 0.5841346153846154.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:07,380] Trial 5 finished with value: 0.5940663176265271 and parameters: {'learning_rate': 0.07418456446692281, 'depth': 6, 'l2_leaf_reg': 2.3812110915150972, 'subsample': 0.5, 'random_strength': 1.4221059080314768e-08, 'bagging_temperature': 0.005530475621733366, 'border_count': 109, 'scale_pos_weight': 2.260222289201304}. Best is trial 5 with value: 0.5940663176265271.
[I 2025-11-06 17:29:07,441] Trial 6 finished with value: 0.49913344887348354 and parameters: {'learning_rate': 0.029399569377151113, 'depth': 7, 'l2_leaf_reg': 2.2203129570230273, 'subsample': 0.5, 'random_strength': 0.3845989012932069, 'bagging_temperature': 0.7439343867383446, 'border_count': 163, 'scale_pos_weight': 9.378667806167883}. Best is trial 5 with value: 0.5940663176265271.
[I 2025-11-06 17:29:07,518] Trial 7 finished with value: 0.5349693251533743 and parameters: {'learning_rate': 0.267861756173839, 'depth': 6, 'l2_leaf_reg': 0.053942934870635915, 'subsample': 0.6, 'random_strength': 3.9

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:07,619] Trial 8 finished with value: 0.5287691187181355 and parameters: {'learning_rate': 0.13168868777965959, 'depth': 9, 'l2_leaf_reg': 2.3629393247003945, 'subsample': 1.0, 'random_strength': 1.389472808279142e-07, 'bagging_temperature': 0.8795136844811136, 'border_count': 131, 'scale_pos_weight': 6.219574835417837}. Best is trial 5 with value: 0.5940663176265271.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:13,510] Trial 9 finished with value: 0.5861434702636419 and parameters: {'learning_rate': 0.013483094577320647, 'depth': 3, 'l2_leaf_reg': 0.21327078334682573, 'subsample': 0.9, 'random_strength': 7.920757235248359e-05, 'bagging_temperature': 0.31493933773265637, 'border_count': 68, 'scale_pos_weight': 3.122530672953768}. Best is trial 5 with value: 0.5940663176265271.
[I 2025-11-06 17:29:13,583] Trial 10 finished with value: 0.5260663507109005 and parameters: {'learning_rate': 0.077271705429714, 'depth': 5, 'l2_leaf_reg': 3.847464500920684, 'subsample': 0.5, 'random_strength': 1.1667580422448806e-08, 'bagging_temperature': 0.44129716967049654, 'border_count': 190, 'scale_pos_weight': 7.2007539875867455}. Best is trial 5 with value: 0.5940663176265271.
[I 2025-11-06 17:29:13,641] Trial 11 finished with value: 0.5428498337170632 and parameters: {'learning_rate': 0.13269608691258958, 'depth': 3, 'l2_leaf_reg': 0.5235873047373298, 'subsample': 0.8, 'random_strength': 0

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:13,749] Trial 12 finished with value: 0.5444915254237288 and parameters: {'learning_rate': 0.050273102528706794, 'depth': 5, 'l2_leaf_reg': 0.7110648689413441, 'subsample': 0.8, 'random_strength': 0.004832299679322828, 'bagging_temperature': 0.000990367034609997, 'border_count': 84, 'scale_pos_weight': 3.6070308153538386}. Best is trial 5 with value: 0.5940663176265271.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
Best trial: 13. Best value: 0.597597:  16%|█▌        | 16/100 [00:35<02:28,  1.77s/it]

[I 2025-11-06 17:29:22,256] Trial 13 finished with value: 0.597596622279961 and parameters: {'learning_rate': 0.010816686294109716, 'depth': 7, 'l2_leaf_reg': 8.027338578821842, 'subsample': 1.0, 'random_strength': 1.009206852184485e-08, 'bagging_temperature': 0.2780626792049565, 'border_count': 73, 'scale_pos_weight': 2.822674437729011}. Best is trial 13 with value: 0.597596622279961.
[I 2025-11-06 17:29:22,321] Trial 14 finished with value: 0.4929508993680117 and parameters: {'learning_rate': 0.11125215296639115, 'depth': 7, 'l2_leaf_reg': 7.9424417632168405, 'subsample': 1.0, 'random_strength': 3.442291753561066e-08, 'bagging_temperature': 0.24386983025584957, 'border_count': 169, 'scale_pos_weight': 1.0400511183799992}. Best is trial 13 with value: 0.597596622279961.
[I 2025-11-06 17:29:22,452] Trial 15 finished with value: 0.5845755022683085 and parameters: {'learning_rate': 0.031960625425080706, 'depth': 6, 'l2_leaf_reg': 8.790438455059038, 'subsample': 0.7, 'random_strength': 2.

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:22,522] Trial 16 finished with value: 0.5467589034076351 and parameters: {'learning_rate': 0.20749614864182125, 'depth': 8, 'l2_leaf_reg': 0.8826006063006402, 'subsample': 1.0, 'random_strength': 1.0189669544519279e-08, 'bagging_temperature': 0.0040995047559927444, 'border_count': 67, 'scale_pos_weight': 4.815215149375518}. Best is trial 13 with value: 0.597596622279961.
[I 2025-11-06 17:29:22,596] Trial 17 finished with value: 0.54871395167576 and parameters: {'learning_rate': 0.06624093933279829, 'depth': 8, 'l2_leaf_reg': 0.04827608534780013, 'subsample': 0.7, 'random_strength': 2.3812391349784157e-07, 'bagging_temperature': 0.18124837522000872, 'border_count': 105, 'scale_pos_weight': 4.685814506538536}. Best is trial 13 with value: 0.597596622279961.
[I 2025-11-06 17:29:22,650] Trial 18 finished with value: 0.5773847125710676 and parameters: {'learning_rate': 0.021342016730814775, 'depth': 5, 'l2_leaf_reg': 1.3766042155702765, 'subsample': 0.5, 'random_strength

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:22,772] Trial 20 finished with value: 0.5168018539976825 and parameters: {'learning_rate': 0.09461956039169657, 'depth': 6, 'l2_leaf_reg': 9.698737048600659, 'subsample': 0.6, 'random_strength': 4.504073161095364e-05, 'bagging_temperature': 0.5939741851497076, 'border_count': 253, 'scale_pos_weight': 7.270728912432385}. Best is trial 13 with value: 0.597596622279961.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:29,110] Trial 21 finished with value: 0.5998694516971279 and parameters: {'learning_rate': 0.011120804939988044, 'depth': 4, 'l2_leaf_reg': 0.3203896185865553, 'subsample': 0.9, 'random_strength': 0.03443317052185157, 'bagging_temperature': 0.3175949378538905, 'border_count': 59, 'scale_pos_weight': 2.6612180500653415}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:29,322] Trial 22 finished with value: 0.578441127694859 and parameters: {'learning_rate': 0.0117375570003924, 'depth': 4, 'l2_leaf_reg': 0.4027550103161496, 'subsample': 0.9, 'random_strength': 0.022428361157450963, 'bagging_temperature': 0.34227051319532426, 'border_count': 63, 'scale_pos_weight': 2.3616259059241744}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:29,400] Trial 23 finished with value: 0.5738287560581583 and parameters: {'learning_rate': 0.017977989790637684, 'depth': 4, 'l2_leaf_reg': 1.4893390631030199, 'subsample': 1.0, 'random_strength': 0.04621464904928708, 'bagging_temperature': 0.158890418253821, 'border_count': 53, 'scale_pos_weight': 3.219963524456251}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:29,534] Trial 24 finished with value: 0.5119638826185101 and parameters: {'learning_rate': 0.01052029685519338, 'depth': 5, 'l2_leaf_reg': 4.037774920049204, 'subsample': 0.9, 'random_strength': 0.000851288852579773, 'bagging_temperature': 0.21805173981879808, 'border_count': 90, 'scale_pos_weight': 1.106645791670488}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:29,592] Trial 25 finished with value: 0.5524475524475524 and parameters: {'learning_rate': 0.026851615014818128, 'depth': 6, 'l2_leaf_reg': 0.345257506622309, 'subsample': 1.0, 'random_strength': 0.008638175511299017, 'bagging_temperature': 0.07473779790694447, 'border_count': 103, 'scale_pos_weight': 4.177301037294073}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:29,666] Trial 26 finished with value: 0.5728092783505154 and parameters: {'learning_rate': 0.01500597875242839, 'depth': 9, 'l2_leaf_reg': 0.05477469629067444, 'subsample': 0.8, 'random_strength': 0

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:29,790] Trial 28 finished with value: 0.5724748245346354 and parameters: {'learning_rate': 0.1718012026015212, 'depth': 4, 'l2_leaf_reg': 0.014069922271165644, 'subsample': 1.0, 'random_strength': 7.858542669798608e-07, 'bagging_temperature': 0.4056292525164976, 'border_count': 126, 'scale_pos_weight': 2.974665211236297}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:29,882] Trial 29 finished with value: 0.5113969758519522 and parameters: {'learning_rate': 0.02192341902522363, 'depth': 9, 'l2_leaf_reg': 4.897789060512646, 'subsample': 0.7, 'random_strength': 0.0003152570660241448, 'bagging_temperature': 0.6607055273357099, 'border_count': 93, 'scale_pos_weight': 8.308034006529953}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:29,952] Trial 30 finished with value: 0.5792431622330461 and parameters: {'learning_rate': 0.06000950761404346, 'depth': 8, 'l2_leaf_reg': 2.455684242486362, 'subsample': 0.8, 'random_strength': 2.

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:34,464] Trial 31 finished with value: 0.5850965961361545 and parameters: {'learning_rate': 0.013713891564518943, 'depth': 3, 'l2_leaf_reg': 0.0010544343426481318, 'subsample': 0.9, 'random_strength': 4.406512697122077e-05, 'bagging_temperature': 0.3053843809884589, 'border_count': 74, 'scale_pos_weight': 3.1290806979963746}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:34,531] Trial 32 finished with value: 0.568427961929767 and parameters: {'learning_rate': 0.013060698412110763, 'depth': 3, 'l2_leaf_reg': 0.10105761679991525, 'subsample': 0.9, 'random_strength': 5.7913264998960504e-08, 'bagging_temperature': 0.3393401564683779, 'border_count': 52, 'scale_pos_weight': 2.09134235641545}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:34,603] Trial 33 finished with value: 0.5458885941644562 and parameters: {'learning_rate': 0.018571884158624912, 'depth': 4, 'l2_leaf_reg': 0.02103198839402745, 'subsample': 0.9, 'random_stren

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:34,668] Trial 34 finished with value: 0.5344628417404698 and parameters: {'learning_rate': 0.010506487220436915, 'depth': 3, 'l2_leaf_reg': 0.209188647499193, 'subsample': 1.0, 'random_strength': 9.578803401417708e-05, 'bagging_temperature': 0.38555739287806634, 'border_count': 119, 'scale_pos_weight': 1.515316828485808}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:34,737] Trial 35 finished with value: 0.5706070287539936 and parameters: {'learning_rate': 0.01546457074256123, 'depth': 4, 'l2_leaf_reg': 0.0019914488382010184, 'subsample': 0.8, 'random_strength': 1.4116282789531148e-05, 'bagging_temperature': 0.07604286733765965, 'border_count': 72, 'scale_pos_weight': 3.2251302389299052}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:34,848] Trial 36 finished with value: 0.5777357300536109 and parameters: {'learning_rate': 0.02409306361350177, 'depth': 5, 'l2_leaf_reg': 0.27623508083619625, 'subsample': 0.9, 'random_stre

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:35,160] Trial 37 finished with value: 0.5456431535269709 and parameters: {'learning_rate': 0.010036932271580715, 'depth': 6, 'l2_leaf_reg': 0.1063318774436825, 'subsample': 0.6, 'random_strength': 1.5317430380526051e-06, 'bagging_temperature': 0.12089861727667059, 'border_count': 66, 'scale_pos_weight': 4.296885129079751}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:35,234] Trial 38 finished with value: 0.5330748727889508 and parameters: {'learning_rate': 0.035103018750510424, 'depth': 7, 'l2_leaf_reg': 0.8552641746456998, 'subsample': 1.0, 'random_strength': 0.20535883142908168, 'bagging_temperature': 0.5431358847732146, 'border_count': 146, 'scale_pos_weight': 5.200296087713463}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:35,287] Trial 39 finished with value: 0.5399300887335305 and parameters: {'learning_rate': 0.08220964394794147, 'depth': 3, 'l2_leaf_reg': 0.16137709350050206, 'subsample': 0.9, 'random_strength'

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:35,704] Trial 40 finished with value: 0.5857558139534884 and parameters: {'learning_rate': 0.012571082292009591, 'depth': 10, 'l2_leaf_reg': 5.8981257420993884, 'subsample': 1.0, 'random_strength': 9.533242821885754e-08, 'bagging_temperature': 0.8135284861786554, 'border_count': 83, 'scale_pos_weight': 1.9780656902497382}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:35,835] Trial 41 finished with value: 0.5786802030456852 and parameters: {'learning_rate': 0.013123919633571656, 'depth': 10, 'l2_leaf_reg': 6.052212497756254, 'subsample': 1.0, 'random_strength': 3.4458027785278186e-08, 'bagging_temperature': 0.7493261694385902, 'border_count': 85, 'scale_pos_weight': 1.9884256881610791}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:35,956] Trial 42 finished with value: 0.5646010530579182 and parameters: {'learning_rate': 0.016134839236943053, 'depth': 10, 'l2_leaf_reg': 3.351222858019693, 'subsample': 1.0, 'random_strength': 9.940150530696614e-08, 'bagging_temperature': 0.8668067209512661, 'border_count': 59, 'scale_pos_weight': 1.4788254062678807}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:36,053] Trial 43 finished with value: 0.580829756795422 and parameters: {'learning_rate': 0.012219188722067205, 'depth': 9, 'l2_leaf_reg': 2.451685091629685, 'subsample': 0.5, 'random_strength': 3.001855796807203e-08, 'bagging_temperature': 0.7981260453005621, 'border_count': 80, 'scale_pos_weight': 1.9889954233711213}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:36,669] Trial 44 finished with value: 0.5856524427952999 and parameters: {'learning_rate': 0.019061096199197186, 'depth': 5, 'l2_leaf_reg': 0.5939265453853655, 'subsample': 1.0, 'random_strength': 0.0016543295200129088, 'bagging_temperature': 0.9709028584855058, 'border_count': 98, 'scale_pos_weight': 2.8359241219298204}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:36,720] Trial 45 finished with value: 0.4281150159744409 and parameters: {'learning_rate': 0.014659789823240107, 'depth': 3, 'l2_leaf_reg': 5.9016874619864526, 'subsample': 0.9, 'random_strength': 2.7925017400007466e-07, 'bagging_temperature': 0.41587462587199586, 'border_count': 43, 'scale_pos_weight': 1.2954520057268977}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:36,839] Trial 46 finished with value: 0.5835951134380454 and parameters: {'learning_rate': 0.011527507432155077, 'depth': 8, 'l2_leaf_reg': 1.2067863561500358, 'subsample': 0.9, 'random_streng

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:36,943] Trial 47 finished with value: 0.5494862087614927 and parameters: {'learning_rate': 0.04488411180689702, 'depth': 4, 'l2_leaf_reg': 2.0500420614316623, 'subsample': 1.0, 'random_strength': 1.0506612697456894e-08, 'bagging_temperature': 0.040955759011908205, 'border_count': 111, 'scale_pos_weight': 3.5484266922236696}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:37,080] Trial 48 finished with value: 0.5793727772389267 and parameters: {'learning_rate': 0.10808412317737091, 'depth': 10, 'l2_leaf_reg': 6.690121506187122, 'subsample': 0.8, 'random_strength': 5.8044795357095596e-06, 'bagging_temperature': 0.6899276232531668, 'border_count': 70, 'scale_pos_weight': 2.7211119505409513}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:40,680] Trial 49 finished with value: 0.5880515423662632 and parameters: {'learning_rate': 0.025441715304752647, 'depth': 7, 'l2_leaf_reg': 2.979714802269998, 'subsample': 0.6, 'random_strength': 8.823132170166144e-08, 'bagging_temperature': 0.8207459142734752, 'border_count': 33, 'scale_pos_weight': 1.7001973593389716}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:40,745] Trial 50 finished with value: 0.5345037795659595 and parameters: {'learning_rate': 0.028304587119252822, 'depth': 7, 'l2_leaf_reg': 0.4833551157733217, 'subsample': 0.5, 'random_strength': 3.578348454838189e-08, 'bagging_temperature': 0.49114574532211563, 'border_count': 35, 'scale_pos_weight': 6.324521879419285}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:48,671] Trial 51 finished with value: 0.5891829689298044 and parameters: {'learning_rate': 0.01757304304673686, 'depth': 7, 'l2_leaf_reg': 9.96243433462408, 'subsample': 0.6, 'random_strength': 1.2293244900880628e-07, 'bagging_temperature': 0.8354305583091868, 'border_count': 46, 'scale_pos_weight': 1.8158906756065112}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:48,754] Trial 52 finished with value: 0.5780164245104232 and parameters: {'learning_rate': 0.022364418471987613, 'depth': 6, 'l2_leaf_reg': 9.953685039147542, 'subsample': 0.6, 'random_strength': 1.741711233226917e-07, 'bagging_temperature': 0.9109588108791957, 'border_count': 42, 'scale_pos_weight': 2.395405268652999}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:48,831] Trial 53 finished with value: 0.5839080459770115 and parameters: {'learning_rate': 0.018269312000054553, 'depth': 8, 'l2_leaf_reg': 2.812902541868396, 'subsample': 0.6, 'random_strength': 5.

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
Best trial: 21. Best value: 0.599869:  56%|█████▌    | 56/100 [01:02<00:42,  1.02it/s]

[I 2025-11-06 17:29:48,906] Trial 54 finished with value: 0.5480274442538593 and parameters: {'learning_rate': 0.05441756184981882, 'depth': 7, 'l2_leaf_reg': 1.7396300965585783, 'subsample': 0.5, 'random_strength': 3.9471474656705603e-07, 'bagging_temperature': 0.7838347704653279, 'border_count': 61, 'scale_pos_weight': 1.3008735452456666}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:49,039] Trial 55 finished with value: 0.5613636363636364 and parameters: {'learning_rate': 0.01618955162985423, 'depth': 7, 'l2_leaf_reg': 1.0355202144481614, 'subsample': 0.7, 'random_strength': 1.8059497486298258e-05, 'bagging_temperature': 0.7197089760208022, 'border_count': 33, 'scale_pos_weight': 3.314243846298264}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:49,104] Trial 56 finished with value: 0.49786628733997157 and parameters: {'learning_rate': 0.01997665838579059, 'depth': 6, 'l2_leaf_reg': 3.849271314322587, 'subsample': 0.6, 'random_strength':

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:49,201] Trial 57 finished with value: 0.5823510672188595 and parameters: {'learning_rate': 0.07324252088201881, 'depth': 7, 'l2_leaf_reg': 0.03871296418282296, 'subsample': 0.5, 'random_strength': 1.944737066512482e-08, 'bagging_temperature': 0.9121350107121471, 'border_count': 67, 'scale_pos_weight': 2.5883239030214096}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:49,296] Trial 58 finished with value: 0.5539803707742639 and parameters: {'learning_rate': 0.02446612222287758, 'depth': 6, 'l2_leaf_reg': 0.07300787089259439, 'subsample': 0.6, 'random_strength': 0.00020020124477451945, 'bagging_temperature': 0.2785582246865844, 'border_count': 165, 'scale_pos_weight': 3.8449859009293794}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:49,374] Trial 59 finished with value: 0.5119208277103013 and parameters: {'learning_rate': 0.03228428947205137, 'depth': 8, 'l2_leaf_reg': 0.24440387518739265, 'subsample': 0.7, 'random_streng

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:49,538] Trial 60 finished with value: 0.5831949518432414 and parameters: {'learning_rate': 0.014210613600117876, 'depth': 7, 'l2_leaf_reg': 3.135736044474442, 'subsample': 0.5, 'random_strength': 0.014502286601032291, 'bagging_temperature': 0.20242767910584292, 'border_count': 39, 'scale_pos_weight': 2.2348743990395903}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:57,078] Trial 61 finished with value: 0.58675799086758 and parameters: {'learning_rate': 0.012957353344694103, 'depth': 5, 'l2_leaf_reg': 7.777556734677626, 'subsample': 0.6, 'random_strength': 6.160387158092755e-08, 'bagging_temperature': 0.8466668279803737, 'border_count': 85, 'scale_pos_weight': 1.793480709030772}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:29:57,734] Trial 62 finished with value: 0.5759517527327553 and parameters: {'learning_rate': 0.01131350716513878, 'depth': 5, 'l2_leaf_reg': 7.6302828695379405, 'subsample': 0.6, 'random_strength': 1.3823005584595092e-06, 'bagging_temperature': 0.8555858959339591, 'border_count': 87, 'scale_pos_weight': 1.7280086683937779}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:29:57,794] Trial 63 finished with value: 0.5765765765765766 and parameters: {'learning_rate': 0.017290042838706478, 'depth': 5, 'l2_leaf_reg': 4.757819805535341, 'subsample': 0.6, 'random_strength': 2.2216458258518544e-08, 'bagging_temperature': 0.6168656306986069, 'border_count': 77, 'scale_pos_weight': 2.888396168304235}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:04,076] Trial 64 finished with value: 0.5863874345549738 and parameters: {'learning_rate': 0.011023148295118924, 'depth': 4, 'l2_leaf_reg': 7.954469650326033, 'subsample': 0.7, 'random_strength': 1.0158702381099384e-08, 'bagging_temperature': 0.938069576085129, 'border_count': 102, 'scale_pos_weight': 1.876665861680328}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:04,581] Trial 65 finished with value: 0.5804347826086956 and parameters: {'learning_rate': 0.010100584728593933, 'depth': 4, 'l2_leaf_reg': 8.185424996682926, 'subsample': 0.7, 'random_strength': 1.0187516769255162e-08, 'bagging_temperature': 0.9924407696740963, 'border_count': 98, 'scale_pos_weight': 1.8253784022213462}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:04,705] Trial 66 finished with value: 0.5292841648590022 and parameters: {'learning_rate': 0.011278320902833618, 'depth': 5, 'l2_leaf_reg': 4.411284765735531, 'subsample': 0.6, 'random_strength': 5.107174975712663e-08, 'bagging_temperature': 0.9357800318043956, 'border_count': 108, 'scale_pos_weight': 1.357396190421548}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:04,770] Trial 67 finished with value: 0.499027027027027 and parameters: {'learning_rate': 0.012580713496013078, 'depth': 6, 'l2_leaf_reg': 1.8023718403195617, 'subsample': 0.7, 'random_strength':

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:04,976] Trial 68 finished with value: 0.5816628327308577 and parameters: {'learning_rate': 0.01405621464517651, 'depth': 4, 'l2_leaf_reg': 5.776799976337499, 'subsample': 0.6, 'random_strength': 1.6066852523422693e-08, 'bagging_temperature': 0.8827013312107412, 'border_count': 190, 'scale_pos_weight': 2.3121433475250988}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:05,040] Trial 69 finished with value: 0.5546813532651456 and parameters: {'learning_rate': 0.016939478033902895, 'depth': 6, 'l2_leaf_reg': 9.129828882291234, 'subsample': 0.5, 'random_strength': 5.025582191105064e-07, 'bagging_temperature': 0.9430616204281985, 'border_count': 90, 'scale_pos_weight': 1.7848760124727159}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:05,104] Trial 70 finished with value: 0.5773847125710676 and parameters: {'learning_rate': 0.0630581851526336, 'depth': 5, 'l2_leaf_reg': 3.363162920855909, 'subsample': 0.7, 'random_strength': 4

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:05,622] Trial 71 finished with value: 0.5726290516206483 and parameters: {'learning_rate': 0.011851857294061434, 'depth': 4, 'l2_leaf_reg': 4.624105478220135, 'subsample': 0.8, 'random_strength': 0.0030093479025506066, 'bagging_temperature': 0.5399692052319445, 'border_count': 73, 'scale_pos_weight': 2.9941989185313287}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:05,684] Trial 72 finished with value: 0.5686145764937623 and parameters: {'learning_rate': 0.015327583399897565, 'depth': 3, 'l2_leaf_reg': 0.12432519009123048, 'subsample': 0.8, 'random_strength': 1.3504061342512065e-07, 'bagging_temperature': 0.8948351476925391, 'border_count': 51, 'scale_pos_weight': 2.1186435315209122}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:05,752] Trial 73 finished with value: 0.48721804511278194 and parameters: {'learning_rate': 0.013550483625899013, 'depth': 7, 'l2_leaf_reg': 0.3317152732825195, 'subsample': 0.6, 'random_streng

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:05,832] Trial 74 finished with value: 0.5744888023369036 and parameters: {'learning_rate': 0.0897351952325053, 'depth': 4, 'l2_leaf_reg': 7.093056823367781, 'subsample': 0.7, 'random_strength': 3.026512383514664e-07, 'bagging_temperature': 0.7894486930325713, 'border_count': 65, 'scale_pos_weight': 2.378010868714332}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:09,302] Trial 75 finished with value: 0.5766509433962265 and parameters: {'learning_rate': 0.010926615551276234, 'depth': 3, 'l2_leaf_reg': 0.7014320563653731, 'subsample': 0.5, 'random_strength': 0.6844391302899339, 'bagging_temperature': 0.9635111675961782, 'border_count': 122, 'scale_pos_weight': 3.3459663770664974}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:09,379] Trial 76 finished with value: 0.5539358600583091 and parameters: {'learning_rate': 0.020898242322717333, 'depth': 7, 'l2_leaf_reg': 2.6516907524916977, 'subsample': 0.6, 'random_strength': 0.04466434929663121, 'bagging_temperature': 0.3420848255038961, 'border_count': 82, 'scale_pos_weight': 1.4185607602843566}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:09,509] Trial 77 finished with value: 0.5559131134352373 and parameters: {'learning_rate': 0.012623376919965379, 'depth': 5, 'l2_leaf_reg': 5.463597336844925, 'subsample': 0.7, 'random_strength': 8.516063337329652e-08, 'bagging_temperature': 0.4249506116160495, 'border_count': 50, 'scale_pos_weight': 1.7115115359510598}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:09,603] Trial 78 finished with value: 0.5727163461538461 and parameters: {'learning_rate': 0.014744066672690202, 'depth': 7, 'l2_leaf_reg': 7.341903874220319, 'subsample': 0.6, 'random_strength': 3.0292964367867417e-06, 'bagging_temperature': 0.24823031910657165, 'border_count': 93, 'scale_pos_weight': 3.0049918428109637}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:16,266] Trial 79 finished with value: 0.597843841881738 and parameters: {'learning_rate': 0.010161902858023528, 'depth': 6, 'l2_leaf_reg': 2.13552173465026, 'subsample': 0.9, 'random_strength': 1.4037640271433752e-08, 'bagging_temperature': 0.28680792530377835, 'border_count': 74, 'scale_pos_weight': 2.6488721311950263}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:16,358] Trial 80 finished with value: 0.5717360114777619 and parameters: {'learning_rate': 0.0103254214682503, 'depth': 6, 'l2_leaf_reg': 9.978339687545335, 'subsample': 0.8, 'random_strength': 1.500273922168329e-08, 'bagging_temperature': 0.2862392707583453, 'border_count': 114, 'scale_pos_weight': 1.883508152971554}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:16,665] Trial 81 finished with value: 0.5806658130601793 and parameters: {'learning_rate': 0.01176565218152377, 'depth': 6, 'l2_leaf_reg': 0.16140807567497853, 'subsample': 0.9, 'random_strength': 3.721077337572981e-08, 'bagging_temperature': 0.3793339987282367, 'border_count': 76, 'scale_pos_weight': 2.745836633073406}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:16,793] Trial 82 finished with value: 0.5780310224754669 and parameters: {'learning_rate': 0.010034072664533374, 'depth': 5, 'l2_leaf_reg': 0.0023841025562885004, 'subsample': 0.9, 'random_strength': 2.6683611165187096e-08, 'bagging_temperature': 0.30656307575286557, 'border_count': 63, 'scale_pos_weight': 2.2410801147878026}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
Best trial: 21. Best value: 0.599869:  86%|████████▌ | 86/100 [01:30<00:07,  1.95it/s]

[I 2025-11-06 17:30:16,963] Trial 83 finished with value: 0.5773847125710676 and parameters: {'learning_rate': 0.013154245881450458, 'depth': 6, 'l2_leaf_reg': 2.030822081030725, 'subsample': 0.9, 'random_strength': 1.009240056159548e-08, 'bagging_temperature': 0.7646358211557988, 'border_count': 70, 'scale_pos_weight': 2.550601309761197}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:17,040] Trial 84 finished with value: 0.5520147484856466 and parameters: {'learning_rate': 0.07041181483883581, 'depth': 7, 'l2_leaf_reg': 3.5554524775652245, 'subsample': 0.8, 'random_strength': 7.564605518137142e-08, 'bagging_temperature': 0.23244266978117326, 'border_count': 38, 'scale_pos_weight': 4.2914369758000195}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:17,157] Trial 85 finished with value: 0.5710804224207962 and parameters: {'learning_rate': 0.010916596910016788, 'depth': 8, 'l2_leaf_reg': 1.0843786451867514, 'subsample': 0.9, 'random_strength':

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:17,228] Trial 86 finished with value: 0.4281150159744409 and parameters: {'learning_rate': 0.015884632905981324, 'depth': 4, 'l2_leaf_reg': 1.4546515762013508, 'subsample': 0.9, 'random_strength': 0.0010422637289766693, 'bagging_temperature': 0.09800491073599579, 'border_count': 229, 'scale_pos_weight': 1.1876929652393204}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:17,304] Trial 87 finished with value: 0.5694486291051521 and parameters: {'learning_rate': 0.1408976814460989, 'depth': 6, 'l2_leaf_reg': 0.8411339666055628, 'subsample': 0.5, 'random_strength': 5.225823597171409e-08, 'bagging_temperature': 0.17407330779127542, 'border_count': 87, 'scale_pos_weight': 3.1038401094348136}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:17,368] Trial 88 finished with value: 0.5680551000327977 and parameters: {'learning_rate': 0.27243344818746446, 'depth': 3, 'l2_leaf_reg': 0.4406186886580358, 'subsample': 1.0, 'random_strength

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:17,453] Trial 89 finished with value: 0.5743243243243243 and parameters: {'learning_rate': 0.011962501519106685, 'depth': 4, 'l2_leaf_reg': 5.023312927183903, 'subsample': 0.7, 'random_strength': 1.5340473495705443e-07, 'bagging_temperature': 0.333986316026284, 'border_count': 102, 'scale_pos_weight': 1.9648288582891624}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:17,561] Trial 90 finished with value: 0.5574043261231281 and parameters: {'learning_rate': 0.014047866321315817, 'depth': 7, 'l2_leaf_reg': 0.07992165487503297, 'subsample': 0.6, 'random_strength': 3.1128697113378384e-08, 'bagging_temperature': 0.5108856007392056, 'border_count': 81, 'scale_pos_weight': 3.426170014741329}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:17,930] Trial 91 finished with value: 0.5854770433083686 and parameters: {'learning_rate': 0.013203892021909708, 'depth': 9, 'l2_leaf_reg': 6.761430889841764, 'subsample': 1.0, 'random_strength': 1.9413064947135894e-07, 'bagging_temperature': 0.8349326262157507, 'border_count': 72, 'scale_pos_weight': 2.4833574929738127}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:19,616] Trial 92 finished with value: 0.5788643533123028 and parameters: {'learning_rate': 0.012196577558466398, 'depth': 8, 'l2_leaf_reg': 4.148164832390094, 'subsample': 1.0, 'random_strength': 1.0373769539548002e-07, 'bagging_temperature': 0.6544223709460335, 'border_count': 94, 'scale_pos_weight': 1.6252304109463602}. Best is trial 21 with value: 0.5998694516971279.


  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")


[I 2025-11-06 17:30:19,878] Trial 93 finished with value: 0.5860400829302004 and parameters: {'learning_rate': 0.0110949072928277, 'depth': 5, 'l2_leaf_reg': 2.7487486120791496, 'subsample': 1.0, 'random_strength': 0.00011926043713787389, 'bagging_temperature': 0.8168597165371808, 'border_count': 84, 'scale_pos_weight': 2.005739706792013}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:19,991] Trial 94 finished with value: 0.5295415543025251 and parameters: {'learning_rate': 0.011340214720315225, 'depth': 5, 'l2_leaf_reg': 2.4840234632834535, 'subsample': 1.0, 'random_strength': 0.00020741912329580664, 'bagging_temperature': 0.8546269498701865, 'border_count': 62, 'scale_pos_weight': 6.9204759141688434}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:20,053] Trial 95 finished with value: 0.5780310224754669 and parameters: {'learning_rate': 0.01726536339925138, 'depth': 6, 'l2_leaf_reg': 1.6288890678268932, 'subsample': 1.0, 'random_strength':

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
Best trial: 21. Best value: 0.599869:  98%|█████████▊| 98/100 [01:33<00:00,  4.41it/s]

[I 2025-11-06 17:30:20,131] Trial 96 finished with value: 0.5781151170145478 and parameters: {'learning_rate': 0.010774718020328415, 'depth': 5, 'l2_leaf_reg': 3.0571969737001456, 'subsample': 0.9, 'random_strength': 1.586305013060817e-08, 'bagging_temperature': 0.8133392853765729, 'border_count': 69, 'scale_pos_weight': 2.1438639622019937}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:20,238] Trial 97 finished with value: 0.5354969574036511 and parameters: {'learning_rate': 0.01526360582384809, 'depth': 4, 'l2_leaf_reg': 2.1389198504100584, 'subsample': 1.0, 'random_strength': 0.07300731671493839, 'bagging_temperature': 0.19691737670554688, 'border_count': 89, 'scale_pos_weight': 4.021978861822159}. Best is trial 21 with value: 0.5998694516971279.
[I 2025-11-06 17:30:20,329] Trial 98 finished with value: 0.5627270187203129 and parameters: {'learning_rate': 0.05726350650431647, 'depth': 7, 'l2_leaf_reg': 8.072185739993252, 'subsample': 0.5, 'random_strength': 0.

  pruning_cb = CatBoostPruningCallback(trial, "Logloss")
Best trial: 21. Best value: 0.599869: 100%|██████████| 100/100 [01:33<00:00,  1.07it/s]

[I 2025-11-06 17:30:20,405] Trial 99 finished with value: 0.5743685687558466 and parameters: {'learning_rate': 0.01882330332910022, 'depth': 6, 'l2_leaf_reg': 5.398984888063465, 'subsample': 0.9, 'random_strength': 6.8249138560461234e-06, 'bagging_temperature': 0.8698616605332148, 'border_count': 42, 'scale_pos_weight': 2.8592269437310094}. Best is trial 21 with value: 0.5998694516971279.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.5999
  Best Hyperparameters:
    learning_rate: 0.011120804939988044
    depth: 4
    l2_leaf_reg: 0.3203896185865553
    subsample: 0.9
    random_strength: 0.03443317052185157
    bagging_temperature: 0.3175949378538905
    border_count: 59
    scale_pos_weight: 2.6612180500653415





In [19]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 1000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 50 # Keep early stopping
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.011120804939988044, 'depth': 4, 'l2_leaf_reg': 0.3203896185865553, 'subsample': 0.9, 'random_strength': 0.03443317052185157, 'bagging_temperature': 0.3175949378538905, 'border_count': 59, 'scale_pos_weight': 2.6612180500653415}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.5999

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.91      0.78      0.84      4165
   Class 1.0       0.50      0.74      0.60      1235

    accuracy                           0.77      5400
   macro avg       0.71      0.76      0.72      5400
weighted avg       0.82      0.77      0.79      5400



In [20]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            1
2       4655051            0
3       6728725            1
4       9848460            1


In [17]:
# prediction.to_csv("results/catboost_hyperband_prediction.csv", index=False)

In [21]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 136

Top 10 Most Important Features:
                        feature  importance
44              liab_x_multicar   12.575262
135  recovery_feasibility_score    7.330751
55                 liab_inverse    5.605071
45         liab_x_highrisk_site    5.448074
13                    liab_prct    4.695390
54                liab_prct_log    4.604083
52              liab_prct_cubed    4.423122
58     is_multi_vehicle_unclear    4.225854
51            liab_prct_squared    3.843335
56         liab_inverse_squared    3.649937

Bottom 10 Least Important Features:
                   feature  importance
91           light_vehicle         0.0
93      is_compact_vehicle         0.0
94            high_mileage         0.0
97        frequent_claimer         0.0
99      first_time_claimer         0.0
100  very_frequent_claimer         0.0
101           large_payout         0.0
102          medium_payout         0.0
103           small_payout         0.0
