In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from cc3_preprocessor import Preprocessor

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=0)

In [5]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [6]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [7]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Fit complete.
Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


## Vanilla CatBoost Model (Default Parameters)

In [8]:
cb_clf = cb.CatBoostClassifier(
    objective='Logloss',
    random_state=42,
    thread_count=-1
)

In [9]:
cat_feature_names = pre.cat_for_encoding_
cb_clf.fit(X_train_proc, y_train, 
           cat_features=cat_feature_names,
           verbose=False)

<catboost.core.CatBoostClassifier at 0x11a8c4440>

In [10]:
test_probabilities = cb_clf.predict_proba(X_test_proc)[:, 1]

test_classes = cb_clf.predict(X_test_proc)

print(f"Accuracy: {accuracy_score(y_test, test_classes)}")
print(f"F1 Score: {f1_score(y_test, test_classes)}")
print(f"ROC AUC Score: {roc_auc_score(y_test, test_probabilities)}") # Use probabilities
print(f"PR AUC (Average Precision): {average_precision_score(y_test, test_probabilities)}") # Use probabilities
print(f"Precision: {precision_score(y_test, test_classes)}")
print(f"Recall: {recall_score(y_test, test_classes)}")

Accuracy: 0.8137037037037037
F1 Score: 0.5204957102001907
ROC AUC Score: 0.8388279036310881
PR AUC (Average Precision): 0.6048398830102898
Precision: 0.6326767091541136
Recall: 0.4421052631578947


## CatBoost with Optuna Tuning

In [11]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season']


In [13]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False
    )

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [14]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-06 16:57:50,738] A new study created in memory with name: no-name-aec1e816-dc13-4e8b-90d5-6b1ad4bd04a9



2. Starting Optuna study...


Best trial: 0. Best value: 0.520815:   1%|          | 1/100 [00:04<06:53,  4.17s/it]

[I 2025-11-06 16:57:54,920] Trial 0 finished with value: 0.520814880425155 and parameters: {'learning_rate': 0.25190835595374395, 'depth': 10, 'l2_leaf_reg': 0.0034793470599189568, 'subsample': 0.9, 'random_strength': 0.00031279008166931657, 'bagging_temperature': 0.8875151796622847, 'border_count': 255, 'scale_pos_weight': 1.2060568759412733}. Best is trial 0 with value: 0.520814880425155.


Best trial: 1. Best value: 0.594839:   2%|▏         | 2/100 [00:09<07:54,  4.84s/it]

[I 2025-11-06 16:58:00,223] Trial 1 finished with value: 0.5948387096774194 and parameters: {'learning_rate': 0.019148947087785292, 'depth': 6, 'l2_leaf_reg': 3.2224252746861013, 'subsample': 0.8, 'random_strength': 3.2450254356187198e-06, 'bagging_temperature': 0.020305884209805614, 'border_count': 207, 'scale_pos_weight': 2.891243147531931}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:   3%|▎         | 3/100 [00:13<07:07,  4.41s/it]

[I 2025-11-06 16:58:04,117] Trial 2 finished with value: 0.5704672897196261 and parameters: {'learning_rate': 0.07764711628331457, 'depth': 10, 'l2_leaf_reg': 0.01763724581209577, 'subsample': 0.5, 'random_strength': 0.0003355453741508129, 'bagging_temperature': 0.293190677685641, 'border_count': 41, 'scale_pos_weight': 2.135381930933187}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:   4%|▍         | 4/100 [00:24<11:01,  6.89s/it]

[I 2025-11-06 16:58:14,819] Trial 3 finished with value: 0.5628710199676201 and parameters: {'learning_rate': 0.011707954919585865, 'depth': 10, 'l2_leaf_reg': 0.7855674323889377, 'subsample': 0.8, 'random_strength': 3.812312214320188e-07, 'bagging_temperature': 0.2729117180830871, 'border_count': 222, 'scale_pos_weight': 6.024900386615226}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:   5%|▌         | 5/100 [00:28<09:44,  6.15s/it]

[I 2025-11-06 16:58:19,667] Trial 4 finished with value: 0.5423983943803311 and parameters: {'learning_rate': 0.01797184751233291, 'depth': 8, 'l2_leaf_reg': 1.0496238704151708, 'subsample': 0.7, 'random_strength': 0.18531669137751464, 'bagging_temperature': 0.9136180179456515, 'border_count': 73, 'scale_pos_weight': 6.384302660324709}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:   6%|▌         | 6/100 [00:36<10:08,  6.47s/it]

[I 2025-11-06 16:58:26,754] Trial 5 finished with value: 0.5463838487094301 and parameters: {'learning_rate': 0.011829538288069331, 'depth': 9, 'l2_leaf_reg': 0.008438675163733946, 'subsample': 0.9, 'random_strength': 0.0006818183620584402, 'bagging_temperature': 0.9358631774211223, 'border_count': 234, 'scale_pos_weight': 8.0381085744552}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:   7%|▋         | 7/100 [00:38<07:49,  5.05s/it]

[I 2025-11-06 16:58:28,871] Trial 6 finished with value: 0.5241670577193805 and parameters: {'learning_rate': 0.037641050451517985, 'depth': 7, 'l2_leaf_reg': 0.1639301573381784, 'subsample': 0.5, 'random_strength': 0.002045649112333333, 'bagging_temperature': 0.6675220994521268, 'border_count': 206, 'scale_pos_weight': 7.951192504892755}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:   8%|▊         | 8/100 [00:38<05:37,  3.67s/it]

[I 2025-11-06 16:58:29,599] Trial 7 finished with value: 0.5905310300703774 and parameters: {'learning_rate': 0.2084281603666071, 'depth': 4, 'l2_leaf_reg': 0.007897918456968125, 'subsample': 0.8, 'random_strength': 3.271694818228003e-07, 'bagging_temperature': 0.7339958213755656, 'border_count': 176, 'scale_pos_weight': 2.713483358186511}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:   9%|▉         | 9/100 [00:39<04:13,  2.79s/it]

[I 2025-11-06 16:58:30,438] Trial 8 finished with value: 0.5311242603550296 and parameters: {'learning_rate': 0.14747720386790833, 'depth': 4, 'l2_leaf_reg': 0.12121961218898068, 'subsample': 0.5, 'random_strength': 0.9739384443802036, 'bagging_temperature': 0.6755478238200716, 'border_count': 227, 'scale_pos_weight': 7.172090812377809}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  10%|█         | 10/100 [00:42<04:20,  2.90s/it]

[I 2025-11-06 16:58:33,581] Trial 9 finished with value: 0.5526384195476995 and parameters: {'learning_rate': 0.14765226363832462, 'depth': 9, 'l2_leaf_reg': 1.9108395268998204, 'subsample': 0.8, 'random_strength': 1.1083985289690445e-05, 'bagging_temperature': 0.6839435293302565, 'border_count': 92, 'scale_pos_weight': 5.593604350727223}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  11%|█         | 11/100 [00:46<04:29,  3.03s/it]

[I 2025-11-06 16:58:36,916] Trial 10 finished with value: 0.5725235849056604 and parameters: {'learning_rate': 0.03065795283997719, 'depth': 6, 'l2_leaf_reg': 8.091373548784244, 'subsample': 1.0, 'random_strength': 1.7995851223104994e-08, 'bagging_temperature': 0.029092272264026598, 'border_count': 140, 'scale_pos_weight': 3.5836033251196184}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  12%|█▏        | 12/100 [00:47<03:53,  2.66s/it]

[I 2025-11-06 16:58:38,718] Trial 11 finished with value: 0.5791642142436727 and parameters: {'learning_rate': 0.07686416580602864, 'depth': 3, 'l2_leaf_reg': 0.0011937752124575988, 'subsample': 0.7, 'random_strength': 1.909862983517866e-06, 'bagging_temperature': 0.011235457533026611, 'border_count': 173, 'scale_pos_weight': 3.6339460034440525}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  13%|█▎        | 13/100 [00:51<04:06,  2.83s/it]

[I 2025-11-06 16:58:41,945] Trial 12 finished with value: 0.5786713286713286 and parameters: {'learning_rate': 0.023674586056524824, 'depth': 5, 'l2_leaf_reg': 0.029934240831527372, 'subsample': 0.7, 'random_strength': 6.855692626521456e-08, 'bagging_temperature': 0.45731326855962806, 'border_count': 166, 'scale_pos_weight': 3.6996444400419577}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  14%|█▍        | 14/100 [00:52<03:26,  2.40s/it]

[I 2025-11-06 16:58:43,364] Trial 13 finished with value: 0.5159991049451779 and parameters: {'learning_rate': 0.05059439822319247, 'depth': 5, 'l2_leaf_reg': 4.766947730937801, 'subsample': 0.9, 'random_strength': 1.1296099356867346e-05, 'bagging_temperature': 0.4808528320609313, 'border_count': 135, 'scale_pos_weight': 9.57221736752047}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  15%|█▌        | 15/100 [00:53<02:41,  1.90s/it]

[I 2025-11-06 16:58:44,088] Trial 14 finished with value: 0.5912361020274689 and parameters: {'learning_rate': 0.24659465064235464, 'depth': 3, 'l2_leaf_reg': 0.2652137776451179, 'subsample': 0.6, 'random_strength': 4.205881861743407e-07, 'bagging_temperature': 0.26756677350858066, 'border_count': 197, 'scale_pos_weight': 2.578564827198933}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  16%|█▌        | 16/100 [00:54<02:17,  1.64s/it]

[I 2025-11-06 16:58:45,128] Trial 15 finished with value: 0.5572320499479708 and parameters: {'learning_rate': 0.09048532644886441, 'depth': 3, 'l2_leaf_reg': 0.2977671711581206, 'subsample': 0.6, 'random_strength': 1.9284411392729697e-05, 'bagging_temperature': 0.19220496964832362, 'border_count': 192, 'scale_pos_weight': 4.945182292038169}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  17%|█▋        | 17/100 [01:01<04:33,  3.30s/it]

[I 2025-11-06 16:58:52,277] Trial 16 finished with value: 0.5561450044208665 and parameters: {'learning_rate': 0.018037537844781227, 'depth': 6, 'l2_leaf_reg': 0.4817096531782106, 'subsample': 0.6, 'random_strength': 0.010796168933562486, 'bagging_temperature': 0.1465357536342325, 'border_count': 118, 'scale_pos_weight': 1.2229249483591174}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 1. Best value: 0.594839:  18%|█▊        | 18/100 [01:03<03:45,  2.75s/it]

[I 2025-11-06 16:58:53,756] Trial 17 finished with value: 0.5506179332106232 and parameters: {'learning_rate': 0.29680303622857956, 'depth': 7, 'l2_leaf_reg': 2.010607146320272, 'subsample': 0.6, 'random_strength': 1.3115985149347256e-06, 'bagging_temperature': 0.3628345982113581, 'border_count': 199, 'scale_pos_weight': 4.675789557727512}. Best is trial 1 with value: 0.5948387096774194.


Best trial: 18. Best value: 0.596806:  19%|█▉        | 19/100 [01:05<03:43,  2.76s/it]

[I 2025-11-06 16:58:56,534] Trial 18 finished with value: 0.5968063872255489 and parameters: {'learning_rate': 0.05209175605624951, 'depth': 4, 'l2_leaf_reg': 0.04204622412901022, 'subsample': 0.6, 'random_strength': 2.0442388773047322e-08, 'bagging_temperature': 0.1408791979899406, 'border_count': 160, 'scale_pos_weight': 2.5304984856126653}. Best is trial 18 with value: 0.5968063872255489.


Best trial: 18. Best value: 0.596806:  20%|██        | 20/100 [01:07<03:22,  2.54s/it]

[I 2025-11-06 16:58:58,550] Trial 19 finished with value: 0.5674217907227616 and parameters: {'learning_rate': 0.04277111153790552, 'depth': 5, 'l2_leaf_reg': 0.050278918033968396, 'subsample': 0.7, 'random_strength': 2.5269431680798256e-08, 'bagging_temperature': 0.12028996941935478, 'border_count': 156, 'scale_pos_weight': 4.455633994694937}. Best is trial 18 with value: 0.5968063872255489.


Best trial: 18. Best value: 0.596806:  21%|██        | 21/100 [01:11<03:40,  2.79s/it]

[I 2025-11-06 16:59:01,935] Trial 20 finished with value: 0.5874906924795235 and parameters: {'learning_rate': 0.027884204700393014, 'depth': 4, 'l2_leaf_reg': 0.04562295153435316, 'subsample': 1.0, 'random_strength': 6.913344481859309e-08, 'bagging_temperature': 0.08818144771948389, 'border_count': 108, 'scale_pos_weight': 1.9049060620098435}. Best is trial 18 with value: 0.5968063872255489.


Best trial: 18. Best value: 0.596806:  22%|██▏       | 22/100 [01:12<03:08,  2.41s/it]

[I 2025-11-06 16:59:03,463] Trial 21 finished with value: 0.5921859864384889 and parameters: {'learning_rate': 0.12027068694095304, 'depth': 3, 'l2_leaf_reg': 0.27765015639335094, 'subsample': 0.6, 'random_strength': 3.20835417112054e-07, 'bagging_temperature': 0.23966263180484215, 'border_count': 187, 'scale_pos_weight': 2.7783340002419212}. Best is trial 18 with value: 0.5968063872255489.


Best trial: 18. Best value: 0.596806:  23%|██▎       | 23/100 [01:14<02:40,  2.08s/it]

[I 2025-11-06 16:59:04,778] Trial 22 finished with value: 0.5871501272264631 and parameters: {'learning_rate': 0.12215332177888664, 'depth': 4, 'l2_leaf_reg': 0.08731474169650164, 'subsample': 0.6, 'random_strength': 3.0392075733885424e-06, 'bagging_temperature': 0.19822510666353674, 'border_count': 181, 'scale_pos_weight': 2.9456473934913867}. Best is trial 18 with value: 0.5968063872255489.


Best trial: 18. Best value: 0.596806:  24%|██▍       | 24/100 [01:16<02:48,  2.21s/it]

[I 2025-11-06 16:59:07,291] Trial 23 finished with value: 0.5887708649468892 and parameters: {'learning_rate': 0.062413911891814035, 'depth': 3, 'l2_leaf_reg': 2.111992543929264, 'subsample': 0.8, 'random_strength': 1.5138794078069904e-07, 'bagging_temperature': 0.3716286221712537, 'border_count': 154, 'scale_pos_weight': 1.7844905406069063}. Best is trial 18 with value: 0.5968063872255489.


Best trial: 18. Best value: 0.596806:  25%|██▌       | 25/100 [01:18<02:30,  2.01s/it]

[I 2025-11-06 16:59:08,832] Trial 24 finished with value: 0.5818070818070818 and parameters: {'learning_rate': 0.11015939227419116, 'depth': 5, 'l2_leaf_reg': 4.709409921837666, 'subsample': 0.7, 'random_strength': 6.220399097824005e-05, 'bagging_temperature': 0.0012441703662957604, 'border_count': 213, 'scale_pos_weight': 3.1883915605966875}. Best is trial 18 with value: 0.5968063872255489.


Best trial: 18. Best value: 0.596806:  26%|██▌       | 26/100 [01:22<03:19,  2.69s/it]

[I 2025-11-06 16:59:13,113] Trial 25 finished with value: 0.5755148741418764 and parameters: {'learning_rate': 0.02079497202633949, 'depth': 6, 'l2_leaf_reg': 0.018477191174107774, 'subsample': 0.5, 'random_strength': 1.429395779032731e-08, 'bagging_temperature': 0.08794545974628429, 'border_count': 255, 'scale_pos_weight': 4.083671602800209}. Best is trial 18 with value: 0.5968063872255489.


Best trial: 26. Best value: 0.599659:  27%|██▋       | 27/100 [01:27<04:20,  3.57s/it]

[I 2025-11-06 16:59:18,741] Trial 26 finished with value: 0.5996592844974447 and parameters: {'learning_rate': 0.014696817707115293, 'depth': 4, 'l2_leaf_reg': 0.7059639525465884, 'subsample': 0.6, 'random_strength': 1.3862518259079903e-06, 'bagging_temperature': 0.20339854575996982, 'border_count': 134, 'scale_pos_weight': 2.350183208263932}. Best is trial 26 with value: 0.5996592844974447.


Best trial: 26. Best value: 0.599659:  28%|██▊       | 28/100 [01:37<06:22,  5.31s/it]

[I 2025-11-06 16:59:28,118] Trial 27 finished with value: 0.5334598955861415 and parameters: {'learning_rate': 0.015114242986431924, 'depth': 7, 'l2_leaf_reg': 8.912449246566647, 'subsample': 0.7, 'random_strength': 6.584871663326676e-05, 'bagging_temperature': 0.5661798351554058, 'border_count': 122, 'scale_pos_weight': 1.0472100829772906}. Best is trial 26 with value: 0.5996592844974447.


Best trial: 26. Best value: 0.599659:  29%|██▉       | 29/100 [01:42<06:08,  5.19s/it]

[I 2025-11-06 16:59:33,009] Trial 28 finished with value: 0.5891249549873965 and parameters: {'learning_rate': 0.014600892465093815, 'depth': 4, 'l2_leaf_reg': 0.8397468825741167, 'subsample': 0.8, 'random_strength': 5.143960949831411e-06, 'bagging_temperature': 0.379363949123683, 'border_count': 78, 'scale_pos_weight': 2.0413952006862406}. Best is trial 26 with value: 0.5996592844974447.


Best trial: 26. Best value: 0.599659:  30%|███       | 30/100 [01:45<05:20,  4.58s/it]

[I 2025-11-06 16:59:36,158] Trial 29 finished with value: 0.5759599332220368 and parameters: {'learning_rate': 0.034217188416442064, 'depth': 5, 'l2_leaf_reg': 3.212624099093142, 'subsample': 0.9, 'random_strength': 1.1231183733905185e-06, 'bagging_temperature': 0.16730099442850227, 'border_count': 246, 'scale_pos_weight': 1.3986809139664684}. Best is trial 26 with value: 0.5996592844974447.


Best trial: 26. Best value: 0.599659:  31%|███       | 31/100 [01:47<04:22,  3.80s/it]

[I 2025-11-06 16:59:38,154] Trial 30 finished with value: 0.5520103761348898 and parameters: {'learning_rate': 0.026084380595358267, 'depth': 6, 'l2_leaf_reg': 0.0019434406588703828, 'subsample': 0.6, 'random_strength': 6.182713308504692e-08, 'bagging_temperature': 0.08294914549220332, 'border_count': 146, 'scale_pos_weight': 5.217048259734767}. Best is trial 26 with value: 0.5996592844974447.


Best trial: 31. Best value: 0.600742:  32%|███▏      | 32/100 [01:49<03:39,  3.23s/it]

[I 2025-11-06 16:59:40,060] Trial 31 finished with value: 0.600742490718866 and parameters: {'learning_rate': 0.05606351742585853, 'depth': 3, 'l2_leaf_reg': 0.4156255250431988, 'subsample': 0.6, 'random_strength': 6.983638365384724e-07, 'bagging_temperature': 0.23818262081692881, 'border_count': 185, 'scale_pos_weight': 2.383191711602386}. Best is trial 31 with value: 0.600742490718866.


Best trial: 31. Best value: 0.600742:  33%|███▎      | 33/100 [01:54<04:21,  3.90s/it]

[I 2025-11-06 16:59:45,527] Trial 32 finished with value: 0.599388379204893 and parameters: {'learning_rate': 0.010155097662271973, 'depth': 4, 'l2_leaf_reg': 0.4867492969967468, 'subsample': 0.5, 'random_strength': 8.639613521848255e-07, 'bagging_temperature': 0.3310992010497731, 'border_count': 161, 'scale_pos_weight': 2.3343871447145017}. Best is trial 31 with value: 0.600742490718866.


Best trial: 31. Best value: 0.600742:  34%|███▍      | 34/100 [02:00<04:53,  4.45s/it]

[I 2025-11-06 16:59:51,257] Trial 33 finished with value: 0.5990404386566142 and parameters: {'learning_rate': 0.010275813904345338, 'depth': 4, 'l2_leaf_reg': 0.6013476726111039, 'subsample': 0.5, 'random_strength': 5.779306851236846e-08, 'bagging_temperature': 0.3264875097932768, 'border_count': 161, 'scale_pos_weight': 2.2702484226710675}. Best is trial 31 with value: 0.600742490718866.


Best trial: 31. Best value: 0.600742:  35%|███▌      | 35/100 [02:04<04:47,  4.43s/it]

[I 2025-11-06 16:59:55,629] Trial 34 finished with value: 0.5960539979231568 and parameters: {'learning_rate': 0.010166748674412087, 'depth': 3, 'l2_leaf_reg': 0.4574382196635102, 'subsample': 0.5, 'random_strength': 1.0855476620380279e-06, 'bagging_temperature': 0.32297724880229395, 'border_count': 126, 'scale_pos_weight': 2.2132513996675685}. Best is trial 31 with value: 0.600742490718866.


Best trial: 31. Best value: 0.600742:  36%|███▌      | 36/100 [02:10<05:02,  4.73s/it]

[I 2025-11-06 17:00:01,064] Trial 35 finished with value: 0.5816419266888821 and parameters: {'learning_rate': 0.0138382026918524, 'depth': 4, 'l2_leaf_reg': 1.1209326925782948, 'subsample': 0.5, 'random_strength': 9.483842068360251e-08, 'bagging_temperature': 0.435783342688274, 'border_count': 99, 'scale_pos_weight': 3.2745270303931346}. Best is trial 31 with value: 0.600742490718866.


Best trial: 31. Best value: 0.600742:  37%|███▋      | 37/100 [02:14<04:46,  4.55s/it]

[I 2025-11-06 17:00:05,194] Trial 36 finished with value: 0.5887378640776699 and parameters: {'learning_rate': 0.012364905609529284, 'depth': 3, 'l2_leaf_reg': 0.3818169010536677, 'subsample': 0.5, 'random_strength': 3.0353145807931896e-05, 'bagging_temperature': 0.5494146448874002, 'border_count': 40, 'scale_pos_weight': 1.6584347181121109}. Best is trial 31 with value: 0.600742490718866.


Best trial: 31. Best value: 0.600742:  38%|███▊      | 38/100 [02:19<04:47,  4.63s/it]

[I 2025-11-06 17:00:10,010] Trial 37 finished with value: 0.5712697971658794 and parameters: {'learning_rate': 0.01003400836654076, 'depth': 5, 'l2_leaf_reg': 0.6513482611762885, 'subsample': 0.5, 'random_strength': 5.075925279447171e-06, 'bagging_temperature': 0.30519641026775773, 'border_count': 149, 'scale_pos_weight': 4.0684193669541955}. Best is trial 31 with value: 0.600742490718866.


Best trial: 31. Best value: 0.600742:  39%|███▉      | 39/100 [02:24<04:57,  4.88s/it]

[I 2025-11-06 17:00:15,479] Trial 38 finished with value: 0.5958153437396214 and parameters: {'learning_rate': 0.016819772881830102, 'depth': 4, 'l2_leaf_reg': 0.17582852863525886, 'subsample': 0.5, 'random_strength': 6.113344667047216e-07, 'bagging_temperature': 0.24245095925028345, 'border_count': 168, 'scale_pos_weight': 2.534747516624657}. Best is trial 31 with value: 0.600742490718866.


Best trial: 39. Best value: 0.601235:  40%|████      | 40/100 [02:30<05:05,  5.10s/it]

[I 2025-11-06 17:00:21,085] Trial 39 finished with value: 0.6012354152367879 and parameters: {'learning_rate': 0.012329265214293108, 'depth': 4, 'l2_leaf_reg': 1.3367110797592299, 'subsample': 0.5, 'random_strength': 1.803563573083937e-07, 'bagging_temperature': 0.40991948833808883, 'border_count': 54, 'scale_pos_weight': 2.263536445918072}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  41%|████      | 41/100 [02:34<04:46,  4.85s/it]

[I 2025-11-06 17:00:25,356] Trial 40 finished with value: 0.5816409423233144 and parameters: {'learning_rate': 0.02023315890486856, 'depth': 3, 'l2_leaf_reg': 1.4700664333184839, 'subsample': 0.6, 'random_strength': 0.00037269431516635685, 'bagging_temperature': 0.820837075775209, 'border_count': 56, 'scale_pos_weight': 1.4885100100598327}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  42%|████▏     | 42/100 [02:40<04:51,  5.03s/it]

[I 2025-11-06 17:00:30,791] Trial 41 finished with value: 0.5963559915907498 and parameters: {'learning_rate': 0.012552955081697596, 'depth': 4, 'l2_leaf_reg': 0.7411041499911468, 'subsample': 0.5, 'random_strength': 4.9313982667094094e-08, 'bagging_temperature': 0.4227626894981338, 'border_count': 57, 'scale_pos_weight': 2.169624347923746}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  43%|████▎     | 43/100 [02:45<04:53,  5.15s/it]

[I 2025-11-06 17:00:36,242] Trial 42 finished with value: 0.5835884874464177 and parameters: {'learning_rate': 0.011148945726154106, 'depth': 4, 'l2_leaf_reg': 0.18953198595669907, 'subsample': 0.5, 'random_strength': 2.4126697482803307e-07, 'bagging_temperature': 0.35496620562562176, 'border_count': 132, 'scale_pos_weight': 3.141770078509288}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  44%|████▍     | 44/100 [02:51<04:57,  5.32s/it]

[I 2025-11-06 17:00:41,952] Trial 43 finished with value: 0.5948542024013722 and parameters: {'learning_rate': 0.015617209485323767, 'depth': 5, 'l2_leaf_reg': 1.2355122174064912, 'subsample': 0.5, 'random_strength': 1.5051837258509086e-07, 'bagging_temperature': 0.22721709943356438, 'border_count': 32, 'scale_pos_weight': 2.3060574130268425}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  45%|████▌     | 45/100 [02:55<04:37,  5.05s/it]

[I 2025-11-06 17:00:46,382] Trial 44 finished with value: 0.5417384880571288 and parameters: {'learning_rate': 0.01306191867954641, 'depth': 3, 'l2_leaf_reg': 0.09447715963176104, 'subsample': 0.5, 'random_strength': 7.507786818283037e-07, 'bagging_temperature': 0.5380643872301702, 'border_count': 214, 'scale_pos_weight': 6.168639290305857}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  46%|████▌     | 46/100 [03:02<04:56,  5.49s/it]

[I 2025-11-06 17:00:52,902] Trial 45 finished with value: 0.5783767946088485 and parameters: {'learning_rate': 0.022318492907701454, 'depth': 9, 'l2_leaf_reg': 2.9614251444484894, 'subsample': 0.6, 'random_strength': 3.422364805250615e-08, 'bagging_temperature': 0.3114630909950311, 'border_count': 177, 'scale_pos_weight': 3.9843796682067523}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  47%|████▋     | 47/100 [03:03<03:43,  4.21s/it]

[I 2025-11-06 17:00:54,124] Trial 46 finished with value: 0.535421686746988 and parameters: {'learning_rate': 0.07113116583449938, 'depth': 4, 'l2_leaf_reg': 0.6064104613524264, 'subsample': 0.5, 'random_strength': 3.2856185056997628e-06, 'bagging_temperature': 0.4046654709974971, 'border_count': 87, 'scale_pos_weight': 6.79088960785296}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  48%|████▊     | 48/100 [03:04<02:57,  3.42s/it]

[I 2025-11-06 17:00:55,695] Trial 47 finished with value: 0.5153898000449337 and parameters: {'learning_rate': 0.04402050285428015, 'depth': 5, 'l2_leaf_reg': 1.063834623027227, 'subsample': 0.6, 'random_strength': 1.6402253578365505e-07, 'bagging_temperature': 0.600634935753062, 'border_count': 110, 'scale_pos_weight': 9.61866291677996}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  49%|████▉     | 49/100 [03:09<03:05,  3.64s/it]

[I 2025-11-06 17:00:59,845] Trial 48 finished with value: 0.5771812080536913 and parameters: {'learning_rate': 0.011156605704818065, 'depth': 3, 'l2_leaf_reg': 0.3534101398008644, 'subsample': 0.5, 'random_strength': 0.0018641990105619562, 'bagging_temperature': 0.4949945304807931, 'border_count': 165, 'scale_pos_weight': 3.504389359607237}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  50%|█████     | 50/100 [03:15<03:42,  4.46s/it]

[I 2025-11-06 17:01:06,206] Trial 49 finished with value: 0.5297760838494521 and parameters: {'learning_rate': 0.010075596409462782, 'depth': 4, 'l2_leaf_reg': 0.12820255266335473, 'subsample': 0.6, 'random_strength': 9.331056709998989e-06, 'bagging_temperature': 0.27024300169170123, 'border_count': 185, 'scale_pos_weight': 1.0366199974397192}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  51%|█████     | 51/100 [03:19<03:25,  4.19s/it]

[I 2025-11-06 17:01:09,760] Trial 50 finished with value: 0.520136518771331 and parameters: {'learning_rate': 0.01817283727955538, 'depth': 3, 'l2_leaf_reg': 0.22313560688444198, 'subsample': 0.7, 'random_strength': 0.017152549425858196, 'bagging_temperature': 0.3286229877140891, 'border_count': 140, 'scale_pos_weight': 8.697856853318536}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  52%|█████▏    | 52/100 [03:20<02:43,  3.41s/it]

[I 2025-11-06 17:01:11,358] Trial 51 finished with value: 0.5927631578947369 and parameters: {'learning_rate': 0.06558893120245879, 'depth': 4, 'l2_leaf_reg': 0.06282117344752533, 'subsample': 0.6, 'random_strength': 1.2187777778696996e-08, 'bagging_temperature': 0.2115794071712706, 'border_count': 158, 'scale_pos_weight': 2.539845105361924}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  53%|█████▎    | 53/100 [03:22<02:14,  2.86s/it]

[I 2025-11-06 17:01:12,925] Trial 52 finished with value: 0.5863377609108159 and parameters: {'learning_rate': 0.0563888042316366, 'depth': 4, 'l2_leaf_reg': 0.017890859524293038, 'subsample': 0.6, 'random_strength': 3.294864996539341e-08, 'bagging_temperature': 0.2800355608806855, 'border_count': 162, 'scale_pos_weight': 2.8790067660451633}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  54%|█████▍    | 54/100 [03:23<01:48,  2.37s/it]

[I 2025-11-06 17:01:14,145] Trial 53 finished with value: 0.5819165378670789 and parameters: {'learning_rate': 0.08724988976763463, 'depth': 5, 'l2_leaf_reg': 0.008966880711301743, 'subsample': 0.5, 'random_strength': 1.025512709593052e-08, 'bagging_temperature': 0.1542301792071975, 'border_count': 173, 'scale_pos_weight': 1.7364481397044504}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  55%|█████▌    | 55/100 [03:27<02:11,  2.93s/it]

[I 2025-11-06 17:01:18,382] Trial 54 finished with value: 0.5948542024013722 and parameters: {'learning_rate': 0.03854217474955772, 'depth': 4, 'l2_leaf_reg': 0.5259730461659596, 'subsample': 0.6, 'random_strength': 4.727972223283005e-07, 'bagging_temperature': 0.1370088031818789, 'border_count': 190, 'scale_pos_weight': 2.387607301420104}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  56%|█████▌    | 56/100 [03:36<03:31,  4.80s/it]

[I 2025-11-06 17:01:27,538] Trial 55 finished with value: 0.5878810688746707 and parameters: {'learning_rate': 0.011634025578033825, 'depth': 8, 'l2_leaf_reg': 1.6755457029036531, 'subsample': 0.7, 'random_strength': 2.044538026937127e-06, 'bagging_temperature': 0.4637905869447328, 'border_count': 149, 'scale_pos_weight': 1.9866676775349512}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  57%|█████▋    | 57/100 [03:39<02:57,  4.13s/it]

[I 2025-11-06 17:01:30,105] Trial 56 finished with value: 0.5913322632423756 and parameters: {'learning_rate': 0.04973714817677604, 'depth': 3, 'l2_leaf_reg': 0.032008866458711954, 'subsample': 0.6, 'random_strength': 1.1104387734822137e-07, 'bagging_temperature': 0.05850310328078748, 'border_count': 202, 'scale_pos_weight': 2.8193960083862692}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  58%|█████▊    | 58/100 [03:43<02:53,  4.13s/it]

[I 2025-11-06 17:01:34,253] Trial 57 finished with value: 0.5824308062575211 and parameters: {'learning_rate': 0.016238947324889036, 'depth': 4, 'l2_leaf_reg': 0.840534436252811, 'subsample': 0.5, 'random_strength': 2.6257691886343832e-08, 'bagging_temperature': 0.2502805284755011, 'border_count': 231, 'scale_pos_weight': 3.246361469439394}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  59%|█████▉    | 59/100 [03:47<02:48,  4.11s/it]

[I 2025-11-06 17:01:38,313] Trial 58 finished with value: 0.5765326837190419 and parameters: {'learning_rate': 0.031631228516836564, 'depth': 5, 'l2_leaf_reg': 5.447484828430065, 'subsample': 0.6, 'random_strength': 2.800635484735543e-07, 'bagging_temperature': 0.18632205150744227, 'border_count': 58, 'scale_pos_weight': 1.5093649400134146}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  60%|██████    | 60/100 [03:52<02:53,  4.34s/it]

[I 2025-11-06 17:01:43,193] Trial 59 finished with value: 0.5761646184624178 and parameters: {'learning_rate': 0.014364549826276605, 'depth': 3, 'l2_leaf_reg': 2.634488783200898, 'subsample': 0.5, 'random_strength': 2.3111643638517805e-07, 'bagging_temperature': 0.9866687540411976, 'border_count': 135, 'scale_pos_weight': 3.7228306027672895}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  61%|██████    | 61/100 [03:56<02:47,  4.28s/it]

[I 2025-11-06 17:01:47,339] Trial 60 finished with value: 0.5877551020408164 and parameters: {'learning_rate': 0.02451462675760196, 'depth': 4, 'l2_leaf_reg': 0.33284272978445456, 'subsample': 0.7, 'random_strength': 6.10700240927532e-07, 'bagging_temperature': 0.11954139978967904, 'border_count': 195, 'scale_pos_weight': 1.904761008223076}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  62%|██████▏   | 62/100 [04:02<02:57,  4.66s/it]

[I 2025-11-06 17:01:52,890] Trial 61 finished with value: 0.5978186775732788 and parameters: {'learning_rate': 0.01296840761213988, 'depth': 4, 'l2_leaf_reg': 0.7022984334998708, 'subsample': 0.5, 'random_strength': 4.683181685222497e-08, 'bagging_temperature': 0.3425121142214103, 'border_count': 55, 'scale_pos_weight': 2.322357275249707}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  63%|██████▎   | 63/100 [04:07<03:02,  4.93s/it]

[I 2025-11-06 17:01:58,440] Trial 62 finished with value: 0.5943118666230794 and parameters: {'learning_rate': 0.013088035534390665, 'depth': 4, 'l2_leaf_reg': 0.9151664185438593, 'subsample': 0.5, 'random_strength': 5.162488737913582e-08, 'bagging_temperature': 0.39884969036027457, 'border_count': 73, 'scale_pos_weight': 2.6603134822759116}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  64%|██████▍   | 64/100 [04:14<03:12,  5.36s/it]

[I 2025-11-06 17:02:04,798] Trial 63 finished with value: 0.5961670088980151 and parameters: {'learning_rate': 0.011528305776548831, 'depth': 5, 'l2_leaf_reg': 0.48392421102055505, 'subsample': 0.5, 'random_strength': 2.136295107180526e-08, 'bagging_temperature': 0.35051189203613853, 'border_count': 66, 'scale_pos_weight': 2.3158367461630593}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  65%|██████▌   | 65/100 [04:19<03:08,  5.39s/it]

[I 2025-11-06 17:02:10,262] Trial 64 finished with value: 0.5761421319796954 and parameters: {'learning_rate': 0.017521839981734524, 'depth': 4, 'l2_leaf_reg': 1.4211595061653521, 'subsample': 0.6, 'random_strength': 7.615146314040428e-08, 'bagging_temperature': 0.2920076563138188, 'border_count': 171, 'scale_pos_weight': 1.3294740567513803}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  66%|██████▌   | 66/100 [04:24<02:57,  5.22s/it]

[I 2025-11-06 17:02:15,073] Trial 65 finished with value: 0.5915136162127929 and parameters: {'learning_rate': 0.014371148430991658, 'depth': 3, 'l2_leaf_reg': 0.1416462807398716, 'subsample': 0.5, 'random_strength': 4.1267954019271986e-08, 'bagging_temperature': 0.20232309458851475, 'border_count': 180, 'scale_pos_weight': 2.8952271213241576}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  67%|██████▋   | 67/100 [04:25<02:09,  3.91s/it]

[I 2025-11-06 17:02:15,951] Trial 66 finished with value: 0.54724111866969 and parameters: {'learning_rate': 0.1954235733809041, 'depth': 5, 'l2_leaf_reg': 0.06566064126851535, 'subsample': 0.5, 'random_strength': 1.7233026006241237e-06, 'bagging_temperature': 0.3345042582954476, 'border_count': 46, 'scale_pos_weight': 5.715088211648617}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  68%|██████▊   | 68/100 [04:29<02:04,  3.88s/it]

[I 2025-11-06 17:02:19,758] Trial 67 finished with value: 0.5886601661249549 and parameters: {'learning_rate': 0.019817192782153492, 'depth': 6, 'l2_leaf_reg': 0.2645557636383452, 'subsample': 0.6, 'random_strength': 1.0313034245803314e-07, 'bagging_temperature': 0.4353672484478158, 'border_count': 157, 'scale_pos_weight': 2.0848685273578793}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  69%|██████▉   | 69/100 [04:43<03:37,  7.02s/it]

[I 2025-11-06 17:02:34,084] Trial 68 finished with value: 0.5779591836734694 and parameters: {'learning_rate': 0.010710890192155561, 'depth': 10, 'l2_leaf_reg': 2.409378437265153, 'subsample': 1.0, 'random_strength': 1.831697451556417e-08, 'bagging_temperature': 0.38828999214679555, 'border_count': 86, 'scale_pos_weight': 1.6428721741739807}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  70%|███████   | 70/100 [04:44<02:38,  5.27s/it]

[I 2025-11-06 17:02:35,294] Trial 69 finished with value: 0.5646739130434782 and parameters: {'learning_rate': 0.08880466595741512, 'depth': 3, 'l2_leaf_reg': 0.030656008218986, 'subsample': 0.5, 'random_strength': 4.1705020303243563e-07, 'bagging_temperature': 0.25925520581800254, 'border_count': 114, 'scale_pos_weight': 4.463929979332926}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  71%|███████   | 71/100 [04:49<02:32,  5.28s/it]

[I 2025-11-06 17:02:40,573] Trial 70 finished with value: 0.577935522034901 and parameters: {'learning_rate': 0.0126023150323929, 'depth': 4, 'l2_leaf_reg': 0.011342173756785064, 'subsample': 0.5, 'random_strength': 9.113726002382525e-07, 'bagging_temperature': 0.1722751418239164, 'border_count': 97, 'scale_pos_weight': 3.476534918894272}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  72%|███████▏  | 72/100 [04:55<02:31,  5.41s/it]

[I 2025-11-06 17:02:46,288] Trial 71 finished with value: 0.5988065988065988 and parameters: {'learning_rate': 0.012543145722190636, 'depth': 4, 'l2_leaf_reg': 0.6173425430224013, 'subsample': 0.5, 'random_strength': 4.6500057291495345e-08, 'bagging_temperature': 0.43085790419490494, 'border_count': 60, 'scale_pos_weight': 2.1412679879341505}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  73%|███████▎  | 73/100 [05:00<02:25,  5.39s/it]

[I 2025-11-06 17:02:51,628] Trial 72 finished with value: 0.5981370592149036 and parameters: {'learning_rate': 0.013537434740578615, 'depth': 4, 'l2_leaf_reg': 0.6120197729522956, 'subsample': 0.5, 'random_strength': 1.561039770603238e-07, 'bagging_temperature': 0.5257498530137309, 'border_count': 47, 'scale_pos_weight': 2.500822253758497}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  74%|███████▍  | 74/100 [05:06<02:19,  5.36s/it]

[I 2025-11-06 17:02:56,924] Trial 73 finished with value: 0.5843392138656762 and parameters: {'learning_rate': 0.013368652599811256, 'depth': 4, 'l2_leaf_reg': 0.6418223627990725, 'subsample': 0.5, 'random_strength': 2.0719989865073314e-07, 'bagging_temperature': 0.5998478798204401, 'border_count': 48, 'scale_pos_weight': 3.080417503881362}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  75%|███████▌  | 75/100 [05:12<02:19,  5.58s/it]

[I 2025-11-06 17:03:03,033] Trial 74 finished with value: 0.5977165883143049 and parameters: {'learning_rate': 0.015498783338067652, 'depth': 5, 'l2_leaf_reg': 0.47631911193377074, 'subsample': 0.5, 'random_strength': 1.2472923947067111e-07, 'bagging_temperature': 0.5229589560045356, 'border_count': 34, 'scale_pos_weight': 2.468814740087492}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  76%|███████▌  | 76/100 [05:17<02:14,  5.60s/it]

[I 2025-11-06 17:03:08,676] Trial 75 finished with value: 0.5933188090050835 and parameters: {'learning_rate': 0.011880649800920998, 'depth': 4, 'l2_leaf_reg': 1.0296565663396318, 'subsample': 0.5, 'random_strength': 4.0070088379890253e-07, 'bagging_temperature': 0.4910422679063957, 'border_count': 67, 'scale_pos_weight': 1.9689251571345525}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  77%|███████▋  | 77/100 [05:22<02:00,  5.23s/it]

[I 2025-11-06 17:03:13,033] Trial 76 finished with value: 0.5942778352292313 and parameters: {'learning_rate': 0.01108426231172201, 'depth': 3, 'l2_leaf_reg': 0.3842352850434195, 'subsample': 0.5, 'random_strength': 0.7899205687180811, 'bagging_temperature': 0.4426109646182178, 'border_count': 50, 'scale_pos_weight': 2.2201634272354855}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  78%|███████▊  | 78/100 [05:27<01:57,  5.34s/it]

[I 2025-11-06 17:03:18,620] Trial 77 finished with value: 0.56694286960314 and parameters: {'learning_rate': 0.01640329574640925, 'depth': 4, 'l2_leaf_reg': 1.730018840954881, 'subsample': 0.5, 'random_strength': 7.346846847660631e-08, 'bagging_temperature': 0.41263850211954517, 'border_count': 60, 'scale_pos_weight': 1.228588674291377}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  79%|███████▉  | 79/100 [05:33<01:54,  5.47s/it]

[I 2025-11-06 17:03:24,405] Trial 78 finished with value: 0.5979850503737406 and parameters: {'learning_rate': 0.0136575553189403, 'depth': 5, 'l2_leaf_reg': 0.7168113062900149, 'subsample': 0.5, 'random_strength': 2.879573524944951e-06, 'bagging_temperature': 0.4682307887099631, 'border_count': 38, 'scale_pos_weight': 2.7097720421459424}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  80%|████████  | 80/100 [05:38<01:45,  5.28s/it]

[I 2025-11-06 17:03:29,233] Trial 79 finished with value: 0.5955786736020806 and parameters: {'learning_rate': 0.018643052895925973, 'depth': 5, 'l2_leaf_reg': 0.2500708789778055, 'subsample': 0.5, 'random_strength': 3.0453645527674595e-06, 'bagging_temperature': 0.6516550595193763, 'border_count': 37, 'scale_pos_weight': 2.688524852893063}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  81%|████████  | 81/100 [05:45<01:48,  5.69s/it]

[I 2025-11-06 17:03:35,892] Trial 80 finished with value: 0.5831399845320959 and parameters: {'learning_rate': 0.014635018969244327, 'depth': 5, 'l2_leaf_reg': 4.349177757087865, 'subsample': 0.8, 'random_strength': 2.1642086134493015e-05, 'bagging_temperature': 0.46474591908113005, 'border_count': 43, 'scale_pos_weight': 1.7074155943112426}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  82%|████████▏ | 82/100 [05:50<01:41,  5.63s/it]

[I 2025-11-06 17:03:41,364] Trial 81 finished with value: 0.5984305697714091 and parameters: {'learning_rate': 0.013333464545559072, 'depth': 4, 'l2_leaf_reg': 0.7204392381152387, 'subsample': 0.5, 'random_strength': 1.7345963290792984e-06, 'bagging_temperature': 0.5126498107609536, 'border_count': 53, 'scale_pos_weight': 2.314650547179842}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  83%|████████▎ | 83/100 [05:56<01:34,  5.57s/it]

[I 2025-11-06 17:03:46,814] Trial 82 finished with value: 0.5894143438772315 and parameters: {'learning_rate': 0.01006239315748117, 'depth': 4, 'l2_leaf_reg': 0.848870808923908, 'subsample': 0.5, 'random_strength': 5.834492297647078e-06, 'bagging_temperature': 0.5223145706938843, 'border_count': 78, 'scale_pos_weight': 2.975959782787099}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  84%|████████▍ | 84/100 [06:00<01:23,  5.23s/it]

[I 2025-11-06 17:03:51,232] Trial 83 finished with value: 0.595939751146038 and parameters: {'learning_rate': 0.01350483664303469, 'depth': 3, 'l2_leaf_reg': 0.5619692904577446, 'subsample': 0.5, 'random_strength': 1.457953474772756e-06, 'bagging_temperature': 0.3759834960343033, 'border_count': 67, 'scale_pos_weight': 2.6160256402351427}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  85%|████████▌ | 85/100 [06:06<01:20,  5.35s/it]

[I 2025-11-06 17:03:56,880] Trial 84 finished with value: 0.5814581458145814 and parameters: {'learning_rate': 0.012037918051314522, 'depth': 4, 'l2_leaf_reg': 1.2580121984447312, 'subsample': 0.5, 'random_strength': 8.913665437270681e-07, 'bagging_temperature': 0.5741530337618487, 'border_count': 40, 'scale_pos_weight': 3.2981109905098425}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  86%|████████▌ | 86/100 [06:13<01:22,  5.90s/it]

[I 2025-11-06 17:04:04,045] Trial 85 finished with value: 0.5864547862277715 and parameters: {'learning_rate': 0.010728276900117246, 'depth': 6, 'l2_leaf_reg': 0.394964192020612, 'subsample': 0.5, 'random_strength': 2.381356956179306e-06, 'bagging_temperature': 0.4687365173269326, 'border_count': 51, 'scale_pos_weight': 1.8385000327319483}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  87%|████████▋ | 87/100 [06:18<01:14,  5.75s/it]

[I 2025-11-06 17:04:09,454] Trial 86 finished with value: 0.5968365553602812 and parameters: {'learning_rate': 0.01567475266147255, 'depth': 4, 'l2_leaf_reg': 1.9998145418249984, 'subsample': 0.5, 'random_strength': 1.2463940143637759e-05, 'bagging_temperature': 0.5199132012170002, 'border_count': 64, 'scale_pos_weight': 2.138893402858074}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  88%|████████▊ | 88/100 [06:24<01:08,  5.73s/it]

[I 2025-11-06 17:04:15,121] Trial 87 finished with value: 0.5814696485623003 and parameters: {'learning_rate': 0.013928998369710876, 'depth': 4, 'l2_leaf_reg': 0.20408062239130192, 'subsample': 0.9, 'random_strength': 4.826832979086367e-06, 'bagging_temperature': 0.6143463102690458, 'border_count': 76, 'scale_pos_weight': 1.587584042933691}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  89%|████████▉ | 89/100 [06:28<00:56,  5.16s/it]

[I 2025-11-06 17:04:18,960] Trial 88 finished with value: 0.5899326707277974 and parameters: {'learning_rate': 0.021985940772960205, 'depth': 3, 'l2_leaf_reg': 0.698386056620641, 'subsample': 0.5, 'random_strength': 0.00011484852617853952, 'bagging_temperature': 0.30745734879022213, 'border_count': 53, 'scale_pos_weight': 2.772198143445997}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  90%|█████████ | 90/100 [06:32<00:49,  4.92s/it]

[I 2025-11-06 17:04:23,307] Trial 89 finished with value: 0.5287846481876333 and parameters: {'learning_rate': 0.012151170716526225, 'depth': 5, 'l2_leaf_reg': 1.003305197300653, 'subsample': 0.6, 'random_strength': 5.927955086561115e-07, 'bagging_temperature': 0.22782031778512662, 'border_count': 41, 'scale_pos_weight': 7.672049991698911}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  91%|█████████ | 91/100 [06:37<00:45,  5.03s/it]

[I 2025-11-06 17:04:28,587] Trial 90 finished with value: 0.5738225946258307 and parameters: {'learning_rate': 0.017190483634379375, 'depth': 7, 'l2_leaf_reg': 1.41137206921232, 'subsample': 0.5, 'random_strength': 3.2642466574240835e-07, 'bagging_temperature': 0.5630948396220219, 'border_count': 32, 'scale_pos_weight': 3.893477422868786}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  92%|█████████▏| 92/100 [06:43<00:42,  5.27s/it]

[I 2025-11-06 17:04:34,439] Trial 91 finished with value: 0.5982905982905983 and parameters: {'learning_rate': 0.01315122029558342, 'depth': 4, 'l2_leaf_reg': 0.7057719132185142, 'subsample': 0.5, 'random_strength': 1.555882029596915e-07, 'bagging_temperature': 0.34581787854420326, 'border_count': 46, 'scale_pos_weight': 2.275966818352217}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  93%|█████████▎| 93/100 [06:49<00:37,  5.30s/it]

[I 2025-11-06 17:04:39,810] Trial 92 finished with value: 0.5987345987345988 and parameters: {'learning_rate': 0.01105572715832555, 'depth': 4, 'l2_leaf_reg': 0.4563526030383827, 'subsample': 0.5, 'random_strength': 1.9247361977969041e-07, 'bagging_temperature': 0.4466413347874969, 'border_count': 47, 'scale_pos_weight': 2.444558952717898}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  94%|█████████▍| 94/100 [06:54<00:32,  5.35s/it]

[I 2025-11-06 17:04:45,277] Trial 93 finished with value: 0.5985939069300301 and parameters: {'learning_rate': 0.010723783245685469, 'depth': 4, 'l2_leaf_reg': 0.3114754212251896, 'subsample': 0.5, 'random_strength': 1.5874555149340564e-07, 'bagging_temperature': 0.44259125039031405, 'border_count': 62, 'scale_pos_weight': 2.427889832484932}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  95%|█████████▌| 95/100 [06:59<00:26,  5.37s/it]

[I 2025-11-06 17:04:50,688] Trial 94 finished with value: 0.5973201692524682 and parameters: {'learning_rate': 0.010654225938400416, 'depth': 4, 'l2_leaf_reg': 0.33361691149246653, 'subsample': 0.5, 'random_strength': 2.5110156473312885e-07, 'bagging_temperature': 0.3663815365425751, 'border_count': 63, 'scale_pos_weight': 2.099426168208427}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  96%|█████████▌| 96/100 [07:04<00:20,  5.10s/it]

[I 2025-11-06 17:04:55,143] Trial 95 finished with value: 0.5869646182495345 and parameters: {'learning_rate': 0.011381390360939825, 'depth': 3, 'l2_leaf_reg': 0.4431052380860711, 'subsample': 0.5, 'random_strength': 9.733213495073932e-08, 'bagging_temperature': 0.4169589774764338, 'border_count': 85, 'scale_pos_weight': 1.8544539118974175}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  97%|█████████▋| 97/100 [07:10<00:15,  5.32s/it]

[I 2025-11-06 17:05:00,985] Trial 96 finished with value: 0.5973763874873865 and parameters: {'learning_rate': 0.010027019009276094, 'depth': 4, 'l2_leaf_reg': 0.295748693625921, 'subsample': 0.6, 'random_strength': 1.3362540008683458e-06, 'bagging_temperature': 0.44868350333935125, 'border_count': 216, 'scale_pos_weight': 2.3744559204081246}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  98%|█████████▊| 98/100 [07:15<00:10,  5.38s/it]

[I 2025-11-06 17:05:06,499] Trial 97 finished with value: 0.5742071881606765 and parameters: {'learning_rate': 0.012264028159747732, 'depth': 4, 'l2_leaf_reg': 0.5550924020187662, 'subsample': 0.5, 'random_strength': 2.119264666311847e-07, 'bagging_temperature': 0.28835765472267233, 'border_count': 71, 'scale_pos_weight': 1.342389144704084}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235:  99%|█████████▉| 99/100 [07:17<00:04,  4.34s/it]

[I 2025-11-06 17:05:08,429] Trial 98 finished with value: 0.5861533685190935 and parameters: {'learning_rate': 0.04681516109809052, 'depth': 3, 'l2_leaf_reg': 0.15299523549436217, 'subsample': 0.5, 'random_strength': 6.57760881977799e-07, 'bagging_temperature': 0.390712462016847, 'border_count': 144, 'scale_pos_weight': 3.031318445117751}. Best is trial 39 with value: 0.6012354152367879.


Best trial: 39. Best value: 0.601235: 100%|██████████| 100/100 [07:19<00:00,  4.39s/it]

[I 2025-11-06 17:05:10,233] Trial 99 finished with value: 0.5823475887170154 and parameters: {'learning_rate': 0.060554361765801236, 'depth': 4, 'l2_leaf_reg': 0.11215411327731291, 'subsample': 0.6, 'random_strength': 3.05196111156036e-08, 'bagging_temperature': 0.49983259116576406, 'border_count': 151, 'scale_pos_weight': 3.3112751272468466}. Best is trial 39 with value: 0.6012354152367879.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.6012
  Best Hyperparameters:
    learning_rate: 0.012329265214293108
    depth: 4
    l2_leaf_reg: 1.3367110797592299
    subsample: 0.5
    random_strength: 1.803563573083937e-07
    bagging_temperature: 0.40991948833808883
    border_count: 54
    scale_pos_weight: 2.263536445918072





In [26]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 1000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 50 # Keep early stopping
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.012329265214293108, 'depth': 4, 'l2_leaf_reg': 1.3367110797592299, 'subsample': 0.5, 'random_strength': 1.803563573083937e-07, 'bagging_temperature': 0.40991948833808883, 'border_count': 54, 'scale_pos_weight': 2.263536445918072}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.6012

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.90      0.81      0.85      4165
   Class 1.0       0.52      0.71      0.60      1235

    accuracy                           0.78      5400
   macro avg       0.71      0.76      0.73      5400
weighted avg       0.82      0.78      0.80      5400



In [27]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            1
2       4655051            0
3       6728725            1
4       9848460            1


In [28]:
prediction.to_csv("results/catboost_2_prediction.csv", index=False)

In [29]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 136

Top 10 Most Important Features:
                        feature  importance
44              liab_x_multicar   11.261667
135  recovery_feasibility_score    9.755106
58     is_multi_vehicle_unclear    4.774472
53               liab_prct_sqrt    4.758176
51            liab_prct_squared    4.595245
55                 liab_inverse    4.462763
13                    liab_prct    4.366832
45         liab_x_highrisk_site    4.164962
54                liab_prct_log    3.882838
50          multicar_x_highrisk    3.744198

Bottom 10 Least Important Features:
                   feature  importance
87          luxury_vehicle         0.0
89         economy_vehicle         0.0
91           light_vehicle         0.0
94            high_mileage         0.0
59           is_single_car         0.0
100  very_frequent_claimer         0.0
102          medium_payout         0.0
103           small_payout         0.0
104      very_large_payout         0.0
