In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier, Pool, EFeaturesSelectionAlgorithm
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from f1_preprocessor import Preprocessor

import joblib

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor()

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)

In [5]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [6]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [7]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting CatBoost Preprocessor (stateless)...
Fit complete.
Transforming data for CatBoost...
Applying mandatory .astype(str) to: ['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'policy_report_filed_ind', 'witness_present_ind']
Dropping helper datetime columns: ['claim_date']
Transform complete.
Transforming data for CatBoost...
Applying mandatory .astype(str) to: ['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'policy_report_filed_ind', 'witness_present_ind']
Dropping helper datetime columns: ['claim_date']
Transform complete.


In [8]:
print("Saving preprocessor and training columns...")

# Save the 'pre' object
joblib.dump(pre, 'cc3_preprocessor.pkl')

# Save the exact column order and names
joblib.dump(X_train_proc.columns, 'training_columns.pkl')

print("Done.")

Saving preprocessor and training columns...
Done.


## CatBoost with Optuna Tuning

In [9]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
CAT_FEATURES = pre.cat_features_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'policy_report_filed_ind', 'witness_present_ind']


In [13]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100,
        'random_state': 66
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False
    )

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [14]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-12 11:00:08,614] A new study created in memory with name: no-name-b0c0ec35-726a-4053-9adc-c80394d0fac3



2. Starting Optuna study...


Best trial: 0. Best value: 0.508103:   1%|          | 1/100 [00:02<03:46,  2.28s/it]

[I 2025-11-12 11:00:10,897] Trial 0 finished with value: 0.5081033727551467 and parameters: {'learning_rate': 0.027766732918131434, 'depth': 5, 'l2_leaf_reg': 0.8660510278387105, 'subsample': 0.6, 'random_strength': 4.1368678509374466e-05, 'bagging_temperature': 0.5226685219562379, 'border_count': 210, 'scale_pos_weight': 9.96930389019623}. Best is trial 0 with value: 0.5081033727551467.


Best trial: 1. Best value: 0.559906:   2%|▏         | 2/100 [00:04<03:54,  2.40s/it]

[I 2025-11-12 11:00:13,371] Trial 1 finished with value: 0.5599060297572436 and parameters: {'learning_rate': 0.026953927726262754, 'depth': 7, 'l2_leaf_reg': 0.0036324691972938906, 'subsample': 0.6, 'random_strength': 1.070020057175014e-05, 'bagging_temperature': 0.6229738942827687, 'border_count': 127, 'scale_pos_weight': 1.7966722831522886}. Best is trial 1 with value: 0.5599060297572436.


Best trial: 2. Best value: 0.571597:   3%|▎         | 3/100 [00:07<03:53,  2.41s/it]

[I 2025-11-12 11:00:15,802] Trial 2 finished with value: 0.5715969357690042 and parameters: {'learning_rate': 0.028823699599070024, 'depth': 4, 'l2_leaf_reg': 0.016762576827752517, 'subsample': 1.0, 'random_strength': 0.9144763014871734, 'bagging_temperature': 0.07480806786734195, 'border_count': 188, 'scale_pos_weight': 3.4828736347290823}. Best is trial 2 with value: 0.5715969357690042.


Best trial: 2. Best value: 0.571597:   4%|▍         | 4/100 [00:09<03:33,  2.22s/it]

[I 2025-11-12 11:00:17,731] Trial 3 finished with value: 0.5295383879454676 and parameters: {'learning_rate': 0.03880841065215588, 'depth': 6, 'l2_leaf_reg': 4.861820098393965, 'subsample': 0.6, 'random_strength': 0.000535133608711793, 'bagging_temperature': 0.4645072087935377, 'border_count': 202, 'scale_pos_weight': 7.043262684118848}. Best is trial 2 with value: 0.5715969357690042.


Best trial: 4. Best value: 0.581457:   5%|▌         | 5/100 [00:16<06:21,  4.01s/it]

[I 2025-11-12 11:00:24,922] Trial 4 finished with value: 0.5814569536423841 and parameters: {'learning_rate': 0.012971955585257275, 'depth': 9, 'l2_leaf_reg': 0.8191005335992144, 'subsample': 0.9, 'random_strength': 6.440073568664852e-05, 'bagging_temperature': 0.7511968870543737, 'border_count': 53, 'scale_pos_weight': 2.86306626085406}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:   6%|▌         | 6/100 [00:21<07:00,  4.48s/it]

[I 2025-11-12 11:00:30,301] Trial 5 finished with value: 0.5663650075414781 and parameters: {'learning_rate': 0.05479733777849365, 'depth': 10, 'l2_leaf_reg': 3.9240030576850193, 'subsample': 0.7, 'random_strength': 1.6362282006217336e-08, 'bagging_temperature': 0.8486378640372164, 'border_count': 224, 'scale_pos_weight': 2.038782706481536}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:   7%|▋         | 7/100 [00:23<05:32,  3.58s/it]

[I 2025-11-12 11:00:32,026] Trial 6 finished with value: 0.5347567030784508 and parameters: {'learning_rate': 0.22761526040530874, 'depth': 8, 'l2_leaf_reg': 7.544635456713555, 'subsample': 1.0, 'random_strength': 0.10793330308506735, 'bagging_temperature': 0.5181999424113368, 'border_count': 189, 'scale_pos_weight': 6.221466092022323}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:   8%|▊         | 8/100 [00:30<07:05,  4.63s/it]

[I 2025-11-12 11:00:38,908] Trial 7 finished with value: 0.5505263157894736 and parameters: {'learning_rate': 0.0176762520042349, 'depth': 10, 'l2_leaf_reg': 1.9336558366181764, 'subsample': 0.6, 'random_strength': 0.018639767245861343, 'bagging_temperature': 0.11660896487726147, 'border_count': 42, 'scale_pos_weight': 6.2075693824670175}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:   9%|▉         | 9/100 [00:31<05:15,  3.47s/it]

[I 2025-11-12 11:00:39,832] Trial 8 finished with value: 0.5328222328701485 and parameters: {'learning_rate': 0.09301866608381752, 'depth': 3, 'l2_leaf_reg': 4.777077636227768, 'subsample': 0.8, 'random_strength': 0.0023852307752262403, 'bagging_temperature': 0.6110904818938545, 'border_count': 113, 'scale_pos_weight': 6.443098840457064}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  10%|█         | 10/100 [00:34<05:11,  3.46s/it]

[I 2025-11-12 11:00:43,280] Trial 9 finished with value: 0.5058823529411764 and parameters: {'learning_rate': 0.014517696460813044, 'depth': 5, 'l2_leaf_reg': 0.00548036425544693, 'subsample': 0.9, 'random_strength': 3.567422943032563e-08, 'bagging_temperature': 0.7611184237671953, 'border_count': 222, 'scale_pos_weight': 9.959869893911126}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  11%|█         | 11/100 [00:41<06:28,  4.36s/it]

[I 2025-11-12 11:00:49,683] Trial 10 finished with value: 0.5617977528089888 and parameters: {'learning_rate': 0.010058347769010925, 'depth': 8, 'l2_leaf_reg': 0.2278094473769148, 'subsample': 0.8, 'random_strength': 1.4248715390383394e-06, 'bagging_temperature': 0.9513328137877849, 'border_count': 32, 'scale_pos_weight': 4.385265520083826}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  12%|█▏        | 12/100 [00:42<05:02,  3.44s/it]

[I 2025-11-12 11:00:51,013] Trial 11 finished with value: 0.5618672665916761 and parameters: {'learning_rate': 0.06713377842217921, 'depth': 3, 'l2_leaf_reg': 0.030808688053061233, 'subsample': 1.0, 'random_strength': 0.9180633637136155, 'bagging_temperature': 0.017339685868463788, 'border_count': 86, 'scale_pos_weight': 3.8852327809599116}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  13%|█▎        | 13/100 [00:45<04:37,  3.19s/it]

[I 2025-11-12 11:00:53,639] Trial 12 finished with value: 0.5646123260437376 and parameters: {'learning_rate': 0.017873581379971883, 'depth': 4, 'l2_leaf_reg': 0.03987542853978633, 'subsample': 0.9, 'random_strength': 2.393014137312872e-06, 'bagging_temperature': 0.2609356927779447, 'border_count': 165, 'scale_pos_weight': 3.745560644047871}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  14%|█▍        | 14/100 [00:51<06:06,  4.26s/it]

[I 2025-11-12 11:01:00,350] Trial 13 finished with value: 0.5773610180843939 and parameters: {'learning_rate': 0.011771783304959054, 'depth': 9, 'l2_leaf_reg': 0.25130932573856996, 'subsample': 0.9, 'random_strength': 0.000686546454107293, 'bagging_temperature': 0.3132673013022193, 'border_count': 253, 'scale_pos_weight': 2.7725690059039327}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  15%|█▌        | 15/100 [00:59<07:42,  5.44s/it]

[I 2025-11-12 11:01:08,539] Trial 14 finished with value: 0.48950381679389315 and parameters: {'learning_rate': 0.010052870778556081, 'depth': 9, 'l2_leaf_reg': 0.3071847388911201, 'subsample': 0.9, 'random_strength': 0.0004967138816487444, 'bagging_temperature': 0.31916353232856115, 'border_count': 251, 'scale_pos_weight': 1.142356268291512}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  16%|█▌        | 16/100 [01:02<06:34,  4.70s/it]

[I 2025-11-12 11:01:11,504] Trial 15 finished with value: 0.567193675889328 and parameters: {'learning_rate': 0.14079036747395215, 'depth': 9, 'l2_leaf_reg': 0.5383311241426433, 'subsample': 0.8, 'random_strength': 0.012091823406616093, 'bagging_temperature': 0.2647365092965725, 'border_count': 77, 'scale_pos_weight': 2.6535261965925203}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  17%|█▋        | 17/100 [01:07<06:28,  4.68s/it]

[I 2025-11-12 11:01:16,141] Trial 16 finished with value: 0.5558847293293833 and parameters: {'learning_rate': 0.014661671700881252, 'depth': 8, 'l2_leaf_reg': 0.10664834653264016, 'subsample': 0.9, 'random_strength': 0.0001490084623026656, 'bagging_temperature': 0.7562937484505758, 'border_count': 149, 'scale_pos_weight': 5.055970355935258}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  18%|█▊        | 18/100 [01:11<06:02,  4.42s/it]

[I 2025-11-12 11:01:19,976] Trial 17 finished with value: 0.5311203319502075 and parameters: {'learning_rate': 0.036247219300833924, 'depth': 9, 'l2_leaf_reg': 1.2755855701510466, 'subsample': 0.7, 'random_strength': 2.7332903892146663e-07, 'bagging_temperature': 0.3420441379250497, 'border_count': 70, 'scale_pos_weight': 7.980932907090008}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  19%|█▉        | 19/100 [01:14<05:26,  4.04s/it]

[I 2025-11-12 11:01:23,105] Trial 18 finished with value: 0.5727878211227403 and parameters: {'learning_rate': 0.02043925361984439, 'depth': 7, 'l2_leaf_reg': 0.12575552641524917, 'subsample': 0.5, 'random_strength': 3.175248792065963e-05, 'bagging_temperature': 0.40113874654502035, 'border_count': 250, 'scale_pos_weight': 2.9625685962716717}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  20%|██        | 20/100 [01:24<07:47,  5.84s/it]

[I 2025-11-12 11:01:33,149] Trial 19 finished with value: 0.4704112337011033 and parameters: {'learning_rate': 0.011935365388496384, 'depth': 10, 'l2_leaf_reg': 0.33714461594391254, 'subsample': 0.9, 'random_strength': 0.0020949501159023285, 'bagging_temperature': 0.719642717845427, 'border_count': 105, 'scale_pos_weight': 1.0239588150147694}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  21%|██        | 21/100 [01:27<06:22,  4.84s/it]

[I 2025-11-12 11:01:35,675] Trial 20 finished with value: 0.540497617787189 and parameters: {'learning_rate': 0.2783813394776174, 'depth': 9, 'l2_leaf_reg': 0.010789243153641909, 'subsample': 1.0, 'random_strength': 0.00023656421140796514, 'bagging_temperature': 0.15582274083406503, 'border_count': 164, 'scale_pos_weight': 4.878502036402095}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  22%|██▏       | 22/100 [01:29<05:33,  4.27s/it]

[I 2025-11-12 11:01:38,610] Trial 21 finished with value: 0.5760262725779968 and parameters: {'learning_rate': 0.020508101425155755, 'depth': 7, 'l2_leaf_reg': 0.09085788193169973, 'subsample': 0.5, 'random_strength': 1.788450846860915e-05, 'bagging_temperature': 0.4083423539648094, 'border_count': 254, 'scale_pos_weight': 2.7300476154432562}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  23%|██▎       | 23/100 [01:32<04:56,  3.85s/it]

[I 2025-11-12 11:01:41,492] Trial 22 finished with value: 0.5800135043889264 and parameters: {'learning_rate': 0.021536769637996365, 'depth': 7, 'l2_leaf_reg': 0.001232512061643844, 'subsample': 0.5, 'random_strength': 7.379983966475795e-06, 'bagging_temperature': 0.21093119342500222, 'border_count': 237, 'scale_pos_weight': 2.5654352027555833}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  24%|██▍       | 24/100 [01:36<04:38,  3.67s/it]

[I 2025-11-12 11:01:44,726] Trial 23 finished with value: 0.5714285714285714 and parameters: {'learning_rate': 0.014613911799427188, 'depth': 6, 'l2_leaf_reg': 0.0011956024500071058, 'subsample': 0.7, 'random_strength': 4.328253949534746e-06, 'bagging_temperature': 0.1777278784642351, 'border_count': 231, 'scale_pos_weight': 2.0281095982315986}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  25%|██▌       | 25/100 [01:40<04:58,  3.98s/it]

[I 2025-11-12 11:01:49,445] Trial 24 finished with value: 0.573236317621006 and parameters: {'learning_rate': 0.012814655123288053, 'depth': 8, 'l2_leaf_reg': 0.0012503569713166302, 'subsample': 0.8, 'random_strength': 3.2179314197442134e-07, 'bagging_temperature': 0.21749022066953183, 'border_count': 54, 'scale_pos_weight': 3.2143047380937766}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  26%|██▌       | 26/100 [01:46<05:23,  4.37s/it]

[I 2025-11-12 11:01:54,708] Trial 25 finished with value: 0.5614035087719298 and parameters: {'learning_rate': 0.023281412350687054, 'depth': 9, 'l2_leaf_reg': 2.0727708550455994, 'subsample': 0.5, 'random_strength': 0.0017298235544740746, 'bagging_temperature': 0.9923728638604692, 'border_count': 236, 'scale_pos_weight': 4.417261029334176}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  27%|██▋       | 27/100 [01:48<04:43,  3.88s/it]

[I 2025-11-12 11:01:57,446] Trial 26 finished with value: 0.5674617301530794 and parameters: {'learning_rate': 0.03720219838661256, 'depth': 8, 'l2_leaf_reg': 0.06356991671014885, 'subsample': 0.9, 'random_strength': 7.643706319281155e-05, 'bagging_temperature': 0.6372665739854961, 'border_count': 180, 'scale_pos_weight': 2.2833537589395343}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  28%|██▊       | 28/100 [01:57<06:24,  5.34s/it]

[I 2025-11-12 11:02:06,182] Trial 27 finished with value: 0.5295367615809605 and parameters: {'learning_rate': 0.01670971648649354, 'depth': 10, 'l2_leaf_reg': 0.8220806695461541, 'subsample': 0.8, 'random_strength': 7.722344059012395e-06, 'bagging_temperature': 0.8988870488936426, 'border_count': 142, 'scale_pos_weight': 1.5612345419936564}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  29%|██▉       | 29/100 [02:03<06:21,  5.38s/it]

[I 2025-11-12 11:02:11,649] Trial 28 finished with value: 0.5616515837104072 and parameters: {'learning_rate': 0.012333421750060546, 'depth': 6, 'l2_leaf_reg': 0.20812954664446412, 'subsample': 0.7, 'random_strength': 5.700182419436973e-07, 'bagging_temperature': 0.3484503348101505, 'border_count': 209, 'scale_pos_weight': 4.04925313963942}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 4. Best value: 0.581457:  30%|███       | 30/100 [02:05<05:16,  4.52s/it]

[I 2025-11-12 11:02:14,160] Trial 29 finished with value: 0.5490600051506567 and parameters: {'learning_rate': 0.03037486648686876, 'depth': 7, 'l2_leaf_reg': 0.4994986776293382, 'subsample': 1.0, 'random_strength': 8.035482618151906e-05, 'bagging_temperature': 0.4510386500277017, 'border_count': 95, 'scale_pos_weight': 5.431966786457228}. Best is trial 4 with value: 0.5814569536423841.


Best trial: 30. Best value: 0.584018:  31%|███       | 31/100 [02:07<04:24,  3.84s/it]

[I 2025-11-12 11:02:16,409] Trial 30 finished with value: 0.5840184149950675 and parameters: {'learning_rate': 0.0478728874729642, 'depth': 5, 'l2_leaf_reg': 0.7315853862602, 'subsample': 0.9, 'random_strength': 0.007454044632138896, 'bagging_temperature': 0.5821736756158232, 'border_count': 206, 'scale_pos_weight': 2.692029580837999}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  32%|███▏      | 32/100 [02:08<03:26,  3.03s/it]

[I 2025-11-12 11:02:17,566] Trial 31 finished with value: 0.583883751651255 and parameters: {'learning_rate': 0.0862301124579979, 'depth': 5, 'l2_leaf_reg': 0.8564061166379715, 'subsample': 0.9, 'random_strength': 0.0430447136532108, 'bagging_temperature': 0.572887353343354, 'border_count': 237, 'scale_pos_weight': 2.6047836715962105}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  33%|███▎      | 33/100 [02:10<02:43,  2.44s/it]

[I 2025-11-12 11:02:18,640] Trial 32 finished with value: 0.5820433436532507 and parameters: {'learning_rate': 0.10214437532105418, 'depth': 5, 'l2_leaf_reg': 1.1195249398919658, 'subsample': 0.9, 'random_strength': 0.08017987568585355, 'bagging_temperature': 0.5695841095921812, 'border_count': 211, 'scale_pos_weight': 2.342869987619905}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  34%|███▍      | 34/100 [02:11<02:18,  2.10s/it]

[I 2025-11-12 11:02:19,931] Trial 33 finished with value: 0.563391442155309 and parameters: {'learning_rate': 0.10040861435353635, 'depth': 5, 'l2_leaf_reg': 1.0184759344757996, 'subsample': 0.9, 'random_strength': 0.1774836104308883, 'bagging_temperature': 0.5645219150018097, 'border_count': 205, 'scale_pos_weight': 1.6457061429396809}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  35%|███▌      | 35/100 [02:12<01:54,  1.76s/it]

[I 2025-11-12 11:02:20,890] Trial 34 finished with value: 0.5708333333333333 and parameters: {'learning_rate': 0.1337524850393061, 'depth': 4, 'l2_leaf_reg': 1.8961086164872858, 'subsample': 1.0, 'random_strength': 0.017373398704142017, 'bagging_temperature': 0.6825672158887157, 'border_count': 189, 'scale_pos_weight': 3.3162506089764205}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  36%|███▌      | 36/100 [02:13<01:45,  1.65s/it]

[I 2025-11-12 11:02:22,303] Trial 35 finished with value: 0.567978184651344 and parameters: {'learning_rate': 0.060382225883774863, 'depth': 5, 'l2_leaf_reg': 2.914925754440419, 'subsample': 0.8, 'random_strength': 0.1533057797972427, 'bagging_temperature': 0.5653427702025383, 'border_count': 217, 'scale_pos_weight': 1.7128946022153306}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  37%|███▋      | 37/100 [02:15<01:40,  1.59s/it]

[I 2025-11-12 11:02:23,737] Trial 36 finished with value: 0.5700627427547057 and parameters: {'learning_rate': 0.0805013822081479, 'depth': 4, 'l2_leaf_reg': 0.5729801515173153, 'subsample': 0.9, 'random_strength': 0.05244678616063618, 'bagging_temperature': 0.8044096294684886, 'border_count': 131, 'scale_pos_weight': 3.304816057641028}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  38%|███▊      | 38/100 [02:16<01:33,  1.51s/it]

[I 2025-11-12 11:02:25,050] Trial 37 finished with value: 0.5784418356456777 and parameters: {'learning_rate': 0.1195734320577158, 'depth': 6, 'l2_leaf_reg': 8.46901768228374, 'subsample': 1.0, 'random_strength': 0.007011473240239848, 'bagging_temperature': 0.6546954448055423, 'border_count': 197, 'scale_pos_weight': 2.16252905859281}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  39%|███▉      | 39/100 [02:17<01:21,  1.33s/it]

[I 2025-11-12 11:02:25,978] Trial 38 finished with value: 0.5177548682703322 and parameters: {'learning_rate': 0.16767900916337677, 'depth': 5, 'l2_leaf_reg': 1.254686342380059, 'subsample': 0.8, 'random_strength': 0.30617811499665676, 'bagging_temperature': 0.5700577694951412, 'border_count': 172, 'scale_pos_weight': 7.899465032586088}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  40%|████      | 40/100 [02:18<01:24,  1.41s/it]

[I 2025-11-12 11:02:27,572] Trial 39 finished with value: 0.5450662110209312 and parameters: {'learning_rate': 0.050117225551955816, 'depth': 6, 'l2_leaf_reg': 3.2204152965552946, 'subsample': 0.9, 'random_strength': 0.04307001654954489, 'bagging_temperature': 0.4806803599205734, 'border_count': 218, 'scale_pos_weight': 1.4199553926786388}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  41%|████      | 41/100 [02:20<01:31,  1.55s/it]

[I 2025-11-12 11:02:29,460] Trial 40 finished with value: 0.5675990675990676 and parameters: {'learning_rate': 0.047175178739079573, 'depth': 5, 'l2_leaf_reg': 0.8407770240190044, 'subsample': 1.0, 'random_strength': 0.006989709165362071, 'bagging_temperature': 0.8231745734105325, 'border_count': 197, 'scale_pos_weight': 3.645800191739496}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 30. Best value: 0.584018:  42%|████▏     | 42/100 [02:22<01:34,  1.63s/it]

[I 2025-11-12 11:02:31,271] Trial 41 finished with value: 0.5780619111709286 and parameters: {'learning_rate': 0.07219199904431588, 'depth': 4, 'l2_leaf_reg': 0.003995331694500653, 'subsample': 0.6, 'random_strength': 0.3249058964618848, 'bagging_temperature': 0.532906417250395, 'border_count': 237, 'scale_pos_weight': 2.532945889754032}. Best is trial 30 with value: 0.5840184149950675.


Best trial: 42. Best value: 0.58562:  43%|████▎     | 43/100 [02:23<01:24,  1.48s/it] 

[I 2025-11-12 11:02:32,388] Trial 42 finished with value: 0.5856200069468566 and parameters: {'learning_rate': 0.09325715313438826, 'depth': 5, 'l2_leaf_reg': 0.0021410243996902215, 'subsample': 0.9, 'random_strength': 0.04848483602713997, 'bagging_temperature': 0.6979998048549101, 'border_count': 239, 'scale_pos_weight': 2.2902998251039337}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  44%|████▍     | 44/100 [02:25<01:21,  1.46s/it]

[I 2025-11-12 11:02:33,822] Trial 43 finished with value: 0.5809217577706324 and parameters: {'learning_rate': 0.09737144033917804, 'depth': 5, 'l2_leaf_reg': 0.16730363728578945, 'subsample': 0.9, 'random_strength': 0.031321014107963036, 'bagging_temperature': 0.6910291204135213, 'border_count': 225, 'scale_pos_weight': 2.1677966447663772}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  45%|████▌     | 45/100 [02:26<01:20,  1.47s/it]

[I 2025-11-12 11:02:35,306] Trial 44 finished with value: 0.5613259668508287 and parameters: {'learning_rate': 0.08041151326393053, 'depth': 4, 'l2_leaf_reg': 5.703321803321573, 'subsample': 0.9, 'random_strength': 0.0833666762908296, 'bagging_temperature': 0.6096662518431839, 'border_count': 242, 'scale_pos_weight': 4.167390956624523}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  46%|████▌     | 46/100 [02:27<01:12,  1.34s/it]

[I 2025-11-12 11:02:36,336] Trial 45 finished with value: 0.5760736196319018 and parameters: {'learning_rate': 0.11678190310957577, 'depth': 3, 'l2_leaf_reg': 0.40134725763634005, 'subsample': 1.0, 'random_strength': 0.0044522680214776275, 'bagging_temperature': 0.7495480406008461, 'border_count': 211, 'scale_pos_weight': 3.06726055373093}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  47%|████▋     | 47/100 [02:28<01:07,  1.27s/it]

[I 2025-11-12 11:02:37,438] Trial 46 finished with value: 0.5682819383259912 and parameters: {'learning_rate': 0.15300792751911993, 'depth': 5, 'l2_leaf_reg': 1.7945804418693347, 'subsample': 0.8, 'random_strength': 0.47115699382046683, 'bagging_temperature': 0.8801099016335645, 'border_count': 224, 'scale_pos_weight': 1.9772536892283818}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  48%|████▊     | 48/100 [02:30<01:06,  1.27s/it]

[I 2025-11-12 11:02:38,716] Trial 47 finished with value: 0.540587219343696 and parameters: {'learning_rate': 0.21304400410214733, 'depth': 6, 'l2_leaf_reg': 0.6937111184915101, 'subsample': 0.9, 'random_strength': 0.0009885952397108591, 'bagging_temperature': 0.5143929610942086, 'border_count': 184, 'scale_pos_weight': 1.3616124410855794}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  49%|████▉     | 49/100 [02:31<01:01,  1.20s/it]

[I 2025-11-12 11:02:39,740] Trial 48 finished with value: 0.5668881826061832 and parameters: {'learning_rate': 0.19363993474464486, 'depth': 5, 'l2_leaf_reg': 0.013184590852504969, 'subsample': 0.9, 'random_strength': 0.02771693941231013, 'bagging_temperature': 0.5881717287309911, 'border_count': 121, 'scale_pos_weight': 3.5445526441192263}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  50%|█████     | 50/100 [02:32<01:07,  1.35s/it]

[I 2025-11-12 11:02:41,434] Trial 49 finished with value: 0.5815602836879432 and parameters: {'learning_rate': 0.043598813930471214, 'depth': 3, 'l2_leaf_reg': 0.029287538248215214, 'subsample': 0.8, 'random_strength': 0.0914992970065419, 'bagging_temperature': 0.6609335829383794, 'border_count': 246, 'scale_pos_weight': 2.4169252815445783}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  51%|█████     | 51/100 [02:35<01:19,  1.63s/it]

[I 2025-11-12 11:02:43,727] Trial 50 finished with value: 0.5074561403508772 and parameters: {'learning_rate': 0.045873036994688886, 'depth': 3, 'l2_leaf_reg': 0.028720522554619942, 'subsample': 0.8, 'random_strength': 0.8625078761638599, 'bagging_temperature': 0.6755637126142379, 'border_count': 244, 'scale_pos_weight': 9.530083108684767}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  52%|█████▏    | 52/100 [02:36<01:17,  1.61s/it]

[I 2025-11-12 11:02:45,278] Trial 51 finished with value: 0.5778197857592943 and parameters: {'learning_rate': 0.059555407415948385, 'depth': 4, 'l2_leaf_reg': 0.0076832166120577436, 'subsample': 0.9, 'random_strength': 0.05510222958553215, 'bagging_temperature': 0.7137617868361491, 'border_count': 228, 'scale_pos_weight': 2.926075445089256}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  53%|█████▎    | 53/100 [02:37<01:07,  1.43s/it]

[I 2025-11-12 11:02:46,307] Trial 52 finished with value: 0.5773333333333334 and parameters: {'learning_rate': 0.08591265040803973, 'depth': 4, 'l2_leaf_reg': 0.0023259390906925155, 'subsample': 0.8, 'random_strength': 0.1245224876556488, 'bagging_temperature': 0.6304322089046329, 'border_count': 244, 'scale_pos_weight': 2.502485095520066}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  54%|█████▍    | 54/100 [02:39<01:07,  1.46s/it]

[I 2025-11-12 11:02:47,818] Trial 53 finished with value: 0.5826771653543307 and parameters: {'learning_rate': 0.06486737419599757, 'depth': 3, 'l2_leaf_reg': 0.052550738793371195, 'subsample': 0.9, 'random_strength': 0.01079797865124095, 'bagging_temperature': 0.7875156831141187, 'border_count': 215, 'scale_pos_weight': 2.3326832682314818}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  55%|█████▌    | 55/100 [02:40<01:01,  1.36s/it]

[I 2025-11-12 11:02:48,968] Trial 54 finished with value: 0.5670498084291188 and parameters: {'learning_rate': 0.0691294655238064, 'depth': 3, 'l2_leaf_reg': 0.05277062027424726, 'subsample': 0.9, 'random_strength': 0.010929386801777597, 'bagging_temperature': 0.797033877983243, 'border_count': 215, 'scale_pos_weight': 1.770416592157421}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  56%|█████▌    | 56/100 [02:41<00:58,  1.34s/it]

[I 2025-11-12 11:02:50,246] Trial 55 finished with value: 0.5811965811965812 and parameters: {'learning_rate': 0.04099595147780778, 'depth': 3, 'l2_leaf_reg': 0.07735058465810993, 'subsample': 0.8, 'random_strength': 0.0037194963071816188, 'bagging_temperature': 0.5429291413444454, 'border_count': 199, 'scale_pos_weight': 2.3388650731890235}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  57%|█████▋    | 57/100 [02:43<00:58,  1.36s/it]

[I 2025-11-12 11:02:51,645] Trial 56 finished with value: 0.5550847457627118 and parameters: {'learning_rate': 0.03215239149677963, 'depth': 3, 'l2_leaf_reg': 0.024027631135852174, 'subsample': 0.9, 'random_strength': 0.019167860877638266, 'bagging_temperature': 0.4467179567724368, 'border_count': 231, 'scale_pos_weight': 4.535768849264893}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  58%|█████▊    | 58/100 [02:44<01:03,  1.52s/it]

[I 2025-11-12 11:02:53,538] Trial 57 finished with value: 0.5387613685578172 and parameters: {'learning_rate': 0.061254611173433386, 'depth': 4, 'l2_leaf_reg': 0.021183072402214252, 'subsample': 1.0, 'random_strength': 0.2511842504925351, 'bagging_temperature': 0.7292428958848384, 'border_count': 254, 'scale_pos_weight': 1.3565981283304054}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  59%|█████▉    | 59/100 [02:46<01:03,  1.54s/it]

[I 2025-11-12 11:02:55,123] Trial 58 finished with value: 0.5661345496009123 and parameters: {'learning_rate': 0.05429150462905155, 'depth': 5, 'l2_leaf_reg': 0.044804120754564566, 'subsample': 0.7, 'random_strength': 0.056062877538284026, 'bagging_temperature': 0.7654100384012117, 'border_count': 242, 'scale_pos_weight': 3.7816246203132535}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  60%|██████    | 60/100 [02:47<00:55,  1.38s/it]

[I 2025-11-12 11:02:56,129] Trial 59 finished with value: 0.5778339122197663 and parameters: {'learning_rate': 0.10853093509588305, 'depth': 4, 'l2_leaf_reg': 0.0020091757866610954, 'subsample': 0.9, 'random_strength': 0.09513174020393922, 'bagging_temperature': 0.6101827805929605, 'border_count': 206, 'scale_pos_weight': 2.788081959583039}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  61%|██████    | 61/100 [02:49<00:55,  1.42s/it]

[I 2025-11-12 11:02:57,638] Trial 60 finished with value: 0.5658389766741911 and parameters: {'learning_rate': 0.0761656901775209, 'depth': 6, 'l2_leaf_reg': 0.14496570138511236, 'subsample': 0.9, 'random_strength': 0.011063896545354661, 'bagging_temperature': 0.49840761676617673, 'border_count': 191, 'scale_pos_weight': 1.9046361071416058}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  62%|██████▏   | 62/100 [02:51<01:03,  1.67s/it]

[I 2025-11-12 11:02:59,900] Trial 61 finished with value: 0.5768261964735516 and parameters: {'learning_rate': 0.0261282531471605, 'depth': 3, 'l2_leaf_reg': 1.4185859057004249, 'subsample': 0.9, 'random_strength': 2.283760022298601e-05, 'bagging_temperature': 0.8527489986938135, 'border_count': 54, 'scale_pos_weight': 2.9328348915487013}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  63%|██████▎   | 63/100 [02:53<01:03,  1.71s/it]

[I 2025-11-12 11:03:01,715] Trial 62 finished with value: 0.5852991452991453 and parameters: {'learning_rate': 0.04322177688583661, 'depth': 5, 'l2_leaf_reg': 0.008299627223026806, 'subsample': 0.9, 'random_strength': 0.0013185705458777612, 'bagging_temperature': 0.6640870334236839, 'border_count': 231, 'scale_pos_weight': 2.4241335260521675}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  64%|██████▍   | 64/100 [02:54<01:00,  1.68s/it]

[I 2025-11-12 11:03:03,309] Trial 63 finished with value: 0.5836707152496626 and parameters: {'learning_rate': 0.04418426150095648, 'depth': 5, 'l2_leaf_reg': 0.008732642215045624, 'subsample': 0.8, 'random_strength': 0.0015881734930360113, 'bagging_temperature': 0.6593287898023571, 'border_count': 234, 'scale_pos_weight': 2.487255733893481}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  65%|██████▌   | 65/100 [02:55<00:54,  1.55s/it]

[I 2025-11-12 11:03:04,552] Trial 64 finished with value: 0.5659259259259259 and parameters: {'learning_rate': 0.09059438803239159, 'depth': 5, 'l2_leaf_reg': 0.007230499787102085, 'subsample': 0.9, 'random_strength': 0.0003024890364860524, 'bagging_temperature': 0.5981173232299857, 'border_count': 234, 'scale_pos_weight': 3.402554875371629}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  66%|██████▌   | 66/100 [02:57<00:53,  1.58s/it]

[I 2025-11-12 11:03:06,221] Trial 65 finished with value: 0.48007870142646336 and parameters: {'learning_rate': 0.05209967427473193, 'depth': 6, 'l2_leaf_reg': 0.0025300860725997417, 'subsample': 0.9, 'random_strength': 0.002969273748429336, 'bagging_temperature': 0.6368292974665569, 'border_count': 218, 'scale_pos_weight': 1.0735968280283372}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  67%|██████▋   | 67/100 [02:58<00:48,  1.47s/it]

[I 2025-11-12 11:03:07,425] Trial 66 finished with value: 0.5223562810503903 and parameters: {'learning_rate': 0.06649554300252329, 'depth': 5, 'l2_leaf_reg': 0.004218802479019058, 'subsample': 1.0, 'random_strength': 0.0010069742399501556, 'bagging_temperature': 0.7770385764063299, 'border_count': 222, 'scale_pos_weight': 6.812065158129132}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  68%|██████▊   | 68/100 [02:59<00:44,  1.38s/it]

[I 2025-11-12 11:03:08,599] Trial 67 finished with value: 0.56865127582017 and parameters: {'learning_rate': 0.10679812084092476, 'depth': 6, 'l2_leaf_reg': 0.009930934387515843, 'subsample': 0.9, 'random_strength': 0.001474651331814411, 'bagging_temperature': 0.7068250268959753, 'border_count': 211, 'scale_pos_weight': 3.12836459549542}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  69%|██████▉   | 69/100 [03:02<00:49,  1.60s/it]

[I 2025-11-12 11:03:10,709] Trial 68 finished with value: 0.5734011627906976 and parameters: {'learning_rate': 0.033747311127572295, 'depth': 5, 'l2_leaf_reg': 0.014831649408181298, 'subsample': 0.8, 'random_strength': 0.0003948677660361115, 'bagging_temperature': 0.7336137059873253, 'border_count': 228, 'scale_pos_weight': 2.0861520797195694}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  70%|███████   | 70/100 [03:03<00:45,  1.51s/it]

[I 2025-11-12 11:03:12,008] Trial 69 finished with value: 0.535931790499391 and parameters: {'learning_rate': 0.04057901646473234, 'depth': 5, 'l2_leaf_reg': 0.001803461432907977, 'subsample': 1.0, 'random_strength': 0.0070156948284898326, 'bagging_temperature': 0.5518253413607933, 'border_count': 233, 'scale_pos_weight': 5.916250782162611}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  71%|███████   | 71/100 [03:04<00:43,  1.51s/it]

[I 2025-11-12 11:03:13,527] Trial 70 finished with value: 0.5759637188208617 and parameters: {'learning_rate': 0.05715092063055947, 'depth': 6, 'l2_leaf_reg': 0.0029872494151213266, 'subsample': 0.7, 'random_strength': 0.00014690231909803947, 'bagging_temperature': 0.6470212443432689, 'border_count': 176, 'scale_pos_weight': 2.7486384398752386}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  72%|███████▏  | 72/100 [03:07<00:47,  1.71s/it]

[I 2025-11-12 11:03:15,707] Trial 71 finished with value: 0.5806451612903226 and parameters: {'learning_rate': 0.043628556096774934, 'depth': 4, 'l2_leaf_reg': 0.033433313486479405, 'subsample': 0.8, 'random_strength': 0.02812321097056036, 'bagging_temperature': 0.6620447230324602, 'border_count': 250, 'scale_pos_weight': 2.4260319623429294}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  73%|███████▎  | 73/100 [03:09<00:53,  1.98s/it]

[I 2025-11-12 11:03:18,319] Trial 72 finished with value: 0.5768688293370945 and parameters: {'learning_rate': 0.02692244590628077, 'depth': 5, 'l2_leaf_reg': 0.005382265682260118, 'subsample': 0.8, 'random_strength': 0.015492565869255551, 'bagging_temperature': 0.6900583590283318, 'border_count': 245, 'scale_pos_weight': 2.271434668091812}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  74%|███████▍  | 74/100 [03:11<00:50,  1.92s/it]

[I 2025-11-12 11:03:20,106] Trial 73 finished with value: 0.5723981900452488 and parameters: {'learning_rate': 0.035667361837549594, 'depth': 4, 'l2_leaf_reg': 0.062493384677443975, 'subsample': 0.9, 'random_strength': 0.006490119502265842, 'bagging_temperature': 0.6653010150099226, 'border_count': 239, 'scale_pos_weight': 1.848587367591243}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  75%|███████▌  | 75/100 [03:12<00:43,  1.75s/it]

[I 2025-11-12 11:03:21,465] Trial 74 finished with value: 0.581781914893617 and parameters: {'learning_rate': 0.06501750130018974, 'depth': 5, 'l2_leaf_reg': 0.010499492653924495, 'subsample': 0.8, 'random_strength': 0.07396583901355237, 'bagging_temperature': 0.58688350839055, 'border_count': 249, 'scale_pos_weight': 2.589011679942572}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  76%|███████▌  | 76/100 [03:14<00:41,  1.73s/it]

[I 2025-11-12 11:03:23,132] Trial 75 finished with value: 0.5540484997944924 and parameters: {'learning_rate': 0.06446966739581003, 'depth': 5, 'l2_leaf_reg': 0.006344448995285103, 'subsample': 0.9, 'random_strength': 0.0022738431367640335, 'bagging_temperature': 0.58250115346711, 'border_count': 221, 'scale_pos_weight': 1.5325779643381252}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  77%|███████▋  | 77/100 [03:15<00:35,  1.56s/it]

[I 2025-11-12 11:03:24,299] Trial 76 finished with value: 0.5778795811518325 and parameters: {'learning_rate': 0.08744077773186047, 'depth': 6, 'l2_leaf_reg': 0.010824398497807206, 'subsample': 0.8, 'random_strength': 0.03490401599592777, 'bagging_temperature': 0.4893206520697394, 'border_count': 201, 'scale_pos_weight': 2.632170053623153}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  78%|███████▊  | 78/100 [03:17<00:33,  1.52s/it]

[I 2025-11-12 11:03:25,728] Trial 77 finished with value: 0.5751673767498479 and parameters: {'learning_rate': 0.0735276407338059, 'depth': 5, 'l2_leaf_reg': 0.001538151408623597, 'subsample': 0.9, 'random_strength': 0.0007765701968229278, 'bagging_temperature': 0.3990966593405934, 'border_count': 213, 'scale_pos_weight': 3.17961777805674}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  79%|███████▉  | 79/100 [03:18<00:31,  1.48s/it]

[I 2025-11-12 11:03:27,129] Trial 78 finished with value: 0.5754512635379061 and parameters: {'learning_rate': 0.12241046743403936, 'depth': 5, 'l2_leaf_reg': 0.017020144205470922, 'subsample': 0.9, 'random_strength': 5.160230795459705e-08, 'bagging_temperature': 0.5239061897794892, 'border_count': 152, 'scale_pos_weight': 2.1051069798214286}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  80%|████████  | 80/100 [03:20<00:32,  1.61s/it]

[I 2025-11-12 11:03:29,048] Trial 79 finished with value: 0.5777632865992827 and parameters: {'learning_rate': 0.05003083745399597, 'depth': 7, 'l2_leaf_reg': 0.4200260783405358, 'subsample': 0.7, 'random_strength': 0.18969883273565738, 'bagging_temperature': 0.6251956069724772, 'border_count': 235, 'scale_pos_weight': 2.7407828645680627}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  81%|████████  | 81/100 [03:21<00:27,  1.45s/it]

[I 2025-11-12 11:03:30,125] Trial 80 finished with value: 0.5628637951105937 and parameters: {'learning_rate': 0.0972257795663851, 'depth': 4, 'l2_leaf_reg': 1.050646620137806, 'subsample': 0.8, 'random_strength': 0.020679861171059195, 'bagging_temperature': 0.5954921642111928, 'border_count': 254, 'scale_pos_weight': 3.523083497408365}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  82%|████████▏ | 82/100 [03:22<00:26,  1.45s/it]

[I 2025-11-12 11:03:31,552] Trial 81 finished with value: 0.5826028320971005 and parameters: {'learning_rate': 0.04447387427177665, 'depth': 4, 'l2_leaf_reg': 0.01944225517797556, 'subsample': 0.8, 'random_strength': 0.07483668743126877, 'bagging_temperature': 0.705411059858605, 'border_count': 230, 'scale_pos_weight': 2.455922741601924}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  83%|████████▎ | 83/100 [03:24<00:25,  1.49s/it]

[I 2025-11-12 11:03:33,139] Trial 82 finished with value: 0.573170731707317 and parameters: {'learning_rate': 0.04810232813496465, 'depth': 5, 'l2_leaf_reg': 0.019312051992205703, 'subsample': 0.8, 'random_strength': 0.07119347313505131, 'bagging_temperature': 0.8315493735638121, 'border_count': 229, 'scale_pos_weight': 1.8362222766308325}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  84%|████████▍ | 84/100 [03:26<00:26,  1.64s/it]

[I 2025-11-12 11:03:35,119] Trial 83 finished with value: 0.5810671256454389 and parameters: {'learning_rate': 0.05551565909421933, 'depth': 4, 'l2_leaf_reg': 0.004918272598608523, 'subsample': 0.9, 'random_strength': 0.4554503209955878, 'bagging_temperature': 0.7899638094847901, 'border_count': 249, 'scale_pos_weight': 2.344254818931624}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  85%|████████▌ | 85/100 [03:28<00:25,  1.70s/it]

[I 2025-11-12 11:03:36,984] Trial 84 finished with value: 0.5782907049282595 and parameters: {'learning_rate': 0.03899113298202667, 'depth': 5, 'l2_leaf_reg': 0.008896712784021852, 'subsample': 0.8, 'random_strength': 0.010727054811205955, 'bagging_temperature': 0.6997554466489557, 'border_count': 222, 'scale_pos_weight': 2.989387833003314}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  86%|████████▌ | 86/100 [03:29<00:22,  1.58s/it]

[I 2025-11-12 11:03:38,280] Trial 85 finished with value: 0.5789821546596167 and parameters: {'learning_rate': 0.08349131036506911, 'depth': 4, 'l2_leaf_reg': 0.0032057817966746004, 'subsample': 0.9, 'random_strength': 0.03952392918391009, 'bagging_temperature': 0.7443115672242842, 'border_count': 238, 'scale_pos_weight': 2.575361043661037}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  87%|████████▋ | 87/100 [03:31<00:20,  1.59s/it]

[I 2025-11-12 11:03:39,874] Trial 86 finished with value: 0.577825929938606 and parameters: {'learning_rate': 0.06298162253660818, 'depth': 5, 'l2_leaf_reg': 0.038761193603670974, 'subsample': 0.8, 'random_strength': 0.005796246153801195, 'bagging_temperature': 0.5647840904580133, 'border_count': 206, 'scale_pos_weight': 2.085827808983808}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  88%|████████▊ | 88/100 [03:32<00:18,  1.53s/it]

[I 2025-11-12 11:03:41,276] Trial 87 finished with value: 0.5647711511789182 and parameters: {'learning_rate': 0.07444353096900491, 'depth': 5, 'l2_leaf_reg': 2.7654477101127517, 'subsample': 0.6, 'random_strength': 0.14100812870896498, 'bagging_temperature': 0.6264204464057423, 'border_count': 194, 'scale_pos_weight': 3.9639826236022486}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  89%|████████▉ | 89/100 [03:33<00:16,  1.46s/it]

[I 2025-11-12 11:03:42,565] Trial 88 finished with value: 0.526032315978456 and parameters: {'learning_rate': 0.13153179208259339, 'depth': 6, 'l2_leaf_reg': 0.013421769463163782, 'subsample': 0.9, 'random_strength': 0.0013396378761022362, 'bagging_temperature': 0.5411515011589216, 'border_count': 226, 'scale_pos_weight': 1.2639383652479124}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  90%|█████████ | 90/100 [03:36<00:17,  1.75s/it]

[I 2025-11-12 11:03:44,994] Trial 89 finished with value: 0.5687631658140234 and parameters: {'learning_rate': 0.029941589000264723, 'depth': 4, 'l2_leaf_reg': 0.259579446427814, 'subsample': 0.8, 'random_strength': 0.0042232458842186805, 'bagging_temperature': 0.9084447022752986, 'border_count': 218, 'scale_pos_weight': 3.266794745581097}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  91%|█████████ | 91/100 [03:38<00:15,  1.75s/it]

[I 2025-11-12 11:03:46,745] Trial 90 finished with value: 0.5840531561461794 and parameters: {'learning_rate': 0.0454468960026582, 'depth': 4, 'l2_leaf_reg': 0.10459041042945783, 'subsample': 0.9, 'random_strength': 0.023115993339100346, 'bagging_temperature': 0.7153519478000077, 'border_count': 248, 'scale_pos_weight': 2.540073781537213}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  92%|█████████▏| 92/100 [03:39<00:14,  1.76s/it]

[I 2025-11-12 11:03:48,517] Trial 91 finished with value: 0.5603174603174603 and parameters: {'learning_rate': 0.04428722152448807, 'depth': 4, 'l2_leaf_reg': 0.5954248115652654, 'subsample': 0.9, 'random_strength': 0.022331008828348778, 'bagging_temperature': 0.7152665903983944, 'border_count': 241, 'scale_pos_weight': 1.6411803434223913}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  93%|█████████▎| 93/100 [03:41<00:12,  1.74s/it]

[I 2025-11-12 11:03:50,233] Trial 92 finished with value: 0.5795975997176138 and parameters: {'learning_rate': 0.0515594790656795, 'depth': 5, 'l2_leaf_reg': 0.0010130282064940669, 'subsample': 0.9, 'random_strength': 0.06730130966742458, 'bagging_temperature': 0.6811656160799302, 'border_count': 247, 'scale_pos_weight': 2.260055198286903}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  94%|█████████▍| 94/100 [03:43<00:11,  1.88s/it]

[I 2025-11-12 11:03:52,433] Trial 93 finished with value: 0.5780856423173804 and parameters: {'learning_rate': 0.03692278788154709, 'depth': 3, 'l2_leaf_reg': 1.5294258885685745, 'subsample': 0.9, 'random_strength': 0.012411915428423145, 'bagging_temperature': 0.011805202864157538, 'border_count': 230, 'scale_pos_weight': 2.8911400060959433}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  95%|█████████▌| 95/100 [03:45<00:08,  1.76s/it]

[I 2025-11-12 11:03:53,919] Trial 94 finished with value: 0.5793149318257399 and parameters: {'learning_rate': 0.05566746522243186, 'depth': 4, 'l2_leaf_reg': 0.02370684086429759, 'subsample': 0.9, 'random_strength': 0.03919866878105531, 'bagging_temperature': 0.580657731937849, 'border_count': 238, 'scale_pos_weight': 2.565774263251782}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  96%|█████████▌| 96/100 [03:47<00:07,  1.85s/it]

[I 2025-11-12 11:03:55,977] Trial 95 finished with value: 0.5716417910447761 and parameters: {'learning_rate': 0.04128406649540395, 'depth': 6, 'l2_leaf_reg': 0.11021087394107298, 'subsample': 1.0, 'random_strength': 0.2171781761920012, 'bagging_temperature': 0.771702942047622, 'border_count': 255, 'scale_pos_weight': 1.9614461402246282}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  97%|█████████▋| 97/100 [03:48<00:05,  1.68s/it]

[I 2025-11-12 11:03:57,266] Trial 96 finished with value: 0.583876500857633 and parameters: {'learning_rate': 0.07802261807657684, 'depth': 5, 'l2_leaf_reg': 0.012017399198792121, 'subsample': 0.9, 'random_strength': 0.1153804297662899, 'bagging_temperature': 0.6093520740734941, 'border_count': 233, 'scale_pos_weight': 2.3966761831053285}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  98%|█████████▊| 98/100 [03:49<00:02,  1.49s/it]

[I 2025-11-12 11:03:58,318] Trial 97 finished with value: 0.574239163848755 and parameters: {'learning_rate': 0.10602857576380884, 'depth': 5, 'l2_leaf_reg': 0.0065990046889841175, 'subsample': 0.9, 'random_strength': 0.435265429698245, 'bagging_temperature': 0.6174917336524528, 'border_count': 215, 'scale_pos_weight': 3.058288806054356}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562:  99%|█████████▉| 99/100 [03:50<00:01,  1.38s/it]

[I 2025-11-12 11:03:59,424] Trial 98 finished with value: 0.558573165788407 and parameters: {'learning_rate': 0.08142244032562405, 'depth': 5, 'l2_leaf_reg': 0.17918476185994778, 'subsample': 0.9, 'random_strength': 0.13055094844005166, 'bagging_temperature': 0.6440027681209428, 'border_count': 208, 'scale_pos_weight': 1.5808997135958724}. Best is trial 42 with value: 0.5856200069468566.


Best trial: 42. Best value: 0.58562: 100%|██████████| 100/100 [03:52<00:00,  2.33s/it]

[I 2025-11-12 11:04:01,217] Trial 99 finished with value: 0.5231620188983637 and parameters: {'learning_rate': 0.04783917969535651, 'depth': 4, 'l2_leaf_reg': 0.7301452704902446, 'subsample': 0.9, 'random_strength': 0.7186234997773577, 'bagging_temperature': 0.4684294407820243, 'border_count': 234, 'scale_pos_weight': 7.883469830257926}. Best is trial 42 with value: 0.5856200069468566.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.5856
  Best Hyperparameters:
    learning_rate: 0.09325715313438826
    depth: 5
    l2_leaf_reg: 0.0021410243996902215
    subsample: 0.9
    random_strength: 0.04848483602713997
    bagging_temperature: 0.6979998048549101
    border_count: 239
    scale_pos_weight: 2.2902998251039337





In [23]:
best_params_from_optuna = study.best_trial.params.copy()

rfe_params = best_params_from_optuna.copy()
rfe_params.update({
    'iterations': 1000, # Max iterations per step
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 100,
    'random_state': 123,
    'verbose': False
})

train_pool = Pool(
    data=X_train_proc, 
    label=y_train, 
    cat_features=CAT_FEATURES
)

eval_pool = Pool(
    data=X_test_proc, 
    label=y_test, 
    cat_features=CAT_FEATURES
)

model_for_rfe = CatBoostClassifier(**rfe_params)

print("Starting CatBoost RFE...")
start_time = time.time()

summary = model_for_rfe.select_features(
    train_pool,
    eval_set=eval_pool,
    algorithm=EFeaturesSelectionAlgorithm.RecursiveByLossFunctionChange,
    num_features_to_select=50,
    steps=10, 
    features_for_select=X_train_proc.columns.tolist(),
    logging_level='Info', # Shows progress
    train_final_model=False
)

end_time = time.time()
print(f"RFE Complete. Took {end_time - start_time:.2f} seconds.")

selected_features = summary['selected_features_names']
eliminated_features = summary['eliminated_features_names']

print(f"\n--- RFE Results ---")
print(f"Total features selected: {len(selected_features)}")
print(f"Total features eliminated: {len(eliminated_features)}")
print("\nEliminated features:")
print(eliminated_features)

Starting CatBoost RFE...
Step #1 out of 10

liab_prct, bin=35 score 24.89008809
has_recovery_target, bin=0 score 31.65969367
highrisk_site_binary, bin=0 score 33.95795559
witness_binary, bin=0 score 35.58066597
liab_prct, bin=41 score 36.89271689

liab_prct, bin=38 score 21.52506273
multicar_binary, bin=0 score 27.33976975
highrisk_site_binary, bin=0 score 29.48784749
witness_present, bin=0 score 30.98652847
liab_prct, bin=33 score 31.67453292

liab_prct, bin=35 score 18.55382772
is_single_car, bin=0 score 23.40464238
highrisk_site_binary, bin=0 score 25.25857842
liab_prct, bin=41 score 26.66781441
witness_present_ind, value=1 score 27.87763603

liab_prct, bin=33 score 15.77310722
multicar_binary, bin=0 score 19.97392005
high_risk_site, bin=0 score 21.64194621
liab_prct, bin=41 score 23.15367943
witness_binary, bin=0 score 24.26616426

liab_prct, bin=34 score 13.60376612
recovery_case_clarity, bin=0 score 17.37590282
high_risk_site, bin=0 score 18.80911489
liab_prct, bin=44 score 20.15

In [24]:
X_train_top_features = X_train_proc[selected_features]
X_test_top_features = X_test_proc[selected_features]

original_cat_features = set(CAT_FEATURES)
top_features_set = set(selected_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

In [26]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 2000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 100, # Keep early stopping
    'random_state': 123
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.09325715313438826, 'depth': 5, 'l2_leaf_reg': 0.0021410243996902215, 'subsample': 0.9, 'random_strength': 0.04848483602713997, 'bagging_temperature': 0.6979998048549101, 'border_count': 239, 'scale_pos_weight': 2.2902998251039337}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.5814

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.89      0.81      0.85      4165
   Class 1.0       0.51      0.68      0.58      1235

    accuracy                           0.78      5400
   macro avg       0.70      0.74      0.71      5400
weighted avg       0.81      0.78      0.79      5400



In [27]:
final_params_rfe = study.best_trial.params.copy()
final_params_rfe.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 100,
    'random_state': 123
})

new_model = CatBoostClassifier(**final_params_rfe)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

# Get the original F1 score from cell 13 to compare
original_f1_score = f1_score(y_test, best_model.predict(X_test_proc), pos_label=1)

print("\n--- Model Performance Comparison ---")
print(f"Original F1 score (all 83 features): {original_f1_score:.4f}")
print(f"New F1 score ({len(selected_features)} RFE features): {new_f1:.4f}")

print("\nNew Model Classification Report (RFE Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))


--- Model Performance Comparison ---
Original F1 score (all 83 features): 0.5814
New F1 score (50 RFE features): 0.5864

New Model Classification Report (RFE Features):
              precision    recall  f1-score   support

   Class 0.0       0.90      0.80      0.85      4165
   Class 1.0       0.51      0.69      0.59      1235

    accuracy                           0.78      5400
   macro avg       0.70      0.75      0.72      5400
weighted avg       0.81      0.78      0.79      5400



In [14]:
print("Saving best_model...")

# Use the model's F1 score in the name
best_model.save_model("catboost_mod_f1_0.6022.cbm")

print("Done.")

Saving best_model...
Done.


In [15]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 83

Top 10 Most Important Features:
                     feature  importance
14                 liab_prct   23.422263
49     recovery_case_clarity    6.895557
52      highrisk_site_binary    3.970960
46             is_single_car    3.845630
13       witness_present_ind    2.992513
45  is_multi_vehicle_unclear    2.951461
25       in_network_bodyshop    2.888566
53            high_risk_site    2.612414
48       has_recovery_target    2.430656
22            vehicle_weight    2.138397

Bottom 10 Least Important Features:
              feature  importance
66     veteran_driver    0.052804
43      police_binary    0.047522
62  middle_age_driver    0.035613
36          is_friday    0.028452
59       young_driver    0.023122
38         claim_hour    0.000000
64      novice_driver    0.000000
60      senior_driver    0.000000
39          rush_hour    0.000000
40         late_night    0.000000

Keeping top 66 features and removing bottom 100.


In [16]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data for CatBoost...
Applying mandatory .astype(str) to: ['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'policy_report_filed_ind', 'witness_present_ind']
Dropping helper datetime columns: ['claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            0
2       4655051            0
3       6728725            1
4       9848460            1


In [17]:
prediction.to_csv("results/catboost_6022_prediction.csv", index=False)