In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier, Pool, EFeaturesSelectionAlgorithm
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from cc5_preprocessor import Preprocessor

import joblib

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=0)

In [5]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [6]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [7]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Learning statistical parameters for Z-scoring...
Fit complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.


In [8]:
print("Saving preprocessor and training columns...")

# Save the 'pre' object
joblib.dump(pre, 'cc3_preprocessor.pkl')

# Save the exact column order and names
joblib.dump(X_train_proc.columns, 'training_columns.pkl')

print("Done.")

Saving preprocessor and training columns...
Done.


## CatBoost with Optuna Tuning

In [9]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season']


In [11]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100,
        'random_state': 123
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False
    )

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [13]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=100, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-12 12:16:40,918] A new study created in memory with name: no-name-4ee938df-9a18-4c7b-93fa-318c908af48d



2. Starting Optuna study...


Best trial: 0. Best value: 0.589641:   1%|          | 1/100 [00:02<04:32,  2.75s/it]

[I 2025-11-12 12:16:43,673] Trial 0 finished with value: 0.5896414342629482 and parameters: {'learning_rate': 0.03945611717792594, 'depth': 6, 'l2_leaf_reg': 0.005339045005626586, 'subsample': 0.8, 'random_strength': 1.0685974307072054e-05, 'bagging_temperature': 0.7661345098981627, 'border_count': 207, 'scale_pos_weight': 2.0964408316048657}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 0. Best value: 0.589641:   2%|▏         | 2/100 [00:04<03:09,  1.93s/it]

[I 2025-11-12 12:16:45,027] Trial 1 finished with value: 0.5271790065604499 and parameters: {'learning_rate': 0.07729474645436202, 'depth': 5, 'l2_leaf_reg': 2.4761234980890205, 'subsample': 0.6, 'random_strength': 0.03640607083607454, 'bagging_temperature': 0.1709537244372289, 'border_count': 139, 'scale_pos_weight': 7.702291807525953}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 0. Best value: 0.589641:   3%|▎         | 3/100 [00:07<04:07,  2.55s/it]

[I 2025-11-12 12:16:48,312] Trial 2 finished with value: 0.5780830098536877 and parameters: {'learning_rate': 0.022680210366206707, 'depth': 8, 'l2_leaf_reg': 0.0031090663866896956, 'subsample': 1.0, 'random_strength': 3.4337170026694583e-07, 'bagging_temperature': 0.9411226266196828, 'border_count': 183, 'scale_pos_weight': 3.6709971547824987}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 0. Best value: 0.589641:   4%|▍         | 4/100 [00:08<03:12,  2.00s/it]

[I 2025-11-12 12:16:49,472] Trial 3 finished with value: 0.5200816882232812 and parameters: {'learning_rate': 0.12205817847927665, 'depth': 5, 'l2_leaf_reg': 8.064885388328216, 'subsample': 0.5, 'random_strength': 0.0001928506655263629, 'bagging_temperature': 0.6064738158581415, 'border_count': 133, 'scale_pos_weight': 9.326437402381245}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 0. Best value: 0.589641:   5%|▌         | 5/100 [00:10<03:15,  2.06s/it]

[I 2025-11-12 12:16:51,639] Trial 4 finished with value: 0.5261669024045261 and parameters: {'learning_rate': 0.18024748707225832, 'depth': 8, 'l2_leaf_reg': 0.06440458418490988, 'subsample': 0.5, 'random_strength': 0.00961717867883696, 'bagging_temperature': 0.6873814192795987, 'border_count': 225, 'scale_pos_weight': 8.086373051687453}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 0. Best value: 0.589641:   6%|▌         | 6/100 [00:14<04:21,  2.78s/it]

[I 2025-11-12 12:16:55,815] Trial 5 finished with value: 0.5534626038781163 and parameters: {'learning_rate': 0.28918374786174295, 'depth': 10, 'l2_leaf_reg': 0.1028799017929965, 'subsample': 0.7, 'random_strength': 0.22122582108985292, 'bagging_temperature': 0.08504283700758408, 'border_count': 199, 'scale_pos_weight': 4.513932689273227}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 0. Best value: 0.589641:   7%|▋         | 7/100 [00:20<05:52,  3.80s/it]

[I 2025-11-12 12:17:01,700] Trial 6 finished with value: 0.588577664605311 and parameters: {'learning_rate': 0.013272254825342708, 'depth': 4, 'l2_leaf_reg': 3.968723842780682, 'subsample': 0.8, 'random_strength': 0.0042521578610185615, 'bagging_temperature': 0.5631596587922525, 'border_count': 46, 'scale_pos_weight': 1.9840881047242858}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 0. Best value: 0.589641:   8%|▊         | 8/100 [00:21<04:22,  2.85s/it]

[I 2025-11-12 12:17:02,527] Trial 7 finished with value: 0.5252854812398042 and parameters: {'learning_rate': 0.2411963772094233, 'depth': 4, 'l2_leaf_reg': 0.6698968060607206, 'subsample': 1.0, 'random_strength': 6.395487004341503e-08, 'bagging_temperature': 0.5438658186271794, 'border_count': 253, 'scale_pos_weight': 8.035057737630984}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 0. Best value: 0.589641:   9%|▉         | 9/100 [00:24<04:27,  2.94s/it]

[I 2025-11-12 12:17:05,676] Trial 8 finished with value: 0.5235057878573116 and parameters: {'learning_rate': 0.1562006920865555, 'depth': 9, 'l2_leaf_reg': 0.9794723247159124, 'subsample': 0.5, 'random_strength': 7.528277499135866e-06, 'bagging_temperature': 0.29909079869785604, 'border_count': 73, 'scale_pos_weight': 8.527522796157061}. Best is trial 0 with value: 0.5896414342629482.


Best trial: 9. Best value: 0.590406:  10%|█         | 10/100 [00:25<03:28,  2.32s/it]

[I 2025-11-12 12:17:06,585] Trial 9 finished with value: 0.5904059040590406 and parameters: {'learning_rate': 0.12572852617362415, 'depth': 3, 'l2_leaf_reg': 0.006702342408778661, 'subsample': 0.6, 'random_strength': 0.0863116309793937, 'bagging_temperature': 0.7777324389112562, 'border_count': 78, 'scale_pos_weight': 1.899015575983526}. Best is trial 9 with value: 0.5904059040590406.


Best trial: 9. Best value: 0.590406:  11%|█         | 11/100 [00:27<03:13,  2.18s/it]

[I 2025-11-12 12:17:08,445] Trial 10 finished with value: 0.5185185185185185 and parameters: {'learning_rate': 0.059788362363644154, 'depth': 3, 'l2_leaf_reg': 0.018905761334687132, 'subsample': 0.7, 'random_strength': 0.8229621270021071, 'bagging_temperature': 0.9696665231855581, 'border_count': 99, 'scale_pos_weight': 1.0074979683654712}. Best is trial 9 with value: 0.5904059040590406.


Best trial: 11. Best value: 0.590723:  12%|█▏        | 12/100 [00:29<03:10,  2.17s/it]

[I 2025-11-12 12:17:10,595] Trial 11 finished with value: 0.5907226254338909 and parameters: {'learning_rate': 0.029676782573412695, 'depth': 6, 'l2_leaf_reg': 0.0023970237162622035, 'subsample': 0.8, 'random_strength': 8.957699461967189e-05, 'bagging_temperature': 0.7872827010734517, 'border_count': 182, 'scale_pos_weight': 2.9333253583861616}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  13%|█▎        | 13/100 [00:31<03:09,  2.18s/it]

[I 2025-11-12 12:17:12,808] Trial 12 finished with value: 0.5474433986263038 and parameters: {'learning_rate': 0.033956280499054484, 'depth': 7, 'l2_leaf_reg': 0.0013393268815676377, 'subsample': 0.9, 'random_strength': 0.0005195859415979467, 'bagging_temperature': 0.8159258424994172, 'border_count': 166, 'scale_pos_weight': 5.925413272481837}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  14%|█▍        | 14/100 [00:32<02:39,  1.85s/it]

[I 2025-11-12 12:17:13,896] Trial 13 finished with value: 0.5865030674846625 and parameters: {'learning_rate': 0.08544881963578839, 'depth': 3, 'l2_leaf_reg': 0.011907735189370786, 'subsample': 0.6, 'random_strength': 1.9387444399325132e-05, 'bagging_temperature': 0.3519356286186177, 'border_count': 102, 'scale_pos_weight': 3.1264506542880777}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  15%|█▌        | 15/100 [00:35<02:50,  2.01s/it]

[I 2025-11-12 12:17:16,261] Trial 14 finished with value: 0.5487309644670051 and parameters: {'learning_rate': 0.0187556541853522, 'depth': 6, 'l2_leaf_reg': 0.0012923687552881494, 'subsample': 0.6, 'random_strength': 0.001117364185687433, 'bagging_temperature': 0.8406860267876449, 'border_count': 114, 'scale_pos_weight': 5.55470881515633}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  16%|█▌        | 16/100 [00:38<03:13,  2.30s/it]

[I 2025-11-12 12:17:19,259] Trial 15 finished with value: 0.5264650283553876 and parameters: {'learning_rate': 0.035073946789656626, 'depth': 5, 'l2_leaf_reg': 0.04717013130468787, 'subsample': 0.9, 'random_strength': 1.1728681403851398e-06, 'bagging_temperature': 0.42539013417203336, 'border_count': 52, 'scale_pos_weight': 1.0542284887597182}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  17%|█▋        | 17/100 [00:44<04:49,  3.49s/it]

[I 2025-11-12 12:17:25,500] Trial 16 finished with value: 0.5875276986388097 and parameters: {'learning_rate': 0.010865223819306895, 'depth': 7, 'l2_leaf_reg': 0.005959272584543825, 'subsample': 0.7, 'random_strength': 0.06479371262947717, 'bagging_temperature': 0.6871160357949562, 'border_count': 173, 'scale_pos_weight': 3.121617007683321}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  18%|█▊        | 18/100 [00:45<03:48,  2.78s/it]

[I 2025-11-12 12:17:26,646] Trial 17 finished with value: 0.5623655913978495 and parameters: {'learning_rate': 0.10821209059997573, 'depth': 4, 'l2_leaf_reg': 0.28453463461020373, 'subsample': 0.8, 'random_strength': 2.3822462765189954e-05, 'bagging_temperature': 0.8395499362511367, 'border_count': 80, 'scale_pos_weight': 4.539596633327369}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  19%|█▉        | 19/100 [00:47<03:24,  2.53s/it]

[I 2025-11-12 12:17:28,579] Trial 18 finished with value: 0.5414337788578372 and parameters: {'learning_rate': 0.050804428606907315, 'depth': 3, 'l2_leaf_reg': 0.020170802009846982, 'subsample': 0.9, 'random_strength': 1.1645002370638407e-08, 'bagging_temperature': 0.6803395548510455, 'border_count': 165, 'scale_pos_weight': 6.797078064980232}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  20%|██        | 20/100 [00:52<04:29,  3.37s/it]

[I 2025-11-12 12:17:33,895] Trial 19 finished with value: 0.5769678169833269 and parameters: {'learning_rate': 0.02069962985385358, 'depth': 10, 'l2_leaf_reg': 0.002682549787957258, 'subsample': 0.6, 'random_strength': 0.0021156300739094973, 'bagging_temperature': 0.9876211445570848, 'border_count': 33, 'scale_pos_weight': 2.2060770810673374}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  21%|██        | 21/100 [00:55<03:56,  2.99s/it]

[I 2025-11-12 12:17:36,016] Trial 20 finished with value: 0.5594059405940595 and parameters: {'learning_rate': 0.06492353388688053, 'depth': 8, 'l2_leaf_reg': 0.001008532512572882, 'subsample': 0.7, 'random_strength': 0.0001035576438022315, 'bagging_temperature': 0.45391069397676886, 'border_count': 130, 'scale_pos_weight': 4.597221552528062}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 11. Best value: 0.590723:  22%|██▏       | 22/100 [00:57<03:38,  2.80s/it]

[I 2025-11-12 12:17:38,367] Trial 21 finished with value: 0.5886601661249549 and parameters: {'learning_rate': 0.036385579982547045, 'depth': 6, 'l2_leaf_reg': 0.005477475058918447, 'subsample': 0.8, 'random_strength': 1.5345898483628466e-06, 'bagging_temperature': 0.7854127145249973, 'border_count': 210, 'scale_pos_weight': 2.0907007243033133}. Best is trial 11 with value: 0.5907226254338909.


Best trial: 22. Best value: 0.593085:  23%|██▎       | 23/100 [00:59<03:22,  2.63s/it]

[I 2025-11-12 12:17:40,601] Trial 22 finished with value: 0.5930851063829787 and parameters: {'learning_rate': 0.04413801477324341, 'depth': 6, 'l2_leaf_reg': 0.009112277640736749, 'subsample': 0.8, 'random_strength': 5.1992374475507155e-06, 'bagging_temperature': 0.7420489196228054, 'border_count': 236, 'scale_pos_weight': 2.6135955130299586}. Best is trial 22 with value: 0.5930851063829787.


Best trial: 22. Best value: 0.593085:  24%|██▍       | 24/100 [01:02<03:22,  2.66s/it]

[I 2025-11-12 12:17:43,343] Trial 23 finished with value: 0.5856622114216282 and parameters: {'learning_rate': 0.025931892299779817, 'depth': 7, 'l2_leaf_reg': 0.01004474866420631, 'subsample': 0.9, 'random_strength': 6.316787182227441e-05, 'bagging_temperature': 0.8889638963081989, 'border_count': 249, 'scale_pos_weight': 3.318062539167488}. Best is trial 22 with value: 0.5930851063829787.


Best trial: 22. Best value: 0.593085:  25%|██▌       | 25/100 [01:04<03:05,  2.47s/it]

[I 2025-11-12 12:17:45,353] Trial 24 finished with value: 0.5741898806139852 and parameters: {'learning_rate': 0.0471061538258245, 'depth': 5, 'l2_leaf_reg': 0.02912785324390028, 'subsample': 0.8, 'random_strength': 1.3173146427567471e-06, 'bagging_temperature': 0.7147714831104011, 'border_count': 230, 'scale_pos_weight': 3.9927070361433983}. Best is trial 22 with value: 0.5930851063829787.


Best trial: 25. Best value: 0.596199:  26%|██▌       | 26/100 [01:07<03:21,  2.72s/it]

[I 2025-11-12 12:17:48,655] Trial 25 finished with value: 0.5961987329109704 and parameters: {'learning_rate': 0.02723481361813923, 'depth': 6, 'l2_leaf_reg': 0.0028190166246627965, 'subsample': 0.7, 'random_strength': 0.0003046129454038126, 'bagging_temperature': 0.6141963017000498, 'border_count': 153, 'scale_pos_weight': 2.6616943874257775}. Best is trial 25 with value: 0.5961987329109704.


Best trial: 25. Best value: 0.596199:  27%|██▋       | 27/100 [01:10<03:22,  2.77s/it]

[I 2025-11-12 12:17:51,542] Trial 26 finished with value: 0.5878962536023055 and parameters: {'learning_rate': 0.016070825480296987, 'depth': 6, 'l2_leaf_reg': 0.002254852859226052, 'subsample': 0.7, 'random_strength': 0.0002923159150390022, 'bagging_temperature': 0.6010606329425359, 'border_count': 155, 'scale_pos_weight': 2.800949722490966}. Best is trial 25 with value: 0.5961987329109704.


Best trial: 27. Best value: 0.597693:  28%|██▊       | 28/100 [01:14<03:40,  3.07s/it]

[I 2025-11-12 12:17:55,309] Trial 27 finished with value: 0.5976933514246947 and parameters: {'learning_rate': 0.02740558046918159, 'depth': 7, 'l2_leaf_reg': 0.14676975193887526, 'subsample': 0.8, 'random_strength': 5.4614322002371696e-05, 'bagging_temperature': 0.4904523951310234, 'border_count': 188, 'scale_pos_weight': 2.5845350500702438}. Best is trial 27 with value: 0.5976933514246947.


Best trial: 27. Best value: 0.597693:  29%|██▉       | 29/100 [01:17<03:44,  3.16s/it]

[I 2025-11-12 12:17:58,672] Trial 28 finished with value: 0.5782646295762468 and parameters: {'learning_rate': 0.02658094437905785, 'depth': 7, 'l2_leaf_reg': 0.17607002484019132, 'subsample': 0.9, 'random_strength': 2.826041590925954e-06, 'bagging_temperature': 0.4692477839335966, 'border_count': 232, 'scale_pos_weight': 3.9342741980978406}. Best is trial 27 with value: 0.5976933514246947.


Best trial: 27. Best value: 0.597693:  30%|███       | 30/100 [01:22<04:11,  3.59s/it]

[I 2025-11-12 12:18:03,266] Trial 29 finished with value: 0.5604026845637584 and parameters: {'learning_rate': 0.04041238312690585, 'depth': 9, 'l2_leaf_reg': 0.32677213776846775, 'subsample': 0.7, 'random_strength': 2.1690142522434787e-07, 'bagging_temperature': 0.3621251166383421, 'border_count': 213, 'scale_pos_weight': 1.5458261122026775}. Best is trial 27 with value: 0.5976933514246947.


Best trial: 27. Best value: 0.597693:  31%|███       | 31/100 [01:25<03:53,  3.38s/it]

[I 2025-11-12 12:18:06,166] Trial 30 finished with value: 0.5884745762711865 and parameters: {'learning_rate': 0.04160769860130853, 'depth': 8, 'l2_leaf_reg': 0.0945599286470798, 'subsample': 0.8, 'random_strength': 1.200528187332082e-05, 'bagging_temperature': 0.2733568623901555, 'border_count': 194, 'scale_pos_weight': 2.550807464373648}. Best is trial 27 with value: 0.5976933514246947.


Best trial: 31. Best value: 0.598814:  32%|███▏      | 32/100 [01:27<03:29,  3.08s/it]

[I 2025-11-12 12:18:08,556] Trial 31 finished with value: 0.5988142292490118 and parameters: {'learning_rate': 0.02796464230077825, 'depth': 6, 'l2_leaf_reg': 0.003190792324169893, 'subsample': 0.8, 'random_strength': 4.223856960453495e-05, 'bagging_temperature': 0.6159076025573295, 'border_count': 185, 'scale_pos_weight': 2.65255710022321}. Best is trial 31 with value: 0.5988142292490118.


Best trial: 31. Best value: 0.598814:  33%|███▎      | 33/100 [01:32<03:54,  3.49s/it]

[I 2025-11-12 12:18:13,003] Trial 32 finished with value: 0.5706075533661741 and parameters: {'learning_rate': 0.015252739329624966, 'depth': 7, 'l2_leaf_reg': 0.003940214758149777, 'subsample': 0.8, 'random_strength': 3.258571937547817e-05, 'bagging_temperature': 0.6393560078786749, 'border_count': 154, 'scale_pos_weight': 1.5758096625712192}. Best is trial 31 with value: 0.5988142292490118.


Best trial: 31. Best value: 0.598814:  34%|███▍      | 34/100 [01:35<03:48,  3.46s/it]

[I 2025-11-12 12:18:16,399] Trial 33 finished with value: 0.5795156113218558 and parameters: {'learning_rate': 0.02390275724967519, 'depth': 5, 'l2_leaf_reg': 0.012236020245952262, 'subsample': 0.7, 'random_strength': 5.922371473713626e-06, 'bagging_temperature': 0.5238760177086603, 'border_count': 190, 'scale_pos_weight': 3.7061281272587543}. Best is trial 31 with value: 0.5988142292490118.


Best trial: 31. Best value: 0.598814:  35%|███▌      | 35/100 [01:39<04:00,  3.70s/it]

[I 2025-11-12 12:18:20,647] Trial 34 finished with value: 0.5954979536152797 and parameters: {'learning_rate': 0.019412969415087942, 'depth': 6, 'l2_leaf_reg': 0.033736332470402274, 'subsample': 0.8, 'random_strength': 0.000732490309399442, 'bagging_temperature': 0.7225261812560272, 'border_count': 145, 'scale_pos_weight': 2.500768831698284}. Best is trial 31 with value: 0.5988142292490118.


Best trial: 31. Best value: 0.598814:  36%|███▌      | 36/100 [01:42<03:48,  3.57s/it]

[I 2025-11-12 12:18:23,906] Trial 35 finished with value: 0.5790868924889544 and parameters: {'learning_rate': 0.018785621798868796, 'depth': 5, 'l2_leaf_reg': 0.0476293803977585, 'subsample': 0.9, 'random_strength': 0.0004906543066599076, 'bagging_temperature': 0.6376349969420589, 'border_count': 145, 'scale_pos_weight': 3.492809176330381}. Best is trial 31 with value: 0.5988142292490118.


Best trial: 36. Best value: 0.604399:  37%|███▋      | 37/100 [01:48<04:28,  4.26s/it]

[I 2025-11-12 12:18:29,771] Trial 36 finished with value: 0.604399323181049 and parameters: {'learning_rate': 0.010431688073932016, 'depth': 6, 'l2_leaf_reg': 0.13650220185823378, 'subsample': 1.0, 'random_strength': 0.003373546298820878, 'bagging_temperature': 0.5141077456444894, 'border_count': 118, 'scale_pos_weight': 2.4566730131089503}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  38%|███▊      | 38/100 [01:55<05:14,  5.07s/it]

[I 2025-11-12 12:18:36,756] Trial 37 finished with value: 0.5763869132290185 and parameters: {'learning_rate': 0.010554170185851827, 'depth': 8, 'l2_leaf_reg': 0.7687667824421119, 'subsample': 1.0, 'random_strength': 0.004560158328979368, 'bagging_temperature': 0.49339761595479514, 'border_count': 126, 'scale_pos_weight': 4.2039222789441855}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  39%|███▉      | 39/100 [02:03<05:47,  5.70s/it]

[I 2025-11-12 12:18:43,928] Trial 38 finished with value: 0.5738916256157636 and parameters: {'learning_rate': 0.012993384574120869, 'depth': 7, 'l2_leaf_reg': 0.14883243333069782, 'subsample': 1.0, 'random_strength': 0.014833089858227224, 'bagging_temperature': 0.40870516346047825, 'border_count': 181, 'scale_pos_weight': 1.5806983329869604}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  40%|████      | 40/100 [02:05<04:45,  4.75s/it]

[I 2025-11-12 12:18:46,468] Trial 39 finished with value: 0.5577479610628782 and parameters: {'learning_rate': 0.03091405775434139, 'depth': 5, 'l2_leaf_reg': 1.5054268600219554, 'subsample': 1.0, 'random_strength': 0.00020603011043685696, 'bagging_temperature': 0.5784079478916136, 'border_count': 119, 'scale_pos_weight': 5.013547654860028}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  41%|████      | 41/100 [02:09<04:18,  4.37s/it]

[I 2025-11-12 12:18:49,955] Trial 40 finished with value: 0.5173116089613035 and parameters: {'learning_rate': 0.013625855906507835, 'depth': 6, 'l2_leaf_reg': 0.3915594520398252, 'subsample': 0.9, 'random_strength': 0.021619082254725138, 'bagging_temperature': 0.14407550629162713, 'border_count': 204, 'scale_pos_weight': 9.541774764151}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  42%|████▏     | 42/100 [02:12<04:01,  4.16s/it]

[I 2025-11-12 12:18:53,625] Trial 41 finished with value: 0.5977318212141428 and parameters: {'learning_rate': 0.017808611895035995, 'depth': 6, 'l2_leaf_reg': 0.08739511053046653, 'subsample': 0.7, 'random_strength': 0.0010139470260770826, 'bagging_temperature': 0.5207078769893597, 'border_count': 143, 'scale_pos_weight': 2.5686424751318597}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  43%|████▎     | 43/100 [02:16<03:49,  4.02s/it]

[I 2025-11-12 12:18:57,306] Trial 42 finished with value: 0.5840967387321363 and parameters: {'learning_rate': 0.022636330784371668, 'depth': 6, 'l2_leaf_reg': 0.087176583642408, 'subsample': 0.7, 'random_strength': 0.0024341052303551887, 'bagging_temperature': 0.5082301718820472, 'border_count': 157, 'scale_pos_weight': 2.0132421879734905}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  44%|████▍     | 44/100 [02:21<03:57,  4.24s/it]

[I 2025-11-12 12:19:02,050] Trial 43 finished with value: 0.5982019363762102 and parameters: {'learning_rate': 0.016533144587188943, 'depth': 7, 'l2_leaf_reg': 0.20204542434820555, 'subsample': 0.6, 'random_strength': 5.491005951567339e-05, 'bagging_temperature': 0.5501361398952577, 'border_count': 138, 'scale_pos_weight': 2.3942969797536797}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  45%|████▌     | 45/100 [02:29<05:00,  5.46s/it]

[I 2025-11-12 12:19:10,378] Trial 44 finished with value: 0.5709445150360017 and parameters: {'learning_rate': 0.01207694423321662, 'depth': 7, 'l2_leaf_reg': 0.20392392664315093, 'subsample': 0.5, 'random_strength': 4.568418622930325e-05, 'bagging_temperature': 0.5278036009239514, 'border_count': 103, 'scale_pos_weight': 1.4214389350460586}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  46%|████▌     | 46/100 [02:34<04:52,  5.42s/it]

[I 2025-11-12 12:19:15,689] Trial 45 finished with value: 0.5871951219512195 and parameters: {'learning_rate': 0.015668596375919795, 'depth': 8, 'l2_leaf_reg': 0.5273349527394425, 'subsample': 0.6, 'random_strength': 0.00012933824755031255, 'bagging_temperature': 0.3857598350006471, 'border_count': 134, 'scale_pos_weight': 3.4539016731407908}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  47%|████▋     | 47/100 [02:43<05:47,  6.56s/it]

[I 2025-11-12 12:19:24,915] Trial 46 finished with value: 0.5929861849096706 and parameters: {'learning_rate': 0.016326610802899075, 'depth': 9, 'l2_leaf_reg': 3.5667786139287876, 'subsample': 0.5, 'random_strength': 0.0072910129373776065, 'bagging_temperature': 0.5731686161686136, 'border_count': 115, 'scale_pos_weight': 2.3339318587844575}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  48%|████▊     | 48/100 [02:49<05:25,  6.26s/it]

[I 2025-11-12 12:19:30,469] Trial 47 finished with value: 0.545816733067729 and parameters: {'learning_rate': 0.010115136717119126, 'depth': 4, 'l2_leaf_reg': 0.1252560511605529, 'subsample': 0.6, 'random_strength': 0.0014377770134962882, 'bagging_temperature': 0.30175245828538266, 'border_count': 138, 'scale_pos_weight': 6.184697276958822}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  49%|████▉     | 49/100 [02:54<05:01,  5.91s/it]

[I 2025-11-12 12:19:35,552] Trial 48 finished with value: 0.5961290322580645 and parameters: {'learning_rate': 0.014171770751255358, 'depth': 7, 'l2_leaf_reg': 1.3339154174984464, 'subsample': 0.6, 'random_strength': 1.3294116269786962e-05, 'bagging_temperature': 0.4621798738548677, 'border_count': 92, 'scale_pos_weight': 2.903125971779661}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  50%|█████     | 50/100 [02:59<04:36,  5.52s/it]

[I 2025-11-12 12:19:40,174] Trial 49 finished with value: 0.5263404354952002 and parameters: {'learning_rate': 0.012029272363683011, 'depth': 7, 'l2_leaf_reg': 0.26185869808959056, 'subsample': 0.5, 'random_strength': 5.792304122150348e-05, 'bagging_temperature': 0.5450031212706112, 'border_count': 176, 'scale_pos_weight': 8.726655511420798}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  51%|█████     | 51/100 [03:07<05:10,  6.33s/it]

[I 2025-11-12 12:19:48,403] Trial 50 finished with value: 0.584070796460177 and parameters: {'learning_rate': 0.017159824684333677, 'depth': 8, 'l2_leaf_reg': 8.565597826920358, 'subsample': 0.6, 'random_strength': 0.03643132128295463, 'bagging_temperature': 0.64788467636164, 'border_count': 123, 'scale_pos_weight': 1.8466384829263065}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  52%|█████▏    | 52/100 [03:10<04:19,  5.40s/it]

[I 2025-11-12 12:19:51,642] Trial 51 finished with value: 0.5930195325008005 and parameters: {'learning_rate': 0.026818339502985726, 'depth': 6, 'l2_leaf_reg': 0.07004271495265794, 'subsample': 0.7, 'random_strength': 0.0003401480620809842, 'bagging_temperature': 0.608245790374887, 'border_count': 167, 'scale_pos_weight': 2.9206483087467587}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  53%|█████▎    | 53/100 [03:14<03:56,  5.02s/it]

[I 2025-11-12 12:19:55,779] Trial 52 finished with value: 0.5937716262975778 and parameters: {'learning_rate': 0.02143954340230547, 'depth': 6, 'l2_leaf_reg': 0.06226972571780214, 'subsample': 0.7, 'random_strength': 0.0008020929056618835, 'bagging_temperature': 0.43374730406684514, 'border_count': 149, 'scale_pos_weight': 2.3592590385022936}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  54%|█████▍    | 54/100 [03:18<03:37,  4.72s/it]

[I 2025-11-12 12:19:59,793] Trial 53 finished with value: 0.5391061452513967 and parameters: {'learning_rate': 0.03337441669591124, 'depth': 5, 'l2_leaf_reg': 0.5196037342400275, 'subsample': 0.7, 'random_strength': 0.00021603708026055946, 'bagging_temperature': 0.4973328059412471, 'border_count': 188, 'scale_pos_weight': 1.0715809435409378}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  55%|█████▌    | 55/100 [03:21<03:01,  4.04s/it]

[I 2025-11-12 12:20:02,237] Trial 54 finished with value: 0.5856185252894577 and parameters: {'learning_rate': 0.029327189746761583, 'depth': 6, 'l2_leaf_reg': 0.003660249207787652, 'subsample': 0.8, 'random_strength': 0.004410894423077757, 'bagging_temperature': 0.6026472772293606, 'border_count': 160, 'scale_pos_weight': 3.3114867404676205}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  56%|█████▌    | 56/100 [03:28<03:33,  4.84s/it]

[I 2025-11-12 12:20:08,955] Trial 55 finished with value: 0.5840241145440844 and parameters: {'learning_rate': 0.017870021321995833, 'depth': 5, 'l2_leaf_reg': 0.1297370366211175, 'subsample': 0.6, 'random_strength': 0.00015080979555796546, 'bagging_temperature': 0.5537754786524388, 'border_count': 170, 'scale_pos_weight': 1.8511679275749158}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  57%|█████▋    | 57/100 [03:31<03:08,  4.39s/it]

[I 2025-11-12 12:20:12,275] Trial 56 finished with value: 0.5922233300099701 and parameters: {'learning_rate': 0.022632873999931934, 'depth': 7, 'l2_leaf_reg': 0.0016360854351734687, 'subsample': 0.7, 'random_strength': 0.0016323426053777752, 'bagging_temperature': 0.6573602714491436, 'border_count': 218, 'scale_pos_weight': 2.717149790360774}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  58%|█████▊    | 58/100 [03:33<02:33,  3.65s/it]

[I 2025-11-12 12:20:14,203] Trial 57 finished with value: 0.5603195739014647 and parameters: {'learning_rate': 0.05865961737238366, 'depth': 6, 'l2_leaf_reg': 0.21345118741565544, 'subsample': 0.7, 'random_strength': 2.2159850290570483e-05, 'bagging_temperature': 0.3168949328642103, 'border_count': 199, 'scale_pos_weight': 4.929683813897898}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  59%|█████▉    | 59/100 [03:37<02:38,  3.86s/it]

[I 2025-11-12 12:20:18,571] Trial 58 finished with value: 0.5400885391047713 and parameters: {'learning_rate': 0.011811343869122254, 'depth': 7, 'l2_leaf_reg': 0.01774713693750073, 'subsample': 0.8, 'random_strength': 9.048287117801478e-05, 'bagging_temperature': 0.480119146938981, 'border_count': 136, 'scale_pos_weight': 6.951576005455296}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  60%|██████    | 60/100 [03:42<02:46,  4.17s/it]

[I 2025-11-12 12:20:23,447] Trial 59 finished with value: 0.5876836511409815 and parameters: {'learning_rate': 0.019927995931964988, 'depth': 4, 'l2_leaf_reg': 0.03482326706655088, 'subsample': 1.0, 'random_strength': 0.13564835783659332, 'bagging_temperature': 0.6805030974398405, 'border_count': 111, 'scale_pos_weight': 3.105737523653697}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  61%|██████    | 61/100 [03:46<02:37,  4.03s/it]

[I 2025-11-12 12:20:27,157] Trial 60 finished with value: 0.5728380731282646 and parameters: {'learning_rate': 0.025024945422992113, 'depth': 6, 'l2_leaf_reg': 0.37650658623467476, 'subsample': 0.9, 'random_strength': 0.0004357908587276465, 'bagging_temperature': 0.6067923640799243, 'border_count': 179, 'scale_pos_weight': 3.8508968614802637}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  62%|██████▏   | 62/100 [03:52<03:00,  4.75s/it]

[I 2025-11-12 12:20:33,591] Trial 61 finished with value: 0.5979314802844214 and parameters: {'learning_rate': 0.014017364230757647, 'depth': 7, 'l2_leaf_reg': 5.8464437723417015, 'subsample': 0.6, 'random_strength': 1.2460771753380431e-05, 'bagging_temperature': 0.4569815727229626, 'border_count': 96, 'scale_pos_weight': 2.9142732768968926}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  63%|██████▎   | 63/100 [03:58<03:10,  5.16s/it]

[I 2025-11-12 12:20:39,705] Trial 62 finished with value: 0.5916606757728253 and parameters: {'learning_rate': 0.014564477462021561, 'depth': 7, 'l2_leaf_reg': 1.2948432273550963, 'subsample': 0.6, 'random_strength': 3.370956499476777e-06, 'bagging_temperature': 0.4316537639825504, 'border_count': 88, 'scale_pos_weight': 2.119592917027735}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  64%|██████▍   | 64/100 [04:06<03:28,  5.79s/it]

[I 2025-11-12 12:20:46,958] Trial 63 finished with value: 0.5976068376068376 and parameters: {'learning_rate': 0.012339220479878432, 'depth': 6, 'l2_leaf_reg': 4.0950904184657375, 'subsample': 0.6, 'random_strength': 3.651880707306508e-05, 'bagging_temperature': 0.5685639956917179, 'border_count': 65, 'scale_pos_weight': 2.398635374131118}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  65%|██████▌   | 65/100 [04:16<04:11,  7.20s/it]

[I 2025-11-12 12:20:57,449] Trial 64 finished with value: 0.5971096228410293 and parameters: {'learning_rate': 0.011407223489343225, 'depth': 8, 'l2_leaf_reg': 6.59292359908626, 'subsample': 0.5, 'random_strength': 3.6572631444933506e-05, 'bagging_temperature': 0.013569466103007022, 'border_count': 60, 'scale_pos_weight': 2.306091013661627}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  66%|██████▌   | 66/100 [04:22<03:50,  6.78s/it]

[I 2025-11-12 12:21:03,263] Trial 65 finished with value: 0.590725178960473 and parameters: {'learning_rate': 0.013349040961190627, 'depth': 7, 'l2_leaf_reg': 5.582180137276602, 'subsample': 0.6, 'random_strength': 1.129230848861779e-05, 'bagging_temperature': 0.39683867751538227, 'border_count': 62, 'scale_pos_weight': 3.221930526945198}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  67%|██████▋   | 67/100 [04:27<03:29,  6.34s/it]

[I 2025-11-12 12:21:08,554] Trial 66 finished with value: 0.5862466384940453 and parameters: {'learning_rate': 0.01729256025247585, 'depth': 6, 'l2_leaf_reg': 2.4627815272645797, 'subsample': 0.6, 'random_strength': 3.728082207505722e-07, 'bagging_temperature': 0.2537998712544298, 'border_count': 78, 'scale_pos_weight': 1.7782447251608922}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  68%|██████▊   | 68/100 [04:29<02:41,  5.04s/it]

[I 2025-11-12 12:21:10,574] Trial 67 finished with value: 0.5714285714285714 and parameters: {'learning_rate': 0.07741524398556347, 'depth': 5, 'l2_leaf_reg': 2.5222688052198223, 'subsample': 0.6, 'random_strength': 2.2565608713074007e-05, 'bagging_temperature': 0.5712563666195252, 'border_count': 35, 'scale_pos_weight': 1.3602687800235291}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  69%|██████▉   | 69/100 [04:59<06:26, 12.47s/it]

[I 2025-11-12 12:21:40,381] Trial 68 finished with value: 0.578227245687091 and parameters: {'learning_rate': 0.010124779145458226, 'depth': 7, 'l2_leaf_reg': 3.5309792048218274, 'subsample': 0.5, 'random_strength': 6.782749000009695e-05, 'bagging_temperature': 0.525503160166038, 'border_count': 109, 'scale_pos_weight': 3.5795088740948326}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  70%|███████   | 70/100 [05:05<05:18, 10.60s/it]

[I 2025-11-12 12:21:46,625] Trial 69 finished with value: 0.568868980963046 and parameters: {'learning_rate': 0.012343054046489717, 'depth': 6, 'l2_leaf_reg': 4.970712346588461, 'subsample': 0.6, 'random_strength': 6.681191792075509e-06, 'bagging_temperature': 0.35757754627268035, 'border_count': 93, 'scale_pos_weight': 4.25338713426}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  71%|███████   | 71/100 [05:07<03:47,  7.84s/it]

[I 2025-11-12 12:21:48,018] Trial 70 finished with value: 0.5962113659022931 and parameters: {'learning_rate': 0.2075682523326636, 'depth': 7, 'l2_leaf_reg': 9.654618766893854, 'subsample': 0.6, 'random_strength': 1.7216744397352416e-05, 'bagging_temperature': 0.4498588272024665, 'border_count': 65, 'scale_pos_weight': 2.546639843782801}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  72%|███████▏  | 72/100 [05:17<04:01,  8.62s/it]

[I 2025-11-12 12:21:58,460] Trial 71 finished with value: 0.5940946282461758 and parameters: {'learning_rate': 0.01100192019212298, 'depth': 8, 'l2_leaf_reg': 2.359479948745662, 'subsample': 0.5, 'random_strength': 3.0418672384959545e-05, 'bagging_temperature': 0.015757876514693736, 'border_count': 62, 'scale_pos_weight': 2.255350930112519}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  73%|███████▎  | 73/100 [05:29<04:19,  9.60s/it]

[I 2025-11-12 12:22:10,354] Trial 72 finished with value: 0.5991531404375441 and parameters: {'learning_rate': 0.011243171209282464, 'depth': 9, 'l2_leaf_reg': 4.756689897481002, 'subsample': 0.5, 'random_strength': 3.782755526331741e-05, 'bagging_temperature': 0.002477917030256075, 'border_count': 83, 'scale_pos_weight': 2.3533856330575347}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  74%|███████▍  | 74/100 [05:38<04:08,  9.57s/it]

[I 2025-11-12 12:22:19,835] Trial 73 finished with value: 0.5908038372477671 and parameters: {'learning_rate': 0.013702187905903135, 'depth': 9, 'l2_leaf_reg': 1.8125433936043083, 'subsample': 0.5, 'random_strength': 2.5538367203454245e-06, 'bagging_temperature': 0.2217052697428128, 'border_count': 73, 'scale_pos_weight': 2.904741937690324}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  75%|███████▌  | 75/100 [05:48<03:58,  9.53s/it]

[I 2025-11-12 12:22:29,285] Trial 74 finished with value: 0.5943986820428336 and parameters: {'learning_rate': 0.014683092652117941, 'depth': 10, 'l2_leaf_reg': 0.938273262545364, 'subsample': 0.8, 'random_strength': 0.0001152303402205285, 'bagging_temperature': 0.1139182566458325, 'border_count': 83, 'scale_pos_weight': 3.1037213305297495}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  76%|███████▌  | 76/100 [06:03<04:27, 11.14s/it]

[I 2025-11-12 12:22:44,164] Trial 75 finished with value: 0.55 and parameters: {'learning_rate': 0.01552874056442314, 'depth': 10, 'l2_leaf_reg': 4.574410192696943, 'subsample': 0.5, 'random_strength': 5.204846414864664e-05, 'bagging_temperature': 0.7450873056528156, 'border_count': 52, 'scale_pos_weight': 1.2432431767426946}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  77%|███████▋  | 77/100 [06:11<03:53, 10.14s/it]

[I 2025-11-12 12:22:51,974] Trial 76 finished with value: 0.5801169590643275 and parameters: {'learning_rate': 0.012696319243495242, 'depth': 6, 'l2_leaf_reg': 6.9949015445094584, 'subsample': 0.8, 'random_strength': 9.758615116122536e-06, 'bagging_temperature': 0.8987961202238287, 'border_count': 100, 'scale_pos_weight': 1.7246715977567306}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  78%|███████▊  | 78/100 [06:17<03:20,  9.13s/it]

[I 2025-11-12 12:22:58,741] Trial 77 finished with value: 0.5856675154601674 and parameters: {'learning_rate': 0.011201756431670875, 'depth': 5, 'l2_leaf_reg': 2.9504802824936647, 'subsample': 0.9, 'random_strength': 0.0007750797758549189, 'bagging_temperature': 0.05519943982756803, 'border_count': 127, 'scale_pos_weight': 2.0062099896924677}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  79%|███████▉  | 79/100 [06:23<02:48,  8.03s/it]

[I 2025-11-12 12:23:04,226] Trial 78 finished with value: 0.5912035458574838 and parameters: {'learning_rate': 0.018356946572542256, 'depth': 9, 'l2_leaf_reg': 0.10234508847414966, 'subsample': 0.6, 'random_strength': 4.928523044440813e-06, 'bagging_temperature': 0.1809607647366011, 'border_count': 105, 'scale_pos_weight': 2.6923917239984116}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  80%|████████  | 80/100 [06:28<02:24,  7.20s/it]

[I 2025-11-12 12:23:09,489] Trial 79 finished with value: 0.5967741935483871 and parameters: {'learning_rate': 0.020691708860822354, 'depth': 6, 'l2_leaf_reg': 0.05444228333011364, 'subsample': 0.7, 'random_strength': 0.42688729609204495, 'bagging_temperature': 0.5124219995601194, 'border_count': 119, 'scale_pos_weight': 2.5756098289501703}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  81%|████████  | 81/100 [06:35<02:15,  7.14s/it]

[I 2025-11-12 12:23:16,489] Trial 80 finished with value: 0.586093920640699 and parameters: {'learning_rate': 0.010047117289675661, 'depth': 8, 'l2_leaf_reg': 0.024419334356202707, 'subsample': 0.9, 'random_strength': 0.0026557099120768034, 'bagging_temperature': 0.7045015674785402, 'border_count': 141, 'scale_pos_weight': 2.179913012722323}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  82%|████████▏ | 82/100 [06:45<02:21,  7.84s/it]

[I 2025-11-12 12:23:25,952] Trial 81 finished with value: 0.5967966573816156 and parameters: {'learning_rate': 0.011203908195876232, 'depth': 8, 'l2_leaf_reg': 4.247463831374067, 'subsample': 0.5, 'random_strength': 3.7892226963942577e-05, 'bagging_temperature': 0.0017797579135265134, 'border_count': 68, 'scale_pos_weight': 2.3542841046300205}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  83%|████████▎ | 83/100 [06:54<02:21,  8.31s/it]

[I 2025-11-12 12:23:35,368] Trial 82 finished with value: 0.596040291767975 and parameters: {'learning_rate': 0.011687865118703438, 'depth': 8, 'l2_leaf_reg': 7.225094719372236, 'subsample': 0.5, 'random_strength': 7.775998616777014e-05, 'bagging_temperature': 0.07768656773153272, 'border_count': 55, 'scale_pos_weight': 2.390385239267439}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  84%|████████▍ | 84/100 [07:04<02:21,  8.84s/it]

[I 2025-11-12 12:23:45,456] Trial 83 finished with value: 0.597199609247802 and parameters: {'learning_rate': 0.013115435902564998, 'depth': 9, 'l2_leaf_reg': 5.876400732640987, 'subsample': 0.5, 'random_strength': 0.00020297820176696916, 'bagging_temperature': 0.03499396587255742, 'border_count': 84, 'scale_pos_weight': 3.00590554162541}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  85%|████████▌ | 85/100 [07:11<02:03,  8.25s/it]

[I 2025-11-12 12:23:52,322] Trial 84 finished with value: 0.590835229119272 and parameters: {'learning_rate': 0.01311291316764183, 'depth': 9, 'l2_leaf_reg': 0.07631254183668702, 'subsample': 0.5, 'random_strength': 0.0001972260543373614, 'bagging_temperature': 0.045944125841007405, 'border_count': 71, 'scale_pos_weight': 3.0973913171113208}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  86%|████████▌ | 86/100 [07:21<02:05,  8.94s/it]

[I 2025-11-12 12:24:02,870] Trial 85 finished with value: 0.5947802197802198 and parameters: {'learning_rate': 0.016578001547087908, 'depth': 10, 'l2_leaf_reg': 1.738620757460694, 'subsample': 0.6, 'random_strength': 1.6548371975430524e-05, 'bagging_temperature': 0.5507371306804649, 'border_count': 84, 'scale_pos_weight': 2.7629923367448375}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  87%|████████▋ | 87/100 [07:31<01:57,  9.03s/it]

[I 2025-11-12 12:24:12,119] Trial 86 finished with value: 0.5902756271291422 and parameters: {'learning_rate': 0.01480542369812317, 'depth': 9, 'l2_leaf_reg': 5.606431432510547, 'subsample': 0.5, 'random_strength': 0.0003031045742292456, 'bagging_temperature': 0.11310380041525718, 'border_count': 92, 'scale_pos_weight': 3.49443066702222}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  88%|████████▊ | 88/100 [07:36<01:34,  7.87s/it]

[I 2025-11-12 12:24:17,278] Trial 87 finished with value: 0.597997138769671 and parameters: {'learning_rate': 0.01385865453042192, 'depth': 6, 'l2_leaf_reg': 0.16276107791630032, 'subsample': 0.5, 'random_strength': 0.00012660858814008335, 'bagging_temperature': 0.6306269399827237, 'border_count': 96, 'scale_pos_weight': 2.1008202663494018}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  89%|████████▉ | 89/100 [07:41<01:19,  7.19s/it]

[I 2025-11-12 12:24:22,880] Trial 88 finished with value: 0.5875440658049353 and parameters: {'learning_rate': 0.0160276095867152, 'depth': 6, 'l2_leaf_reg': 0.188455955617515, 'subsample': 0.8, 'random_strength': 0.00011804657163787634, 'bagging_temperature': 0.5822787963686183, 'border_count': 97, 'scale_pos_weight': 1.7193802737618393}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  90%|█████████ | 90/100 [07:47<01:07,  6.79s/it]

[I 2025-11-12 12:24:28,734] Trial 89 finished with value: 0.5881040892193309 and parameters: {'learning_rate': 0.018405583147956428, 'depth': 6, 'l2_leaf_reg': 0.2636554932634004, 'subsample': 0.6, 'random_strength': 9.598945987154044e-06, 'bagging_temperature': 0.6343491616669628, 'border_count': 149, 'scale_pos_weight': 1.9610706168165706}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  91%|█████████ | 91/100 [07:50<00:49,  5.48s/it]

[I 2025-11-12 12:24:31,163] Trial 90 finished with value: 0.5785903863167208 and parameters: {'learning_rate': 0.04724116232266061, 'depth': 7, 'l2_leaf_reg': 0.1479262291248487, 'subsample': 0.7, 'random_strength': 0.011706314020452508, 'bagging_temperature': 0.48328632651096376, 'border_count': 77, 'scale_pos_weight': 3.7233647223659783}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  92%|█████████▏| 92/100 [07:55<00:42,  5.36s/it]

[I 2025-11-12 12:24:36,254] Trial 91 finished with value: 0.596514745308311 and parameters: {'learning_rate': 0.014040798767870856, 'depth': 6, 'l2_leaf_reg': 0.11892297088967661, 'subsample': 0.5, 'random_strength': 0.0009718552335647859, 'bagging_temperature': 0.6678711251504664, 'border_count': 131, 'scale_pos_weight': 2.5155413052095303}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  93%|█████████▎| 93/100 [08:00<00:37,  5.34s/it]

[I 2025-11-12 12:24:41,528] Trial 92 finished with value: 0.5926868044515103 and parameters: {'learning_rate': 0.01261839863051174, 'depth': 6, 'l2_leaf_reg': 0.008023141580787423, 'subsample': 0.5, 'random_strength': 0.00017281215780818275, 'bagging_temperature': 0.576696098844444, 'border_count': 89, 'scale_pos_weight': 2.9453362560109144}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  94%|█████████▍| 94/100 [08:01<00:24,  4.08s/it]

[I 2025-11-12 12:24:42,674] Trial 93 finished with value: 0.5948609644491376 and parameters: {'learning_rate': 0.11184242996520802, 'depth': 5, 'l2_leaf_reg': 0.45560853561763315, 'subsample': 0.5, 'random_strength': 0.0005306139009941032, 'bagging_temperature': 0.5317865396274771, 'border_count': 162, 'scale_pos_weight': 2.143260700067543}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  95%|█████████▌| 95/100 [08:07<00:22,  4.48s/it]

[I 2025-11-12 12:24:48,084] Trial 94 finished with value: 0.5883441258094357 and parameters: {'learning_rate': 0.015184404561160488, 'depth': 7, 'l2_leaf_reg': 3.358481826623049, 'subsample': 0.5, 'random_strength': 5.817970343675532e-05, 'bagging_temperature': 0.6212059570647058, 'border_count': 106, 'scale_pos_weight': 3.301672333654663}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  96%|█████████▌| 96/100 [08:11<00:17,  4.38s/it]

[I 2025-11-12 12:24:52,221] Trial 95 finished with value: 0.5768455022186365 and parameters: {'learning_rate': 0.02179651079880584, 'depth': 6, 'l2_leaf_reg': 0.16591675060664277, 'subsample': 0.6, 'random_strength': 3.020435041726068e-05, 'bagging_temperature': 0.5913224457393395, 'border_count': 116, 'scale_pos_weight': 1.5807413480605794}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  97%|█████████▋| 97/100 [08:13<00:11,  3.82s/it]

[I 2025-11-12 12:24:54,736] Trial 96 finished with value: 0.5933420365535248 and parameters: {'learning_rate': 0.03780074939860058, 'depth': 7, 'l2_leaf_reg': 0.2434210634655085, 'subsample': 1.0, 'random_strength': 8.74453963660342e-05, 'bagging_temperature': 0.5538413159888863, 'border_count': 82, 'scale_pos_weight': 2.765143489909651}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  98%|█████████▊| 98/100 [08:19<00:08,  4.27s/it]

[I 2025-11-12 12:25:00,064] Trial 97 finished with value: 0.597544338335607 and parameters: {'learning_rate': 0.019446071509540686, 'depth': 6, 'l2_leaf_reg': 9.981295445259166, 'subsample': 0.5, 'random_strength': 0.003204928638265681, 'bagging_temperature': 0.5025999986312374, 'border_count': 188, 'scale_pos_weight': 2.4448368846582262}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399:  99%|█████████▉| 99/100 [08:24<00:04,  4.46s/it]

[I 2025-11-12 12:25:04,950] Trial 98 finished with value: 0.5853840417598807 and parameters: {'learning_rate': 0.019358512501146077, 'depth': 6, 'l2_leaf_reg': 0.015006796120480332, 'subsample': 0.6, 'random_strength': 0.0027460111625595917, 'bagging_temperature': 0.503484100021838, 'border_count': 191, 'scale_pos_weight': 1.9481725826251683}. Best is trial 36 with value: 0.604399323181049.


Best trial: 36. Best value: 0.604399: 100%|██████████| 100/100 [08:28<00:00,  5.09s/it]

[I 2025-11-12 12:25:09,481] Trial 99 finished with value: 0.5930388219544847 and parameters: {'learning_rate': 0.02390006623923128, 'depth': 5, 'l2_leaf_reg': 9.260999889991405, 'subsample': 0.8, 'random_strength': 0.005724255876258864, 'bagging_temperature': 0.45465569177482723, 'border_count': 201, 'scale_pos_weight': 2.5301278827071667}. Best is trial 36 with value: 0.604399323181049.

Optuna study finished.
Number of finished trials: 100

Best trial:
  Value (Max F1 Score): 0.6044
  Best Hyperparameters:
    learning_rate: 0.010431688073932016
    depth: 6
    l2_leaf_reg: 0.13650220185823378
    subsample: 1.0
    random_strength: 0.003373546298820878
    bagging_temperature: 0.5141077456444894
    border_count: 118
    scale_pos_weight: 2.4566730131089503





In [14]:
best_params_from_optuna = study.best_trial.params.copy()

rfe_params = best_params_from_optuna.copy()
rfe_params.update({
    'iterations': 1000, # Max iterations per step
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 100,
    'random_state': 123,
    'verbose': False
})

train_pool = Pool(
    data=X_train_proc, 
    label=y_train, 
    cat_features=CAT_FEATURES
)

eval_pool = Pool(
    data=X_test_proc, 
    label=y_test, 
    cat_features=CAT_FEATURES
)

model_for_rfe = CatBoostClassifier(**rfe_params)

print("Starting CatBoost RFE...")
start_time = time.time()

summary = model_for_rfe.select_features(
    train_pool,
    eval_set=eval_pool,
    algorithm=EFeaturesSelectionAlgorithm.RecursiveByLossFunctionChange,
    steps=10, 
    num_features_to_select = 50,
    features_for_select=X_train_proc.columns.tolist(),
    logging_level='Info', # Shows progress
    train_final_model=False
)

end_time = time.time()
print(f"RFE Complete. Took {end_time - start_time:.2f} seconds.")

selected_features = summary['selected_features_names']
eliminated_features = summary['eliminated_features_names']

print(f"\n--- RFE Results ---")
print(f"Total features selected: {len(selected_features)}")
print(f"Total features eliminated: {len(eliminated_features)}")
print("\nEliminated features:")
print(eliminated_features)

Starting CatBoost RFE...
Step #1 out of 10

liab_prct_cubed, bin=36 score 24.98265619
liab_x_multicar, bin=7 score 31.58215735
liab_x_highrisk_site, bin=6 score 33.82888489
witness_binary, bin=0 score 35.48765322
liab_prct_cubed, bin=40 score 36.39629048
liab_prct_cubed, bin=29 score 37.05971309

liab_prct_cubed, bin=36 score 24.58744181
liab_x_multicar, bin=7 score 31.09713309
liab_x_highrisk_site, bin=6 score 33.31774394
liab_x_witness, bin=7 score 34.96616059
liab_inverse_squared, bin=29 score 35.86737153
liab_prct, bin=30 score 36.52739751

liab_inverse_squared, bin=36 score 24.13139734
liab_x_multicar, bin=7 score 30.54758602
liab_x_highrisk_site, bin=6 score 32.74130114
liab_x_witness, bin=7 score 34.37425016
liab_prct, bin=40 score 35.26409402
liab_inverse_squared, bin=42 score 35.91847287

liab_prct_squared, bin=36 score 23.74300728
liab_x_multicar, bin=7 score 30.06973542
liab_x_highrisk_site, bin=6 score 32.24394135
liab_x_witness, bin=7 score 33.85073346
liab_prct_sqrt, bin=

In [15]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 2000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 100, # Keep early stopping
    'random_state': 123
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.010431688073932016, 'depth': 6, 'l2_leaf_reg': 0.13650220185823378, 'subsample': 1.0, 'random_strength': 0.003373546298820878, 'bagging_temperature': 0.5141077456444894, 'border_count': 118, 'scale_pos_weight': 2.4566730131089503}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.6044

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.91      0.80      0.85      4165
   Class 1.0       0.52      0.72      0.60      1235

    accuracy                           0.78      5400
   macro avg       0.71      0.76      0.73      5400
weighted avg       0.82      0.78      0.79      5400



In [16]:
X_train_top_features = X_train_proc[selected_features]
X_test_top_features = X_test_proc[selected_features]

original_cat_features = set(CAT_FEATURES)
top_features_set = set(selected_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

In [18]:
final_params_rfe = study.best_trial.params.copy()
final_params_rfe.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 100,
    'random_state': 123
})

new_model = CatBoostClassifier(**final_params_rfe)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

# Get the original F1 score from cell 13 to compare
original_f1_score = f1_score(y_test, best_model.predict(X_test_proc), pos_label=1)

print("\n--- Model Performance Comparison ---")
print(f"Original F1 score (all features): {original_f1_score:.4f}")
print(f"New F1 score ({len(selected_features)} RFE features): {new_f1:.4f}")

print("\nNew Model Classification Report (RFE Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))


--- Model Performance Comparison ---
Original F1 score (all features): 0.6044
New F1 score (50 RFE features): 0.6026

New Model Classification Report (RFE Features):
              precision    recall  f1-score   support

   Class 0.0       0.91      0.80      0.85      4165
   Class 1.0       0.52      0.72      0.60      1235

    accuracy                           0.78      5400
   macro avg       0.71      0.76      0.73      5400
weighted avg       0.82      0.78      0.79      5400



In [19]:
print("Saving best_model...")

# Use the model's F1 score in the name
best_model.save_model("catboost_mod_f1_0.6044.cbm")

print("Done.")

Saving best_model...
Done.


In [20]:
importances = best_model.get_feature_importance()
feature_names = best_model.feature_names_

feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
})

feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

print("--- Feature Importance Analysis ---")
print(f"Total features: {len(feature_importance_df)}")

print("\nTop 10 Most Important Features:")
print(feature_importance_df.head(10))

print("\nBottom 10 Least Important Features:")
print(feature_importance_df.tail(10))

n_features_to_remove = 100
n_features_to_keep = len(feature_importance_df) - n_features_to_remove
top_features = feature_importance_df.head(n_features_to_keep)['feature'].tolist()

print(f"\nKeeping top {len(top_features)} features and removing bottom {n_features_to_remove}.")

X_train_top_features = X_train_proc[top_features]
X_test_top_features = X_test_proc.reindex(columns=top_features, fill_value=0) 

original_cat_features = set(CAT_FEATURES)
top_features_set = set(top_features)
new_cat_features = list(original_cat_features.intersection(top_features_set))

print(f"Original categorical features: {len(CAT_FEATURES)}")
print(f"Categorical features kept: {len(new_cat_features)}")

best_params_from_optuna = study.best_trial.params

final_params_new = best_params_from_optuna.copy()
final_params_new.update({
    'iterations': 2000, 
    'eval_metric': 'Logloss',
    'task_type': 'CPU',
    'early_stopping_rounds': 50
})

print("\nRetraining model with top features...")
new_model = CatBoostClassifier(**final_params_new)

new_model.fit(
    X_train_top_features, y_train,
    eval_set=(X_test_top_features, y_test),
    cat_features=new_cat_features,
    verbose=False
)

y_preds_new = new_model.predict(X_test_top_features)
new_f1 = f1_score(y_test, y_preds_new, pos_label=1)

print("\n--- Model Performance Comparison ---")
# 'final_f1' comes from cell 30 in your notebook
print(f"Original F1 score (all features): {final_f1:.4f}")
print(f"New F1 score (top {len(top_features)} features): {new_f1:.4f}")

print("\nNew Model Classification Report (Top Features):")
print(classification_report(y_test, y_preds_new, target_names=['Class 0.0', 'Class 1.0']))

--- Feature Importance Analysis ---
Total features: 161

Top 10 Most Important Features:
                        feature  importance
47              liab_x_multicar   13.979516
48         liab_x_highrisk_site    5.244503
152  recovery_feasibility_score    4.422504
67     is_multi_vehicle_unclear    4.329813
45               liab_x_witness    3.914721
61                liab_prct_log    2.977464
58            liab_prct_squared    2.962211
53          multicar_x_highrisk    2.834882
62                 liab_inverse    2.651149
13                    liab_prct    2.645595

Bottom 10 Least Important Features:
               feature  importance
34           is_monday         0.0
100      light_vehicle         0.0
99       heavy_vehicle         0.0
96      luxury_vehicle         0.0
95        young_novice         0.0
35           is_friday         0.0
36               is_q4         0.0
37          claim_hour         0.0
91      veteran_driver         0.0
107  very_high_mileage         0.0

Keep

In [21]:
# Output module, from model_citizens.ipynb
real_test = pd.read_csv("data/Testing_TriGuard.csv")

X_real_test_proc = pre.transform(real_test)
X_real_test_proc = X_real_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)
real_pred_proba = best_model.predict_proba(X_real_test_proc)[:, 1]
real_pred_label = (real_pred_proba >= 0.5).astype(int)

prediction = pd.DataFrame({
    "claim_number": real_test["claim_number"],
    "subrogation": real_pred_label
})

print(prediction.head())

Transforming data in 'catboost' mode...
Applying learned statistical transforms (Z-scores)...
CatBoost mode: Skipping target encoding application.
CatBoost mode: Dropping unused object/datetime columns...
Dropping: ['witness_present_ind', 'claim_date']
Transform complete.
   claim_number  subrogation
0       3126034            0
1       7380142            1
2       4655051            0
3       6728725            1
4       9848460            1


In [22]:
prediction.to_csv("results/catboost_6044_prediction.csv", index=False)