In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import catboost as cb
from catboost import CatBoostClassifier
import time

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

from cc2_preprocessor import Preprocessor

np.random.seed(42)

In [2]:
df = pd.read_csv('data/Training_TriGuard.csv')
df = df.dropna(subset=['subrogation'])

In [3]:
pre = Preprocessor(smoothing_factor=5, mode = 'catboost')

In [4]:
X = df.drop(columns=["subrogation"]).copy()
y = df["subrogation"].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=0)

In [5]:
y_train.value_counts(normalize=True)

subrogation
0.0    0.77141
1.0    0.22859
Name: proportion, dtype: float64

In [6]:
y_test.value_counts(normalize=True)

subrogation
0.0    0.771296
1.0    0.228704
Name: proportion, dtype: float64

In [7]:
pre.fit(X_train, y_train)

X_train_proc = pre.transform(X_train)
X_test_proc = pre.transform(X_test)

X_test_proc = X_test_proc.reindex(columns=X_train_proc.columns, fill_value=0)

Fitting Preprocessor in 'catboost' mode...
CatBoost mode: Skipping target encoding learning.
Fit complete.
Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
Transform complete.
Transforming data in 'catboost' mode...
CatBoost mode: Skipping target encoding application.
Transform complete.


## Vanilla CatBoost

In [8]:
cb_clf = cb.CatBoostClassifier(
    objective='Logloss',
    random_state=42,
    thread_count=-1
)

In [9]:
cat_feature_names = pre.cat_for_encoding_
cb_clf.fit(X_train_proc, y_train, 
           cat_features=cat_feature_names,
           verbose=False)

<catboost.core.CatBoostClassifier at 0x118c9c590>

In [10]:
test_probabilities = cb_clf.predict_proba(X_test_proc)[:, 1]

test_classes = cb_clf.predict(X_test_proc)

print(f"Accuracy: {accuracy_score(y_test, test_classes)}")
print(f"F1 Score: {f1_score(y_test, test_classes)}")
print(f"ROC AUC Score: {roc_auc_score(y_test, test_probabilities)}") # Use probabilities
print(f"PR AUC (Average Precision): {average_precision_score(y_test, test_probabilities)}") # Use probabilities
print(f"Precision: {precision_score(y_test, test_classes)}")
print(f"Recall: {recall_score(y_test, test_classes)}")

Accuracy: 0.8112962962962963
F1 Score: 0.5108017282765243
ROC AUC Score: 0.8344394923961488
PR AUC (Average Precision): 0.6043784760678073
Precision: 0.6273584905660378
Recall: 0.4307692307692308


## Optuna

In [11]:
import optuna
from optuna.integration import CatBoostPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
CAT_FEATURES = pre.cat_for_encoding_
print(CAT_FEATURES)

['accident_site', 'accident_type', 'channel', 'vehicle_category', 'vehicle_color', 'living_status', 'claim_day_of_week', 'gender', 'in_network_bodyshop', 'season', 'witness_present_ind']


In [13]:
def objective(trial: optuna.trial.Trial) -> float:

    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 1.0, log=True), 
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 10.0),
        
        'eval_metric': 'F1',
        'task_type': 'CPU',
        'verbose': False,
        'early_stopping_rounds': 100
    }

    params['eval_metric'] = 'Logloss'
    
    model = CatBoostClassifier(**params)
    
    model.fit(
        X_train_proc, y_train,
        eval_set=(X_test_proc, y_test),
        cat_features=CAT_FEATURES,
        verbose=False
    )

    y_preds = model.predict(X_test_proc)
    
    manual_f1_score = f1_score(y_test, y_preds, pos_label=1)
    
    return manual_f1_score

In [14]:
print("\n2. Starting Optuna study...")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
)

study.optimize(
    objective, 
    n_trials=50, # Number of trials to run
    show_progress_bar=True
)

print("\n" + "="*50)
print("Optuna study finished.")
print(f"Number of finished trials: {len(study.trials)}")

print("\nBest trial:")
best_trial = study.best_trial
    
print(f"  Value (Max F1 Score): {best_trial.value:.4f}") # <-- CHANGED comment
    
print("  Best Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-11-06 16:05:00,457] A new study created in memory with name: no-name-387d49d2-eeb1-4786-ab11-e40c2719700f



2. Starting Optuna study...


Best trial: 0. Best value: 0.58816:   2%|▏         | 1/50 [00:02<01:49,  2.23s/it]

[I 2025-11-06 16:05:02,698] Trial 0 finished with value: 0.58816 and parameters: {'learning_rate': 0.046979096320275124, 'depth': 4, 'l2_leaf_reg': 0.008851643989766985, 'subsample': 0.8, 'random_strength': 0.035380357300219614, 'bagging_temperature': 0.3311429374194401, 'border_count': 244, 'scale_pos_weight': 2.817846828417797}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:   4%|▍         | 2/50 [00:03<01:09,  1.45s/it]

[I 2025-11-06 16:05:03,595] Trial 1 finished with value: 0.5875432525951557 and parameters: {'learning_rate': 0.1935435205268542, 'depth': 5, 'l2_leaf_reg': 0.014710383759187094, 'subsample': 0.6, 'random_strength': 0.19776024151909286, 'bagging_temperature': 0.4019058015683371, 'border_count': 174, 'scale_pos_weight': 2.2552811119697744}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:   6%|▌         | 3/50 [00:08<02:34,  3.28s/it]

[I 2025-11-06 16:05:09,061] Trial 2 finished with value: 0.5232390991854337 and parameters: {'learning_rate': 0.017304484759682163, 'depth': 5, 'l2_leaf_reg': 0.04353550302118484, 'subsample': 0.9, 'random_strength': 9.612704109396776e-06, 'bagging_temperature': 0.01045031875603486, 'border_count': 157, 'scale_pos_weight': 1.0462456834770593}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:   8%|▊         | 4/50 [00:11<02:19,  3.04s/it]

[I 2025-11-06 16:05:11,727] Trial 3 finished with value: 0.5628385698808234 and parameters: {'learning_rate': 0.0688486807731047, 'depth': 8, 'l2_leaf_reg': 0.3320988745964888, 'subsample': 0.9, 'random_strength': 0.005382783857565332, 'bagging_temperature': 0.602279081826304, 'border_count': 38, 'scale_pos_weight': 5.0289878713789715}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  10%|█         | 5/50 [00:16<02:46,  3.71s/it]

[I 2025-11-06 16:05:16,622] Trial 4 finished with value: 0.5733596170092932 and parameters: {'learning_rate': 0.014728692280982083, 'depth': 3, 'l2_leaf_reg': 0.03388387902936107, 'subsample': 1.0, 'random_strength': 4.904011353955327e-05, 'bagging_temperature': 0.4776800950552268, 'border_count': 252, 'scale_pos_weight': 4.106444669712607}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  12%|█▏        | 6/50 [00:22<03:29,  4.76s/it]

[I 2025-11-06 16:05:23,436] Trial 5 finished with value: 0.5301495417269657 and parameters: {'learning_rate': 0.033853162377551566, 'depth': 10, 'l2_leaf_reg': 5.828375414111025, 'subsample': 0.8, 'random_strength': 6.898295579592341e-08, 'bagging_temperature': 0.6626392078893227, 'border_count': 91, 'scale_pos_weight': 9.77161100629722}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  14%|█▍        | 7/50 [00:24<02:41,  3.75s/it]

[I 2025-11-06 16:05:25,084] Trial 6 finished with value: 0.5206479580196213 and parameters: {'learning_rate': 0.1137461501263439, 'depth': 7, 'l2_leaf_reg': 0.4028523953282956, 'subsample': 0.5, 'random_strength': 0.0030606113461992823, 'bagging_temperature': 0.4052572744863644, 'border_count': 232, 'scale_pos_weight': 9.423631273657335}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  16%|█▌        | 8/50 [00:26<02:09,  3.09s/it]

[I 2025-11-06 16:05:26,772] Trial 7 finished with value: 0.5716566866267465 and parameters: {'learning_rate': 0.05942541832662259, 'depth': 4, 'l2_leaf_reg': 0.03953551217107074, 'subsample': 0.8, 'random_strength': 0.001752563544636421, 'bagging_temperature': 0.6079464173864577, 'border_count': 234, 'scale_pos_weight': 1.6007501268022226}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  18%|█▊        | 9/50 [00:27<01:41,  2.48s/it]

[I 2025-11-06 16:05:27,924] Trial 8 finished with value: 0.5767905017234776 and parameters: {'learning_rate': 0.2252913845994247, 'depth': 5, 'l2_leaf_reg': 7.775442247662198, 'subsample': 0.6, 'random_strength': 0.0037294156211921375, 'bagging_temperature': 0.6801749001115472, 'border_count': 172, 'scale_pos_weight': 1.8139920622944312}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  20%|██        | 10/50 [00:29<01:28,  2.22s/it]

[I 2025-11-06 16:05:29,542] Trial 9 finished with value: 0.5717579250720461 and parameters: {'learning_rate': 0.06593067557300361, 'depth': 3, 'l2_leaf_reg': 0.0010081267008567472, 'subsample': 0.5, 'random_strength': 0.036650074687523994, 'bagging_temperature': 0.018515825494052507, 'border_count': 234, 'scale_pos_weight': 3.8663319288377744}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  22%|██▏       | 11/50 [00:31<01:28,  2.26s/it]

[I 2025-11-06 16:05:31,888] Trial 10 finished with value: 0.5330449411199231 and parameters: {'learning_rate': 0.029265308593494996, 'depth': 7, 'l2_leaf_reg': 0.002855387187207495, 'subsample': 0.7, 'random_strength': 0.5726298923962275, 'bagging_temperature': 0.9524960568116065, 'border_count': 111, 'scale_pos_weight': 6.865696337688695}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  24%|██▍       | 12/50 [00:32<01:10,  1.86s/it]

[I 2025-11-06 16:05:32,849] Trial 11 finished with value: 0.5815516188149054 and parameters: {'learning_rate': 0.2725563209729562, 'depth': 5, 'l2_leaf_reg': 0.00675835108728658, 'subsample': 0.7, 'random_strength': 0.8449668615485003, 'bagging_temperature': 0.30406151346549704, 'border_count': 192, 'scale_pos_weight': 3.016718170801257}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  26%|██▌       | 13/50 [00:33<01:03,  1.70s/it]

[I 2025-11-06 16:05:34,186] Trial 12 finished with value: 0.5813350615683733 and parameters: {'learning_rate': 0.13699166748386352, 'depth': 6, 'l2_leaf_reg': 0.009425373659036047, 'subsample': 0.6, 'random_strength': 0.12165026811864917, 'bagging_temperature': 0.2963817995325921, 'border_count': 197, 'scale_pos_weight': 2.76293767174532}. Best is trial 0 with value: 0.58816.


Best trial: 0. Best value: 0.58816:  28%|██▊       | 14/50 [00:34<00:53,  1.47s/it]

[I 2025-11-06 16:05:35,133] Trial 13 finished with value: 0.5344701583434835 and parameters: {'learning_rate': 0.12055126003038016, 'depth': 4, 'l2_leaf_reg': 0.010008007135726808, 'subsample': 0.6, 'random_strength': 5.503276739032583e-07, 'bagging_temperature': 0.19529251455062152, 'border_count': 114, 'scale_pos_weight': 6.645068984241121}. Best is trial 0 with value: 0.58816.


Best trial: 14. Best value: 0.593679:  30%|███       | 15/50 [00:37<01:05,  1.86s/it]

[I 2025-11-06 16:05:37,893] Trial 14 finished with value: 0.5936787227109808 and parameters: {'learning_rate': 0.03487952752925363, 'depth': 4, 'l2_leaf_reg': 0.2045450394513702, 'subsample': 0.7, 'random_strength': 0.0002832276699549081, 'bagging_temperature': 0.26396963199449264, 'border_count': 200, 'scale_pos_weight': 2.7287433600049016}. Best is trial 14 with value: 0.5936787227109808.


Best trial: 14. Best value: 0.593679:  32%|███▏      | 16/50 [00:40<01:16,  2.25s/it]

[I 2025-11-06 16:05:41,044] Trial 15 finished with value: 0.5543841604525584 and parameters: {'learning_rate': 0.029612214621440768, 'depth': 3, 'l2_leaf_reg': 0.2513922102381673, 'subsample': 0.8, 'random_strength': 9.238082616277385e-05, 'bagging_temperature': 0.13244059695021493, 'border_count': 211, 'scale_pos_weight': 5.487603469592041}. Best is trial 14 with value: 0.5936787227109808.


Best trial: 14. Best value: 0.593679:  34%|███▍      | 17/50 [00:46<01:53,  3.44s/it]

[I 2025-11-06 16:05:47,249] Trial 16 finished with value: 0.5734226689000559 and parameters: {'learning_rate': 0.010016623919588252, 'depth': 4, 'l2_leaf_reg': 0.9002296868360575, 'subsample': 0.9, 'random_strength': 2.3337743373077685e-06, 'bagging_temperature': 0.22123637963484652, 'border_count': 253, 'scale_pos_weight': 4.23535349422068}. Best is trial 14 with value: 0.5936787227109808.


Best trial: 14. Best value: 0.593679:  36%|███▌      | 18/50 [00:51<01:59,  3.74s/it]

[I 2025-11-06 16:05:51,695] Trial 17 finished with value: 0.5446247464503042 and parameters: {'learning_rate': 0.043461187964782586, 'depth': 9, 'l2_leaf_reg': 1.8598293704279876, 'subsample': 0.7, 'random_strength': 0.0004831647184670957, 'bagging_temperature': 0.8359953115073049, 'border_count': 131, 'scale_pos_weight': 6.767519825762708}. Best is trial 14 with value: 0.5936787227109808.


Best trial: 14. Best value: 0.593679:  38%|███▊      | 19/50 [00:56<02:08,  4.15s/it]

[I 2025-11-06 16:05:56,783] Trial 18 finished with value: 0.5843672456575683 and parameters: {'learning_rate': 0.019409756326886088, 'depth': 6, 'l2_leaf_reg': 0.1313188876208193, 'subsample': 1.0, 'random_strength': 0.023728641866349324, 'bagging_temperature': 0.34405912663121563, 'border_count': 201, 'scale_pos_weight': 3.3127762846257824}. Best is trial 14 with value: 0.5936787227109808.


Best trial: 14. Best value: 0.593679:  40%|████      | 20/50 [00:58<01:47,  3.57s/it]

[I 2025-11-06 16:05:59,003] Trial 19 finished with value: 0.5632674853176722 and parameters: {'learning_rate': 0.04225309941131606, 'depth': 4, 'l2_leaf_reg': 0.09905255648298267, 'subsample': 0.8, 'random_strength': 0.00037418327440405606, 'bagging_temperature': 0.4840930550019609, 'border_count': 223, 'scale_pos_weight': 4.883248653994913}. Best is trial 14 with value: 0.5936787227109808.


Best trial: 14. Best value: 0.593679:  42%|████▏     | 21/50 [00:59<01:23,  2.89s/it]

[I 2025-11-06 16:06:00,310] Trial 20 finished with value: 0.5183318056828597 and parameters: {'learning_rate': 0.0803002494095258, 'depth': 6, 'l2_leaf_reg': 0.002643405064527379, 'subsample': 0.7, 'random_strength': 2.0878352580977654e-05, 'bagging_temperature': 0.13526028161614703, 'border_count': 177, 'scale_pos_weight': 8.342247151560835}. Best is trial 14 with value: 0.5936787227109808.


Best trial: 21. Best value: 0.594254:  44%|████▍     | 22/50 [01:00<01:04,  2.31s/it]

[I 2025-11-06 16:06:01,273] Trial 21 finished with value: 0.5942536790469517 and parameters: {'learning_rate': 0.17517914221079733, 'depth': 5, 'l2_leaf_reg': 0.017919639796803905, 'subsample': 0.6, 'random_strength': 0.08934821099204092, 'bagging_temperature': 0.37892869614656927, 'border_count': 165, 'scale_pos_weight': 2.17796574135835}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  46%|████▌     | 23/50 [01:05<01:18,  2.90s/it]

[I 2025-11-06 16:06:05,548] Trial 22 finished with value: 0.5862542955326461 and parameters: {'learning_rate': 0.023524577974089394, 'depth': 4, 'l2_leaf_reg': 0.023024936800200166, 'subsample': 0.7, 'random_strength': 0.024855157390075523, 'bagging_temperature': 0.24426919241011386, 'border_count': 153, 'scale_pos_weight': 2.3426907810484425}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  48%|████▊     | 24/50 [01:06<01:04,  2.47s/it]

[I 2025-11-06 16:06:07,002] Trial 23 finished with value: 0.5795284989555357 and parameters: {'learning_rate': 0.0902838663930691, 'depth': 5, 'l2_leaf_reg': 0.08547514712015529, 'subsample': 0.5, 'random_strength': 0.0007061671503481515, 'bagging_temperature': 0.39573992280167014, 'border_count': 139, 'scale_pos_weight': 3.5051863896766235}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  50%|█████     | 25/50 [01:08<00:58,  2.36s/it]

[I 2025-11-06 16:06:09,109] Trial 24 finished with value: 0.5648389904264578 and parameters: {'learning_rate': 0.04435616166294732, 'depth': 3, 'l2_leaf_reg': 0.0034317293910926575, 'subsample': 0.6, 'random_strength': 0.08442501671920413, 'bagging_temperature': 0.5322382258830676, 'border_count': 213, 'scale_pos_weight': 1.2792698236250604}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  52%|█████▏    | 26/50 [01:10<00:56,  2.35s/it]

[I 2025-11-06 16:06:11,440] Trial 25 finished with value: 0.5894154271878242 and parameters: {'learning_rate': 0.051242993180046596, 'depth': 4, 'l2_leaf_reg': 0.0661093091518853, 'subsample': 0.8, 'random_strength': 0.013518423664670542, 'bagging_temperature': 0.12615158586062972, 'border_count': 178, 'scale_pos_weight': 2.342556611038651}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  54%|█████▍    | 27/50 [01:12<00:47,  2.04s/it]

[I 2025-11-06 16:06:12,769] Trial 26 finished with value: 0.5811011904761905 and parameters: {'learning_rate': 0.16753126162071683, 'depth': 6, 'l2_leaf_reg': 0.948679676756493, 'subsample': 0.7, 'random_strength': 0.007153197678518989, 'bagging_temperature': 0.10763684956545473, 'border_count': 184, 'scale_pos_weight': 1.947304148006884}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  56%|█████▌    | 28/50 [01:13<00:42,  1.93s/it]

[I 2025-11-06 16:06:14,446] Trial 27 finished with value: 0.5918576804652754 and parameters: {'learning_rate': 0.09797668141980785, 'depth': 5, 'l2_leaf_reg': 0.1656992143260512, 'subsample': 0.9, 'random_strength': 0.0001677507760083559, 'bagging_temperature': 0.11133110018422943, 'border_count': 158, 'scale_pos_weight': 2.3604987052798543}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  58%|█████▊    | 29/50 [01:15<00:39,  1.87s/it]

[I 2025-11-06 16:06:16,155] Trial 28 finished with value: 0.5398622047244095 and parameters: {'learning_rate': 0.09548446255221738, 'depth': 7, 'l2_leaf_reg': 0.21095811631801348, 'subsample': 0.9, 'random_strength': 8.306167998274236e-06, 'bagging_temperature': 0.06044473866426539, 'border_count': 161, 'scale_pos_weight': 6.145531104620808}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  60%|██████    | 30/50 [01:16<00:33,  1.66s/it]

[I 2025-11-06 16:06:17,349] Trial 29 finished with value: 0.587655942219304 and parameters: {'learning_rate': 0.15384921701025375, 'depth': 5, 'l2_leaf_reg': 0.6360946867761581, 'subsample': 1.0, 'random_strength': 0.00015853848285730685, 'bagging_temperature': 0.21754126702279314, 'border_count': 79, 'scale_pos_weight': 2.638229122274036}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  62%|██████▏   | 31/50 [01:17<00:28,  1.50s/it]

[I 2025-11-06 16:06:18,454] Trial 30 finished with value: 0.5773739742086753 and parameters: {'learning_rate': 0.2705346300995585, 'depth': 6, 'l2_leaf_reg': 1.941467138533925, 'subsample': 0.6, 'random_strength': 3.5795226008183155e-08, 'bagging_temperature': 0.27649185540987176, 'border_count': 128, 'scale_pos_weight': 3.616618989474895}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  64%|██████▍   | 32/50 [01:20<00:30,  1.71s/it]

[I 2025-11-06 16:06:20,676] Trial 31 finished with value: 0.5853337717855969 and parameters: {'learning_rate': 0.05269504435240573, 'depth': 4, 'l2_leaf_reg': 0.06868414321820818, 'subsample': 0.9, 'random_strength': 0.013405221295276484, 'bagging_temperature': 0.15929892627731057, 'border_count': 165, 'scale_pos_weight': 2.6483139746486595}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  66%|██████▌   | 33/50 [01:21<00:26,  1.57s/it]

[I 2025-11-06 16:06:21,897] Trial 32 finished with value: 0.5874423554451933 and parameters: {'learning_rate': 0.19638052275870277, 'depth': 5, 'l2_leaf_reg': 0.02340662249802055, 'subsample': 0.8, 'random_strength': 0.0012339642887144718, 'bagging_temperature': 0.08146767586941892, 'border_count': 144, 'scale_pos_weight': 2.112768009828224}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  68%|██████▊   | 34/50 [01:26<00:40,  2.52s/it]

[I 2025-11-06 16:06:26,650] Trial 33 finished with value: 0.5381040892193308 and parameters: {'learning_rate': 0.03519046090869415, 'depth': 4, 'l2_leaf_reg': 0.17168835980238012, 'subsample': 0.8, 'random_strength': 0.27356876302825794, 'bagging_temperature': 0.3482795761139151, 'border_count': 184, 'scale_pos_weight': 1.099229174630339}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  70%|███████   | 35/50 [01:27<00:32,  2.17s/it]

[I 2025-11-06 16:06:27,996] Trial 34 finished with value: 0.5659967409016838 and parameters: {'learning_rate': 0.0783042841873058, 'depth': 5, 'l2_leaf_reg': 0.01598607154296452, 'subsample': 0.9, 'random_strength': 0.00022174343620807497, 'bagging_temperature': 0.16304674676100117, 'border_count': 155, 'scale_pos_weight': 4.379647865656415}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  72%|███████▏  | 36/50 [01:28<00:25,  1.85s/it]

[I 2025-11-06 16:06:29,109] Trial 35 finished with value: 0.5875 and parameters: {'learning_rate': 0.11731344662379657, 'depth': 3, 'l2_leaf_reg': 0.07046849181432564, 'subsample': 0.9, 'random_strength': 0.06871569240028318, 'bagging_temperature': 0.40056942002776397, 'border_count': 205, 'scale_pos_weight': 3.0431695977735687}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  74%|███████▍  | 37/50 [01:30<00:25,  1.96s/it]

[I 2025-11-06 16:06:31,305] Trial 36 finished with value: 0.5717712914834067 and parameters: {'learning_rate': 0.05510432913318776, 'depth': 5, 'l2_leaf_reg': 0.045079296263944225, 'subsample': 0.7, 'random_strength': 4.131764490612984e-05, 'bagging_temperature': 0.006164501962277191, 'border_count': 185, 'scale_pos_weight': 1.6149153820429363}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  76%|███████▌  | 38/50 [01:35<00:34,  2.87s/it]

[I 2025-11-06 16:06:36,294] Trial 37 finished with value: 0.5912684771399106 and parameters: {'learning_rate': 0.02351196236323567, 'depth': 3, 'l2_leaf_reg': 0.4982877958302131, 'subsample': 0.8, 'random_strength': 4.2369406735701155e-06, 'bagging_temperature': 0.2596517024877365, 'border_count': 169, 'scale_pos_weight': 2.3168789334930966}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  78%|███████▊  | 39/50 [01:41<00:39,  3.59s/it]

[I 2025-11-06 16:06:41,561] Trial 38 finished with value: 0.5670913526701002 and parameters: {'learning_rate': 0.012977146806213318, 'depth': 3, 'l2_leaf_reg': 0.5097508635384178, 'subsample': 1.0, 'random_strength': 9.088407321010118e-07, 'bagging_temperature': 0.44493496109261627, 'border_count': 145, 'scale_pos_weight': 4.513944018704394}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  80%|████████  | 40/50 [01:46<00:40,  4.00s/it]

[I 2025-11-06 16:06:46,534] Trial 39 finished with value: 0.5604821351700388 and parameters: {'learning_rate': 0.021717667050714567, 'depth': 8, 'l2_leaf_reg': 0.3434346884228542, 'subsample': 0.8, 'random_strength': 1.3898930975026733e-07, 'bagging_temperature': 0.5502839968542582, 'border_count': 170, 'scale_pos_weight': 1.4297988255816865}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  82%|████████▏ | 41/50 [01:51<00:39,  4.43s/it]

[I 2025-11-06 16:06:51,947] Trial 40 finished with value: 0.5902802412202909 and parameters: {'learning_rate': 0.02543999284598748, 'depth': 3, 'l2_leaf_reg': 2.3347768977469743, 'subsample': 0.6, 'random_strength': 5.311472570929091e-06, 'bagging_temperature': 0.25829149721496025, 'border_count': 118, 'scale_pos_weight': 2.0960946659301816}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  84%|████████▍ | 42/50 [01:55<00:33,  4.21s/it]

[I 2025-11-06 16:06:55,669] Trial 41 finished with value: 0.5926449787835927 and parameters: {'learning_rate': 0.023558002272652125, 'depth': 3, 'l2_leaf_reg': 1.0745643371894174, 'subsample': 0.5, 'random_strength': 4.305703504416459e-06, 'bagging_temperature': 0.263620718294536, 'border_count': 70, 'scale_pos_weight': 2.113921970751892}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  86%|████████▌ | 43/50 [01:59<00:29,  4.24s/it]

[I 2025-11-06 16:06:59,958] Trial 42 finished with value: 0.5791828213540113 and parameters: {'learning_rate': 0.015970887438824848, 'depth': 3, 'l2_leaf_reg': 1.1962856834443603, 'subsample': 0.5, 'random_strength': 1.9249158525572238e-05, 'bagging_temperature': 0.34175882851877853, 'border_count': 50, 'scale_pos_weight': 3.389533599138505}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  88%|████████▊ | 44/50 [02:02<00:22,  3.82s/it]

[I 2025-11-06 16:07:02,792] Trial 43 finished with value: 0.5925437149455626 and parameters: {'learning_rate': 0.03611282867590995, 'depth': 3, 'l2_leaf_reg': 0.1516275140690161, 'subsample': 0.5, 'random_strength': 2.9511873480383397e-06, 'bagging_temperature': 0.18466157739927655, 'border_count': 92, 'scale_pos_weight': 2.558502466303026}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  90%|█████████ | 45/50 [02:06<00:19,  3.83s/it]

[I 2025-11-06 16:07:06,657] Trial 44 finished with value: 0.5782286383144752 and parameters: {'learning_rate': 0.03704206918029225, 'depth': 4, 'l2_leaf_reg': 0.14418312600302835, 'subsample': 0.5, 'random_strength': 1.0959273929456317e-08, 'bagging_temperature': 0.18734640299923028, 'border_count': 75, 'scale_pos_weight': 1.6757371415445275}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  92%|█████████▏| 46/50 [02:10<00:15,  3.92s/it]

[I 2025-11-06 16:07:10,790] Trial 45 finished with value: 0.5867170286433743 and parameters: {'learning_rate': 0.03178437209775508, 'depth': 3, 'l2_leaf_reg': 0.2977435276295247, 'subsample': 0.5, 'random_strength': 1.3243913238638372e-06, 'bagging_temperature': 0.06772097335432428, 'border_count': 55, 'scale_pos_weight': 2.9790253294571447}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  94%|█████████▍| 47/50 [02:12<00:10,  3.37s/it]

[I 2025-11-06 16:07:12,863] Trial 46 finished with value: 0.5217807563427477 and parameters: {'learning_rate': 0.06699613847334379, 'depth': 5, 'l2_leaf_reg': 0.04614004201032953, 'subsample': 0.5, 'random_strength': 2.4714011299830087e-07, 'bagging_temperature': 0.4288549598219754, 'border_count': 103, 'scale_pos_weight': 1.0290599122728867}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  96%|█████████▌| 48/50 [02:15<00:06,  3.36s/it]

[I 2025-11-06 16:07:16,205] Trial 47 finished with value: 0.5887819449054099 and parameters: {'learning_rate': 0.02686841985334969, 'depth': 4, 'l2_leaf_reg': 5.561288542465458, 'subsample': 0.6, 'random_strength': 2.9955031738753685e-05, 'bagging_temperature': 0.32802593833342997, 'border_count': 32, 'scale_pos_weight': 2.5707047257636635}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254:  98%|█████████▊| 49/50 [02:18<00:03,  3.03s/it]

[I 2025-11-06 16:07:18,476] Trial 48 finished with value: 0.5742800114057599 and parameters: {'learning_rate': 0.038466748594772344, 'depth': 4, 'l2_leaf_reg': 4.574324290928468, 'subsample': 0.5, 'random_strength': 0.0017743416811693633, 'bagging_temperature': 0.2022584974305492, 'border_count': 68, 'scale_pos_weight': 3.8923806669455265}. Best is trial 21 with value: 0.5942536790469517.


Best trial: 21. Best value: 0.594254: 100%|██████████| 50/50 [02:22<00:00,  2.85s/it]

[I 2025-11-06 16:07:22,876] Trial 49 finished with value: 0.5852568875651526 and parameters: {'learning_rate': 0.018466052244111894, 'depth': 3, 'l2_leaf_reg': 0.125077522843349, 'subsample': 0.6, 'random_strength': 5.940204024730539e-05, 'bagging_temperature': 0.29866455012135673, 'border_count': 90, 'scale_pos_weight': 1.8191341664285408}. Best is trial 21 with value: 0.5942536790469517.

Optuna study finished.
Number of finished trials: 50

Best trial:
  Value (Max F1 Score): 0.5943
  Best Hyperparameters:
    learning_rate: 0.17517914221079733
    depth: 5
    l2_leaf_reg: 0.017919639796803905
    subsample: 0.6
    random_strength: 0.08934821099204092
    bagging_temperature: 0.37892869614656927
    border_count: 165
    scale_pos_weight: 2.17796574135835





In [15]:
best_params = study.best_trial.params
print(best_params)

final_params = best_params.copy()
final_params.update({
    'iterations': 2000, # Use more iterations for the final model
    'eval_metric': 'Logloss', # Use Logloss for training/stopping
    'task_type': 'CPU',
    'early_stopping_rounds': 50 # Keep early stopping
})

best_model = CatBoostClassifier(**final_params)

best_model.fit(
    X_train_proc, y_train,
    eval_set=(X_test_proc, y_test),
    cat_features=CAT_FEATURES,
    verbose=False
)

print(f"\nFinal Model Score (from best Logloss iteration):")
y_preds_final = best_model.predict(X_test_proc)
final_f1 = f1_score(y_test, y_preds_final, pos_label=1)
print(f"  Manual F1:class=1 Score: {final_f1:.4f}")
        
print("\n  Full Classification Report:")
print(classification_report(y_test, y_preds_final, target_names=['Class 0.0', 'Class 1.0']))

{'learning_rate': 0.17517914221079733, 'depth': 5, 'l2_leaf_reg': 0.017919639796803905, 'subsample': 0.6, 'random_strength': 0.08934821099204092, 'bagging_temperature': 0.37892869614656927, 'border_count': 165, 'scale_pos_weight': 2.17796574135835}

Final Model Score (from best Logloss iteration):
  Manual F1:class=1 Score: 0.5943

  Full Classification Report:
              precision    recall  f1-score   support

   Class 0.0       0.90      0.81      0.85      4165
   Class 1.0       0.52      0.69      0.59      1235

    accuracy                           0.79      5400
   macro avg       0.71      0.75      0.72      5400
weighted avg       0.81      0.79      0.79      5400

