# Hyperparameter tuning

## We selected top-3 models. Let's tune their hyperparameters

# Imports

In [1]:
!pip install catboost -q
!pip install xgboost==2.0.3 -q
!pip install optuna -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.1/297.1 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import pandas as pd
import optuna
import catboost
import lightgbm as lgb
from catboost.utils import get_gpu_device_count
import xgboost as xgb
import cupy as cp
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import average_precision_score, balanced_accuracy_score
optuna.logging.set_verbosity(optuna.logging.INFO)

# Data

In [3]:
target = 'BANKR'

In [4]:
data = pd.read_csv('drive/MyDrive/data_catboost.csv')
X = data.drop(target, axis=1)
y = data[target]
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.15, stratify=y, random_state=42)

# Optuna

In [5]:
N_SPLITS = 3

### Catboost

In [8]:
def set_to_gpu(params):
  has_gpu = get_gpu_device_count() > 0
  params["task_type"] = "GPU" if has_gpu else "CPU"
  params["devices"] = "0" if has_gpu else ""
  return params

In [9]:
def catboost_param_space(trial):
    params = {
        "iterations": trial.suggest_int("iterations", 800, 1800),
        "learning_rate": trial.suggest_float("learning_rate", 0.008, 0.03, log=True),
        "depth": trial.suggest_int("depth", 3, 6),
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 150, 350),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 3.0, 15.0),
        "border_count": trial.suggest_int("border_count", 32, 128),
        "random_strength": trial.suggest_float("random_strength", 0.0, 1.2),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 1.5),
        "eval_metric": "Logloss",
        "loss_function": "Logloss",
        "verbose": 250
    }
    params = set_to_gpu(params)

    return params

In [10]:
def cv_score_catboost(params, X, y, cv, metric='balanced_accuracy'):
    scores = []

    for train_idx, val_idx in cv.split(X, y):

        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

        model = catboost.CatBoostClassifier(**params)
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)],  verbose=False, early_stopping_rounds=100)

        if metric == "average_precision":
            y_val_proba = model.predict_proba(X_val)[:, 1]
            score = average_precision_score(y_val, y_val_proba)
        elif metric == "balanced_accuracy":
            y_val_pred = model.predict(X_val)
            score = balanced_accuracy_score(y_val, y_val_pred)
        else:
            raise ValueError("Unknown metric")
        scores.append(score)
    score = sum(scores) / len(scores)
    return score

In [11]:
def objective_catboost(trial):
    params = catboost_param_space(trial)
    kf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
    score = cv_score_catboost(params=params,
                              X=X_train_full,
                              y=y_train_full,
                              cv=kf)
    return score


In [None]:
study_catboost = optuna.create_study(direction="maximize")
study_catboost.optimize(objective_catboost, n_trials=150)

[I 2025-11-30 13:58:54,710] A new study created in memory with name: no-name-5707417e-6fb4-4cd9-904d-ea46b767556a
[I 2025-11-30 14:00:09,721] Trial 0 finished with value: 0.8468990108790863 and parameters: {'iterations': 1648, 'learning_rate': 0.009879256933546975, 'depth': 4, 'scale_pos_weight': 172.6822765298391, 'l2_leaf_reg': 3.5535746924138056, 'border_count': 120, 'random_strength': 0.4677096070499432, 'bagging_temperature': 0.5962009640151342}. Best is trial 0 with value: 0.8468990108790863.
[I 2025-11-30 14:00:49,223] Trial 1 finished with value: 0.8543149006778995 and parameters: {'iterations': 1397, 'learning_rate': 0.02460559773182291, 'depth': 3, 'scale_pos_weight': 295.21864117153905, 'l2_leaf_reg': 10.45649631998873, 'border_count': 90, 'random_strength': 0.2892756311588285, 'bagging_temperature': 0.03670846288908697}. Best is trial 1 with value: 0.8543149006778995.
[I 2025-11-30 14:01:34,815] Trial 2 finished with value: 0.8505047963920987 and parameters: {'iterations': 

In [12]:
best_params = {'iterations': 1486,
                                               'learning_rate': 0.028711863825961668,
                                               'depth': 3,
                                               'scale_pos_weight': 304.53677886131595,
                                               'l2_leaf_reg': 14.072790632515524,
                                               'border_count': 78,
                                               'random_strength': 0.47114280903385825,
                                               'bagging_temperature': 0.5886678131427623,
                                               "verbose": 250}

best_params = set_to_gpu(best_params)

kf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
score = cv_score_catboost(params=best_params,
                              X=X_train_full,
                              y=y_train_full,
                              cv=kf,
                              metric='balanced_accuracy')
round(score, 3)

np.float64(0.856)

In [None]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = []
for train_idx, val_idx in kf.split(X_train_full, y_train_full):
        X_train, y_train = X_train_full.iloc[train_idx], y_train_full.iloc[train_idx]
        X_val, y_val = X_train_full.iloc[val_idx], y_train_full.iloc[val_idx]

        model = catboost.CatBoostClassifier(**{'iterations': 1486,
                                               'learning_rate': 0.028711863825961668,
                                               'depth': 3,
                                               'scale_pos_weight': 304.53677886131595,
                                               'l2_leaf_reg': 14.072790632515524,
                                               'border_count': 78,
                                               'random_strength': 0.47114280903385825,
                                               'bagging_temperature': 0.5886678131427623,
                                               "task_type": "GPU",
                                               "devices": "0",
                                               "verbose": 250})
        model.fit(X_train, y_train, eval_set=(X_val, y_val),  verbose=250, early_stopping_rounds=100)
        score = balanced_accuracy_score(y_val, model.predict(X_val))
        scores.append(score)

In [None]:
round(sum(scores) / len(scores), 2)

np.float64(0.86)

### XGBoost

In [None]:
def xgb_has_gpu():
    try:
        import cupy as cp
        if cp.cuda.runtime.getDeviceCount() > 0:
            return True
    except:
        pass
    try:
        import torch
        if torch.cuda.is_available():
            return True
    except:
        pass
    return False


In [None]:
def xgboost_param_space(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 500, 2000),
        "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.05, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 8),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.1, 10.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 5.0),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 100, 400),
        "gamma": trial.suggest_float("gamma", 0.0, 5.0),
        "booster": "gbtree",
        "objective": "binary:logistic",
        "eval_metric": "aucpr",
        "tree_method": "hist",
        "early_stopping_rounds": 100
    }

    if xgb_has_gpu():
        params["device"] = "cuda"
    else:
        params["device"] = "cpu"
    return params

In [None]:
def cv_score_xgboost(params, X, y, cv, metric='average_precision'):
    scores = []

    for train_idx, val_idx in cv.split(X, y):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx],  y.iloc[val_idx]

        model = xgb.XGBClassifier(**params)
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=250)
        if params['device']=='cuda':
          X_val_gpu = cp.asarray(X_val.values, dtype=cp.float32)
          y_val_proba_gpu = model.get_booster().inplace_predict(X_val_gpu)
          y_val_proba = cp.asnumpy(y_val_proba_gpu)

        else:
          y_val_proba = model.predict_proba(X_val)[:, 1]

        if metric == "average_precision":
            score = average_precision_score(y_val, y_val_proba)
        else:
            # more metrics may be addedd
            raise ValueError("Unknown metric")
        scores.append(score)

    return sum(scores) / len(scores)

In [None]:
def objective_xgboost(trial):
    params = xgboost_param_space(trial)
    params.setdefault("objective", "binary:logistic")
    kf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
    score = cv_score_xgboost(params=params,
                              X=X_train_full,
                              y=y_train_full,
                              cv=kf)
    return score



In [None]:
import xgboost
print(xgboost.build_info())

{'BUILTIN_PREFETCH_PRESENT': True, 'CUDA_VERSION': [11, 8], 'DEBUG': False, 'GCC_VERSION': [9, 3, 1], 'MM_PREFETCH_PRESENT': True, 'NCCL_VERSION': [2, 19, 3], 'THRUST_VERSION': [1, 15, 1], 'USE_CUDA': True, 'USE_FEDERATED': True, 'USE_NCCL': True, 'USE_OPENMP': True, 'USE_RMM': False, 'libxgboost': '/usr/local/lib/python3.12/dist-packages/xgboost/lib/libxgboost.so'}


In [None]:
study_xgboost = optuna.create_study(direction='maximize')
study_xgboost.optimize(objective_xgboost, n_trials=25)

[I 2025-11-28 18:52:14,081] A new study created in memory with name: no-name-f8166168-5ca9-4f2a-9ae2-65a526ddd2c1
[I 2025-11-28 18:54:00,669] Trial 0 finished with value: 0.11638014825474563 and parameters: {'n_estimators': 1111, 'learning_rate': 0.009759662593418681, 'max_depth': 4, 'min_child_weight': 17, 'reg_lambda': 8.434210735453131, 'reg_alpha': 4.492623525870921, 'subsample': 0.8138915355803797, 'colsample_bytree': 0.7349483535005608, 'scale_pos_weight': 138.4419244707434, 'gamma': 1.7832831888534466}. Best is trial 0 with value: 0.11638014825474563.
[I 2025-11-28 18:55:38,092] Trial 1 finished with value: 0.10819672029262477 and parameters: {'n_estimators': 1162, 'learning_rate': 0.009345078062783441, 'max_depth': 6, 'min_child_weight': 9, 'reg_lambda': 4.545975656991758, 'reg_alpha': 3.0029545973367338, 'subsample': 0.763327577511148, 'colsample_bytree': 0.6529604598948822, 'scale_pos_weight': 190.66907720633054, 'gamma': 0.7529880808267803}. Best is trial 0 with value: 0.116

##### [I 2025-11-28 18:54:00,669] Trial 0 finished with value: 0.11638014825474563 and parameters: {'n_estimators': 1111, 'learning_rate': 0.009759662593418681, 'max_depth': 4, 'min_child_weight': 17, 'reg_lambda': 8.434210735453131, 'reg_alpha': 4.492623525870921, 'subsample': 0.8138915355803797, 'colsample_bytree': 0.7349483535005608, 'scale_pos_weight': 138.4419244707434, 'gamma': 1.7832831888534466}.

In [None]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = []
for train_idx, val_idx in kf.split(X_train_full, y_train_full):
        X_train, y_train = X_train_full.iloc[train_idx], y_train_full.iloc[train_idx]
        X_val, y_val = X_train_full.iloc[val_idx], y_train_full.iloc[val_idx]

        model = xgb.XGBClassifier(**{'n_estimators': 1111,
                                     'learning_rate': 0.009759662593418681,
                                     'max_depth': 4, 'min_child_weight': 17,
                                     'reg_lambda': 8.434210735453131,
                                     'reg_alpha': 4.492623525870921,
                                     'subsample': 0.8138915355803797,
                                     'colsample_bytree': 0.7349483535005608,
                                     'scale_pos_weight': 138.4419244707434,
                                     'gamma': 1.7832831888534466,
                                     "eval_metric": "aucpr",
                                     "tree_method": "hist",
                                     "device": "cuda",
                                     "early_stopping_rounds": 100})

        model.fit(X_train, y_train, eval_set=[(X_val, y_val)],  verbose=250)

        score = balanced_accuracy_score(y_val, model.predict(X_val))
        scores.append(score)
print(sum(scores) / len(scores))

[0]	validation_0-aucpr:0.05653
[250]	validation_0-aucpr:0.13084
[462]	validation_0-aucpr:0.13199


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




[0]	validation_0-aucpr:0.05036
[250]	validation_0-aucpr:0.09919
[500]	validation_0-aucpr:0.10252
[750]	validation_0-aucpr:0.10605
[1000]	validation_0-aucpr:0.10819
[1110]	validation_0-aucpr:0.10833
[0]	validation_0-aucpr:0.05425
[250]	validation_0-aucpr:0.12159
[500]	validation_0-aucpr:0.13521
[719]	validation_0-aucpr:0.13800
[0]	validation_0-aucpr:0.04961
[250]	validation_0-aucpr:0.10062
[500]	validation_0-aucpr:0.10714
[750]	validation_0-aucpr:0.10979
[811]	validation_0-aucpr:0.10999
[0]	validation_0-aucpr:0.04230
[250]	validation_0-aucpr:0.09153
[500]	validation_0-aucpr:0.09764
[750]	validation_0-aucpr:0.09953
[1000]	validation_0-aucpr:0.10057
[1110]	validation_0-aucpr:0.10075
0.8433335340381181


# Summary

### Catboost showed the best results