# Import

In [1]:
import random
import os
from collections import defaultdict
from tqdm import tqdm
import csv
from datetime import datetime


import pandas as pd
import numpy as np
from scipy.stats import pearsonr
from sklearn.metrics import recall_score, classification_report, confusion_matrix, make_scorer
from sklearn.inspection import permutation_importance
import xgboost as xgb
import matplotlib.pyplot as plt
import optuna


def set_all_seeds(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    

def log(record, name: str = 'log__tuning.csv'):
    with open(name, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter=';', dialect='excel')
        writer.writerow([datetime.now()] + [record])


random_seed = 1234
set_all_seeds(random_seed)

  from .autonotebook import tqdm as notebook_tqdm


# Parameters

In [2]:
dataset_path = 'data/dataset_after_eda.parquet'
target_column = 'target'
datetime_column = '17'
time_split = '2025-04-15'

# Hyperparameter tuning

## Functions

In [3]:
def lprint(rec):
    try:
        log(rec)
        print(rec)
    except:
        r = 'Logging Error'
        log(r)
        print(r)

def model_report(y_true, y_pred, model_name = '_'):
    print('\n---------------------------------------------------------------')
    print(f"Model: {model_name}")
    print(f"Test data length: {len(y_true)}")
    print(f"Fraud sessions in test data: {sum(y_true)}")
    
    print(classification_report(y_true, y_pred))
    
    if len(set(y_true))>1:
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()        
        
        print(f'\nTN: {tn}')
        print(f'FP: {fp}')
        print(f'FN: {fn}')
        print(f'TP: {tp}')   
        
        fpr = fp/(fp+tn) 
        fnr = fn/(fn+tp)
        print('False Negative Rate = ',fnr)
        print('False Positive Rate = ',fpr)
    else:
        print('One label, fpr is out function scope.')
        

def find_threshold_fpr(y_true, y_pred_proba, target_fpr):
    sorted_indices = np.argsort(y_pred_proba)[::-1]
    y_true_sorted = y_true[sorted_indices]
    y_pred_proba_sorted = y_pred_proba[sorted_indices]
    false_pos = 0.0
    threshold = 0.0
    for i in range(len(y_true_sorted)):
        if y_true_sorted[i] == 0:
            false_pos += 1
        if false_pos / len(y_true_sorted) >= target_fpr:
            threshold = y_pred_proba_sorted[i]
            break
    return threshold


def get_recall_on_fpr(y_true, y_proba, target_fpr):
    threshold = find_threshold_fpr(y_true, y_proba, target_fpr)
    y_pred_binary = np.where(y_proba >= threshold, 1, 0)
    score = recall_score(y_true, y_pred_binary)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred_binary).ravel()
    fpr = fp / (fp + tn)
    if fpr > target_fpr * 2:
        return 0
    return score


def train_test_split_df(X, 
                        y, 
                        class_probabilities={0:0.2,1:0.2}, 
                        shuffle=True,
                        random_state=random_seed):
    """
    A dictionary of shares is used with each class. These data shares are recorded for sample in the test set.
    Example: {0:0.02, 1:0.02}
    return:
        X_train, X_test, y_train, y_test
    """
    
    if random_state is not None:
        random.seed(random_state)

    class_indices = defaultdict(list)

    for idx, label in enumerate(y.to_numpy()):
        class_indices[label].append(idx)

    X_train, X_test, y_train, y_test = [], [], [], []

    for label, prob in class_probabilities.items():
        indices = class_indices[label]
        split_index = int((1 - prob) * len(indices))
        if shuffle:
            random.shuffle(indices)

        train_indices, test_indices = indices[:split_index], indices[split_index:]

        X_train.append(X.iloc[train_indices])
        X_test.append(X.iloc[test_indices])
        y_train.append(y.iloc[train_indices])
        y_test.append(y.iloc[test_indices]) 
        
    X_train = pd.concat([*X_train],ignore_index=True)
    X_test = pd.concat([*X_test],ignore_index=True)
    y_train = pd.concat([*y_train],ignore_index=True)
    y_test = pd.concat([*y_test],ignore_index=True)
    
    return X_train, X_test, y_train, y_test
    

def make_test(df, target_column, datetime_column, split_date, class_0_share=0.1):
    
    df[datetime_column] = pd.to_datetime(df[datetime_column])
    
    dt_train_fraud = df[(df[datetime_column] < split_date) & (df[target_column]==1)]
    df_test_fraud = df[(df[datetime_column] >= split_date) & (df[target_column]==1)]
                        
    df_test_clear = df[df[target_column]==0].sample(frac=class_0_share, random_state=random_seed)
    df_train_clear = df[df[target_column]==0].drop(df_test_clear.index)
    
    train = pd.concat([dt_train_fraud, df_train_clear],ignore_index=True).drop(columns=[datetime_column])
    test = pd.concat([df_test_fraud, df_test_clear],ignore_index=True).drop(columns=[datetime_column])
    
    return train, test


def objective(trial):

    param = {
        'objective': 'binary:logistic',
        'n_estimators': trial.suggest_int('n_estimators', 50, 300, step=10),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1, step=0.001),
        'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
        'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
        'scale_pos_weight':trial.suggest_float('scale_pos_weight',0,50000, step=0.5),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 10, step=0.01),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 10, step=0.01),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0, step=0.01)
    }

    model = xgb.XGBClassifier(**param,
                              random_state = random_seed)    
        
    model.fit(X_train, y_train)
    metric = get_recall_on_fpr(y_val.values, model.predict_proba(X_val)[:, 1], 0.0001)

    lprint(str([trial.number, param, metric]))
    return metric

## Preprocessing

In [4]:
df = pd.read_parquet(dataset_path)
cols_to_drop = ['252', '414', '127', '228', '398', '226', '209', '109', '396', '243', '380', '98', '271', '268', '194', '400', '394', '429', '120', '239', '215', '181', '180', '416', '67', '256', '151', '419', '240', '158', '34', '164', '258', '424', '415', '168', '438', '283', '403', '152', '102', '60', '129', '225', '169', '409', '83', '26', '406', '413', '200', '244', '247', '430', '266', '96', '410', '161', '251']
strict_perm = ['142','389','390']
cols_to_drop += strict_perm
df = df.drop(columns=cols_to_drop)
train_val_df, test_df = make_test(df, 
                                  target_column, 
                                  datetime_column, 
                                  pd.Timestamp(time_split), 
                                  class_0_share=0.1)
X_train, X_val, y_train, y_val = train_test_split_df(train_val_df.drop(columns=[target_column]),
                                                     train_val_df[target_column], 
                                                     class_probabilities={0:0.1, 1:0.1}, 
                                                     shuffle=True,
                                                     random_state=random_seed)

## Optuna tuning

In [5]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

[I 2025-05-28 06:06:09,357] A new study created in memory with name: no-name-ff22723a-b05d-48c8-951f-6d00a858dbd7
  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:06:19,424] Trial 0 finished with value: 0.5660847880299252 and parameters: {'n_estimators': 110, 'max_depth': 3, 'learning_rate': 0.20900000000000002, 'subsample': 0.7, 'colsample_bytree': 0.9, 'scale_pos_weight': 17903.0, 'reg_alpha': 3.4, 'reg_lambda': 9.51, 'gamma': 0.9400000000000001}. Best is trial 0 with value: 0.5660847880299252.


[0, {'objective': 'binary:logistic', 'n_estimators': 110, 'max_depth': 3, 'learning_rate': 0.20900000000000002, 'subsample': 0.7, 'colsample_bytree': 0.9, 'scale_pos_weight': 17903.0, 'reg_alpha': 3.4, 'reg_lambda': 9.51, 'gamma': 0.9400000000000001}, 0.5660847880299252]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:06:32,008] Trial 1 finished with value: 0.7182044887780549 and parameters: {'n_estimators': 300, 'max_depth': 10, 'learning_rate': 0.374, 'subsample': 1.0, 'colsample_bytree': 1.0, 'scale_pos_weight': 28387.5, 'reg_alpha': 5.56, 'reg_lambda': 3.1799999999999997, 'gamma': 0.8300000000000001}. Best is trial 1 with value: 0.7182044887780549.


[1, {'objective': 'binary:logistic', 'n_estimators': 300, 'max_depth': 10, 'learning_rate': 0.374, 'subsample': 1.0, 'colsample_bytree': 1.0, 'scale_pos_weight': 28387.5, 'reg_alpha': 5.56, 'reg_lambda': 3.1799999999999997, 'gamma': 0.8300000000000001}, 0.7182044887780549]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:06:42,602] Trial 2 finished with value: 0.0 and parameters: {'n_estimators': 220, 'max_depth': 4, 'learning_rate': 0.768, 'subsample': 0.6, 'colsample_bytree': 0.6, 'scale_pos_weight': 11297.0, 'reg_alpha': 6.21, 'reg_lambda': 5.4399999999999995, 'gamma': 0.03}. Best is trial 1 with value: 0.7182044887780549.


[2, {'objective': 'binary:logistic', 'n_estimators': 220, 'max_depth': 4, 'learning_rate': 0.768, 'subsample': 0.6, 'colsample_bytree': 0.6, 'scale_pos_weight': 11297.0, 'reg_alpha': 6.21, 'reg_lambda': 5.4399999999999995, 'gamma': 0.03}, 0]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:06:57,596] Trial 3 finished with value: 0.7506234413965087 and parameters: {'n_estimators': 270, 'max_depth': 8, 'learning_rate': 0.13, 'subsample': 0.9, 'colsample_bytree': 0.8, 'scale_pos_weight': 26276.5, 'reg_alpha': 9.83, 'reg_lambda': 4.14, 'gamma': 0.31}. Best is trial 3 with value: 0.7506234413965087.


[3, {'objective': 'binary:logistic', 'n_estimators': 270, 'max_depth': 8, 'learning_rate': 0.13, 'subsample': 0.9, 'colsample_bytree': 0.8, 'scale_pos_weight': 26276.5, 'reg_alpha': 9.83, 'reg_lambda': 4.14, 'gamma': 0.31}, 0.7506234413965087]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:07:07,649] Trial 4 finished with value: 0.256857855361596 and parameters: {'n_estimators': 50, 'max_depth': 10, 'learning_rate': 0.128, 'subsample': 0.8, 'colsample_bytree': 0.5, 'scale_pos_weight': 26168.5, 'reg_alpha': 5.02, 'reg_lambda': 7.81, 'gamma': 0.14}. Best is trial 3 with value: 0.7506234413965087.


[4, {'objective': 'binary:logistic', 'n_estimators': 50, 'max_depth': 10, 'learning_rate': 0.128, 'subsample': 0.8, 'colsample_bytree': 0.5, 'scale_pos_weight': 26168.5, 'reg_alpha': 5.02, 'reg_lambda': 7.81, 'gamma': 0.14}, 0.256857855361596]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:07:19,183] Trial 5 finished with value: 0.5561097256857855 and parameters: {'n_estimators': 140, 'max_depth': 5, 'learning_rate': 0.231, 'subsample': 0.5, 'colsample_bytree': 0.5, 'scale_pos_weight': 24384.0, 'reg_alpha': 3.8, 'reg_lambda': 3.55, 'gamma': 0.060000000000000005}. Best is trial 3 with value: 0.7506234413965087.


[5, {'objective': 'binary:logistic', 'n_estimators': 140, 'max_depth': 5, 'learning_rate': 0.231, 'subsample': 0.5, 'colsample_bytree': 0.5, 'scale_pos_weight': 24384.0, 'reg_alpha': 3.8, 'reg_lambda': 3.55, 'gamma': 0.060000000000000005}, 0.5561097256857855]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:07:32,178] Trial 6 finished with value: 0.0 and parameters: {'n_estimators': 280, 'max_depth': 6, 'learning_rate': 0.392, 'subsample': 0.5, 'colsample_bytree': 0.8, 'scale_pos_weight': 36499.0, 'reg_alpha': 2.3899999999999997, 'reg_lambda': 3.75, 'gamma': 0.87}. Best is trial 3 with value: 0.7506234413965087.


[6, {'objective': 'binary:logistic', 'n_estimators': 280, 'max_depth': 6, 'learning_rate': 0.392, 'subsample': 0.5, 'colsample_bytree': 0.8, 'scale_pos_weight': 36499.0, 'reg_alpha': 2.3899999999999997, 'reg_lambda': 3.75, 'gamma': 0.87}, 0]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:07:43,053] Trial 7 finished with value: 0.7381546134663342 and parameters: {'n_estimators': 90, 'max_depth': 8, 'learning_rate': 0.644, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 4772.0, 'reg_alpha': 6.93, 'reg_lambda': 7.24, 'gamma': 0.85}. Best is trial 3 with value: 0.7506234413965087.


[7, {'objective': 'binary:logistic', 'n_estimators': 90, 'max_depth': 8, 'learning_rate': 0.644, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 4772.0, 'reg_alpha': 6.93, 'reg_lambda': 7.24, 'gamma': 0.85}, 0.7381546134663342]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:07:55,226] Trial 8 finished with value: 0.7281795511221946 and parameters: {'n_estimators': 170, 'max_depth': 7, 'learning_rate': 0.273, 'subsample': 0.7, 'colsample_bytree': 0.5, 'scale_pos_weight': 7068.5, 'reg_alpha': 7.41, 'reg_lambda': 1.3900000000000001, 'gamma': 0.15000000000000002}. Best is trial 3 with value: 0.7506234413965087.


[8, {'objective': 'binary:logistic', 'n_estimators': 170, 'max_depth': 7, 'learning_rate': 0.273, 'subsample': 0.7, 'colsample_bytree': 0.5, 'scale_pos_weight': 7068.5, 'reg_alpha': 7.41, 'reg_lambda': 1.3900000000000001, 'gamma': 0.15000000000000002}, 0.7281795511221946]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:08:04,348] Trial 9 finished with value: 0.0 and parameters: {'n_estimators': 70, 'max_depth': 4, 'learning_rate': 0.654, 'subsample': 0.7, 'colsample_bytree': 0.6, 'scale_pos_weight': 11368.5, 'reg_alpha': 9.92, 'reg_lambda': 9.74, 'gamma': 0.37}. Best is trial 3 with value: 0.7506234413965087.


[9, {'objective': 'binary:logistic', 'n_estimators': 70, 'max_depth': 4, 'learning_rate': 0.654, 'subsample': 0.7, 'colsample_bytree': 0.6, 'scale_pos_weight': 11368.5, 'reg_alpha': 9.92, 'reg_lambda': 9.74, 'gamma': 0.37}, 0]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:08:14,928] Trial 10 finished with value: 0.0 and parameters: {'n_estimators': 230, 'max_depth': 8, 'learning_rate': 0.979, 'subsample': 1.0, 'colsample_bytree': 0.7, 'scale_pos_weight': 47235.5, 'reg_alpha': 0.01, 'reg_lambda': 0.43, 'gamma': 0.56}. Best is trial 3 with value: 0.7506234413965087.


[10, {'objective': 'binary:logistic', 'n_estimators': 230, 'max_depth': 8, 'learning_rate': 0.979, 'subsample': 1.0, 'colsample_bytree': 0.7, 'scale_pos_weight': 47235.5, 'reg_alpha': 0.01, 'reg_lambda': 0.43, 'gamma': 0.56}, 0]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:08:27,085] Trial 11 finished with value: 0.7855361596009975 and parameters: {'n_estimators': 230, 'max_depth': 8, 'learning_rate': 0.595, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 290.5, 'reg_alpha': 9.7, 'reg_lambda': 6.1899999999999995, 'gamma': 0.6}. Best is trial 11 with value: 0.7855361596009975.


[11, {'objective': 'binary:logistic', 'n_estimators': 230, 'max_depth': 8, 'learning_rate': 0.595, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 290.5, 'reg_alpha': 9.7, 'reg_lambda': 6.1899999999999995, 'gamma': 0.6}, 0.7855361596009975]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:08:41,518] Trial 12 finished with value: 0.4164588528678304 and parameters: {'n_estimators': 230, 'max_depth': 8, 'learning_rate': 0.035, 'subsample': 0.9, 'colsample_bytree': 0.8, 'scale_pos_weight': 37040.0, 'reg_alpha': 9.83, 'reg_lambda': 5.9799999999999995, 'gamma': 0.53}. Best is trial 11 with value: 0.7855361596009975.


[12, {'objective': 'binary:logistic', 'n_estimators': 230, 'max_depth': 8, 'learning_rate': 0.035, 'subsample': 0.9, 'colsample_bytree': 0.8, 'scale_pos_weight': 37040.0, 'reg_alpha': 9.83, 'reg_lambda': 5.9799999999999995, 'gamma': 0.53}, 0.4164588528678304]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:08:54,193] Trial 13 finished with value: 0.7955112219451371 and parameters: {'n_estimators': 270, 'max_depth': 9, 'learning_rate': 0.533, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 952.0, 'reg_alpha': 8.37, 'reg_lambda': 6.67, 'gamma': 0.35000000000000003}. Best is trial 13 with value: 0.7955112219451371.


[13, {'objective': 'binary:logistic', 'n_estimators': 270, 'max_depth': 9, 'learning_rate': 0.533, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 952.0, 'reg_alpha': 8.37, 'reg_lambda': 6.67, 'gamma': 0.35000000000000003}, 0.7955112219451371]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:09:05,773] Trial 14 finished with value: 0.7930174563591023 and parameters: {'n_estimators': 200, 'max_depth': 9, 'learning_rate': 0.49, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 125.0, 'reg_alpha': 7.93, 'reg_lambda': 7.2, 'gamma': 0.67}. Best is trial 13 with value: 0.7955112219451371.


[14, {'objective': 'binary:logistic', 'n_estimators': 200, 'max_depth': 9, 'learning_rate': 0.49, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 125.0, 'reg_alpha': 7.93, 'reg_lambda': 7.2, 'gamma': 0.67}, 0.7930174563591023]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:09:16,983] Trial 15 finished with value: 0.7680798004987531 and parameters: {'n_estimators': 180, 'max_depth': 10, 'learning_rate': 0.48800000000000004, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 145.0, 'reg_alpha': 8.17, 'reg_lambda': 7.7299999999999995, 'gamma': 0.7000000000000001}. Best is trial 13 with value: 0.7955112219451371.


[15, {'objective': 'binary:logistic', 'n_estimators': 180, 'max_depth': 10, 'learning_rate': 0.48800000000000004, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 145.0, 'reg_alpha': 8.17, 'reg_lambda': 7.7299999999999995, 'gamma': 0.7000000000000001}, 0.7680798004987531]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:09:27,874] Trial 16 finished with value: 0.7406483790523691 and parameters: {'n_estimators': 180, 'max_depth': 9, 'learning_rate': 0.8210000000000001, 'subsample': 1.0, 'colsample_bytree': 1.0, 'scale_pos_weight': 16249.5, 'reg_alpha': 8.02, 'reg_lambda': 8.61, 'gamma': 0.39}. Best is trial 13 with value: 0.7955112219451371.


[16, {'objective': 'binary:logistic', 'n_estimators': 180, 'max_depth': 9, 'learning_rate': 0.8210000000000001, 'subsample': 1.0, 'colsample_bytree': 1.0, 'scale_pos_weight': 16249.5, 'reg_alpha': 8.02, 'reg_lambda': 8.61, 'gamma': 0.39}, 0.7406483790523691]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:09:40,678] Trial 17 finished with value: 0.7182044887780549 and parameters: {'n_estimators': 260, 'max_depth': 9, 'learning_rate': 0.529, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 7082.0, 'reg_alpha': 8.11, 'reg_lambda': 6.66, 'gamma': 0.73}. Best is trial 13 with value: 0.7955112219451371.


[17, {'objective': 'binary:logistic', 'n_estimators': 260, 'max_depth': 9, 'learning_rate': 0.529, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 7082.0, 'reg_alpha': 8.11, 'reg_lambda': 6.66, 'gamma': 0.73}, 0.7182044887780549]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:09:52,947] Trial 18 finished with value: 0.773067331670823 and parameters: {'n_estimators': 200, 'max_depth': 6, 'learning_rate': 0.402, 'subsample': 0.9, 'colsample_bytree': 0.9, 'scale_pos_weight': 15780.0, 'reg_alpha': 8.73, 'reg_lambda': 4.86, 'gamma': 0.26}. Best is trial 13 with value: 0.7955112219451371.


[18, {'objective': 'binary:logistic', 'n_estimators': 200, 'max_depth': 6, 'learning_rate': 0.402, 'subsample': 0.9, 'colsample_bytree': 0.9, 'scale_pos_weight': 15780.0, 'reg_alpha': 8.73, 'reg_lambda': 4.86, 'gamma': 0.26}, 0.773067331670823]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:10:03,726] Trial 19 finished with value: 0.71571072319202 and parameters: {'n_estimators': 140, 'max_depth': 9, 'learning_rate': 0.756, 'subsample': 1.0, 'colsample_bytree': 1.0, 'scale_pos_weight': 4003.5, 'reg_alpha': 6.56, 'reg_lambda': 2.36, 'gamma': 0.44}. Best is trial 13 with value: 0.7955112219451371.


[19, {'objective': 'binary:logistic', 'n_estimators': 140, 'max_depth': 9, 'learning_rate': 0.756, 'subsample': 1.0, 'colsample_bytree': 1.0, 'scale_pos_weight': 4003.5, 'reg_alpha': 6.56, 'reg_lambda': 2.36, 'gamma': 0.44}, 0.71571072319202]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:10:17,711] Trial 20 finished with value: 0.7057356608478803 and parameters: {'n_estimators': 300, 'max_depth': 7, 'learning_rate': 0.496, 'subsample': 0.8, 'colsample_bytree': 0.7, 'scale_pos_weight': 11389.5, 'reg_alpha': 8.74, 'reg_lambda': 8.55, 'gamma': 0.68}. Best is trial 13 with value: 0.7955112219451371.


[20, {'objective': 'binary:logistic', 'n_estimators': 300, 'max_depth': 7, 'learning_rate': 0.496, 'subsample': 0.8, 'colsample_bytree': 0.7, 'scale_pos_weight': 11389.5, 'reg_alpha': 8.74, 'reg_lambda': 8.55, 'gamma': 0.68}, 0.7057356608478803]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:10:30,074] Trial 21 finished with value: 0.7381546134663342 and parameters: {'n_estimators': 240, 'max_depth': 9, 'learning_rate': 0.599, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 486.0, 'reg_alpha': 8.99, 'reg_lambda': 6.42, 'gamma': 0.6}. Best is trial 13 with value: 0.7955112219451371.


[21, {'objective': 'binary:logistic', 'n_estimators': 240, 'max_depth': 9, 'learning_rate': 0.599, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 486.0, 'reg_alpha': 8.99, 'reg_lambda': 6.42, 'gamma': 0.6}, 0.7381546134663342]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:10:43,549] Trial 22 finished with value: 0.7955112219451371 and parameters: {'n_estimators': 260, 'max_depth': 7, 'learning_rate': 0.587, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 2871.5, 'reg_alpha': 7.31, 'reg_lambda': 5.26, 'gamma': 0.48000000000000004}. Best is trial 13 with value: 0.7955112219451371.


[22, {'objective': 'binary:logistic', 'n_estimators': 260, 'max_depth': 7, 'learning_rate': 0.587, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 2871.5, 'reg_alpha': 7.31, 'reg_lambda': 5.26, 'gamma': 0.48000000000000004}, 0.7955112219451371]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:10:56,719] Trial 23 finished with value: 0.0 and parameters: {'n_estimators': 260, 'max_depth': 7, 'learning_rate': 0.7030000000000001, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 7552.5, 'reg_alpha': 7.29, 'reg_lambda': 4.8999999999999995, 'gamma': 0.46}. Best is trial 13 with value: 0.7955112219451371.


[23, {'objective': 'binary:logistic', 'n_estimators': 260, 'max_depth': 7, 'learning_rate': 0.7030000000000001, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 7552.5, 'reg_alpha': 7.29, 'reg_lambda': 4.8999999999999995, 'gamma': 0.46}, 0]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:11:09,313] Trial 24 finished with value: 0.0 and parameters: {'n_estimators': 210, 'max_depth': 6, 'learning_rate': 0.868, 'subsample': 0.6, 'colsample_bytree': 0.9, 'scale_pos_weight': 3722.5, 'reg_alpha': 6.15, 'reg_lambda': 7.25, 'gamma': 0.24000000000000002}. Best is trial 13 with value: 0.7955112219451371.


[24, {'objective': 'binary:logistic', 'n_estimators': 210, 'max_depth': 6, 'learning_rate': 0.868, 'subsample': 0.6, 'colsample_bytree': 0.9, 'scale_pos_weight': 3722.5, 'reg_alpha': 6.15, 'reg_lambda': 7.25, 'gamma': 0.24000000000000002}, 0]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:11:22,714] Trial 25 finished with value: 0.6733167082294265 and parameters: {'n_estimators': 250, 'max_depth': 9, 'learning_rate': 0.457, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 19579.5, 'reg_alpha': 7.52, 'reg_lambda': 5.62, 'gamma': 0.5}. Best is trial 13 with value: 0.7955112219451371.


[25, {'objective': 'binary:logistic', 'n_estimators': 250, 'max_depth': 9, 'learning_rate': 0.457, 'subsample': 0.9, 'colsample_bytree': 1.0, 'scale_pos_weight': 19579.5, 'reg_alpha': 7.52, 'reg_lambda': 5.62, 'gamma': 0.5}, 0.6733167082294265]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:11:36,852] Trial 26 finished with value: 0.7855361596009975 and parameters: {'n_estimators': 280, 'max_depth': 10, 'learning_rate': 0.325, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 9552.5, 'reg_alpha': 5.35, 'reg_lambda': 4.41, 'gamma': 0.76}. Best is trial 13 with value: 0.7955112219451371.


[26, {'objective': 'binary:logistic', 'n_estimators': 280, 'max_depth': 10, 'learning_rate': 0.325, 'subsample': 0.8, 'colsample_bytree': 0.9, 'scale_pos_weight': 9552.5, 'reg_alpha': 5.35, 'reg_lambda': 4.41, 'gamma': 0.76}, 0.7855361596009975]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:11:48,250] Trial 27 finished with value: 0.8054862842892768 and parameters: {'n_estimators': 190, 'max_depth': 7, 'learning_rate': 0.557, 'subsample': 1.0, 'colsample_bytree': 0.8, 'scale_pos_weight': 3497.0, 'reg_alpha': 4.16, 'reg_lambda': 8.61, 'gamma': 0.37}. Best is trial 27 with value: 0.8054862842892768.


[27, {'objective': 'binary:logistic', 'n_estimators': 190, 'max_depth': 7, 'learning_rate': 0.557, 'subsample': 1.0, 'colsample_bytree': 0.8, 'scale_pos_weight': 3497.0, 'reg_alpha': 4.16, 'reg_lambda': 8.61, 'gamma': 0.37}, 0.8054862842892768]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:11:59,084] Trial 28 finished with value: 0.7481296758104738 and parameters: {'n_estimators': 150, 'max_depth': 5, 'learning_rate': 0.5690000000000001, 'subsample': 1.0, 'colsample_bytree': 0.8, 'scale_pos_weight': 13574.0, 'reg_alpha': 4.29, 'reg_lambda': 8.51, 'gamma': 0.32}. Best is trial 27 with value: 0.8054862842892768.


[28, {'objective': 'binary:logistic', 'n_estimators': 150, 'max_depth': 5, 'learning_rate': 0.5690000000000001, 'subsample': 1.0, 'colsample_bytree': 0.8, 'scale_pos_weight': 13574.0, 'reg_alpha': 4.29, 'reg_lambda': 8.51, 'gamma': 0.32}, 0.7481296758104738]


  'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 1, 0.1),
  'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1, 0.1),
[I 2025-05-28 06:12:10,291] Trial 29 finished with value: 0.6633416458852868 and parameters: {'n_estimators': 120, 'max_depth': 7, 'learning_rate': 0.649, 'subsample': 0.7, 'colsample_bytree': 0.7, 'scale_pos_weight': 4169.0, 'reg_alpha': 2.6799999999999997, 'reg_lambda': 9.02, 'gamma': 0.18000000000000002}. Best is trial 27 with value: 0.8054862842892768.


[29, {'objective': 'binary:logistic', 'n_estimators': 120, 'max_depth': 7, 'learning_rate': 0.649, 'subsample': 0.7, 'colsample_bytree': 0.7, 'scale_pos_weight': 4169.0, 'reg_alpha': 2.6799999999999997, 'reg_lambda': 9.02, 'gamma': 0.18000000000000002}, 0.6633416458852868]


In [6]:
best_params = study.best_params
print("Best params:", best_params)

best_value = study.best_value
print("Best score:", best_value)

Best params: {'n_estimators': 190, 'max_depth': 7, 'learning_rate': 0.557, 'subsample': 1.0, 'colsample_bytree': 0.8, 'scale_pos_weight': 3497.0, 'reg_alpha': 4.16, 'reg_lambda': 8.61, 'gamma': 0.37}
Best score: 0.8054862842892768
