# Optimización de hiperparámetros

Setup

In [1]:
import pandas as pd
import polars as pl
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

import lightgbm as lgb

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances, plot_slice, plot_contour

from time import time

import pickle

In [2]:
# !gsutil cp /home/clas_giulia_s/buckets/b1/datasets/competencia_02_fe_v01_undersampled.parquet /home/clas_giulia_s/datasets/

In [2]:
# Base directory for every input/output of this notebook.
# Alternative location (kept for reference):
# base_path = '/home/clas_giulia_s/buckets/b1/'
base_path = '/Users/ignacio/MAESTRIA/DMEF/'
dataset_path = f'{base_path}datasets/'
modelos_path = f'{base_path}modelos/'
db_path = f'{base_path}db/'
dataset_file = 'competencia_01_fe_modelito_undersampled.csv'

# Amounts used by the custom gain metric (lgb_gan_eval): gain credited for a
# BAJA+2 row and cost charged for every other row.
ganancia_acierto = 273000
costo_estimulo = 7000

# Random seeds; the first one seeds LightGBM and the CV split in objective().
semillas = [261823, 289871, 379817, 481301, 959723]

# Parquet variant of the load (kept for reference):
# data = pd.read_parquet(dataset_path + dataset_file)
data = pd.read_csv(f'{dataset_path}{dataset_file}')

In [3]:
# Full training window (kept for reference):
# meses_train = [201906, 201907, 201908, 201909, 201910, 201911, 201912,
#                202001, 202002, 202003, 202004, 202005, 202006,
#                202007, 202008, 202009, 202010, 202011, 202012,
#                202101, 202102, 202103, 202104, 202105] # 202106 is left out for testing

meses_train = [202101, 202102, 202103] # 202104 is left out for testing

# Keep only the training months. The explicit .copy() makes the result an
# independent frame, so the column assignments done in later cells
# (e.g. data['clase_peso'] = ...) do not operate on a slice of the original
# frame and do not raise pandas' SettingWithCopyWarning.
data = data[data['foto_mes'].isin(meses_train)].copy()
data.shape

(51606, 679)

In [4]:
# Assign a weight to every row according to its class.
# The weights differ from 1.0 only marginally; lgb_gan_eval later compares the
# weight against 1.00002 to recognise BAJA+2 rows.
data['clase_peso'] = np.select(
    [data['clase_ternaria'] == 'BAJA+2', data['clase_ternaria'] == 'BAJA+1'],
    [1.00002, 1.00001],
    default=1.0,
)

In [5]:
# Target and per-row weights used to build the LightGBM dataset.
y_train_binaria = data['clase_binaria']
w_train = data['clase_peso']

# Feature matrix: every column except the two targets and the weight column.
X_train = data.drop(columns=['clase_ternaria', 'clase_peso', 'clase_binaria'])

In [6]:
def lgb_gan_eval(y_pred, data):
    """Custom LightGBM evaluation function: best achievable cumulative gain.

    Each row is worth ``ganancia_acierto`` when its weight equals 1.00002 and
    costs ``costo_estimulo`` when its weight is below 1.00002. Rows are ranked
    by predicted score (highest first) and the maximum of the running total
    of those per-row gains is reported.

    Parameters
    ----------
    y_pred : array-like
        Predicted scores, one per row of ``data``.
    data : object exposing ``get_weight()``
        Dataset whose weights encode the class of each row.

    Returns
    -------
    tuple
        ``('gan_eval', <max cumulative gain>, True)``; the final ``True``
        marks the metric as higher-is-better.
    """
    pesos = data.get_weight()

    # Per-row payoff derived from the weight markers.
    premio = np.where(pesos == 1.00002, ganancia_acierto, 0)
    costo = np.where(pesos < 1.00002, costo_estimulo, 0)
    ganancia_por_fila = premio - costo

    # Rank rows from the highest to the lowest predicted score.
    orden_descendente = np.argsort(y_pred)[::-1]
    ganancia_acumulada = np.cumsum(ganancia_por_fila[orden_descendente])

    return 'gan_eval', np.max(ganancia_acumulada), True

# Optimización

In [7]:
def objective(trial):
    """Optuna objective: cross-validated gain of LightGBM for one trial.

    Samples ``num_leaves``, ``learning_rate``, ``min_data_in_leaf``,
    ``feature_fraction`` and ``bagging_fraction`` from ``trial``, runs a
    stratified ``lgb.cv`` on the module-level training data (``X_train``,
    ``y_train_binaria``, ``w_train``) with the custom ``lgb_gan_eval`` metric
    and early stopping, and records the best boosting iteration in the trial's
    ``"best_iter"`` user attribute.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters and to store ``best_iter``.

    Returns
    -------
    float
        The highest mean validation gain over the boosting rounds, multiplied
        by the number of folds.
    """
    nfold = 5

    # Sampled values are stored as plain scalars. (Trailing commas after these
    # calls would silently wrap each value in a 1-tuple.)
    num_leaves = trial.suggest_int('num_leaves', 8, 200)
    # Wider range (a lower rate needs more iterations):
    # learning_rate = trial.suggest_float('learning_rate', 0.01, 0.5)
    # Higher lower bound so that each trial of this experiment finishes sooner.
    learning_rate = trial.suggest_float('learning_rate', 0.1, 0.5)
    min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 1, 2000)
    feature_fraction = trial.suggest_float('feature_fraction', 0.1, 1.0)
    # NOTE(review): LightGBM only applies bagging_fraction when bagging_freq is
    # non-zero; bagging_freq is not set below, so this parameter may currently
    # have no effect -- confirm against the LightGBM parameter documentation.
    bagging_fraction = trial.suggest_float('bagging_fraction', 0.1, 1.0)

    params = {
        'objective': 'binary',
        'metric': 'custom',  # only the feval passed to lgb.cv is evaluated
        'boosting_type': 'gbdt',
        'first_metric_only': True,
        'boost_from_average': True,
        'feature_pre_filter': False,
        'max_bin': 31,
        'num_leaves': num_leaves,
        'learning_rate': learning_rate,
        'min_data_in_leaf': min_data_in_leaf,
        'feature_fraction': feature_fraction,
        'bagging_fraction': bagging_fraction,
        'seed': semillas[0],
        'verbose': -1,
    }

    train_data = lgb.Dataset(X_train,
                              label=y_train_binaria,  # target class to fit
                              weight=w_train)

    # Early-stopping patience grows as the learning rate shrinks.
    rounds = int(50 + 5 / learning_rate)
    early_stopping_cb = lgb.early_stopping(stopping_rounds=rounds)

    cv_results = lgb.cv(
        params,
        train_data,
        num_boost_round=10000,  # upper bound; early stopping ends training earlier
        callbacks=[early_stopping_cb],
        feval=lgb_gan_eval,
        stratified=True,
        nfold=nfold,
        seed=semillas[0]
    )

    mean_gains = cv_results['valid gan_eval-mean']
    max_gan = max(mean_gains)
    # Boosting rounds are 1-based, list positions are 0-based.
    best_iter = mean_gains.index(max_gan) + 1

    # Store the best iteration of the model alongside the trial.
    trial.set_user_attr("best_iter", best_iter)

    # Value to optimise; the direction (maximise) is chosen when the study is
    # created. Multiplied by the number of folds, presumably to scale the
    # per-fold gain up to the full training set.
    return max_gan * nfold

In [8]:
# Study identity and its SQLite backing store under db_path.
study_name = "competencia1_modelito_exp_lgbm" # UPDATE
storage_name = f"sqlite:///{db_path}optimizacion_lgbm_modelito_exp.db"

# load_if_exists=True resumes a study with the same name instead of failing.
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    direction="maximize",
    load_if_exists=True,
)

[I 2024-12-01 19:14:41,080] A new study created in RDB with name: competencia1_modelito_exp_lgbm


In [9]:
# Number of trials currently stored in the study.
len(study.trials)

0

In [10]:
# Run 60 trials of objective(); each trial is one lgb.cv run.
study.optimize(objective, n_trials=60)

Training until validation scores don't improve for 72 rounds


[I 2024-12-01 19:15:05,737] Trial 0 finished with value: 597723000.0 and parameters: {'num_leaves': 153, 'learning_rate': 0.2216061378331717, 'min_data_in_leaf': 163, 'feature_fraction': 0.79399140320068, 'bagging_fraction': 0.6946325144921471}. Best is trial 0 with value: 597723000.0.


Early stopping, best iteration is:
[9]	cv_agg's valid gan_eval: 1.19545e+08 + 1.88993e+06
Training until validation scores don't improve for 70 rounds


[I 2024-12-01 19:15:14,472] Trial 1 finished with value: 605262000.0 and parameters: {'num_leaves': 51, 'learning_rate': 0.2431975154587192, 'min_data_in_leaf': 615, 'feature_fraction': 0.42488833179117413, 'bagging_fraction': 0.5031303013504012}. Best is trial 1 with value: 605262000.0.


Early stopping, best iteration is:
[18]	cv_agg's valid gan_eval: 1.21052e+08 + 1.9385e+06
Training until validation scores don't improve for 61 rounds


[I 2024-12-01 19:15:19,008] Trial 2 finished with value: 598773000.0 and parameters: {'num_leaves': 175, 'learning_rate': 0.4178996014439409, 'min_data_in_leaf': 1985, 'feature_fraction': 0.6349151481611277, 'bagging_fraction': 0.10192515394210179}. Best is trial 1 with value: 605262000.0.


Early stopping, best iteration is:
[16]	cv_agg's valid gan_eval: 1.19755e+08 + 1.19661e+06
Training until validation scores don't improve for 62 rounds


[I 2024-12-01 19:15:25,386] Trial 3 finished with value: 597170000.0 and parameters: {'num_leaves': 154, 'learning_rate': 0.3866885712041308, 'min_data_in_leaf': 795, 'feature_fraction': 0.31399654783323894, 'bagging_fraction': 0.2021702279128118}. Best is trial 1 with value: 605262000.0.


Early stopping, best iteration is:
[14]	cv_agg's valid gan_eval: 1.19434e+08 + 1.56766e+06
Training until validation scores don't improve for 67 rounds


[I 2024-12-01 19:15:33,316] Trial 4 finished with value: 595777000.0 and parameters: {'num_leaves': 103, 'learning_rate': 0.2799147015169949, 'min_data_in_leaf': 302, 'feature_fraction': 0.1800385059472165, 'bagging_fraction': 0.3712348996810831}. Best is trial 1 with value: 605262000.0.


Early stopping, best iteration is:
[10]	cv_agg's valid gan_eval: 1.19155e+08 + 1.60291e+06
Training until validation scores don't improve for 82 rounds


[I 2024-12-01 19:15:39,939] Trial 5 finished with value: 605906000.0 and parameters: {'num_leaves': 16, 'learning_rate': 0.15438406084448278, 'min_data_in_leaf': 1820, 'feature_fraction': 0.9159405929751561, 'bagging_fraction': 0.6553752990450438}. Best is trial 5 with value: 605906000.0.


Early stopping, best iteration is:
[68]	cv_agg's valid gan_eval: 1.21181e+08 + 2.23972e+06
Training until validation scores don't improve for 61 rounds


[I 2024-12-01 19:15:46,059] Trial 6 finished with value: 595903000.0 and parameters: {'num_leaves': 57, 'learning_rate': 0.4395444927735028, 'min_data_in_leaf': 411, 'feature_fraction': 0.7876579370399329, 'bagging_fraction': 0.289611678029892}. Best is trial 5 with value: 605906000.0.


Early stopping, best iteration is:
[8]	cv_agg's valid gan_eval: 1.19181e+08 + 810870
Training until validation scores don't improve for 75 rounds


[I 2024-12-01 19:15:53,138] Trial 7 finished with value: 603001000.0 and parameters: {'num_leaves': 130, 'learning_rate': 0.19942692788143074, 'min_data_in_leaf': 618, 'feature_fraction': 0.6622688319823757, 'bagging_fraction': 0.7828625502878673}. Best is trial 5 with value: 605906000.0.


Early stopping, best iteration is:
[16]	cv_agg's valid gan_eval: 1.206e+08 + 1.94294e+06
Training until validation scores don't improve for 71 rounds


[I 2024-12-01 19:15:59,440] Trial 8 finished with value: 605346000.0 and parameters: {'num_leaves': 41, 'learning_rate': 0.2308762018555188, 'min_data_in_leaf': 1904, 'feature_fraction': 0.21849003920967858, 'bagging_fraction': 0.5754160145804846}. Best is trial 5 with value: 605906000.0.


Early stopping, best iteration is:
[45]	cv_agg's valid gan_eval: 1.21069e+08 + 2.06004e+06
Training until validation scores don't improve for 63 rounds


[I 2024-12-01 19:16:05,997] Trial 9 finished with value: 600964000.0 and parameters: {'num_leaves': 154, 'learning_rate': 0.3597466311750213, 'min_data_in_leaf': 1074, 'feature_fraction': 0.502839992232883, 'bagging_fraction': 0.9093387841112267}. Best is trial 5 with value: 605906000.0.


Early stopping, best iteration is:
[9]	cv_agg's valid gan_eval: 1.20193e+08 + 2.98029e+06
Training until validation scores don't improve for 94 rounds


[I 2024-12-01 19:16:13,029] Trial 10 finished with value: 606032000.0 and parameters: {'num_leaves': 9, 'learning_rate': 0.11276283126676387, 'min_data_in_leaf': 1450, 'feature_fraction': 0.9977669758016805, 'bagging_fraction': 0.9920425801598505}. Best is trial 10 with value: 606032000.0.


Early stopping, best iteration is:
[126]	cv_agg's valid gan_eval: 1.21206e+08 + 2.1718e+06
Training until validation scores don't improve for 99 rounds


[I 2024-12-01 19:16:20,636] Trial 11 finished with value: 605759000.0 and parameters: {'num_leaves': 22, 'learning_rate': 0.10052022095993189, 'min_data_in_leaf': 1456, 'feature_fraction': 0.9833934458448697, 'bagging_fraction': 0.9652074411860292}. Best is trial 10 with value: 606032000.0.


Early stopping, best iteration is:
[54]	cv_agg's valid gan_eval: 1.21152e+08 + 2.05337e+06
Training until validation scores don't improve for 98 rounds


[I 2024-12-01 19:16:26,390] Trial 12 finished with value: 606235000.0 and parameters: {'num_leaves': 10, 'learning_rate': 0.1033146520130771, 'min_data_in_leaf': 1578, 'feature_fraction': 0.969366560596552, 'bagging_fraction': 0.7950543394494896}. Best is trial 12 with value: 606235000.0.


Early stopping, best iteration is:
[65]	cv_agg's valid gan_eval: 1.21247e+08 + 2.19301e+06
Training until validation scores don't improve for 91 rounds


[I 2024-12-01 19:16:34,068] Trial 13 finished with value: 606641000.0 and parameters: {'num_leaves': 87, 'learning_rate': 0.11934674019655692, 'min_data_in_leaf': 1390, 'feature_fraction': 0.8589083637096303, 'bagging_fraction': 0.8432672651065729}. Best is trial 13 with value: 606641000.0.


Early stopping, best iteration is:
[61]	cv_agg's valid gan_eval: 1.21328e+08 + 1.94534e+06
Training until validation scores don't improve for 80 rounds


[I 2024-12-01 19:16:40,156] Trial 14 finished with value: 605619000.0 and parameters: {'num_leaves': 80, 'learning_rate': 0.16473352067823085, 'min_data_in_leaf': 1344, 'feature_fraction': 0.8355640446812469, 'bagging_fraction': 0.8192614162293025}. Best is trial 13 with value: 606641000.0.


Early stopping, best iteration is:
[32]	cv_agg's valid gan_eval: 1.21124e+08 + 1.63525e+06
Training until validation scores don't improve for 65 rounds


[I 2024-12-01 19:16:44,946] Trial 15 finished with value: 600068000.0 and parameters: {'num_leaves': 89, 'learning_rate': 0.3246159760745766, 'min_data_in_leaf': 1187, 'feature_fraction': 0.685183666693681, 'bagging_fraction': 0.8181402216435302}. Best is trial 13 with value: 606641000.0.


Early stopping, best iteration is:
[15]	cv_agg's valid gan_eval: 1.20014e+08 + 1.77272e+06
Training until validation scores don't improve for 60 rounds


[I 2024-12-01 19:16:48,915] Trial 16 finished with value: 599368000.0 and parameters: {'num_leaves': 194, 'learning_rate': 0.47834873490872387, 'min_data_in_leaf': 1648, 'feature_fraction': 0.8793236240782925, 'bagging_fraction': 0.7102559712046539}. Best is trial 13 with value: 606641000.0.


Early stopping, best iteration is:
[12]	cv_agg's valid gan_eval: 1.19874e+08 + 1.91318e+06
Training until validation scores don't improve for 80 rounds


[I 2024-12-01 19:16:55,483] Trial 17 finished with value: 604604000.0 and parameters: {'num_leaves': 76, 'learning_rate': 0.16565131791226692, 'min_data_in_leaf': 1607, 'feature_fraction': 0.7235716941374413, 'bagging_fraction': 0.49955904100280707}. Best is trial 13 with value: 606641000.0.


Early stopping, best iteration is:
[39]	cv_agg's valid gan_eval: 1.20921e+08 + 1.84674e+06
Training until validation scores don't improve for 88 rounds


[I 2024-12-01 19:17:08,022] Trial 18 finished with value: 607180000.0 and parameters: {'num_leaves': 119, 'learning_rate': 0.13091784315329408, 'min_data_in_leaf': 862, 'feature_fraction': 0.5442504873199518, 'bagging_fraction': 0.8717856129302802}. Best is trial 18 with value: 607180000.0.


Early stopping, best iteration is:
[43]	cv_agg's valid gan_eval: 1.21436e+08 + 1.79115e+06
Training until validation scores don't improve for 68 rounds


[I 2024-12-01 19:17:16,293] Trial 19 finished with value: 605185000.0 and parameters: {'num_leaves': 119, 'learning_rate': 0.27073751115141265, 'min_data_in_leaf': 909, 'feature_fraction': 0.5480201586822602, 'bagging_fraction': 0.8986923220436199}. Best is trial 18 with value: 607180000.0.


Early stopping, best iteration is:
[18]	cv_agg's valid gan_eval: 1.21037e+08 + 2.04593e+06
Training until validation scores don't improve for 84 rounds


[I 2024-12-01 19:17:26,851] Trial 20 finished with value: 607152000.0 and parameters: {'num_leaves': 130, 'learning_rate': 0.14453309175937457, 'min_data_in_leaf': 1192, 'feature_fraction': 0.41504718581750233, 'bagging_fraction': 0.6253036479852464}. Best is trial 18 with value: 607180000.0.


Early stopping, best iteration is:
[52]	cv_agg's valid gan_eval: 1.2143e+08 + 2.23126e+06
Training until validation scores don't improve for 85 rounds


[I 2024-12-01 19:17:37,224] Trial 21 finished with value: 609161000.0 and parameters: {'num_leaves': 129, 'learning_rate': 0.14285485549019958, 'min_data_in_leaf': 1190, 'feature_fraction': 0.41197212976185593, 'bagging_fraction': 0.6194842328414374}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[50]	cv_agg's valid gan_eval: 1.21832e+08 + 2.45795e+06
Training until validation scores don't improve for 76 rounds


[I 2024-12-01 19:17:45,953] Trial 22 finished with value: 605339000.0 and parameters: {'num_leaves': 131, 'learning_rate': 0.1902055069224507, 'min_data_in_leaf': 1199, 'feature_fraction': 0.39651284822000715, 'bagging_fraction': 0.5992381949152594}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[37]	cv_agg's valid gan_eval: 1.21068e+08 + 1.73555e+06
Training until validation scores don't improve for 84 rounds


[I 2024-12-01 19:17:54,517] Trial 23 finished with value: 605906000.0 and parameters: {'num_leaves': 111, 'learning_rate': 0.14660318318494092, 'min_data_in_leaf': 894, 'feature_fraction': 0.29859727660785784, 'bagging_fraction': 0.4396466966622029}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[31]	cv_agg's valid gan_eval: 1.21181e+08 + 1.56653e+06
Training until validation scores don't improve for 75 rounds


[I 2024-12-01 19:18:02,678] Trial 24 finished with value: 608062000.0 and parameters: {'num_leaves': 137, 'learning_rate': 0.19292862366418811, 'min_data_in_leaf': 1100, 'feature_fraction': 0.46704877663027977, 'bagging_fraction': 0.6338270099451192}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[20]	cv_agg's valid gan_eval: 1.21612e+08 + 2.51319e+06
Training until validation scores don't improve for 75 rounds


[I 2024-12-01 19:18:11,711] Trial 25 finished with value: 604443000.0 and parameters: {'num_leaves': 174, 'learning_rate': 0.19476522417268177, 'min_data_in_leaf': 715, 'feature_fraction': 0.4892340818510874, 'bagging_fraction': 0.7248479608184439}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[18]	cv_agg's valid gan_eval: 1.20889e+08 + 1.98437e+06
Training until validation scores don't improve for 69 rounds


[I 2024-12-01 19:18:19,828] Trial 26 finished with value: 604492000.0 and parameters: {'num_leaves': 138, 'learning_rate': 0.2605171901528884, 'min_data_in_leaf': 1059, 'feature_fraction': 0.5937596719632368, 'bagging_fraction': 0.403733025815801}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[27]	cv_agg's valid gan_eval: 1.20898e+08 + 2.59107e+06
Training until validation scores don't improve for 74 rounds


[I 2024-12-01 19:18:30,171] Trial 27 finished with value: 601475000.0 and parameters: {'num_leaves': 101, 'learning_rate': 0.20696261826289947, 'min_data_in_leaf': 495, 'feature_fraction': 0.5040050741678259, 'bagging_fraction': 0.5031398121761671}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[18]	cv_agg's valid gan_eval: 1.20295e+08 + 1.86371e+06
Training until validation scores don't improve for 65 rounds


[I 2024-12-01 19:18:36,437] Trial 28 finished with value: 597849000.0 and parameters: {'num_leaves': 168, 'learning_rate': 0.31813862840170737, 'min_data_in_leaf': 1025, 'feature_fraction': 0.3275138582742275, 'bagging_fraction': 0.7336839807341549}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[14]	cv_agg's valid gan_eval: 1.1957e+08 + 1.85801e+06
Training until validation scores don't improve for 86 rounds


[I 2024-12-01 19:18:59,695] Trial 29 finished with value: 601783000.0 and parameters: {'num_leaves': 147, 'learning_rate': 0.13776651382096833, 'min_data_in_leaf': 135, 'feature_fraction': 0.5736831061584238, 'bagging_fraction': 0.6473996482184492}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[54]	cv_agg's valid gan_eval: 1.20357e+08 + 3.25154e+06
Training until validation scores don't improve for 77 rounds


[I 2024-12-01 19:19:08,536] Trial 30 finished with value: 603778000.0 and parameters: {'num_leaves': 116, 'learning_rate': 0.17960777148518073, 'min_data_in_leaf': 872, 'feature_fraction': 0.3693842477496617, 'bagging_fraction': 0.9030931438001109}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[30]	cv_agg's valid gan_eval: 1.20756e+08 + 1.95326e+06
Training until validation scores don't improve for 87 rounds


[I 2024-12-01 19:19:19,701] Trial 31 finished with value: 604912000.0 and parameters: {'num_leaves': 124, 'learning_rate': 0.1346789005172443, 'min_data_in_leaf': 1187, 'feature_fraction': 0.4315440468010765, 'bagging_fraction': 0.6341613261533575}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[55]	cv_agg's valid gan_eval: 1.20982e+08 + 1.80911e+06
Training until validation scores don't improve for 72 rounds


[I 2024-12-01 19:19:27,459] Trial 32 finished with value: 605010000.0 and parameters: {'num_leaves': 144, 'learning_rate': 0.22344678018962444, 'min_data_in_leaf': 1280, 'feature_fraction': 0.4513163778171681, 'bagging_fraction': 0.582035039763909}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[22]	cv_agg's valid gan_eval: 1.21002e+08 + 1.71585e+06
Training until validation scores don't improve for 86 rounds


[I 2024-12-01 19:19:35,315] Trial 33 finished with value: 605969000.0 and parameters: {'num_leaves': 162, 'learning_rate': 0.13525415803039018, 'min_data_in_leaf': 1117, 'feature_fraction': 0.250324700584692, 'bagging_fraction': 0.5226657991765055}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[33]	cv_agg's valid gan_eval: 1.21194e+08 + 1.40845e+06
Training until validation scores don't improve for 80 rounds


[I 2024-12-01 19:19:44,684] Trial 34 finished with value: 604254000.0 and parameters: {'num_leaves': 187, 'learning_rate': 0.1651577819979626, 'min_data_in_leaf': 687, 'feature_fraction': 0.36283512978861465, 'bagging_fraction': 0.6832074340210609}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[24]	cv_agg's valid gan_eval: 1.20851e+08 + 1.72395e+06
Training until validation scores don't improve for 69 rounds


[I 2024-12-01 19:19:50,365] Trial 35 finished with value: 600355000.0 and parameters: {'num_leaves': 135, 'learning_rate': 0.25276304089643836, 'min_data_in_leaf': 790, 'feature_fraction': 0.11925736964268852, 'bagging_fraction': 0.4464452547949588}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[22]	cv_agg's valid gan_eval: 1.20071e+08 + 1.99752e+06
Training until validation scores don't improve for 73 rounds


[I 2024-12-01 19:19:58,883] Trial 36 finished with value: 603855000.0 and parameters: {'num_leaves': 103, 'learning_rate': 0.21650543940460104, 'min_data_in_leaf': 969, 'feature_fraction': 0.45827751048483095, 'bagging_fraction': 0.7547245257355563}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[18]	cv_agg's valid gan_eval: 1.20771e+08 + 1.54046e+06
Training until validation scores don't improve for 89 rounds


[I 2024-12-01 19:20:10,687] Trial 37 finished with value: 606109000.0 and parameters: {'num_leaves': 162, 'learning_rate': 0.12574492322650713, 'min_data_in_leaf': 1252, 'feature_fraction': 0.5358986541551873, 'bagging_fraction': 0.32060094174075454}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[48]	cv_agg's valid gan_eval: 1.21222e+08 + 2.1306e+06
Training until validation scores don't improve for 78 rounds


[I 2024-12-01 19:20:20,543] Trial 38 finished with value: 604408000.0 and parameters: {'num_leaves': 110, 'learning_rate': 0.1734485463495945, 'min_data_in_leaf': 807, 'feature_fraction': 0.40884261604056205, 'bagging_fraction': 0.548501737757241}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[32]	cv_agg's valid gan_eval: 1.20882e+08 + 1.81884e+06
Training until validation scores don't improve for 83 rounds


[I 2024-12-01 19:21:03,790] Trial 39 finished with value: 597793000.0 and parameters: {'num_leaves': 126, 'learning_rate': 0.14980337067006408, 'min_data_in_leaf': 35, 'feature_fraction': 0.5809025390541884, 'bagging_fraction': 0.6200378148343192}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[205]	cv_agg's valid gan_eval: 1.19559e+08 + 2.05258e+06
Training until validation scores don't improve for 70 rounds


[I 2024-12-01 19:21:12,674] Trial 40 finished with value: 602644000.0 and parameters: {'num_leaves': 95, 'learning_rate': 0.2383047045187608, 'min_data_in_leaf': 535, 'feature_fraction': 0.6452550668825101, 'bagging_fraction': 0.6745085595671988}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[12]	cv_agg's valid gan_eval: 1.20529e+08 + 2.08874e+06
Training until validation scores don't improve for 92 rounds


[I 2024-12-01 19:21:22,756] Trial 41 finished with value: 605822000.0 and parameters: {'num_leaves': 59, 'learning_rate': 0.1187313058093275, 'min_data_in_leaf': 1378, 'feature_fraction': 0.7606281244479332, 'bagging_fraction': 0.8567568695068524}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[52]	cv_agg's valid gan_eval: 1.21164e+08 + 2.20218e+06
Training until validation scores don't improve for 90 rounds


[I 2024-12-01 19:21:34,880] Trial 42 finished with value: 604842000.0 and parameters: {'num_leaves': 71, 'learning_rate': 0.12215211850101743, 'min_data_in_leaf': 1446, 'feature_fraction': 0.3291502476999053, 'bagging_fraction': 0.1479415877644158}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[55]	cv_agg's valid gan_eval: 1.20968e+08 + 2.01261e+06
Training until validation scores don't improve for 82 rounds


[I 2024-12-01 19:21:46,019] Trial 43 finished with value: 604786000.0 and parameters: {'num_leaves': 144, 'learning_rate': 0.15169250459850614, 'min_data_in_leaf': 1111, 'feature_fraction': 0.4857583664083968, 'bagging_fraction': 0.7681599869853829}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[29]	cv_agg's valid gan_eval: 1.20957e+08 + 1.68982e+06
Training until validation scores don't improve for 77 rounds


[I 2024-12-01 19:21:54,720] Trial 44 finished with value: 607019000.0 and parameters: {'num_leaves': 89, 'learning_rate': 0.18421798389037256, 'min_data_in_leaf': 974, 'feature_fraction': 0.2615290519041644, 'bagging_fraction': 0.944868271748967}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[26]	cv_agg's valid gan_eval: 1.21404e+08 + 1.77677e+06
Training until validation scores don't improve for 76 rounds


[I 2024-12-01 19:22:03,131] Trial 45 finished with value: 604128000.0 and parameters: {'num_leaves': 119, 'learning_rate': 0.19042305292081516, 'min_data_in_leaf': 1044, 'feature_fraction': 0.2541449135488022, 'bagging_fraction': 0.9597743989348098}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[29]	cv_agg's valid gan_eval: 1.20826e+08 + 1.9872e+06
Training until validation scores don't improve for 73 rounds


[I 2024-12-01 19:22:09,446] Trial 46 finished with value: 602112000.0 and parameters: {'num_leaves': 152, 'learning_rate': 0.2092747633267542, 'min_data_in_leaf': 977, 'feature_fraction': 0.11636540717086474, 'bagging_fraction': 0.9371740734998492}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[27]	cv_agg's valid gan_eval: 1.20422e+08 + 1.59432e+06
Training until validation scores don't improve for 66 rounds


[I 2024-12-01 19:22:15,334] Trial 47 finished with value: 603470000.0 and parameters: {'num_leaves': 93, 'learning_rate': 0.30006758216385, 'min_data_in_leaf': 1757, 'feature_fraction': 0.1870310425651687, 'bagging_fraction': 0.8807438274925358}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[27]	cv_agg's valid gan_eval: 1.20694e+08 + 1.92645e+06
Training until validation scores don't improve for 98 rounds


[I 2024-12-01 19:22:26,274] Trial 48 finished with value: 606144000.0 and parameters: {'num_leaves': 138, 'learning_rate': 0.10210752110549524, 'min_data_in_leaf': 1517, 'feature_fraction': 0.2743969849702629, 'bagging_fraction': 0.9993531605502418}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[53]	cv_agg's valid gan_eval: 1.21229e+08 + 2.1929e+06
Training until validation scores don't improve for 77 rounds


[I 2024-12-01 19:22:35,600] Trial 49 finished with value: 607593000.0 and parameters: {'num_leaves': 108, 'learning_rate': 0.1789126249470004, 'min_data_in_leaf': 1249, 'feature_fraction': 0.6106146240907904, 'bagging_fraction': 0.799722130094101}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[33]	cv_agg's valid gan_eval: 1.21519e+08 + 2.40163e+06
Training until validation scores don't improve for 63 rounds


[I 2024-12-01 19:22:41,675] Trial 50 finished with value: 598423000.0 and parameters: {'num_leaves': 108, 'learning_rate': 0.377245471272836, 'min_data_in_leaf': 1247, 'feature_fraction': 0.6167448405983863, 'bagging_fraction': 0.7023306190107879}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[12]	cv_agg's valid gan_eval: 1.19685e+08 + 1.83486e+06
Training until validation scores don't improve for 77 rounds


[I 2024-12-01 19:22:51,638] Trial 51 finished with value: 606452000.0 and parameters: {'num_leaves': 124, 'learning_rate': 0.18184958688434616, 'min_data_in_leaf': 1311, 'feature_fraction': 0.5340584970126977, 'bagging_fraction': 0.8009725379918269}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[28]	cv_agg's valid gan_eval: 1.2129e+08 + 2.33816e+06
Training until validation scores don't improve for 81 rounds


[I 2024-12-01 19:22:59,442] Trial 52 finished with value: 606214000.0 and parameters: {'num_leaves': 100, 'learning_rate': 0.15989525469759355, 'min_data_in_leaf': 1140, 'feature_fraction': 0.673885027293231, 'bagging_fraction': 0.9359219361302861}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[27]	cv_agg's valid gan_eval: 1.21243e+08 + 2.32431e+06
Training until validation scores don't improve for 71 rounds


[I 2024-12-01 19:23:08,463] Trial 53 finished with value: 602679000.0 and parameters: {'num_leaves': 83, 'learning_rate': 0.2335809992196082, 'min_data_in_leaf': 961, 'feature_fraction': 0.3717143132931122, 'bagging_fraction': 0.8488785508889529}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[25]	cv_agg's valid gan_eval: 1.20536e+08 + 1.91739e+06
Training until validation scores don't improve for 78 rounds


[I 2024-12-01 19:23:16,518] Trial 54 finished with value: 603260000.0 and parameters: {'num_leaves': 65, 'learning_rate': 0.17599916599843787, 'min_data_in_leaf': 829, 'feature_fraction': 0.7065311659470925, 'bagging_fraction': 0.5607694537018034}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[23]	cv_agg's valid gan_eval: 1.20652e+08 + 2.01833e+06
Training until validation scores don't improve for 74 rounds


[I 2024-12-01 19:23:25,527] Trial 55 finished with value: 604849000.0 and parameters: {'num_leaves': 116, 'learning_rate': 0.20078890624962045, 'min_data_in_leaf': 1167, 'feature_fraction': 0.44835657661107137, 'bagging_fraction': 0.6124990936467805}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[22]	cv_agg's valid gan_eval: 1.2097e+08 + 1.7228e+06
Training until validation scores don't improve for 84 rounds


[I 2024-12-01 19:23:37,075] Trial 56 finished with value: 606543000.0 and parameters: {'num_leaves': 45, 'learning_rate': 0.14373851082834674, 'min_data_in_leaf': 716, 'feature_fraction': 0.3906709236514054, 'bagging_fraction': 0.7631818174027599}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[35]	cv_agg's valid gan_eval: 1.21309e+08 + 1.93893e+06
Training until validation scores don't improve for 62 rounds


[I 2024-12-01 19:23:44,438] Trial 57 finished with value: 596638000.0 and parameters: {'num_leaves': 130, 'learning_rate': 0.41196784308981027, 'min_data_in_leaf': 934, 'feature_fraction': 0.6183161342888287, 'bagging_fraction': 0.8639010412685013}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[10]	cv_agg's valid gan_eval: 1.19328e+08 + 2.09926e+06
Training until validation scores don't improve for 81 rounds


[I 2024-12-01 19:23:54,791] Trial 58 finished with value: 607131000.0 and parameters: {'num_leaves': 32, 'learning_rate': 0.15953244520065413, 'min_data_in_leaf': 1065, 'feature_fraction': 0.5029033330046067, 'bagging_fraction': 0.6648739224718085}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[26]	cv_agg's valid gan_eval: 1.21426e+08 + 2.01983e+06
Training until validation scores don't improve for 93 rounds


[I 2024-12-01 19:24:07,854] Trial 59 finished with value: 605234000.0 and parameters: {'num_leaves': 28, 'learning_rate': 0.11488904542177002, 'min_data_in_leaf': 1342, 'feature_fraction': 0.5036445122671557, 'bagging_fraction': 0.665152566217836}. Best is trial 21 with value: 609161000.0.


Early stopping, best iteration is:
[55]	cv_agg's valid gan_eval: 1.21047e+08 + 1.86952e+06


In [11]:
# Objective value of every trial, in the order they were run.
plot_optimization_history(study)

In [12]:
# Estimated importance of each tuned hyperparameter for the objective value.
plot_param_importances(study)

In [13]:
# Objective value against each hyperparameter, one panel per parameter.
plot_slice(study)

In [14]:
# Contour plots of the objective value for the tuned hyperparameters.
plot_contour(study)

In [15]:
# Contour plot restricted to the num_leaves / min_data_in_leaf pair.
plot_contour(study, params=['num_leaves','min_data_in_leaf'])

In [16]:
# Hyperparameters of the trial with the highest objective value.
study.best_trial.params

{'num_leaves': 129,
 'learning_rate': 0.14285485549019958,
 'min_data_in_leaf': 1190,
 'feature_fraction': 0.41197212976185593,
 'bagging_fraction': 0.6194842328414374}

In [18]:
# Best boosting iteration of the winning trial, as stored by objective()
# through trial.set_user_attr("best_iter", ...).
best_iter = study.best_trial.user_attrs["best_iter"]
best_iter

50