# Optimización de hiperparámetros

Setup

In [1]:
import pandas as pd
import polars as pl
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

import lightgbm as lgb

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances, plot_slice, plot_contour

from time import time

import pickle

In [2]:
# Disabled shell command: copies the undersampled parquet dataset from the
# buckets/b1 datasets folder into the local datasets folder.
# !gsutil cp /home/clas_giulia_s/buckets/b1/datasets/competencia_02_fe_v01_undersampled.parquet /home/clas_giulia_s/datasets/

In [3]:
# Root folder of the project; every other location is derived from it.
# NOTE(review): absolute, machine-specific path — edit it when running elsewhere.
# base_path = '/home/clas_giulia_s/buckets/b1/'
base_path = '/Users/ignacio/MAESTRIA/DMEF/'
dataset_path = f'{base_path}datasets/'
modelos_path = f'{base_path}modelos/'
db_path = f'{base_path}db/'
dataset_file = 'competencia_01_fe_modelito_undersampled.csv'

# Business constants consumed by the custom gain metric.
ganancia_acierto = 273_000
costo_estimulo = 7_000

# Random seeds available to the notebook.
semillas = [261823, 289871, 379817, 481301, 959723]

# Load the dataset (the parquet variant is kept for reference).
# data = pd.read_parquet(dataset_path + dataset_file)
data = pd.read_csv(f'{dataset_path}{dataset_file}')

In [4]:
# Longer training window, kept for reference (202106 is left out for testing):
# meses_train = [201906, 201907, 201908, 201909, 201910, 201911, 201912,
#                202001, 202002, 202003, 202004, 202005, 202006,
#                202007, 202008, 202009, 202010, 202011, 202012,
#                202101, 202102, 202103, 202104, 202105]

meses_train = [202101, 202102, 202103]  # 202104 is left out for testing

# Keep only the training months. The explicit .copy() makes the result an
# independent frame, so adding columns to it afterwards does not trigger
# pandas' SettingWithCopyWarning.
data = data[data['foto_mes'].isin(meses_train)].copy()
data.shape

(51606, 679)

In [5]:
# Assign a weight to every row according to its class.
# 'BAJA+2' -> 1.00002, 'BAJA+1' -> 1.00001, anything else -> 1.0.
es_baja_2 = data['clase_ternaria'] == 'BAJA+2'
es_baja_1 = data['clase_ternaria'] == 'BAJA+1'

data['clase_peso'] = np.select(
    [es_baja_2, es_baja_1],
    [1.00002, 1.00001],
    default=1.0,
)

In [6]:
# Split the frame into model inputs, binary target and per-row weights.
columnas_no_predictoras = ['clase_ternaria', 'clase_peso', 'clase_binaria']

X_train = data.drop(columns=columnas_no_predictoras)
y_train_binaria = data['clase_binaria']
w_train = data['clase_peso']

In [7]:
def lgb_gan_eval(y_pred, data):
    """Custom LightGBM metric: best cumulative gain over the ranked predictions.

    Rows are ordered by predicted score, highest first, and the per-row gains
    are accumulated in that order; the maximum of that running total is the
    metric value.

    Args:
        y_pred: predicted scores, one per row of ``data``.
        data: object exposing ``get_weight()``; the weights encode the class
            of each row (1.00002 marks the rows that earn the gain).

    Returns:
        Tuple ``('gan_eval', max_cumulative_gain, True)``. The trailing
        ``True`` is the "higher is better" flag of LightGBM's feval contract.
    """
    pesos = data.get_weight()

    # weight == 1.00002 -> +ganancia_acierto
    # weight <  1.00002 -> -costo_estimulo
    # anything else     -> 0
    ganancia_por_fila = np.where(
        pesos == 1.00002,
        ganancia_acierto,
        np.where(pesos < 1.00002, -costo_estimulo, 0),
    )

    # Ascending argsort reversed, so the highest scores come first.
    orden_desc = np.argsort(y_pred)[::-1]
    ganancia_acumulada = ganancia_por_fila[orden_desc].cumsum()

    return 'gan_eval', ganancia_acumulada.max(), True

# Optimización

In [8]:
def objective(trial):
    """Optuna objective: cross-validated gain of LightGBM for one hyperparameter draw.

    Samples the hyperparameters from ``trial``, runs a stratified k-fold
    ``lgb.cv`` with the custom ``lgb_gan_eval`` metric and early stopping, and
    stores the best boosting iteration in the trial's ``best_iter`` user
    attribute.

    Args:
        trial: Optuna trial used to sample hyperparameters and to record
            ``best_iter``.

    Returns:
        The best mean validation gain across boosting rounds, multiplied by
        the number of folds (presumably to scale the per-fold gain up to the
        full training set — confirm).
    """
    n_folds = 5

    # Each suggestion must be a plain scalar. A trailing comma after the call
    # would silently wrap the value in a 1-tuple.
    num_leaves = trial.suggest_int('num_leaves', 8, 200)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.5)  # the lower it is, the more iterations are needed
    min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 1, 2000)
    feature_fraction = trial.suggest_float('feature_fraction', 0.1, 1.0)
    # NOTE(review): LightGBM documents that bagging only takes effect when
    # bagging_freq is non-zero; bagging_freq is not set below, so this search
    # dimension may be a no-op — confirm and add it if bagging is intended.
    bagging_fraction = trial.suggest_float('bagging_fraction', 0.1, 1.0)

    params = {
        'objective': 'binary',
        'metric': 'custom',
        'boosting_type': 'gbdt',
        'first_metric_only': True,
        'boost_from_average': True,
        'feature_pre_filter': False,
        'max_bin': 31,
        'num_leaves': num_leaves,
        'learning_rate': learning_rate,
        'min_data_in_leaf': min_data_in_leaf,
        'feature_fraction': feature_fraction,
        'bagging_fraction': bagging_fraction,
        'seed': semillas[0],
        'verbose': -1,
    }

    train_data = lgb.Dataset(X_train,
                             label=y_train_binaria,  # target class to learn
                             weight=w_train)

    # Early-stopping patience grows as the learning rate shrinks.
    rounds = int(50 + 5 / learning_rate)
    early_stopping_cb = lgb.early_stopping(stopping_rounds=rounds)

    cv_results = lgb.cv(
        params,
        train_data,
        num_boost_round=10000,  # upper bound; raise it if early stopping never triggers
        callbacks=[early_stopping_cb],
        feval=lgb_gan_eval,
        stratified=True,
        nfold=n_folds,
        seed=semillas[0],
    )

    gan_media_por_ronda = cv_results['valid gan_eval-mean']
    max_gan = max(gan_media_por_ronda)
    # Boosting rounds are 1-based, list positions are 0-based.
    best_iter = gan_media_por_ronda.index(max_gan) + 1

    # Store the best iteration so the final model can be trained with it.
    trial.set_user_attr("best_iter", best_iter)

    # Objective value; the study decides whether it is maximized or minimized.
    return max_gan * n_folds

In [9]:
# The study is stored in a SQLite file under db_path; with load_if_exists=True
# an existing study with the same name is reused instead of raising an error.
study_name = "competencia1_modelito_lgbm" # UPDATE
storage_name = f"sqlite:///{db_path}optimizacion_lgbm_modelito.db"

study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    direction="maximize",
    load_if_exists=True,
)

[I 2024-11-29 00:11:15,865] A new study created in RDB with name: competencia1_modelito_lgbm


In [10]:
# Number of trials currently stored in the study.
len(study.trials)

0

In [11]:
# Run 60 trials; each trial evaluates objective() once.
study.optimize(objective, n_trials=60)

Training until validation scores don't improve for 80 rounds


[I 2024-11-29 00:11:23,427] Trial 0 finished with value: 605808000.0 and parameters: {'num_leaves': 92, 'learning_rate': 0.16590205894469978, 'min_data_in_leaf': 1807, 'feature_fraction': 0.20108150681730297, 'bagging_fraction': 0.8148173312186163}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[53]	cv_agg's valid gan_eval: 1.21162e+08 + 2.07394e+06
Training until validation scores don't improve for 61 rounds


[I 2024-11-29 00:11:30,781] Trial 1 finished with value: 587139000.0 and parameters: {'num_leaves': 87, 'learning_rate': 0.43162019897259574, 'min_data_in_leaf': 207, 'feature_fraction': 0.9346582061887435, 'bagging_fraction': 0.8481750242583025}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[4]	cv_agg's valid gan_eval: 1.17428e+08 + 1.45146e+06
Training until validation scores don't improve for 78 rounds


[I 2024-11-29 00:11:39,833] Trial 2 finished with value: 603820000.0 and parameters: {'num_leaves': 62, 'learning_rate': 0.1756510470370908, 'min_data_in_leaf': 1432, 'feature_fraction': 0.47186660132484554, 'bagging_fraction': 0.7057986966408546}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[37]	cv_agg's valid gan_eval: 1.20764e+08 + 1.94591e+06
Training until validation scores don't improve for 60 rounds


[I 2024-11-29 00:11:48,015] Trial 3 finished with value: 583828000.0 and parameters: {'num_leaves': 62, 'learning_rate': 0.4997707412945284, 'min_data_in_leaf': 21, 'feature_fraction': 0.27770922623993316, 'bagging_fraction': 0.38746862575887}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[27]	cv_agg's valid gan_eval: 1.16766e+08 + 2.09734e+06
Training until validation scores don't improve for 66 rounds


[I 2024-11-29 00:11:53,899] Trial 4 finished with value: 599683000.0 and parameters: {'num_leaves': 175, 'learning_rate': 0.3048490265941835, 'min_data_in_leaf': 2000, 'feature_fraction': 0.4544809482686589, 'bagging_fraction': 0.6952114312187982}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[12]	cv_agg's valid gan_eval: 1.19937e+08 + 1.81604e+06
Training until validation scores don't improve for 61 rounds


[I 2024-11-29 00:12:10,791] Trial 5 finished with value: 579824000.0 and parameters: {'num_leaves': 117, 'learning_rate': 0.4455617699081071, 'min_data_in_leaf': 89, 'feature_fraction': 0.6573985187325747, 'bagging_fraction': 0.7563672512795598}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[69]	cv_agg's valid gan_eval: 1.15965e+08 + 2.03077e+06
Training until validation scores don't improve for 69 rounds


[I 2024-11-29 00:12:18,218] Trial 6 finished with value: 603981000.0 and parameters: {'num_leaves': 140, 'learning_rate': 0.2503067355229786, 'min_data_in_leaf': 1600, 'feature_fraction': 0.5128562035699757, 'bagging_fraction': 0.2792145064838544}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[26]	cv_agg's valid gan_eval: 1.20796e+08 + 2.06738e+06
Training until validation scores don't improve for 63 rounds


[I 2024-11-29 00:12:24,727] Trial 7 finished with value: 591297000.0 and parameters: {'num_leaves': 64, 'learning_rate': 0.35903330829558283, 'min_data_in_leaf': 31, 'feature_fraction': 0.2352033866947371, 'bagging_fraction': 0.31480453354624993}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[8]	cv_agg's valid gan_eval: 1.18259e+08 + 919134
Training until validation scores don't improve for 66 rounds


[I 2024-11-29 00:12:31,399] Trial 8 finished with value: 597849000.0 and parameters: {'num_leaves': 109, 'learning_rate': 0.3123484488958601, 'min_data_in_leaf': 528, 'feature_fraction': 0.26605805847570047, 'bagging_fraction': 0.8801957928479496}. Best is trial 0 with value: 605808000.0.


Early stopping, best iteration is:
[9]	cv_agg's valid gan_eval: 1.1957e+08 + 1.74194e+06
Training until validation scores don't improve for 89 rounds


[I 2024-11-29 00:12:39,967] Trial 9 finished with value: 606991000.0 and parameters: {'num_leaves': 68, 'learning_rate': 0.12681818895595842, 'min_data_in_leaf': 849, 'feature_fraction': 0.1790543707090129, 'bagging_fraction': 0.8161152155064428}. Best is trial 9 with value: 606991000.0.


Early stopping, best iteration is:
[55]	cv_agg's valid gan_eval: 1.21398e+08 + 2.03323e+06
Training until validation scores don't improve for 275 rounds


[I 2024-11-29 00:13:03,854] Trial 10 finished with value: 608755000.0 and parameters: {'num_leaves': 10, 'learning_rate': 0.022150491821207444, 'min_data_in_leaf': 987, 'feature_fraction': 0.7418054528131783, 'bagging_fraction': 0.5135635741695919}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[545]	cv_agg's valid gan_eval: 1.21751e+08 + 2.38795e+06
Training until validation scores don't improve for 538 rounds


[I 2024-11-29 00:13:51,933] Trial 11 finished with value: 607201000.0 and parameters: {'num_leaves': 11, 'learning_rate': 0.010227457125731592, 'min_data_in_leaf': 968, 'feature_fraction': 0.766353763420427, 'bagging_fraction': 0.5284137671068697}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[977]	cv_agg's valid gan_eval: 1.2144e+08 + 2.34322e+06
Training until validation scores don't improve for 206 rounds


[I 2024-11-29 00:14:07,788] Trial 12 finished with value: 607761000.0 and parameters: {'num_leaves': 9, 'learning_rate': 0.03185318062478934, 'min_data_in_leaf': 1143, 'feature_fraction': 0.7736393420779046, 'bagging_fraction': 0.5177199703327249}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[311]	cv_agg's valid gan_eval: 1.21552e+08 + 2.43941e+06
Training until validation scores don't improve for 295 rounds


[I 2024-11-29 00:14:32,072] Trial 13 finished with value: 608202000.0 and parameters: {'num_leaves': 8, 'learning_rate': 0.020379794125813755, 'min_data_in_leaf': 1262, 'feature_fraction': 0.8419358443509979, 'bagging_fraction': 0.5193667286465051}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[530]	cv_agg's valid gan_eval: 1.2164e+08 + 2.67052e+06
Training until validation scores don't improve for 102 rounds


[I 2024-11-29 00:14:41,087] Trial 14 finished with value: 605465000.0 and parameters: {'num_leaves': 35, 'learning_rate': 0.09503171163638492, 'min_data_in_leaf': 1274, 'feature_fraction': 0.9818912807628669, 'bagging_fraction': 0.14383326129269564}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[62]	cv_agg's valid gan_eval: 1.21093e+08 + 2.1255e+06
Training until validation scores don't improve for 124 rounds


[I 2024-11-29 00:14:56,539] Trial 15 finished with value: 606564000.0 and parameters: {'num_leaves': 33, 'learning_rate': 0.06735575256362072, 'min_data_in_leaf': 881, 'feature_fraction': 0.8373485640856828, 'bagging_fraction': 0.5929248695325122}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[142]	cv_agg's valid gan_eval: 1.21313e+08 + 1.7262e+06
Training until validation scores don't improve for 73 rounds


[I 2024-11-29 00:15:03,438] Trial 16 finished with value: 603470000.0 and parameters: {'num_leaves': 35, 'learning_rate': 0.21532425597202148, 'min_data_in_leaf': 695, 'feature_fraction': 0.6646966686469162, 'bagging_fraction': 0.4251338650451101}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[20]	cv_agg's valid gan_eval: 1.20694e+08 + 2.23948e+06
Training until validation scores don't improve for 124 rounds


[I 2024-11-29 00:15:22,255] Trial 17 finished with value: 607068000.0 and parameters: {'num_leaves': 154, 'learning_rate': 0.06694677435712171, 'min_data_in_leaf': 479, 'feature_fraction': 0.6348089414847016, 'bagging_fraction': 0.5625862261659423}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[57]	cv_agg's valid gan_eval: 1.21414e+08 + 1.62096e+06
Training until validation scores don't improve for 91 rounds


[I 2024-11-29 00:15:32,410] Trial 18 finished with value: 606697000.0 and parameters: {'num_leaves': 200, 'learning_rate': 0.12096655168356694, 'min_data_in_leaf': 1222, 'feature_fraction': 0.8892167101529468, 'bagging_fraction': 0.13441203318241868}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[65]	cv_agg's valid gan_eval: 1.21339e+08 + 2.02509e+06
Training until validation scores don't improve for 323 rounds


[I 2024-11-29 00:16:18,564] Trial 19 finished with value: 606690000.0 and parameters: {'num_leaves': 25, 'learning_rate': 0.018306768886524216, 'min_data_in_leaf': 1474, 'feature_fraction': 0.7722583579730489, 'bagging_fraction': 0.9677790682168869}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[501]	cv_agg's valid gan_eval: 1.21338e+08 + 2.20372e+06
Training until validation scores don't improve for 80 rounds


[I 2024-11-29 00:16:29,255] Trial 20 finished with value: 606095000.0 and parameters: {'num_leaves': 48, 'learning_rate': 0.1652711895580185, 'min_data_in_leaf': 1093, 'feature_fraction': 0.586692063842891, 'bagging_fraction': 0.4283042751088429}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[36]	cv_agg's valid gan_eval: 1.21219e+08 + 2.14457e+06
Training until validation scores don't improve for 145 rounds


[I 2024-11-29 00:16:44,843] Trial 21 finished with value: 606844000.0 and parameters: {'num_leaves': 8, 'learning_rate': 0.052304127391142846, 'min_data_in_leaf': 1166, 'feature_fraction': 0.7776917951200104, 'bagging_fraction': 0.49811143854116324}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[205]	cv_agg's valid gan_eval: 1.21369e+08 + 2.56916e+06
Training until validation scores don't improve for 342 rounds


[I 2024-11-29 00:17:17,752] Trial 22 finished with value: 607397000.0 and parameters: {'num_leaves': 8, 'learning_rate': 0.017103135917614355, 'min_data_in_leaf': 1378, 'feature_fraction': 0.8646743018249216, 'bagging_fraction': 0.6234067957322991}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[599]	cv_agg's valid gan_eval: 1.21479e+08 + 2.28893e+06
Training until validation scores don't improve for 97 rounds


[I 2024-11-29 00:17:29,893] Trial 23 finished with value: 606123000.0 and parameters: {'num_leaves': 22, 'learning_rate': 0.1053421369912894, 'min_data_in_leaf': 1635, 'feature_fraction': 0.7100770248168906, 'bagging_fraction': 0.47598170174864973}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[113]	cv_agg's valid gan_eval: 1.21225e+08 + 2.17321e+06
Training until validation scores don't improve for 135 rounds


[I 2024-11-29 00:17:48,708] Trial 24 finished with value: 606557000.0 and parameters: {'num_leaves': 50, 'learning_rate': 0.05879110353036679, 'min_data_in_leaf': 740, 'feature_fraction': 0.9955640765888834, 'bagging_fraction': 0.32288391918259507}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[94]	cv_agg's valid gan_eval: 1.21311e+08 + 1.8801e+06
Training until validation scores don't improve for 166 rounds


[I 2024-11-29 00:18:10,895] Trial 25 finished with value: 607145000.0 and parameters: {'num_leaves': 45, 'learning_rate': 0.042865634173209544, 'min_data_in_leaf': 1000, 'feature_fraction': 0.8004710460978604, 'bagging_fraction': 0.6400389382468396}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[153]	cv_agg's valid gan_eval: 1.21429e+08 + 2.10008e+06
Training until validation scores don't improve for 107 rounds


[I 2024-11-29 00:18:22,563] Trial 26 finished with value: 607194000.0 and parameters: {'num_leaves': 21, 'learning_rate': 0.08656592020856475, 'min_data_in_leaf': 1099, 'feature_fraction': 0.7164169650188944, 'bagging_fraction': 0.2199750350570372}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[73]	cv_agg's valid gan_eval: 1.21439e+08 + 1.90449e+06
Training until validation scores don't improve for 82 rounds


[I 2024-11-29 00:18:30,461] Trial 27 finished with value: 604989000.0 and parameters: {'num_leaves': 82, 'learning_rate': 0.15305613462157722, 'min_data_in_leaf': 1289, 'feature_fraction': 0.9053268089299479, 'bagging_fraction': 0.3730205913395134}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[32]	cv_agg's valid gan_eval: 1.20998e+08 + 2.1783e+06
Training until validation scores don't improve for 74 rounds


[I 2024-11-29 00:18:39,125] Trial 28 finished with value: 601944000.0 and parameters: {'num_leaves': 20, 'learning_rate': 0.2079877858668121, 'min_data_in_leaf': 504, 'feature_fraction': 0.34586289506447443, 'bagging_fraction': 0.4881390634044922}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[35]	cv_agg's valid gan_eval: 1.20389e+08 + 1.61604e+06
Training until validation scores don't improve for 501 rounds


[I 2024-11-29 00:20:36,793] Trial 29 finished with value: 606088000.0 and parameters: {'num_leaves': 42, 'learning_rate': 0.011074212418101498, 'min_data_in_leaf': 1876, 'feature_fraction': 0.5936415430923463, 'bagging_fraction': 0.6596546500656982}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[1204]	cv_agg's valid gan_eval: 1.21218e+08 + 1.9711e+06
Training until validation scores don't improve for 86 rounds


[I 2024-11-29 00:20:45,175] Trial 30 finished with value: 606088000.0 and parameters: {'num_leaves': 77, 'learning_rate': 0.13805083464447898, 'min_data_in_leaf': 1559, 'feature_fraction': 0.7131030000785364, 'bagging_fraction': 0.5611446775692448}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[53]	cv_agg's valid gan_eval: 1.21218e+08 + 2.11428e+06
Training until validation scores don't improve for 235 rounds


[I 2024-11-29 00:21:07,255] Trial 31 finished with value: 608216000.0 and parameters: {'num_leaves': 8, 'learning_rate': 0.02691534152675993, 'min_data_in_leaf': 1349, 'feature_fraction': 0.8474367283619626, 'bagging_fraction': 0.6146707285994735}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[406]	cv_agg's valid gan_eval: 1.21643e+08 + 2.28618e+06
Training until validation scores don't improve for 162 rounds


[I 2024-11-29 00:21:24,494] Trial 32 finished with value: 606438000.0 and parameters: {'num_leaves': 8, 'learning_rate': 0.044530585983928506, 'min_data_in_leaf': 1688, 'feature_fraction': 0.9373247173767406, 'bagging_fraction': 0.7444950301261868}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[311]	cv_agg's valid gan_eval: 1.21288e+08 + 2.44807e+06
Training until validation scores don't improve for 107 rounds


[I 2024-11-29 00:21:34,781] Trial 33 finished with value: 606921000.0 and parameters: {'num_leaves': 26, 'learning_rate': 0.08736451869636128, 'min_data_in_leaf': 1413, 'feature_fraction': 0.8404382012561379, 'bagging_fraction': 0.44144246969115536}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[59]	cv_agg's valid gan_eval: 1.21384e+08 + 1.98643e+06
Training until validation scores don't improve for 177 rounds


[I 2024-11-29 00:21:57,733] Trial 34 finished with value: 608405000.0 and parameters: {'num_leaves': 95, 'learning_rate': 0.03919577887298921, 'min_data_in_leaf': 1097, 'feature_fraction': 0.9440949238267096, 'bagging_fraction': 0.5302431004607578}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[166]	cv_agg's valid gan_eval: 1.21681e+08 + 1.81347e+06
Training until validation scores don't improve for 111 rounds


[I 2024-11-29 00:22:09,128] Trial 35 finished with value: 604730000.0 and parameters: {'num_leaves': 90, 'learning_rate': 0.08101504502552456, 'min_data_in_leaf': 889, 'feature_fraction': 0.11922783348961691, 'bagging_fraction': 0.5938634928541984}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[76]	cv_agg's valid gan_eval: 1.20946e+08 + 2.28247e+06
Training until validation scores don't improve for 93 rounds


[I 2024-11-29 00:22:18,958] Trial 36 finished with value: 607369000.0 and parameters: {'num_leaves': 101, 'learning_rate': 0.11370770538485568, 'min_data_in_leaf': 1345, 'feature_fraction': 0.9170143959386692, 'bagging_fraction': 0.6912164835299828}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[59]	cv_agg's valid gan_eval: 1.21474e+08 + 2.02441e+06
Training until validation scores don't improve for 77 rounds


[I 2024-11-29 00:22:25,962] Trial 37 finished with value: 601685000.0 and parameters: {'num_leaves': 128, 'learning_rate': 0.18043121502451903, 'min_data_in_leaf': 1752, 'feature_fraction': 0.9525057657119205, 'bagging_fraction': 0.741669198830893}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[36]	cv_agg's valid gan_eval: 1.20337e+08 + 2.13901e+06
Training until validation scores don't improve for 174 rounds


[I 2024-11-29 00:22:49,171] Trial 38 finished with value: 607110000.0 and parameters: {'num_leaves': 161, 'learning_rate': 0.04015749894345536, 'min_data_in_leaf': 1508, 'feature_fraction': 0.8202917827423185, 'bagging_fraction': 0.35208997382863594}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[238]	cv_agg's valid gan_eval: 1.21422e+08 + 1.98028e+06
Training until validation scores don't improve for 63 rounds


[I 2024-11-29 00:22:55,731] Trial 39 finished with value: 597772000.0 and parameters: {'num_leaves': 54, 'learning_rate': 0.38333040713171396, 'min_data_in_leaf': 728, 'feature_fraction': 0.8780668724322158, 'bagging_fraction': 0.5917670589604842}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[10]	cv_agg's valid gan_eval: 1.19554e+08 + 1.80092e+06
Training until validation scores don't improve for 67 rounds


[I 2024-11-29 00:23:03,604] Trial 40 finished with value: 602917000.0 and parameters: {'num_leaves': 196, 'learning_rate': 0.2849413099043189, 'min_data_in_leaf': 1034, 'feature_fraction': 0.4447517572263404, 'bagging_fraction': 0.4579298205202106}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[12]	cv_agg's valid gan_eval: 1.20583e+08 + 1.82931e+06
Training until validation scores don't improve for 192 rounds


[I 2024-11-29 00:23:33,212] Trial 41 finished with value: 607894000.0 and parameters: {'num_leaves': 30, 'learning_rate': 0.03512120763058125, 'min_data_in_leaf': 1194, 'feature_fraction': 0.7428243269299659, 'bagging_fraction': 0.5226307432561227}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[266]	cv_agg's valid gan_eval: 1.21579e+08 + 1.92466e+06
Training until validation scores don't improve for 131 rounds


[I 2024-11-29 00:23:46,978] Trial 42 finished with value: 608146000.0 and parameters: {'num_leaves': 16, 'learning_rate': 0.06118877064645903, 'min_data_in_leaf': 1223, 'feature_fraction': 0.7305505989051485, 'bagging_fraction': 0.5447688740448585}. Best is trial 10 with value: 608755000.0.


Early stopping, best iteration is:
[100]	cv_agg's valid gan_eval: 1.21629e+08 + 2.06197e+06
Training until validation scores don't improve for 126 rounds


[I 2024-11-29 00:24:02,318] Trial 43 finished with value: 610491000.0 and parameters: {'num_leaves': 16, 'learning_rate': 0.06537511998403012, 'min_data_in_leaf': 1295, 'feature_fraction': 0.6760496941965302, 'bagging_fraction': 0.6807642069212421}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[139]	cv_agg's valid gan_eval: 1.22098e+08 + 2.1468e+06
Training until validation scores don't improve for 60 rounds


[I 2024-11-29 00:24:09,291] Trial 44 finished with value: 600999000.0 and parameters: {'num_leaves': 40, 'learning_rate': 0.4787519614287913, 'min_data_in_leaf': 1328, 'feature_fraction': 0.655154706431596, 'bagging_fraction': 0.691987860999105}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[7]	cv_agg's valid gan_eval: 1.202e+08 + 2.32441e+06
Training until validation scores don't improve for 114 rounds


[I 2024-11-29 00:24:27,406] Trial 45 finished with value: 606270000.0 and parameters: {'num_leaves': 58, 'learning_rate': 0.07693982673530042, 'min_data_in_leaf': 930, 'feature_fraction': 0.8208071248399762, 'bagging_fraction': 0.8119283301549393}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[120]	cv_agg's valid gan_eval: 1.21254e+08 + 2.5869e+06
Training until validation scores don't improve for 254 rounds


[I 2024-11-29 00:25:02,206] Trial 46 finished with value: 607432000.0 and parameters: {'num_leaves': 72, 'learning_rate': 0.024401001625607585, 'min_data_in_leaf': 1042, 'feature_fraction': 0.9515035132978955, 'bagging_fraction': 0.6391027193960584}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[232]	cv_agg's valid gan_eval: 1.21486e+08 + 1.96243e+06
Training until validation scores don't improve for 85 rounds


[I 2024-11-29 00:25:14,538] Trial 47 finished with value: 607509000.0 and parameters: {'num_leaves': 17, 'learning_rate': 0.14054256838065116, 'min_data_in_leaf': 816, 'feature_fraction': 0.5234868410699182, 'bagging_fraction': 0.7889603745017987}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[54]	cv_agg's valid gan_eval: 1.21502e+08 + 2.19404e+06
Training until validation scores don't improve for 96 rounds


[I 2024-11-29 00:25:33,210] Trial 48 finished with value: 604247000.0 and parameters: {'num_leaves': 135, 'learning_rate': 0.1074021197613908, 'min_data_in_leaf': 229, 'feature_fraction': 0.6827948493191824, 'bagging_fraction': 0.8734474889241277}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[22]	cv_agg's valid gan_eval: 1.20849e+08 + 2.00383e+06
Training until validation scores don't improve for 136 rounds


[I 2024-11-29 00:25:50,172] Trial 49 finished with value: 606697000.0 and parameters: {'num_leaves': 33, 'learning_rate': 0.05797599145805908, 'min_data_in_leaf': 1449, 'feature_fraction': 0.8579256162926288, 'bagging_fraction': 0.39016129404949673}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[139]	cv_agg's valid gan_eval: 1.21339e+08 + 1.78498e+06
Training until validation scores don't improve for 542 rounds


[I 2024-11-29 00:27:09,082] Trial 50 finished with value: 607950000.0 and parameters: {'num_leaves': 117, 'learning_rate': 0.010160220984731183, 'min_data_in_leaf': 1111, 'feature_fraction': 0.8075567187579257, 'bagging_fraction': 0.7136737529422751}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[644]	cv_agg's valid gan_eval: 1.2159e+08 + 2.05147e+06
Training until validation scores don't improve for 123 rounds


[I 2024-11-29 00:27:20,698] Trial 51 finished with value: 606613000.0 and parameters: {'num_leaves': 16, 'learning_rate': 0.06842211597413633, 'min_data_in_leaf': 1259, 'feature_fraction': 0.7553771743984748, 'bagging_fraction': 0.5502349838039875}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[83]	cv_agg's valid gan_eval: 1.21323e+08 + 2.11279e+06
Training until validation scores don't improve for 194 rounds


[I 2024-11-29 00:27:49,488] Trial 52 finished with value: 608930000.0 and parameters: {'num_leaves': 14, 'learning_rate': 0.034660061785605975, 'min_data_in_leaf': 1172, 'feature_fraction': 0.590591316184365, 'bagging_fraction': 0.6092632210833665}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[225]	cv_agg's valid gan_eval: 1.21786e+08 + 2.12815e+06
Training until validation scores don't improve for 164 rounds


[I 2024-11-29 00:28:13,674] Trial 53 finished with value: 607600000.0 and parameters: {'num_leaves': 25, 'learning_rate': 0.04370476680661608, 'min_data_in_leaf': 967, 'feature_fraction': 0.6113430894183342, 'bagging_fraction': 0.5827026227460055}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[171]	cv_agg's valid gan_eval: 1.2152e+08 + 1.92709e+06
Training until validation scores don't improve for 192 rounds


[I 2024-11-29 00:28:44,291] Trial 54 finished with value: 608237000.0 and parameters: {'num_leaves': 14, 'learning_rate': 0.03499795729630839, 'min_data_in_leaf': 1166, 'feature_fraction': 0.5626100665337322, 'bagging_fraction': 0.6704711181630505}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[188]	cv_agg's valid gan_eval: 1.21647e+08 + 2.35418e+06
Training until validation scores don't improve for 191 rounds


[I 2024-11-29 00:29:21,619] Trial 55 finished with value: 608335000.0 and parameters: {'num_leaves': 16, 'learning_rate': 0.03524348364623114, 'min_data_in_leaf': 1154, 'feature_fraction': 0.5498067585035321, 'bagging_fraction': 0.6692353022929081}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[270]	cv_agg's valid gan_eval: 1.21667e+08 + 2.26886e+06
Training until validation scores don't improve for 104 rounds


[I 2024-11-29 00:29:39,317] Trial 56 finished with value: 608426000.0 and parameters: {'num_leaves': 38, 'learning_rate': 0.09210306445514575, 'min_data_in_leaf': 647, 'feature_fraction': 0.47365916237044065, 'bagging_fraction': 0.6824398355242607}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[59]	cv_agg's valid gan_eval: 1.21685e+08 + 2.08531e+06
Training until validation scores don't improve for 103 rounds


[I 2024-11-29 00:29:52,641] Trial 57 finished with value: 606487000.0 and parameters: {'num_leaves': 38, 'learning_rate': 0.09431239210705332, 'min_data_in_leaf': 274, 'feature_fraction': 0.4266324762263489, 'bagging_fraction': 0.7755295685728675}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[26]	cv_agg's valid gan_eval: 1.21297e+08 + 1.99994e+06
Training until validation scores don't improve for 89 rounds


[I 2024-11-29 00:30:07,724] Trial 58 finished with value: 606242000.0 and parameters: {'num_leaves': 63, 'learning_rate': 0.12687282751588172, 'min_data_in_leaf': 398, 'feature_fraction': 0.3957237521780697, 'bagging_fraction': 0.7089981505932664}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[35]	cv_agg's valid gan_eval: 1.21248e+08 + 1.40004e+06
Training until validation scores don't improve for 63 rounds


[I 2024-11-29 00:30:15,653] Trial 59 finished with value: 601615000.0 and parameters: {'num_leaves': 30, 'learning_rate': 0.3598843862584943, 'min_data_in_leaf': 1050, 'feature_fraction': 0.4883058442811445, 'bagging_fraction': 0.9308002294087376}. Best is trial 43 with value: 610491000.0.


Early stopping, best iteration is:
[10]	cv_agg's valid gan_eval: 1.20323e+08 + 1.80388e+06


In [12]:
# Objective value of every trial, in the order they were run.
plot_optimization_history(study)

In [13]:
# Relative importance of each hyperparameter for the objective value.
plot_param_importances(study)

In [14]:
# Objective value against each hyperparameter, one panel per parameter.
plot_slice(study)

In [15]:
# Contour plots of the objective for every pair of hyperparameters.
plot_contour(study)

In [16]:
# Contour plot restricted to the num_leaves / min_data_in_leaf pair.
plot_contour(study, params=['num_leaves','min_data_in_leaf'])

In [17]:
# Hyperparameters of the trial with the best objective value.
study.best_trial.params

{'num_leaves': 16,
 'learning_rate': 0.06537511998403012,
 'min_data_in_leaf': 1295,
 'feature_fraction': 0.6760496941965302,
 'bagging_fraction': 0.6807642069212421}

In [18]:
# Boosting iteration that objective() stored on the best trial via set_user_attr.
best_iter = study.best_trial.user_attrs["best_iter"]
best_iter

139