# Modelo

In [1]:
import pandas as pd
import polars as pl
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

import lightgbm as lgb

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances, plot_slice, plot_contour

from time import time

import pickle

In [None]:
base_path = '/home/cburich_pymnts/buckets/b1/'
dataset_path = base_path + 'datasets/'
modelos_path = base_path + 'modelos/'
db_path = base_path + 'db/'


# base_path = 'C:/Users/Cristian Burich/Desktop/MA/segundo/eyf/'
# dataset_path = base_path + 'datasets/'
# modelos_path = base_path + 'modelos/'
# db_path = base_path + 'db/'


dataset_file = 'competencia_03_fe_U_k300.parquet' 

ganancia_acierto = 273000
costo_estimulo = 7000

# agregue sus semillas
semillas = [165229,165211,165203,165237,165247]

# data = pd.read_parquet('/home/eanegrin/datasets/' + dataset_file)
data = pd.read_parquet(dataset_path + dataset_file)

In [None]:
data = data.drop(columns=['clase_ternaria_1', 'tmobile_app', 'cmobile_app_trx', 'foto_mes'])

In [None]:
# Asegúrate de que ctrx_quarter es numérico
data['ctrx_quarter_normalizado'] = pd.to_numeric(data['ctrx_quarter'], errors='coerce')

# Aplicar las condiciones para normalizar ctrx_quarter
data.loc[data['cliente_antiguedad'] == 1, 'ctrx_quarter_normalizado'] = data['ctrx_quarter'] * 5
data.loc[data['cliente_antiguedad'] == 2, 'ctrx_quarter_normalizado'] = data['ctrx_quarter'] * 2
data.loc[data['cliente_antiguedad'] == 3, 'ctrx_quarter_normalizado'] = data['ctrx_quarter'] * 1.2

In [None]:
data.shape

In [None]:
meses_train = [201906, 201907, 201908, 201909, 201910, 201911, 201912,
               202001, 202002, 202003, 202004, 202005, 202007, 202008,
               202009, 202010, 202011, 202012, 202101, 202102, 202103, 202104, 202105, 202106, 202107] # dejo afuera 202006 por que es horrible


(376300, 679)

In [5]:
# Asignamos pesos a las clases

data['clase_peso'] = 1.0

data.loc[data['clase_ternaria'] == 'BAJA+2', 'clase_peso'] = 1.00002
data.loc[data['clase_ternaria'] == 'BAJA+1', 'clase_peso'] = 1.00001

In [None]:
train_data = data[data['foto_mes'].isin(meses_train)]

X_train = train_data.drop(['clase_ternaria', 'clase_peso', 'clase_binaria1','clase_binaria2', 'foto_mes'], axis=1)
y_train_binaria1 = train_data['clase_binaria1'] # Solo BAJA+2
y_train_binaria2 = train_data['clase_binaria2'] # junta a los 2 BAJA
w_train = train_data['clase_peso']

In [7]:
def lgb_gan_eval(y_pred, data):
    weight = data.get_weight()
    ganancia = np.where(weight == 1.00002, ganancia_acierto, 0) - np.where(weight < 1.00002, costo_estimulo, 0)
    ganancia = ganancia[np.argsort(y_pred)[::-1]]
    ganancia = np.cumsum(ganancia)

    return 'gan_eval', np.max(ganancia) , True

# Optimizacion

In [None]:
def objective(trial):

    num_leaves = trial.suggest_int('num_leaves', 8, 150), 
    learning_rate = trial.suggest_float('learning_rate', 0.005, 0.05), 
    min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 1, 2000),
    feature_fraction = trial.suggest_float('feature_fraction', 0.1, 1.0),
    bagging_fraction = trial.suggest_float('bagging_fraction', 0.1, 1.0),

    params = {
        'objective': 'binary',
        'metric': 'custom',
        'boosting_type': 'gbdt',
        'first_metric_only': True,
        'boost_from_average': True,
        'feature_pre_filter': False,
        'max_bin': 31,
        'num_leaves': num_leaves,
        'learning_rate': learning_rate,
        'min_data_in_leaf': min_data_in_leaf,
        'feature_fraction': feature_fraction,
        'bagging_fraction': bagging_fraction,
        'seed': semillas[0],
        'verbose': -1
    }
    
    train_data = lgb.Dataset(X_train,
                              label=y_train_binaria2, # eligir la clase
                              weight=w_train)   

    
        
    cv_results = lgb.cv(
        params,
        train_data,
        num_boost_round=100, 
          #  early_stopping_rounds= int(50 + 5 / learning_rate),
        feval=lgb_gan_eval,
        stratified=True,
        nfold=5,
        seed=semillas[0]
    )
    
    max_gan = max(cv_results['valid gan_eval-mean'])
    best_iter = cv_results['valid gan_eval-mean'].index(max_gan) + 1

    # Guardamos cual es la mejor iteración del modelo
    trial.set_user_attr("best_iter", best_iter)

    return max_gan * 5 # funcion objetivo, en el proximo paso le digo si quiero maximizarla o minimizarla.

In [None]:
storage_name = "sqlite:///" + db_path + "optimization_lgbm.db"
study_name = "competencia3_lgbm_k302" # UPDATE

study = optuna.create_study(
    direction="maximize",
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
)

[I 2024-11-17 20:38:17,209] Using an existing study with name 'competencia2_lgbm_v08' instead of creating a new one.


In [None]:
study.optimize(objective, n_trials=100) # Habia hecho 30, hago 70 mas porque parece que es un modelo superior

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[550]	cv_agg's valid gan_eval: 6.62284e+08 + 1.61155e+06


[I 2024-11-17 20:42:22,083] Trial 30 finished with value: 3311420000.0 and parameters: {'num_leaves': 102, 'learning_rate': 0.02212150442747906, 'min_data_in_leaf': 708, 'feature_fraction': 0.2987543446016156, 'bagging_fraction': 0.6157890963391109}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[527]	cv_agg's valid gan_eval: 6.59637e+08 + 4.10708e+06


[I 2024-11-17 20:45:02,999] Trial 31 finished with value: 3298183000.0 and parameters: {'num_leaves': 56, 'learning_rate': 0.014310366137917837, 'min_data_in_leaf': 1305, 'feature_fraction': 0.21008685324297327, 'bagging_fraction': 0.9150656789105077}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[837]	cv_agg's valid gan_eval: 6.63186e+08 + 3.6387e+06


[I 2024-11-17 20:48:18,455] Trial 32 finished with value: 3315928000.0 and parameters: {'num_leaves': 69, 'learning_rate': 0.015070882836400915, 'min_data_in_leaf': 1261, 'feature_fraction': 0.20153274827066678, 'bagging_fraction': 0.9257698556265321}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[596]	cv_agg's valid gan_eval: 6.63212e+08 + 3.09388e+06


[I 2024-11-17 20:51:48,967] Trial 33 finished with value: 3316061000.0 and parameters: {'num_leaves': 124, 'learning_rate': 0.017441053996761297, 'min_data_in_leaf': 927, 'feature_fraction': 0.3754084447336967, 'bagging_fraction': 0.8978770013614338}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[439]	cv_agg's valid gan_eval: 6.59002e+08 + 3.11383e+06


[I 2024-11-17 20:54:30,868] Trial 34 finished with value: 3295012000.0 and parameters: {'num_leaves': 126, 'learning_rate': 0.017581102987027773, 'min_data_in_leaf': 83, 'feature_fraction': 0.37132120427186877, 'bagging_fraction': 0.7974877820881695}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[542]	cv_agg's valid gan_eval: 6.61553e+08 + 3.79131e+06


[I 2024-11-17 20:56:41,237] Trial 35 finished with value: 3307766000.0 and parameters: {'num_leaves': 72, 'learning_rate': 0.01971903188394312, 'min_data_in_leaf': 933, 'feature_fraction': 0.1629064809800376, 'bagging_fraction': 0.3015955141752421}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[588]	cv_agg's valid gan_eval: 6.63499e+08 + 3.31275e+06


[I 2024-11-17 21:00:24,080] Trial 36 finished with value: 3317496000.0 and parameters: {'num_leaves': 147, 'learning_rate': 0.01721194270123365, 'min_data_in_leaf': 1467, 'feature_fraction': 0.404703993856656, 'bagging_fraction': 0.699639505673058}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[615]	cv_agg's valid gan_eval: 6.6116e+08 + 3.16331e+06


[I 2024-11-17 21:04:39,835] Trial 37 finished with value: 3305799000.0 and parameters: {'num_leaves': 149, 'learning_rate': 0.010959348535982284, 'min_data_in_leaf': 1455, 'feature_fraction': 0.4898981616705105, 'bagging_fraction': 0.7234034339862323}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[652]	cv_agg's valid gan_eval: 6.62672e+08 + 3.55182e+06


[I 2024-11-17 21:08:45,448] Trial 38 finished with value: 3313359000.0 and parameters: {'num_leaves': 140, 'learning_rate': 0.016208830763379777, 'min_data_in_leaf': 658, 'feature_fraction': 0.4073626437821056, 'bagging_fraction': 0.6631640680028003}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[367]	cv_agg's valid gan_eval: 6.60647e+08 + 2.44367e+06


[I 2024-11-17 21:10:55,679] Trial 39 finished with value: 3303237000.0 and parameters: {'num_leaves': 122, 'learning_rate': 0.021601983368728175, 'min_data_in_leaf': 890, 'feature_fraction': 0.3317964252082074, 'bagging_fraction': 0.8625175868995857}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds


[I 2024-11-17 21:13:41,948] Trial 40 finished with value: 3297938000.0 and parameters: {'num_leaves': 144, 'learning_rate': 0.029879056777414176, 'min_data_in_leaf': 1604, 'feature_fraction': 0.6305125649598597, 'bagging_fraction': 0.8014477674203011}. Best is trial 22 with value: 3319092000.0.


Early stopping, best iteration is:
[228]	cv_agg's valid gan_eval: 6.59588e+08 + 3.75489e+06
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[467]	cv_agg's valid gan_eval: 6.63113e+08 + 2.88016e+06


[I 2024-11-17 21:16:39,149] Trial 41 finished with value: 3315564000.0 and parameters: {'num_leaves': 130, 'learning_rate': 0.018430614392140485, 'min_data_in_leaf': 1026, 'feature_fraction': 0.38544937538580193, 'bagging_fraction': 0.5281538014387627}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[385]	cv_agg's valid gan_eval: 6.62043e+08 + 2.89164e+06


[I 2024-11-17 21:19:49,509] Trial 42 finished with value: 3310216000.0 and parameters: {'num_leaves': 132, 'learning_rate': 0.024618573547425096, 'min_data_in_leaf': 1089, 'feature_fraction': 0.38299357081757346, 'bagging_fraction': 0.5447453246911078}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[561]	cv_agg's valid gan_eval: 6.62315e+08 + 3.70096e+06


[I 2024-11-17 21:24:19,731] Trial 43 finished with value: 3311574000.0 and parameters: {'num_leaves': 125, 'learning_rate': 0.018117360038619763, 'min_data_in_leaf': 1003, 'feature_fraction': 0.4581718850508089, 'bagging_fraction': 0.4393479952213241}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[642]	cv_agg's valid gan_eval: 6.61655e+08 + 2.93494e+06


[I 2024-11-17 21:28:41,469] Trial 44 finished with value: 3308277000.0 and parameters: {'num_leaves': 139, 'learning_rate': 0.015059713715366903, 'min_data_in_leaf': 1400, 'feature_fraction': 0.5177862951572543, 'bagging_fraction': 0.6351062287588215}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[634]	cv_agg's valid gan_eval: 6.6297e+08 + 3.06203e+06


[I 2024-11-17 21:32:28,979] Trial 45 finished with value: 3314850000.0 and parameters: {'num_leaves': 120, 'learning_rate': 0.020986400351113054, 'min_data_in_leaf': 1246, 'feature_fraction': 0.3910318794843447, 'bagging_fraction': 0.49800054242640507}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[653]	cv_agg's valid gan_eval: 6.61517e+08 + 2.28628e+06


[I 2024-11-17 21:35:59,570] Trial 46 finished with value: 3307584000.0 and parameters: {'num_leaves': 131, 'learning_rate': 0.018230409812003835, 'min_data_in_leaf': 468, 'feature_fraction': 0.33416859281609324, 'bagging_fraction': 0.7131110212481725}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[838]	cv_agg's valid gan_eval: 6.61933e+08 + 2.86801e+06


[I 2024-11-17 21:41:54,817] Trial 47 finished with value: 3309663000.0 and parameters: {'num_leaves': 150, 'learning_rate': 0.011695972072845482, 'min_data_in_leaf': 1727, 'feature_fraction': 0.43840346854655526, 'bagging_fraction': 0.9401896736945718}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[976]	cv_agg's valid gan_eval: 6.60327e+08 + 3.37783e+06


[I 2024-11-17 21:47:27,848] Trial 48 finished with value: 3301634000.0 and parameters: {'num_leaves': 94, 'learning_rate': 0.0068200992286415515, 'min_data_in_leaf': 730, 'feature_fraction': 0.23398150523415762, 'bagging_fraction': 0.5543646019372797}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[307]	cv_agg's valid gan_eval: 6.60822e+08 + 2.99659e+06


[I 2024-11-17 21:49:49,785] Trial 49 finished with value: 3304112000.0 and parameters: {'num_leaves': 67, 'learning_rate': 0.033870081721058376, 'min_data_in_leaf': 1088, 'feature_fraction': 0.3637097951957839, 'bagging_fraction': 0.87577109035577}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[591]	cv_agg's valid gan_eval: 6.59754e+08 + 2.85774e+06


[I 2024-11-17 21:52:59,082] Trial 50 finished with value: 3298771000.0 and parameters: {'num_leaves': 87, 'learning_rate': 0.015179968824868573, 'min_data_in_leaf': 569, 'feature_fraction': 0.9307541277197462, 'bagging_fraction': 0.46480760851416447}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[984]	cv_agg's valid gan_eval: 6.6108e+08 + 3.40696e+06


[I 2024-11-17 21:57:13,909] Trial 51 finished with value: 3305400000.0 and parameters: {'num_leaves': 33, 'learning_rate': 0.012716204359215413, 'min_data_in_leaf': 1271, 'feature_fraction': 0.13033926097637072, 'bagging_fraction': 0.3598268476775405}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[538]	cv_agg's valid gan_eval: 6.60358e+08 + 4.04714e+06


[I 2024-11-17 22:00:01,783] Trial 52 finished with value: 3301788000.0 and parameters: {'num_leaves': 48, 'learning_rate': 0.01762055757096998, 'min_data_in_leaf': 1396, 'feature_fraction': 0.1918109994407713, 'bagging_fraction': 0.9996888073739033}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[373]	cv_agg's valid gan_eval: 6.62019e+08 + 2.79964e+06


[I 2024-11-17 22:02:48,848] Trial 53 finished with value: 3310097000.0 and parameters: {'num_leaves': 63, 'learning_rate': 0.04977382165029644, 'min_data_in_leaf': 940, 'feature_fraction': 0.26643228664846436, 'bagging_fraction': 0.18139843293304891}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[363]	cv_agg's valid gan_eval: 6.60694e+08 + 3.67294e+06


[I 2024-11-17 22:05:51,192] Trial 54 finished with value: 3303468000.0 and parameters: {'num_leaves': 75, 'learning_rate': 0.024266914428969083, 'min_data_in_leaf': 1118, 'feature_fraction': 0.3070061429061986, 'bagging_fraction': 0.3229869824644106}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[743]	cv_agg's valid gan_eval: 6.61933e+08 + 3.53853e+06


[I 2024-11-17 22:11:23,066] Trial 55 finished with value: 3309663000.0 and parameters: {'num_leaves': 139, 'learning_rate': 0.010001781156379175, 'min_data_in_leaf': 1253, 'feature_fraction': 0.23546802359638982, 'bagging_fraction': 0.8178660920488428}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[769]	cv_agg's valid gan_eval: 6.62334e+08 + 3.81307e+06


[I 2024-11-17 22:15:01,646] Trial 56 finished with value: 3311672000.0 and parameters: {'num_leaves': 19, 'learning_rate': 0.038579446975219506, 'min_data_in_leaf': 1560, 'feature_fraction': 0.133135435740866, 'bagging_fraction': 0.4030662513363761}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[622]	cv_agg's valid gan_eval: 6.62367e+08 + 2.53472e+06


[I 2024-11-17 22:19:16,374] Trial 57 finished with value: 3311833000.0 and parameters: {'num_leaves': 110, 'learning_rate': 0.020738833014577955, 'min_data_in_leaf': 832, 'feature_fraction': 0.2752594593841462, 'bagging_fraction': 0.288824079666688}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[573]	cv_agg's valid gan_eval: 6.62987e+08 + 3.01328e+06


[I 2024-11-17 22:23:47,494] Trial 58 finished with value: 3314934000.0 and parameters: {'num_leaves': 129, 'learning_rate': 0.016382727523515456, 'min_data_in_leaf': 1016, 'feature_fraction': 0.3228478110566505, 'bagging_fraction': 0.7849632209855284}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[548]	cv_agg's valid gan_eval: 6.62528e+08 + 3.00793e+06


[I 2024-11-17 22:27:19,091] Trial 59 finished with value: 3312638000.0 and parameters: {'num_leaves': 146, 'learning_rate': 0.019006991209892223, 'min_data_in_leaf': 1712, 'feature_fraction': 0.18522037124081264, 'bagging_fraction': 0.9556150934456563}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	cv_agg's valid gan_eval: 6.60755e+08 + 2.65653e+06


[I 2024-11-17 22:33:27,347] Trial 60 finished with value: 3303776000.0 and parameters: {'num_leaves': 34, 'learning_rate': 0.01247048548071289, 'min_data_in_leaf': 1430, 'feature_fraction': 0.4082338801434655, 'bagging_fraction': 0.705583925436384}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[566]	cv_agg's valid gan_eval: 6.62575e+08 + 3.01577e+06


[I 2024-11-17 22:38:24,677] Trial 61 finished with value: 3312876000.0 and parameters: {'num_leaves': 128, 'learning_rate': 0.016021163064230702, 'min_data_in_leaf': 1022, 'feature_fraction': 0.3412728037973264, 'bagging_fraction': 0.7877518191498535}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[666]	cv_agg's valid gan_eval: 6.62459e+08 + 2.14374e+06


[I 2024-11-17 22:43:47,979] Trial 62 finished with value: 3312295000.0 and parameters: {'num_leaves': 119, 'learning_rate': 0.014086887005598653, 'min_data_in_leaf': 1334, 'feature_fraction': 0.31624079198313787, 'bagging_fraction': 0.8996084776440909}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[728]	cv_agg's valid gan_eval: 6.63557e+08 + 2.58723e+06


[I 2024-11-17 22:48:36,442] Trial 63 finished with value: 3317783000.0 and parameters: {'num_leaves': 137, 'learning_rate': 0.01675328274582527, 'min_data_in_leaf': 1224, 'feature_fraction': 0.2304612634606038, 'bagging_fraction': 0.847603913567244}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[564]	cv_agg's valid gan_eval: 6.62701e+08 + 2.83341e+06


[I 2024-11-17 22:52:23,233] Trial 64 finished with value: 3313506000.0 and parameters: {'num_leaves': 136, 'learning_rate': 0.02264862239092839, 'min_data_in_leaf': 1207, 'feature_fraction': 0.23928652615868096, 'bagging_fraction': 0.8506266669505539}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[990]	cv_agg's valid gan_eval: 6.59666e+08 + 4.17223e+06


[I 2024-11-17 22:57:35,103] Trial 65 finished with value: 3298330000.0 and parameters: {'num_leaves': 50, 'learning_rate': 0.009116004040564962, 'min_data_in_leaf': 1339, 'feature_fraction': 0.21836651352228525, 'bagging_fraction': 0.8907710051023814}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[663]	cv_agg's valid gan_eval: 6.62603e+08 + 3.20635e+06


[I 2024-11-17 23:02:20,098] Trial 66 finished with value: 3313016000.0 and parameters: {'num_leaves': 143, 'learning_rate': 0.013570575837597961, 'min_data_in_leaf': 1491, 'feature_fraction': 0.2790032275620491, 'bagging_fraction': 0.6565410130372563}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[891]	cv_agg's valid gan_eval: 6.61272e+08 + 2.4256e+06


[I 2024-11-17 23:06:41,977] Trial 67 finished with value: 3306359000.0 and parameters: {'num_leaves': 84, 'learning_rate': 0.019066438750007037, 'min_data_in_leaf': 1136, 'feature_fraction': 0.11263267853730743, 'bagging_fraction': 0.9594988787714078}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[668]	cv_agg's valid gan_eval: 6.62054e+08 + 3.32455e+06


[I 2024-11-17 23:10:22,524] Trial 68 finished with value: 3310272000.0 and parameters: {'num_leaves': 60, 'learning_rate': 0.015232422979913396, 'min_data_in_leaf': 1218, 'feature_fraction': 0.17899676103833173, 'bagging_fraction': 0.7452579103120249}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[992]	cv_agg's valid gan_eval: 6.62649e+08 + 3.1846e+06


[I 2024-11-17 23:16:50,110] Trial 69 finished with value: 3313247000.0 and parameters: {'num_leaves': 99, 'learning_rate': 0.010818590068203885, 'min_data_in_leaf': 1055, 'feature_fraction': 0.36286993551838453, 'bagging_fraction': 0.24057557038388705}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[617]	cv_agg's valid gan_eval: 6.62915e+08 + 2.7429e+06


[I 2024-11-17 23:19:54,263] Trial 70 finished with value: 3314577000.0 and parameters: {'num_leaves': 136, 'learning_rate': 0.017456319060607975, 'min_data_in_leaf': 1299, 'feature_fraction': 0.2576225906468755, 'bagging_fraction': 0.5881534351965878}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[502]	cv_agg's valid gan_eval: 6.61748e+08 + 3.28717e+06


[I 2024-11-17 23:22:01,784] Trial 71 finished with value: 3308739000.0 and parameters: {'num_leaves': 129, 'learning_rate': 0.01733114295288905, 'min_data_in_leaf': 977, 'feature_fraction': 0.15204767185357418, 'bagging_fraction': 0.8368913881328216}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[367]	cv_agg's valid gan_eval: 6.60117e+08 + 3.25884e+06


[I 2024-11-17 23:25:27,816] Trial 72 finished with value: 3300584000.0 and parameters: {'num_leaves': 122, 'learning_rate': 0.02053071771006495, 'min_data_in_leaf': 889, 'feature_fraction': 0.7957077787926498, 'bagging_fraction': 0.7807930290880594}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[660]	cv_agg's valid gan_eval: 6.62168e+08 + 2.66417e+06


[I 2024-11-17 23:29:25,187] Trial 73 finished with value: 3310839000.0 and parameters: {'num_leaves': 111, 'learning_rate': 0.015644553927449373, 'min_data_in_leaf': 1135, 'feature_fraction': 0.43153410058904185, 'bagging_fraction': 0.6885430048187857}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[527]	cv_agg's valid gan_eval: 6.61136e+08 + 3.10916e+06


[I 2024-11-17 23:32:17,574] Trial 74 finished with value: 3305680000.0 and parameters: {'num_leaves': 144, 'learning_rate': 0.01654192781716732, 'min_data_in_leaf': 223, 'feature_fraction': 0.32087783144503873, 'bagging_fraction': 0.8188833476311934}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[616]	cv_agg's valid gan_eval: 6.61422e+08 + 3.89972e+06


[I 2024-11-17 23:35:52,404] Trial 75 finished with value: 3307108000.0 and parameters: {'num_leaves': 69, 'learning_rate': 0.014310149418721625, 'min_data_in_leaf': 1055, 'feature_fraction': 0.478773115482745, 'bagging_fraction': 0.10045811686041184}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[612]	cv_agg's valid gan_eval: 6.62203e+08 + 2.36481e+06


[I 2024-11-17 23:40:04,960] Trial 76 finished with value: 3311014000.0 and parameters: {'num_leaves': 134, 'learning_rate': 0.01880885646649824, 'min_data_in_leaf': 1190, 'feature_fraction': 0.531729091525412, 'bagging_fraction': 0.9212914007526235}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[468]	cv_agg's valid gan_eval: 6.62298e+08 + 2.72099e+06


[I 2024-11-17 23:42:35,759] Trial 77 finished with value: 3311490000.0 and parameters: {'num_leaves': 124, 'learning_rate': 0.027917292150901517, 'min_data_in_leaf': 969, 'feature_fraction': 0.29142765733921966, 'bagging_fraction': 0.7412946960559931}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[789]	cv_agg's valid gan_eval: 6.62745e+08 + 3.44957e+06


[I 2024-11-17 23:46:02,377] Trial 78 finished with value: 3313723000.0 and parameters: {'num_leaves': 114, 'learning_rate': 0.013009372571260931, 'min_data_in_leaf': 774, 'feature_fraction': 0.2076785893060686, 'bagging_fraction': 0.627562215644116}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[406]	cv_agg's valid gan_eval: 6.61942e+08 + 4.05844e+06


[I 2024-11-17 23:48:28,346] Trial 79 finished with value: 3309712000.0 and parameters: {'num_leaves': 118, 'learning_rate': 0.02202934179431966, 'min_data_in_leaf': 901, 'feature_fraction': 0.35599817603683476, 'bagging_fraction': 0.7724189921408149}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[630]	cv_agg's valid gan_eval: 6.62694e+08 + 2.80735e+06


[I 2024-11-17 23:51:54,528] Trial 80 finished with value: 3313471000.0 and parameters: {'num_leaves': 78, 'learning_rate': 0.016713083506579313, 'min_data_in_leaf': 1447, 'feature_fraction': 0.3898431499481424, 'bagging_fraction': 0.8189341257748546}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[456]	cv_agg's valid gan_eval: 6.62019e+08 + 3.44411e+06


[I 2024-11-17 23:54:44,386] Trial 81 finished with value: 3310097000.0 and parameters: {'num_leaves': 131, 'learning_rate': 0.020488155711382856, 'min_data_in_leaf': 1237, 'feature_fraction': 0.3895822954418968, 'bagging_fraction': 0.8682151648797846}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[579]	cv_agg's valid gan_eval: 6.62774e+08 + 2.09076e+06


[I 2024-11-17 23:58:33,831] Trial 82 finished with value: 3313870000.0 and parameters: {'num_leaves': 121, 'learning_rate': 0.02110697118868032, 'min_data_in_leaf': 1360, 'feature_fraction': 0.45861617119614534, 'bagging_fraction': 0.5035713774526729}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[681]	cv_agg's valid gan_eval: 6.61548e+08 + 2.80381e+06


[I 2024-11-18 00:02:42,370] Trial 83 finished with value: 3307738000.0 and parameters: {'num_leaves': 139, 'learning_rate': 0.01200189593809619, 'min_data_in_leaf': 1164, 'feature_fraction': 0.4076663974303643, 'bagging_fraction': 0.4758341818386806}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[718]	cv_agg's valid gan_eval: 6.62729e+08 + 3.48852e+06


[I 2024-11-18 00:06:26,572] Trial 84 finished with value: 3313646000.0 and parameters: {'num_leaves': 127, 'learning_rate': 0.014866592953980731, 'min_data_in_leaf': 1266, 'feature_fraction': 0.2990424388328155, 'bagging_fraction': 0.5322317372952567}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[409]	cv_agg's valid gan_eval: 6.61395e+08 + 3.15534e+06


[I 2024-11-18 00:08:47,139] Trial 85 finished with value: 3306975000.0 and parameters: {'num_leaves': 108, 'learning_rate': 0.02328009707782572, 'min_data_in_leaf': 1089, 'feature_fraction': 0.3356350451760701, 'bagging_fraction': 0.18159915330588294}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[565]	cv_agg's valid gan_eval: 6.62652e+08 + 3.0593e+06


[I 2024-11-18 00:12:11,300] Trial 86 finished with value: 3313261000.0 and parameters: {'num_leaves': 147, 'learning_rate': 0.018162157705123895, 'min_data_in_leaf': 1289, 'feature_fraction': 0.37550950343710593, 'bagging_fraction': 0.5680775143449582}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[653]	cv_agg's valid gan_eval: 6.60325e+08 + 3.08377e+06


[I 2024-11-18 00:15:32,773] Trial 87 finished with value: 3301627000.0 and parameters: {'num_leaves': 41, 'learning_rate': 0.019628188978755384, 'min_data_in_leaf': 1524, 'feature_fraction': 0.5910111328608005, 'bagging_fraction': 0.9352162752750113}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[619]	cv_agg's valid gan_eval: 6.62894e+08 + 2.73321e+06


[I 2024-11-18 00:18:27,446] Trial 88 finished with value: 3314472000.0 and parameters: {'num_leaves': 92, 'learning_rate': 0.0259870971726303, 'min_data_in_leaf': 1391, 'feature_fraction': 0.25651063241626526, 'bagging_fraction': 0.8969155703656777}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[644]	cv_agg's valid gan_eval: 6.63641e+08 + 3.22268e+06


[I 2024-11-18 00:21:30,865] Trial 89 finished with value: 3318203000.0 and parameters: {'num_leaves': 142, 'learning_rate': 0.018441429461149888, 'min_data_in_leaf': 1020, 'feature_fraction': 0.22069884466338813, 'bagging_fraction': 0.37808461817475986}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[477]	cv_agg's valid gan_eval: 6.62246e+08 + 2.62052e+06


[I 2024-11-18 00:23:47,927] Trial 90 finished with value: 3311231000.0 and parameters: {'num_leaves': 140, 'learning_rate': 0.01697334389530437, 'min_data_in_leaf': 868, 'feature_fraction': 0.20847610187303525, 'bagging_fraction': 0.28635938185067705}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[613]	cv_agg's valid gan_eval: 6.62214e+08 + 3.2075e+06


[I 2024-11-18 00:26:22,267] Trial 91 finished with value: 3311070000.0 and parameters: {'num_leaves': 150, 'learning_rate': 0.015777170578281853, 'min_data_in_leaf': 1017, 'feature_fraction': 0.14698772194633344, 'bagging_fraction': 0.4345162783532601}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[748]	cv_agg's valid gan_eval: 6.63382e+08 + 2.79348e+06


[I 2024-11-18 00:29:53,779] Trial 92 finished with value: 3316908000.0 and parameters: {'num_leaves': 142, 'learning_rate': 0.018376330414880886, 'min_data_in_leaf': 1109, 'feature_fraction': 0.2260784107296045, 'bagging_fraction': 0.36536498450040183}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[581]	cv_agg's valid gan_eval: 6.62733e+08 + 3.83799e+06


[I 2024-11-18 00:32:30,696] Trial 93 finished with value: 3313667000.0 and parameters: {'num_leaves': 143, 'learning_rate': 0.018665916493204888, 'min_data_in_leaf': 973, 'feature_fraction': 0.18446878651114448, 'bagging_fraction': 0.20442857752859844}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[730]	cv_agg's valid gan_eval: 6.62892e+08 + 3.09273e+06


[I 2024-11-18 00:36:00,865] Trial 94 finished with value: 3314458000.0 and parameters: {'num_leaves': 137, 'learning_rate': 0.014159472838249712, 'min_data_in_leaf': 1062, 'feature_fraction': 0.23747709293296992, 'bagging_fraction': 0.3588486208182783}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[488]	cv_agg's valid gan_eval: 6.62665e+08 + 3.66076e+06


[I 2024-11-18 00:38:24,030] Trial 95 finished with value: 3313324000.0 and parameters: {'num_leaves': 132, 'learning_rate': 0.019737718068441802, 'min_data_in_leaf': 1161, 'feature_fraction': 0.2218614106539067, 'bagging_fraction': 0.3265314374982285}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[550]	cv_agg's valid gan_eval: 6.6197e+08 + 3.52245e+06


[I 2024-11-18 00:41:16,973] Trial 96 finished with value: 3309852000.0 and parameters: {'num_leaves': 147, 'learning_rate': 0.016831074342465925, 'min_data_in_leaf': 1101, 'feature_fraction': 0.26797280864418216, 'bagging_fraction': 0.27158168614185324}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[881]	cv_agg's valid gan_eval: 6.62613e+08 + 2.82314e+06


[I 2024-11-18 00:44:56,469] Trial 97 finished with value: 3313065000.0 and parameters: {'num_leaves': 143, 'learning_rate': 0.013273659401895083, 'min_data_in_leaf': 939, 'feature_fraction': 0.1701928153524901, 'bagging_fraction': 0.3863542720955415}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[525]	cv_agg's valid gan_eval: 6.61973e+08 + 3.31315e+06


[I 2024-11-18 00:50:01,896] Trial 98 finished with value: 3309866000.0 and parameters: {'num_leaves': 134, 'learning_rate': 0.017891131466557995, 'min_data_in_leaf': 1327, 'feature_fraction': 0.6668838533905063, 'bagging_fraction': 0.25934076625067126}. Best is trial 22 with value: 3319092000.0.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[888]	cv_agg's valid gan_eval: 6.60765e+08 + 2.17508e+06


[I 2024-11-18 00:53:09,380] Trial 99 finished with value: 3303825000.0 and parameters: {'num_leaves': 141, 'learning_rate': 0.015269812774196912, 'min_data_in_leaf': 1020, 'feature_fraction': 0.10024518118924586, 'bagging_fraction': 0.9734649133265478}. Best is trial 22 with value: 3319092000.0.


Analizamos los resultados as usual

In [11]:
optuna.visualization.plot_optimization_history(study)

In [12]:
plot_param_importances(study)

El **learning rate** es un parámetro que tiene que ir acompañado por más árboles.

In [13]:
plot_slice(study)

In [14]:
plot_contour(study)

In [15]:
plot_contour(study, params=['num_leaves','min_data_in_leaf'])

In [16]:
study.best_trial.params

{'num_leaves': 95,
 'learning_rate': 0.014790793124814124,
 'min_data_in_leaf': 1162,
 'feature_fraction': 0.32039319093779284,
 'bagging_fraction': 0.7236519946486292}

In [17]:
best_iter = study.best_trial.user_attrs["best_iter"]
best_iter

871