In [1]:
import os
import pickle
import urllib
import urllib.parse
from time import time

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from optuna.visualization import plot_optimization_history, plot_param_importances, plot_slice, plot_contour
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit


In [2]:
# Load variables from a local .env file (if one is found) so the connection
# settings are available after a fresh kernel restart. With its default
# arguments load_dotenv() does not override variables that are already set.
load_dotenv()

# Fail early with a readable message: a missing variable would otherwise
# surface as an opaque TypeError from quote_plus(None), or as the literal text
# "None" embedded in the connection URL.
_db_env = {name: os.getenv(name) for name in ("usersrv", "password", "ip")}
_missing = [name for name, value in _db_env.items() if value is None]
if _missing:
    raise RuntimeError(
        "Missing environment variable(s) for the Optuna storage: " + ", ".join(_missing)
    )

# Connection URL of the MySQL database used as Optuna storage. The password is
# percent-encoded so reserved characters (e.g. '@', '/', ':') cannot break the URL.
storage_name = "mysql+mysqldb://{u}:{p}@{ip}:3306/optuna_rf_db".format(
    u=_db_env["usersrv"],
    p=urllib.parse.quote_plus(_db_env["password"]),
    ip=_db_env["ip"],
)


In [10]:
# List the name of every study saved in the storage; the returned list is
# displayed as the cell output.
optuna.get_all_study_names(storage=storage_name)

['exp_206_random-forest-opt',
 'exp_206_random-forest-opt-20240905',
 'rf-segmentacion',
 'rf-segmentacion-0',
 'rf-segmentacion-1',
 'rf-segmentacion-2',
 'rf-segmentacion-3',
 'rf-segmentacion-4',
 'rf-segmentacion-5',
 'rf-segmentacion-6',
 'rf-segmentacion-7',
 'rf-segmentacion-8',
 'rf-segmentacion-9',
 'rf-segmentacion-solobajas',
 'rf-segmentacion-0-solobajas',
 'rf-segmentacion-1-solobajas',
 'lgbm_binaria_t1',
 'lgbm_binaria_t2',
 'lgbm_binaria_t3',
 'lgbm_binaria_t4',
 'lgbm_binaria_t5',
 'experimento_sampling_100perc',
 'experimento_sampling_10perc',
 'experimento_sampling_30perc',
 'experimento_linea_base',
 'experimento_sampling_10percV2',
 'experimento_sampling_10perc_v2',
 'experimento_sampling_20perc']

In [31]:
# Load one stored study by name from the shared storage; the following cells
# inspect it through `st`.
st = optuna.load_study(
    storage=storage_name,
    study_name='experimento_sampling_20perc',
)

In [32]:
# One row per trial (value, timings, sampled params, user attributes, state),
# with flat `params_*` / `user_attrs_*` column names rather than a MultiIndex.
st.trials_dataframe(multi_index=False)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_drop_rate,params_feature_fraction,params_feature_fraction_bynode,params_learning_rate,params_min_data_in_leaf,params_num_leaves,user_attrs_best_iter,user_attrs_seed,user_attrs_train_months,state
0,0,153906000.0,2024-10-25 18:29:46,2024-10-25 18:31:18,0 days 00:01:32,0.176057,0.103785,0.472251,0.317285,3541,9065,408.0,886969.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
1,1,156327000.0,2024-10-25 18:31:31,2024-10-25 18:34:53,0 days 00:03:22,0.282483,0.474016,0.471419,0.27421,3635,1080,717.0,780588.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
2,2,156518000.0,2024-10-25 18:35:05,2024-10-25 18:41:02,0 days 00:05:57,0.219578,0.875372,0.624298,0.078614,3668,25,1227.0,792991.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
3,3,152994000.0,2024-10-25 18:41:14,2024-10-25 18:42:37,0 days 00:01:23,0.094849,0.10501,0.169194,0.376781,2367,1793,401.0,780588.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
4,4,158588000.0,2024-10-25 18:42:49,2024-10-25 18:47:19,0 days 00:04:30,0.152709,0.310613,0.052457,0.120109,1825,569,1009.0,792992.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
5,5,156202000.0,2024-10-25 18:47:35,2024-10-25 18:49:59,0 days 00:02:24,0.1908,0.357383,0.113674,0.175884,3757,7122,562.0,792991.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
6,6,151465000.0,2024-10-25 18:50:11,2024-10-25 18:51:11,0 days 00:01:00,0.27746,0.014868,0.427206,0.229524,1838,2245,400.0,208961.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
7,7,159720000.0,2024-10-25 18:51:23,2024-10-25 18:56:26,0 days 00:05:03,0.194884,0.070671,0.389144,0.262557,66,3711,525.0,792992.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
8,8,158369000.0,2024-10-25 18:56:37,2024-10-25 19:00:48,0 days 00:04:11,0.008609,0.566347,0.70095,0.201686,2298,2522,626.0,208961.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE
9,9,157984000.0,2024-10-25 19:01:00,2024-10-25 19:05:13,0 days 00:04:13,0.053703,0.608195,0.12077,0.250289,2206,2238,579.0,780587.0,"[202012, 202012, 202101, 202102, 202103, 20210...",COMPLETE


In [33]:
# Show the best trial recorded in the study (its params, objective value and
# user attributes) as the cell output.
st.best_trial

FrozenTrial(number=7, state=1, values=[159720000.0], datetime_start=datetime.datetime(2024, 10, 25, 18, 51, 23), datetime_complete=datetime.datetime(2024, 10, 25, 18, 56, 26), params={'num_leaves': 3711, 'learning_rate': 0.26255701042195245, 'min_data_in_leaf': 66, 'feature_fraction': 0.07067057523741806, 'feature_fraction_bynode': 0.3891435585769663, 'drop_rate': 0.19488391990811987}, user_attrs={'best_iter': 525, 'seed': 792992, 'train_months': [202012, 202012, 202101, 202102, 202103, 202104, 202105]}, system_attrs={}, intermediate_values={}, distributions={'num_leaves': IntDistribution(high=10000, log=False, low=20, step=1), 'learning_rate': FloatDistribution(high=0.4, log=False, low=0.005, step=None), 'min_data_in_leaf': IntDistribution(high=4000, log=False, low=1, step=1), 'feature_fraction': FloatDistribution(high=0.9, log=False, low=0.005, step=None), 'feature_fraction_bynode': FloatDistribution(high=0.9, log=False, low=0.05, step=None), 'drop_rate': FloatDistribution(high=0.3, 

In [12]:
# Rank the hyperparameters by their estimated influence on the objective value.
# The fANOVA evaluator (Optuna's documented default) fits a random forest, so an
# unseeded call can return different numbers on every run; a fixed seed makes
# this cell reproducible. The estimate is only as reliable as the number of
# trials in the study allows, so treat the ranking as indicative.
optuna.importance.get_param_importances(
    st,
    evaluator=optuna.importance.FanovaImportanceEvaluator(seed=42),
)

{'learning_rate': 0.43203325382431934,
 'min_data_in_leaf': 0.2523585975269244,
 'feature_fraction': 0.12242100257306737,
 'drop_rate': 0.1191064771937908,
 'feature_fraction_bynode': 0.03830479637663614,
 'num_leaves': 0.03577587250526182}