# LightGBM Optuna optimization

# Load data

In [1]:
import pandas as pd
# Read the training and test tables (in that order) from the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))
# Last expression of the cell: display the (rows, columns) of the training table.
train.shape

(100000, 52)

In [2]:
# Feature matrices: every column except the row identifier (and, for the
# training table, the 'target' label column).
x_train = train.drop(['id', 'target'], axis=1)
x_test = test.drop(['id'], axis=1)

In [11]:
from sklearn.preprocessing import LabelEncoder
# Fit the encoder on the distinct values of the 'target' column; fit() returns
# the encoder itself, so construction and fitting can be chained.
le = LabelEncoder().fit(train['target'].unique())
# Last expression of the cell: display the class labels the encoder learned.
le.classes_

array(['Class_1', 'Class_2', 'Class_3', 'Class_4'], dtype=object)

In [12]:
# Map each 'target' label to its integer code using the fitted encoder.
y_train = le.transform(train.loc[:, 'target'])
# Last expression of the cell: display the encoded labels.
y_train

array([1, 0, 0, ..., 2, 1, 2])

# Optuna optimization

In [21]:
import optuna
from lightgbm import Dataset
import lightgbm
def objective(trial):
    """Optuna objective: cross-validated multi-class log loss of LightGBM.

    Samples LightGBM hyperparameters from ``trial``, runs ``lightgbm.cv`` on
    the module-level ``x_train`` / ``y_train`` with early stopping and Optuna
    pruning, prints the best iteration reported by the CV booster, and returns
    the last entry of the mean ``multi_logloss`` history.

    Args:
        trial: The Optuna trial used to sample hyperparameters; it is also
            handed to the pruning callback.

    Returns:
        The final value of the CV ``multi_logloss`` mean history.
    """
    params = {
        'min_child_samples': trial.suggest_int("min_child_samples", 1, 5000, log=True),
        'max_depth': trial.suggest_int("max_depth", 1, 50),
        'reg_lambda': trial.suggest_float("reg_lambda", 0.0001, 25, log=True),
        'reg_alpha': trial.suggest_float("reg_alpha", 0.0001, 25, log=True),
        'colsample_bytree': trial.suggest_float("colsample_bytree", 0.1, 1),
        'num_leaves': trial.suggest_int("num_leaves", 5, 5000, log=True),
        # cat_smooth only applies to categorical features.
        # NOTE(review): confirm x_train contains any; otherwise this
        # dimension does not influence the model either.
        'cat_smooth': trial.suggest_int("cat_smooth", 0, 100),
        # 'is_unbalance' is deliberately not tuned: LightGBM documents it as
        # used only by the binary and multiclassova objectives, so with
        # objective='multiclass' it would only add a noise dimension to the
        # search space and to the parameter-importance plot.
        'objective': 'multiclass',
        'verbose': 0,
        'force_row_wise': True,
        'num_class': 4,
    }
    n_estimators = 20000
    train_set = Dataset(data=x_train, label=y_train)
    # Early stopping is passed as a callback rather than through the
    # early_stopping_rounds= / verbose_eval= keyword arguments, which are not
    # accepted by every LightGBM release; the callback form is equivalent.
    callbacks = [
        lightgbm.early_stopping(200, verbose=False),
        optuna.integration.LightGBMPruningCallback(trial, "multi_logloss"),
    ]
    result = lightgbm.cv(params, train_set, metrics='multi_logloss',
                         num_boost_round=n_estimators, return_cvbooster=True,
                         callbacks=callbacks)
    print(f"best iteration: {result['cvbooster'].best_iteration} of {n_estimators}")
    # The history key is 'multi_logloss-mean' on some LightGBM releases and
    # 'valid multi_logloss-mean' on others, so match on the common suffix.
    mean_key = next(key for key in result if key.endswith('multi_logloss-mean'))
    return result[mean_key][-1]

In [None]:
# Create an in-memory study that minimizes the objective and prunes trials
# with the median pruner, then search for 60 seconds.
study = optuna.create_study(
    pruner=optuna.pruners.MedianPruner(),
    direction="minimize",
)
study.optimize(objective, timeout=60)
print(study.best_trial)

[I 2021-05-04 23:04:35,512] A new study created in memory with name: no-name-78a4dee0-01a0-48e9-84b2-c909eb13bea8


# Plot results

In [None]:
from optuna.visualization import plot_optimization_history, plot_param_importances
# Last expression of the cell: the optimization-history figure for the study.
plot_optimization_history(study=study)

In [None]:
# Last expression of the cell: the parameter-importance figure for the study.
plot_param_importances(study=study)