# LightGBM - Parameter Tuning with Bayesian Optimization

In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pylab as plt

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV,learning_curve, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold

In [None]:
from lightgbm import LGBMClassifier
from bayes_opt import BayesianOptimization
import lightgbm as lgb

In [9]:
# train, test data set 불러오기
train = pd.read_csv("train.csv", index_col=0)
test = pd.read_csv("test.csv", index_col=0)
sample_submission = pd.read_csv("sample_submission.csv", index_col=0)

In [10]:
column_number = {}
for i, column in enumerate(sample_submission.columns):
    column_number[column] = i
    
def to_number(x, dic):
    return dic[x]

train['type_num'] = train['type'].apply(lambda x: to_number(x, column_number))

In [11]:
# oulier 제거
for column in train.columns[2:-1]:
    test_min_value = test[column].min()
    test_max_value = test[column].max()

    for column in train.columns[2:-1]:
        train = train.drop(index=train[(train[column] <= test_min_value -100) | (train[column] >= test_max_value +100)].index)

    train = train.reset_index(drop=True)
    
train.shape

(199880, 23)

In [12]:
# train set의 문제지(X), 정답지(y) 나누기
train_X = train.drop(columns=['type', 'type_num'], axis=1)
train_y = train['type_num']
test_x = test

In [13]:
train_X.shape

(199880, 21)

In [14]:
# validation 진행하기 위한 train_test_split
X_train, X_test, y_train, y_test = train_test_split(train_X, 
                                                    train_y, 
                                                    stratify = train_y,
                                                    test_size=0.2, 
                                                    random_state=1234)

In [14]:
# kfold = KFold(n_splits=6, random_state= 0, shuffle = True)
kfold_s = StratifiedKFold(n_splits=6, random_state= 0, shuffle = True)

# Bayesian Optimization

## 1. parameters to be tuned

In [18]:
def lgb_eval(num_leaves, feature_fraction, bagging_fraction, max_depth, lambda_l1, lambda_l2, min_split_gain, min_child_weight):

    params = {'application':'multiclass','num_iterations':1000, 'learning_rate':0.1, 'early_stopping_round':500, 'metric':'multi_logloss','num_class':19}
    params["num_leaves"] = round(num_leaves)
    params['feature_fraction'] = max(min(feature_fraction, 1), 0)
    params['bagging_fraction'] = max(min(bagging_fraction, 1), 0) 
    params['max_depth'] = round(max_depth)
    params['lambda_l1'] = max(lambda_l1, 0)
    params['lambda_l2'] = max(lambda_l2, 0)
    params['min_split_gain'] = min_split_gain
    params['min_child_weight'] = min_child_weight
    cv_result = lgb.cv(params, train_data, nfold=n_folds, seed=random_seed, stratified=True, verbose_eval =200, metrics=['multi_logloss'])
    return max(cv_result['multi_logloss'])

## 2. Set the range for each parameter

In [20]:
lgbBO = BayesianOptimization(lgb_eval, {'num_leaves': (100,300),
                                        'feature_fraction': (0.1,0.3), # 열 샘플링 / default값인 1.0보다 작으면 각각의 tree를 training 시키기 전에 지정한 값만큼의 feature만 선택
                                        'bagging_fraction': (0.1,0.3), # 행 샘플링 / resampling 없이 data를 random으로 선택한다
                                        'max_depth': (1,9),
                                        'lambda_l1': (0.1,0.3), # L1 정규화 / 보통 default값인 0으로 놔둠
                                        'lambda_l2': (0.1,0.3), # L2 정규화 / 보통 default값인 0으로 놔둠
                                        'min_split_gain': (0.1,0.3),
                                        'min_child_weight':(0.1,0.3) }, random_state=1234)

In [21]:
# type_num에 1씩 더해서 type_num을 0부터가 아닌 1부터 시작하도록 만들어줌
ytrain_temp = y_train + 1

In [22]:
# 이건 그냥 확인하려고 쓴 코드
len(np.unique(y_train))

19

## 3. Put all together

In [23]:
def bayes_parameter_opt_lgb(X, y, init_round=5, opt_round=20, n_folds=6, random_seed=1234, n_estimators=1000, learning_rate=0.1, output_process=False):
    # prepare data
    #train_data = lgb.Dataset(data=X, label=y, categorical_feature = categorical_feats, free_raw_data=False)
    
    # parameters
    train_data = lgb.Dataset(data=X, label=y,  free_raw_data=False)

    def lgb_eval(num_leaves, feature_fraction, bagging_fraction, max_depth, lambda_l1, lambda_l2, min_split_gain, min_child_weight):
      # params = {'application':'multiclass','num_iterations': n_estimators, 'learning_rate':learning_rate, 'early_stopping_round':100, 'metric':'multi_logloss'}
        params = {'application':'multiclass','num_iterations': n_estimators, 'learning_rate':learning_rate, 'early_stopping_round':500, 'metric':'multi_logloss','num_class':19}

        params["num_leaves"] = int(round(num_leaves))
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['lambda_l1'] = max(lambda_l1, 0)
        params['lambda_l2'] = max(lambda_l2, 0)
        params['min_split_gain'] = min_split_gain
        params['min_child_weight'] = min_child_weight
        cv_result = lgb.cv(params, train_data, nfold=n_folds, seed=random_seed, stratified=True, verbose_eval =200, metrics=['multi_logloss'])
        #print(cv_result.keys())
        return max(cv_result['multi_logloss-mean'])
    
    # range 
    lgbBO = BayesianOptimization(lgb_eval, {'num_leaves': (100,300),
                                            'feature_fraction': (0.1,0.3),
                                            'bagging_fraction': (0.1,0.3),
                                            'max_depth': (1,9),
                                            'lambda_l1': (0.1,0.3),
                                            'lambda_l2': (0.1,0.3),
                                            'min_split_gain': (0.1,0.3),
                                            'min_child_weight':(0.1,0.3) }, random_state=1234)
       
    # optimize
    lgbBO.maximize(init_points=init_round, n_iter=opt_round)
    
    # output optimization process
    if output_process==True: lgbBO.points_to_csv("bayes_opt_result_200220_4.csv")
    
    # return best parameters
    return lgbBO.max["params"]

In [56]:
# opt_params.res

In [24]:
np.unique(ytrain_temp)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19], dtype=int64)

##  4. Best parameters 

In [24]:
# -----> input
opt_params = bayes_parameter_opt_lgb(X_train, y_train, 
                                     init_round=5, #5
                                     opt_round=20, # 10
                                     n_folds=6, # 3
                                     random_seed=1234, 
                                     n_estimators=1000,# 100 
                                     learning_rate=0.1) # 0.05

|   iter    |  target   | baggin... | featur... | lambda_l1 | lambda_l2 | max_depth | min_ch... | min_sp... | num_le... |
-------------------------------------------------------------------------------------------------------------------------




[200]	cv_agg's multi_logloss: 0.377191 + 0.00500139
[400]	cv_agg's multi_logloss: 0.372478 + 0.00491562
[600]	cv_agg's multi_logloss: 0.372464 + 0.00491044
[800]	cv_agg's multi_logloss: 0.372464 + 0.00491044
| [0m 1       [0m | [0m 1.896   [0m | [0m 0.1383  [0m | [0m 0.2244  [0m | [0m 0.1875  [0m | [0m 0.2571  [0m | [0m 7.24    [0m | [0m 0.1545  [0m | [0m 0.1553  [0m | [0m 260.4   [0m |




[200]	cv_agg's multi_logloss: 0.381695 + 0.00465627
[400]	cv_agg's multi_logloss: 0.373207 + 0.00456913
[600]	cv_agg's multi_logloss: 0.373227 + 0.00457699
[800]	cv_agg's multi_logloss: 0.373227 + 0.00457699
| [95m 2       [0m | [95m 1.911   [0m | [95m 0.2916  [0m | [95m 0.2752  [0m | [95m 0.1716  [0m | [95m 0.2002  [0m | [95m 6.468   [0m | [95m 0.2425  [0m | [95m 0.1741  [0m | [95m 212.2   [0m |




[200]	cv_agg's multi_logloss: 0.45907 + 0.00494848
[400]	cv_agg's multi_logloss: 0.414644 + 0.00477288
[600]	cv_agg's multi_logloss: 0.400412 + 0.00473539
[800]	cv_agg's multi_logloss: 0.39378 + 0.00461162
[1000]	cv_agg's multi_logloss: 0.390211 + 0.00440486
| [95m 3       [0m | [95m 2.083   [0m | [95m 0.2006  [0m | [95m 0.1028  [0m | [95m 0.2546  [0m | [95m 0.2765  [0m | [95m 3.919   [0m | [95m 0.2231  [0m | [95m 0.1151  [0m | [95m 173.8   [0m |




[200]	cv_agg's multi_logloss: 0.411872 + 0.0046344
[400]	cv_agg's multi_logloss: 0.386173 + 0.00425243
[600]	cv_agg's multi_logloss: 0.3814 + 0.00426102
[800]	cv_agg's multi_logloss: 0.381388 + 0.00425734
[1000]	cv_agg's multi_logloss: 0.381388 + 0.00425734
| [0m 4       [0m | [0m 1.955   [0m | [0m 0.2866  [0m | [0m 0.2303  [0m | [0m 0.1794  [0m | [0m 0.2577  [0m | [0m 3.535   [0m | [0m 0.2136  [0m | [0m 0.2738  [0m | [0m 187.2   [0m |




[200]	cv_agg's multi_logloss: 0.463444 + 0.00493416
[400]	cv_agg's multi_logloss: 0.416808 + 0.00476701
[600]	cv_agg's multi_logloss: 0.400381 + 0.00468051
[800]	cv_agg's multi_logloss: 0.392046 + 0.00445697
[1000]	cv_agg's multi_logloss: 0.387719 + 0.00448003
| [0m 5       [0m | [0m 2.074   [0m | [0m 0.2604  [0m | [0m 0.1288  [0m | [0m 0.2409  [0m | [0m 0.2409  [0m | [0m 2.75    [0m | [0m 0.285   [0m | [0m 0.1884  [0m | [0m 281.9   [0m |




[200]	cv_agg's multi_logloss: 0.377969 + 0.00403775
[400]	cv_agg's multi_logloss: 0.377831 + 0.00410549
[600]	cv_agg's multi_logloss: 0.377831 + 0.00410549
[800]	cv_agg's multi_logloss: 0.377831 + 0.00410549
| [0m 6       [0m | [0m 1.881   [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 9.0     [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 100.0   [0m |




[200]	cv_agg's multi_logloss: 0.375304 + 0.0046255
[400]	cv_agg's multi_logloss: 0.376602 + 0.00478203
[600]	cv_agg's multi_logloss: 0.376602 + 0.00478203
| [0m 7       [0m | [0m 1.877   [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 9.0     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 300.0   [0m |




[200]	cv_agg's multi_logloss: 0.691959 + 0.0049648
[400]	cv_agg's multi_logloss: 0.606014 + 0.00536044
[600]	cv_agg's multi_logloss: 0.569011 + 0.00558975
[800]	cv_agg's multi_logloss: 0.547235 + 0.0056657
[1000]	cv_agg's multi_logloss: 0.53267 + 0.00570799
| [95m 8       [0m | [95m 2.176   [0m | [95m 0.3     [0m | [95m 0.3     [0m | [95m 0.3     [0m | [95m 0.1     [0m | [95m 1.0     [0m | [95m 0.1     [0m | [95m 0.3     [0m | [95m 135.7   [0m |




[200]	cv_agg's multi_logloss: 0.411782 + 0.00497744
[400]	cv_agg's multi_logloss: 0.400624 + 0.00515429
[600]	cv_agg's multi_logloss: 0.399805 + 0.00512315
[800]	cv_agg's multi_logloss: 0.399522 + 0.0051933
[1000]	cv_agg's multi_logloss: 0.399396 + 0.00520315
| [0m 9       [0m | [0m 2.04    [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 9.0     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 146.2   [0m |




[200]	cv_agg's multi_logloss: 0.691869 + 0.00494829
[400]	cv_agg's multi_logloss: 0.605883 + 0.00540499
[600]	cv_agg's multi_logloss: 0.568875 + 0.00564177
[800]	cv_agg's multi_logloss: 0.547092 + 0.0057085
[1000]	cv_agg's multi_logloss: 0.532513 + 0.00575086
| [95m 10      [0m | [95m 2.176   [0m | [95m 0.3     [0m | [95m 0.3     [0m | [95m 0.1     [0m | [95m 0.1     [0m | [95m 1.0     [0m | [95m 0.3     [0m | [95m 0.3     [0m | [95m 155.2   [0m |




[200]	cv_agg's multi_logloss: 0.722523 + 0.00455074
[400]	cv_agg's multi_logloss: 0.623758 + 0.00535157
[600]	cv_agg's multi_logloss: 0.582045 + 0.00577853
[800]	cv_agg's multi_logloss: 0.557886 + 0.00598489
[1000]	cv_agg's multi_logloss: 0.541993 + 0.00608852
| [95m 11      [0m | [95m 2.198   [0m | [95m 0.1     [0m | [95m 0.1     [0m | [95m 0.3     [0m | [95m 0.3     [0m | [95m 1.0     [0m | [95m 0.3     [0m | [95m 0.1     [0m | [95m 236.4   [0m |




[200]	cv_agg's multi_logloss: 0.722527 + 0.00454635
[400]	cv_agg's multi_logloss: 0.623759 + 0.00534683
[600]	cv_agg's multi_logloss: 0.58203 + 0.00578055
[800]	cv_agg's multi_logloss: 0.557834 + 0.00595691
[1000]	cv_agg's multi_logloss: 0.541932 + 0.00604964
| [0m 12      [0m | [0m 2.198   [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 300.0   [0m |




[200]	cv_agg's multi_logloss: 0.722406 + 0.00453805
[400]	cv_agg's multi_logloss: 0.623563 + 0.00534662
[600]	cv_agg's multi_logloss: 0.581864 + 0.00573528
[800]	cv_agg's multi_logloss: 0.557684 + 0.00596652
[1000]	cv_agg's multi_logloss: 0.541802 + 0.00606256
| [0m 13      [0m | [0m 2.198   [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 117.3   [0m |




[200]	cv_agg's multi_logloss: 0.691873 + 0.00495742
[400]	cv_agg's multi_logloss: 0.60587 + 0.0053984
[600]	cv_agg's multi_logloss: 0.568861 + 0.00564052
[800]	cv_agg's multi_logloss: 0.547084 + 0.00571426
[1000]	cv_agg's multi_logloss: 0.532505 + 0.00574319
| [0m 14      [0m | [0m 2.176   [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 250.5   [0m |




[200]	cv_agg's multi_logloss: 0.722531 + 0.00454525
[400]	cv_agg's multi_logloss: 0.623752 + 0.00536607
[600]	cv_agg's multi_logloss: 0.582048 + 0.0057935
[800]	cv_agg's multi_logloss: 0.55787 + 0.00598812
[1000]	cv_agg's multi_logloss: 0.541955 + 0.00608118
| [95m 15      [0m | [95m 2.198   [0m | [95m 0.1     [0m | [95m 0.1     [0m | [95m 0.3     [0m | [95m 0.3     [0m | [95m 1.0     [0m | [95m 0.1     [0m | [95m 0.1     [0m | [95m 100.0   [0m |




[200]	cv_agg's multi_logloss: 0.691959 + 0.0049648
[400]	cv_agg's multi_logloss: 0.606014 + 0.00536044
[600]	cv_agg's multi_logloss: 0.569011 + 0.00558975
[800]	cv_agg's multi_logloss: 0.547235 + 0.0056657
[1000]	cv_agg's multi_logloss: 0.53267 + 0.00570799
| [0m 16      [0m | [0m 2.176   [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 108.0   [0m |




[200]	cv_agg's multi_logloss: 0.691873 + 0.00495742
[400]	cv_agg's multi_logloss: 0.60587 + 0.0053984
[600]	cv_agg's multi_logloss: 0.568861 + 0.00564052
[800]	cv_agg's multi_logloss: 0.547084 + 0.00571426
[1000]	cv_agg's multi_logloss: 0.532505 + 0.00574319
| [0m 17      [0m | [0m 2.176   [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 293.8   [0m |




[200]	cv_agg's multi_logloss: 0.691873 + 0.00495742
[400]	cv_agg's multi_logloss: 0.60587 + 0.0053984
[600]	cv_agg's multi_logloss: 0.568861 + 0.00564052
[800]	cv_agg's multi_logloss: 0.547084 + 0.00571426
[1000]	cv_agg's multi_logloss: 0.532505 + 0.00574319
| [0m 18      [0m | [0m 2.176   [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 227.4   [0m |




[200]	cv_agg's multi_logloss: 0.722531 + 0.00454525
[400]	cv_agg's multi_logloss: 0.623752 + 0.00536607
[600]	cv_agg's multi_logloss: 0.582048 + 0.0057935
[800]	cv_agg's multi_logloss: 0.55787 + 0.00598812
[1000]	cv_agg's multi_logloss: 0.541955 + 0.00608118
| [0m 19      [0m | [0m 2.198   [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 125.2   [0m |




[200]	cv_agg's multi_logloss: 0.691873 + 0.00495742
[400]	cv_agg's multi_logloss: 0.60587 + 0.0053984
[600]	cv_agg's multi_logloss: 0.568861 + 0.00564052
[800]	cv_agg's multi_logloss: 0.547084 + 0.00571426
[1000]	cv_agg's multi_logloss: 0.532505 + 0.00574319
| [0m 20      [0m | [0m 2.176   [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 239.4   [0m |




[200]	cv_agg's multi_logloss: 0.722523 + 0.00455074
[400]	cv_agg's multi_logloss: 0.623758 + 0.00535157
[600]	cv_agg's multi_logloss: 0.582045 + 0.00577853
[800]	cv_agg's multi_logloss: 0.557886 + 0.00598489
[1000]	cv_agg's multi_logloss: 0.541993 + 0.00608852
| [0m 21      [0m | [0m 2.198   [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 264.9   [0m |




[200]	cv_agg's multi_logloss: 0.722425 + 0.00452089
[400]	cv_agg's multi_logloss: 0.623572 + 0.00534407
[600]	cv_agg's multi_logloss: 0.581876 + 0.00574347
[800]	cv_agg's multi_logloss: 0.557697 + 0.00597172
[1000]	cv_agg's multi_logloss: 0.541794 + 0.00608554
| [0m 22      [0m | [0m 2.198   [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 300.0   [0m |




[200]	cv_agg's multi_logloss: 0.722516 + 0.00455415
[400]	cv_agg's multi_logloss: 0.623732 + 0.0053525
[600]	cv_agg's multi_logloss: 0.582016 + 0.00577416
[800]	cv_agg's multi_logloss: 0.557829 + 0.00597671
[1000]	cv_agg's multi_logloss: 0.54195 + 0.00605628
| [0m 23      [0m | [0m 2.198   [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.2876  [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 166.6   [0m |




[200]	cv_agg's multi_logloss: 0.722599 + 0.00459311
[400]	cv_agg's multi_logloss: 0.623688 + 0.00539957
[600]	cv_agg's multi_logloss: 0.581946 + 0.00579344
[800]	cv_agg's multi_logloss: 0.557795 + 0.00598482
[1000]	cv_agg's multi_logloss: 0.541895 + 0.00607738
| [0m 24      [0m | [0m 2.198   [0m | [0m 0.1038  [0m | [0m 0.1109  [0m | [0m 0.2856  [0m | [0m 0.1131  [0m | [0m 1.009   [0m | [0m 0.2976  [0m | [0m 0.2536  [0m | [0m 258.3   [0m |




[200]	cv_agg's multi_logloss: 0.691816 + 0.00505177
[400]	cv_agg's multi_logloss: 0.605867 + 0.00549233
[600]	cv_agg's multi_logloss: 0.568905 + 0.00570778
[800]	cv_agg's multi_logloss: 0.547165 + 0.00575307
[1000]	cv_agg's multi_logloss: 0.532575 + 0.00578148
| [0m 25      [0m | [0m 2.175   [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 0.1     [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 0.3     [0m | [0m 0.3     [0m | [0m 121.3   [0m |


In [25]:
print(opt_params)

{'bagging_fraction': 0.1, 'feature_fraction': 0.1, 'lambda_l1': 0.3, 'lambda_l2': 0.3, 'max_depth': 1.0, 'min_child_weight': 0.1, 'min_split_gain': 0.1, 'num_leaves': 100.0}
