# Lightgbm

In [None]:
!pip install bayesian-optimization

In [1]:
import pandas as pd
import numpy as np
import scipy as sp

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVC, SVR
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from bayes_opt import BayesianOptimization  # bayesian-optimization

from sklearn.metrics import mean_squared_error, accuracy_score

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Hyperparameter Tuning

In [None]:
data = pd.read_pickle('./data/boston.pickle')
boston = pd.DataFrame(data.get('data'), columns=data.get('feature_names'))
boston['target'] = data.get('target')
boston

## Manual Search

- 직접 파라미터를 설정하는 직접 탐색 방법  
- 전체 범위를 보는 것이 아니기에 현재까지의 최적의 파라미터가 최적인지는 보장하지 못 함  
- 여러 종류의 파라미터를 동시에 탐색하기에는 한계

In [None]:
lasso = Lasso(alpha=0.5).fit(boston.iloc[:, :-1], boston.iloc[:, -1])
print(lasso.score(boston.iloc[:, :-1], boston.iloc[:, -1]))

lasso = Lasso(alpha=2).fit(boston.iloc[:, :-1], boston.iloc[:, -1])
print(lasso.score(boston.iloc[:, :-1], boston.iloc[:, -1]))

lasso = Lasso(alpha=10).fit(boston.iloc[:, :-1], boston.iloc[:, -1])
print(lasso.score(boston.iloc[:, :-1], boston.iloc[:, -1]))

## Greed Search

<img src="https://www.yourdatateacher.com/wp-content/uploads/2021/03/image-6.png" width="400" height="400"/>

- 탐색 구간 내 모든 hyperparameter 조합을 일정 구간으로 나누어 시도  
- 파라미터가 많아질수록 기하급수적으로 많은 시간 소요

<br>

- 사용 방법
> ```python
> from sklearn.model_selection import GridSearchCV
> parameters = {'parameter1': ('value1', 'value2', ...), 'parameter2':[value1, value2, ...]}
> grid_search = GridSearchCV(model, parameters)
> grid_search.fit(X, y)
> ```

In [None]:
alphas = np.arange(0, 10, 0.5)
for alpha in alphas:
    lasso = Lasso(alpha=alpha).fit(boston.iloc[:, :-1], boston.iloc[:, -1])
    print(lasso.score(boston.iloc[:, :-1], boston.iloc[:, -1]))

0.7406426641094094
0.7140164719858566
0.6825842212709925
0.6465205841756452
0.6254704869379217
0.6101441800078167
0.5960362446051077
0.5798338383034314
0.5675870451576798
0.5651862996116723
0.562498162686504
0.5595264178763704
0.5562716136580538
0.5527356721850062
0.5489138528031983
0.5448090145223752
0.5404210869814117
0.535750001811063
0.5307994151577793
0.5264821110745237


  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [None]:
alphas = np.arange(0, 10, 0.5)
for alpha in alphas:
    ridge = Ridge(alpha=alpha).fit(boston.iloc[:, :-1], boston.iloc[:, -1])
    print(ridge.score(boston.iloc[:, :-1], boston.iloc[:, -1]))

0.7406426641094095
0.739957023371629
0.7388703133867616
0.7378534090710318
0.7369782922801451
0.7362368934827073
0.7356069485394054
0.7350669874488442
0.7345992921539443
0.7341898739716026
0.7338278122211868
0.7335045713812018
0.7332134382184367
0.7329490886449568
0.7327072621634045
0.7324845185872456
0.7322780558411127
0.7320855726844828
0.731905164474163
0.7317352433355795


In [None]:
lasso = Lasso()

parameters = {
    'alpha': np.arange(0.1, 10, 0.5)
}

grid_search = GridSearchCV(lasso, parameters)
grid_search.fit(boston.iloc[:, :-1], boston.iloc[:, -1])

In [None]:
grid_search.cv_results_

{'mean_fit_time': array([0.00180659, 0.00146375, 0.00099921, 0.0010036 , 0.00120382,
        0.00098925, 0.00119386, 0.00106406, 0.0010035 , 0.00099916,
        0.00099998, 0.00160108, 0.00124545, 0.00102353, 0.0010159 ,
        0.00100017, 0.00100603, 0.00100846, 0.00100036, 0.00125022]),
 'std_fit_time': array([7.49772243e-04, 4.52216422e-04, 2.46852476e-06, 1.25577580e-05,
        3.97970686e-04, 1.32826326e-05, 4.03492114e-04, 1.29636142e-04,
        6.32595976e-06, 1.05718142e-05, 6.81059687e-07, 4.91129324e-04,
        3.76522752e-04, 4.11532845e-05, 2.61509427e-05, 1.10426673e-05,
        1.26865571e-05, 1.25222195e-05, 1.06564304e-05, 3.89735360e-04]),
 'mean_score_time': array([0.00084815, 0.00062027, 0.00097971, 0.00079985, 0.00040355,
        0.00060363, 0.        , 0.0008038 , 0.0005959 , 0.00040002,
        0.00039983, 0.00082607, 0.00075698, 0.00040317, 0.00080457,
        0.00060024, 0.00104566, 0.00111294, 0.0008029 , 0.00040016]),
 'std_score_time': array([4.37906322e-

In [None]:
index_for_best_score = grid_search.cv_results_.get('rank_test_score').argmin()

In [None]:
grid_search.cv_results_.get('params')[index_for_best_score]

{'alpha': 0.6}

## Random Search

<img src="https://www.yourdatateacher.com/wp-content/uploads/2021/03/image-7.png" width="400" height="400"/>

- 탐색 대상 구간 내의 hyperparameter를 랜덤 샘플링  
- Grid Search 대비 반복 횟수를 줄이는 동시에 확률적 탐색으로 **최적에 근사한 parameter를  빨리 찾을 수 있는 것으로 알려짐**  
- 다만, 전체를 확인하는 게 아니라 최적의 값은 아님

<br>

- 사용 방법
> ```python
> from sklearn.model_selection import RandomizedSearchCV
> from scipy.stats import uniform
> distributions = {'parameter1': ('value1', 'value2', ...), 'parameter2': uniform(loc=1, scale=10)}
> grid_search = RandomizedSearchCV(model, parameters, random_state=0)
> grid_search.fit(X, y)
> ```

In [None]:
lasso = Lasso()

parameters = {
    'alpha': np.arange(0.1, 10, 0.5)
}

random_search = RandomizedSearchCV(lasso, parameters)
random_search.fit(boston.iloc[:, :-1], boston.iloc[:, -1])

In [None]:
index_for_best_score = random_search.cv_results_.get('rank_test_score').argmin()
random_search.cv_results_.get('params')[index_for_best_score]

{'alpha': 0.6}

## Bayesian Optimization

<img src="https://miro.medium.com/v2/resize:fit:720/format:webp/1*PhKGj_bZlND8IEfII426wA.png" width="600" height="400"/>


- 탐색 대상 구간 내의 hyperparameter를 샘플링  
- Grid Search나 Random Search의 경우 다음 샘플링 선정 시 이전 샘플링의 정보를 사용하지 못하여 불필요한 탐색을 반복  
- Bayesian Optimization은 사전 정보를 활용하여 다음 sample의 후보군을 선택  
순차적으로 하이퍼파라미터를 업데이트해가면서 평가를 통해 최적의 하이퍼파라미터 조합 탐색

<br>

- 사용 방법

> ```python
> import numpy as np
> import lightgbm
> from sklearn.metrics import mean_squared_error
> from bayes_opt import BayesianOptimization
> 
> def lgbm_cv(
>   max_depth,
>   learning_rate,
>   n_estimators,
>   subsample,
>   colsample_bytree
>   ):
> 
>   # model define
>   model = lightgbm.LGBMRegressor(
>               max_depth=int(max_depth),
>               learning_rate=learning_rate,
>               n_estimators=int(n_estimators),
>               subsample=subsample,
>               colsample_bytree=colsample_bytree,
>           )
>
>   # train
>   model.fit(X_train, y_train)
> 
>   # predict
>   y_pred = model.predict(X_test)
> 
>   # metric
>   rmse = np.sqrt(mean_squared_error(y_test, y_pred))
> 
>   # metric return
>   return rmse
> ```

> ```python
> pbounds = {
>     'max_depth': (3, 10),
>     'learning_rate': (0.001, 0.1),
>     'n_estimators': (10, 1000),
>     'subsample': (0.5, 1),
>     'colsample_bytree' :(0.2, 1),
> }
> 
> bayesian_optimization = BayesianOptimization(
>     f=lgbm_cv,
>     pbounds=pbounds,
>     verbose=2,          # 출력 옵션
>     random_state=0,     # 2: 출력, 1: 최댓값일 때 출력, 0: 출력 안 함
>     )
> 
> bayesian_optimization.maximize(init_points=2, n_iter=10, acq='ei', xi=0.01)
> # init_points: 초기 random search 수
> # n_iter: 반복 횟수
> # acq: acquisition function
> # xi: exploration 강도
> print(bayesian_optimization.max)
> ```