# Lightgbm

In [1]:
!pip install bayesian-optimization

Collecting bayesian-optimization
  Downloading bayesian_optimization-1.5.1-py3-none-any.whl.metadata (16 kB)
Downloading bayesian_optimization-1.5.1-py3-none-any.whl (28 kB)
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.5.1


In [2]:
import pandas as pd
import numpy as np
import scipy as sp

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVC, SVR
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from bayes_opt import BayesianOptimization  # bayesian-optimization

from sklearn.metrics import mean_squared_error, accuracy_score

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Hyperparameter Tuning

## Manual Search

- 직접 파라미터를 설정하는 직접 탐색 방법  
- 전체 범위를 보는 것이 아니기에 현재까지의 최적의 파라미터가 최적인지는 보장하지 못 함  
- 여러 종류의 파라미터를 동시에 탐색하기에는 한계

In [9]:
boston = pd.read_pickle('./data/boston.pickle')
boston
boston = pd.concat([pd.DataFrame(boston.data, columns=boston.feature_names), pd.DataFrame(boston.target, columns=['target'])], axis= 1)
boston

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


## Greed Search

<img src="https://www.yourdatateacher.com/wp-content/uploads/2021/03/image-6.png" width="400" height="400"/>

- 탐색 구간 내 모든 hyperparameter 조합을 일정 구간으로 나누어 시도  
- 파라미터가 많아질수록 기하급수적으로 많은 시간 소요

<br>

- 사용 방법
> ```python
> from sklearn.model_selection import GridSearchCV
> parameters = {'parameter1': ('value1', 'value2', ...), 'parameter2':[value1, value2, ...]}
> grid_search = GridSearchCV(model, parameters)
> grid_search.fit(X, y)
> ```

In [10]:
alphas = np.arange(0, 10, 0.5)
for alpha in alphas:
    lasso = Lasso(alpha=alpha).fit(boston.iloc[:, :-1], boston.iloc[:, -1])
    lasso.coef_

  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [11]:
lasso = Lasso(alpha=alpha).fit(boston.iloc[:, :-1], boston.iloc[:, -1])

parameters = {
    'alpha': np.arange(0.1, 10, 0.5)
}

grid_search = GridSearchCV(lasso, parameters)
grid_search.fit(boston.iloc[:, :-1], boston.iloc[:, -1])

In [12]:
grid_search.best_params_

{'alpha': 0.6}

In [13]:
grid_search.cv_results_.get('params')

[{'alpha': 0.1},
 {'alpha': 0.6},
 {'alpha': 1.1},
 {'alpha': 1.6},
 {'alpha': 2.1},
 {'alpha': 2.6},
 {'alpha': 3.1},
 {'alpha': 3.6},
 {'alpha': 4.1},
 {'alpha': 4.6},
 {'alpha': 5.1},
 {'alpha': 5.6},
 {'alpha': 6.1},
 {'alpha': 6.6},
 {'alpha': 7.1},
 {'alpha': 7.6},
 {'alpha': 8.1},
 {'alpha': 8.6},
 {'alpha': 9.1},
 {'alpha': 9.6}]

In [15]:
index_for_best_score = grid_search.cv_results_.get('rank_test_score').argmin()

In [17]:
grid_search.cv_results_.get('params')[index_for_best_score]

{'alpha': 0.6}

In [19]:
lasso = Lasso(alpha=alpha).fit(boston.iloc[:, :-1], boston.iloc[:, -1])

parameters = {
    'alpha': np.arange(0.1, 10, 0.5)
}

random_search = RandomizedSearchCV(lasso, parameters)
random_search.fit(boston.iloc[:, :-1], boston.iloc[:, -1])

index_for_best_score = random_search.cv_results_.get('rank_test_score').argmin()
random_search.cv_results_.get('params')[index_for_best_score]

{'alpha': 0.6}

## Random Search

<img src="https://www.yourdatateacher.com/wp-content/uploads/2021/03/image-7.png" width="400" height="400"/>

- 탐색 대상 구간 내의 hyperparameter를 랜덤 샘플링  
- Grid Search 대비 반복 횟수를 줄이는 동시에 확률적 탐색으로 **최적에 근사한 parameter를  빨리 찾을 수 있는 것으로 알려짐**  
- 다만, 전체를 확인하는 게 아니라 최적의 값은 아님

<br>

- 사용 방법
> ```python
> from sklearn.model_selection import RandomizedSearchCV
> from scipy.stats import uniform
> distributions = {'parameter1': ('value1', 'value2', ...), 'parameter2': uniform(loc=1, scale=10)}
> grid_search = RandomizedSearchCV(model, parameters, random_state=0)
> grid_search.fit(X, y)
> ```

## Bayesian Optimization

<img src="https://miro.medium.com/v2/resize:fit:720/format:webp/1*PhKGj_bZlND8IEfII426wA.png" width="600" height="400"/>


- 탐색 대상 구간 내의 hyperparameter를 샘플링  
- Grid Search나 Random Search의 경우 다음 샘플링 선정 시 이전 샘플링의 정보를 사용하지 못하여 불필요한 탐색을 반복  
- Bayesian Optimization은 사전 정보를 활용하여 다음 sample의 후보군을 선택  
순차적으로 하이퍼파라미터를 업데이트해가면서 평가를 통해 최적의 하이퍼파라미터 조합 탐색

<br>

- 사용 방법

> ```python
> import numpy as np
> import lightgbm
> from sklearn.metrics import mean_squared_error
> from bayes_opt import BayesianOptimization
> 
> def lgbm_cv(
>   max_depth,
>   learning_rate,
>   n_estimators,
>   subsample,
>   colsample_bytree
>   ):
> 
>   # model define
>   model = lightgbm.LGBMRegressor(
>               max_depth=int(max_depth),
>               learning_rate=learning_rate,
>               n_estimators=int(n_estimators),
>               subsample=subsample,
>               colsample_bytree=colsample_bytree,
>           )
>
>   # train
>   model.fit(X_train, y_train)
> 
>   # predict
>   y_pred = model.predict(X_test)
> 
>   # metric
>   rmse = np.sqrt(mean_squared_error(y_test, y_pred))
> 
>   # metric return
>   return rmse
> ```

> ```python
> pbounds = {
>     'max_depth': (3, 10),
>     'learning_rate': (0.001, 0.1),
>     'n_estimators': (10, 1000),
>     'subsample': (0.5, 1),
>     'colsample_bytree' :(0.2, 1),
> }
> 
> bayesian_optimization = BayesianOptimization(
>     f=lgbm_cv,
>     pbounds=pbounds,
>     verbose=2,          # 출력 옵션
>     random_state=0,     # 2: 출력, 1: 최댓값일 때 출력, 0: 출력 안 함
>     )
> 
> bayesian_optimization.maximize(init_points=2, n_iter=10, acq='ei', xi=0.01)
> # init_points: 초기 random search 수
> # n_iter: 반복 횟수
> # acq: acquisition function
> # xi: exploration 강도
> print(bayesian_optimization.max)
> ```