<font color="#CC3D3D"><p>
# Model Tuning (Hyperparameter Optimization)

In [1]:
from sklearn.datasets import load_digits

digits = load_digits()

In [2]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, random_state=0)

In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

model = KNeighborsClassifier()
#model = DecisionTreeClassifier()
#model = LogisticRegression()
#model = SVC()

<font color="blue"><p>
#### KNN 알고리즘의 직관적 이해    
<img align='left' src='https://miro.medium.com/max/506/0*QyWp7J6eSz0tayc0.png' width=500>

중요한 hyper parameter
1. k값
2. 범위 안에 있는 값들을 모두 동등하게 처리할 것인지, 거리에 따른 가중치를 반영할 것인지.

<font color="blue"><p>
#### Logistic Regression의 직관적 이해    
<img align='left' src='https://media.licdn.com/dms/image/D4D12AQGS572EFk8FhQ/article-cover_image-shrink_600_2000/0/1691381627266?e=2147483647&v=beta&t=fIPydD-14_98VidYlmpdBbfIGeUeYahaVAIsPjZRTiU' width=800>

<font color="blue"><p>
#### SVM(Support Vector Machine)의 직관적 이해    
<img align='left' src='https://www.inovex.de/wp-content/uploads/separating_hyperplanes.png' width=800>

직선과 점들 간의 거리(마진)가 가장 넓은 선을 찾는다.
--> 여러 실험에서 발생할 수 있는 오차가 있기 때문에."

<br><font color = "darkgreen">
## 1. Grid Search CV 

##### Set the parameters for grid search #####

In [4]:
# param_grid: dictionary with parameters names as keys and
# lists of parameter settings to try as values

param_grid = {'n_neighbors': range(4,10),
              'weights': ['uniform','distance']}
param_grid

{'n_neighbors': range(4, 10), 'weights': ['uniform', 'distance']}

##### Grid search with cross-validation ####

In [5]:
from sklearn.model_selection import GridSearchCV

grid_search = GridSearchCV(model, param_grid, scoring='accuracy', cv=5, n_jobs=-1) # n-job=-1 : Core를 병렬로 사용해라 --> 5번 돌려야하는데 동시에 실행하기

In [6]:
# grid search is very time-consuming

grid_search.fit(X_train, y_train)

##### Evaluate the model with best parameters ####

In [7]:
grid_search.score(X_test, y_test), KNeighborsClassifier().fit(X_train, y_train).score(X_test, y_test)

(0.9866666666666667, 0.98)

In [9]:
print("Best parameters: {}".format(grid_search.best_params_))
print("Best CV score: {:.2f}".format(grid_search.best_score_))

Best parameters: {'n_neighbors': 4, 'weights': 'distance'}
Best CV score: 0.99


In [10]:
print("Best estimator:\n{}".format(grid_search.best_estimator_))

Best estimator:
KNeighborsClassifier(n_neighbors=4, weights='distance')


<br><font color = "darkgreen">
## 2. Random Search CV

##### Set the parameters for random search #####

In [11]:
# random은 grid보다 범위를 넓히는 것이 좋다.
param_grid = {'n_neighbors': range(1, 10),
              'weights': ['uniform','distance']}
param_grid

{'n_neighbors': range(1, 10), 'weights': ['uniform', 'distance']}

##### Random search with cross-validation ####

In [12]:
from sklearn.model_selection import RandomizedSearchCV

rand_search = RandomizedSearchCV(model, param_distributions=param_grid, 
                                 scoring='accuracy', n_iter=12, random_state=100)  #n_iter=12 : 실험을 12번 해라

In [13]:
rand_search.fit(X_train, y_train)

##### Evaluate the model with best parameters ####

In [14]:
rand_search.score(X_test, y_test)

0.9888888888888889

In [15]:
print("Best estimator:\n{}".format(rand_search.best_estimator_))

Best estimator:
KNeighborsClassifier(n_neighbors=3, weights='distance')


In [16]:
print("Best parameters: {}".format(rand_search.best_params_))

Best parameters: {'weights': 'distance', 'n_neighbors': 3}


<br><font color = "darkgreen">
#### Grid Search 결과와 Random Search 결과 비교 예시 [Bergstra and Bengio(2012)]

<img align='left' src='https://community.alteryx.com/t5/image/serverpage/image-id/74545i97245FDAA10376E9/image-size/large?v=v2&px=999' width=600>

Random Search는 Grid Search에 비해 불필요한 반복 수행 횟수를 대폭 줄이면서, 동시에 정해진 간격(grid) 사이에 위치한 값들에 대해서도 확률적으로 탐색이 가능하므로, 최적 hyperparameter 값을 더 빨리 찾을 수 있는 것으로 알려져 있음.

## 3. Bayesian Optimization with Optuna

- Grid Search와 Random Search는 이전까지의 조사 과정에서 얻어진 hyperparameter 값들의 성능 결과에 대한 '사전 지식'이 전혀 반영되어 있지 않기 때문에 비효율적인 요소가 있음.
- 매 회 새로운 hyperparameter 값에 대한 조사를 수행할 시 '사전 지식'을 충분히 반영하면서, 동시에 전체적인 탐색 과정을 체계적으로 수행할 수 있는 방법이 Bayesian Optimization임.   

<img align='left' src='https://i.postimg.cc/P5gnLhGY/tuning-methods.png' width=800>

In [17]:
!pip install optuna

Collecting optuna
  Downloading optuna-3.6.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.3.0 (from optuna)
  Downloading SQLAlchemy-2.0.29-cp39-cp39-win_amd64.whl.metadata (9.8 kB)
Collecting tqdm (from optuna)
  Downloading tqdm-4.66.2-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 0.0/57.6 kB ? eta -:--:--
     ---------------------------------------- 57.6/57.6 kB 3.0 MB/s eta 0:00:00
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.2-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet!=0.4.17 (from sqlalchemy>=1.3.0->optuna)
  Downloading greenlet-3.0.3-cp39-cp39-win_amd64.whl.metadata (3.9 kB)
Downloading optuna-3.6.0-py3-none-any.whl (379 kB)
   ---------------------------------------- 0.0/379.9 kB ? eta -:--:--
 

##### A simple optimization problem #####

In [None]:
import numpy as np
import matplotlib.pyplot as plt

x = np.arange(-10, 10)
plt.plot(x, (x-2)**2)
plt.show()

In [18]:
"""
A simple optimization problem:

- Define objective function to be optimized. Let's minimize (x - 2)^2
- Suggest hyperparameter values using trial object. Here, a float value of x is suggested from -10 to 10
- Create a study object and invoke the optimize method over 100 trials
"""

import optuna

def objective(trial):
    x = trial.suggest_uniform('x', -10, 10)
    return (x - 2) ** 2

study = optuna.create_study()
study.optimize(objective, n_trials=100)

study.best_params

[I 2024-03-28 17:31:02,070] A new study created in memory with name: no-name-7cfb4046-9b5d-4087-b9c3-4c6630e6b127
  x = trial.suggest_uniform('x', -10, 10)
[I 2024-03-28 17:31:02,077] Trial 0 finished with value: 41.69886957250131 and parameters: {'x': -4.457466188258465}. Best is trial 0 with value: 41.69886957250131.
[I 2024-03-28 17:31:02,081] Trial 1 finished with value: 4.395144606463223 and parameters: {'x': 4.096460017854675}. Best is trial 1 with value: 4.395144606463223.
[I 2024-03-28 17:31:02,086] Trial 2 finished with value: 6.796709250297036 and parameters: {'x': 4.607049913273054}. Best is trial 1 with value: 4.395144606463223.
[I 2024-03-28 17:31:02,091] Trial 3 finished with value: 39.331097692389854 and parameters: {'x': 8.27145100374625}. Best is trial 1 with value: 4.395144606463223.
[I 2024-03-28 17:31:02,095] Trial 4 finished with value: 99.63892795678824 and parameters: {'x': -7.981930071724017}. Best is trial 1 with value: 4.395144606463223.
[I 2024-03-28 17:31:02

{'x': 2.0022489136081996}

##### Procedure for optimizing sklearn parameters #####
1. Wrap model training with an objective function and return accuracy
2. Suggest hyperparameters using a trial object
3. Create a study object and execute the optimization

1. objective 함수를 만든다.
    - 함수는 반드시 trial(실험횟수)이라는 parameter를 받아야함
    - 실험했을 때, 성능을 return한다.
    - 
2. hp 찾기
3. study 객체를 통해 최적화

In [19]:
from sklearn.model_selection import cross_val_score

# 조절할 하이퍼 파라미터와 그 범위를 지정하는 함수 정의
def objective(trial): 
    # optuna.trial.Trial.suggest_categorical() for categorical parameters 파라미터가 범주형일 때,
    # optuna.trial.Trial.suggest_int() for integer parameters 파라미터가 정수일 때,
    # optuna.trial.Trial.suggest_float() for floating point parameters  파라미터가 실수일 때,
    knn_n_neighbors = trial.suggest_int('n_neighbors', 1, 10, step=1) # step : 건너뛰는 법
    knn_weights = trial.suggest_categorical('weights', ['uniform','distance'])

    classifier_obj = KNeighborsClassifier(
        n_neighbors = knn_n_neighbors, 
        weights = knn_weights,    
    )
    
    # objective 수정
    #모델에 따라 hp지정하고 모델 부르기
    # n_trial & direction 바꾸기

    score = cross_val_score(classifier_obj, X_train, y_train, cv=5, n_jobs=-1)
    accuracy = score.mean()
    return accuracy

# 최적화 실행                  # optuna.samplers.TPESampler(seed=100) : 옵튜나가 찾는 방식, # direction="maximize" : 함수 값이 높으면 높을수록 좋다.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=100), direction="maximize")
study.optimize(objective, n_trials=12) 

#최적화 결과 보기
print("Best score:", study.best_value)
print("Best parameters:", study.best_params)

[I 2024-03-28 17:31:12,919] A new study created in memory with name: no-name-2d0cfba5-33e4-413e-b6ee-81495070fd46
[I 2024-03-28 17:31:18,056] Trial 0 finished with value: 0.9844003855156271 and parameters: {'n_neighbors': 6, 'weights': 'distance'}. Best is trial 0 with value: 0.9844003855156271.
[I 2024-03-28 17:31:20,828] Trial 1 finished with value: 0.9814401762357153 and parameters: {'n_neighbors': 9, 'weights': 'distance'}. Best is trial 0 with value: 0.9844003855156271.
[I 2024-03-28 17:31:23,342] Trial 2 finished with value: 0.9829244114002478 and parameters: {'n_neighbors': 7, 'weights': 'uniform'}. Best is trial 0 with value: 0.9844003855156271.
[I 2024-03-28 17:31:23,387] Trial 3 finished with value: 0.9806966818119236 and parameters: {'n_neighbors': 6, 'weights': 'uniform'}. Best is trial 0 with value: 0.9844003855156271.
[I 2024-03-28 17:31:23,432] Trial 4 finished with value: 0.9844196612969848 and parameters: {'n_neighbors': 2, 'weights': 'distance'}. Best is trial 4 with 

Best score: 0.985892881729313
Best parameters: {'n_neighbors': 3, 'weights': 'uniform'}


##### Plotting the optimization process #####

In [None]:
model = KNeighborsClassifier(**study.best_params)
model.fit(X_train, y_train)
model.score(X_test, y_test)

In [None]:
# 하이퍼파라미터 중요도
#optuna.visualization.plot_param_importances(study)

<font color="#CC3D3D"><p>
# End