In [2]:
# example 1: bayes_opt
import bayes_opt #베이지만 탐색법
from bayes_opt.util import UtilityFunction

In [3]:
# 라이브러리 호출
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as dataset
import torchvision.transforms as transforms

import random
import numpy as np
import matplotlib.pyplot as plt

In [5]:
# Dataset 설정
mnist_train = dataset.MNIST(root='./MNIST_data',
                          train=True, 
                          transform=transforms.ToTensor(), 
                          download=False)

mnist_test = dataset.MNIST(root='./MNIST_data', 
                         train=False, 
                         transform=transforms.ToTensor(), 
                         download=False)

# Neural network 모델만들기
class MNIST_full(nn.Module):
    def __init__(self):
        super(MNIST_full,self).__init__()
        self.lin1 = nn.Linear(784,256)
        self.lin2 = nn.Linear(256,128)
        self.lin3 = nn.Linear(128,10)

    def forward(self,x):
        x = x.view(-1,784)
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = F.softmax(self.lin3(x),dim=1)
        return x

# Bayesian optimization example

In [6]:
# LinearRegression 예제의 hyperparameter튜닝 진행
# 튜닝시킬 hyperparameter - learning rate + weight_decay
search_space = {
    "batch_size": (10, 256),
    "learning_rate": (1e-05, 1e-02)
}

In [None]:
def objective_function(batch_size,learning_rate): #목적함수 
    train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=int(batch_size),shuffle=True,drop_last=False)
    test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=32,shuffle=False,drop_last=False)
    
    model = MNIST_full()
    optimizer = optim.Adam(model.parameters(),lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(3):
        loss_buffer = []
        for idx,(x,y) in enumerate(train_loader):
            optimizer.zero_grad()    

            y_onehot = torch.zeros((y.shape[0],10))
            y_onehot[range(y.shape[0]),y]=1

            y_infer = model(x)
            loss = criterion(y_infer,y)
            loss.backward()
            optimizer.step()
            loss_buffer.append(loss.item())

    acc = []
    for x,y in test_loader:
        # one_hot encoding
        y_onehot = torch.zeros((len(y),10))
        y_onehot[range(len(y)),y]=1
        y_infer = model(x)

        correct_prediction = torch.argmax(y_infer, 1) == y
        acc.append(correct_prediction.float().mean())
    bayes_target = np.mean(acc)*100
    return bayes_target # Bayes_opt는 최대화가 목표
#-----------------------------------------------------------------------------------
optimizer = bayes_opt.BayesianOptimization(
    f = objective_function,
    pbounds=search_space,
    verbose=2,
    random_state=123
)

# Gaussian Process 파라미터 설정
optimizer.set_gp_params(kernel=None, alpha=1e-6, normalize_y=True)

# Utility Function 설정
utility = UtilityFunction(kind='ei', kappa=2.576, xi=0.0)

# 최적화 실행
optimizer.maximize(
    init_points=2,
    n_iter=15,
    acquisition_function=utility
)

# 최적화 실행
optimizer.maximize(
    init_points=2,
    n_iter=15,
    acquisition_function=utility
)

In [None]:
print(f'최적 파라미터 정보: {optimizer.max["params"]}')
print(f'최적 값: {optimizer.max["target"]}')

# 격자 검색 - Grid search

In [None]:

# 하이퍼파라미터 튜닝을 위한 강력하고 직관적인 방법 매개 변수를 하나하나 바꾸어 가며 최적값을 비교함. 
# 모든 조합을 탐색하기 때문에 시간이 많이 걸릴 수 있지만, 
# 병렬 처리를 통해 속도를 개선. 이 방법은 특히 하이퍼파라미터 공간이 비교적 작을 때 유용


#격자탐색 공간 정의
param_grids = { 
    'batch_size': np.arange(64,128), #격자 공간은 64에서 127까지 학습률은 le-02까지의 실수로 50개의 간격 
    'learning_rate': np.linspace(1e-05, 1e-02),
}
#격자탐색함수 정의 
def grid_search_from_scratch(batch_size, learning_rate):
    train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=int(batch_size),shuffle=True,drop_last=False)
    test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=32,shuffle=False,drop_last=False)
    
    model = MNIST_full() #인공신경망 모델 
    optimizer = optim.Adam(model.parameters(),lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(3): #학습 
        loss_buffer = []
        for idx,(x,y) in enumerate(train_loader):
            optimizer.zero_grad()    #기울기 초기화

            y_onehot = torch.zeros((y.shape[0],10)) #원핫인코딩 
            y_onehot[range(y.shape[0]),y]=1 

            y_infer = model(x)
            loss = criterion(y_infer,y)
            loss.backward()
            optimizer.step()
            loss_buffer.append(loss.item())
# 정확도 측정
    acc = []
    for x,y in test_loader:
        # one_hot encoding
        # y_onehot = torch.zeros((len(y),10))
        # y_onehot[range(len(y)),y]=1
        y_infer = model(x)

        correct_prediction = torch.argmax(y_infer, 1) == y
        acc.append(correct_prediction.float().mean())
    grid_target = np.mean(acc)*100
    
    #정확도 반환
    return grid_target # Bayes_opt는 최대화가 목표입니다.

In [27]:
best_target = -np.inf
cnt = 0
print(f'| {"iter":^10} | {"target":^10} | {"batch_...":^10} | {"learni...":^10} |')
print(f'-----------------------------------------------------')
for batch_size in param_grids['batch_size']:
    for learning_rate in param_grids['learning_rate']:
        target = grid_search_from_scratch(batch_size,learning_rate)
        cnt += 1 
        print(f'| {cnt:^10} | {target:^10.3f} | {batch_size:^10} | {learning_rate:^10.3e} |')
        if target > best_target:
            best_target = target
            best_info = {
                'batch_size':batch_size,
                'learning_rate':learning_rate
            }
print(f'=====================================================')

print(f'탐색 결과 - target:{best_target}, info: {best_info}')

|    iter    |   target   | batch_...  | learni...  |
-----------------------------------------------------
|     1      |   79.593   |     64     | 1.000e-05  |
|     2      |   93.960   |     64     | 2.139e-04  |
|     3      |   95.407   |     64     | 4.178e-04  |
|     4      |   95.877   |     64     | 6.216e-04  |
|     5      |   96.336   |     64     | 8.255e-04  |
|     6      |   96.615   |     64     | 1.029e-03  |
|     7      |   96.625   |     64     | 1.233e-03  |
|     8      |   95.357   |     64     | 1.437e-03  |
|     9      |   95.817   |     64     | 1.641e-03  |
|     10     |   96.306   |     64     | 1.845e-03  |
|     11     |   95.927   |     64     | 2.049e-03  |
|     12     |   96.096   |     64     | 2.253e-03  |
|     13     |   96.266   |     64     | 2.457e-03  |
|     14     |   95.537   |     64     | 2.660e-03  |
|     15     |   95.088   |     64     | 2.864e-03  |
|     16     |   95.807   |     64     | 3.068e-03  |
|     17     |   95.268   | 