In [28]:
# example 1: bayes_opt
import bayes_opt

In [29]:
# 라이브러리 호출
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as dataset
import torchvision.transforms as transforms

import random
import numpy as np
import matplotlib.pyplot as plt

In [30]:
# Dataset 설정
mnist_train = dataset.MNIST(root='./MNIST_data',
                          train=True, 
                          transform=transforms.ToTensor(), 
                          download=False)

mnist_test = dataset.MNIST(root='./MNIST_data', 
                         train=False, 
                         transform=transforms.ToTensor(), 
                         download=False)

# Neural network 모델만들기
class MNIST_full(nn.Module):
    def __init__(self):
        super(MNIST_full,self).__init__()
        self.lin1 = nn.Linear(784,256)
        self.lin2 = nn.Linear(256,128)
        self.lin3 = nn.Linear(128,10)

    def forward(self,x):
        x = x.view(-1,784)
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = F.softmax(self.lin3(x),dim=1)
        return x

# Bayesian optimization example

In [31]:
# LinearRegression 예제의 hyperparameter튜닝 진행
# 튜닝시킬 hyperparameter - learning rate + weight_decay
search_space = {
    "batch_size": (10, 256),
    "learning_rate": (1e-05, 1e-02)
}

In [32]:
def objective_function(batch_size,learning_rate):
    train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=int(batch_size),shuffle=True,drop_last=False)
    test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=32,shuffle=False,drop_last=False)
    
    model = MNIST_full()
    optimizer = optim.Adam(model.parameters(),lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(3):
        loss_buffer = []
        for idx,(x,y) in enumerate(train_loader):
            optimizer.zero_grad()    

            y_onehot = torch.zeros((y.shape[0],10))
            y_onehot[range(y.shape[0]),y]=1

            y_infer = model(x)
            loss = criterion(y_infer,y)
            loss.backward()
            optimizer.step()
            loss_buffer.append(loss.item())

    acc = []
    for x,y in test_loader:
        # one_hot encoding
        y_onehot = torch.zeros((len(y),10))
        y_onehot[range(len(y)),y]=1
        y_infer = model(x)

        correct_prediction = torch.argmax(y_infer, 1) == y
        acc.append(correct_prediction.float().mean())
    bayes_target = np.mean(acc)*100
    return bayes_target # Bayes_opt는 최대화가 목표입니다.

optimizer = bayes_opt.BayesianOptimization(
    f = objective_function,
    pbounds=search_space,
    verbose=2,
    random_state=123
)
optimizer.maximize(
    init_points=2,
    n_iter=15,
    acq='ei'
)

|   iter    |  target   | batch_... | learni... |
-------------------------------------------------


Passing acquisition function parameters or gaussian process parameters to maximize
is no longer supported, and will cause an error in future releases. Instead,
please use the "set_gp_params" method to set the gp params, and pass an instance
 of bayes_opt.util.UtilityFunction using the acquisition_function argument

  optimizer.maximize(


| [0m1        [0m | [0m96.28    [0m | [0m181.3    [0m | [0m0.002869 [0m |
| [0m2        [0m | [0m80.22    [0m | [0m65.81    [0m | [0m0.005518 [0m |
| [0m3        [0m | [0m94.35    [0m | [0m182.6    [0m | [0m0.006338 [0m |
| [0m4        [0m | [0m67.18    [0m | [0m172.9    [0m | [0m1e-05    [0m |
| [0m5        [0m | [0m55.51    [0m | [0m191.6    [0m | [0m1e-05    [0m |
| [0m6        [0m | [0m93.94    [0m | [0m77.61    [0m | [0m0.004474 [0m |
| [0m7        [0m | [0m95.3     [0m | [0m83.5     [0m | [0m0.0004511[0m |
| [0m8        [0m | [0m94.23    [0m | [0m90.97    [0m | [0m0.004499 [0m |
| [0m9        [0m | [0m68.82    [0m | [0m99.32    [0m | [0m1e-05    [0m |
| [0m10       [0m | [0m94.37    [0m | [0m255.9    [0m | [0m0.007481 [0m |
| [0m11       [0m | [0m96.06    [0m | [0m249.3    [0m | [0m0.00228  [0m |
| [0m12       [0m | [0m82.33    [0m | [0m241.9    [0m | [0m0.01     [0m |
| [0m13       

In [33]:
print(f'최적 파라미터 정보: {optimizer.max["params"]}')
print(f'최적 값: {optimizer.max["target"]}')

최적 파라미터 정보: {'batch_size': 141.50065587861067, 'learning_rate': 0.0018151102437612245}
최적 값: 96.33586406707764


# 격자 검색 - Grid search

In [16]:
param_grids = {
    'batch_size': np.arange(64,128),
    'learning_rate': np.linspace(1e-05, 1e-02),
}

def grid_search_from_scratch(batch_size, learning_rate):
    train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=int(batch_size),shuffle=True,drop_last=False)
    test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=32,shuffle=False,drop_last=False)
    
    model = MNIST_full()
    optimizer = optim.Adam(model.parameters(),lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(3):
        loss_buffer = []
        for idx,(x,y) in enumerate(train_loader):
            optimizer.zero_grad()    

            y_onehot = torch.zeros((y.shape[0],10))
            y_onehot[range(y.shape[0]),y]=1

            y_infer = model(x)
            loss = criterion(y_infer,y)
            loss.backward()
            optimizer.step()
            loss_buffer.append(loss.item())

    acc = []
    for x,y in test_loader:
        # one_hot encoding
        y_onehot = torch.zeros((len(y),10))
        y_onehot[range(len(y)),y]=1
        y_infer = model(x)

        correct_prediction = torch.argmax(y_infer, 1) == y
        acc.append(correct_prediction.float().mean())
    grid_target = np.mean(acc)*100
    
    return grid_target # Bayes_opt는 최대화가 목표입니다.

In [27]:
best_target = -np.inf
cnt = 0
print(f'| {"iter":^10} | {"target":^10} | {"batch_...":^10} | {"learni...":^10} |')
print(f'-----------------------------------------------------')
for batch_size in param_grids['batch_size']:
    for learning_rate in param_grids['learning_rate']:
        target = grid_search_from_scratch(batch_size,learning_rate)
        cnt += 1 
        print(f'| {cnt:^10} | {target:^10.3f} | {batch_size:^10} | {learning_rate:^10.3e} |')
        if target > best_target:
            best_target = target
            best_info = {
                'batch_size':batch_size,
                'learning_rate':learning_rate
            }
print(f'=====================================================')

print(f'탐색 결과 - target:{best_target}, info: {best_info}')

|    iter    |   target   | batch_...  | learni...  |
-----------------------------------------------------
|     1      |   79.593   |     64     | 1.000e-05  |
|     2      |   93.960   |     64     | 2.139e-04  |
|     3      |   95.407   |     64     | 4.178e-04  |
|     4      |   95.877   |     64     | 6.216e-04  |
|     5      |   96.336   |     64     | 8.255e-04  |
|     6      |   96.615   |     64     | 1.029e-03  |
|     7      |   96.625   |     64     | 1.233e-03  |
|     8      |   95.357   |     64     | 1.437e-03  |
|     9      |   95.817   |     64     | 1.641e-03  |
|     10     |   96.306   |     64     | 1.845e-03  |
|     11     |   95.927   |     64     | 2.049e-03  |
|     12     |   96.096   |     64     | 2.253e-03  |
|     13     |   96.266   |     64     | 2.457e-03  |
|     14     |   95.537   |     64     | 2.660e-03  |
|     15     |   95.088   |     64     | 2.864e-03  |
|     16     |   95.807   |     64     | 3.068e-03  |
|     17     |   95.268   | 