In [567]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import plotly.graph_objects as go

import random
import pandas as pd
import numpy as np

!pip install torchinfo
from torchinfo import summary



In [568]:
# Random Seed 고정 (학습 반복 시행 시에도 동일한 결과가 나오도록)

seed = 20250228

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

**1. 데이터셋 로딩 및 데이터 분석**

In [569]:
# 데이터셋 로딩

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transform,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transform,
                                          download=True)


In [570]:
# 시간 절약을 위해, 학습 데이터에서 랜덤하게 일부 샘플만 추출

from torch.utils.data import Subset, DataLoader

NUM_TRAIN_SAMPLES = 8000
BATCH_SIZE = 32

subset_indices = random.sample(range(len(train_dataset)), NUM_TRAIN_SAMPLES)
train_subset = Subset(train_dataset, subset_indices)

train_loader = DataLoader(train_subset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

# 테스트 데이터셋은 학습 대상이 아니므로 그대로 이용
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

In [571]:
# 클래스 불균형 분석

# 학습 데이터
train_labels = torch.tensor([train_subset.dataset.targets[i] for i in subset_indices])
train_class_counts = torch.bincount(train_labels)
print(train_class_counts)

NUM_CLASSES = len(train_class_counts)

tensor([826, 887, 835, 776, 776, 723, 831, 771, 737, 838])


In [572]:
train_class_percentage = np.array(train_class_counts) * 100.0 / sum(train_class_counts)

train_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                                'count': train_class_counts,
                                'percentage (%)': train_class_percentage})

train_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,826,10.325
1,1,887,11.087501
2,2,835,10.4375
3,3,776,9.7
4,4,776,9.7
5,5,723,9.0375
6,6,831,10.3875
7,7,771,9.6375
8,8,737,9.2125
9,9,838,10.475


In [573]:
# 테스트 데이터
test_labels = test_loader.dataset.targets
test_class_counts = torch.bincount(test_labels)
print(test_class_counts)

tensor([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009])


In [574]:
test_class_percentage = np.array(test_class_counts) * 100.0 / sum(test_class_counts)

test_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                               'count': test_class_counts,
                               'percentage (%)': test_class_percentage})

test_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,980,9.8
1,1,1135,11.35
2,2,1032,10.32
3,3,1010,10.1
4,4,982,9.82
5,5,892,8.92
6,6,958,9.58
7,7,1028,10.28
8,8,974,9.74
9,9,1009,10.09


**2. CNN 모델 정의**

In [575]:
# CNN 모델 정의

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # Conv
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU()
        )
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3),
            nn.ReLU()
        )

        # Fully Connected
        self.fc1 = nn.Sequential(
            nn.Linear(64 * 4 * 4, 64),
            nn.Sigmoid()
        )
        self.fc_final = nn.Sequential(
            nn.Linear(64, 10),
            nn.Softmax()
        )

    def forward(self, x):

        # Conv
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)

        x = x.view(-1, 64 * 4 * 4)

        # Fully Connected
        x = self.fc1(x)
        x = self.fc_final(x)

        return x

In [576]:
# 모델 구조 출력

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()  # 10 Classes 의 Multi-Class 이므로

print(summary(model, input_size=(BATCH_SIZE, 1, 28, 28)))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [32, 10]                  --
├─Sequential: 1-1                        [32, 32, 28, 28]          --
│    └─Conv2d: 2-1                       [32, 32, 28, 28]          320
│    └─ReLU: 2-2                         [32, 32, 28, 28]          --
├─MaxPool2d: 1-2                         [32, 32, 14, 14]          --
├─Sequential: 1-3                        [32, 64, 12, 12]          --
│    └─Conv2d: 2-3                       [32, 64, 12, 12]          18,496
│    └─ReLU: 2-4                         [32, 64, 12, 12]          --
├─MaxPool2d: 1-4                         [32, 64, 6, 6]            --
├─Sequential: 1-5                        [32, 64, 4, 4]            --
│    └─Conv2d: 2-5                       [32, 64, 4, 4]            36,928
│    └─ReLU: 2-6                         [32, 64, 4, 4]            --
├─Sequential: 1-6                        [32, 64]                  --
│    └


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



**3. 데이터셋 분리**

* Train Data -> Train Data + Valid Data (epoch) + Valid Data (하이퍼파라미터 최적화)

In [577]:
# 데이터셋 분리

from torch.utils.data import random_split

# 샘플 수
num_train = 1000
num_valid_epoch = 2000
num_valid_hpo = 5000

assert NUM_TRAIN_SAMPLES == num_train + num_valid_epoch + num_valid_hpo

# 데이터셋 분리
train_dataset, valid_epoch_dataset, valid_hpo_dataset =\
    random_split(train_subset, [num_train, num_valid_epoch, num_valid_hpo])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_epoch_loader = DataLoader(valid_epoch_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid_hpo_loader = DataLoader(valid_hpo_dataset, batch_size=BATCH_SIZE, shuffle=False)

**4. 하이퍼파라미터 최적화 학습 실시 함수**

* 하이퍼파라미터 최적화 라이브러리는 Optuna 사용
* 하이퍼파라미터 탐색 70 회 실시 (각 Optimizer 별)
* 각 Optimizer 별로 하이퍼파라미터 존재

In [578]:
MAX_EPOCHS = 65536
EARLY_STOPPING_ROUNDS = 5  # Early Stopping Patience (epochs)
TRIAL_COUNT = 70           # HPO trial count

In [579]:
from sklearn.metrics import accuracy_score
from copy import deepcopy

In [580]:
# Optuna 설정

!pip install optuna
import optuna
import logging

optuna.logging.set_verbosity(logging.WARNING)



In [581]:
# 모델 학습 실시

# args :
# - model           : 학습할 모델
# - train_loader    : Training Data Loader
# - optimizer       : 모델에서 사용할 Optimizer
# - train_loss_list : 각 epoch 에서의 train loss 기록

# returns :
# - train_loss : 모델의 Train Loss

def run_train(model, train_loader, optimizer, train_loss_list):
    model.train()
    train_loss = 0.0
    cnt = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # train 실시
        optimizer.zero_grad()
        outputs = model(images)

        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        cnt += 1

    train_loss_list.append(train_loss / len(train_loader))
    return train_loss_list[-1]

In [582]:
# 모델 validation 실시

# args :
# - model        : validation 할 모델
# - valid_loader : Validation Data Loader

# returns :
# - accuracy : 모델의 validation 정확도

def run_validation(model, valid_loader):
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # validation 실시 및 정확도 측정
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    return accuracy

In [583]:
# 모델 학습 및 validation 전체 프로세스

# args :
# - model              : 학습할 모델
# - optimizer          : 모델에서 사용할 Optimizer
# - train_loader       : Training Data Loader
# - valid_epoch_loader : 각 epoch 마다 validation 할 Valid Data Loader
# - valid_hpo_loader   : 최종적으로 해당 하이퍼파라미터 조합에 대한 Valid Data Loader
# - verbose            : 학습 중 프로세스 출력 여부

# returns :
# - final_acc        : 해당 하이퍼파라미터 조합에 대한 최종 Accuracy (valid_acc 이 가장 높았던 epoch 의 모델로 측정)
# - best_epoch_model : valid_acc 이 가장 높았던 epoch 에서 생성된 모델

def run_model_common(model, optimizer, train_loader, valid_epoch_loader, valid_hpo_loader,
                     verbose=False):

    train_loss_list = []       # train loss
    valid_acc_list = []        # valid accuracy
    max_valid_acc = 0.0        # max validation accuracy
    best_valid_acc_epoch = -1  # valid_acc 이 가장 높았던 epoch
    best_epoch_model = None    # valid_acc 이 가장 높았던 epoch 의 모델

    # 1. 학습 실시
    for epoch in range(MAX_EPOCHS):

        # 1-1. train model
        train_loss = run_train(model, train_loader, optimizer, train_loss_list)

        # 1-2. validate model (with EPOCH VALID SET)
        epoch_acc = run_validation(model, valid_epoch_loader)
        valid_acc_list.append(epoch_acc)

        # 1-3. Early Stopping 처리 (overfitting 방지)
        if epoch_acc > max_valid_acc:
            max_valid_acc = epoch_acc
            best_valid_acc_epoch = epoch

            best_epoch_model = CNN().to(device)
            best_epoch_model.load_state_dict(model.state_dict())

            if verbose:
                print('best model updated')

        if epoch - best_valid_acc_epoch >= EARLY_STOPPING_ROUNDS:
            break

        # 1-4. 결과 출력
        if verbose:
            print(f"Epoch {epoch+1}, Loss: {train_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    # check best-epoch model correctly loaded
    checked_acc = run_validation(best_epoch_model, valid_epoch_loader)

    if verbose:
        print(f"Best Epoch: {best_valid_acc_epoch}, Best Valid Acc: {max_valid_acc}")
        print(f"Valid Acc (with Epoch valid set) on Loaded Best Model: {checked_acc}")

    assert abs(max_valid_acc - checked_acc) < 1e-8

    # 2. validate best-epoch model (with HPO VALID SET)
    final_acc = run_validation(best_epoch_model, valid_hpo_loader)

    if verbose:
        print(f"Final Acc (with HPO valid set) on Loaded Best Model: {final_acc}")

    return final_acc, best_epoch_model

In [584]:
print(device)

cuda


**4-1. Adam Optimizer 실험**

In [585]:
hpo_best_acc = 0              # 모든 Hyper-param 조합의 HPO Valid set 정확도 중 가장 높은 것
best_hyperparam_set = None    # HPO Valid set 정확도가 가장 높은 Hyper-param 조합
best_hyperparam_model = None  # best_hyperparam_set 의 Hyper-param 조합으로 학습된 모델

In [586]:
verbose_at_first_trial = True  # 1st trial 에만 학습 중 정보 출력

def objective_adam(trial):
    global hpo_best_acc, best_hyperparam_set, best_hyperparam_model, verbose_at_first_trial

    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
        'beta1': trial.suggest_float('beta1', 0.8, 0.95),
        'beta2': trial.suggest_float('beta2', 0.99, 0.9999)
    }

    model = CNN().to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params['learning_rate'],
                                 betas=(params['beta1'], params['beta2']))

    final_acc, best_epoch_model = run_model_common(model,
                                                   optimizer,
                                                   train_loader,
                                                   valid_epoch_loader,
                                                   valid_hpo_loader,
                                                   verbose=verbose_at_first_trial)

    verbose_at_first_trial = False

    # global best model 갱신
    if final_acc > hpo_best_acc:
        hpo_best_acc = final_acc
        best_hyperparam_set = params

        best_hyperparam_model = CNN().to(device)
        best_hyperparam_model.load_state_dict(best_epoch_model.state_dict())
        print(f'best_hyperparam_model updated with Accuracy={hpo_best_acc:.4f}')

    print(f"Params: {params}, Accuracy: {final_acc:.4f}")
    return final_acc

In [587]:
study_adam = optuna.create_study(direction="maximize")
study_adam.optimize(objective_adam, n_trials=TRIAL_COUNT)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



best model updated
Epoch 1, Loss: 2.3061, Accuracy: 0.1015
best model updated
Epoch 2, Loss: 2.3034, Accuracy: 0.1025
Epoch 3, Loss: 2.3023, Accuracy: 0.1025
Epoch 4, Loss: 2.3029, Accuracy: 0.1015
Epoch 5, Loss: 2.3032, Accuracy: 0.1025
Epoch 6, Loss: 2.3016, Accuracy: 0.1025
Best Epoch: 1, Best Valid Acc: 0.1025
Valid Acc (with Epoch valid set) on Loaded Best Model: 0.1025
Final Acc (with HPO valid set) on Loaded Best Model: 0.1134
best_hyperparam_model updated with Accuracy=0.1134
Params: {'learning_rate': 0.004241296890590742, 'beta1': 0.9326793413386915, 'beta2': 0.9994640756255311}, Accuracy: 0.1134
best_hyperparam_model updated with Accuracy=0.9498
Params: {'learning_rate': 0.0003751003102133242, 'beta1': 0.8053131215255229, 'beta2': 0.9975286656027099}, Accuracy: 0.9498
best_hyperparam_model updated with Accuracy=0.9570
Params: {'learning_rate': 0.0006604550852222216, 'beta1': 0.9460726489185591, 'beta2': 0.9914995864620487}, Accuracy: 0.9570
Params: {'learning_rate': 0.0007360

In [588]:
# Test Dataset 성능 평가

print(f'best hyper-param: {best_hyperparam_set}, best acc: {hpo_best_acc}')

best hyper-param: {'learning_rate': 0.0009180813651987264, 'beta1': 0.948984639366484, 'beta2': 0.9939972600524515}, best acc: 0.9626


In [589]:
# best_hyperparam_model 이 정상적으로 load 되었는지 최종 확인

checked_hpo_acc = run_validation(best_hyperparam_model, valid_hpo_loader)
print(f"Valid Acc (with HPO valid set) on Best Hyper-param Model: {checked_hpo_acc}")

assert abs(hpo_best_acc - checked_hpo_acc) < 1e-8

Valid Acc (with HPO valid set) on Best Hyper-param Model: 0.9626


In [590]:
# 테스트셋에 대한 최종 정확도

hpo_final_acc = run_validation(best_hyperparam_model, test_loader)

print(f'Final HPO Acc (with test set) : {hpo_final_acc}')

Final HPO Acc (with test set) : 0.9665


**4-2. AdamW Optimizer 실험**

In [591]:
hpo_best_acc = 0              # 모든 Hyper-param 조합의 HPO Valid set 정확도 중 가장 높은 것
best_hyperparam_set = None    # HPO Valid set 정확도가 가장 높은 Hyper-param 조합
best_hyperparam_model = None  # best_hyperparam_set 의 Hyper-param 조합으로 학습된 모델

In [592]:
verbose_at_first_trial = True  # 1st trial 에만 학습 중 정보 출력

def objective_adamw(trial):
    global hpo_best_acc, best_hyperparam_set, best_hyperparam_model, verbose_at_first_trial

    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
        'beta1': trial.suggest_float('beta1', 0.8, 0.95),
        'beta2': trial.suggest_float('beta2', 0.99, 0.9999),
        'weight_decay': trial.suggest_float('weight_decay', 0.0001, 0.02, log=True)
    }

    model = CNN().to(device)
    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr=params['learning_rate'],
                                  betas=(params['beta1'], params['beta2']),
                                  weight_decay=params['weight_decay'])

    final_acc, best_epoch_model = run_model_common(model,
                                                   optimizer,
                                                   train_loader,
                                                   valid_epoch_loader,
                                                   valid_hpo_loader,
                                                   verbose=verbose_at_first_trial)

    verbose_at_first_trial = False

    # global best model 갱신
    if final_acc > hpo_best_acc:
        hpo_best_acc = final_acc
        best_hyperparam_set = params

        best_hyperparam_model = CNN().to(device)
        best_hyperparam_model.load_state_dict(best_epoch_model.state_dict())
        print(f'best_hyperparam_model updated with Accuracy={hpo_best_acc:.4f}')

    print(f"Params: {params}, Accuracy: {final_acc:.4f}")
    return final_acc

In [593]:
study_adamw = optuna.create_study(direction="maximize")
study_adamw.optimize(objective_adamw, n_trials=TRIAL_COUNT)

best model updated
Epoch 1, Loss: 2.2930, Accuracy: 0.2615
best model updated
Epoch 2, Loss: 2.1809, Accuracy: 0.5310
best model updated
Epoch 3, Loss: 2.0824, Accuracy: 0.6090
best model updated
Epoch 4, Loss: 2.0154, Accuracy: 0.6805
best model updated
Epoch 5, Loss: 1.9611, Accuracy: 0.7310
best model updated
Epoch 6, Loss: 1.9081, Accuracy: 0.7985
best model updated
Epoch 7, Loss: 1.8506, Accuracy: 0.8685
best model updated
Epoch 8, Loss: 1.7958, Accuracy: 0.8950
best model updated
Epoch 9, Loss: 1.7516, Accuracy: 0.9230
best model updated
Epoch 10, Loss: 1.7028, Accuracy: 0.9375
best model updated
Epoch 11, Loss: 1.6697, Accuracy: 0.9395
Epoch 12, Loss: 1.6416, Accuracy: 0.9390
best model updated
Epoch 13, Loss: 1.6214, Accuracy: 0.9470
best model updated
Epoch 14, Loss: 1.5949, Accuracy: 0.9495
best model updated
Epoch 15, Loss: 1.5817, Accuracy: 0.9520
best model updated
Epoch 16, Loss: 1.5649, Accuracy: 0.9565
best model updated
Epoch 17, Loss: 1.5526, Accuracy: 0.9570
best mod

In [594]:
# Test Dataset 성능 평가

print(f'best hyper-param: {best_hyperparam_set}, best acc: {hpo_best_acc}')

best hyper-param: {'learning_rate': 0.0006673995164124837, 'beta1': 0.9114935608972462, 'beta2': 0.9990447493603954, 'weight_decay': 0.005398226747101173}, best acc: 0.9648


In [595]:
# best_hyperparam_model 이 정상적으로 load 되었는지 최종 확인

checked_hpo_acc = run_validation(best_hyperparam_model, valid_hpo_loader)
print(f"Valid Acc (with HPO valid set) on Best Hyper-param Model: {checked_hpo_acc}")

assert abs(hpo_best_acc - checked_hpo_acc) < 1e-8

Valid Acc (with HPO valid set) on Best Hyper-param Model: 0.9648


In [596]:
# 테스트셋에 대한 최종 정확도

hpo_final_acc = run_validation(best_hyperparam_model, test_loader)

print(f'Final HPO Acc (with test set) : {hpo_final_acc}')

Final HPO Acc (with test set) : 0.9677


**4-3. AdaDelta Optimizer 실험**

In [597]:
hpo_best_acc = 0              # 모든 Hyper-param 조합의 HPO Valid set 정확도 중 가장 높은 것
best_hyperparam_set = None    # HPO Valid set 정확도가 가장 높은 Hyper-param 조합
best_hyperparam_model = None  # best_hyperparam_set 의 Hyper-param 조합으로 학습된 모델

In [598]:
verbose_at_first_trial = True  # 1st trial 에만 학습 중 정보 출력

def objective_adadelta(trial):
    global hpo_best_acc, best_hyperparam_set, best_hyperparam_model, verbose_at_first_trial

    params = {
        'p': trial.suggest_float('p', 0.5, 0.999)
    }

    model = CNN().to(device)
    optimizer = torch.optim.Adadelta(model.parameters(),
                                     rho=params['p'])

    final_acc, best_epoch_model = run_model_common(model,
                                                   optimizer,
                                                   train_loader,
                                                   valid_epoch_loader,
                                                   valid_hpo_loader,
                                                   verbose=verbose_at_first_trial)

    verbose_at_first_trial = False

    # global best model 갱신
    if final_acc > hpo_best_acc:
        hpo_best_acc = final_acc
        best_hyperparam_set = params

        best_hyperparam_model = CNN().to(device)
        best_hyperparam_model.load_state_dict(best_epoch_model.state_dict())
        print(f'best_hyperparam_model updated with Accuracy={hpo_best_acc:.4f}')

    print(f"Params: {params}, Accuracy: {final_acc:.4f}")
    return final_acc

In [599]:
study_adadelta = optuna.create_study(direction="maximize")
study_adadelta.optimize(objective_adadelta, n_trials=TRIAL_COUNT)

best model updated
Epoch 1, Loss: 2.3017, Accuracy: 0.1025
Epoch 2, Loss: 2.2982, Accuracy: 0.1025
best model updated
Epoch 3, Loss: 2.2795, Accuracy: 0.1860
best model updated
Epoch 4, Loss: 2.1950, Accuracy: 0.4545
best model updated
Epoch 5, Loss: 2.0808, Accuracy: 0.5150
best model updated
Epoch 6, Loss: 1.9781, Accuracy: 0.5740
best model updated
Epoch 7, Loss: 1.8974, Accuracy: 0.6995
best model updated
Epoch 8, Loss: 1.8135, Accuracy: 0.7680
best model updated
Epoch 9, Loss: 1.7487, Accuracy: 0.8065
best model updated
Epoch 10, Loss: 1.6949, Accuracy: 0.8540
Epoch 11, Loss: 1.6520, Accuracy: 0.7990
best model updated
Epoch 12, Loss: 1.5958, Accuracy: 0.9095
Epoch 13, Loss: 1.5701, Accuracy: 0.9000
best model updated
Epoch 14, Loss: 1.5455, Accuracy: 0.9265
best model updated
Epoch 15, Loss: 1.5351, Accuracy: 0.9365
Epoch 16, Loss: 1.5186, Accuracy: 0.9165
best model updated
Epoch 17, Loss: 1.5063, Accuracy: 0.9460
Epoch 18, Loss: 1.5000, Accuracy: 0.9425
Epoch 19, Loss: 1.4949, 

In [600]:
# Test Dataset 성능 평가

print(f'best hyper-param: {best_hyperparam_set}, best acc: {hpo_best_acc}')

best hyper-param: {'p': 0.8568164772833287}, best acc: 0.9558


In [601]:
# best_hyperparam_model 이 정상적으로 load 되었는지 최종 확인

checked_hpo_acc = run_validation(best_hyperparam_model, valid_hpo_loader)
print(f"Valid Acc (with HPO valid set) on Best Hyper-param Model: {checked_hpo_acc}")

assert abs(hpo_best_acc - checked_hpo_acc) < 1e-8

Valid Acc (with HPO valid set) on Best Hyper-param Model: 0.9558


In [602]:
# 테스트셋에 대한 최종 정확도

hpo_final_acc = run_validation(best_hyperparam_model, test_loader)

print(f'Final HPO Acc (with test set) : {hpo_final_acc}')

Final HPO Acc (with test set) : 0.9588


**5. HPO 성능 비교**

In [603]:
from optuna.visualization import plot_optimization_history

In [628]:
# Adam Optimizer HPO 추이

fig = plot_optimization_history(study_adam)
fig.update_layout(yaxis=dict(range=[0.93, 0.97]),
                  width=1100,
                  height=700,
                  yaxis_title='Accuracy (HPO valid set)')
fig.show()

In [629]:
# AdamW Optimizer HPO 추이

fig = plot_optimization_history(study_adamw)
fig.update_layout(yaxis=dict(range=[0.93, 0.97]),
                  width=1100,
                  height=700,
                  yaxis_title='Accuracy (HPO valid set)')
fig.show()

In [630]:
# AdaDelta Optimizer HPO 추이

fig = plot_optimization_history(study_adadelta)
fig.update_layout(yaxis=dict(range=[0.93, 0.97]),
                  width=1100,
                  height=700,
                  yaxis_title='Accuracy (HPO valid set)')
fig.show()

**6. 각 Optimizer 별 각 Hyper-param 값에 따른 성능 분포**

In [656]:
# 각 하이퍼파라미터 값 별 성능 추이 표시 함수

# args :
# - trials_df       : study.trials_dataframe() 으로 가져온 Optuna study 의 DataFrame
# - optimizer_name  : Optimizer 이름
# - hyperparam_name : 하이퍼파라미터 이름

def plot_acc_per_hyperparam(trials_df, hyperparam_name, optimizer_name, remove_outlier=False):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=trials_df[f'params_{hyperparam_name}'],
                            y=100.0 * trials_df['value'],
                            mode='markers',
                            marker={'size': 5.5, 'color': '#049'}))

    chart_title = f'Accuracy Distribution ({hyperparam_name} of {optimizer_name})'
    if remove_outlier:
        chart_title += ' (w/o outliers)'

    fig.update_layout(width=700,
                      height=550,
                      title=chart_title,
                      xaxis_title=hyperparam_name,
                      yaxis_title='Accuracy (%)')

    if remove_outlier:
        fig.update_layout(yaxis=dict(range=[93, 97]))

    fig.show()

In [657]:
# 하이퍼파라미터 목록

hp_sets = {
    'Adam': {'study': study_adam,
             'hps': ['learning_rate', 'beta1', 'beta2']},
    'AdamW': {'study': study_adamw,
              'hps': ['learning_rate', 'beta1', 'beta2', 'weight_decay']},
    'AdaDelta': {'study': study_adadelta,
                 'hps': ['p']}
}

In [658]:
for optimizer_name in hp_sets.keys():
    study_for_opt = hp_sets[optimizer_name]['study']

    for hp in hp_sets[optimizer_name]['hps']:
        for remove_outlier in [False, True]:
            plot_acc_per_hyperparam(trials_df=study_for_opt.trials_dataframe(),
                                    hyperparam_name=hp,
                                    optimizer_name=optimizer_name,
                                    remove_outlier=remove_outlier)