In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import plotly.graph_objects as go

import random
import pandas as pd
import numpy as np

!pip install torchinfo
from torchinfo import summary



In [23]:
np.printoptions(linewidth=160)

<contextlib._GeneratorContextManager at 0x7fe652147410>

In [24]:
# Random Seed 고정 (학습 반복 시행 시에도 동일한 결과가 나오도록)

seed = 20250304

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

**1. 데이터셋 로딩 및 데이터 분석**

In [25]:
# 데이터셋 로딩

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transform,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transform,
                                          download=True)


In [26]:
# 시간 절약을 위해, 학습 데이터에서 랜덤하게 일부 샘플만 추출

from torch.utils.data import Subset, DataLoader

NUM_TRAIN_SAMPLES = 9000
BATCH_SIZE = 32

subset_indices = random.sample(range(len(train_dataset)), NUM_TRAIN_SAMPLES)
train_subset = Subset(train_dataset, subset_indices)

train_loader = DataLoader(train_subset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

# 테스트 데이터셋은 학습 대상이 아니므로 그대로 이용
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

In [27]:
# 클래스 불균형 분석

# 학습 데이터
train_labels = torch.tensor([train_subset.dataset.targets[i] for i in subset_indices])
train_class_counts = torch.bincount(train_labels)
print(train_class_counts)

NUM_CLASSES = len(train_class_counts)

tensor([ 868, 1038,  907,  883,  884,  808,  890,  934,  878,  910])


In [28]:
train_class_percentage = np.array(train_class_counts) * 100.0 / sum(train_class_counts)

train_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                                'count': train_class_counts,
                                'percentage (%)': train_class_percentage})

train_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,868,9.644445
1,1,1038,11.533333
2,2,907,10.077778
3,3,883,9.811111
4,4,884,9.822222
5,5,808,8.977778
6,6,890,9.888889
7,7,934,10.377778
8,8,878,9.755556
9,9,910,10.111111


In [29]:
# CNN 모델 정의

class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        # Conv
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU()
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 96, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Sequential(
            nn.Conv2d(96, 128, kernel_size=3),
            nn.ReLU()
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU()
        )

        # Fully Connected
        self.fc1 = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.Sigmoid()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(1024, 256),
            nn.Sigmoid()
        )
        self.fc3 = nn.Sequential(
            nn.Linear(256, 64),
            nn.Sigmoid()
        )
        self.fc_final = nn.Sequential(
            nn.Linear(64, 10),
            nn.Softmax()  # Classification Task 의 Output Layer 이므로 Softmax 고정
        )

    def forward(self, x):

        # Conv
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.pool2(x)
        x = self.conv4(x)
        x = self.conv5(x)

        x = x.view(-1, 128 * 4 * 4)

        # Fully Connected
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc_final(x)

        return x

In [30]:
# 모델 구조 출력

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)

print(summary(model, input_size=(BATCH_SIZE, 1, 28, 28)))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [32, 10]                  --
├─Sequential: 1-1                        [32, 32, 28, 28]          --
│    └─Conv2d: 2-1                       [32, 32, 28, 28]          320
│    └─ReLU: 2-2                         [32, 32, 28, 28]          --
├─MaxPool2d: 1-2                         [32, 32, 14, 14]          --
├─Sequential: 1-3                        [32, 64, 12, 12]          --
│    └─Conv2d: 2-3                       [32, 64, 12, 12]          18,496
│    └─ReLU: 2-4                         [32, 64, 12, 12]          --
├─Sequential: 1-4                        [32, 96, 12, 12]          --
│    └─Conv2d: 2-5                       [32, 96, 12, 12]          55,392
│    └─ReLU: 2-6                         [32, 96, 12, 12]          --
├─MaxPool2d: 1-5                         [32, 96, 6, 6]            --
├─Sequential: 1-6                        [32, 128, 4, 4]           --
│    └

  return inner()


**3. 데이터셋 분리**

* Train Data -> Train Data + Valid Data (epoch) + Valid Data (하이퍼파라미터 최적화)

In [31]:
# 데이터셋 분리

from torch.utils.data import random_split

# 샘플 수
num_train = 2000
num_valid_epoch = 2000
num_valid_hpo = 5000

assert NUM_TRAIN_SAMPLES == num_train + num_valid_epoch + num_valid_hpo

# 데이터셋 분리
train_dataset, valid_epoch_dataset, valid_hpo_dataset =\
    random_split(train_subset, [num_train, num_valid_epoch, num_valid_hpo])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_epoch_loader = DataLoader(valid_epoch_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid_hpo_loader = DataLoader(valid_hpo_dataset, batch_size=BATCH_SIZE, shuffle=False)

**4. 하이퍼파라미터 최적화 학습 실시 함수**

* 하이퍼파라미터 최적화 라이브러리는 Optuna 사용
* 하이퍼파라미터 탐색 200 회 실시
* 하이퍼파라미터 목록
  * Conv. Layer 의 Weight Initialization 방법
    * 상수로 초기화 (0.0)
    * 상수로 초기화 (0.5)
    * Gaussian Distribution
      * 관련 상수 : 표준편차
    * Xavier Initialization - 균등 분포
    * Xavier Initialization - 정규 분포
    * He Initialization - 균등 분포
    * He Initialization - 정규 분포
  * Fully-Connected Layer 의 Weight Initialization 방법
    * Conv. Layer 의 Weight Initialization 방법 과 동일한 조합
  * Learning Rate
    * 0.0005 ~ 0.01 (= 5e-4 ~ 1e-2) 범위


In [32]:
MAX_EPOCHS = 65536
TRIAL_COUNT = 200          # HPO trial count
EARLY_STOPPING_ROUNDS = 5  # epoch count for Early Stopping

In [33]:
from sklearn.metrics import accuracy_score
from copy import deepcopy

In [34]:
# Optuna 설정

!pip install optuna
import optuna
import logging

optuna.logging.set_verbosity(logging.WARNING)



In [35]:
# 모델 학습 실시

# args :
# - model           : 학습할 모델
# - train_loader    : Training Data Loader
# - train_loss_list : 각 epoch 에서의 train loss 기록

# returns :
# - train_loss : 모델의 Train Loss

def run_train(model, train_loader, train_loss_list):
    model.train()
    train_loss = 0.0
    cnt = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # train 실시
        model.optimizer.zero_grad()
        outputs = model(images)

        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
        model.optimizer.step()

        train_loss += loss.item()
        cnt += 1

    train_loss_list.append(train_loss / len(train_loader))
    return train_loss_list[-1]

In [36]:
# 모델 validation 실시

# args :
# - model        : validation 할 모델
# - valid_loader : Validation Data Loader
# - during_train : 모델 학습 중이면 True, 그렇지 않으면 False

# returns :
# - val_accuracy : 모델의 validation 정확도
# - val_loss     : 모델의 validation loss

def run_validation(model, valid_loader, during_train=True):
    model.eval()
    correct, total = 0, 0
    val_loss_sum = 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            val_loss_batch = nn.CrossEntropyLoss(reduction='sum')(outputs, labels)
            val_loss_sum += val_loss_batch

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Accuracy 계산
        val_accuracy = correct / total
        val_loss = val_loss_sum / total

    return val_accuracy, val_loss

In [37]:
# Weight Initialization 적용

import torch.nn.init as init

# initialize weights for each layer

# args :
# - layer      : 가중치를 초기화할 레이어
# - layer_name : 가중치를 초기화할 레이어의 이름
# - option     : {'init_method': 가중치 초기화 옵션,
#                 'gaussian_std': Gaussian Distribution 초기화 시의 표준편차,
#                 'verbose': 가중치 정보 출력 여부}

def initialize_layer_weights(layer, layer_name, option):
    init_method = option['init_method']

    if init_method == 'const_zero':
        init.constant_(layer.weight, 0.0)

    elif init_method == 'const_0.5':
        init.constant_(layer.weight, 0.5)

    elif init_method == 'gaussian':
        init.normal_(layer.weight, mean=0.0, std=option['gaussian_std'])

    elif init_method == 'xavier_uniform':
        init.xavier_uniform_(layer.weight)

    elif init_method == 'xavier_normal':
        init.xavier_normal_(layer.weight)

    elif init_method == 'he_uniform':
        init.kaiming_uniform_(layer.weight, nonlinearity='relu')

    elif init_method == 'he_normal':
        init.kaiming_normal_(layer.weight, nonlinearity='relu')

    # bias 는 무조건 0으로 초기화
    if layer.bias is not None:
        init.constant_(layer.bias, 0.0)

    # 결과 출력
    weight_list = layer.weight.data.cpu().numpy().flatten()

    verbose = option['verbose']
    if verbose:
        print(f'\ninitialized weights of layer "{layer_name}" (with "{init_method}") :')
        print(f' - first 10 values : {weight_list[:10]}')
        print(f' - count           : {len(weight_list)}')
        print(f' - mean            : {weight_list.mean():.6f}')
        print(f' - std-dev         : {weight_list.std():.6f}')
        print(f' - max             : {weight_list.max():.6f}')
        print(f' - min             : {weight_list.min():.6f}\n')


# initialize weights

# args :
# - model   : 가중치를 초기화할 모델
# - verbose : 초기화된 가중치의 표시 여부

def initialize_weights(model, verbose=False):

    # Conv. Layers (conv1, conv2, conv3, conv4, conv5)
    for name, layer in model.named_modules():
        if name in ['conv1.0', 'conv2.0', 'conv3.0', 'conv4.0', 'conv5.0']:
            option = {
                'init_method': model.winit_conv,
                'gaussian_std': model.gaussian_std_conv if model.winit_conv == 'gaussian' else None,
                'verbose': verbose
            }
            initialize_layer_weights(layer=layer, layer_name=name, option=option)

    # Fully-Connected Layers (fc1, fc2, fc3, fc_final)
    for name, layer in model.named_modules():
        if name in ['fc1.0', 'fc2.0', 'fc3.0', 'fc_final.0']:
            option = {
                'init_method': model.winit_fc,
                'gaussian_std': model.gaussian_std_fc if model.winit_fc == 'gaussian' else None,
                'verbose': verbose
            }
            initialize_layer_weights(layer=layer, layer_name=name, option=option)

In [38]:
# 모델 학습 및 validation 전체 프로세스

# args :
# - model              : 학습할 모델
# - train_loader       : Training Data Loader
# - valid_epoch_loader : 각 epoch 마다 validation 할 Valid Data Loader
# - valid_hpo_loader   : 최종적으로 해당 하이퍼파라미터 조합에 대한 Valid Data Loader
# - verbose            : 학습 중 프로세스 출력 여부

# returns :
# - final_acc        : 해당 하이퍼파라미터 조합에 대한 최종 Accuracy (valid accuracy 가 가장 높았던 epoch 의 모델로 측정)
# - best_epoch_model : valid accuracy 가 가장 높았던 epoch 에서 생성된 모델
# - epochs           : 해당 학습의 총 epoch count

def run_model_common(model, train_loader, valid_epoch_loader, valid_hpo_loader,
                     verbose=False):

    train_loss_list = []        # train loss
    valid_acc_list = []         # valid accuracy
    valid_loss_list = []        # valid loss

    max_valid_acc = 0.0         # max validation accuracy
    best_valid_acc_epoch = -1   # valid accuracy 가 가장 높았던 epoch
    best_epoch_model = None     # valid accuracy 가 가장 높았던 epoch 의 모델

    # 0. Weight Initialization 적용
    initialize_weights(model, verbose=verbose)

    # 1. 학습 실시
    for epoch in range(MAX_EPOCHS):

        # 1-1. train model
        train_loss = run_train(model, train_loader, train_loss_list)

        # 1-2. validate model (with EPOCH VALID SET)
        epoch_acc, val_loss = run_validation(model, valid_epoch_loader)
        valid_acc_list.append(epoch_acc)
        valid_loss_list.append(val_loss)

        # 1-3. Early Stopping 처리 (overfitting 방지)
        if epoch_acc > max_valid_acc:
            max_valid_acc = epoch_acc
            best_valid_acc_epoch = epoch

            best_epoch_model = CNN().to(device)
            best_epoch_model.load_state_dict(model.state_dict())

            if verbose:
                print('best model updated')

        if epoch - best_valid_acc_epoch >= EARLY_STOPPING_ROUNDS:
            epochs = epoch
            break

        # 1-4. 결과 출력
        if verbose:
            print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {val_loss:.4f}, Valid Accuracy: {epoch_acc:.4f}")

    # check best-epoch model correctly loaded
    checked_acc, _ = run_validation(best_epoch_model,
                                    valid_epoch_loader,
                                    during_train=False)

    if verbose:
        print(f"Best Epoch: {best_valid_acc_epoch}, Best Valid Acc: {max_valid_acc}")
        print(f"Valid Acc (with Epoch valid set) on Loaded Best Model: {checked_acc}")

    assert abs(max_valid_acc - checked_acc) < 1e-8

    # 2. validate best-epoch model (with HPO VALID SET)
    final_acc, _ = run_validation(best_epoch_model,
                                  valid_hpo_loader,
                                  during_train=False)

    if verbose:
        print(f"Final Acc (with HPO valid set) on Loaded Best Model: {final_acc}")

    return final_acc, best_epoch_model, epochs

In [39]:
print(device)

cuda


**4-1. 실험 실시**

In [40]:
hpo_best_acc = 0              # 모든 Hyper-param 조합의 HPO Valid set 정확도 중 가장 높은 것
best_hyperparam_set = None    # HPO Valid set 정확도가 가장 높은 Hyper-param 조합
best_hyperparam_model = None  # best_hyperparam_set 의 Hyper-param 조합으로 학습된 모델

In [41]:
trial_count = 0   # 1st ~ 20th trial 에만 학습 중 정보 출력
epoch_count = []  # 각 trial 의 epoch 횟수 리스트

def objective(trial):
    global hpo_best_acc, best_hyperparam_set, best_hyperparam_model, trial_count, epoch_count

    # hyper-params
    init_list = ['const_zero', 'const_0.5', 'gaussian', 'xavier_uniform',
                 'xavier_normal', 'he_uniform', 'he_normal']
    params = {
        'winit_conv': trial.suggest_categorical('winit_conv', init_list),
        'winit_fc': trial.suggest_categorical('winit_fc', init_list),
        'learning_rate': trial.suggest_float('learning_rate', 0.0005, 0.01, log=True)
    }

    if params['winit_conv'] == 'gaussian':
        params['gaussian_std_conv'] = trial.suggest_float('gaussian_std_conv', 0.25, 4.0, log=True)

    if params['winit_fc'] == 'gaussian':
        params['gaussian_std_fc'] = trial.suggest_float('gaussian_std_fc', 0.25, 4.0, log=True)

    # define model
    model = CNN().to(device)
    model.optimizer = torch.optim.AdamW(model.parameters(),
                                        lr=params['learning_rate'])

    model.winit_conv = params['winit_conv']
    model.winit_fc = params['winit_fc']

    if params['winit_conv'] == 'gaussian':
        model.gaussian_std_conv = params['gaussian_std_conv']

    if params['winit_fc'] == 'gaussian':
        model.gaussian_std_fc = params['gaussian_std_fc']

    # run model
    final_acc, best_epoch_model, epochs = run_model_common(model,
                                                           train_loader,
                                                           valid_epoch_loader,
                                                           valid_hpo_loader,
                                                           verbose=(trial_count < 20))

    trial_count += 1
    epoch_count.append(epochs)

    # global best model 갱신
    if final_acc > hpo_best_acc:
        hpo_best_acc = final_acc
        best_hyperparam_set = params

        best_hyperparam_model = CNN().to(device)
        best_hyperparam_model.load_state_dict(best_epoch_model.state_dict())

        print(f'best_hyperparam_model updated with Accuracy={hpo_best_acc:.4f}')

    print(f"Trial {trial_count}, Params: {params}, Accuracy: {final_acc:.4f}")
    return final_acc

In [42]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=TRIAL_COUNT)


initialized weights of layer "conv1.0" (with "const_0.5") :
 - first 10 values : [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 - count           : 288
 - mean            : 0.500000
 - std-dev         : 0.000000
 - max             : 0.500000
 - min             : 0.500000


initialized weights of layer "conv2.0" (with "const_0.5") :
 - first 10 values : [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 - count           : 18432
 - mean            : 0.500000
 - std-dev         : 0.000000
 - max             : 0.500000
 - min             : 0.500000


initialized weights of layer "conv3.0" (with "const_0.5") :
 - first 10 values : [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 - count           : 55296
 - mean            : 0.500000
 - std-dev         : 0.000000
 - max             : 0.500000
 - min             : 0.500000


initialized weights of layer "conv4.0" (with "const_0.5") :
 - first 10 values : [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 - count           : 110592
 - mean            : 0.500000
 - s

  return self._call_impl(*args, **kwargs)


best model updated
Epoch 1, Train Loss: 2.2923, Valid Loss: 2.2601, Valid Accuracy: 0.1955
best model updated
Epoch 2, Train Loss: 2.2525, Valid Loss: 2.2329, Valid Accuracy: 0.2240
best model updated
Epoch 3, Train Loss: 2.2223, Valid Loss: 2.2087, Valid Accuracy: 0.2465
best model updated
Epoch 4, Train Loss: 2.1978, Valid Loss: 2.1888, Valid Accuracy: 0.2620
best model updated
Epoch 5, Train Loss: 2.1768, Valid Loss: 2.1593, Valid Accuracy: 0.3010
best model updated
Epoch 6, Train Loss: 2.1693, Valid Loss: 2.1504, Valid Accuracy: 0.3090
Epoch 7, Train Loss: 2.1654, Valid Loss: 2.1748, Valid Accuracy: 0.2780
best model updated
Epoch 8, Train Loss: 2.1519, Valid Loss: 2.1294, Valid Accuracy: 0.3475
Epoch 9, Train Loss: 2.1357, Valid Loss: 2.1432, Valid Accuracy: 0.3170
best model updated
Epoch 10, Train Loss: 2.1439, Valid Loss: 2.1112, Valid Accuracy: 0.3595
Epoch 11, Train Loss: 2.1185, Valid Loss: 2.1338, Valid Accuracy: 0.3235
Epoch 12, Train Loss: 2.1149, Valid Loss: 2.1128, Vali

In [180]:
# Test Dataset 성능 평가

print(f'best hyper-param: {best_hyperparam_set}, best acc: {hpo_best_acc}')

best hyper-param: {'winit_conv': 'xavier_normal', 'winit_fc': 'gaussian', 'learning_rate': 0.000874356747178414, 'gaussian_std_fc': 0.3178767498552513}, best acc: 0.9744


In [181]:
# best_hyperparam_model 이 정상적으로 load 되었는지 최종 확인

checked_hpo_acc, _ = run_validation(best_hyperparam_model,
                                    valid_hpo_loader,
                                    during_train=False)

print(f"Valid Acc (with HPO valid set) on Best Hyper-param Model: {checked_hpo_acc}")

assert abs(hpo_best_acc - checked_hpo_acc) < 1e-8


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



Valid Acc (with HPO valid set) on Best Hyper-param Model: 0.9744


In [182]:
# 테스트셋에 대한 최종 정확도

hpo_final_acc, _ = run_validation(best_hyperparam_model,
                                  test_loader,
                                  during_train=False)

print(f'Final HPO Acc (with test set) : {hpo_final_acc}')

Final HPO Acc (with test set) : 0.9799


**5. HPO 성능 결과 확인**

In [183]:
from optuna.visualization import plot_optimization_history

In [184]:
# HPO 추이

fig = plot_optimization_history(study)
fig.update_layout(width=1000,
                  height=650,
                  yaxis_title='Accuracy (HPO valid set)')
fig.show()

In [185]:
fig.update_layout(yaxis=dict(range=[0.94, 0.98]))
fig.show()

**6. 각 Hyperparameter 값에 따른 성능 분포 확인**

In [186]:
# trial DataFrame 가져오기

trials_df = study.trials_dataframe()

In [187]:
trials_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_gaussian_std_conv,params_gaussian_std_fc,params_learning_rate,params_winit_conv,params_winit_fc,state
0,0,0.3736,2025-03-04 09:45:07.787554,2025-03-04 09:45:52.856920,0 days 00:00:45.069366,,,0.001323,const_0.5,he_normal,COMPLETE
1,1,0.1144,2025-03-04 09:45:52.857072,2025-03-04 09:46:04.187154,0 days 00:00:11.330082,,,0.004876,he_uniform,xavier_normal,COMPLETE
2,2,0.9516,2025-03-04 09:46:04.187343,2025-03-04 09:46:20.539594,0 days 00:00:16.352251,,,0.000814,he_uniform,xavier_uniform,COMPLETE
3,3,0.1144,2025-03-04 09:46:20.539725,2025-03-04 09:46:29.748873,0 days 00:00:09.209148,0.516163,,0.004755,gaussian,xavier_uniform,COMPLETE
4,4,0.9600,2025-03-04 09:46:29.749017,2025-03-04 09:47:18.153014,0 days 00:00:48.403997,,,0.000655,xavier_normal,he_normal,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...
195,195,0.9706,2025-03-04 11:01:11.549899,2025-03-04 11:01:31.262117,0 days 00:00:19.712218,,0.518161,0.000850,xavier_normal,gaussian,COMPLETE
196,196,0.9640,2025-03-04 11:01:31.262414,2025-03-04 11:01:51.755090,0 days 00:00:20.492676,,,0.000707,xavier_normal,xavier_normal,COMPLETE
197,197,0.9720,2025-03-04 11:01:51.755286,2025-03-04 11:02:19.048293,0 days 00:00:27.293007,,0.365124,0.000743,xavier_normal,gaussian,COMPLETE
198,198,0.1144,2025-03-04 11:02:19.048564,2025-03-04 11:02:27.021382,0 days 00:00:07.972818,,,0.000961,xavier_normal,const_0.5,COMPLETE


In [188]:
# epoch count 를 trial DataFrame 에 추가

print(list(epoch_count))

[25, 7, 11, 5, 37, 17, 19, 7, 5, 21, 6, 5, 31, 20, 37, 5, 23, 21, 13, 33, 8, 5, 20, 11, 17, 18, 6, 17, 6, 5, 5, 17, 16, 5, 5, 23, 12, 14, 23, 21, 30, 9, 26, 19, 15, 20, 27, 12, 20, 17, 5, 28, 10, 17, 16, 25, 26, 28, 6, 53, 7, 12, 10, 17, 6, 5, 17, 31, 24, 27, 5, 22, 13, 27, 5, 5, 23, 33, 29, 24, 21, 20, 12, 19, 10, 19, 14, 28, 21, 17, 5, 21, 28, 13, 22, 23, 8, 5, 34, 11, 7, 22, 12, 26, 13, 29, 23, 26, 28, 17, 25, 37, 33, 29, 13, 25, 18, 19, 14, 20, 19, 23, 28, 17, 29, 15, 24, 14, 25, 5, 23, 18, 19, 24, 21, 29, 22, 18, 31, 6, 19, 21, 17, 22, 21, 32, 14, 7, 21, 19, 6, 16, 16, 24, 20, 12, 23, 29, 14, 17, 17, 18, 25, 19, 32, 15, 5, 18, 15, 17, 15, 25, 16, 19, 6, 21, 24, 22, 23, 23, 17, 13, 13, 13, 14, 19, 11, 42, 15, 19, 21, 21, 18, 20, 27, 15, 16, 22, 5, 14]


In [189]:
trials_df['epoch_count'] = epoch_count

In [190]:
trials_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_gaussian_std_conv,params_gaussian_std_fc,params_learning_rate,params_winit_conv,params_winit_fc,state,epoch_count
0,0,0.3736,2025-03-04 09:45:07.787554,2025-03-04 09:45:52.856920,0 days 00:00:45.069366,,,0.001323,const_0.5,he_normal,COMPLETE,25
1,1,0.1144,2025-03-04 09:45:52.857072,2025-03-04 09:46:04.187154,0 days 00:00:11.330082,,,0.004876,he_uniform,xavier_normal,COMPLETE,7
2,2,0.9516,2025-03-04 09:46:04.187343,2025-03-04 09:46:20.539594,0 days 00:00:16.352251,,,0.000814,he_uniform,xavier_uniform,COMPLETE,11
3,3,0.1144,2025-03-04 09:46:20.539725,2025-03-04 09:46:29.748873,0 days 00:00:09.209148,0.516163,,0.004755,gaussian,xavier_uniform,COMPLETE,5
4,4,0.9600,2025-03-04 09:46:29.749017,2025-03-04 09:47:18.153014,0 days 00:00:48.403997,,,0.000655,xavier_normal,he_normal,COMPLETE,37
...,...,...,...,...,...,...,...,...,...,...,...,...
195,195,0.9706,2025-03-04 11:01:11.549899,2025-03-04 11:01:31.262117,0 days 00:00:19.712218,,0.518161,0.000850,xavier_normal,gaussian,COMPLETE,15
196,196,0.9640,2025-03-04 11:01:31.262414,2025-03-04 11:01:51.755090,0 days 00:00:20.492676,,,0.000707,xavier_normal,xavier_normal,COMPLETE,16
197,197,0.9720,2025-03-04 11:01:51.755286,2025-03-04 11:02:19.048293,0 days 00:00:27.293007,,0.365124,0.000743,xavier_normal,gaussian,COMPLETE,22
198,198,0.1144,2025-03-04 11:02:19.048564,2025-03-04 11:02:27.021382,0 days 00:00:07.972818,,,0.000961,xavier_normal,const_0.5,COMPLETE,5


In [191]:
# 범례 알파벳순 정렬 & 범례에 표시할 Color Map 지정 (비슷한 method 는 비슷한 색으로)

color_map = {'const_zero': '#F70', 'const_0.5': '#FA0',
             'gaussian': '#C6F',
             'he_uniform': '#07D', 'he_normal': '#6CF',
             'xavier_uniform': '#6A0', 'xavier_normal': '#AE5'}

category_orders = color_map.keys()

In [192]:
# 1. Conv. Layer 초기화 방법 & Fully-Connected Layer 초기화 방법에 따른 최고 정확도

import plotly.express as px

count_data = trials_df.groupby(by=['params_winit_conv', 'params_winit_fc'], as_index=False)['value'].max()

fig = px.bar(count_data,
             x='params_winit_conv', y='value',
             color='params_winit_fc',
             color_discrete_map=color_map,
             category_orders={'params_winit_fc': category_orders},
             barmode='group',
             title='Max Accuracy by Weight init method of Conv. and F.C. layers')

fig.update_layout(width=1100, height=550,
                  xaxis_title='Conv Layer init method',
                  legend_title_text='FC Layer init method',
                  legend_traceorder='normal')
fig.show()

In [193]:
fig.update_layout(yaxis=dict(range=[0.85, 0.98]))
fig.show()

In [194]:
# 2. Conv. Layer 초기화 방법 별 정확도 분포

fig = px.scatter(trials_df,
                 x="params_learning_rate",
                 y="value",
                 color="params_winit_conv",
                 color_discrete_map=color_map,
                 category_orders={'params_winit_fc': category_orders},
                 title="Accuracy Distribution by Conv. Layer init method")

fig.update_layout(width=1000, height=600,
                  xaxis_title='Learning Rate',
                  yaxis_title='Accuracy',
                  legend_title='Conv. Layer init method')

fig.show()

In [195]:
fig.update_layout(xaxis=dict(range=[0.00045, 0.0015]),
                  yaxis=dict(range=[0.94, 0.98]))
fig.show()

In [196]:
# 3. Fully-Connected Layer 초기화 방법 별 정확도 분포

fig = px.scatter(trials_df,
                 x="params_learning_rate",
                 y="value",
                 color="params_winit_fc",
                 color_discrete_map=color_map,
                 category_orders={'params_winit_fc': category_orders},
                 title="Accuracy Distribution by F.C. Layer init method")

fig.update_layout(width=1000, height=600,
                  xaxis_title='Learning Rate',
                  yaxis_title='Accuracy',
                  legend_title='F.C. Layer init method')

fig.show()

In [197]:
fig.update_layout(xaxis=dict(range=[0.00045, 0.0015]),
                  yaxis=dict(range=[0.94, 0.98]))
fig.show()

In [198]:
# 4. Conv. Layer 초기화 방법 별 epoch 수 (대략적 학습 시간) 분포

fig = px.scatter(trials_df,
                 x="params_learning_rate",
                 y="epoch_count",
                 color="params_winit_conv",
                 color_discrete_map=color_map,
                 category_orders={'params_winit_conv': category_orders},
                 title="Epoch Count Distribution by Conv. Layer init method")

fig.update_layout(width=1000, height=600,
                  xaxis_title='Learning Rate',
                  yaxis_title='Epoch Count',
                  legend_title='Conv. Layer init method')

fig.show()

In [199]:
fig.update_layout(xaxis=dict(range=[0.00045, 0.002]))
fig.show()

In [200]:
# 5. Fully-Connected Layer 초기화 방법 별 epoch 수 (대략적 학습 시간) 분포

fig = px.scatter(trials_df,
                 x="params_learning_rate",
                 y="epoch_count",
                 color="params_winit_fc",
                 color_discrete_map=color_map,
                 category_orders={'params_winit_fc': category_orders},
                 title="Epoch Count Distribution by F.C. Layer init method")

fig.update_layout(width=1000, height=600,
                  xaxis_title='Learning Rate',
                  yaxis_title='Epoch Count',
                  legend_title='F.C. Layer init method')

fig.show()

In [201]:
fig.update_layout(xaxis=dict(range=[0.00045, 0.002]))
fig.show()

In [202]:
# 6. Conv. Layer weight 를 Gaussian Distribution 으로 초기화 시, 표준편차에 따른 정확도 분포

fig = px.scatter(trials_df,
                 x="params_gaussian_std_conv",
                 y="value",
                 color="params_winit_fc",
                 color_discrete_map=color_map,
                 category_orders={'params_winit_fc': category_orders},
                 title="Accuracy Distribution by Std-dev of Conv. Layer Gaussian Distribution")

fig.update_layout(width=1000, height=600,
                  xaxis_title='Std-dev of Conv. Layer Gaussian Distribution',
                  yaxis_title='Accuracy',
                  legend_title='F.C. Layer init method')

fig.show()

In [203]:
# 7. Fully-Connected Layer weight 를 Gaussian Distribution 으로 초기화 시, 표준편차에 따른 정확도 분포

fig = px.scatter(trials_df,
                 x="params_gaussian_std_fc",
                 y="value",
                 color="params_winit_conv",
                 color_discrete_map=color_map,
                 category_orders={'params_winit_conv': category_orders},
                 title="Accuracy Distribution by Std-dev of F.C. Layer Gaussian Distribution")

fig.update_layout(width=1000, height=600,
                  xaxis_title='Std-dev of F.C. Layer Gaussian Distribution',
                  yaxis_title='Accuracy',
                  legend_title='Conv. Layer init method')

fig.show()

In [204]:
fig.update_layout(xaxis=dict(range=[0.24, 0.6]),
                  yaxis=dict(range=[0.94, 0.975]))
fig.show()

In [205]:
# 8. Conv. Layer weight 를 Gaussian Distribution 으로 초기화 시, 표준편차에 따른 epoch 수 (대략적 학습 시간) 분포

fig = px.scatter(trials_df,
                 x="params_gaussian_std_conv",
                 y="epoch_count",
                 color="params_winit_fc",
                 color_discrete_map=color_map,
                 category_orders={'params_winit_fc': category_orders},
                 title="Epoch Count Distribution by Std-dev of Conv. Layer Gaussian Distribution")

fig.update_layout(width=1000, height=600,
                  xaxis_title='Std-dev of Conv. Layer Gaussian Distribution',
                  yaxis_title='Epoch Count',
                  legend_title='F.C. Layer init method')

fig.show()

In [206]:
# 9. Fully-Connected Layer weight 를 Gaussian Distribution 으로 초기화 시, 표준편차에 따른 epoch 수 (대략적 학습 시간) 분포

fig = px.scatter(trials_df,
                 x="params_gaussian_std_fc",
                 y="epoch_count",
                 color="params_winit_conv",
                 color_discrete_map=color_map,
                 category_orders={'params_winit_conv': category_orders},
                 title="Epoch Count Distribution by Std-dev of F.C. Layer Gaussian Distribution")

fig.update_layout(width=1000, height=600,
                  xaxis_title='Std-dev of F.C. Layer Gaussian Distribution',
                  yaxis_title='Epoch Count',
                  legend_title='Conv. Layer init method')

fig.show()

In [207]:
fig.update_layout(xaxis=dict(range=[0.24, 1.0]))
fig.show()