In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import plotly.graph_objects as go

import random
import pandas as pd
import numpy as np

!pip install torchinfo
from torchinfo import summary



In [27]:
# Random Seed 고정 (학습 반복 시행 시에도 동일한 결과가 나오도록)

seed = 20250301

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

**1. 데이터셋 로딩 및 데이터 분석**

In [28]:
# 데이터셋 로딩

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transform,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transform,
                                          download=True)


In [29]:
# 시간 절약을 위해, 학습 데이터에서 랜덤하게 일부 샘플만 추출

from torch.utils.data import Subset, DataLoader

NUM_TRAIN_SAMPLES = 8000
BATCH_SIZE = 32

subset_indices = random.sample(range(len(train_dataset)), NUM_TRAIN_SAMPLES)
train_subset = Subset(train_dataset, subset_indices)

train_loader = DataLoader(train_subset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

# 테스트 데이터셋은 학습 대상이 아니므로 그대로 이용
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

In [30]:
# 클래스 불균형 분석

# 학습 데이터
train_labels = torch.tensor([train_subset.dataset.targets[i] for i in subset_indices])
train_class_counts = torch.bincount(train_labels)
print(train_class_counts)

NUM_CLASSES = len(train_class_counts)

tensor([770, 943, 785, 858, 784, 701, 798, 819, 775, 767])


In [31]:
train_class_percentage = np.array(train_class_counts) * 100.0 / sum(train_class_counts)

train_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                                'count': train_class_counts,
                                'percentage (%)': train_class_percentage})

train_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,770,9.625
1,1,943,11.787501
2,2,785,9.8125
3,3,858,10.725001
4,4,784,9.8
5,5,701,8.7625
6,6,798,9.975
7,7,819,10.2375
8,8,775,9.6875
9,9,767,9.5875


In [32]:
# 테스트 데이터
test_labels = test_loader.dataset.targets
test_class_counts = torch.bincount(test_labels)
print(test_class_counts)

tensor([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009])


In [33]:
test_class_percentage = np.array(test_class_counts) * 100.0 / sum(test_class_counts)

test_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                               'count': test_class_counts,
                               'percentage (%)': test_class_percentage})

test_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,980,9.8
1,1,1135,11.35
2,2,1032,10.32
3,3,1010,10.1
4,4,982,9.82
5,5,892,8.92
6,6,958,9.58
7,7,1028,10.28
8,8,974,9.74
9,9,1009,10.09


**2. CNN 모델 정의**

In [34]:
# CNN 모델 정의

class CNN(nn.Module):

    # args:
    # - normalization_type : 모든 Conv. + FC Layer 에 적용할 Normalization type
    #                        - 미사용, Batch Norm, Layer Norm

    def __init__(self, normalization_type):
        super(CNN, self).__init__()
        self.normalization_type = normalization_type

        # Create Conv + ReLU layer
        def create_conv_ReLU_layer(conv2d_layer, normalization_type):
            if normalization_type == 'batch_norm':
                return nn.Sequential(
                    conv2d_layer,
                    nn.BatchNorm2d(conv2d_layer.out_channels),  # Batch Norm for Conv. Layers
                    nn.ReLU()
                )

            elif normalization_type == 'layer_norm':

                # conv2d_layer 의 모든 출력을 하나의 그룹으로 묶음 -> 레이어 정규화
                return nn.Sequential(
                    conv2d_layer,
                    nn.GroupNorm(1, conv2d_layer.out_channels),  # Layer Norm for Conv. Layers
                    nn.ReLU()
                )

            else:
                return nn.Sequential(
                    conv2d_layer,
                    nn.ReLU()
                )

        # Create Fully Connected + Sigmoid Layer
        def create_FC_sigmoid_layer(linear_layer, normalization_type):
            if normalization_type == 'batch_norm':
                return nn.Sequential(
                    linear_layer,
                    nn.BatchNorm1d(linear_layer.out_features),  # Batch Norm for Linear Layers
                    nn.Sigmoid()
                )

            elif normalization_type == 'layer_norm':
                return nn.Sequential(
                    linear_layer,
                    nn.LayerNorm(linear_layer.out_features),
                    nn.Sigmoid()
                )

            else:
                return nn.Sequential(
                    linear_layer,
                    nn.Sigmoid()
                )

        # Conv
        self.conv1 = create_conv_ReLU_layer(
            conv2d_layer=nn.Conv2d(1, 32, kernel_size=3, padding=1),
            normalization_type=normalization_type
        )
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = create_conv_ReLU_layer(
            conv2d_layer=nn.Conv2d(32, 64, kernel_size=3),
            normalization_type=normalization_type
        )
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = create_conv_ReLU_layer(
            conv2d_layer=nn.Conv2d(64, 64, kernel_size=3),
            normalization_type=normalization_type
        )

        # Fully Connected
        self.fc1 = create_FC_sigmoid_layer(
            linear_layer=nn.Linear(64 * 4 * 4, 64),
            normalization_type=normalization_type
        )
        self.fc_final = nn.Sequential(
            nn.Linear(64, 10),
            nn.Softmax()
        )

    def forward(self, x):

        # Conv
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)

        x = x.view(-1, 64 * 4 * 4)

        # Fully Connected
        x = self.fc1(x)
        x = self.fc_final(x)

        return x

In [35]:
# 모델 구조 출력 (Batch Normalization 적용 시)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN(normalization_type='batch_norm').to(device)

print(summary(model, input_size=(BATCH_SIZE, 1, 28, 28)))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [32, 10]                  --
├─Sequential: 1-1                        [32, 32, 28, 28]          --
│    └─Conv2d: 2-1                       [32, 32, 28, 28]          320
│    └─BatchNorm2d: 2-2                  [32, 32, 28, 28]          64
│    └─ReLU: 2-3                         [32, 32, 28, 28]          --
├─MaxPool2d: 1-2                         [32, 32, 14, 14]          --
├─Sequential: 1-3                        [32, 64, 12, 12]          --
│    └─Conv2d: 2-4                       [32, 64, 12, 12]          18,496
│    └─BatchNorm2d: 2-5                  [32, 64, 12, 12]          128
│    └─ReLU: 2-6                         [32, 64, 12, 12]          --
├─MaxPool2d: 1-4                         [32, 64, 6, 6]            --
├─Sequential: 1-5                        [32, 64, 4, 4]            --
│    └─Conv2d: 2-7                       [32, 64, 4, 4]            36,928
│    

  return inner()


In [36]:
# 모델 구조 출력 (Layer Normalization 적용 시)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN(normalization_type='layer_norm').to(device)

print(summary(model, input_size=(BATCH_SIZE, 1, 28, 28)))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [32, 10]                  --
├─Sequential: 1-1                        [32, 32, 28, 28]          --
│    └─Conv2d: 2-1                       [32, 32, 28, 28]          320
│    └─GroupNorm: 2-2                    [32, 32, 28, 28]          64
│    └─ReLU: 2-3                         [32, 32, 28, 28]          --
├─MaxPool2d: 1-2                         [32, 32, 14, 14]          --
├─Sequential: 1-3                        [32, 64, 12, 12]          --
│    └─Conv2d: 2-4                       [32, 64, 12, 12]          18,496
│    └─GroupNorm: 2-5                    [32, 64, 12, 12]          128
│    └─ReLU: 2-6                         [32, 64, 12, 12]          --
├─MaxPool2d: 1-4                         [32, 64, 6, 6]            --
├─Sequential: 1-5                        [32, 64, 4, 4]            --
│    └─Conv2d: 2-7                       [32, 64, 4, 4]            36,928
│    

**3. 데이터셋 분리**

* Train Data -> Train Data + Valid Data (epoch) + Valid Data (하이퍼파라미터 최적화)

In [37]:
# 데이터셋 분리

from torch.utils.data import random_split

# 샘플 수
num_train = 1000
num_valid_epoch = 2000
num_valid_hpo = 5000

assert NUM_TRAIN_SAMPLES == num_train + num_valid_epoch + num_valid_hpo

# 데이터셋 분리
train_dataset, valid_epoch_dataset, valid_hpo_dataset =\
    random_split(train_subset, [num_train, num_valid_epoch, num_valid_hpo])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_epoch_loader = DataLoader(valid_epoch_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid_hpo_loader = DataLoader(valid_hpo_dataset, batch_size=BATCH_SIZE, shuffle=False)

**4. 하이퍼파라미터 최적화 학습 실시 함수**

* 하이퍼파라미터 최적화 라이브러리는 Optuna 사용
* 하이퍼파라미터 탐색 200 회 실시
* 하이퍼파라미터 목록
  * Conv1, Conv2, Conv3 Layer 각각의 Dropout Rate
  * 첫번째 Fully Connected Layer 의 Dropout Rate


In [38]:
MAX_EPOCHS = 65536
EARLY_STOPPING_ROUNDS = 5  # Early Stopping Patience (epochs)
TRIAL_COUNT = 200          # HPO trial count

In [39]:
from sklearn.metrics import accuracy_score
from copy import deepcopy

In [40]:
# Optuna 설정

!pip install optuna
import optuna
import logging

optuna.logging.set_verbosity(logging.WARNING)



In [41]:
# 모델 학습 실시

# args :
# - model           : 학습할 모델
# - train_loader    : Training Data Loader
# - train_loss_list : 각 epoch 에서의 train loss 기록
# - reg_type        : Regularization 의 종류
#                     - 미사용, L1, L2, L1+L2
# - reg_lambda      : Regularization 에서 lambda 값

# returns :
# - train_loss : 모델의 Train Loss

def run_train(model, train_loader, train_loss_list, reg_type, reg_lambda):
    model.train()
    train_loss = 0.0
    cnt = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # train 실시
        model.optimizer.zero_grad()
        outputs = model(images)

        CE_loss = nn.CrossEntropyLoss()(outputs, labels)

        # L1, L2 regularization 을 적용한 Loss 를 최종 Loss 로 함
        if reg_type == 'L1':
            l1_norm = sum(param.abs().sum() for param in model.parameters())
            loss = CE_loss + reg_lambda * l1_norm

        elif reg_type == 'L2':
            l2_norm = sum(param.pow(2.0).sum() for param in model.parameters())
            loss = CE_loss + reg_lambda * l2_norm

        # L1 + L2 에 대해서는 mean(L1, L2) 적용
        # - 하이퍼파라미터 최적화 시 'lambda' 값의 확률밀도함수 왜곡 방지 목적
        elif reg_type == 'L1+L2':
            l1_norm = sum(param.abs().sum() for param in model.parameters())
            l2_norm = sum(param.pow(2.0).sum() for param in model.parameters())
            loss = CE_loss + 0.5 * reg_lambda * (l1_norm + l2_norm)

        else:
            loss = CE_loss

        loss.backward()
        model.optimizer.step()

        train_loss += loss.item()
        cnt += 1

    train_loss_list.append(train_loss / len(train_loader))
    return train_loss_list[-1]

In [42]:
# 모델 validation 실시

# args :
# - model        : validation 할 모델
# - valid_loader : Validation Data Loader

# returns :
# - accuracy : 모델의 validation 정확도

def run_validation(model, valid_loader):
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # validation 실시 및 정확도 측정
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    return accuracy

In [43]:
# 모델 학습 및 validation 전체 프로세스

# args :
# - model              : 학습할 모델
# - train_loader       : Training Data Loader
# - valid_epoch_loader : 각 epoch 마다 validation 할 Valid Data Loader
# - valid_hpo_loader   : 최종적으로 해당 하이퍼파라미터 조합에 대한 Valid Data Loader
# - reg_type           : Regularization 의 종류
#                        - 미사용, L1, L2, L1+L2
# - reg_lambda         : Regularization 에서 lambda 값
# - verbose            : 학습 중 프로세스 출력 여부

# returns :
# - final_acc        : 해당 하이퍼파라미터 조합에 대한 최종 Accuracy (valid_acc 이 가장 높았던 epoch 의 모델로 측정)
# - best_epoch_model : valid_acc 이 가장 높았던 epoch 에서 생성된 모델

def run_model_common(model, train_loader, valid_epoch_loader, valid_hpo_loader,
                     reg_type, reg_lambda, verbose=False):

    train_loss_list = []       # train loss
    valid_acc_list = []        # valid accuracy
    max_valid_acc = 0.0        # max validation accuracy
    best_valid_acc_epoch = -1  # valid_acc 이 가장 높았던 epoch
    best_epoch_model = None    # valid_acc 이 가장 높았던 epoch 의 모델

    # 1. 학습 실시
    for epoch in range(MAX_EPOCHS):

        # 1-1. train model
        train_loss = run_train(model, train_loader, train_loss_list, reg_type, reg_lambda)

        # 1-2. validate model (with EPOCH VALID SET)
        epoch_acc = run_validation(model, valid_epoch_loader)
        valid_acc_list.append(epoch_acc)

        # 1-3. Early Stopping 처리 (overfitting 방지)
        if epoch_acc > max_valid_acc:
            max_valid_acc = epoch_acc
            best_valid_acc_epoch = epoch

            best_epoch_model = CNN(normalization_type=model.normalization_type).to(device)
            best_epoch_model.load_state_dict(model.state_dict())

            if verbose:
                print('best model updated')

        if epoch - best_valid_acc_epoch >= EARLY_STOPPING_ROUNDS:
            break

        # 1-4. 결과 출력
        if verbose:
            print(f"Epoch {epoch+1}, Loss: {train_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    # check best-epoch model correctly loaded
    checked_acc = run_validation(best_epoch_model, valid_epoch_loader)

    if verbose:
        print(f"Best Epoch: {best_valid_acc_epoch}, Best Valid Acc: {max_valid_acc}")
        print(f"Valid Acc (with Epoch valid set) on Loaded Best Model: {checked_acc}")

    assert abs(max_valid_acc - checked_acc) < 1e-8

    # 2. validate best-epoch model (with HPO VALID SET)
    final_acc = run_validation(best_epoch_model, valid_hpo_loader)

    if verbose:
        print(f"Final Acc (with HPO valid set) on Loaded Best Model: {final_acc}")

    return final_acc, best_epoch_model

In [44]:
print(device)

cuda


**4-1. 실험 실시**

In [45]:
hpo_best_acc = 0              # 모든 Hyper-param 조합의 HPO Valid set 정확도 중 가장 높은 것
best_hyperparam_set = None    # HPO Valid set 정확도가 가장 높은 Hyper-param 조합
best_hyperparam_model = None  # best_hyperparam_set 의 Hyper-param 조합으로 학습된 모델

In [46]:
verbose_at_first_trial = True  # 1st trial 에만 학습 중 정보 출력

def objective(trial):
    global hpo_best_acc, best_hyperparam_set, best_hyperparam_model, verbose_at_first_trial

    # hyper-params
    params = {
        'reg_type': trial.suggest_categorical('reg_type', ['none', 'L1', 'L2', 'L1+L2']),
        'norm_type': trial.suggest_categorical('norm_type', ['none', 'batch_norm', 'layer_norm']),
        'learning_rate': trial.suggest_float('learning_rate', 0.0005, 0.01, log=True)
    }

    params['reg_lambda'] = None
    if params['reg_type'] in ['L1', 'L2', 'L1+L2']:
        params['reg_lambda'] = trial.suggest_float('reg_lambda', 0.000001, 0.01, log=True)

    # define and run model
    model = CNN(normalization_type=params['norm_type']).to(device)
    model.optimizer = torch.optim.Adam(model.parameters(),
                                       lr=params['learning_rate'],
                                       weight_decay=0.0)

    final_acc, best_epoch_model = run_model_common(model,
                                                   train_loader,
                                                   valid_epoch_loader,
                                                   valid_hpo_loader,
                                                   reg_type=params['reg_type'],
                                                   reg_lambda=params['reg_lambda'],
                                                   verbose=verbose_at_first_trial)

    verbose_at_first_trial = False

    # global best model 갱신
    if final_acc > hpo_best_acc:
        hpo_best_acc = final_acc
        best_hyperparam_set = params

        best_hyperparam_model = CNN(normalization_type=best_hyperparam_set['norm_type']).to(device)
        best_hyperparam_model.load_state_dict(best_epoch_model.state_dict())
        print(f'best_hyperparam_model updated with Accuracy={hpo_best_acc:.4f}')

    print(f"Params: {params}, Accuracy: {final_acc:.4f}")
    return final_acc

In [47]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=TRIAL_COUNT)

  return self._call_impl(*args, **kwargs)


best model updated
Epoch 1, Loss: 2.2645, Accuracy: 0.6975
best model updated
Epoch 2, Loss: 2.1925, Accuracy: 0.7600
best model updated
Epoch 3, Loss: 2.1465, Accuracy: 0.7765
best model updated
Epoch 4, Loss: 2.0909, Accuracy: 0.7970
best model updated
Epoch 5, Loss: 2.0365, Accuracy: 0.8010
best model updated
Epoch 6, Loss: 1.9788, Accuracy: 0.8085
best model updated
Epoch 7, Loss: 1.9153, Accuracy: 0.8360
best model updated
Epoch 8, Loss: 1.8645, Accuracy: 0.8620
best model updated
Epoch 9, Loss: 1.8112, Accuracy: 0.9355
best model updated
Epoch 10, Loss: 1.7515, Accuracy: 0.9520
best model updated
Epoch 11, Loss: 1.7004, Accuracy: 0.9650
best model updated
Epoch 12, Loss: 1.6584, Accuracy: 0.9655
Epoch 13, Loss: 1.6215, Accuracy: 0.9645
best model updated
Epoch 14, Loss: 1.5920, Accuracy: 0.9660
Epoch 15, Loss: 1.5716, Accuracy: 0.9650
Epoch 16, Loss: 1.5546, Accuracy: 0.9655
Epoch 17, Loss: 1.5422, Accuracy: 0.9645
best model updated
Epoch 18, Loss: 1.5324, Accuracy: 0.9665
Epoch

In [48]:
# Test Dataset 성능 평가

print(f'best hyper-param: {best_hyperparam_set}, best acc: {hpo_best_acc}')

best hyper-param: {'reg_type': 'none', 'norm_type': 'batch_norm', 'learning_rate': 0.00252562467750915, 'reg_lambda': None}, best acc: 0.9794


In [49]:
# best_hyperparam_model 이 정상적으로 load 되었는지 최종 확인

checked_hpo_acc = run_validation(best_hyperparam_model, valid_hpo_loader)
print(f"Valid Acc (with HPO valid set) on Best Hyper-param Model: {checked_hpo_acc}")

assert abs(hpo_best_acc - checked_hpo_acc) < 1e-8

Valid Acc (with HPO valid set) on Best Hyper-param Model: 0.9794


In [50]:
# 테스트셋에 대한 최종 정확도

hpo_final_acc = run_validation(best_hyperparam_model, test_loader)

print(f'Final HPO Acc (with test set) : {hpo_final_acc}')

Final HPO Acc (with test set) : 0.9791


**5. HPO 성능 결과 확인**

In [51]:
from optuna.visualization import plot_optimization_history

In [52]:
# HPO 추이

fig = plot_optimization_history(study)
fig.update_layout(width=1100,
                  height=700,
                  yaxis_title='Accuracy (HPO valid set)')
fig.show()

In [56]:
fig.update_layout(width=1100,
                  height=700,
                  yaxis_title='Accuracy (HPO valid set)',
                  yaxis=dict(range=[0.92, 0.98]))
fig.show()

**6. 각 Hyperparameter 값에 따른 성능 분포 확인**

In [57]:
# trial DataFrame 가져오기

trials_df = study.trials_dataframe()

In [58]:
trials_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_learning_rate,params_norm_type,params_reg_lambda,params_reg_type,state
0,0,0.9664,2025-03-01 03:40:56.425273,2025-03-01 03:41:27.889138,0 days 00:00:31.463865,0.000593,layer_norm,0.000006,L1+L2,COMPLETE
1,1,0.5840,2025-03-01 03:41:27.889296,2025-03-01 03:41:44.609888,0 days 00:00:16.720592,0.000670,batch_norm,0.001845,L1,COMPLETE
2,2,0.9660,2025-03-01 03:41:44.610028,2025-03-01 03:42:01.958654,0 days 00:00:17.348626,0.002221,layer_norm,0.000029,L1+L2,COMPLETE
3,3,0.9690,2025-03-01 03:42:01.958804,2025-03-01 03:42:21.839548,0 days 00:00:19.880744,0.004712,layer_norm,,none,COMPLETE
4,4,0.9690,2025-03-01 03:42:21.839680,2025-03-01 03:42:42.686737,0 days 00:00:20.847057,0.003973,layer_norm,,none,COMPLETE
...,...,...,...,...,...,...,...,...,...,...
195,195,0.9770,2025-03-01 04:33:30.093287,2025-03-01 04:33:48.623970,0 days 00:00:18.530683,0.003447,batch_norm,,none,COMPLETE
196,196,0.9744,2025-03-01 04:33:48.624148,2025-03-01 04:34:00.836595,0 days 00:00:12.212447,0.003111,batch_norm,,none,COMPLETE
197,197,0.9768,2025-03-01 04:34:00.836774,2025-03-01 04:34:18.788371,0 days 00:00:17.951597,0.003772,batch_norm,,none,COMPLETE
198,198,0.9780,2025-03-01 04:34:18.788567,2025-03-01 04:34:38.095088,0 days 00:00:19.306521,0.004104,batch_norm,,none,COMPLETE


In [98]:
# Regularization 및 Normalization type

import plotly.express as px

count_data = trials_df.groupby(by=['params_norm_type', 'params_reg_type'], as_index=False)['value'].max()

fig = px.bar(count_data,
             x='params_norm_type', y='value',
             color='params_reg_type',
             barmode='group',
             title='Max Accuracy by Regularization type and Normalization type')

fig.update_layout(width=800, height=550,
                  yaxis=dict(range=[0.95, 0.98]))
fig.show()

In [99]:
# L1, L2 Regularization 의 lambda 값에 따른 정확도
# Coloring = Regularization type

fig = px.scatter(trials_df,
                 x="params_reg_lambda", y="value",
                 color="params_reg_type",
                 title="Accuracy by Lambda for L1, L2 Regularization")

fig.update_layout(width=800, height=500,
                  xaxis_title='lambda',
                  yaxis_title='accuracy')
fig.show()

In [100]:
fig.update_layout(xaxis=dict(range=[0, 0.0002]),
                  yaxis=dict(range=[0.945, 0.98]))
fig.show()

In [101]:
# L1, L2 Regularization 의 lambda 값에 따른 정확도
# Coloring = Normalization type

fig = px.scatter(trials_df,
                 x="params_reg_lambda", y="value",
                 color="params_norm_type",
                 title="Accuracy by Lambda for L1, L2 Regularization")

fig.update_layout(width=800, height=500,
                  xaxis_title='lambda',
                  yaxis_title='accuracy')
fig.show()

In [102]:
fig.update_layout(xaxis=dict(range=[0, 0.0002]),
                  yaxis=dict(range=[0.945, 0.98]))
fig.show()

In [103]:
# Learning Rate 에 따른 정확도
# Coloring = Regularization type

fig = px.scatter(trials_df,
                 x="params_learning_rate", y="value",
                 color="params_reg_type",
                 title="Accuracy by Learning Rate")

fig.update_layout(width=800, height=500,
                  xaxis_title='learning_rate',
                  yaxis_title='accuracy')
fig.show()

In [104]:
fig.update_layout(yaxis=dict(range=[0.92, 0.98]))
fig.show()

In [106]:
# Learning Rate 에 따른 정확도
# Coloring = Normalization type

fig = px.scatter(trials_df,
                 x="params_learning_rate", y="value",
                 color="params_norm_type",
                 title="Accuracy by Learning Rate")

fig.update_layout(width=800, height=500,
                  xaxis_title='lambda',
                  yaxis_title='accuracy')
fig.show()

In [109]:
fig.update_layout(yaxis=dict(range=[0.92, 0.98]))
fig.show()