In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import plotly.graph_objects as go

import random
import pandas as pd
import numpy as np

!pip install torchinfo
from torchinfo import summary

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
# Random Seed 고정 (학습 반복 시행 시에도 동일한 결과가 나오도록)

seed = 20250302

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

**1. 데이터셋 로딩 및 데이터 분석**

In [3]:
# 데이터셋 로딩

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transform,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transform,
                                          download=True)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 15.8MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 472kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 4.48MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 6.50MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [4]:
# 시간 절약을 위해, 학습 데이터에서 랜덤하게 일부 샘플만 추출

from torch.utils.data import Subset, DataLoader

NUM_TRAIN_SAMPLES = 8000
BATCH_SIZE = 32

subset_indices = random.sample(range(len(train_dataset)), NUM_TRAIN_SAMPLES)
train_subset = Subset(train_dataset, subset_indices)

train_loader = DataLoader(train_subset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

# 테스트 데이터셋은 학습 대상이 아니므로 그대로 이용
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

In [5]:
# 클래스 불균형 분석

# 학습 데이터
train_labels = torch.tensor([train_subset.dataset.targets[i] for i in subset_indices])
train_class_counts = torch.bincount(train_labels)
print(train_class_counts)

NUM_CLASSES = len(train_class_counts)

tensor([721, 915, 793, 872, 778, 738, 795, 849, 770, 769])


In [6]:
train_class_percentage = np.array(train_class_counts) * 100.0 / sum(train_class_counts)

train_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                                'count': train_class_counts,
                                'percentage (%)': train_class_percentage})

train_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,721,9.0125
1,1,915,11.437501
2,2,793,9.9125
3,3,872,10.900001
4,4,778,9.725
5,5,738,9.225
6,6,795,9.9375
7,7,849,10.612501
8,8,770,9.625
9,9,769,9.6125


In [7]:
# 테스트 데이터
test_labels = test_loader.dataset.targets
test_class_counts = torch.bincount(test_labels)
print(test_class_counts)

tensor([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009])


In [8]:
test_class_percentage = np.array(test_class_counts) * 100.0 / sum(test_class_counts)

test_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                               'count': test_class_counts,
                               'percentage (%)': test_class_percentage})

test_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,980,9.8
1,1,1135,11.35
2,2,1032,10.32
3,3,1010,10.1
4,4,982,9.82
5,5,892,8.92
6,6,958,9.58
7,7,1028,10.28
8,8,974,9.74
9,9,1009,10.09


In [9]:
# CNN 모델 정의

class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        # Conv
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 128, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3),
            nn.ReLU()
        )
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3),
            nn.ReLU()
        )

        # Fully Connected
        self.fc1 = nn.Sequential(
            nn.Linear(512 * 4 * 4, 512),
            nn.Sigmoid()
        )
        self.fc_final = nn.Sequential(
            nn.Linear(512, 10),
            nn.Softmax()  # Classification Task 의 Output Layer 이므로 Softmax 고정
        )

    def forward(self, x):

        # Conv
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)

        x = x.view(-1, 512 * 4 * 4)

        # Fully Connected
        x = self.fc1(x)
        x = self.fc_final(x)

        return x

In [10]:
# 모델 구조 출력

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)

print(summary(model, input_size=(BATCH_SIZE, 1, 28, 28)))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [32, 10]                  --
├─Sequential: 1-1                        [32, 128, 28, 28]         --
│    └─Conv2d: 2-1                       [32, 128, 28, 28]         1,280
│    └─ReLU: 2-2                         [32, 128, 28, 28]         --
├─MaxPool2d: 1-2                         [32, 128, 14, 14]         --
├─Sequential: 1-3                        [32, 256, 12, 12]         --
│    └─Conv2d: 2-3                       [32, 256, 12, 12]         295,168
│    └─ReLU: 2-4                         [32, 256, 12, 12]         --
├─MaxPool2d: 1-4                         [32, 256, 6, 6]           --
├─Sequential: 1-5                        [32, 512, 4, 4]           --
│    └─Conv2d: 2-5                       [32, 512, 4, 4]           1,180,160
│    └─ReLU: 2-6                         [32, 512, 4, 4]           --
├─Sequential: 1-6                        [32, 512]                 --


  return inner()


**3. 데이터셋 분리**

* Train Data -> Train Data + Valid Data (epoch) + Valid Data (하이퍼파라미터 최적화)

In [11]:
# 데이터셋 분리

from torch.utils.data import random_split

# 샘플 수
num_train = 1000
num_valid_epoch = 2000
num_valid_hpo = 5000

assert NUM_TRAIN_SAMPLES == num_train + num_valid_epoch + num_valid_hpo

# 데이터셋 분리
train_dataset, valid_epoch_dataset, valid_hpo_dataset =\
    random_split(train_subset, [num_train, num_valid_epoch, num_valid_hpo])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_epoch_loader = DataLoader(valid_epoch_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid_hpo_loader = DataLoader(valid_hpo_dataset, batch_size=BATCH_SIZE, shuffle=False)

**4. 하이퍼파라미터 최적화 학습 실시 함수**

* 하이퍼파라미터 최적화 라이브러리는 Optuna 사용
* 하이퍼파라미터 탐색 100 회 실시
* 하이퍼파라미터 목록
  * Early Stopping 기준
    * Valid Data Accuracy
    * Valid Data Loss
  * Early Stopping 횟수
    * 3 ~ 20 범위의 자연수
  * Learning Rate
    * 0.0005 ~ 0.01 (= 5e-4 ~ 1e-2) 범위


In [12]:
MAX_EPOCHS = 65536
TRIAL_COUNT = 100  # HPO trial count

In [13]:
from sklearn.metrics import accuracy_score
from copy import deepcopy

In [14]:
# Optuna 설정

!pip install optuna
import optuna
import logging

optuna.logging.set_verbosity(logging.WARNING)

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.1-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [15]:
# 모델 학습 실시

# args :
# - model           : 학습할 모델
# - train_loader    : Training Data Loader
# - train_loss_list : 각 epoch 에서의 train loss 기록

# returns :
# - train_loss : 모델의 Train Loss

def run_train(model, train_loader, train_loss_list):
    model.train()
    train_loss = 0.0
    cnt = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # train 실시
        model.optimizer.zero_grad()
        outputs = model(images)

        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
        model.optimizer.step()

        train_loss += loss.item()
        cnt += 1

    train_loss_list.append(train_loss / len(train_loader))
    return train_loss_list[-1]

In [16]:
# 모델 validation 실시

# args :
# - model        : validation 할 모델
# - valid_loader : Validation Data Loader
# - during_train : 모델 학습 중이면 True, 그렇지 않으면 False

# returns :
# - val_accuracy : 모델의 validation 정확도
# - val_loss     : 모델의 validation loss

def run_validation(model, valid_loader, during_train=True):
    model.eval()
    correct, total = 0, 0
    val_loss_sum = 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            val_loss_batch = nn.CrossEntropyLoss(reduction='sum')(outputs, labels)
            val_loss_sum += val_loss_batch

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Accuracy 계산
        val_accuracy = correct / total
        val_loss = val_loss_sum / total

    return val_accuracy, val_loss

In [17]:
# 모델 학습 및 validation 전체 프로세스

# args :
# - model              : 학습할 모델
# - train_loader       : Training Data Loader
# - valid_epoch_loader : 각 epoch 마다 validation 할 Valid Data Loader
# - valid_hpo_loader   : 최종적으로 해당 하이퍼파라미터 조합에 대한 Valid Data Loader
# - verbose            : 학습 중 프로세스 출력 여부

# returns :
# - final_acc        : 해당 하이퍼파라미터 조합에 대한 최종 Accuracy (valid accuracy 가 가장 높았던 epoch 의 모델로 측정)
# - best_epoch_model : valid accuracy 가 가장 높았던 epoch 에서 생성된 모델
# - epochs           : 해당 학습의 총 epoch count

def run_model_common(model, train_loader, valid_epoch_loader, valid_hpo_loader,
                     verbose=False):

    train_loss_list = []        # train loss
    valid_acc_list = []         # valid accuracy
    valid_loss_list = []        # valid loss

    max_valid_acc = 0.0         # max validation accuracy
    min_valid_loss = None       # min validation loss

    best_valid_acc_epoch = -1   # valid accuracy 가 가장 높았던 epoch
    best_valid_loss_epoch = -1  # valid loss 가 가장 낮았던 epoch
    best_epoch_model = None     # valid accuracy 가 가장 높았던 epoch 의 모델

    # 1. 학습 실시
    for epoch in range(MAX_EPOCHS):

        # 1-1. train model
        train_loss = run_train(model, train_loader, train_loss_list)

        # 1-2. validate model (with EPOCH VALID SET)
        epoch_acc, val_loss = run_validation(model, valid_epoch_loader)
        valid_acc_list.append(epoch_acc)
        valid_loss_list.append(val_loss)

        # 1-3. Early Stopping 처리 (overfitting 방지)
        # round 처리 이유: loss 의 미세한 개선의 반복으로 학습 과다 지연 방지
        if min_valid_loss is None or round(float(val_loss), 4) < min_valid_loss:
            min_valid_loss = round(float(val_loss), 4)
            best_valid_loss_epoch = epoch

        if epoch_acc > max_valid_acc:
            max_valid_acc = epoch_acc
            best_valid_acc_epoch = epoch

            best_epoch_model = CNN().to(device)
            best_epoch_model.load_state_dict(model.state_dict())

            if verbose:
                print('best model updated')

        # Early Stopping type 에 따른 학습 종료
        if model.early_stopping_type == 'val_acc':
            if epoch - best_valid_acc_epoch >= model.early_stopping_rounds:
                epochs = epoch
                break

        if model.early_stopping_type == 'val_loss':
            if epoch - best_valid_loss_epoch >= model.early_stopping_rounds:
                epochs = epoch
                break

        # 1-4. 결과 출력
        if verbose:
            print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {val_loss:.4f}, Valid Accuracy: {epoch_acc:.4f}")

    # check best-epoch model correctly loaded
    checked_acc, _ = run_validation(best_epoch_model,
                                    valid_epoch_loader,
                                    during_train=False)

    if verbose:
        print(f"Best Epoch: {best_valid_acc_epoch}, Best Valid Acc: {max_valid_acc}")
        print(f"Valid Acc (with Epoch valid set) on Loaded Best Model: {checked_acc}")

    assert abs(max_valid_acc - checked_acc) < 1e-8

    # 2. validate best-epoch model (with HPO VALID SET)
    final_acc, _ = run_validation(best_epoch_model,
                                  valid_hpo_loader,
                                  during_train=False)

    if verbose:
        print(f"Final Acc (with HPO valid set) on Loaded Best Model: {final_acc}")

    return final_acc, best_epoch_model, epochs

In [18]:
print(device)

cuda


**4-1. 실험 실시**

In [19]:
hpo_best_acc = 0              # 모든 Hyper-param 조합의 HPO Valid set 정확도 중 가장 높은 것
best_hyperparam_set = None    # HPO Valid set 정확도가 가장 높은 Hyper-param 조합
best_hyperparam_model = None  # best_hyperparam_set 의 Hyper-param 조합으로 학습된 모델

In [20]:
trial_count = 0   # 1st ~ 10th trial 에만 학습 중 정보 출력
epoch_count = []  # 각 trial 의 epoch 횟수 리스트

def objective(trial):
    global hpo_best_acc, best_hyperparam_set, best_hyperparam_model, trial_count, epoch_count

    # hyper-params
    params = {
        'early_stopping_type': trial.suggest_categorical('early_stopping_type', ['val_acc', 'val_loss']),
        'early_stopping_rounds': trial.suggest_int('early_stopping_rounds', 3, 20),
        'learning_rate': trial.suggest_float('learning_rate', 0.0005, 0.01, log=True)
    }

    # define and run model
    model = CNN().to(device)
    model.optimizer = torch.optim.AdamW(model.parameters(),
                                        lr=params['learning_rate'])
    model.early_stopping_type = params['early_stopping_type']
    model.early_stopping_rounds = params['early_stopping_rounds']

    final_acc, best_epoch_model, epochs = run_model_common(model,
                                                           train_loader,
                                                           valid_epoch_loader,
                                                           valid_hpo_loader,
                                                           verbose=(trial_count < 10))

    trial_count += 1
    epoch_count.append(epochs)

    # global best model 갱신
    if final_acc > hpo_best_acc:
        hpo_best_acc = final_acc
        best_hyperparam_set = params

        best_hyperparam_model = CNN().to(device)
        best_hyperparam_model.load_state_dict(best_epoch_model.state_dict())

        print(f'best_hyperparam_model updated with Accuracy={hpo_best_acc:.4f}')

    print(f"Params: {params}, Accuracy: {final_acc:.4f}")
    return final_acc

In [21]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=TRIAL_COUNT)

  return self._call_impl(*args, **kwargs)


best model updated
Epoch 1, Train Loss: 2.0223, Valid Loss: 1.8260, Valid Accuracy: 0.6570
best model updated
Epoch 2, Train Loss: 1.7627, Valid Loss: 1.7449, Valid Accuracy: 0.7400
best model updated
Epoch 3, Train Loss: 1.6697, Valid Loss: 1.6599, Valid Accuracy: 0.8310
best model updated
Epoch 4, Train Loss: 1.6016, Valid Loss: 1.5998, Valid Accuracy: 0.8975
best model updated
Epoch 5, Train Loss: 1.5365, Valid Loss: 1.5598, Valid Accuracy: 0.9270
best model updated
Epoch 6, Train Loss: 1.5105, Valid Loss: 1.5322, Valid Accuracy: 0.9475
Epoch 7, Train Loss: 1.4978, Valid Loss: 1.5364, Valid Accuracy: 0.9420
best model updated
Epoch 8, Train Loss: 1.4886, Valid Loss: 1.5229, Valid Accuracy: 0.9535
Epoch 9, Train Loss: 1.4767, Valid Loss: 1.5232, Valid Accuracy: 0.9495
best model updated
Epoch 10, Train Loss: 1.4751, Valid Loss: 1.5156, Valid Accuracy: 0.9545
best model updated
Epoch 11, Train Loss: 1.4711, Valid Loss: 1.5141, Valid Accuracy: 0.9555
best model updated
Epoch 12, Train 

In [22]:
# Test Dataset 성능 평가

print(f'best hyper-param: {best_hyperparam_set}, best acc: {hpo_best_acc}')

best hyper-param: {'early_stopping_type': 'val_loss', 'early_stopping_rounds': 13, 'learning_rate': 0.0005608064122073217}, best acc: 0.9692


In [23]:
# best_hyperparam_model 이 정상적으로 load 되었는지 최종 확인

checked_hpo_acc, _ = run_validation(best_hyperparam_model,
                                    valid_hpo_loader,
                                    during_train=False)

print(f"Valid Acc (with HPO valid set) on Best Hyper-param Model: {checked_hpo_acc}")

assert abs(hpo_best_acc - checked_hpo_acc) < 1e-8

Valid Acc (with HPO valid set) on Best Hyper-param Model: 0.9692


In [24]:
# 테스트셋에 대한 최종 정확도

hpo_final_acc, _ = run_validation(best_hyperparam_model,
                                  test_loader,
                                  during_train=False)

print(f'Final HPO Acc (with test set) : {hpo_final_acc}')

Final HPO Acc (with test set) : 0.9754


**5. HPO 성능 결과 확인**

In [25]:
from optuna.visualization import plot_optimization_history

In [26]:
# HPO 추이

fig = plot_optimization_history(study)
fig.update_layout(width=1000,
                  height=650,
                  yaxis_title='Accuracy (HPO valid set)')
fig.show()

In [27]:
fig.update_layout(yaxis=dict(range=[0.94, 0.97]))
fig.show()

**6. 각 Hyperparameter 값에 따른 성능 분포 확인**

In [28]:
# trial DataFrame 가져오기

trials_df = study.trials_dataframe()

In [29]:
trials_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_early_stopping_rounds,params_early_stopping_type,params_learning_rate,state
0,0,0.9660,2025-03-03 01:44:26.308978,2025-03-03 01:47:37.340082,0 days 00:03:11.031104,13,val_loss,0.000502,COMPLETE
1,1,0.9634,2025-03-03 01:47:37.340266,2025-03-03 01:48:44.843324,0 days 00:01:07.503058,14,val_loss,0.001897,COMPLETE
2,2,0.9524,2025-03-03 01:48:44.843558,2025-03-03 01:49:03.709005,0 days 00:00:18.865447,4,val_acc,0.001722,COMPLETE
3,3,0.9622,2025-03-03 01:49:03.709143,2025-03-03 01:49:27.357717,0 days 00:00:23.648574,5,val_acc,0.001424,COMPLETE
4,4,0.9546,2025-03-03 01:49:27.357856,2025-03-03 01:50:23.186112,0 days 00:00:55.828256,8,val_loss,0.002331,COMPLETE
...,...,...,...,...,...,...,...,...,...
95,95,0.9604,2025-03-03 03:51:50.822608,2025-03-03 03:52:24.697130,0 days 00:00:33.874522,12,val_loss,0.000644,COMPLETE
96,96,0.9642,2025-03-03 03:52:24.697287,2025-03-03 03:55:18.522352,0 days 00:02:53.825065,14,val_loss,0.000802,COMPLETE
97,97,0.9642,2025-03-03 03:55:18.522544,2025-03-03 03:56:01.813379,0 days 00:00:43.290835,16,val_acc,0.000740,COMPLETE
98,98,0.9638,2025-03-03 03:56:01.813543,2025-03-03 03:57:02.713486,0 days 00:01:00.899943,17,val_loss,0.000540,COMPLETE


In [30]:
# epoch count 를 trial DataFrame 에 추가

print(list(epoch_count))

[204, 78, 19, 25, 63, 13, 12, 43, 18, 21, 112, 250, 415, 180, 160, 204, 54, 12, 106, 110, 42, 128, 60, 84, 78, 213, 62, 70, 61, 45, 51, 134, 213, 63, 44, 324, 69, 5, 14, 35, 22, 61, 68, 16, 72, 29, 63, 24, 188, 17, 27, 354, 46, 144, 105, 109, 197, 51, 218, 69, 162, 163, 205, 41, 116, 125, 208, 197, 94, 20, 47, 85, 98, 36, 41, 28, 211, 83, 132, 15, 102, 92, 38, 39, 43, 97, 132, 13, 100, 47, 108, 72, 49, 60, 81, 37, 208, 49, 70, 84]


In [31]:
trials_df['epoch_count'] = epoch_count

In [32]:
trials_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_early_stopping_rounds,params_early_stopping_type,params_learning_rate,state,epoch_count
0,0,0.9660,2025-03-03 01:44:26.308978,2025-03-03 01:47:37.340082,0 days 00:03:11.031104,13,val_loss,0.000502,COMPLETE,204
1,1,0.9634,2025-03-03 01:47:37.340266,2025-03-03 01:48:44.843324,0 days 00:01:07.503058,14,val_loss,0.001897,COMPLETE,78
2,2,0.9524,2025-03-03 01:48:44.843558,2025-03-03 01:49:03.709005,0 days 00:00:18.865447,4,val_acc,0.001722,COMPLETE,19
3,3,0.9622,2025-03-03 01:49:03.709143,2025-03-03 01:49:27.357717,0 days 00:00:23.648574,5,val_acc,0.001424,COMPLETE,25
4,4,0.9546,2025-03-03 01:49:27.357856,2025-03-03 01:50:23.186112,0 days 00:00:55.828256,8,val_loss,0.002331,COMPLETE,63
...,...,...,...,...,...,...,...,...,...,...
95,95,0.9604,2025-03-03 03:51:50.822608,2025-03-03 03:52:24.697130,0 days 00:00:33.874522,12,val_loss,0.000644,COMPLETE,37
96,96,0.9642,2025-03-03 03:52:24.697287,2025-03-03 03:55:18.522352,0 days 00:02:53.825065,14,val_loss,0.000802,COMPLETE,208
97,97,0.9642,2025-03-03 03:55:18.522544,2025-03-03 03:56:01.813379,0 days 00:00:43.290835,16,val_acc,0.000740,COMPLETE,49
98,98,0.9638,2025-03-03 03:56:01.813543,2025-03-03 03:57:02.713486,0 days 00:01:00.899943,17,val_loss,0.000540,COMPLETE,70


In [39]:
# Early Stopping Type 별, Learning Rate 에 따른 Accuracy 분포

import plotly.express as px

fig = px.scatter(trials_df,
                 x="params_learning_rate",
                 y="value",
                 color="params_early_stopping_type",
                 title="Accuracy Distribution by Learning Rate")

fig.update_layout(width=1000, height=600,
                  yaxis_title='Accuracy')

fig.show()

In [40]:
fig.update_layout(xaxis=dict(range=[0.00045, 0.0035]),
                  yaxis=dict(range=[0.945, 0.97]))
fig.show()

In [41]:
# Early Stopping Type 별, Early Stopping epoch 횟수에 따른 Accuracy 분포

import plotly.express as px

fig = px.scatter(trials_df,
                 x="params_early_stopping_rounds",
                 y="value",
                 color="params_early_stopping_type",
                 title="Accuracy Distribution by Early Stopping Rounds")

fig.update_layout(width=1000, height=600,
                  yaxis_title='Accuracy')

fig.show()

In [42]:
fig.update_layout(yaxis=dict(range=[0.945, 0.97]))
fig.show()

In [43]:
# Early Stopping Type 별, Early Stopping epoch 횟수에 따른 Epoch Count 분포

import plotly.express as px

fig = px.scatter(trials_df,
                 x="params_early_stopping_rounds",
                 y="epoch_count",
                 color="params_early_stopping_type",
                 color_continuous_scale=['#F53', '#DAF', '#14A'],
                 title="Epoch Count Distribution by Early Stopping Rounds")

fig.update_layout(width=1000, height=600,
                  yaxis_title='Epochs')

fig.show()