In [68]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import plotly.graph_objects as go

import random
import pandas as pd
import numpy as np

!pip install torchinfo
from torchinfo import summary



In [69]:
# Random Seed 고정 (학습 반복 시행 시에도 동일한 결과가 나오도록)

seed = 20250302

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

**1. 데이터셋 로딩 및 데이터 분석**

In [70]:
# 데이터셋 로딩

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transform,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transform,
                                          download=True)


In [71]:
# 시간 절약을 위해, 학습 데이터에서 랜덤하게 일부 샘플만 추출

from torch.utils.data import Subset, DataLoader

NUM_TRAIN_SAMPLES = 8000
BATCH_SIZE = 32

subset_indices = random.sample(range(len(train_dataset)), NUM_TRAIN_SAMPLES)
train_subset = Subset(train_dataset, subset_indices)

train_loader = DataLoader(train_subset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

# 테스트 데이터셋은 학습 대상이 아니므로 그대로 이용
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

In [72]:
# 클래스 불균형 분석

# 학습 데이터
train_labels = torch.tensor([train_subset.dataset.targets[i] for i in subset_indices])
train_class_counts = torch.bincount(train_labels)
print(train_class_counts)

NUM_CLASSES = len(train_class_counts)

tensor([721, 915, 793, 872, 778, 738, 795, 849, 770, 769])


In [73]:
train_class_percentage = np.array(train_class_counts) * 100.0 / sum(train_class_counts)

train_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                                'count': train_class_counts,
                                'percentage (%)': train_class_percentage})

train_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,721,9.0125
1,1,915,11.437501
2,2,793,9.9125
3,3,872,10.900001
4,4,778,9.725
5,5,738,9.225
6,6,795,9.9375
7,7,849,10.612501
8,8,770,9.625
9,9,769,9.6125


In [74]:
# 테스트 데이터
test_labels = test_loader.dataset.targets
test_class_counts = torch.bincount(test_labels)
print(test_class_counts)

tensor([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009])


In [75]:
test_class_percentage = np.array(test_class_counts) * 100.0 / sum(test_class_counts)

test_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                               'count': test_class_counts,
                               'percentage (%)': test_class_percentage})

test_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,980,9.8
1,1,1135,11.35
2,2,1032,10.32
3,3,1010,10.1
4,4,982,9.82
5,5,892,8.92
6,6,958,9.58
7,7,1028,10.28
8,8,974,9.74
9,9,1009,10.09


**2. CNN 모델 정의**

In [76]:
# CNN 모델 정의

class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        # Conv
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU()
        )
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3),
            nn.ReLU()
        )

        # Fully Connected
        self.fc1 = nn.Sequential(
            nn.Linear(64 * 4 * 4, 64),
            nn.Sigmoid()
        )
        self.fc_final = nn.Sequential(
            nn.Linear(64, 10),
            nn.Softmax()  # Classification Task 의 Output Layer 이므로 Softmax 고정
        )

    def forward(self, x):

        # Conv
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)

        x = x.view(-1, 64 * 4 * 4)

        # Fully Connected
        x = self.fc1(x)
        x = self.fc_final(x)

        return x

In [77]:
# 모델 구조 출력

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)

print(summary(model, input_size=(BATCH_SIZE, 1, 28, 28)))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [32, 10]                  --
├─Sequential: 1-1                        [32, 32, 28, 28]          --
│    └─Conv2d: 2-1                       [32, 32, 28, 28]          320
│    └─ReLU: 2-2                         [32, 32, 28, 28]          --
├─MaxPool2d: 1-2                         [32, 32, 14, 14]          --
├─Sequential: 1-3                        [32, 64, 12, 12]          --
│    └─Conv2d: 2-3                       [32, 64, 12, 12]          18,496
│    └─ReLU: 2-4                         [32, 64, 12, 12]          --
├─MaxPool2d: 1-4                         [32, 64, 6, 6]            --
├─Sequential: 1-5                        [32, 64, 4, 4]            --
│    └─Conv2d: 2-5                       [32, 64, 4, 4]            36,928
│    └─ReLU: 2-6                         [32, 64, 4, 4]            --
├─Sequential: 1-6                        [32, 64]                  --
│    └

  return inner()


**3. 데이터셋 분리**

* Train Data -> Train Data + Valid Data

In [78]:
# 데이터셋 분리

from torch.utils.data import random_split

# 샘플 수
num_train = 3000
num_valid = 5000

assert NUM_TRAIN_SAMPLES == num_train + num_valid

# 데이터셋 분리
train_dataset, valid_dataset =\
    random_split(train_subset, [num_train, num_valid])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

**4. 학습 실시 함수**

In [79]:
MAX_EPOCHS = 65536
EARLY_STOPPING_ROUNDS = 5  # Early Stopping Patience (epochs)

In [80]:
from sklearn.metrics import accuracy_score
from copy import deepcopy

In [81]:
# 모델 학습 실시

# args :
# - model           : 학습할 모델
# - train_loader    : Training Data Loader
# - train_loss_list : 각 epoch 에서의 train loss 기록

# returns :
# - train_loss : 모델의 Train Loss

def run_train(model, train_loader, train_loss_list):
    model.train()
    train_loss = 0.0
    cnt = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # train 실시
        model.optimizer.zero_grad()
        outputs = model(images)

        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
        model.optimizer.step()

        train_loss += loss.item()
        cnt += 1

    train_loss_list.append(train_loss / len(train_loader))
    return train_loss_list[-1]

In [82]:
# 모델 validation 실시

# args :
# - model        : validation 할 모델
# - valid_loader : Validation Data Loader

# returns :
# - accuracy : 모델의 validation 정확도

def run_validation(model, valid_loader):
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # validation 실시 및 정확도 측정
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    return accuracy

In [83]:
# 모델 학습 및 validation 전체 프로세스

# args :
# - model        : 학습할 모델
# - train_loader : Training Data Loader
# - valid_loader : 각 epoch 마다 validation 할 Valid Data Loader
# - test_loader  : 최종적으로 성능을 평가할 Test Data Loader
# - verbose      : 학습 중 프로세스 출력 여부 (0, 1, 2)

# returns :
# - final_acc        : 해당 하이퍼파라미터 조합에 대한 최종 Accuracy (valid_acc 이 가장 높았던 epoch 의 모델로 측정)
# - best_epoch_model : valid_acc 이 가장 높았던 epoch 에서 생성된 모델

def run_model_common(model, train_loader, valid_loader, test_loader, verbose=1):

    train_loss_list = []       # train loss
    valid_acc_list = []        # valid accuracy
    max_valid_acc = 0.0        # max validation accuracy
    best_valid_acc_epoch = -1  # valid_acc 이 가장 높았던 epoch
    best_epoch_model = None    # valid_acc 이 가장 높았던 epoch 의 모델

    # 1. 학습 실시
    for epoch in range(MAX_EPOCHS):

        # 1-1. train model
        train_loss = run_train(model, train_loader, train_loss_list)

        # 1-2. validate model (with EPOCH VALID SET)
        epoch_acc = run_validation(model, valid_loader)
        valid_acc_list.append(epoch_acc)

        # 1-3. Early Stopping 처리 (overfitting 방지)
        if epoch_acc > max_valid_acc:
            max_valid_acc = epoch_acc
            best_valid_acc_epoch = epoch

            best_epoch_model = CNN().to(device)
            best_epoch_model.load_state_dict(model.state_dict())

            if verbose == 2:
                print('best model updated')

        if epoch - best_valid_acc_epoch >= EARLY_STOPPING_ROUNDS:
            total_epochs = epoch
            break

        # 1-4. 결과 출력
        if verbose == 2 or (verbose == 1 and epoch % 50 == 49):
            print(f"Epoch {epoch+1}, Loss: {train_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    # check best-epoch model correctly loaded
    checked_acc = run_validation(best_epoch_model, valid_loader)

    if verbose == 2:
        print(f"Best Epoch: {best_valid_acc_epoch}, Best Valid Acc: {max_valid_acc}")
        print(f"Valid Acc (with VALID set) on Loaded Best Model: {checked_acc}")

    assert abs(max_valid_acc - checked_acc) < 1e-8

    # 2. validate best-epoch model (with HPO VALID SET)
    final_acc = run_validation(best_epoch_model, test_loader)

    if verbose == 2:
        print(f"Final Acc (with TEST set) on Loaded Best Model: {final_acc}")

    return final_acc, best_epoch_model, total_epochs

In [84]:
print(device)

cuda


**4-1. 실험 실시**

In [85]:
best_acc = 0       # 모든 조합의 Test dataset 정확도 중 가장 높은 것
best_set = None    # Valid dataset 정확도가 가장 높은 조합
best_model = None  # 정확도가 가장 높은 best 조합으로 학습된 모델

In [86]:
accuracy_log = []  # 정확도 기록
time_log     = []  # 학습 전체 수행 시간 기록
epochs_log   = []  # epoch 횟수

In [87]:
import time

In [88]:
verbose = 2  # 1st trial 에만 학습 중 상세 정보 출력

# 각 learning rate 로 실험 진행 -> Test dataset Accuracy 반환 함수

def run_experiment(learning_rate):
    global best_acc, best_set, best_model, verbose
    global accuracy_log, time_log, epochs_log

    # time check start
    start = time.time()

    # define and run model
    model = CNN().to(device)
    model.optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

    final_acc, best_epoch_model, total_epochs = run_model_common(model,
                                                                 train_loader,
                                                                 valid_loader,
                                                                 test_loader,
                                                                 verbose=verbose)

    verbose = 1

    # global best model 갱신
    if final_acc > best_acc:
        best_acc = final_acc
        best_set = {'learning_rate': learning_rate}

        best_model = CNN().to(device)
        best_model.load_state_dict(best_epoch_model.state_dict())

        print(f'best settings updated with Accuracy={best_acc:.4f}')

    settings = {'learning_rate': learning_rate}
    print(f"Settings: {settings}, Accuracy: {final_acc:.4f}, Total Epochs: {total_epochs}")

    # 기록 및 반환
    experiment_time = time.time() - start

    accuracy_log.append(final_acc)
    time_log.append(experiment_time)
    epochs_log.append(total_epochs)
    return final_acc

In [89]:
# 실험 실시

learning_rates = [pow(10, (-0.1) * x) for x in range(60, 14, -1)]

for lr in learning_rates:
    run_experiment(lr)

  return self._call_impl(*args, **kwargs)


best model updated
Epoch 1, Loss: 2.3029, Accuracy: 0.1094
Epoch 2, Loss: 2.3029, Accuracy: 0.1094
Epoch 3, Loss: 2.3028, Accuracy: 0.1094
Epoch 4, Loss: 2.3028, Accuracy: 0.1094
Epoch 5, Loss: 2.3027, Accuracy: 0.1094
Best Epoch: 0, Best Valid Acc: 0.1094
Valid Acc (with VALID set) on Loaded Best Model: 0.1094
Final Acc (with TEST set) on Loaded Best Model: 0.1028
best settings updated with Accuracy=0.1028
Settings: {'learning_rate': 1e-06}, Accuracy: 0.1028, Total Epochs: 5
Settings: {'learning_rate': 1.2589254117941661e-06}, Accuracy: 0.0958, Total Epochs: 5
Settings: {'learning_rate': 1.584893192461111e-06}, Accuracy: 0.1009, Total Epochs: 5
Settings: {'learning_rate': 1.9952623149688787e-06}, Accuracy: 0.1009, Total Epochs: 5
Settings: {'learning_rate': 2.511886431509577e-06}, Accuracy: 0.1010, Total Epochs: 5
Settings: {'learning_rate': 3.162277660168379e-06}, Accuracy: 0.0980, Total Epochs: 5
best settings updated with Accuracy=0.1671
Settings: {'learning_rate': 3.98107170553496

**5. 성능 결과 및 학습 시간 확인**

In [115]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 그래프로 표시 (1. Learning Rate & Time)

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(x=learning_rates,
                         y=[100.0 * acc for acc in accuracy_log],
                         mode='lines',
                         name='Accuracy'))

fig.add_trace(go.Scatter(x=learning_rates,
                         y=time_log,
                         mode='lines',
                         name='Time'),
              secondary_y=True)

fig.update_layout(width=1000,
                  height=600,
                  title='Accuracy and Time by Learning Rate',
                  xaxis_title='Learning Rate')

fig.update_xaxes(type="log")
fig.update_yaxes(title_text="<b>Accuracy</b> (%)",
                 range=[0.0, 100.0],
                 dtick=10,
                 secondary_y=False)

fig.update_yaxes(title_text="<b>Time</b> (sec.)",
                 range=[0.0, 400.0],
                 dtick=40,
                 secondary_y=True)

fig.show()

In [116]:
fig.update_yaxes(range=[84, 99], dtick=2, secondary_y=False)
fig.update_yaxes(range=[0.0, 375.0], dtick=50, secondary_y=True)

fig.show()

In [117]:
# 그래프로 표시 (2. Learning Rate & Epochs)

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(x=learning_rates,
                         y=[100.0 * acc for acc in accuracy_log],
                         mode='lines',
                         name='Accuracy'))

fig.add_trace(go.Scatter(x=learning_rates,
                         y=epochs_log,
                         mode='lines',
                         name='Epochs'),
              secondary_y=True)

fig.update_layout(width=1000,
                  height=600,
                  title='Accuracy and Epoch Count by Learning Rate',
                  xaxis_title='Learning Rate')

fig.update_xaxes(type="log")
fig.update_yaxes(title_text="<b>Accuracy</b> (%)",
                 range=[0.0, 100.0],
                 dtick=10,
                 secondary_y=False)

fig.update_yaxes(title_text="<b>Epoch</b> Count",
                 range=[0.0, 200.0],
                 dtick=20,
                 secondary_y=True)

fig.show()

In [118]:
fig.update_yaxes(range=[84, 99], dtick=2, secondary_y=False)
fig.update_yaxes(range=[0.0, 187.5], dtick=25, secondary_y=True)

fig.show()