In [902]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import plotly.graph_objects as go

import random
import pandas as pd
import numpy as np

!pip install torchinfo
from torchinfo import summary



In [903]:
# Random Seed 고정 (학습 반복 시행 시에도 동일한 결과가 나오도록)

seed = 20250303

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [904]:
torch.set_printoptions(linewidth=160)

**1. 데이터셋 로딩 및 데이터 분석**

In [905]:
# 데이터셋 로딩

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transform,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transform,
                                          download=True)


In [906]:
# 시간 절약을 위해, 학습 데이터에서 랜덤하게 일부 샘플만 추출

from torch.utils.data import Subset, DataLoader

NUM_TRAIN_SAMPLES = 5000
BATCH_SIZE = 32

subset_indices = random.sample(range(len(train_dataset)), NUM_TRAIN_SAMPLES)
train_subset = Subset(train_dataset, subset_indices)

train_loader = DataLoader(train_subset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

# 테스트 데이터셋은 학습 대상이 아니므로 그대로 이용
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

In [907]:
# 클래스 불균형 분석

# 학습 데이터
train_labels = torch.tensor([train_subset.dataset.targets[i] for i in subset_indices])
train_class_counts = torch.bincount(train_labels)
print(train_class_counts)

NUM_CLASSES = len(train_class_counts)

tensor([519, 543, 492, 499, 460, 451, 525, 553, 486, 472])


In [908]:
train_class_percentage = np.array(train_class_counts) * 100.0 / sum(train_class_counts)

train_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                                'count': train_class_counts,
                                'percentage (%)': train_class_percentage})

train_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,519,10.38
1,1,543,10.86
2,2,492,9.84
3,3,499,9.98
4,4,460,9.2
5,5,451,9.02
6,6,525,10.5
7,7,553,11.06
8,8,486,9.72
9,9,472,9.44


In [909]:
# 테스트 데이터
test_labels = test_loader.dataset.targets
test_class_counts = torch.bincount(test_labels)
print(test_class_counts)

tensor([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009])


In [910]:
test_class_percentage = np.array(test_class_counts) * 100.0 / sum(test_class_counts)

test_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                               'count': test_class_counts,
                               'percentage (%)': test_class_percentage})

test_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,980,9.8
1,1,1135,11.35
2,2,1032,10.32
3,3,1010,10.1
4,4,982,9.82
5,5,892,8.92
6,6,958,9.58
7,7,1028,10.28
8,8,974,9.74
9,9,1009,10.09


**2. CNN 모델 정의**

In [911]:
# CNN 모델 정의

class CNN(nn.Module):

    def __init__(self, num_classes=10, final_activation=nn.Softmax()):
        super(CNN, self).__init__()

        # Conv
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU()
        )
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3),
            nn.ReLU()
        )

        # Fully Connected
        self.fc1 = nn.Sequential(
            nn.Linear(64 * 4 * 4, 64),
            nn.Sigmoid()
        )
        self.fc_final = nn.Sequential(
            nn.Linear(64, num_classes),
            final_activation
        )

    def forward(self, x):

        # Conv
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)

        x = x.view(-1, 64 * 4 * 4)

        # Fully Connected
        x = self.fc1(x)
        x = self.fc_final(x)

        return x

In [912]:
# 모델 구조 출력

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)

print(summary(model, input_size=(BATCH_SIZE, 1, 28, 28)))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [32, 10]                  --
├─Sequential: 1-1                        [32, 32, 28, 28]          --
│    └─Conv2d: 2-1                       [32, 32, 28, 28]          320
│    └─ReLU: 2-2                         [32, 32, 28, 28]          --
├─MaxPool2d: 1-2                         [32, 32, 14, 14]          --
├─Sequential: 1-3                        [32, 64, 12, 12]          --
│    └─Conv2d: 2-3                       [32, 64, 12, 12]          18,496
│    └─ReLU: 2-4                         [32, 64, 12, 12]          --
├─MaxPool2d: 1-4                         [32, 64, 6, 6]            --
├─Sequential: 1-5                        [32, 64, 4, 4]            --
│    └─Conv2d: 2-5                       [32, 64, 4, 4]            36,928
│    └─ReLU: 2-6                         [32, 64, 4, 4]            --
├─Sequential: 1-6                        [32, 64]                  --
│    └

  return inner()


**3. 데이터셋 분리**

* Train Data -> Train Data + Valid Data

In [913]:
# 데이터셋 분리

from torch.utils.data import random_split

# 샘플 수
num_train = 2000
num_valid = 3000

assert NUM_TRAIN_SAMPLES == num_train + num_valid

# 데이터셋 분리
train_dataset, valid_dataset =\
    random_split(train_subset, [num_train, num_valid])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

**4. 학습 실시 함수**

In [914]:
MAX_EPOCHS = 65536
EARLY_STOPPING_ROUNDS = 10  # Early Stopping Patience (epochs)

In [915]:
from sklearn.metrics import accuracy_score
from copy import deepcopy

In [916]:
# 모델 학습 실시

# args :
# - model           : 학습할 모델
# - train_loader    : Training Data Loader
# - train_loss_list : 각 epoch 에서의 train loss 기록

# returns :
# - train_loss : 모델의 Train Loss

def run_train(model, train_loader, train_loss_list):
    model.train()
    train_loss = 0.0
    cnt = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # train 실시
        model.optimizer.zero_grad()
        outputs = model(images)

        # Multi Label 일 때 처리
        if model.multi_label:
            pass

        # Single Label 일 때, Binary Cross Entropy / Mean Squared Error 일 때 output shape 변경 처리
        elif model.loss_function.__class__.__name__ in ['BCELoss', 'BCEWithLogitsLoss', 'MSELoss']:
            labels = torch.nn.functional.one_hot(labels,
                                                 num_classes=model.num_classes)
            labels = labels.float()

        # Loss 처리
        loss = model.loss_function(outputs, labels)

        loss.backward()
        model.optimizer.step()

        train_loss += loss.item()
        cnt += 1

    train_loss_list.append(train_loss / len(train_loader))
    return train_loss_list[-1]

In [917]:
# 모델 validation 실시

# args :
# - model                  : validation 할 모델
# - valid_loader           : Validation Data Loader
# - print_first_batch_info : 1번째 batch 에서 output, label 을 출력할지 여부

# returns :
# - accuracy : 모델의 validation 정확도

def run_validation(model, valid_loader, print_first_batch_info=False):
    model.eval()
    correct, total = 0, 0
    print_output_and_label = True

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # 첫번째 batch 의 output 및 label 출력
            if print_first_batch_info and print_output_and_label:
                print(f'[ outputs ]\n{outputs}\n[ labels ]\n{labels}')
                print_output_and_label = False

            # validation 실시 및 정확도 측정
            total += labels.size(0)

            # For Multi Label
            # (각 sample 당 존재하는 num_classes 개의 0 ~ 1 값을 prediction 으로 간주하고,
            #  모든 sample 에 대한 전체 prediction 중 정답 prediction 의 비율로 정확도 측정)
            if model.multi_label:
                outputs_hard = torch.round(outputs)
                correct += (outputs_hard == labels).sum().item() / model.num_classes

            # For Single Label
            else:
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()

    accuracy = correct / total
    return accuracy

In [918]:
# 모델 학습 및 validation 전체 프로세스

# args :
# - model        : 학습할 모델
# - train_loader : Training Data Loader
# - valid_loader : 각 epoch 마다 validation 할 Valid Data Loader
# - test_loader  : 최종적으로 성능을 평가할 Test Data Loader
# - verbose      : 학습 중 프로세스 출력 여부

# returns :
# - final_acc        : 해당 하이퍼파라미터 조합에 대한 최종 Accuracy (valid_acc 이 가장 높았던 epoch 의 모델로 측정)
# - best_epoch_model : valid_acc 이 가장 높았던 epoch 에서 생성된 모델

def run_model_common(model, train_loader, valid_loader, test_loader, verbose=False):

    train_loss_list = []       # train loss
    valid_acc_list = []        # valid accuracy
    max_valid_acc = 0.0        # max validation accuracy
    best_valid_acc_epoch = -1  # valid_acc 이 가장 높았던 epoch
    best_epoch_model = None    # valid_acc 이 가장 높았던 epoch 의 모델

    # 1. 학습 실시
    for epoch in range(MAX_EPOCHS):

        # 1-1. train model
        train_loss = run_train(model, train_loader, train_loss_list)

        # 1-2. validate model (with EPOCH VALID SET)
        epoch_acc = run_validation(model, valid_loader)
        valid_acc_list.append(epoch_acc)

        # 1-3. Early Stopping 처리 (overfitting 방지)
        if epoch_acc > max_valid_acc:
            max_valid_acc = epoch_acc
            best_valid_acc_epoch = epoch

            best_epoch_model = CNN(num_classes=model.num_classes,
                                   final_activation=model.final_activation).to(device)
            best_epoch_model.multi_label = model.multi_label
            best_epoch_model.num_classes = model.num_classes

            best_epoch_model.load_state_dict(model.state_dict())

            if verbose:
                print('best model updated')

        if epoch - best_valid_acc_epoch >= EARLY_STOPPING_ROUNDS:
            total_epochs = epoch
            break

        # 1-4. 결과 출력
        if verbose:
            print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Accuracy: {epoch_acc:.4f}")

    # check best-epoch model correctly loaded
    checked_acc = run_validation(best_epoch_model, valid_loader)

    if verbose:
        print(f"Best Epoch: {best_valid_acc_epoch}, Best Valid Acc: {max_valid_acc}")
        print(f"Valid Acc (with VALID set) on Loaded Best Model: {checked_acc}")

    assert abs(max_valid_acc - checked_acc) < 1e-8

    # 2. validate best-epoch model (with HPO VALID SET)
    final_acc = run_validation(model=best_epoch_model,
                               valid_loader=test_loader,
                               print_first_batch_info=True)

    if verbose:
        print(f"Final Acc (with TEST set) on Loaded Best Model: {final_acc}")

    return final_acc, best_epoch_model, total_epochs

In [919]:
print(device)

cuda


**5. 실험 실시**

In [920]:
# 실험 실시

# args:
# - loss_function    : 모델 학습에 사용할 Loss Function
# - train_loader     : Training Data Loader
# - valid_loader     : 각 epoch 마다 validation 할 Valid Data Loader
# - test_loader      : 최종적으로 성능을 평가할 Test Data Loader
# - num_classes      : 모델이 분류할 class 개수 (또는 output value 개수)
# - final_activation : 모델의 최종 Layer 의 Activation Function
# - multi_label      : Multi-Label 여부

# returns:
# - final_acc        : Test dataset Accuracy
# - best_epoch_model : Valid dataset Accuracy 가 가장 높은 모델

def run_experiment(loss_function, train_loader, valid_loader, test_loader,
                   num_classes=10, final_activation=nn.Softmax(), multi_label=False):

    # define and run model
    model = CNN(num_classes, final_activation).to(device)

    # model configurations
    model.num_classes = num_classes
    model.final_activation = final_activation
    model.loss_function = loss_function
    model.optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
    model.multi_label = multi_label

    final_acc, best_epoch_model, total_epochs = run_model_common(model,
                                                                 train_loader,
                                                                 valid_loader,
                                                                 test_loader,
                                                                 verbose=True)

    print(f"Accuracy: {final_acc:.4f}, Total Epochs: {total_epochs}")

    return final_acc, best_epoch_model

In [921]:
# 실험 함수의 정상 동작 여부 확인

base_loss_function = nn.CrossEntropyLoss()
_, _ = run_experiment(base_loss_function, train_loader, valid_loader, test_loader)

  return self._call_impl(*args, **kwargs)


best model updated
Epoch 1, Train Loss: 2.1983, Valid Accuracy: 0.7333
best model updated
Epoch 2, Train Loss: 1.8871, Valid Accuracy: 0.8903
best model updated
Epoch 3, Train Loss: 1.6763, Valid Accuracy: 0.9247
best model updated
Epoch 4, Train Loss: 1.5817, Valid Accuracy: 0.9397
best model updated
Epoch 5, Train Loss: 1.5435, Valid Accuracy: 0.9447
best model updated
Epoch 6, Train Loss: 1.5222, Valid Accuracy: 0.9557
best model updated
Epoch 7, Train Loss: 1.5072, Valid Accuracy: 0.9597
Epoch 8, Train Loss: 1.4981, Valid Accuracy: 0.9553
Epoch 9, Train Loss: 1.4950, Valid Accuracy: 0.9590
Epoch 10, Train Loss: 1.4870, Valid Accuracy: 0.9583
best model updated
Epoch 11, Train Loss: 1.4864, Valid Accuracy: 0.9623
Epoch 12, Train Loss: 1.4814, Valid Accuracy: 0.9603
best model updated
Epoch 13, Train Loss: 1.4781, Valid Accuracy: 0.9630
Epoch 14, Train Loss: 1.4765, Valid Accuracy: 0.9540
best model updated
Epoch 15, Train Loss: 1.4765, Valid Accuracy: 0.9650
Epoch 16, Train Loss: 1.

5-1. **Binary Classification** with **Mean-Squared Error**

In [922]:
# Binary Classification 용으로 변경

from torch.utils.data import Dataset

class MNIST_bc(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
        self.class_map = {0: 1, 3: 1, 6: 1, 8: 1, 9: 1,
                          1: 0, 2: 0, 4: 0, 5: 0, 7: 0}

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, label = self.dataset[idx]
        new_label = self.class_map[label]
        return image, new_label

In [923]:
# 데이터셋 클래스 분포 분석 함수

def analyze_class_distribution(data_loader):
    targets = [data_loader.dataset[i][1] for i in range(len(data_loader.dataset))]

    class_list, class_counts = np.unique(targets, return_counts=True)
    class_percentage = np.array(class_counts) * 100.0 / sum(class_counts)

    class_distrib = pd.DataFrame({'class': class_list,
                                  'count': class_counts,
                                  'percentage (%)': class_percentage})

    return class_distrib

In [924]:
# Binary Classification 용 Data Loader

train_loader_bc = DataLoader(MNIST_bc(train_loader.dataset),
                             batch_size=BATCH_SIZE,
                             shuffle=True)

valid_loader_bc = DataLoader(MNIST_bc(valid_loader.dataset),
                             batch_size=BATCH_SIZE,
                             shuffle=False)

test_loader_bc = DataLoader(MNIST_bc(test_loader.dataset),
                            batch_size=BATCH_SIZE,
                            shuffle=False)

In [925]:
analyze_class_distribution(train_loader_bc)

Unnamed: 0,class,count,percentage (%)
0,0,998,49.9
1,1,1002,50.1


In [926]:
analyze_class_distribution(valid_loader_bc)

Unnamed: 0,class,count,percentage (%)
0,0,1501,50.033333
1,1,1499,49.966667


In [927]:
analyze_class_distribution(test_loader_bc)

Unnamed: 0,class,count,percentage (%)
0,0,5069,50.69
1,1,4931,49.31


In [928]:
# Binary Classification 을 Class 2개 기준의 Binary Cross-Entropy 를 이용하여 실험 진행

# 모델의 최종 출력값은 Softmax 함수를 통해 0 ~ 1 로 변환된 값임이 보장됨
# -> nn.BCEWithLogitsLoss() 사용 불필요

_, best_epoch_model = run_experiment(nn.BCELoss(),
                                     train_loader=train_loader_bc,
                                     valid_loader=valid_loader_bc,
                                     test_loader=test_loader_bc,
                                     num_classes=2,
                                     final_activation=nn.Softmax())

  return self._call_impl(*args, **kwargs)


best model updated
Epoch 1, Train Loss: 0.5639, Valid Accuracy: 0.8143
best model updated
Epoch 2, Train Loss: 0.3588, Valid Accuracy: 0.8797
best model updated
Epoch 3, Train Loss: 0.2148, Valid Accuracy: 0.9283
best model updated
Epoch 4, Train Loss: 0.1509, Valid Accuracy: 0.9437
best model updated
Epoch 5, Train Loss: 0.1087, Valid Accuracy: 0.9580
Epoch 6, Train Loss: 0.0722, Valid Accuracy: 0.9563
best model updated
Epoch 7, Train Loss: 0.0490, Valid Accuracy: 0.9610
Epoch 8, Train Loss: 0.0363, Valid Accuracy: 0.9593
Epoch 9, Train Loss: 0.0238, Valid Accuracy: 0.9533
best model updated
Epoch 10, Train Loss: 0.0234, Valid Accuracy: 0.9643
best model updated
Epoch 11, Train Loss: 0.0078, Valid Accuracy: 0.9697
best model updated
Epoch 12, Train Loss: 0.0030, Valid Accuracy: 0.9703
Epoch 13, Train Loss: 0.0018, Valid Accuracy: 0.9693
Epoch 14, Train Loss: 0.0014, Valid Accuracy: 0.9687
Epoch 15, Train Loss: 0.0012, Valid Accuracy: 0.9693
Epoch 16, Train Loss: 0.0010, Valid Accurac

In [929]:
# Softmax 대신 Sigmoid 를 사용한 경우

_, best_epoch_model = run_experiment(nn.BCELoss(),
                                     train_loader=train_loader_bc,
                                     valid_loader=valid_loader_bc,
                                     test_loader=test_loader_bc,
                                     num_classes=2,
                                     final_activation=nn.Sigmoid())

best model updated
Epoch 1, Train Loss: 0.6059, Valid Accuracy: 0.7893
best model updated
Epoch 2, Train Loss: 0.4022, Valid Accuracy: 0.8800
best model updated
Epoch 3, Train Loss: 0.2573, Valid Accuracy: 0.9263
best model updated
Epoch 4, Train Loss: 0.1721, Valid Accuracy: 0.9347
best model updated
Epoch 5, Train Loss: 0.1449, Valid Accuracy: 0.9367
Epoch 6, Train Loss: 0.0902, Valid Accuracy: 0.9320
best model updated
Epoch 7, Train Loss: 0.0776, Valid Accuracy: 0.9550
best model updated
Epoch 8, Train Loss: 0.0500, Valid Accuracy: 0.9643
Epoch 9, Train Loss: 0.0397, Valid Accuracy: 0.9583
Epoch 10, Train Loss: 0.0217, Valid Accuracy: 0.9630
Epoch 11, Train Loss: 0.0144, Valid Accuracy: 0.9500
Epoch 12, Train Loss: 0.0146, Valid Accuracy: 0.9503
best model updated
Epoch 13, Train Loss: 0.0084, Valid Accuracy: 0.9653
Epoch 14, Train Loss: 0.0038, Valid Accuracy: 0.9653
best model updated
Epoch 15, Train Loss: 0.0028, Valid Accuracy: 0.9667
Epoch 16, Train Loss: 0.0024, Valid Accurac

In [930]:
# nn.BCELoss() 대신 nn.BCEWithLogitsLoss() 사용 시

_, best_epoch_model = run_experiment(nn.BCEWithLogitsLoss(),
                                     train_loader=train_loader_bc,
                                     valid_loader=valid_loader_bc,
                                     test_loader=test_loader_bc,
                                     num_classes=2,
                                     final_activation=nn.Softmax())

best model updated
Epoch 1, Train Loss: 0.6635, Valid Accuracy: 0.8087
best model updated
Epoch 2, Train Loss: 0.5909, Valid Accuracy: 0.8460
best model updated
Epoch 3, Train Loss: 0.5668, Valid Accuracy: 0.8810
best model updated
Epoch 4, Train Loss: 0.5510, Valid Accuracy: 0.9127
best model updated
Epoch 5, Train Loss: 0.5361, Valid Accuracy: 0.9293
best model updated
Epoch 6, Train Loss: 0.5313, Valid Accuracy: 0.9390
Epoch 7, Train Loss: 0.5199, Valid Accuracy: 0.9370
best model updated
Epoch 8, Train Loss: 0.5161, Valid Accuracy: 0.9557
Epoch 9, Train Loss: 0.5153, Valid Accuracy: 0.9440
best model updated
Epoch 10, Train Loss: 0.5159, Valid Accuracy: 0.9580
Epoch 11, Train Loss: 0.5133, Valid Accuracy: 0.9450
Epoch 12, Train Loss: 0.5130, Valid Accuracy: 0.9463
Epoch 13, Train Loss: 0.5106, Valid Accuracy: 0.9577
best model updated
Epoch 14, Train Loss: 0.5115, Valid Accuracy: 0.9583
Epoch 15, Train Loss: 0.5084, Valid Accuracy: 0.9553
Epoch 16, Train Loss: 0.5076, Valid Accurac

In [931]:
# Binary Classification 을 Mean-Squared Error (w/ Softmax as final activation) 를 이용하여 실험 진행

_, best_epoch_model = run_experiment(nn.MSELoss(),
                                     train_loader=train_loader_bc,
                                     valid_loader=valid_loader_bc,
                                     test_loader=test_loader_bc,
                                     num_classes=2,
                                     final_activation=nn.Softmax())

best model updated
Epoch 1, Train Loss: 0.1863, Valid Accuracy: 0.7750
best model updated
Epoch 2, Train Loss: 0.1111, Valid Accuracy: 0.8997
best model updated
Epoch 3, Train Loss: 0.0674, Valid Accuracy: 0.9187
best model updated
Epoch 4, Train Loss: 0.0409, Valid Accuracy: 0.9350
best model updated
Epoch 5, Train Loss: 0.0334, Valid Accuracy: 0.9467
Epoch 6, Train Loss: 0.0211, Valid Accuracy: 0.9450
Epoch 7, Train Loss: 0.0166, Valid Accuracy: 0.9357
best model updated
Epoch 8, Train Loss: 0.0119, Valid Accuracy: 0.9547
Epoch 9, Train Loss: 0.0093, Valid Accuracy: 0.9520
best model updated
Epoch 10, Train Loss: 0.0082, Valid Accuracy: 0.9577
Epoch 11, Train Loss: 0.0060, Valid Accuracy: 0.9560
best model updated
Epoch 12, Train Loss: 0.0056, Valid Accuracy: 0.9613
Epoch 13, Train Loss: 0.0051, Valid Accuracy: 0.9540
Epoch 14, Train Loss: 0.0045, Valid Accuracy: 0.9610
best model updated
Epoch 15, Train Loss: 0.0033, Valid Accuracy: 0.9663
Epoch 16, Train Loss: 0.0031, Valid Accurac

In [932]:
# Binary Classification 을 Mean-Squared Error (w/ Sigmoid as final activation) 를 이용하여 실험 진행

_, best_epoch_model = run_experiment(nn.MSELoss(),
                                     train_loader=train_loader_bc,
                                     valid_loader=valid_loader_bc,
                                     test_loader=test_loader_bc,
                                     num_classes=2,
                                     final_activation=nn.Sigmoid())

best model updated
Epoch 1, Train Loss: 0.2213, Valid Accuracy: 0.7657
best model updated
Epoch 2, Train Loss: 0.1394, Valid Accuracy: 0.8537
best model updated
Epoch 3, Train Loss: 0.0822, Valid Accuracy: 0.9173
best model updated
Epoch 4, Train Loss: 0.0496, Valid Accuracy: 0.9353
best model updated
Epoch 5, Train Loss: 0.0352, Valid Accuracy: 0.9510
Epoch 6, Train Loss: 0.0233, Valid Accuracy: 0.9447
Epoch 7, Train Loss: 0.0190, Valid Accuracy: 0.9337
Epoch 8, Train Loss: 0.0152, Valid Accuracy: 0.9413
best model updated
Epoch 9, Train Loss: 0.0113, Valid Accuracy: 0.9550
best model updated
Epoch 10, Train Loss: 0.0081, Valid Accuracy: 0.9597
best model updated
Epoch 11, Train Loss: 0.0060, Valid Accuracy: 0.9603
Epoch 12, Train Loss: 0.0058, Valid Accuracy: 0.9590
Epoch 13, Train Loss: 0.0049, Valid Accuracy: 0.9597
Epoch 14, Train Loss: 0.0067, Valid Accuracy: 0.9490
best model updated
Epoch 15, Train Loss: 0.0036, Valid Accuracy: 0.9687
Epoch 16, Train Loss: 0.0032, Valid Accurac

5-2. **Multi-Class Classification** with **Mean-Squared Error**

In [933]:
# 정상적으로 Categorical Cross Entropy 적용 시

_, best_epoch_model = run_experiment(nn.CrossEntropyLoss(),
                                     train_loader=train_loader,
                                     valid_loader=valid_loader,
                                     test_loader=test_loader,
                                     num_classes=10,
                                     final_activation=nn.Softmax())

best model updated
Epoch 1, Train Loss: 2.1745, Valid Accuracy: 0.6620
best model updated
Epoch 2, Train Loss: 1.8798, Valid Accuracy: 0.8333
best model updated
Epoch 3, Train Loss: 1.7023, Valid Accuracy: 0.8517
best model updated
Epoch 4, Train Loss: 1.6397, Valid Accuracy: 0.8613
best model updated
Epoch 5, Train Loss: 1.6146, Valid Accuracy: 0.8633
Epoch 6, Train Loss: 1.5973, Valid Accuracy: 0.8590
best model updated
Epoch 7, Train Loss: 1.5621, Valid Accuracy: 0.9363
best model updated
Epoch 8, Train Loss: 1.5153, Valid Accuracy: 0.9593
Epoch 9, Train Loss: 1.4984, Valid Accuracy: 0.9583
best model updated
Epoch 10, Train Loss: 1.4910, Valid Accuracy: 0.9623
best model updated
Epoch 11, Train Loss: 1.4850, Valid Accuracy: 0.9627
best model updated
Epoch 12, Train Loss: 1.4799, Valid Accuracy: 0.9643
best model updated
Epoch 13, Train Loss: 1.4773, Valid Accuracy: 0.9683
Epoch 14, Train Loss: 1.4750, Valid Accuracy: 0.9670
Epoch 15, Train Loss: 1.4726, Valid Accuracy: 0.9670
Epoch

In [934]:
# Mean Squared Error Loss + 최종 output 활성화 함수 = Softmax 일 때

_, best_epoch_model = run_experiment(nn.MSELoss(),
                                     train_loader=train_loader,
                                     valid_loader=valid_loader,
                                     test_loader=test_loader,
                                     num_classes=10,
                                     final_activation=nn.Softmax())

best model updated
Epoch 1, Train Loss: 0.0713, Valid Accuracy: 0.7790
best model updated
Epoch 2, Train Loss: 0.0340, Valid Accuracy: 0.8900
best model updated
Epoch 3, Train Loss: 0.0167, Valid Accuracy: 0.9373
best model updated
Epoch 4, Train Loss: 0.0099, Valid Accuracy: 0.9510
best model updated
Epoch 5, Train Loss: 0.0070, Valid Accuracy: 0.9513
best model updated
Epoch 6, Train Loss: 0.0048, Valid Accuracy: 0.9550
best model updated
Epoch 7, Train Loss: 0.0039, Valid Accuracy: 0.9563
best model updated
Epoch 8, Train Loss: 0.0026, Valid Accuracy: 0.9613
best model updated
Epoch 9, Train Loss: 0.0022, Valid Accuracy: 0.9627
best model updated
Epoch 10, Train Loss: 0.0016, Valid Accuracy: 0.9670
Epoch 11, Train Loss: 0.0012, Valid Accuracy: 0.9627
Epoch 12, Train Loss: 0.0010, Valid Accuracy: 0.9670
best model updated
Epoch 13, Train Loss: 0.0008, Valid Accuracy: 0.9673
best model updated
Epoch 14, Train Loss: 0.0007, Valid Accuracy: 0.9677
best model updated
Epoch 15, Train Loss

In [935]:
# Mean Squared Error Loss + 최종 output 활성화 함수 = Sigmoid 일 때

_, best_epoch_model = run_experiment(nn.MSELoss(),
                                     train_loader=train_loader,
                                     valid_loader=valid_loader,
                                     test_loader=test_loader,
                                     num_classes=10,
                                     final_activation=nn.Sigmoid())

best model updated
Epoch 1, Train Loss: 0.1081, Valid Accuracy: 0.0910
best model updated
Epoch 2, Train Loss: 0.0902, Valid Accuracy: 0.1097
Epoch 3, Train Loss: 0.0900, Valid Accuracy: 0.1097
Epoch 4, Train Loss: 0.0900, Valid Accuracy: 0.1097
Epoch 5, Train Loss: 0.0900, Valid Accuracy: 0.1097
Epoch 6, Train Loss: 0.0900, Valid Accuracy: 0.1097
Epoch 7, Train Loss: 0.0900, Valid Accuracy: 0.1097
Epoch 8, Train Loss: 0.0900, Valid Accuracy: 0.1097
Epoch 9, Train Loss: 0.0900, Valid Accuracy: 0.1097
Epoch 10, Train Loss: 0.0900, Valid Accuracy: 0.1097
Epoch 11, Train Loss: 0.0900, Valid Accuracy: 0.1097
Best Epoch: 1, Best Valid Acc: 0.10966666666666666
Valid Acc (with VALID set) on Loaded Best Model: 0.10966666666666666
[ outputs ]
tensor([[0.1012, 0.1042, 0.1048, 0.1000, 0.0929, 0.0987, 0.1009, 0.1161, 0.1045, 0.1095],
        [0.1012, 0.1042, 0.1048, 0.1000, 0.0929, 0.0987, 0.1009, 0.1161, 0.1045, 0.1095],
        [0.1012, 0.1042, 0.1048, 0.1000, 0.0929, 0.0987, 0.1009, 0.1161, 0.1

5-3. **Multi-Class Classification** with **Binary Cross-Entropy** (각 Class 별)

In [936]:
# 각 Class 별 Binary Cross-Entropy Loss + 최종 output 활성화 함수 = Softmax 일 때
# output 이 Softmax 를 거쳐서 0 ~ 1 이 되므로, nn.BCEWithLogitsLoss() 적용 불필요

_, best_epoch_model = run_experiment(nn.BCELoss(),
                                     train_loader=train_loader,
                                     valid_loader=valid_loader,
                                     test_loader=test_loader,
                                     num_classes=10,
                                     final_activation=nn.Softmax())

best model updated
Epoch 1, Train Loss: 0.2621, Valid Accuracy: 0.7777
best model updated
Epoch 2, Train Loss: 0.1418, Valid Accuracy: 0.8977
best model updated
Epoch 3, Train Loss: 0.0847, Valid Accuracy: 0.9300
best model updated
Epoch 4, Train Loss: 0.0543, Valid Accuracy: 0.9460
Epoch 5, Train Loss: 0.0372, Valid Accuracy: 0.9460
best model updated
Epoch 6, Train Loss: 0.0284, Valid Accuracy: 0.9503
best model updated
Epoch 7, Train Loss: 0.0218, Valid Accuracy: 0.9623
Epoch 8, Train Loss: 0.0162, Valid Accuracy: 0.9610
best model updated
Epoch 9, Train Loss: 0.0126, Valid Accuracy: 0.9653
Epoch 10, Train Loss: 0.0099, Valid Accuracy: 0.9640
Epoch 11, Train Loss: 0.0088, Valid Accuracy: 0.9577
best model updated
Epoch 12, Train Loss: 0.0076, Valid Accuracy: 0.9673
Epoch 13, Train Loss: 0.0055, Valid Accuracy: 0.9650
Epoch 14, Train Loss: 0.0048, Valid Accuracy: 0.9663
best model updated
Epoch 15, Train Loss: 0.0043, Valid Accuracy: 0.9687
best model updated
Epoch 16, Train Loss: 0.

In [937]:
# 각 Class 별 Binary Cross-Entropy Loss + 최종 output 활성화 함수 = Sigmoid 일 때
# output 이 Sigmoid 를 거쳐서 0 ~ 1 이 되므로, nn.BCEWithLogitsLoss() 적용 불필요

_, best_epoch_model = run_experiment(nn.BCELoss(),
                                     train_loader=train_loader,
                                     valid_loader=valid_loader,
                                     test_loader=test_loader,
                                     num_classes=10,
                                     final_activation=nn.Sigmoid())

best model updated
Epoch 1, Train Loss: 0.3826, Valid Accuracy: 0.1097
Epoch 2, Train Loss: 0.3253, Valid Accuracy: 0.1097
Epoch 3, Train Loss: 0.3253, Valid Accuracy: 0.1097
Epoch 4, Train Loss: 0.3254, Valid Accuracy: 0.1097
Epoch 5, Train Loss: 0.3253, Valid Accuracy: 0.1097
Epoch 6, Train Loss: 0.3254, Valid Accuracy: 0.1097
Epoch 7, Train Loss: 0.3254, Valid Accuracy: 0.1097
Epoch 8, Train Loss: 0.3253, Valid Accuracy: 0.1097
Epoch 9, Train Loss: 0.3256, Valid Accuracy: 0.1097
Epoch 10, Train Loss: 0.3252, Valid Accuracy: 0.1097
Best Epoch: 0, Best Valid Acc: 0.10966666666666666
Valid Acc (with VALID set) on Loaded Best Model: 0.10966666666666666
[ outputs ]
tensor([[0.1164, 0.1161, 0.1150, 0.1248, 0.1000, 0.0977, 0.0986, 0.1302, 0.1032, 0.1024],
        [0.1164, 0.1161, 0.1150, 0.1248, 0.1000, 0.0977, 0.0986, 0.1302, 0.1032, 0.1024],
        [0.1164, 0.1161, 0.1150, 0.1248, 0.1000, 0.0977, 0.0986, 0.1302, 0.1032, 0.1024],
        [0.1164, 0.1161, 0.1150, 0.1248, 0.1000, 0.0977, 0

In [938]:
# Softmax 활성화 함수를 사용할 때, nn.BCELoss() 대신 nn.BCEWithLogitsLoss() 적용하는 경우

_, best_epoch_model = run_experiment(nn.BCEWithLogitsLoss(),
                                     train_loader=train_loader,
                                     valid_loader=valid_loader,
                                     test_loader=test_loader,
                                     num_classes=10,
                                     final_activation=nn.Softmax())

best model updated
Epoch 1, Train Loss: 0.7225, Valid Accuracy: 0.6483
best model updated
Epoch 2, Train Loss: 0.6963, Valid Accuracy: 0.7880
best model updated
Epoch 3, Train Loss: 0.6799, Valid Accuracy: 0.9310
best model updated
Epoch 4, Train Loss: 0.6679, Valid Accuracy: 0.9430
best model updated
Epoch 5, Train Loss: 0.6629, Valid Accuracy: 0.9493
best model updated
Epoch 6, Train Loss: 0.6611, Valid Accuracy: 0.9510
best model updated
Epoch 7, Train Loss: 0.6594, Valid Accuracy: 0.9583
best model updated
Epoch 8, Train Loss: 0.6586, Valid Accuracy: 0.9603
best model updated
Epoch 9, Train Loss: 0.6581, Valid Accuracy: 0.9610
best model updated
Epoch 10, Train Loss: 0.6576, Valid Accuracy: 0.9617
best model updated
Epoch 11, Train Loss: 0.6571, Valid Accuracy: 0.9650
Epoch 12, Train Loss: 0.6570, Valid Accuracy: 0.9620
best model updated
Epoch 13, Train Loss: 0.6568, Valid Accuracy: 0.9657
Epoch 14, Train Loss: 0.6564, Valid Accuracy: 0.9633
best model updated
Epoch 15, Train Loss

5-4. **Multi-Label Classification** with **Mean-Squared Error**

In [939]:
# Multi-Label Classification 용으로 변경

from torch.utils.data import Dataset

class MNIST_mlc(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
        self.class_map = {0: torch.tensor([1., 0., 1., 1.]),
                          1: torch.tensor([0., 0., 0., 1.]),
                          2: torch.tensor([1., 1., 0., 0.]),
                          3: torch.tensor([0., 1., 1., 0.]),
                          4: torch.tensor([1., 0., 0., 1.]),
                          5: torch.tensor([0., 1., 0., 0.]),
                          6: torch.tensor([1., 0., 1., 0.]),
                          7: torch.tensor([0., 1., 0., 0.]),
                          8: torch.tensor([1., 0., 1., 0.]),
                          9: torch.tensor([0., 0., 1., 1.])}

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, label = self.dataset[idx]
        new_label = self.class_map[label]
        return image, new_label

In [940]:
# Binary Classification 용 Data Loader

train_loader_mlc = DataLoader(MNIST_mlc(train_loader.dataset),
                             batch_size=BATCH_SIZE,
                             shuffle=True)

valid_loader_mlc = DataLoader(MNIST_mlc(valid_loader.dataset),
                             batch_size=BATCH_SIZE,
                             shuffle=False)

test_loader_mlc = DataLoader(MNIST_mlc(test_loader.dataset),
                            batch_size=BATCH_SIZE,
                            shuffle=False)

In [941]:
# 데이터셋 클래스 분포 분석 함수

def analyze_class_distribution_mlc(data_loader):
    targets = [str(data_loader.dataset[i][1]) for i in range(len(data_loader.dataset))]

    class_list, class_counts = np.unique(targets, return_counts=True)
    class_percentage = np.array(class_counts) * 100.0 / sum(class_counts)

    class_distrib = pd.DataFrame({'class': class_list,
                                  'count': class_counts,
                                  'percentage (%)': class_percentage})

    return class_distrib

In [942]:
analyze_class_distribution_mlc(train_loader_mlc)

Unnamed: 0,class,count,percentage (%)
0,"tensor([0., 0., 0., 1.])",205,10.25
1,"tensor([0., 0., 1., 1.])",199,9.95
2,"tensor([0., 1., 0., 0.])",417,20.85
3,"tensor([0., 1., 1., 0.])",201,10.05
4,"tensor([1., 0., 0., 1.])",174,8.7
5,"tensor([1., 0., 1., 0.])",401,20.05
6,"tensor([1., 0., 1., 1.])",201,10.05
7,"tensor([1., 1., 0., 0.])",202,10.1


In [943]:
analyze_class_distribution_mlc(valid_loader_mlc)

Unnamed: 0,class,count,percentage (%)
0,"tensor([0., 0., 0., 1.])",338,11.266667
1,"tensor([0., 0., 1., 1.])",273,9.1
2,"tensor([0., 1., 0., 0.])",587,19.566667
3,"tensor([0., 1., 1., 0.])",298,9.933333
4,"tensor([1., 0., 0., 1.])",286,9.533333
5,"tensor([1., 0., 1., 0.])",610,20.333333
6,"tensor([1., 0., 1., 1.])",318,10.6
7,"tensor([1., 1., 0., 0.])",290,9.666667


In [944]:
analyze_class_distribution_mlc(test_loader_mlc)

Unnamed: 0,class,count,percentage (%)
0,"tensor([0., 0., 0., 1.])",1135,11.35
1,"tensor([0., 0., 1., 1.])",1009,10.09
2,"tensor([0., 1., 0., 0.])",1920,19.2
3,"tensor([0., 1., 1., 0.])",1010,10.1
4,"tensor([1., 0., 0., 1.])",982,9.82
5,"tensor([1., 0., 1., 0.])",1932,19.32
6,"tensor([1., 0., 1., 1.])",980,9.8
7,"tensor([1., 1., 0., 0.])",1032,10.32


In [945]:
# 정상적으로 Binary Cross-Entropy 적용 시

# Sigmoid 를 이미 적용하여 최종 출력값을 0 ~ 1 로 만들었으므로,
# nn.BCEWithLogitsLoss() 가 아닌 nn.BCELoss() 를 사용

_, best_epoch_model = run_experiment(nn.BCELoss(),
                                     train_loader=train_loader_mlc,
                                     valid_loader=valid_loader_mlc,
                                     test_loader=test_loader_mlc,
                                     num_classes=4,
                                     final_activation=nn.Sigmoid(),
                                     multi_label=True)

best model updated
Epoch 1, Train Loss: 0.5990, Valid Accuracy: 0.8268
best model updated
Epoch 2, Train Loss: 0.3787, Valid Accuracy: 0.9206
best model updated
Epoch 3, Train Loss: 0.2620, Valid Accuracy: 0.9304
best model updated
Epoch 4, Train Loss: 0.1949, Valid Accuracy: 0.9523
best model updated
Epoch 5, Train Loss: 0.1504, Valid Accuracy: 0.9613
best model updated
Epoch 6, Train Loss: 0.1194, Valid Accuracy: 0.9616
best model updated
Epoch 7, Train Loss: 0.0969, Valid Accuracy: 0.9647
best model updated
Epoch 8, Train Loss: 0.0781, Valid Accuracy: 0.9765
best model updated
Epoch 9, Train Loss: 0.0661, Valid Accuracy: 0.9769
Epoch 10, Train Loss: 0.0535, Valid Accuracy: 0.9768
best model updated
Epoch 11, Train Loss: 0.0448, Valid Accuracy: 0.9782
best model updated
Epoch 12, Train Loss: 0.0372, Valid Accuracy: 0.9798
Epoch 13, Train Loss: 0.0324, Valid Accuracy: 0.9782
best model updated
Epoch 14, Train Loss: 0.0282, Valid Accuracy: 0.9817
Epoch 15, Train Loss: 0.0231, Valid Acc

In [946]:
# Binary Cross Entropy 대신 Mean-Squared Error 적용 시 (w/ Softmax)

_, best_epoch_model = run_experiment(nn.MSELoss(),
                                     train_loader=train_loader_mlc,
                                     valid_loader=valid_loader_mlc,
                                     test_loader=test_loader_mlc,
                                     num_classes=4,
                                     final_activation=nn.Softmax(),
                                     multi_label=True)

  return self._call_impl(*args, **kwargs)


best model updated
Epoch 1, Train Loss: 0.2506, Valid Accuracy: 0.6769
best model updated
Epoch 2, Train Loss: 0.1800, Valid Accuracy: 0.7012
best model updated
Epoch 3, Train Loss: 0.1526, Valid Accuracy: 0.7281
Epoch 4, Train Loss: 0.1382, Valid Accuracy: 0.7192
Epoch 5, Train Loss: 0.1316, Valid Accuracy: 0.7219
best model updated
Epoch 6, Train Loss: 0.1242, Valid Accuracy: 0.7390
Epoch 7, Train Loss: 0.1214, Valid Accuracy: 0.7194
best model updated
Epoch 8, Train Loss: 0.1190, Valid Accuracy: 0.7408
Epoch 9, Train Loss: 0.1186, Valid Accuracy: 0.7310
Epoch 10, Train Loss: 0.1154, Valid Accuracy: 0.7297
best model updated
Epoch 11, Train Loss: 0.1145, Valid Accuracy: 0.7517
Epoch 12, Train Loss: 0.1131, Valid Accuracy: 0.7128
Epoch 13, Train Loss: 0.1121, Valid Accuracy: 0.7284
Epoch 14, Train Loss: 0.1111, Valid Accuracy: 0.7413
Epoch 15, Train Loss: 0.1115, Valid Accuracy: 0.7508
Epoch 16, Train Loss: 0.1117, Valid Accuracy: 0.7332
best model updated
Epoch 17, Train Loss: 0.1105

In [947]:
# Binary Cross Entropy 대신 Mean-Squared Error 적용 시 (w/ Sigmoid)

_, best_epoch_model = run_experiment(nn.MSELoss(),
                                     train_loader=train_loader_mlc,
                                     valid_loader=valid_loader_mlc,
                                     test_loader=test_loader_mlc,
                                     num_classes=4,
                                     final_activation=nn.Sigmoid(),
                                     multi_label=True)

best model updated
Epoch 1, Train Loss: 0.1994, Valid Accuracy: 0.8393
best model updated
Epoch 2, Train Loss: 0.1167, Valid Accuracy: 0.9067
best model updated
Epoch 3, Train Loss: 0.0700, Valid Accuracy: 0.9404
best model updated
Epoch 4, Train Loss: 0.0464, Valid Accuracy: 0.9543
best model updated
Epoch 5, Train Loss: 0.0331, Valid Accuracy: 0.9647
best model updated
Epoch 6, Train Loss: 0.0235, Valid Accuracy: 0.9714
best model updated
Epoch 7, Train Loss: 0.0183, Valid Accuracy: 0.9744
best model updated
Epoch 8, Train Loss: 0.0153, Valid Accuracy: 0.9774
Epoch 9, Train Loss: 0.0126, Valid Accuracy: 0.9745
Epoch 10, Train Loss: 0.0103, Valid Accuracy: 0.9713
best model updated
Epoch 11, Train Loss: 0.0086, Valid Accuracy: 0.9777
best model updated
Epoch 12, Train Loss: 0.0086, Valid Accuracy: 0.9781
best model updated
Epoch 13, Train Loss: 0.0058, Valid Accuracy: 0.9821
Epoch 14, Train Loss: 0.0043, Valid Accuracy: 0.9803
Epoch 15, Train Loss: 0.0037, Valid Accuracy: 0.9789
Epoch

5-5. **Multi-Label Classification** with **Categorical Cross-Entropy**

In [948]:
# Binary Cross Entropy 대신 Categorical Cross-Entropy 적용 시

_, best_epoch_model = run_experiment(nn.CrossEntropyLoss(),
                                     train_loader=train_loader_mlc,
                                     valid_loader=valid_loader_mlc,
                                     test_loader=test_loader_mlc,
                                     num_classes=4,
                                     final_activation=nn.Softmax(),
                                     multi_label=True)

best model updated
Epoch 1, Train Loss: 2.3965, Valid Accuracy: 0.6833
best model updated
Epoch 2, Train Loss: 2.2011, Valid Accuracy: 0.7416
best model updated
Epoch 3, Train Loss: 2.1232, Valid Accuracy: 0.7572
Epoch 4, Train Loss: 2.0869, Valid Accuracy: 0.7558
best model updated
Epoch 5, Train Loss: 2.0706, Valid Accuracy: 0.7592
best model updated
Epoch 6, Train Loss: 2.0564, Valid Accuracy: 0.7642
best model updated
Epoch 7, Train Loss: 2.0401, Valid Accuracy: 0.7698
Epoch 8, Train Loss: 2.0340, Valid Accuracy: 0.7609
Epoch 9, Train Loss: 2.0262, Valid Accuracy: 0.7676
Epoch 10, Train Loss: 2.0256, Valid Accuracy: 0.7683
Epoch 11, Train Loss: 2.0208, Valid Accuracy: 0.7682
Epoch 12, Train Loss: 2.0197, Valid Accuracy: 0.7620
best model updated
Epoch 13, Train Loss: 2.0183, Valid Accuracy: 0.7705
Epoch 14, Train Loss: 2.0169, Valid Accuracy: 0.7617
best model updated
Epoch 15, Train Loss: 2.0153, Valid Accuracy: 0.7768
Epoch 16, Train Loss: 2.0144, Valid Accuracy: 0.7767
best mode