In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import plotly.graph_objects as go

import random
import pandas as pd
import numpy as np

!pip install torchinfo
from torchinfo import summary

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
# 경고 미 표시

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Random Seed 고정 (학습 반복 시행 시에도 동일한 결과가 나오도록)

seed = 20251003

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [4]:
# 데이터셋 로딩

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transform,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transform,
                                          download=True)


100%|██████████| 9.91M/9.91M [00:00<00:00, 65.7MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.54MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.9MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.62MB/s]


**1. 데이터셋 분석**

In [5]:
# 데이터셋 로딩

from torch.utils.data import DataLoader

BATCH_SIZE = 32

train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

In [6]:
# 클래스 불균형 분석

# 학습 데이터
train_labels = torch.tensor(train_dataset.targets)
train_class_counts = torch.bincount(train_labels)
print(f'train data : {train_class_counts}')

# 테스트 데이터
test_labels = torch.tensor(test_dataset.targets)
test_class_counts = torch.bincount(test_labels)
print(f'test  data : {test_class_counts}')

NUM_CLASSES = len(train_class_counts)

train data : tensor([5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949])
test  data : tensor([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009])


In [7]:
# 클래스 불균형 분석 (percentage)

train_class_percentage = np.array(train_class_counts) * 100.0 / sum(train_class_counts)

train_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                                'count': train_class_counts,
                                'percentage (%)': train_class_percentage})

train_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,5923,9.871667
1,1,6742,11.236667
2,2,5958,9.93
3,3,6131,10.218333
4,4,5842,9.736667
5,5,5421,9.035
6,6,5918,9.863333
7,7,6265,10.441667
8,8,5851,9.751667
9,9,5949,9.915


In [8]:
test_class_percentage = np.array(test_class_counts) * 100.0 / sum(test_class_counts)

test_y_distrib = pd.DataFrame({'class': list(range(NUM_CLASSES)),
                               'count': test_class_counts,
                               'percentage (%)': test_class_percentage})

test_y_distrib

Unnamed: 0,class,count,percentage (%)
0,0,980,9.8
1,1,1135,11.35
2,2,1032,10.32
3,3,1010,10.1
4,4,982,9.82
5,5,892,8.92
6,6,958,9.58
7,7,1028,10.28
8,8,974,9.74
9,9,1009,10.09


**2. 0, 3, 8 숫자 데이터만 추출**

In [9]:
from torch.utils.data import Subset

# train dataset
train_targets = torch.tensor(train_dataset.targets)
train_mask = (train_targets == 0) | (train_targets == 3) | (train_targets == 8)
train_indices = torch.where(train_mask)[0]

train_subset = Subset(train_dataset, train_indices)

# test dataset
test_targets = torch.tensor(test_dataset.targets)
test_mask = (test_targets == 0) | (test_targets == 3) | (test_targets == 8)
test_indices = torch.where(test_mask)[0]

test_subset = Subset(test_dataset, test_indices)

In [10]:
# train, test Data Loader 갱신

train_subset_loader = DataLoader(train_subset,
                                 batch_size=BATCH_SIZE,
                                 shuffle=True)

test_subset_loader = DataLoader(test_subset,
                                batch_size=BATCH_SIZE,
                                shuffle=False)

In [11]:
# train, test Data Loader 의 class 가 0, 3, 8 뿐인지 확인

train_subset_labels = train_subset.dataset.targets[train_subset.indices]
train_subset_class_counts = torch.bincount(train_subset_labels)
print(f'train data (subset) [0,3,8] : {train_subset_class_counts}')

test_subset_labels = test_subset.dataset.targets[test_subset.indices]
test_subset_class_counts = torch.bincount(test_subset_labels)
print(f'test  data (subset) [0,3,8] : {test_subset_class_counts}')

train data (subset) [0,3,8] : tensor([5923,    0,    0, 6131,    0,    0,    0,    0, 5851])
test  data (subset) [0,3,8] : tensor([ 980,    0,    0, 1010,    0,    0,    0,    0,  974])


**3. CNN 모델 정의**

In [12]:
# 1. Non-Shared backbone & Shared head

class CNN_CASE_1(nn.Module):
    def __init__(self, backbone_1, backbone_2):
        super(CNN_CASE_1, self).__init__()

        self.backbone_1 = backbone_1
        self.backbone_2 = backbone_2
        self.final_linear = nn.Linear(2000, 3)
        self.final_softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x1 = self.backbone_1(x)
        x2 = self.backbone_2(x)
        x = torch.concatenate([x1, x2], dim=1)

        x = self.final_linear(x)
        x = self.final_softmax(x)

        return x

In [13]:
# 2. Shared backbone & Shared head

class CNN_CASE_2(nn.Module):
    def __init__(self, backbone):
        super(CNN_CASE_2, self).__init__()

        self.backbone = backbone
        self.final_linear = nn.Linear(1000, 3)
        self.final_softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.backbone(x)
        x = self.final_linear(x)
        x = self.final_softmax(x)

        return x

In [14]:
# 3. Shared backbone & Non-Shared head

class CNN_CASE_3(nn.Module):
    def __init__(self, backbone):
        super(CNN_CASE_3, self).__init__()

        self.backbone = backbone
        self.final_linear_1 = nn.Linear(1000, 2)
        self.final_linear_2 = nn.Linear(1000, 2)
        self.final_softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.backbone(x)
        x1 = self.final_linear_1(x)
        x2 = self.final_linear_2(x)
        x1 = self.final_softmax(x1)
        x2 = self.final_softmax(x2)

        return x1, x2

In [15]:
# 4. Non-Shared backbone & Non-Shared head (4-1)

class CNN_CASE_4_1(nn.Module):
    def __init__(self, backbone_1, backbone_2):
        super(CNN_CASE_4_1, self).__init__()

        self.backbone_1 = backbone_1
        self.backbone_2 = backbone_2
        self.final_linear_1 = nn.Linear(2000, 2)
        self.final_linear_2 = nn.Linear(2000, 2)
        self.final_softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x11 = self.backbone_1(x)
        x12 = self.backbone_2(x)
        x = torch.concatenate([x11, x12], dim=1)

        x21 = self.final_linear_1(x)
        x22 = self.final_linear_2(x)
        x21 = self.final_softmax(x21)
        x22 = self.final_softmax(x22)

        return x21, x22

In [16]:
# 5. Non-Shared backbone & Non-Shared head (4-2)

class CNN_CASE_4_2(nn.Module):
    def __init__(self, backbone_1, backbone_2):
        super(CNN_CASE_4_2, self).__init__()

        self.backbone_1 = backbone_1
        self.backbone_2 = backbone_2
        self.final_linear_1 = nn.Linear(1000, 2)
        self.final_linear_2 = nn.Linear(1000, 2)
        self.final_softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x11 = self.backbone_1(x)
        x12 = self.backbone_2(x)
        x21 = self.final_linear_1(x11)
        x22 = self.final_linear_2(x12)
        x21 = self.final_softmax(x21)
        x22 = self.final_softmax(x22)

        return x21, x22

**4. backbone model 로딩**

In [17]:
import torchvision.models as models

def define_model(backbone_name, case_name, device):

    # model count
    if case_name in ['1', '4_1', '4_2']:
        model_count = 2
    else:
        model_count = 1

    # load pre-trained backbone model
    pretrained_models = []

    for _ in range(model_count):
        if backbone_name == 'resnet18':
            pretrained_model = models.resnet18(pretrained=True)

        elif backbone_name == 'resnet34':
            pretrained_model = models.resnet34(pretrained=True)

        else:  # resnet50
            pretrained_model = models.resnet50(pretrained=True)

        pretrained_model.conv1 = torch.nn.Conv2d(1, 64,
                                                 kernel_size=(7, 7),
                                                 stride=(2, 2),
                                                 padding=(3, 3),
                                                 bias=False)
        pretrained_models.append(pretrained_model)

    # define full model
    if case_name == '1':
        model = CNN_CASE_1(backbone_1=pretrained_models[0],
                           backbone_2=pretrained_models[1])

    elif case_name == '2':
        model = CNN_CASE_2(backbone=pretrained_models[0])

    elif case_name == '3':
        model = CNN_CASE_3(backbone=pretrained_models[0])

    elif case_name == '4_1':
        model = CNN_CASE_4_1(backbone_1=pretrained_models[0],
                             backbone_2=pretrained_models[1])

    elif case_name == '4_2':
        model = CNN_CASE_4_2(backbone_1=pretrained_models[0],
                             backbone_2=pretrained_models[1])

    # device & optimizer & scheduler setting
    model.to(device)
    model.optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001)
    model.scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=model.optimizer,
                                                             gamma=0.95)

    return model

**5. 모델 구조 출력**

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size=(BATCH_SIZE, 1, 28, 28)

In [19]:
temp_backbone_name = 'resnet18'

In [20]:
# 1. Non-Shared backbone & Shared head

model_case_1 = define_model(temp_backbone_name, case_name='1', device=device)
print(summary(model=model_case_1, input_size=input_size))

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 129MB/s]


Layer (type:depth-idx)                        Output Shape              Param #
CNN_CASE_1                                    [32, 3]                   --
├─ResNet: 1-1                                 [32, 1000]                --
│    └─Conv2d: 2-1                            [32, 64, 14, 14]          3,136
│    └─BatchNorm2d: 2-2                       [32, 64, 14, 14]          128
│    └─ReLU: 2-3                              [32, 64, 14, 14]          --
│    └─MaxPool2d: 2-4                         [32, 64, 7, 7]            --
│    └─Sequential: 2-5                        [32, 64, 7, 7]            --
│    │    └─BasicBlock: 3-1                   [32, 64, 7, 7]            73,984
│    │    └─BasicBlock: 3-2                   [32, 64, 7, 7]            73,984
│    └─Sequential: 2-6                        [32, 128, 4, 4]           --
│    │    └─BasicBlock: 3-3                   [32, 128, 4, 4]           230,144
│    │    └─BasicBlock: 3-4                   [32, 128, 4, 4]           295,42

In [21]:
# 2. Shared backbone & Shared head

model_case_2 = define_model(temp_backbone_name, case_name='2', device=device)
print(summary(model=model_case_2, input_size=input_size))

Layer (type:depth-idx)                        Output Shape              Param #
CNN_CASE_2                                    [32, 3]                   --
├─ResNet: 1-1                                 [32, 1000]                --
│    └─Conv2d: 2-1                            [32, 64, 14, 14]          3,136
│    └─BatchNorm2d: 2-2                       [32, 64, 14, 14]          128
│    └─ReLU: 2-3                              [32, 64, 14, 14]          --
│    └─MaxPool2d: 2-4                         [32, 64, 7, 7]            --
│    └─Sequential: 2-5                        [32, 64, 7, 7]            --
│    │    └─BasicBlock: 3-1                   [32, 64, 7, 7]            73,984
│    │    └─BasicBlock: 3-2                   [32, 64, 7, 7]            73,984
│    └─Sequential: 2-6                        [32, 128, 4, 4]           --
│    │    └─BasicBlock: 3-3                   [32, 128, 4, 4]           230,144
│    │    └─BasicBlock: 3-4                   [32, 128, 4, 4]           295,42

In [22]:
# 3. Shared backbone & Non-Shared head

model_case_3 = define_model(temp_backbone_name, case_name='3', device=device)
print(summary(model=model_case_3, input_size=input_size))

Layer (type:depth-idx)                        Output Shape              Param #
CNN_CASE_3                                    [32, 2]                   --
├─ResNet: 1-1                                 [32, 1000]                --
│    └─Conv2d: 2-1                            [32, 64, 14, 14]          3,136
│    └─BatchNorm2d: 2-2                       [32, 64, 14, 14]          128
│    └─ReLU: 2-3                              [32, 64, 14, 14]          --
│    └─MaxPool2d: 2-4                         [32, 64, 7, 7]            --
│    └─Sequential: 2-5                        [32, 64, 7, 7]            --
│    │    └─BasicBlock: 3-1                   [32, 64, 7, 7]            73,984
│    │    └─BasicBlock: 3-2                   [32, 64, 7, 7]            73,984
│    └─Sequential: 2-6                        [32, 128, 4, 4]           --
│    │    └─BasicBlock: 3-3                   [32, 128, 4, 4]           230,144
│    │    └─BasicBlock: 3-4                   [32, 128, 4, 4]           295,42

In [23]:
# 4. Non-Shared backbone & Non-Shared head (4-1)

model_case_4_1 = define_model(temp_backbone_name, case_name='4_1', device=device)
print(summary(model=model_case_4_1, input_size=input_size))

Layer (type:depth-idx)                        Output Shape              Param #
CNN_CASE_4_1                                  [32, 2]                   --
├─ResNet: 1-1                                 [32, 1000]                --
│    └─Conv2d: 2-1                            [32, 64, 14, 14]          3,136
│    └─BatchNorm2d: 2-2                       [32, 64, 14, 14]          128
│    └─ReLU: 2-3                              [32, 64, 14, 14]          --
│    └─MaxPool2d: 2-4                         [32, 64, 7, 7]            --
│    └─Sequential: 2-5                        [32, 64, 7, 7]            --
│    │    └─BasicBlock: 3-1                   [32, 64, 7, 7]            73,984
│    │    └─BasicBlock: 3-2                   [32, 64, 7, 7]            73,984
│    └─Sequential: 2-6                        [32, 128, 4, 4]           --
│    │    └─BasicBlock: 3-3                   [32, 128, 4, 4]           230,144
│    │    └─BasicBlock: 3-4                   [32, 128, 4, 4]           295,42

In [24]:
# 5. Non-Shared backbone & Non-Shared head (4-2)

model_case_4_2 = define_model(temp_backbone_name, case_name='4_2', device=device)
print(summary(model=model_case_4_2, input_size=input_size))

Layer (type:depth-idx)                        Output Shape              Param #
CNN_CASE_4_2                                  [32, 2]                   --
├─ResNet: 1-1                                 [32, 1000]                --
│    └─Conv2d: 2-1                            [32, 64, 14, 14]          3,136
│    └─BatchNorm2d: 2-2                       [32, 64, 14, 14]          128
│    └─ReLU: 2-3                              [32, 64, 14, 14]          --
│    └─MaxPool2d: 2-4                         [32, 64, 7, 7]            --
│    └─Sequential: 2-5                        [32, 64, 7, 7]            --
│    │    └─BasicBlock: 3-1                   [32, 64, 7, 7]            73,984
│    │    └─BasicBlock: 3-2                   [32, 64, 7, 7]            73,984
│    └─Sequential: 2-6                        [32, 128, 4, 4]           --
│    │    └─BasicBlock: 3-3                   [32, 128, 4, 4]           230,144
│    │    └─BasicBlock: 3-4                   [32, 128, 4, 4]           295,42

**6. 데이터셋 분리**

* Train Data -> Train Data + Valid Data

In [25]:
# dataset size

num_tv = sum(train_subset_class_counts)
num_valid = int(0.1 * num_tv)
num_train = num_tv - num_valid

In [26]:
# 데이터셋 분리

from torch.utils.data import random_split

train_dataset, valid_dataset = random_split(train_subset, [num_train, num_valid])

In [27]:
# {0,1,2} -> {0,3,8} Mapping 된 DataLoader 생성

from torch.utils.data import Dataset

class MappedDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        img, target = self.dataset[idx]
        target_mapping = {0: 0, 3: 1, 8: 2}
        target = target_mapping[target]

        return img, target

In [28]:
# DataLoader 생성

train_dataset_loader = DataLoader(MappedDataset(train_dataset),
                                  batch_size=BATCH_SIZE,
                                  shuffle=True)

valid_dataset_loader = DataLoader(MappedDataset(valid_dataset),
                                  batch_size=BATCH_SIZE,
                                  shuffle=False)

test_dataset_loader = DataLoader(MappedDataset(test_subset),
                                 batch_size=BATCH_SIZE,
                                 shuffle=False)

In [29]:
# {0,3} vs. {8} / {0,8} vs. {3} Split 된 DataLoader 생성

class DualOutputDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        img, target = self.dataset[idx]
        target1_mapping = {0: 0, 3: 0, 8: 1}
        target2_mapping = {0: 0, 3: 1, 8: 0}

        target1 = target1_mapping[target]
        target2 = target2_mapping[target]

        return img, [target1, target2]

In [30]:
# Dual Output Dataset 생성

train_01_02_dataset_loader = DataLoader(DualOutputDataset(train_dataset),
                                        batch_size=BATCH_SIZE,
                                        shuffle=True)

valid_01_02_dataset_loader = DataLoader(DualOutputDataset(valid_dataset),
                                        batch_size=BATCH_SIZE,
                                        shuffle=False)

test_01_02_dataset_loader = DataLoader(DualOutputDataset(test_subset),
                                       batch_size=BATCH_SIZE,
                                       shuffle=False)

**7. 모델 학습 함수 정의**

In [31]:
MAX_EPOCHS = 9999
EARLY_STOPPING_ROUNDS = 10  # Early Stopping Patience (epochs)

In [32]:
# dual labels -> PyTorch tensor

def convert_to_tensor(labels):
    labels[0] = list(labels[0])
    labels[1] = list(labels[1])
    labels_tensor = torch.tensor(labels)
    labels_tensor = labels_tensor.transpose(0, 1)

    return labels_tensor

In [33]:
# 모델 학습 실시

# args :
# - model           : 학습할 모델
# - train_loader    : Training Data Loader
# - train_loss_list : 각 epoch 에서의 train loss 기록
# - is_dual_head    : 신경망 출력값, 즉 MLP Head 가 2개인지의 여부

# returns :
# - train_loss : 모델의 Train Loss

def run_train(model, train_loader, train_loss_list, is_dual_head, verbose):
    model.train()
    train_loss = 0.0
    train_idx = 0

    for images, labels in train_loader:
        if is_dual_head:
            labels = convert_to_tensor(labels)
        images, labels = images.to(device), labels.to(device)

        # train 실시
        model.optimizer.zero_grad()
        outputs = model(images)

        if is_dual_head:
            loss1 = nn.CrossEntropyLoss()(outputs[0], labels[:, 0])
            loss2 = nn.CrossEntropyLoss()(outputs[1], labels[:, 1])
            loss = loss1 + loss2
        else:
            loss = nn.CrossEntropyLoss()(outputs, labels)

        loss.backward()
        model.optimizer.step()

        train_loss += loss.item()
        train_idx += 1

        if verbose:
            if train_idx % 20 == 0:
                print(f'idx: {train_idx}, loss for current idx: {loss.item():.6f}')

    train_loss_list.append(train_loss / len(train_loader))
    return train_loss_list[-1]

In [34]:
import time

# 모델 validation / test 실시

# args :
# - model                : validation/test 할 모델
# - valid_or_test_loader : validation/test Data Loader
# - is_dual_head         : 신경망 출력값, 즉 MLP Head 가 2개인지의 여부

# returns :
# - accuracy       : 모델의 validation/test 정확도
# - inference_time : inference 시간 (seconds)

def run_valid_or_test(model, valid_or_test_loader, is_dual_head):
    model.eval()
    correct, total = 0, 0
    inference_time_sum = 0.0
    valid_test_idx = 0

    with torch.no_grad():
        for images, labels in valid_or_test_loader:
            if is_dual_head:
                labels = convert_to_tensor(labels)
            images, labels = images.to(device), labels.to(device)

            start_at = time.time()
            outputs = model(images)
            inference_time_sum += time.time() - start_at

            # validation/test 실시 및 정확도 측정
            if is_dual_head:
                _, predicted_1 = torch.max(outputs[0], 1)
                _, predicted_2 = torch.max(outputs[1], 1)
                correct += ((predicted_1 == labels[:, 0]) & (predicted_2 == labels[:, 1])).sum().item()

            else:
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()

            total += labels.size(0)
            valid_test_idx += 1

    accuracy = correct / total
    inference_time = inference_time_sum / valid_test_idx

    return accuracy, inference_time

In [35]:
# 모델 학습 및 validation 전체 프로세스

# args :
# - model         : 학습할 모델
# - train_loader  : Training Data Loader
# - valid_loader  : Valid Data Loader
# - test_loader   : Test Data Loader
# - backbone_name : Backbone 모델 이름 ('resnet18', 'resnet34' or 'resnet50')
# - case_name     : 모델 구조 case 이름 ('1', '2', '3', '4_1' or '4_2')
# - verbose       : 학습 중 프로세스 출력 여부

# returns :
# - final_acc      : 해당 하이퍼파라미터 조합에 대한 최종 Accuracy (valid_acc 이 가장 높았던 epoch 의 모델로 측정)
# - inference_time : inference 시간 (seconds)

def run_model_common(model, train_loader, valid_loader, test_loader,
                     backbone_name, case_name, verbose=False):

    train_loss_list = []       # train loss
    valid_acc_list = []        # valid accuracy
    max_valid_acc = 0.0        # max validation accuracy
    best_valid_acc_epoch = -1  # valid_acc 이 가장 높았던 epoch (여기서는 Loss 가 아닌 Accuracy 기준 Early Stopping)
    best_epoch_model = None    # valid_acc 이 가장 높았던 epoch 의 모델

    is_dual_head = case_name in ['3', '4_1', '4_2']

    # 1. 학습 실시
    for epoch in range(MAX_EPOCHS):

        # 1-1. train model
        train_loss = run_train(model,
                               train_loader,
                               train_loss_list,
                               is_dual_head,
                               verbose=(verbose and epoch == 0))

        # 1-2. validate model (with VALID SET)
        epoch_acc, _ = run_valid_or_test(model, valid_loader, is_dual_head)
        valid_acc_list.append(epoch_acc)

        # 1-3. Early Stopping 처리 (overfitting 방지)
        if epoch_acc > max_valid_acc:
            max_valid_acc = epoch_acc
            best_valid_acc_epoch = epoch

            best_epoch_model = define_model(backbone_name, case_name, device)
            best_epoch_model.load_state_dict(model.state_dict())

            if verbose:
                print('best model updated')

        if epoch - best_valid_acc_epoch >= EARLY_STOPPING_ROUNDS:
            break

        # 1-4. 결과 출력
        if verbose:
            print(f"Epoch {epoch+1}, Loss: {train_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    # check best-epoch model correctly loaded
    checked_acc, _ = run_valid_or_test(best_epoch_model, valid_loader, is_dual_head)

    if verbose:
        print(f"Best Epoch: {best_valid_acc_epoch}, Best Valid Acc: {max_valid_acc}")
        print(f"Valid Acc (with Epoch valid set) on Loaded Best Model: {checked_acc}")

    assert abs(max_valid_acc - checked_acc) < 1e-6

    # 2. test best-epoch model (with TEST SET)
    final_acc, inference_time = run_valid_or_test(best_epoch_model, test_loader, is_dual_head)

    if verbose:
        print(f"Final Acc (with TEST set) on Loaded Best Model: {final_acc} (time: {inference_time})")

    return final_acc, inference_time


**8. 학습 실시**

In [36]:
print(device)

cuda


In [37]:
result_dict = {
    'backbone': [],
    'case': [],
    'accuracy': [],
    'inference_time': []
}

dataloader_by_case_name = {
    'dual_head': {
        'train': train_01_02_dataset_loader,
        'valid': valid_01_02_dataset_loader,
        'test': test_01_02_dataset_loader
    },
    'single_head': {
        'train': train_dataset_loader,
        'valid': valid_dataset_loader,
        'test': test_dataset_loader
    }
}

for case_name in ['1', '2', '3', '4_1', '4_2']:
    if case_name in ['3', '4_1', '4_2']:
        head_type = 'dual_head'
    else:
        head_type = 'single_head'

    for backbone_name in ['resnet18', 'resnet34', 'resnet50']:
        print(f'\n=== case={case_name}, backbone={backbone_name} ===')
        model = define_model(backbone_name, case_name, device=device)

        train_loader = dataloader_by_case_name[head_type]['train']
        valid_loader = dataloader_by_case_name[head_type]['valid']
        test_loader = dataloader_by_case_name[head_type]['test']

        final_acc, inference_time = run_model_common(model,
                                                     train_loader=train_loader,
                                                     valid_loader=valid_loader,
                                                     test_loader=test_loader,
                                                     backbone_name=backbone_name,
                                                     case_name=case_name,
                                                     verbose=True)

        result_dict['backbone'].append(backbone_name)
        result_dict['case'].append(case_name)
        result_dict['accuracy'].append(final_acc)
        result_dict['inference_time'].append(inference_time)


=== case=1, backbone=resnet18 ===
idx: 20, loss for current idx: 0.744580
idx: 40, loss for current idx: 0.596468
idx: 60, loss for current idx: 0.616834
idx: 80, loss for current idx: 0.589270
idx: 100, loss for current idx: 0.559304
idx: 120, loss for current idx: 0.618129
idx: 140, loss for current idx: 0.581857
idx: 160, loss for current idx: 0.585129
idx: 180, loss for current idx: 0.553517
idx: 200, loss for current idx: 0.577554
idx: 220, loss for current idx: 0.658458
idx: 240, loss for current idx: 0.588318
idx: 260, loss for current idx: 0.567388
idx: 280, loss for current idx: 0.592293
idx: 300, loss for current idx: 0.553608
idx: 320, loss for current idx: 0.596631
idx: 340, loss for current idx: 0.551694
idx: 360, loss for current idx: 0.594992
idx: 380, loss for current idx: 0.573671
idx: 400, loss for current idx: 0.631738
idx: 420, loss for current idx: 0.553669
idx: 440, loss for current idx: 0.588446
idx: 460, loss for current idx: 0.552554
idx: 480, loss for current

100%|██████████| 83.3M/83.3M [00:00<00:00, 193MB/s]


idx: 20, loss for current idx: 0.651090
idx: 40, loss for current idx: 0.687301
idx: 60, loss for current idx: 0.629467
idx: 80, loss for current idx: 0.661950
idx: 100, loss for current idx: 0.633864
idx: 120, loss for current idx: 0.586457
idx: 140, loss for current idx: 0.609921
idx: 160, loss for current idx: 0.620668
idx: 180, loss for current idx: 0.638501
idx: 200, loss for current idx: 0.583284
idx: 220, loss for current idx: 0.593105
idx: 240, loss for current idx: 0.697983
idx: 260, loss for current idx: 0.609189
idx: 280, loss for current idx: 0.584055
idx: 300, loss for current idx: 0.583040
idx: 320, loss for current idx: 0.616273
idx: 340, loss for current idx: 0.613708
idx: 360, loss for current idx: 0.596287
idx: 380, loss for current idx: 0.614363
idx: 400, loss for current idx: 0.580048
idx: 420, loss for current idx: 0.551555
idx: 440, loss for current idx: 0.560754
idx: 460, loss for current idx: 0.626799
idx: 480, loss for current idx: 0.578607
idx: 500, loss for c

100%|██████████| 97.8M/97.8M [00:00<00:00, 195MB/s]


idx: 20, loss for current idx: 0.765833
idx: 40, loss for current idx: 0.713934
idx: 60, loss for current idx: 0.786545
idx: 80, loss for current idx: 0.714292
idx: 100, loss for current idx: 0.615504
idx: 120, loss for current idx: 0.644406
idx: 140, loss for current idx: 0.597870
idx: 160, loss for current idx: 0.639823
idx: 180, loss for current idx: 0.602238
idx: 200, loss for current idx: 0.612866
idx: 220, loss for current idx: 0.661309
idx: 240, loss for current idx: 0.591121
idx: 260, loss for current idx: 0.639747
idx: 280, loss for current idx: 0.591613
idx: 300, loss for current idx: 0.597998
idx: 320, loss for current idx: 0.635947
idx: 340, loss for current idx: 0.583655
idx: 360, loss for current idx: 0.668444
idx: 380, loss for current idx: 0.617755
idx: 400, loss for current idx: 0.702301
idx: 420, loss for current idx: 0.551445
idx: 440, loss for current idx: 0.553230
idx: 460, loss for current idx: 0.590794
idx: 480, loss for current idx: 0.651021
idx: 500, loss for c

In [38]:
result_df = pd.DataFrame(result_dict)
result_df

Unnamed: 0,backbone,case,accuracy,inference_time
0,resnet18,1,0.994602,0.004782
1,resnet34,1,0.99359,0.010598
2,resnet50,1,0.995951,0.010913
3,resnet18,2,0.99359,0.002491
4,resnet34,2,0.996289,0.004127
5,resnet50,2,0.99224,0.005393
6,resnet18,3,0.996289,0.002437
7,resnet34,3,0.99359,0.004143
8,resnet50,3,0.991228,0.005534
9,resnet18,4_1,0.996964,0.004743


In [39]:
result_df.to_csv('result_experiment_2.csv')