## Import Libraries

In [1]:
from os import path, mkdir

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import ConcatDataset

import torchvision
import torchvision.models
import torchvision.transforms.v2 as transforms

import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

### Check GPU Availability

In [2]:
!nvidia-smi

Wed Aug 21 20:17:19 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   40C    P0    34W / 250W |    877MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   38C    P0    25W / 250W |      2MiB / 16280MiB |      0%      Default |
|       

In [3]:
# Index of the CUDA device to use when a GPU is available.
DEVICE_NUM = 4

# Default to the CPU; switch to the requested GPU only if that index really exists,
# so a wrong DEVICE_NUM fails here with a clear message instead of later with an
# opaque "invalid device ordinal" CUDA error.
device = torch.device("cpu")
if torch.cuda.is_available():
    available_gpus = torch.cuda.device_count()
    if not 0 <= DEVICE_NUM < available_gpus:
        raise ValueError(
            f"DEVICE_NUM={DEVICE_NUM} is out of range: {available_gpus} CUDA device(s) "
            f"visible (valid indices 0-{available_gpus - 1})."
        )
    device = torch.device(f"cuda:{DEVICE_NUM}")
print("INFO: Using device -", device)

INFO: Using device - cuda:4


## Load Datasets

In [4]:
from typing import Callable, Optional
from torchvision.datasets.utils import download_and_extract_archive

# Rebind the `tqdm` name inside torchvision's download utilities to the notebook tqdm
# imported at the top of the file (presumably so download progress renders as a
# notebook widget — confirm against the installed torchvision version).
torchvision.datasets.utils.tqdm = tqdm


class FoodImageDataset(torchvision.datasets.ImageFolder):
    """ImageFolder over one split ("train", "valid" or "test") of the contest archive.

    The archive is fetched and extracted into ``root`` (when requested or missing)
    before the chosen split sub-directory is handed to ``ImageFolder``.

    Args:
        root: Directory that holds ``datasets.zip`` and the extracted split folders.
        force_download: Download and extract even if the archive is already present.
        train: When False the "test" split is used and ``valid`` is ignored.
        valid: With ``train=True``, selects "valid" instead of "train".
        transform: Forwarded to ``ImageFolder``.
        target_transform: Forwarded to ``ImageFolder``.
    """

    download_url = "https://daiv-cnu.duckdns.org/contest/ai_competition[2024]_basic/dataset/datasets.zip"

    def __init__(self, root: str, force_download: bool = True, train: bool = True, valid: bool = False, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None):
        self.download(root, force=force_download)

        # Resolve the split folder name from the two flags
        if not train:
            split = "test"
        elif valid:
            split = "valid"
        else:
            split = "train"

        super().__init__(root=path.join(root, split), transform=transform, target_transform=target_transform)

    @classmethod
    def download(cls, root: str, force: bool = False):
        """Download and extract the archive into ``root`` unless it is already there.

        Args:
            root: Directory used both as download target and extraction target.
            force: Download even when ``datasets.zip`` already exists in ``root``.
        """
        archive_present = path.isfile(path.join(root, "datasets.zip"))
        if archive_present and not force:
            print("INFO: Dataset archive found in the root directory. Skipping download.")
            return

        download_and_extract_archive(cls.download_url, download_root=root, extract_root=root, filename="datasets.zip")
        print("INFO: Dataset archive downloaded and extracted.")

In [5]:
# Size passed to transforms.Resize for every image
IMG_SIZE = (512, 512)
# Per-channel statistics unpacked into transforms.Normalize
IMG_NORM = {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225],
}
# Number of classes the classifier head is built for
CLASS_LABELS = 11
# Directory that holds the dataset archive and its extracted splits
DATA_ROOT = path.join(".", "data")

In [6]:
# Baseline pipeline (resize and normalize only)
basic_transform = transforms.Compose(
    [
        transforms.Resize(IMG_SIZE),
        transforms.ToImage(),
        transforms.ToDtype(torch.float32, scale=True),
        transforms.Normalize(**IMG_NORM),
    ]
)

# TODO: swap in whichever augmentations you prefer
# (currently AugMix, RandAugment and TrivialAugmentWide, each followed by the baseline pipeline)
augment_transforms = [
    transforms.Compose([augmentation, basic_transform])
    for augmentation in (
        transforms.AugMix(),
        transforms.RandAugment(),
        transforms.TrivialAugmentWide(),
    )
]

In [7]:
# Training split with only the baseline transform applied
original_train_dataset = FoodImageDataset(
    root=DATA_ROOT,
    force_download=False,
    train=True,
    transform=basic_transform,
)

# One extra view of the training split per augmentation pipeline
augmented_train_datasets = [
    FoodImageDataset(
        root=DATA_ROOT,
        force_download=False,
        train=True,
        transform=aug_transform,
    )
    for aug_transform in augment_transforms
]

# Training data = plain images followed by every augmented view
train_dataset = ConcatDataset([original_train_dataset, *augmented_train_datasets])
valid_dataset = FoodImageDataset(
    root=DATA_ROOT, force_download=False, valid=True, transform=basic_transform
)
test_dataset = FoodImageDataset(
    root=DATA_ROOT, force_download=False, train=False, transform=basic_transform
)
print(
    "INFO: Dataset loaded successfully. Number of samples - "
    f"Train({len(train_dataset)}), Valid({len(valid_dataset)}), Test({len(test_dataset)})"
)

INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset loaded successfully. Number of samples - Train(39464), Valid(3430), Test(3347)


## DataLoader

In [8]:
# Number of samples per mini-batch; shared by the train, validation and test DataLoaders
BATCH_SIZE = 16

In [9]:
# Set False if DataLoader is causing issues
MULTI_PROCESSING = True

from platform import system

# Worker processes are skipped when disabled above or when running on Windows
# (which this notebook treats as not supporting multi-process loading).
if not MULTI_PROCESSING or system() == "Windows":
    cpu_cores = 0
    print("INFO: Using DataLoader without multi-processing.")
else:
    import multiprocessing
    cpu_cores = multiprocessing.cpu_count()
    print(f"INFO: Number of CPU cores - {cpu_cores}")

# Only the training loader shuffles; validation and test keep dataset order
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=cpu_cores,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=cpu_cores,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=cpu_cores,
)

INFO: Number of CPU cores - 48


## Define Model

### 0. ResNet With Attention

In [10]:
class SelfAttention(nn.Module):
    """Self-attention across the spatial positions of a 4-D feature map.

    Query and key are 1x1 convolutions to a reduced channel width, value is a 1x1
    convolution that keeps the channel count. The attended features are scaled by
    the learnable scalar ``gamma`` (initialised to zero) and added to the input,
    so a freshly constructed layer returns its input unchanged.

    Args:
        in_channels: Channel count of the input feature map.
        reduction: Divisor applied to ``in_channels`` for the query/key width.
            Defaults to 8. The width is clamped to at least 1 so inputs with fewer
            channels than ``reduction`` still work.

    Raises:
        ValueError: If ``in_channels`` or ``reduction`` is smaller than 1.
    """

    def __init__(self, in_channels, reduction=8):
        super().__init__()
        if in_channels < 1 or reduction < 1:
            raise ValueError(
                f"in_channels and reduction must be >= 1, got {in_channels} and {reduction}"
            )

        # Plain `in_channels // reduction` is 0 for narrow inputs, which yields
        # zero-width projections that cannot be reshaped in forward().
        attn_channels = max(in_channels // reduction, 1)

        # Attribute names and creation order are kept so existing checkpoints still load.
        self.query = nn.Conv2d(in_channels, attn_channels, kernel_size=1)
        self.key = nn.Conv2d(in_channels, attn_channels, kernel_size=1)
        self.value = nn.Conv2d(in_channels, in_channels, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Apply attention to ``x`` of shape (batch, C, H, W); the output has the same shape."""
        batch_size, C, H, W = x.size()
        positions = H * W

        # reshape (not view) so non-contiguous activations, e.g. channels_last, also work
        query = self.query(x).reshape(batch_size, -1, positions).permute(0, 2, 1)  # (B, N, C')
        key = self.key(x).reshape(batch_size, -1, positions)                       # (B, C', N)
        energy = torch.bmm(query, key)                                             # (B, N, N)
        attention = torch.softmax(energy, dim=-1)  # each row sums to 1 over key positions

        value = self.value(x).reshape(batch_size, -1, positions)                   # (B, C, N)
        out = torch.bmm(value, attention.permute(0, 2, 1))                         # (B, C, N)
        out = out.reshape(batch_size, C, H, W)

        # Residual connection gated by gamma
        return self.gamma * out + x

In [11]:
class ResNetA(nn.Module):
    """ResNet-50 feature extractor followed by self-attention and a two-layer classifier head.

    Args:
        num_classes: Size of the final linear layer's output (default 11).
    """

    def __init__(self, num_classes=11):
        super().__init__()
        
        # Use ResNet50 as the base model (weights=None: no pretrained weights are requested)
        self.resnet = torchvision.models.resnet50(weights=None)
        
        # Replace the first convolution layer (original note: adjusted to fit the input image size).
        # NOTE(review): these hyperparameters look identical to torchvision's stock ResNet-50
        # stem conv — confirm whether this replacement is still needed.
        self.resnet.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        
        # Keep every child module except the last two (original note: removes the final
        # fully connected layer). NOTE(review): [:-2] presumably drops the built-in
        # average pool as well as the fc layer — confirm.
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-2])
        
        # Self-attention over the 2048-channel feature map
        self.attention = SelfAttention(2048)
        
        # Global average pooling down to a 1x1 spatial size
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        
        # Fully connected classifier head: 2048 -> 512 -> num_classes with ReLU and dropout in between
        self.fc1 = nn.Linear(2048, 512)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Run backbone, attention, pooling and the classifier head; returns the output of ``fc2``.

        Assumes ``x`` is a batch of 3-channel images (the first conv takes 3 input channels).
        """
        x = self.resnet(x)
        x = self.attention(x)
        x = self.avg_pool(x)
        # Flatten everything but the batch dimension before the linear layers
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x


### Initialize Model

In [12]:
model = ResNetA(num_classes=CLASS_LABELS)

# Whether to initialise the model from a saved checkpoint (True -> load it)
load_model = True
model_id = "ResNetA50"

# File stem (without ".pt") of the checkpoint under ./models to load
model_info = "resnet50_with_attention__acc__0.817784"

if load_model:
    # map_location="cpu": the checkpoint may have been written from a GPU index that does
    # not exist on this machine; load onto the CPU first and move the model afterwards.
    # weights_only=True: a state_dict holds only tensors, so refuse to unpickle arbitrary objects.
    model.load_state_dict(
        torch.load(
            path.join(".", "models", f"{model_info}.pt"),
            map_location="cpu",
            weights_only=True,
        )
    )

# Move the model to the selected device whether or not a checkpoint was loaded
model.to(device)

In [13]:
# Step size handed to the Adam optimizer
LEARNING_RATE = 1e-4

# Cross-entropy on the model outputs; Adam updates every model parameter
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

## Training Loop

In [14]:
from IPython.display import display
import ipywidgets as widgets

def create_plot(update_every=1):
    """Create a live loss plot and return a callback that appends points to it.

    The figure is rendered inside an ``ipywidgets.Output`` area that is displayed
    immediately; each redraw replaces the previous rendering in that area.

    Args:
        update_every: Redraw the figure only on every ``update_every``-th recorded loss.
            The default of 1 redraws on every call. Every loss is recorded regardless,
            so larger values only reduce rendering work.

    Returns:
        A function ``update_plot(new_loss)`` that records one loss value. ``new_loss``
        may be anything exposing ``.item()`` (e.g. a 0-dim tensor) or a plain number.

    Raises:
        ValueError: If ``update_every`` is smaller than 1.
    """
    if update_every < 1:
        raise ValueError("update_every must be a positive integer")

    losses = []

    # Enable Interactive Mode
    plt.ion()

    # Loss Plot Setting
    fig, ax = plt.subplots(figsize=(6, 2))
    line, = ax.plot(losses)
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Loss")
    ax.set_title("Cross Entropy Loss")

    # Display Plot
    plot = widgets.Output()
    display(plot)

    def update_plot(new_loss):
        # Accept tensors / NumPy scalars as well as plain Python numbers
        value = new_loss.item() if hasattr(new_loss, "item") else float(new_loss)
        losses.append(value)

        # Skip the (comparatively expensive) redraw between refresh points
        if len(losses) % update_every != 0:
            return

        line.set_ydata(losses)
        line.set_xdata(range(len(losses)))
        ax.relim()
        ax.autoscale_view()
        with plot:
            plot.clear_output(wait=True)
            display(fig)

    return update_plot

In [16]:
def save_model(model_id: str, acc: float, epoch: int, net: "nn.Module | None" = None) -> None:
    """Write a checkpoint of the model's state_dict to ``./models/backup``.

    The file is named ``{epoch:02}__{model_id}__acc__{acc:.6f}.pt``.

    Args:
        model_id: Identifier embedded in the file name.
        acc: Accuracy embedded in the file name; a Python float or a 0-dim tensor.
        epoch: Epoch number embedded (zero-padded to two digits) in the file name.
        net: Module whose state_dict is saved. When omitted, the notebook-level
            ``model`` variable is used, as before.
    """
    # Local import: the notebook's import cell only brings in `path` and `mkdir`.
    from os import makedirs

    dir_path = path.join(".", "models", "backup")
    # mkdir() is not recursive and raised FileNotFoundError when ./models was missing;
    # makedirs creates the whole chain and tolerates an existing directory.
    makedirs(dir_path, exist_ok=True)

    target = model if net is None else net
    model_info = f"{epoch:02}__{model_id}__acc__{float(acc):.6f}"
    save_path = path.join(dir_path, f"{model_info}.pt")
    torch.save(target.state_dict(), save_path)

In [17]:
# Number of passes over the training data
num_epochs = 30

# Batch-level CutMix augmentation. The class count comes from CLASS_LABELS (the same
# constant the model is built with) so the two cannot drift apart.
cutmix = transforms.CutMix(num_classes=CLASS_LABELS)

In [18]:
train_length, valid_length = map(len, (train_loader, valid_loader))
cutmix_prob = 0.666  # probability that a given training batch gets CutMix applied

epochs = tqdm(range(num_epochs), desc="Running Epochs")
with (tqdm(total=train_length, desc="Training") as train_progress,
      tqdm(total=valid_length, desc="Validation") as valid_progress):

    update = create_plot()  # Create Loss Plot

    for epoch in epochs:
        # The same two progress bars are reused for every epoch
        train_progress.reset(total=train_length)
        valid_progress.reset(total=valid_length)

        # Training
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()

            # Apply CutMix to only a random subset of the batches
            if torch.rand(1).item() < cutmix_prob:
                inputs, targets = cutmix(inputs, targets)

            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            update(loss)
            train_progress.update(1)

        # Validation: accumulate plain Python numbers so val_acc / val_loss end up as
        # floats rather than device tensors.
        correct, val_loss_sum = 0, 0.0
        model.eval()
        with torch.no_grad():
            for inputs, targets in valid_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)

                # criterion returns the batch mean; weight it by the batch size so a
                # shorter final batch does not skew the dataset-level average.
                val_loss_sum += criterion(outputs, targets).item() * targets.size(0)
                correct += (outputs.argmax(dim=1) == targets).sum().item()
                valid_progress.update(1)

        val_acc = correct / len(valid_dataset)
        val_loss = val_loss_sum / len(valid_dataset)

        # `loss` here is the loss of the last training batch of this epoch
        print(f"\rEpoch [{epoch+1:2}/{num_epochs}], Step [{train_length}/{train_length}], Loss: {loss.item():.6f}, Valid Acc: {val_acc:.6%}, Valid Loss: {val_loss:.6f}", end="\n" if (epoch+1) % 5 == 0 or (epoch+1) == num_epochs else "")
        save_model(model_id=model_id, acc=val_acc, epoch=epoch+1)


Running Epochs:   0%|          | 0/30 [00:00<?, ?it/s]

Training:   0%|          | 0/2467 [00:00<?, ?it/s]

Validation:   0%|          | 0/215 [00:00<?, ?it/s]

Output()

## Save Weights

In [18]:
# Name the final checkpoint after the model id and the most recent validation accuracy
model_info = f"{model_id}__acc__{val_acc:.6f}"
save_path = path.join(".", "models", f"{model_info}.pt")

# Create the output directory on first use
if not path.isdir(path.join(".", "models")):
    mkdir(path.join(".", "models"))

torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")

Model saved to ./models/resnet50_with_attention__acc__0.804081.pt


## Model Evaluation

In [20]:
# Collected predictions: one test-sample id and one predicted class name per row
results = dict(id=[], label=[])
test_length = len(test_dataset)

# train_dataset is a ConcatDataset, so read 'classes' from its first member dataset
train_classes = train_dataset.datasets[0].classes
# Loop-invariant: the test split's folder names, indexed by the per-sample value the
# test loader yields in the target position
test_classes = test_dataset.classes

model.to(device)
model.eval()
with torch.no_grad():
    for inputs, ids in tqdm(test_loader):
        inputs = inputs.to(device)
        outputs = model(inputs)
        preds = outputs.argmax(dim=1)

        # .tolist() gives plain ints, so list indexing never depends on tensor elements
        results['id'] += [test_classes[i] for i in ids.tolist()]
        results['label'] += [train_classes[i] for i in preds.tolist()]

  0%|          | 0/210 [00:00<?, ?it/s]

In [21]:
# Write the predictions to submissions/<model_info>.csv and preview the first rows
submission_dir = "submissions"
if not path.isdir(submission_dir):
    mkdir(submission_dir)

results_df = pd.DataFrame(results)
submit_file_path = path.join(submission_dir, f"{model_info}.csv")
results_df.to_csv(submit_file_path, index=False)
print("File saved to", submit_file_path)

results_df.head()

File saved to submissions/resnet50_with_attention__acc__0.817784__ 7.csv


Unnamed: 0,id,label
0,TEST_0000,Dessert
1,TEST_0001,Dairy product
2,TEST_0002,Egg
3,TEST_0003,Meat
4,TEST_0004,Fried food
