# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉     
> 아래 baseline에서는 ResNet 모델을 로드하여, 모델을 학습 및 예측 파일 생성하는 프로세스에 대해 알아보겠습니다.

## Contents
- Prepare Environments
- Import Library & Define Functions
- Hyper-parameters
- Load Data
- Train Model
- Inference & Save File


## 1. Prepare Environments

* 데이터 로드를 위한 구글 드라이브를 마운트합니다.
* 필요한 라이브러리를 설치합니다.

## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [1]:
import multiprocessing

num_workers = multiprocessing.cpu_count()  # 시스템의 CPU 코어 수를 가져옴

In [2]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from torch.cuda.amp import GradScaler, autocast

# Optuna 임포트
import optuna
from optuna.integration import PyTorchLightningPruningCallback

In [3]:
# 시드를 고정합니다.
SEED = 2024
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [4]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, df, path, transform=None):
        self.df = df.values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)).convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [5]:
# one epoch 학습을 위한 함수입니다.
def train_one_epoch(loader, model, optimizer, loss_fn, device, scaler):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader, leave=False)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad(set_to_none=True)

        with autocast():
            preds = model(image)
            loss = loss_fn(preds, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    return ret

In [6]:
# 검증을 위한 함수입니다.
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())
            targets_list.extend(targets.cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "val_loss": val_loss,
        "val_acc": val_acc,
        "val_f1": val_f1,
    }

    return ret

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [7]:
# device 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 데이터셋 로드
data_path = 'data/'
train_df = pd.read_csv(os.path.join(data_path, "train.csv"))
tst_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))

# 테스트 데이터셋 정의
tst_dataset = ImageDataset(
    tst_df,
    os.path.join(data_path, "test/"),
    transform=None  # 나중에 설정
)

In [8]:
# Optuna 목적 함수 정의
def objective(trial):
    # 하이퍼파라미터 탐색 공간 정의
    model_name = trial.suggest_categorical('model_name', ['resnet50', 'efficientnet_b0', 'efficientnet_b3'])
    img_size = trial.suggest_categorical('img_size', [224, 256, 299])
    batch_size = trial.suggest_categorical('batch_size', [16, 32])
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
    epochs = 10  # 하이퍼파라미터 탐색 시에는 에포크 수를 적게 설정

    # 데이터 변환 정의
    trn_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.RandomRotate90(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ShiftScaleRotate(p=0.5),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    tst_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # 데이터셋 분할
    train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=SEED, stratify=train_df['target'])

    # Dataset 정의
    trn_dataset = ImageDataset(
        train_data,
        os.path.join(data_path, "train/"),
        transform=trn_transform
    )
    val_dataset = ImageDataset(
        val_data,
        os.path.join(data_path, "train/"),
        transform=tst_transform
    )

    # DataLoader 정의
    trn_loader = DataLoader(
        trn_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )

    # 모델 정의
    model = timm.create_model(
        model_name,
        pretrained=True,
        num_classes=17
    ).to(device)

    # Loss 함수, 옵티마이저, 스케줄러 정의
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs)
    scaler = GradScaler()

    # 학습 루프
    for epoch in range(epochs):
        ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device, scaler=scaler)
        val_ret = validate(val_loader, model, loss_fn, device=device)
        scheduler.step()

        # Optuna에 중간 결과 보고
        trial.report(val_ret['val_f1'], epoch)

        # Pruning 여부 확인
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_ret['val_f1']  # 최대화할 목표: val_f1

In [9]:
# Optuna 스터디 생성 및 최적화
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# 최적의 하이퍼파라미터 출력
print('Best trial:')
trial = study.best_trial

print(f'  Val F1 Score: {trial.value}')
print('  Best hyperparameters:')
for key, value in trial.params.items():
    print(f'    {key}: {value}')

# 최적의 하이퍼파라미터로 최종 모델 학습
best_params = trial.params

# 최적의 하이퍼파라미터 설정
model_name = best_params['model_name']
img_size = best_params['img_size']
batch_size = best_params['batch_size']
lr = best_params['lr']
weight_decay = best_params['weight_decay']
epochs = 30  # 최종 학습 시에는 더 많은 에포크 사용

[I 2024-10-31 08:59:30,036] A new study created in memory with name: no-name-c247f7ea-68ca-44c7-a73b-117a637099cc
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
[I 2024-10-31 09:00:39,766] Trial 0 finished with value: 0.7472897314758593 and parameters: {'model_name': 'efficientnet_b0', 'img_size': 299, 'batch_size': 32, 'lr': 2.6728860930060958e-05, 'weight_decay': 0.0001247288615611562}. Best is trial 0 with value: 0.7472897314758593.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
[I 2024-10-31 09:01:48,551] Trial 1 finished with value: 0.8565063671862352 and parameters: {'model_name': 'resnet50', 'img_size': 224, 'batch_size': 16, 'lr': 0.0002034089440817232, 'weight_decay': 0.0005272902597367718}. Best is trial 1 with value: 0.8565063671862352.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
  weight_decay = trial.suggest_loguniform('wei

Best trial:
  Val F1 Score: 0.931033255030663
  Best hyperparameters:
    model_name: efficientnet_b0
    img_size: 224
    batch_size: 16
    lr: 0.0009034781909016593
    weight_decay: 2.1660707207344826e-05


## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [10]:
# 데이터 변환 재정의
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [11]:
# 전체 학습 데이터로 재학습
trn_dataset = ImageDataset(
    train_df,
    os.path.join(data_path, "train/"),
    transform=trn_transform
)

trn_loader = DataLoader(
    trn_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=False
)

tst_dataset.transform = tst_transform

In [12]:
# 모델 정의
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=17
).to(device)

# Loss 함수, 옵티마이저, 스케줄러 정의
loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = CosineAnnealingLR(optimizer, T_max=epochs)
scaler = GradScaler()

In [13]:
# 학습 루프
for epoch in range(epochs):
    ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device, scaler=scaler)
    scheduler.step()

    # 로그 출력
    log = f"Epoch {epoch+1}/{epochs}\n"
    for k, v in ret.items():
        log += f"{k}: {v:.4f}  "
    print(log)

# 테스트 데이터에 대한 예측
tst_loader = DataLoader(
    tst_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

preds_list = []

model.eval()

                                                             

Epoch 1/30
train_loss: 1.3218  train_acc: 0.6210  train_f1: 0.5898  


                                                             

Epoch 2/30
train_loss: 0.6991  train_acc: 0.7790  train_f1: 0.7573  


                                                             

Epoch 3/30
train_loss: 0.4975  train_acc: 0.8350  train_f1: 0.8169  


                                                             

Epoch 4/30
train_loss: 0.3401  train_acc: 0.8790  train_f1: 0.8721  


                                                             

Epoch 5/30
train_loss: 0.3069  train_acc: 0.9025  train_f1: 0.8970  


                                                             

Epoch 6/30
train_loss: 0.2328  train_acc: 0.9197  train_f1: 0.9149  


                                                             

Epoch 7/30
train_loss: 0.2523  train_acc: 0.9172  train_f1: 0.9098  


                                                             

Epoch 8/30
train_loss: 0.2205  train_acc: 0.9255  train_f1: 0.9224  


                                                             

Epoch 9/30
train_loss: 0.1953  train_acc: 0.9325  train_f1: 0.9275  


                                                             

Epoch 10/30
train_loss: 0.1764  train_acc: 0.9408  train_f1: 0.9364  


                                                             

Epoch 11/30
train_loss: 0.1507  train_acc: 0.9497  train_f1: 0.9505  


                                                             

Epoch 12/30
train_loss: 0.1595  train_acc: 0.9382  train_f1: 0.9350  


                                                             

Epoch 13/30
train_loss: 0.1146  train_acc: 0.9561  train_f1: 0.9522  


                                                             

Epoch 14/30
train_loss: 0.1031  train_acc: 0.9624  train_f1: 0.9606  


                                                             

Epoch 15/30
train_loss: 0.0878  train_acc: 0.9643  train_f1: 0.9621  


                                                             

Epoch 16/30
train_loss: 0.0680  train_acc: 0.9752  train_f1: 0.9734  


                                                             

Epoch 17/30
train_loss: 0.0689  train_acc: 0.9752  train_f1: 0.9748  


                                                             

Epoch 18/30
train_loss: 0.0514  train_acc: 0.9809  train_f1: 0.9801  


                                                             

Epoch 19/30
train_loss: 0.0459  train_acc: 0.9860  train_f1: 0.9856  


                                                             

Epoch 20/30
train_loss: 0.0336  train_acc: 0.9911  train_f1: 0.9901  


                                                             

Epoch 21/30
train_loss: 0.0380  train_acc: 0.9885  train_f1: 0.9881  


                                                             

Epoch 22/30
train_loss: 0.0365  train_acc: 0.9866  train_f1: 0.9855  


                                                             

Epoch 23/30
train_loss: 0.0216  train_acc: 0.9924  train_f1: 0.9919  


                                                             

Epoch 24/30
train_loss: 0.0254  train_acc: 0.9943  train_f1: 0.9944  


                                                             

Epoch 25/30
train_loss: 0.0215  train_acc: 0.9917  train_f1: 0.9911  


                                                             

Epoch 26/30
train_loss: 0.0151  train_acc: 0.9949  train_f1: 0.9946  


                                                             

Epoch 27/30
train_loss: 0.0186  train_acc: 0.9955  train_f1: 0.9953  


                                                             

Epoch 28/30
train_loss: 0.0164  train_acc: 0.9949  train_f1: 0.9941  


                                                             

Epoch 29/30
train_loss: 0.0162  train_acc: 0.9955  train_f1: 0.9956  


                                                             

Epoch 30/30
train_loss: 0.0122  train_acc: 0.9968  train_f1: 0.9968  




EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
      

In [14]:
for image, _ in tqdm(tst_loader):
    image = image.to(device)

    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 197/197 [00:06<00:00, 32.25it/s] 


In [15]:
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list

In [16]:
sample_submission_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [17]:
pred_df.to_csv("pred.csv", index=False)