# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉     
> 아래 baseline에서는 ResNet 모델을 로드하여, 모델을 학습 및 예측 파일 생성하는 프로세스에 대해 알아보겠습니다.

## Contents
- Prepare Environments
- Import Library & Define Functions
- Hyper-parameters
- Load Data
- Train Model
- Inference & Save File


## 1. Prepare Environments

* 데이터 로드를 위한 구글 드라이브를 마운트합니다.
* 필요한 라이브러리를 설치합니다.

## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [1]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from torch.cuda.amp import GradScaler, autocast

In [2]:
# 시드를 고정합니다.
SEED = 2024
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [3]:
# EarlyStopping 클래스 정의
class EarlyStopping:
    def __init__(self, patience=5, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(model)
        elif score < self.best_score:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(model)
            self.counter = 0

    def save_checkpoint(self, model):
        torch.save(model.state_dict(), 'checkpoint.pt')

In [4]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, df, path, transform=None):
        self.df = df.values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)).convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [5]:
def train_one_epoch(loader, model, optimizer, loss_fn, device, scaler):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad(set_to_none=True)

        with autocast():
            preds = model(image)
            loss = loss_fn(preds, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    return ret

In [6]:
# 검증을 위한 함수입니다.
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())
            targets_list.extend(targets.cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "val_loss": val_loss,
        "val_acc": val_acc,
        "val_f1": val_f1,
    }

    return ret

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [7]:
# device 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
data_path = 'data/'

# model config
model_name = 'efficientnet_b0'  # 'resnet50', 'efficientnet_b0', ...

# training config
img_size = 224
LR = 0.0009034781909016593
EPOCHS = 50
BATCH_SIZE = 16
num_workers = 0

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [8]:
# augmentation을 위한 transform 코드
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [9]:
# 데이터셋 로드 및 분할
train_df = pd.read_csv(os.path.join(data_path, "train.csv"))
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=SEED, stratify=train_df['target'])

In [10]:
# Dataset 정의
trn_dataset = ImageDataset(
    train_data,
    os.path.join(data_path, "train/"),
    transform=trn_transform
)
val_dataset = ImageDataset(
    val_data,
    os.path.join(data_path, "train/"),
    transform=tst_transform
)
tst_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
tst_dataset = ImageDataset(
    tst_df,
    os.path.join(data_path, "test/"),
    transform=tst_transform
)
print(f'Train dataset size: {len(trn_dataset)}')
print(f'Validation dataset size: {len(val_dataset)}')
print(f'Test dataset size: {len(tst_dataset)}')

Train dataset size: 1256
Validation dataset size: 314
Test dataset size: 3140


In [11]:
# DataLoader 정의
trn_loader = DataLoader(
    trn_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=False
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=False
)
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [12]:
# 모델 정의
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=17
).to(device)

# Loss 함수, 옵티마이저, 스케줄러 정의
loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
scaler = GradScaler()

# EarlyStopping 객체 생성
early_stopping = EarlyStopping(patience=5, verbose=True)

In [13]:
# 학습 루프
for epoch in range(EPOCHS):
    ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device, scaler=scaler)
    val_ret = validate(val_loader, model, loss_fn, device=device)
    scheduler.step()

    # 로그 출력
    log = f"Epoch {epoch+1}/{EPOCHS}\n"
    for k, v in ret.items():
        log += f"{k}: {v:.4f}  "
    for k, v in val_ret.items():
        log += f"{k}: {v:.4f}  "
    print(log)

    # Early Stopping 체크
    early_stopping(val_ret['val_loss'], model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

Loss: 0.8714: 100%|██████████| 79/79 [00:11<00:00,  6.99it/s]


Epoch 1/50
train_loss: 1.3042  train_acc: 0.6218  train_f1: 0.5977  val_loss: 0.5449  val_acc: 0.8312  val_f1: 0.8124  


Loss: 0.7548: 100%|██████████| 79/79 [00:07<00:00,  9.92it/s]


Epoch 2/50
train_loss: 0.5797  train_acc: 0.8105  train_f1: 0.7929  val_loss: 0.3977  val_acc: 0.8822  val_f1: 0.8654  


Loss: 0.6800: 100%|██████████| 79/79 [00:08<00:00,  9.80it/s]


Epoch 3/50
train_loss: 0.3705  train_acc: 0.8694  train_f1: 0.8621  val_loss: 0.5651  val_acc: 0.8567  val_f1: 0.8370  
EarlyStopping counter: 1 out of 5


Loss: 0.2073: 100%|██████████| 79/79 [00:07<00:00, 10.09it/s]


Epoch 4/50
train_loss: 0.4209  train_acc: 0.8718  train_f1: 0.8600  val_loss: 0.4010  val_acc: 0.8726  val_f1: 0.8570  
EarlyStopping counter: 2 out of 5


Loss: 0.4214: 100%|██████████| 79/79 [00:07<00:00, 10.05it/s]


Epoch 5/50
train_loss: 0.3060  train_acc: 0.8949  train_f1: 0.8877  val_loss: 0.3579  val_acc: 0.8822  val_f1: 0.8685  


Loss: 0.6020: 100%|██████████| 79/79 [00:08<00:00,  9.80it/s]


Epoch 6/50
train_loss: 0.3028  train_acc: 0.8989  train_f1: 0.8940  val_loss: 0.4810  val_acc: 0.8567  val_f1: 0.8423  
EarlyStopping counter: 1 out of 5


Loss: 0.8850: 100%|██████████| 79/79 [00:08<00:00,  9.86it/s]


Epoch 7/50
train_loss: 0.2756  train_acc: 0.9180  train_f1: 0.9139  val_loss: 0.3121  val_acc: 0.9013  val_f1: 0.8902  


Loss: 0.1014: 100%|██████████| 79/79 [00:08<00:00,  9.86it/s]


Epoch 8/50
train_loss: 0.1887  train_acc: 0.9395  train_f1: 0.9350  val_loss: 0.3475  val_acc: 0.8949  val_f1: 0.8868  
EarlyStopping counter: 1 out of 5


Loss: 0.0059: 100%|██████████| 79/79 [00:08<00:00,  9.74it/s]


Epoch 9/50
train_loss: 0.2425  train_acc: 0.9116  train_f1: 0.9031  val_loss: 0.7976  val_acc: 0.8854  val_f1: 0.8779  
EarlyStopping counter: 2 out of 5


Loss: 0.0166: 100%|██████████| 79/79 [00:07<00:00, 10.00it/s]


Epoch 10/50
train_loss: 0.1912  train_acc: 0.9291  train_f1: 0.9240  val_loss: 0.4055  val_acc: 0.9204  val_f1: 0.9095  
EarlyStopping counter: 3 out of 5


Loss: 0.1288: 100%|██████████| 79/79 [00:08<00:00,  9.73it/s]


Epoch 11/50
train_loss: 0.1586  train_acc: 0.9467  train_f1: 0.9445  val_loss: 0.3082  val_acc: 0.9108  val_f1: 0.9012  


Loss: 0.0332: 100%|██████████| 79/79 [00:08<00:00,  9.77it/s]


Epoch 12/50
train_loss: 0.1436  train_acc: 0.9514  train_f1: 0.9508  val_loss: 0.3464  val_acc: 0.9236  val_f1: 0.9191  
EarlyStopping counter: 1 out of 5


Loss: 0.6863: 100%|██████████| 79/79 [00:07<00:00,  9.96it/s]


Epoch 13/50
train_loss: 0.1296  train_acc: 0.9554  train_f1: 0.9538  val_loss: 0.2924  val_acc: 0.9140  val_f1: 0.9117  


Loss: 0.3172: 100%|██████████| 79/79 [00:07<00:00,  9.98it/s]


Epoch 14/50
train_loss: 0.1067  train_acc: 0.9642  train_f1: 0.9625  val_loss: 0.3012  val_acc: 0.9172  val_f1: 0.9121  
EarlyStopping counter: 1 out of 5


Loss: 0.2429: 100%|██████████| 79/79 [00:08<00:00,  9.80it/s]


Epoch 15/50
train_loss: 0.1255  train_acc: 0.9602  train_f1: 0.9579  val_loss: 0.3895  val_acc: 0.8981  val_f1: 0.8990  
EarlyStopping counter: 2 out of 5


Loss: 0.1445: 100%|██████████| 79/79 [00:07<00:00,  9.95it/s]


Epoch 16/50
train_loss: 0.0820  train_acc: 0.9721  train_f1: 0.9720  val_loss: 0.4346  val_acc: 0.9013  val_f1: 0.8968  
EarlyStopping counter: 3 out of 5


Loss: 0.0527: 100%|██████████| 79/79 [00:08<00:00,  9.87it/s]


Epoch 17/50
train_loss: 0.1012  train_acc: 0.9705  train_f1: 0.9689  val_loss: 0.3960  val_acc: 0.9076  val_f1: 0.9012  
EarlyStopping counter: 4 out of 5


Loss: 0.0003: 100%|██████████| 79/79 [00:08<00:00,  9.86it/s]


Epoch 18/50
train_loss: 0.0755  train_acc: 0.9721  train_f1: 0.9713  val_loss: 0.3682  val_acc: 0.9045  val_f1: 0.9031  
EarlyStopping counter: 5 out of 5
Early stopping


# 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [14]:
# 테스트 데이터에 대한 예측
preds_list = []

model.eval()
for image, _ in tqdm(tst_loader):
    image = image.to(device)

    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 197/197 [00:13<00:00, 14.51it/s]


In [15]:
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list

In [16]:
sample_submission_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [17]:
pred_df.to_csv("pred.csv", index=False)

In [18]:
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,6
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,10
4,00901f504008d884.jpg,2
