# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉     
> 아래 baseline에서는 timm의 사전학습 모델(기본값: EfficientNet-B3)을 로드하여 모델을 학습하고, 예측 파일을 생성하는 프로세스에 대해 알아보겠습니다.

## Contents
- Prepare Environments
- Import Library & Define Functions
- Hyper-parameters
- Load Data
- Train Model
- Inference & Save File


## 1. Prepare Environments

* 데이터 로드를 위한 구글 드라이브를 마운트합니다.
* 필요한 라이브러리를 설치합니다.

## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [1]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from torch.cuda.amp import GradScaler, autocast

In [2]:
# Fix every random seed so that repeated runs produce the same results.
SEED = 2024
# NOTE: assigning PYTHONHASHSEED at runtime only affects Python processes
# started after this point, not the interpreter that is already running.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cuDNN autotuning (benchmark=True) may pick different convolution kernels from
# run to run, which defeats the fixed seed above. Ask for deterministic kernels
# and switch autotuning off; this trades some speed for reproducibility.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# EarlyStopping: stop training once the validation loss stops improving.
class EarlyStopping:
    """Track the validation loss, checkpoint the best model and flag when to stop.

    Call the instance once per epoch with the validation loss and the model.
    Whenever the loss is at least as good as the best one seen so far, the
    model's ``state_dict`` is written to ``path`` and the patience counter is
    reset. Otherwise the counter is incremented, and ``early_stop`` becomes
    ``True`` once it reaches ``patience``.

    Args:
        patience: Number of consecutive non-improving epochs tolerated before
            ``early_stop`` is set.
        verbose: If true, print the counter on every non-improving epoch.
        delta: Minimum amount by which the negated loss must exceed the best
            score to count as an improvement. The default ``0.0`` keeps the
            original behaviour (a tie counts as an improvement).
        path: File the best ``state_dict`` is saved to.
    """

    def __init__(self, patience=5, verbose=False, delta=0.0, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        """Update the stopping state with this epoch's validation loss."""
        # Higher score == lower loss.
        score = -val_loss

        # NaN is the only value that is not equal to itself. Every ordering
        # comparison against NaN is False, so without this guard a diverged
        # epoch would be treated as an improvement, overwrite the best
        # checkpoint and poison ``best_score`` for all later epochs.
        is_nan = bool(score != score)
        worse = self.best_score is not None and score < self.best_score + self.delta

        if is_nan or worse:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(model)
            self.counter = 0

    def save_checkpoint(self, model):
        """Write the model's ``state_dict`` to ``self.path``."""
        torch.save(model.state_dict(), self.path)

In [3]:
# Dataset that yields (image, target) pairs for the rows of a DataFrame.
class ImageDataset(Dataset):
    """Load images listed in a DataFrame from a directory.

    Each row of ``df`` is unpacked into ``(file name, target)``, so ``df`` is
    expected to have exactly two columns in that order.

    Args:
        df: DataFrame whose ``.values`` array is stored and indexed by row.
        path: Directory that the file names are joined onto.
        transform: Optional callable invoked as ``transform(image=img)``; its
            ``'image'`` entry replaces the raw array.
    """

    def __init__(self, df, path, transform=None):
        self.df, self.path, self.transform = df.values, path, transform

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        row = self.df[idx]
        name, target = row
        file_path = os.path.join(self.path, name)
        # Force three channels regardless of the mode the file was stored in.
        rgb_image = Image.open(file_path).convert('RGB')
        img = np.array(rgb_image)
        if not self.transform:
            return img, target
        return self.transform(image=img)['image'], target

In [4]:
def train_one_epoch(loader, model, optimizer, loss_fn, device, scaler):
    """Run one training pass over ``loader`` under autocast with gradient scaling.

    Args:
        loader: Iterable of ``(image, targets)`` batches; must support ``len``.
        model: Model being trained; put into train mode here.
        optimizer: Optimizer stepped once per batch through ``scaler``.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning the batch loss.
        device: Device that each batch is moved to.
        scaler: Gradient scaler used to scale the loss and step the optimizer.

    Returns:
        Dict with ``train_loss`` (mean of the per-batch losses), ``train_acc``
        and ``train_f1`` (macro-averaged F1) computed over the whole epoch.
    """
    model.train()
    running_loss = 0
    epoch_preds, epoch_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad(set_to_none=True)

        # Forward pass and loss are computed under autocast.
        with autocast():
            logits = model(batch_images)
            loss = loss_fn(logits, batch_targets)

        # Scale the loss before backward; the scaler steps the optimizer and
        # then updates its own scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        epoch_targets.extend(batch_targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {batch_loss:.4f}")

    return {
        "train_loss": running_loss / len(loader),
        "train_acc": accuracy_score(epoch_targets, epoch_preds),
        "train_f1": f1_score(epoch_targets, epoch_preds, average='macro'),
    }

In [None]:
# Evaluation pass used for validation.
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        loader: Iterable of ``(image, targets)`` batches; must support ``len``.
        model: Model to evaluate; put into eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning the batch loss.
        device: Device that each batch is moved to.

    Returns:
        Dict with ``val_loss`` (mean of the per-batch losses), ``val_acc`` and
        ``val_f1`` (macro-averaged F1) computed over the whole loader.
    """
    model.eval()
    running_loss = 0
    epoch_preds, epoch_targets = [], []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            running_loss += loss_fn(logits, batch_targets).item()

            epoch_preds.extend(logits.argmax(dim=1).cpu().numpy())
            epoch_targets.extend(batch_targets.cpu().numpy())

    return {
        "val_loss": running_loss / len(loader),
        "val_acc": accuracy_score(epoch_targets, epoch_preds),
        "val_f1": f1_score(epoch_targets, epoch_preds, average='macro'),
    }

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [5]:
# Device: use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Data config: root directory holding the CSV files and image folders.
data_path = 'data/'

# Model config: name passed to timm ('resnet50', 'efficientnet_b0', ...).
model_name = 'efficientnet_b3'

# Training config.
img_size = 224    # side length images are resized to
LR = 1e-4         # optimizer learning rate
EPOCHS = 50       # upper bound on the number of epochs
BATCH_SIZE = 16   # batch size shared by all loaders
num_workers = 0   # DataLoader worker processes

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [6]:
# Per-channel normalization statistics used by both pipelines.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Random augmentations applied only at training time, each with probability 0.5.
_train_augmentations = [
    A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
]

# Training transform: resize -> random augmentations -> normalize -> tensor.
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    *_train_augmentations,
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ToTensorV2(),
])

# Validation/test transform: resize -> normalize -> tensor, no augmentation.
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ToTensorV2(),
])

In [None]:
# Read the training labels and hold out 20% as a validation split,
# stratified on the 'target' column and seeded for a repeatable split.
train_csv_path = os.path.join(data_path, "train.csv")
train_df = pd.read_csv(train_csv_path)
train_data, val_data = train_test_split(
    train_df,
    test_size=0.2,
    random_state=SEED,
    stratify=train_df['target'],
)

In [7]:
# Build the datasets. Train and validation read from the same image folder;
# only the training split gets the augmenting transform.
train_image_dir = os.path.join(data_path, "train/")
trn_dataset = ImageDataset(train_data, train_image_dir, transform=trn_transform)
val_dataset = ImageDataset(val_data, train_image_dir, transform=tst_transform)

# The test dataset takes its rows from the sample submission file.
tst_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
test_image_dir = os.path.join(data_path, "test/")
tst_dataset = ImageDataset(tst_df, test_image_dir, transform=tst_transform)

print(f'Train dataset size: {len(trn_dataset)}')
print(f'Validation dataset size: {len(val_dataset)}')
print(f'Test dataset size: {len(tst_dataset)}')

1570 3140


In [8]:
# Build the DataLoaders. Options shared by all three are collected once.
common_loader_args = dict(batch_size=BATCH_SIZE, pin_memory=True)

# Training batches are shuffled; the last partial batch is kept.
trn_loader = DataLoader(
    trn_dataset,
    shuffle=True,
    num_workers=num_workers,
    drop_last=False,
    **common_loader_args,
)
# Validation keeps the dataset order.
val_loader = DataLoader(
    val_dataset,
    shuffle=False,
    num_workers=num_workers,
    drop_last=False,
    **common_loader_args,
)
# Test keeps the dataset order so predictions line up with the row order.
tst_loader = DataLoader(
    tst_dataset,
    shuffle=False,
    num_workers=0,
    **common_loader_args,
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [9]:
# Build the pretrained model with a 17-class head and move it to the device.
model = timm.create_model(model_name, pretrained=True, num_classes=17)
model = model.to(device)

# Loss, optimizer, LR scheduler (cosine annealing over EPOCHS steps) and the
# gradient scaler used by the training function.
loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
scaler = GradScaler()

# Early stopping with a patience of 5 epochs; prints its counter.
early_stopping = EarlyStopping(patience=5, verbose=True)

In [10]:
# Training loop: train, validate and step the LR schedule once per epoch,
# stopping early when the validation loss stops improving.
for epoch in range(EPOCHS):
    ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device, scaler=scaler)
    val_ret = validate(val_loader, model, loss_fn, device=device)
    scheduler.step()

    # Log the training and validation metrics for this epoch.
    log = f"Epoch {epoch+1}/{EPOCHS}\n"
    for k, v in ret.items():
        log += f"{k}: {v:.4f}  "
    for k, v in val_ret.items():
        log += f"{k}: {v:.4f}  "
    print(log)

    # Early-stopping check; this also checkpoints the best weights so far.
    early_stopping(val_ret['val_loss'], model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

# The model in memory holds the weights of the last epoch that ran, which can
# be several epochs past the best validation loss. Restore the checkpoint that
# EarlyStopping saved so that inference uses the best weights. best_score is
# only set once a checkpoint has been written during this run.
if early_stopping.best_score is not None:
    model.load_state_dict(torch.load('checkpoint.pt', map_location=device))

Loss: 3.0239: 100%|██████████| 50/50 [00:08<00:00,  6.20it/s]


train_loss: 2.6095
train_acc: 0.1955
train_f1: 0.1536
epoch: 0.0000



Loss: 2.9590: 100%|██████████| 50/50 [00:06<00:00,  8.15it/s]


train_loss: 2.0702
train_acc: 0.3981
train_f1: 0.3291
epoch: 1.0000



Loss: 2.5543: 100%|██████████| 50/50 [00:06<00:00,  8.15it/s]


train_loss: 1.6589
train_acc: 0.5140
train_f1: 0.4468
epoch: 2.0000



Loss: 2.7678: 100%|██████████| 50/50 [00:06<00:00,  8.22it/s]


train_loss: 1.3199
train_acc: 0.5981
train_f1: 0.5546
epoch: 3.0000



Loss: 3.6605: 100%|██████████| 50/50 [00:06<00:00,  8.12it/s]


train_loss: 1.0680
train_acc: 0.6682
train_f1: 0.6341
epoch: 4.0000



Loss: 2.8290: 100%|██████████| 50/50 [00:06<00:00,  8.19it/s]


train_loss: 0.9847
train_acc: 0.6764
train_f1: 0.6457
epoch: 5.0000



Loss: 3.0303: 100%|██████████| 50/50 [00:06<00:00,  8.16it/s]


train_loss: 1.0150
train_acc: 0.6650
train_f1: 0.6349
epoch: 6.0000



Loss: 2.3271: 100%|██████████| 50/50 [00:06<00:00,  8.13it/s]


train_loss: 0.9654
train_acc: 0.6879
train_f1: 0.6625
epoch: 7.0000



Loss: 2.5383: 100%|██████████| 50/50 [00:06<00:00,  8.15it/s]


train_loss: 0.7560
train_acc: 0.7643
train_f1: 0.7410
epoch: 8.0000



Loss: 3.3006: 100%|██████████| 50/50 [00:06<00:00,  8.07it/s]


train_loss: 1.0055
train_acc: 0.6777
train_f1: 0.6522
epoch: 9.0000



Loss: 2.4053: 100%|██████████| 50/50 [00:06<00:00,  8.16it/s]


train_loss: 0.8383
train_acc: 0.7178
train_f1: 0.6897
epoch: 10.0000



Loss: 1.9737: 100%|██████████| 50/50 [00:06<00:00,  8.26it/s]


train_loss: 0.6676
train_acc: 0.7777
train_f1: 0.7575
epoch: 11.0000



Loss: 2.7727: 100%|██████████| 50/50 [00:06<00:00,  8.22it/s]


train_loss: 0.6078
train_acc: 0.7936
train_f1: 0.7782
epoch: 12.0000



Loss: 2.9521: 100%|██████████| 50/50 [00:06<00:00,  8.15it/s]


train_loss: 0.5146
train_acc: 0.8478
train_f1: 0.8325
epoch: 13.0000



Loss: 2.0341: 100%|██████████| 50/50 [00:06<00:00,  8.19it/s]


train_loss: 0.4551
train_acc: 0.8682
train_f1: 0.8593
epoch: 14.0000



Loss: 3.2889: 100%|██████████| 50/50 [00:06<00:00,  8.27it/s]


train_loss: 0.4305
train_acc: 0.8790
train_f1: 0.8697
epoch: 15.0000



Loss: 2.2498: 100%|██████████| 50/50 [00:06<00:00,  8.24it/s]


train_loss: 0.5522
train_acc: 0.8255
train_f1: 0.8105
epoch: 16.0000



Loss: 3.1394: 100%|██████████| 50/50 [00:06<00:00,  8.29it/s]


train_loss: 0.9039
train_acc: 0.7255
train_f1: 0.7096
epoch: 17.0000



Loss: 2.4328: 100%|██████████| 50/50 [00:06<00:00,  8.21it/s]


train_loss: 0.7970
train_acc: 0.7478
train_f1: 0.7304
epoch: 18.0000



Loss: 3.2207: 100%|██████████| 50/50 [00:06<00:00,  8.23it/s]

train_loss: 0.6032
train_acc: 0.8242
train_f1: 0.8117
epoch: 19.0000






## 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [11]:
# Predict a class index for every test image.
preds_list = []

model.eval()
# Gradients are not needed for inference, so disable tracking for the whole loop.
with torch.no_grad():
    for image, _ in tqdm(tst_loader):
        image = image.to(device)
        preds = model(image)
        # Keep only the arg-max class of each row, moved back to the CPU.
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:10<00:00,  9.14it/s]


In [12]:
# Rebuild the test table from the dataset's stored rows and overwrite the
# 'target' column with the predicted class indices.
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target']).assign(target=preds_list)

In [13]:
# Sanity check: the IDs in the prediction table must match the sample
# submission file row by row.
sample_submission_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [14]:
# Write the predictions to pred.csv without the DataFrame index column.
pred_df.to_csv("pred.csv", index=False)

In [15]:
# Preview the first rows of the prediction table.
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,10
2,00396fbc1f6cc21d.jpg,9
3,00471f8038d9c4b6.jpg,10
4,00901f504008d884.jpg,2
