# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉     
> 아래 baseline에서는 ResNet 모델을 로드하여, 모델을 학습 및 예측 파일 생성하는 프로세스에 대해 알아보겠습니다.

## Contents
- Prepare Environments
- Import Library & Define Functions
- Hyper-parameters
- Load Data
- Train Model
- Inference & Save File


## 1. Prepare Environments

* 데이터 로드를 위한 구글 드라이브를 마운트합니다.
* 필요한 라이브러리를 설치합니다.

In [1]:
# 구글 드라이브 마운트, Colab을 이용하지 않는다면 패스해도 됩니다.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)
drive.mount('/content/drive')

Mounted at /gdrive
Mounted at /content/drive


In [None]:
# # -C 옵션으로 원하는 위치에 다운로드를 받도록 설정하였습니다.
# !tar -zxvf /content/drive/MyDrive/fastcampus/dataset/code.tar.gz -C /content/drive/MyDrive/fastcampus/dataset
# !tar -zxvf /content/drive/MyDrive/fastcampus/dataset/data.tar.gz -C /content/drive/MyDrive/fastcampus/dataset

# # requirements.txt 파일, baseline_code 파일 이동 및 기존 경로의 파일 삭제
# !mv /content/baseline_code.ipynb /content/drive/MyDrive/fastcampus/dataset/
# !mv /content/requirements.txt /content/drive/MyDrive/fastcampus/dataset/

In [2]:
# 필요한 라이브러리를 설치합니다.
!pip install timm



## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [3]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

  check_for_updates()


In [4]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [5]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [6]:
# one epoch 학습을 위한 함수입니다.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        model.zero_grad(set_to_none=True)

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    return ret

In [None]:
# 평가 함수
def evaluate(loader, model, device):
    model.eval()
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in tqdm(loader):
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())
            targets_list.extend(targets.cpu().numpy())

    accuracy = accuracy_score(targets_list, preds_list)
    f1 = f1_score(targets_list, preds_list, average='macro')
    return {"accuracy": accuracy, "f1_score": f1, "pred_list": preds_list}

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [32]:
# device 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
data_path = '/content/drive/MyDrive/fastcampus/dataset/data'

# model config
model_name = 'vit_base_patch16_224'  # Document Classification에 성능이 좋은 ViT 모델 선택

# training config
img_size = 224
LR = 1e-4
EPOCHS = 5
BATCH_SIZE = 16
num_workers = 2

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [33]:
# augmentation을 위한 transform 코드 (crop, rotate, mosaic 포함)
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),  # 크기 조정
    A.Rotate(limit=15),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [34]:
# Dataset 정의
trn_dataset = ImageDataset(
    f"{data_path}/train.csv",
    f"{data_path}/train/",
    transform=trn_transform
)
tst_dataset = ImageDataset(
    f"{data_path}/sample_submission.csv",
    f"{data_path}/test/",
    transform=tst_transform
)
print(len(trn_dataset), len(tst_dataset))

1570 3140


In [35]:
# DataLoader 정의
trn_loader = DataLoader(
    trn_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=False
)
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [36]:
# load model
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=17
).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)

In [37]:
# 학습 및 평가 코드
for epoch in range(EPOCHS):
    print(f"Epoch [{epoch+1}/{EPOCHS}]")
    train_metrics = train_one_epoch(trn_loader, model, optimizer, loss_fn, device)
    print(f"Train Loss: {train_metrics['train_loss']:.4f}, "
          f"Train Acc: {train_metrics['train_acc']:.4f}, "
          f"Train F1: {train_metrics['train_f1']:.4f}")

Epoch [1/5]


Loss: 0.0047: 100%|██████████| 99/99 [07:33<00:00,  4.58s/it]


Train Loss: 1.1315, Train Acc: 0.6134, Train F1: 0.5709
Epoch [2/5]


Loss: 0.0041: 100%|██████████| 99/99 [00:58<00:00,  1.68it/s]


Train Loss: 0.5122, Train Acc: 0.8089, Train F1: 0.7878
Epoch [3/5]


Loss: 0.0430: 100%|██████████| 99/99 [00:57<00:00,  1.71it/s]


Train Loss: 0.3810, Train Acc: 0.8535, Train F1: 0.8437
Epoch [4/5]


Loss: 0.0064: 100%|██████████| 99/99 [00:57<00:00,  1.72it/s]


Train Loss: 0.3057, Train Acc: 0.8879, Train F1: 0.8768
Epoch [5/5]


Loss: 0.5505: 100%|██████████| 99/99 [00:57<00:00,  1.71it/s]

Train Loss: 0.2189, Train Acc: 0.9134, Train F1: 0.9099





# 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [42]:
# 최종 평가
eval_metrics = evaluate(tst_loader, model, device)
print(f"Test Accuracy: {eval_metrics['accuracy']:.4f}, Test F1 Score: {eval_metrics['f1_score']:.4f}")

100%|██████████| 197/197 [34:51<00:00, 10.62s/it]

Test Accuracy: 0.0723, Test F1 Score: 0.0079





In [43]:
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
pred_df['target'] = eval_metrics['pred_list']

In [44]:
sample_submission_df = pd.read_csv(f"{data_path}/sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [45]:
pred_df.to_csv(f"{data_path}/pred.csv", index=False)

In [46]:
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,9
3,00471f8038d9c4b6.jpg,6
4,00901f504008d884.jpg,2
