# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉     
> 아래 baseline에서는 ResNet 모델을 로드하여 학습하고, 예측 파일을 생성하는 프로세스에 대해 알아보겠습니다.

## Contents
- Prepare Environments
- Import Library & Define Functions
- Hyper-parameters
- Load Data
- Train Model
- Inference & Save File


## 1. Prepare Environments

* 데이터 로드를 위한 구글 드라이브를 마운트합니다.
* 필요한 라이브러리를 설치합니다.

In [1]:
# 구글 드라이브 마운트, Colab을 이용하지 않는다면 패스해도 됩니다.
# from google.colab import drive
# drive.mount('/gdrive', force_remount=True)
# drive.mount('/content/drive')

Mounted at /gdrive
Mounted at /content/drive


In [2]:
# 구글 드라이브에 업로드된 대회 데이터를 압축 해제하고 로컬에 저장합니다.
# !tar -xvf drive/MyDrive/datasets_fin.tar > /dev/null

In [3]:
# 필요한 라이브러리를 설치합니다.
# !pip install timm

Collecting timm
  Downloading timm-0.9.10-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: timm
Successfully installed timm-0.9.10


## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [1]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# Fix every random number generator seed so that runs are repeatable.
SEED = 42
# NOTE: PYTHONHASHSEED is read at interpreter start-up, so this assignment only
# affects Python processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark=True lets cuDNN auto-tune and possibly select different
# kernels from run to run, which defeats the seeding above. Request
# deterministic kernels and turn the auto-tuner off instead.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Dataset that reads (file name, label) rows from a CSV and loads images on demand.
class ImageDataset(Dataset):
    """Image classification dataset backed by a CSV index file.

    Args:
        csv: Path to a CSV file; each row is unpacked as ``(name, target)``.
        path: Directory that contains the image files named in the CSV.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, csv, path, transform=None):
        # Kept as a plain ndarray because other cells read ``.df`` directly.
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``."""
        name, target = self.df[idx]
        # Force 3-channel RGB: grayscale / RGBA / palette files would otherwise
        # yield arrays that do not match a 3-channel normalisation step.
        # The context manager closes the file once the pixels have been read.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [4]:
# Run a single training epoch and report aggregate metrics.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable yielding ``(image, targets)`` batches.
        model: Network being optimised; switched to train mode here.
        optimizer: Optimizer whose ``step()`` is called once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning the batch loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        dict with ``train_loss`` (sum of per-batch losses divided by the
        number of batches), ``train_acc`` and macro-averaged ``train_f1``.
    """
    model.train()
    batch_losses = []
    epoch_preds = []
    epoch_targets = []

    progress = tqdm(loader)
    for image, targets in progress:
        image, targets = image.to(device), targets.to(device)

        # Drop the gradients left over from the previous step.
        model.zero_grad(set_to_none=True)

        logits = model(image)
        loss = loss_fn(logits, targets)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        batch_losses.append(batch_loss)
        epoch_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        epoch_targets.extend(targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {batch_loss:.4f}")

    return {
        "train_loss": sum(batch_losses) / len(loader),
        "train_acc": accuracy_score(epoch_targets, epoch_preds),
        "train_f1": f1_score(epoch_targets, epoch_preds, average='macro'),
    }

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [5]:
# Compute device: use the GPU when one is visible, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Data: root directory of the competition dataset.
data_path = '../datasets_fin/'

# Model: architecture name handed to timm (e.g. 'resnet50', 'efficientnet_b0', ...).
model_name = 'resnet34'

# Training: input resolution and data-loading workers, then optimisation settings.
img_size, num_workers = 128, 0
LR, EPOCHS, BATCH_SIZE = 1e-3, 15, 32

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [6]:
def _make_preprocess():
    """Build the resize -> normalize -> tensor pipeline used by both splits."""
    steps = [
        # Resize every image to a square img_size x img_size input.
        A.Resize(height=img_size, width=img_size),
        # Per-channel normalisation (these are the mean/std values commonly
        # used with ImageNet-pretrained backbones).
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        # Convert the numpy image into a PyTorch tensor.
        ToTensorV2(),
    ]
    return A.Compose(steps)


# Transform applied to training images (no random augmentation is applied yet).
trn_transform = _make_preprocess()

# Transform applied to test images.
tst_transform = _make_preprocess()

In [7]:
# Build the train / test datasets. All paths are derived from `data_path` so
# the dataset location is configured in exactly one place.
trn_dataset = ImageDataset(
    os.path.join(data_path, "train.csv"),
    os.path.join(data_path, "train/"),
    transform=trn_transform,
)
# The test split is indexed by sample_submission.csv; the target values it
# carries are discarded by the inference loop.
tst_dataset = ImageDataset(
    os.path.join(data_path, "sample_submission.csv"),
    os.path.join(data_path, "test/"),
    transform=tst_transform,
)
print(len(trn_dataset), len(tst_dataset))

1570 3140


In [8]:
# Inspect the raw (file name, label) index array of the training set.
trn_dataset.df

array([['002f99746285dfdd.jpg', 16],
       ['008ccd231e1fea5d.jpg', 10],
       ['008f5911bfda7695.jpg', 10],
       ...,
       ['ff51dd281a8423f1.jpg', 11],
       ['ff8a6a251ce51c95.jpg', 5],
       ['ffc22136f958deb1.jpg', 9]], dtype=object)

In [50]:
# DataLoaders for both splits.
# Page-locked host memory only helps host-to-GPU copies, so it is requested
# only when the run actually uses CUDA.
_pin_memory = device.type == 'cuda'
trn_loader = DataLoader(
    trn_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # reshuffle the training samples every epoch
    num_workers=num_workers,
    pin_memory=_pin_memory,
    drop_last=False,
)
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,  # keep CSV order so predictions line up with the IDs
    num_workers=num_workers,  # follow the shared setting instead of a literal 0
    pin_memory=_pin_memory,
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [51]:
# Build the classifier with a 17-way output head, then the optimizer and loss.
model = timm.create_model(model_name, pretrained=True, num_classes=17)
model = model.to(device)

optimizer = Adam(model.parameters(), lr=LR)
loss_fn = nn.CrossEntropyLoss()

In [52]:
# Train for EPOCHS epochs and print the metrics of each one.
for epoch in range(EPOCHS):
    ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device)
    ret['epoch'] = epoch

    # Integers (the epoch index) are printed as-is; only the float metrics get
    # the 4-decimal format, so the log no longer shows "epoch: 0.0000".
    log = "".join(
        (f"{k}: {v}\n" if isinstance(v, int) else f"{k}: {v:.4f}\n")
        for k, v in ret.items()
    )
    print(log)

Loss: 1.9088: 100%|██████████| 50/50 [00:06<00:00,  7.99it/s]


train_loss: 1.7841
train_acc: 0.5452
train_f1: 0.4737
epoch: 0.0000



Loss: 2.7095: 100%|██████████| 50/50 [00:06<00:00,  7.97it/s]


train_loss: 0.5263
train_acc: 0.8567
train_f1: 0.8235
epoch: 1.0000



Loss: 1.3883: 100%|██████████| 50/50 [00:06<00:00,  8.01it/s]


train_loss: 0.2646
train_acc: 0.9115
train_f1: 0.8942
epoch: 2.0000



Loss: 1.3917: 100%|██████████| 50/50 [00:06<00:00,  7.95it/s]


train_loss: 0.1858
train_acc: 0.9484
train_f1: 0.9445
epoch: 3.0000



Loss: 0.9230: 100%|██████████| 50/50 [00:06<00:00,  7.93it/s]


train_loss: 0.1147
train_acc: 0.9713
train_f1: 0.9691
epoch: 4.0000



Loss: 1.7383: 100%|██████████| 50/50 [00:06<00:00,  7.92it/s]


train_loss: 0.1067
train_acc: 0.9783
train_f1: 0.9755
epoch: 5.0000



Loss: 0.2973: 100%|██████████| 50/50 [00:06<00:00,  7.97it/s]


train_loss: 0.0838
train_acc: 0.9783
train_f1: 0.9757
epoch: 6.0000



Loss: 0.7044: 100%|██████████| 50/50 [00:06<00:00,  7.91it/s]


train_loss: 0.0478
train_acc: 0.9898
train_f1: 0.9897
epoch: 7.0000



Loss: 2.4259: 100%|██████████| 50/50 [00:06<00:00,  7.88it/s]


train_loss: 0.0955
train_acc: 0.9841
train_f1: 0.9845
epoch: 8.0000



Loss: 0.7317: 100%|██████████| 50/50 [00:06<00:00,  7.93it/s]


train_loss: 0.0638
train_acc: 0.9847
train_f1: 0.9831
epoch: 9.0000



Loss: 0.7476: 100%|██████████| 50/50 [00:06<00:00,  7.89it/s]


train_loss: 0.0491
train_acc: 0.9898
train_f1: 0.9891
epoch: 10.0000



Loss: 0.0961: 100%|██████████| 50/50 [00:06<00:00,  7.83it/s]


train_loss: 0.0494
train_acc: 0.9834
train_f1: 0.9822
epoch: 11.0000



Loss: 1.0265: 100%|██████████| 50/50 [00:06<00:00,  8.01it/s]


train_loss: 0.0498
train_acc: 0.9924
train_f1: 0.9918
epoch: 12.0000



Loss: 0.9779: 100%|██████████| 50/50 [00:06<00:00,  7.94it/s]


train_loss: 0.0465
train_acc: 0.9911
train_f1: 0.9911
epoch: 13.0000



Loss: 0.9515: 100%|██████████| 50/50 [00:06<00:00,  7.88it/s]

train_loss: 0.0495
train_acc: 0.9898
train_f1: 0.9895
epoch: 14.0000






## 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [53]:
# Collect the predicted class index of every test image, in loader order.
preds_list = []

# Switch the model to evaluation mode for inference.
model.eval()
# No gradients are needed for prediction, so the whole loop runs under no_grad.
with torch.no_grad():
    for image, _ in tqdm(tst_loader):
        image = image.to(device)
        preds = model(image)
        # Keep only the arg-max class index of each sample.
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:11<00:00,  8.70it/s]


In [54]:
# Submission table: IDs come from the test index, targets from the model.
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target']).assign(
    target=preds_list
)

In [55]:
# Sanity check: predictions must be in the same order as the submission template.
sample_submission_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
# Raise explicitly instead of using `assert` (asserts are stripped under
# `python -O`), and compare as arrays so that a length mismatch is reported
# here rather than failing inside the Series comparison.
if not np.array_equal(
    sample_submission_df['ID'].to_numpy(), pred_df['ID'].to_numpy()
):
    raise ValueError("pred_df IDs do not match the IDs in sample_submission.csv")

In [56]:
# to_csv does not create missing parent directories, so make sure the output
# folder exists; the file name is derived from the actual settings rather than
# a hard-coded image size.
os.makedirs("../output", exist_ok=True)
pred_df.to_csv(f"../output/baseline_pred_{img_size}_{EPOCHS}.csv", index=False)

In [57]:
# Preview the first rows of the submission table.
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,10
2,00396fbc1f6cc21d.jpg,10
3,00471f8038d9c4b6.jpg,13
4,00901f504008d884.jpg,2
