# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉     
> 아래 baseline에서는 ResNet 모델을 로드하여, 모델을 학습 및 예측 파일 생성하는 프로세스에 대해 알아보겠습니다.

## Contents
- Prepare Environments
- Import Library & Define Functions
- Hyper-parameters
- Load Data
- Train Model
- Inference & Save File


## 1. Prepare Environments

* 데이터 로드를 위한 구글 드라이브를 마운트합니다.
* 필요한 라이브러리를 설치합니다.

In [None]:
# 구글 드라이브 마운트, Colab을 이용하지 않는다면 패스해도 됩니다.
# from google.colab import drive
# drive.mount('/gdrive', force_remount=True)
# drive.mount('/content/drive')

In [None]:
# 구글 드라이브에 업로드된 대회 데이터를 압축 해제하고 로컬에 저장합니다.
# !tar -xvf drive/MyDrive/datasets_fin.tar > /dev/null

In [1]:
# 필요한 라이브러리를 설치합니다.
# !pip install timm
!pip install augraphy albumentations tqdm

[0m

In [11]:
!pip install opencv-python

[0m

In [12]:
!apt-get update -y

Hit:1 http://archive.ubuntu.com/ubuntu focal InRelease
Get:2 http://security.ubuntu.com/ubuntu focal-security InRelease [128 kB]
Get:3 http://archive.ubuntu.com/ubuntu focal-updates InRelease [128 kB]
Get:4 http://archive.ubuntu.com/ubuntu focal-backports InRelease [128 kB]
Get:5 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [33.5 kB]
Get:6 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1540 kB]
Get:7 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [3844 kB]
Get:8 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [3969 kB]
Get:9 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [4310 kB]
Get:10 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [3817 kB]
Get:11 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1254 kB]
Get:12 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [30.9 kB]
Fetched 19.2 MB in

In [21]:
!apt-get install -y libgl1-mesa-glx
# !apt install libgl1-mesa-glx

E: dpkg was interrupted, you must manually run 'dpkg --configure -a' to correct the problem. 


In [18]:
!apt-get update && apt-get install -y python3-opencv

Hit:1 http://security.ubuntu.com/ubuntu focal-security InRelease
Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease
Hit:3 http://archive.ubuntu.com/ubuntu focal-updates InRelease
Hit:4 http://archive.ubuntu.com/ubuntu focal-backports InRelease
Reading package lists... Done
Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  adwaita-icon-theme at-spi2-core autoconf automake autotools-dev binutils
  binutils-common binutils-x86-64-linux-gnu cpp cpp-8 cpp-9 dbus
  dbus-user-session dconf-gsettings-backend dconf-service dmsetup file
  fontconfig fontconfig-config fonts-dejavu-core gcc gcc-8 gcc-8-base gcc-9
  gcc-9-base gdal-data gfortran gfortran-8 gfortran-9 gir1.2-glib-2.0
  glib-networking glib-networking-common glib-networking-services
  gsettings-desktop-schemas gstreamer1.0-plugins-base gtk-update-icon-cache
  hicolor-icon-theme humanity-icon-theme i965-va-driver ibverbs-providers


## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [1]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# Fix every random seed in use so that runs are repeatable.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cuDNN autotuning (benchmark=True) may select different kernels from run to
# run, which defeats the seeding above. Ask for deterministic kernels instead;
# this can cost some training speed.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Dataset class used for both training and inference.
class ImageDataset(Dataset):
    """Dataset of (image, target) pairs listed in a CSV file.

    Every CSV row must hold exactly two columns: the image file name
    (relative to ``path``) followed by its target label.
    """

    def __init__(self, csv, path, transform=None):
        """Read the CSV and remember where the images live.

        Args:
            csv: Path of the CSV file, read with ``pd.read_csv``.
            path: Directory that the file names in the CSV are relative to.
            transform: Optional callable invoked as ``transform(image=img)``;
                the ``'image'`` entry of its result replaces the raw array.
        """
        # Keep the rows as a plain ndarray so they can be indexed positionally.
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` and return ``(image, target)``."""
        name, target = self.df[idx]
        # Force a 3-channel RGB array: grayscale, RGBA or palette files would
        # otherwise yield a different channel count and break the 3-channel
        # normalisation applied by the transforms. The context manager closes
        # the underlying file handle as soon as the pixels are copied.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [4]:
# Runs a single training epoch.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader`` and report its metrics.

    Args:
        loader: Iterable yielding ``(image, targets)`` batches.
        model: Network to optimise; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable computing the loss from ``(preds, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        dict with ``train_loss`` (mean of the per-batch losses),
        ``train_acc`` and ``train_f1`` (macro-averaged F1).
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for image, targets in progress:
        image, targets = image.to(device), targets.to(device)

        # Clear the gradients left over from the previous step.
        model.zero_grad(set_to_none=True)

        logits = model(image)
        loss = loss_fn(logits, targets)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(targets.detach().cpu().numpy())

        # Show the loss of the latest batch on the progress bar.
        progress.set_description(f"Loss: {batch_loss:.4f}")

    return {
        "train_loss": running_loss / len(loader),
        "train_acc": accuracy_score(all_targets, all_preds),
        "train_f1": f1_score(all_targets, all_preds, average='macro'),
    }

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [5]:
# device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
# NOTE(review): the visible cells below hard-code 'data/...' paths instead of
# using this variable.
data_path = 'data/'

# model config: architecture name handed to timm.create_model
model_name = 'resnet50' # e.g. 'resnet34', 'resnet50', 'efficientnet_b0', ...

# training config
img_size = 224  # side length the transforms resize every image to
LR = 1e-3  # learning rate passed to Adam
EPOCHS = 10  # number of passes over the training loader
BATCH_SIZE = 32
num_workers = 0  # DataLoader worker processes; 0 loads data in the main process

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [6]:
def _make_base_transform(size):
    """Build the Resize -> Normalize -> ToTensorV2 pipeline for ``size`` x ``size`` images."""
    return A.Compose([
        # Resize every image to a fixed square.
        A.Resize(height=size, width=size),
        # Per-channel normalisation (the commonly used ImageNet statistics).
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        # Convert the numpy image into a PyTorch tensor.
        ToTensorV2(),
    ])


# Transform applied to the training images (no random augmentation here).
trn_transform = _make_base_transform(img_size)

# Transform applied to the test images; same pipeline, separate instance.
tst_transform = _make_base_transform(img_size)

In [None]:
# # 다양한 aug
# trn_transform = A.Compose([
#     A.Resize(256, 256), 
#     A.RandomCrop(224, 224),
#     A.OneOf([
#                 A.HorizontalFlip(p=1),
#                 A.RandomRotate90(p=1),
#                 A.VerticalFlip(p=1)            
#     ], p=1),
#     A.OneOf([
#                 A.MotionBlur(p=1),
#                 A.OpticalDistortion(p=1),
#                 A.GaussNoise(p=1)                 
#     ], p=1),
#     # images normalization
#     A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#     # numpy 이미지나 PIL 이미지를 PyTorch 텐서로 변환
#     ToTensorV2(),
# ])


# tst_transform = A.Compose([
#     A.Resize(256, 256), 
#     A.RandomCrop(224, 224),
#     A.OneOf([
#                 A.HorizontalFlip(p=1),
#                 A.RandomRotate90(p=1),
#                 A.VerticalFlip(p=1)            
#     ], p=1),
#     A.OneOf([
#                 A.MotionBlur(p=1),
#                 A.OpticalDistortion(p=1),
#                 A.GaussNoise(p=1)                 
#     ], p=1),
#     # images normalization
#     A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#     # numpy 이미지나 PIL 이미지를 PyTorch 텐서로 변환
#     ToTensorV2(),
# ])

In [7]:
# Merge the original label file with the two augmented label files.
# Alternative location noted by the author for the last file:
# "/data/ephemeral/home/data/aug_img2.csv"
csv_sources = ("data/train.csv", "data/aug_img.csv", "data/aug_img2.csv")
train_df, aug_df, aug2_df = [pd.read_csv(src) for src in csv_sources]

# Stack the three tables and renumber the rows from 0.
combine_df = pd.concat([train_df, aug_df, aug2_df], ignore_index=True)

# Persist the merged table; the training dataset reads it back from disk.
combine_df.to_csv("data/multi_aug1_combine.csv", index=False)

In [8]:
# Number of rows in the merged training table.
len(combine_df)

32970

In [9]:
# Datasets: merged training labels, and the sample submission as the test list.
trn_dataset = ImageDataset(
    csv="data/multi_aug1_combine.csv",
    path="data/aug_img/",
    transform=trn_transform,
)
tst_dataset = ImageDataset(
    csv="data/sample_submission.csv",
    path="data/test/",
    transform=tst_transform,
)
# Sanity check: number of training and test samples.
print(len(trn_dataset), len(tst_dataset))

32970 3140


In [10]:
# DataLoaders: options shared by the training and test loaders.
_common_loader_kwargs = dict(batch_size=BATCH_SIZE, pin_memory=True)

# Training batches are reshuffled every epoch; the last partial batch is kept.
trn_loader = DataLoader(
    trn_dataset,
    shuffle=True,
    num_workers=num_workers,
    drop_last=False,
    **_common_loader_kwargs,
)
# Test batches keep the CSV order so predictions line up with the IDs.
tst_loader = DataLoader(
    tst_dataset,
    shuffle=False,
    num_workers=0,
    **_common_loader_kwargs,
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [11]:
# Build the pretrained backbone with a 17-class head and move it to the device.
model = timm.create_model(model_name, pretrained=True, num_classes=17)
model = model.to(device)

# Loss and optimizer used by the training loop.
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(params=model.parameters(), lr=LR)

In [12]:
# Train for EPOCHS epochs and print the metrics of each one.
for epoch in range(EPOCHS):
    ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device)
    ret['epoch'] = epoch

    # The epoch index is an int: print it as-is rather than as "0.0000".
    # The float metrics keep four decimals.
    log = "".join(
        f"{k}: {v}\n" if isinstance(v, int) else f"{k}: {v:.4f}\n"
        for k, v in ret.items()
    )
    print(log)

Loss: 0.0553: 100%|██████████| 1031/1031 [01:59<00:00,  8.64it/s]


train_loss: 0.3952
train_acc: 0.8645
train_f1: 0.8568
epoch: 0.0000



Loss: 0.0264: 100%|██████████| 1031/1031 [01:57<00:00,  8.80it/s]


train_loss: 0.0908
train_acc: 0.9687
train_f1: 0.9673
epoch: 1.0000



Loss: 0.0418: 100%|██████████| 1031/1031 [01:57<00:00,  8.80it/s]


train_loss: 0.0490
train_acc: 0.9838
train_f1: 0.9829
epoch: 2.0000



Loss: 0.3437: 100%|██████████| 1031/1031 [01:57<00:00,  8.79it/s]


train_loss: 0.0387
train_acc: 0.9870
train_f1: 0.9864
epoch: 3.0000



Loss: 0.0129: 100%|██████████| 1031/1031 [01:57<00:00,  8.79it/s]


train_loss: 0.0353
train_acc: 0.9886
train_f1: 0.9882
epoch: 4.0000



Loss: 0.0080: 100%|██████████| 1031/1031 [01:57<00:00,  8.80it/s]


train_loss: 0.0271
train_acc: 0.9912
train_f1: 0.9907
epoch: 5.0000



Loss: 0.0031: 100%|██████████| 1031/1031 [01:57<00:00,  8.79it/s]


train_loss: 0.0235
train_acc: 0.9929
train_f1: 0.9927
epoch: 6.0000



Loss: 0.0015: 100%|██████████| 1031/1031 [01:57<00:00,  8.81it/s]


train_loss: 0.0210
train_acc: 0.9933
train_f1: 0.9929
epoch: 7.0000



Loss: 0.0001: 100%|██████████| 1031/1031 [01:57<00:00,  8.81it/s]


train_loss: 0.0202
train_acc: 0.9938
train_f1: 0.9934
epoch: 8.0000



Loss: 0.0026: 100%|██████████| 1031/1031 [01:57<00:00,  8.78it/s]

train_loss: 0.0187
train_acc: 0.9939
train_f1: 0.9935
epoch: 9.0000






## 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [13]:
# Predict a class index for every test image, in loader order.
model.eval()
preds_list = []

# No gradients are needed at inference time, so disable them for the whole loop.
with torch.no_grad():
    for image, _ in tqdm(tst_loader):
        image = image.to(device)
        preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:14<00:00,  6.88it/s]


In [14]:
# Rebuild the submission table from the test rows and overwrite the
# placeholder targets with the predicted class indices.
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target']).assign(
    target=preds_list
)

In [15]:
# Sanity check: the prediction rows must be in the same ID order as the
# sample submission file.
sample_submission_df = pd.read_csv("data/sample_submission.csv")
ids_match = sample_submission_df['ID'] == pred_df['ID']
assert ids_match.all()

In [16]:
# Write the predictions as the submission CSV, without the DataFrame index.
pred_df.to_csv("pred_res50_multi_aug1.csv", index=False)

In [None]:
# Preview the first 10 predictions.
pred_df.head(10)

In [17]:
# Preview the first 20 predictions.
pred_df.head(20)

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2
5,009b22decbc7220c.jpg,15
6,00b33e0ee6d59427.jpg,0
7,00bbdcfbbdb3e131.jpg,8
8,00c03047e0fbef40.jpg,15
9,00c0dabb63ca7a16.jpg,4
