# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉


## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [None]:
import os
import random
import time

import timm
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torchvision import transforms
from torch.optim import AdamW, RMSprop, SGD, Adam, Adamax, Adadelta, Adagrad
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR

In [None]:
# Mount Google Drive only when the notebook runs inside Google Colab.
# Outside Colab the `google.colab` package does not exist; skipping the mount
# lets the remaining cells (which read from local /home paths) run unchanged.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True
    drive.mount('/content/drive')

In [None]:
# Fix every random seed used by the notebook so that runs are repeatable.
SEED = 42


def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    # PYTHONHASHSEED is read at interpreter start-up, so this only affects
    # Python processes launched after this point, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # cuDNN auto-tuning (benchmark=True) may select different kernels from run
    # to run, which defeats the seeding above; trade some speed for
    # repeatable results instead.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(SEED)

In [None]:
# Dataset that serves (image, target) pairs listed in a CSV file.
class ImageDataset(Dataset):
    """Image dataset driven by a CSV of ``(file name, target)`` rows.

    Args:
        csv: Path to a CSV file; every row must hold exactly two values,
            the image file name and its target.
        path: Directory that contains the image files named in the CSV.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, csv, path, transform=None):
        # Keep the rows as a plain array; each row is unpacked in __getitem__.
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return ``(image, target)``."""
        name, target = self.df[idx]
        # Close the file handle promptly, and force three channels so that
        # grayscale / RGBA / palette files still produce an H x W x 3 array.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img, target

In [None]:
# Runs one training epoch over a data loader.
def train_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader`` and report epoch metrics.

    Every batch is moved to ``device``, run forward, back-propagated and
    followed by an optimizer step, so calling this function always updates
    the model weights.

    Args:
        loader: Iterable yielding ``(images, targets)`` batches.
        model: Network to train; it is switched to train mode.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning the loss.
        device: Device the batches are moved to.

    Returns:
        dict with ``"train_loss"`` (mean of the per-batch losses),
        ``"train_acc"`` (accuracy) and ``"train_f1"`` (macro F1) computed
        over all samples seen in the epoch.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        # Drop the gradients left over from the previous batch.
        model.zero_grad(set_to_none=True)

        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        # The predicted class is the index of the largest output per sample.
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(batch_targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {loss_value:.4f}")

    return {
        "train_loss": running_loss / len(loader),
        "train_acc": accuracy_score(all_targets, all_preds),
        "train_f1": f1_score(all_targets, all_preds, average='macro'),
    }

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [None]:
# Device: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data config: root directory of the competition data.
# NOTE(review): the dataset / submission cells shown in this notebook hard-code
# their own paths and do not read this variable.
data_path = '/home/data'

# Model config: architecture name passed to timm.create_model.
model_name = 'efficientnet_b2' # other timm model names (e.g. 'resnet50') can be used instead

# Training hyper-parameters.
img_size = 256  # images are resized to img_size x img_size
LR = 1e-3  # optimizer learning rate
EPOCHS = 3  # number of passes over the training loader
BATCH_SIZE = 32  # batch size shared by the train / valid / test loaders
num_workers = 4  # DataLoader worker count

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [None]:
# Image preprocessing pipelines for training and test data.
def _build_transform():
    """Return the resize -> normalize -> tensor pipeline applied to each image."""
    return A.Compose([
        # Resize every image to a fixed img_size x img_size square.
        A.Resize(height=img_size, width=img_size),
        # Per-channel normalization (these are the usual ImageNet statistics).
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        # Convert the NumPy image into a PyTorch tensor.
        ToTensorV2(),
    ])


# Both pipelines are currently identical: no random augmentation is applied
# to the training images here.
trn_transform = _build_transform()
tst_transform = _build_transform()

In [None]:
# Build the training and test datasets.
# Other training sources that can be swapped in:
#   csv:  "/home/data/train.csv"
#   path: "/home/data/train" or "/home/data/rotate_train"
trn_dataset = ImageDataset(
    csv="/home/train_csv/train_augmented(label_change).csv",
    path="/home/data/augmented(flip,blur,noise)",
    transform=trn_transform,
)
# The sample submission doubles as the list of test file names.
tst_dataset = ImageDataset(
    csv="/home/data/sample_submission.csv",
    path="/home/data/test",
    transform=tst_transform,
)
print(len(trn_dataset), len(tst_dataset))

50240 3140


In [None]:
# Split the labelled data into training (80%) and validation (20%) subsets.
# NOTE(review): the training folder name suggests it holds augmented copies of
# the images; if several variants of one source image exist, a purely random
# split can place them on both sides and inflate validation scores. Consider
# splitting by source image - TODO confirm against the data layout.
n_train = int(len(trn_dataset) * 0.8)
n_valid = len(trn_dataset) - n_train

# A dedicated, explicitly seeded generator makes the split independent of how
# much of the global RNG was consumed before this cell ran.
split_generator = torch.Generator().manual_seed(SEED)
trn_dataset, valid_dataset = torch.utils.data.random_split(
    trn_dataset,
    [n_train, n_valid],
    generator=split_generator,
)
print(len(trn_dataset), len(valid_dataset))

40192 10048


In [None]:
# Build the train / validation / test DataLoaders.
# Settings shared by every loader; only `shuffle` differs between them.
common_loader_args = dict(
    batch_size=BATCH_SIZE,
    num_workers=num_workers,
    pin_memory=True,
)

# Only the training loader shuffles its samples.
trn_loader = DataLoader(trn_dataset, shuffle=True, **common_loader_args)
valid_loader = DataLoader(valid_dataset, shuffle=False, **common_loader_args)
tst_loader = DataLoader(tst_dataset, shuffle=False, **common_loader_args)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [None]:
# Load the pretrained backbone and configure the loss and the optimizer.
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=17
).to(device)
# Cross-entropy with label smoothing (0.1).
loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
# RMSprop with a small weight decay.
optimizer = RMSprop(model.parameters(), lr=LR, weight_decay=1e-5)

In [None]:
from torch.utils.tensorboard import SummaryWriter


def valid_epoch(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without updating its weights.

    Validation must not go through ``train_epoch``: that function
    back-propagates and steps the optimizer on every batch, which would train
    the model on the validation split.

    Args:
        loader: Iterable yielding ``(images, targets)`` batches.
        model: Network to evaluate; it is switched to eval mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning the loss.
        device: Device the batches are moved to.

    Returns:
        dict with the same keys as ``train_epoch`` (``"train_loss"``,
        ``"train_acc"``, ``"train_f1"``) so both results can be read the same
        way; here the values describe the evaluated split.
    """
    model.eval()
    total_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    with torch.no_grad():
        for image, targets in pbar:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            total_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())
            targets_list.extend(targets.cpu().numpy())

            pbar.set_description(f"Loss: {loss.item():.4f}")

    return {
        "train_loss": total_loss / len(loader),
        "train_acc": accuracy_score(targets_list, preds_list),
        "train_f1": f1_score(targets_list, preds_list, average='macro'),
    }


# TensorBoard SummaryWriter
writer = SummaryWriter(log_dir='runs/exp1')

# Train for EPOCHS epochs, validating and logging metrics after each one.
try:
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")
        train_ret = train_epoch(trn_loader, model, optimizer, loss_fn, device)
        print(f"Train Loss: {train_ret['train_loss']:.4f}, Acc: {train_ret['train_acc']:.4f}, F1: {train_ret['train_f1']:.4f}")

        valid_ret = valid_epoch(valid_loader, model, loss_fn, device)
        print(f"Valid Loss: {valid_ret['train_loss']:.4f}, Acc: {valid_ret['train_acc']:.4f}, F1: {valid_ret['train_f1']:.4f}")

        writer.add_scalar('Loss/train', train_ret['train_loss'], epoch)
        writer.add_scalar('Acc/train', train_ret['train_acc'], epoch)
        writer.add_scalar('F1/train', train_ret['train_f1'], epoch)
        writer.add_scalar('Loss/valid', valid_ret['train_loss'], epoch)
        writer.add_scalar('Acc/valid', valid_ret['train_acc'], epoch)
        writer.add_scalar('F1/valid', valid_ret['train_f1'], epoch)
finally:
    # Close the writer once, after the last epoch (or on interruption),
    # instead of closing it at the end of every epoch.
    writer.close()

Epoch 1/3


Loss: 0.6636: 100%|██████████| 1256/1256 [01:39<00:00, 12.57it/s]


Train Loss: 0.7990, Acc: 0.9220, F1: 0.9165


Loss: 0.6112: 100%|██████████| 314/314 [00:25<00:00, 12.35it/s]


Valid Loss: 0.6576, Acc: 0.9732, F1: 0.9727
Epoch 2/3


Loss: 0.6057: 100%|██████████| 1256/1256 [01:40<00:00, 12.49it/s]


Train Loss: 0.6409, Acc: 0.9792, F1: 0.9781


Loss: 0.5913: 100%|██████████| 314/314 [00:25<00:00, 12.36it/s]


Valid Loss: 0.6229, Acc: 0.9863, F1: 0.9855
Epoch 3/3


Loss: 0.6033: 100%|██████████| 1256/1256 [01:40<00:00, 12.48it/s]


Train Loss: 0.6277, Acc: 0.9834, F1: 0.9823


Loss: 0.7082: 100%|██████████| 314/314 [00:25<00:00, 12.36it/s]


Valid Loss: 0.6176, Acc: 0.9861, F1: 0.9856


## 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [None]:
# Run the trained model over the test loader and collect the predicted
# class index of every test image, in loader order.
model.eval()
preds_list = []

# Gradients are not needed for inference.
with torch.no_grad():
    for image, _ in tqdm(tst_loader):
        image = image.to(device)
        preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:03<00:00, 28.39it/s]


In [None]:
# Start from the test rows (ID, placeholder target) and overwrite the target
# column with the model's predictions.
pred_df = pd.DataFrame(
    tst_dataset.df,
    columns=['ID', 'target'],
).assign(target=preds_list)

In [None]:
# Sanity check: prediction rows must line up one-to-one, in the same order,
# with the sample submission. Raise explicitly rather than using `assert`,
# which is skipped when Python runs with optimizations enabled.
sample_submission_df = pd.read_csv("/home/data/sample_submission.csv")
if not (sample_submission_df['ID'] == pred_df['ID']).all():
    raise ValueError("pred_df IDs do not match the IDs in sample_submission.csv")

In [None]:
# Write the submission file, creating the output directory first so the save
# does not fail when the directory is missing.
pred_path = "/home/data/pred/EffiNetb2_epoch3.csv"
os.makedirs(os.path.dirname(pred_path), exist_ok=True)
pred_df.to_csv(pred_path, index=False)

In [None]:
# Preview the first rows of the prediction table.
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,6
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2
