# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉

In [46]:
!nvidia-smi

Thu Aug  8 11:53:58 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  | 00000000:4B:00.0 Off |                  N/A |
| 39%   31C    P8              24W / 350W |   8182MiB / 24576MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [29]:
import os
import random
import time

import timm
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import albumentations as A
import pandas as pd
from collections import Counter
import numpy as np
import torch.nn as nn
from sklearn.model_selection import KFold
from albumentations.pytorch import ToTensorV2
from torchvision import transforms
from torch.optim import AdamW, RMSprop, SGD, Adam, Adamax, Adadelta, Adagrad
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, StepLR

In [30]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [31]:
# 데이터셋 클래스
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, int(target)  # target을 정수로 변환

In [32]:
# 학습을 위한 함수
def train_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        model.zero_grad(set_to_none=True)

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "loss": train_loss,
        "acc": train_acc,
        "f1": train_f1,
    }

    return ret

# 검증을 위한 함수
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())
            targets_list.extend(targets.cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "loss": val_loss,
        "acc": val_acc,
        "f1": val_f1,
    }

    return ret

In [33]:
# 하이퍼파라미터 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = 'efficientnet_b5' # efficientnet_b2 or efficientnet_b5
img_size = 224
LR = 1e-4
EPOCHS = 10 # 3~10
BATCH_SIZE = 32
num_workers = 4

In [34]:
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [35]:
# 데이터셋 로드
trn_dataset = ImageDataset(
    "/home/train_csv/train1200(crop, flip, blur, noise).csv",
    # "/home/data/augmented(flip,blur,noise)",
    "/home/data/augmented1200(crop,flip,blur,noise)",
    transform=trn_transform
)
tst_dataset = ImageDataset(
    "/home/data/sample_submission.csv",
    "/home/data/test_transform_real_denoise",
    transform=tst_transform
)

print(len(trn_dataset), len(tst_dataset))

48000 3140


In [36]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean', target_weights=None):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.target_weights = target_weights

    def forward(self, inputs, targets):
        ce_loss = nn.CrossEntropyLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-ce_loss)
        
        # Focal Loss 계산
        focal_weights = torch.ones_like(pt)
        if self.target_weights is not None:
            for cls, weight in self.target_weights.items():
                focal_weights[targets == cls] *= weight

        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss * focal_weights

        if self.reduction == 'mean':
            return torch.mean(focal_loss)
        elif self.reduction == 'sum':
            return torch.sum(focal_loss)
        else:
            return focal_loss

In [37]:
# 최적화 함수 정의
def get_optimizer(model, name='adam', lr=1e-3):
    if name == 'adam':
        return Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    elif name == 'adamw':
        return AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
    elif name == 'sgd':
        return SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-5)
    elif name == 'rmsprop':
        return RMSprop(model.parameters(), lr=lr, weight_decay=1e-5)
    else:
        raise ValueError(f"Unsupported optimizer: {name}")

# 손실 함수 정의
def get_loss(name='focal'):
    if name == 'ce':
        return nn.CrossEntropyLoss(label_smoothing=0.1)
    elif name == 'focal':
        target_weights = {3: 15, 7: 10, 14: 10}
        return FocalLoss(alpha=1, gamma=2, target_weights=target_weights)
    else:
        raise ValueError(f"Unsupported loss: {name}")

# 스케줄러 정의
def get_scheduler(optimizer, name='cosine', T_0=10):
    if name == 'cosine':
        return CosineAnnealingWarmRestarts(optimizer, T_0=T_0)
    elif name == 'step':
        return StepLR(optimizer, step_size=30, gamma=0.1)
    else:
        raise ValueError(f"Unsupported scheduler: {name}")

In [38]:
# 모델 학습 함수
def train_model(model, train_loader, val_loader, optimizer, scheduler, loss_fn, device, num_epochs):
    best_val_f1 = 0
    for epoch in range(num_epochs):
        train_ret = train_epoch(train_loader, model, optimizer, loss_fn, device)
        val_ret = validate(val_loader, model, loss_fn, device)
        
        scheduler.step()
        
        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_ret['loss']:.4f}, Acc: {train_ret['acc']:.4f}, F1: {train_ret['f1']:.4f}")
        print(f"Val Loss: {val_ret['loss']:.4f}, Acc: {val_ret['acc']:.4f}, F1: {val_ret['f1']:.4f}")
        
        if val_ret['f1'] > best_val_f1:
            best_val_f1 = val_ret['f1']
            torch.save(model.state_dict(), f'effinetb2_best_fold{fold+1}.pth')
    
    return best_val_f1

In [39]:
n_splits = 5 # 5Fold
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=SEED)

# k-fold 교차 검증
fold_scores = []
fold_predictions = []
for fold, (train_idx, val_idx) in enumerate(kfold.split(trn_dataset)):
    print(f"Fold {fold+1}")
    
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsampler = torch.utils.data.SubsetRandomSampler(val_idx)
    
    train_loader = DataLoader(
        trn_dataset, 
        batch_size=BATCH_SIZE, 
        sampler=train_subsampler, 
        num_workers=num_workers, 
        pin_memory=True
    )
    val_loader = DataLoader(
        trn_dataset, 
        batch_size=BATCH_SIZE,
        sampler=val_subsampler, 
        num_workers=num_workers, 
        pin_memory=True
    )
    
    model = timm.create_model(
        model_name, 
        pretrained=True, 
        num_classes=17
    ).to(device)
    optimizer = get_optimizer(model, name='adam', lr=LR)
    loss_fn = get_loss(name='focal')
    scheduler = get_scheduler(optimizer, name='cosine', T_0=10)
    
    best_val_f1 = train_model(model, train_loader, val_loader, optimizer, scheduler, loss_fn, device, EPOCHS)
    fold_scores.append(best_val_f1)

print(f"K-Fold CV score: {np.mean(fold_scores):.4f} (+/- {np.std(fold_scores):.4f})")

Fold 1


INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/efficientnet_b5.sw_in12k_ft_in1k)
INFO:timm.models._hub:[timm/efficientnet_b5.sw_in12k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
Loss: 0.1545: 100%|██████████| 1200/1200 [03:10<00:00,  6.29it/s]


Epoch 1/10
Train Loss: 1.3316, Acc: 0.7847, F1: 0.8020
Val Loss: 0.3521, Acc: 0.9308, F1: 0.9363


Loss: 0.2376: 100%|██████████| 1200/1200 [03:10<00:00,  6.30it/s]


Epoch 2/10
Train Loss: 0.2496, Acc: 0.9491, F1: 0.9533
Val Loss: 0.2123, Acc: 0.9623, F1: 0.9657


Loss: 0.0010: 100%|██████████| 1200/1200 [03:10<00:00,  6.30it/s]


Epoch 3/10
Train Loss: 0.1733, Acc: 0.9616, F1: 0.9653
Val Loss: 0.1104, Acc: 0.9824, F1: 0.9833


Loss: 0.0692: 100%|██████████| 1200/1200 [03:10<00:00,  6.30it/s]


Epoch 4/10
Train Loss: 0.0878, Acc: 0.9796, F1: 0.9813
Val Loss: 0.4603, Acc: 0.9626, F1: 0.9619


Loss: 0.0011: 100%|██████████| 1200/1200 [03:10<00:00,  6.31it/s]


Epoch 5/10
Train Loss: 0.0797, Acc: 0.9829, F1: 0.9842
Val Loss: 0.0899, Acc: 0.9850, F1: 0.9844


Loss: 0.0253: 100%|██████████| 1200/1200 [03:10<00:00,  6.31it/s]


Epoch 6/10
Train Loss: 0.0336, Acc: 0.9908, F1: 0.9916
Val Loss: 0.0503, Acc: 0.9868, F1: 0.9885


Loss: 1.1719: 100%|██████████| 1200/1200 [03:10<00:00,  6.31it/s]


Epoch 7/10
Train Loss: 0.0204, Acc: 0.9948, F1: 0.9952
Val Loss: 0.0264, Acc: 0.9962, F1: 0.9966


Loss: 0.0011: 100%|██████████| 1200/1200 [03:09<00:00,  6.32it/s]


Epoch 8/10
Train Loss: 0.0050, Acc: 0.9988, F1: 0.9989
Val Loss: 0.0150, Acc: 0.9978, F1: 0.9978


Loss: 0.0000: 100%|██████████| 1200/1200 [03:10<00:00,  6.31it/s]


Epoch 9/10
Train Loss: 0.0011, Acc: 0.9995, F1: 0.9995
Val Loss: 0.0149, Acc: 0.9983, F1: 0.9985


Loss: 0.0000: 100%|██████████| 1200/1200 [03:10<00:00,  6.31it/s]


Epoch 10/10
Train Loss: 0.0006, Acc: 0.9998, F1: 0.9998
Val Loss: 0.0146, Acc: 0.9984, F1: 0.9986
Fold 2


INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/efficientnet_b5.sw_in12k_ft_in1k)
INFO:timm.models._hub:[timm/efficientnet_b5.sw_in12k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
Loss: 0.0536: 100%|██████████| 1200/1200 [03:10<00:00,  6.30it/s]


Epoch 1/10
Train Loss: 1.2802, Acc: 0.7949, F1: 0.8110
Val Loss: 0.2425, Acc: 0.9476, F1: 0.9536


Loss: 0.0480: 100%|██████████| 1200/1200 [03:10<00:00,  6.30it/s]


Epoch 2/10
Train Loss: 0.2505, Acc: 0.9478, F1: 0.9520
Val Loss: 0.3083, Acc: 0.9454, F1: 0.9516


Loss: 0.0645: 100%|██████████| 1200/1200 [03:10<00:00,  6.30it/s]


Epoch 3/10
Train Loss: 0.1465, Acc: 0.9683, F1: 0.9704
Val Loss: 0.1640, Acc: 0.9653, F1: 0.9704


Loss: 0.0206: 100%|██████████| 1200/1200 [03:10<00:00,  6.30it/s]


Epoch 4/10
Train Loss: 0.1094, Acc: 0.9772, F1: 0.9789
Val Loss: 0.1118, Acc: 0.9846, F1: 0.9866


Loss: 0.0001:  84%|████████▍ | 1008/1200 [02:39<00:30,  6.31it/s]

In [None]:
trn_loader = DataLoader(
    trn_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=True, 
    num_workers=num_workers, 
    pin_memory=True
)
val_loader = DataLoader(
    tst_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=False, 
    num_workers=num_workers, 
    pin_memory=True
)

final_model = timm.create_model(
    model_name, 
    pretrained=True, 
    num_classes=17
).to(device)
final_optimizer = get_optimizer(final_model, name='adam', lr=LR)
final_loss_fn = get_loss(name='focal')
final_scheduler = get_scheduler(final_optimizer, name='cosine', T_0=10)

train_model(final_model, trn_loader, val_loader, final_optimizer, final_scheduler, final_loss_fn, device, EPOCHS)

INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/efficientnet_b2.ra_in1k)
INFO:timm.models._hub:[timm/efficientnet_b2.ra_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
Loss: 0.6307:  35%|███▌      | 527/1500 [00:35<01:05, 14.88it/s]

Loss: 0.0391: 100%|██████████| 1500/1500 [01:41<00:00, 14.78it/s]


Epoch 1/3
Train Loss: 0.7112, Acc: 0.8165, F1: 0.8282
Val Loss: 7.9210, Acc: 0.0643, F1: 0.0071


Loss: 0.1346: 100%|██████████| 1500/1500 [01:41<00:00, 14.81it/s]


Epoch 2/3
Train Loss: 0.1076, Acc: 0.9627, F1: 0.9656
Val Loss: 9.1173, Acc: 0.0631, F1: 0.0070


Loss: 0.0173: 100%|██████████| 1500/1500 [01:41<00:00, 14.76it/s]


Epoch 3/3
Train Loss: 0.0463, Acc: 0.9833, F1: 0.9846
Val Loss: 9.4621, Acc: 0.0672, F1: 0.0074


0.007407797496796392

In [26]:
# 추론
final_model.load_state_dict(torch.load('/home/code/effinetb2_best_fold5.pth'))
final_model.eval()

preds_list = []

for image, _ in tqdm(val_loader):
    image = image.to(device)

    with torch.no_grad():
        preds = final_model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:02<00:00, 35.27it/s]


In [27]:
# 결과 저장
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("/home/data/pred/kfold_pred/effinetb2_best_fold5_testDenoising.csv", index=False)

In [28]:
# 성능 평가
answer_df = pd.read_csv("/home/data/answer.csv")
assert (answer_df['ID'] == pred_df['ID']).all()

f1 = f1_score(answer_df['target'], pred_df['target'], average='macro')
print(f"Macro F1 Score: {f1:.4f}")

Macro F1 Score: 0.7730
