## 1. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [1]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from torch.optim.lr_scheduler import CosineAnnealingLR
import wandb
import datetime

In [2]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [3]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, df, path, transform=None):
        self.df = df.values if isinstance(df, pd.DataFrame) else pd.read_csv(df).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [4]:
# one epoch 학습을 위한 함수입니다.
def train_one_epoch(loader, model, optimizer, loss_fn, device, epoch=None):
    model.train()
    train_loss = 0
    preds_list, targets_list = [], []

    pbar = tqdm(loader, desc=f"Train Epoch {epoch+1}" if epoch is not None else "Train")
    for image, targets in pbar:
        image, targets = image.to(device), targets.to(device)

        optimizer.zero_grad(set_to_none=True)
        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).cpu().numpy())
        targets_list.extend(targets.cpu().numpy())

    # ---- epoch별 평균 계산 ----
    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    # ---- wandb 로그 기록 ----
    wandb.log({
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
        "lr": optimizer.param_groups[0]["lr"],        # ✅ 학습률 로그 추가
        "epoch": epoch + 1 if epoch is not None else 0
    })

    return {"train_loss": train_loss, "train_acc": train_acc, "train_f1": train_f1}


# Validation용 함수 추가
def valid_one_epoch(loader, model, loss_fn, device, epoch=None):
    model.eval()
    val_loss = 0
    preds_list, targets_list = [], []

    with torch.no_grad():
        pbar = tqdm(loader, desc=f"Valid Epoch {epoch+1}" if epoch is not None else "Valid")
        for image, targets in pbar:
            image, targets = image.to(device), targets.to(device)
            preds = model(image)
            loss = loss_fn(preds, targets)
            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())
            targets_list.extend(targets.cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    # ---- wandb 로그 기록 ----
    wandb.log({
        "val_loss": val_loss,
        "val_acc": val_acc,
        "val_f1": val_f1,
        "epoch": epoch + 1 if epoch is not None else 0
    })

    return {"val_loss": val_loss, "val_acc": val_acc, "val_f1": val_f1}


## 2. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [5]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
data_path = 'datasets_fin/'

# model config
# model_name = 'resnet34' # 'resnet50' 'efficientnet-b0', ...
model_name = 'efficientnet_b3'
# model_name = 'convnext_tiny'
# model_name = 'vit_base_patch16_224'
# model_name = 'swin_tiny_patch4_window7_224'

# training config
img_size = 640 # 224, 640
LR = 1e-3
EPOCHS = 2
BATCH_SIZE = 32
num_workers = 4

## 3. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [6]:
# augmentation을 위한 transform 코드
trn_transform = A.Compose([
    # 이미지 크기 조정
    A.Resize(height=img_size, width=img_size),
    
    # --- 실제 Test domain 대응 증강 ---
    A.Rotate(limit=180, p=0.7),                     # 회전
    A.HorizontalFlip(p=0.5),                        # 좌우 반전
    A.VerticalFlip(p=0.3),                          # 상하 반전
    A.RandomResizedCrop(height=img_size, width=img_size, scale=(0.8, 1.0), p=0.4),  # 크롭
    A.MotionBlur(blur_limit=5, p=0.3),              # 블러
    A.GaussNoise(var_limit=(10, 50), p=0.3),        # 노이즈
    A.RandomBrightnessContrast(p=0.3),              # 밝기/대비
    A.HueSaturationValue(p=0.2),                    # 색조 변형 (인쇄/조명 차이 대응)
    
    # images normalization
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # numpy 이미지나 PIL 이미지를 PyTorch 텐서로 변환
    ToTensorV2(),
])

# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [7]:
# print("총 이미지 수:", len(os.listdir("../data/train_balanced")))
print("총 이미지 수:", len(os.listdir("../data/train_mod_balanced")))
# df = pd.read_csv("../data/train_balanced.csv")
df = pd.read_csv("../data/train_mod_balanced.csv")
print(df["target"].value_counts().sort_index())


총 이미지 수: 1704
target
0     100
1     100
2     100
3     100
4     100
5     100
6     100
7     102
8     100
9     100
10    102
11    100
12    100
13    100
14    100
15    100
16    100
Name: count, dtype: int64


In [None]:
# --- ✅ K-Fold split으로 변경 ---
# train_df = pd.read_csv("../data/train_balanced.csv")
train_df = pd.read_csv("../data/train_mod_balanced.csv")

folds = 5
skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=42)

# ✅ progressive resizing 헬퍼 추가
def adjust_img_size(epoch):
    if epoch < 30:
        return 384
    elif epoch < 45:
        return 512
    else:
        return 640

def update_transforms(new_size):
    global trn_transform, tst_transform
    trn_transform = A.Compose([
        A.Resize(height=new_size, width=new_size),
        A.Rotate(limit=90, p=0.5),
        A.HorizontalFlip(p=0.5),
        A.RandomResizedCrop(height=new_size, width=new_size, scale=(0.9, 1.0), p=0.3),
        A.GaussNoise(var_limit=(10, 40), p=0.2),
        A.RandomBrightnessContrast(p=0.3),
        A.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    tst_transform = A.Compose([
        A.Resize(height=new_size, width=new_size),
        A.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

# ✅ fold별 학습 루프
for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df['target'])):
    print(f"\n===== Fold {fold+1}/{folds} =====")

    trn_df = train_df.iloc[train_idx].reset_index(drop=True)
    val_df = train_df.iloc[val_idx].reset_index(drop=True)

    trn_dataset = ImageDataset(trn_df, "../data/train_balanced/", transform=trn_transform)
    val_dataset = ImageDataset(val_df, "../data/train_balanced/", transform=tst_transform)

    trn_loader = DataLoader(trn_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    optimizer = Adam(model.parameters(), lr=LR)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-6)
    loss_fn = nn.CrossEntropyLoss()

    run_name = f"{model_name}_fold{fold+1}_{datetime.datetime.now().strftime('%m%d_%H%M')}"
    wandb.init(project="document-type-classification", name=run_name)

    for epoch in range(EPOCHS):
        new_size = adjust_img_size(epoch)
        update_transforms(new_size)
        print(f"\n[Fold {fold+1}] [Epoch {epoch+1}] 이미지 크기 조정: {new_size}px")

        train_metrics = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device, epoch=epoch)
        val_metrics = valid_one_epoch(val_loader, model, loss_fn, device=device, epoch=epoch)
        scheduler.step()

        print(
            f"[Fold {fold+1}] [Epoch {epoch+1}/{EPOCHS}] "
            f"Train F1: {train_metrics['train_f1']:.4f}, "
            f"Val F1: {val_metrics['val_f1']:.4f}, "
            f"LR: {optimizer.param_groups[0]['lr']:.8f}"
        )

    model_path = f"{model_name}_fold{fold+1}.pt"
    # torch.save(model.state_dict(), f"model_fold{fold+1}.pt")
    torch.save(model.state_dict(), model_path)
    print(f"✅ 모델 저장 완료: {model_path}")
    wandb.finish()


## 4. Train Model
* 모델을 로드하고, 학습을 진행합니다.

# 5. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [1]:
# --- ✅ 모든 Fold 학습 완료 후 Inference 단계 ---
print("\n===== TTA Inference 시작 =====")

# ✅ test dataset / loader 정의
tst_dataset = ImageDataset(
    "../data/sample_submission.csv",
    "../data/test/",
    transform=tst_transform
)
tst_loader = DataLoader(tst_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

# ✅ fold별 모델 경로 지정 (이미 학습 완료된 모델들)
model_paths = [f"{model_name}_fold{i+1}.pt" for i in range(folds)]

tta_transforms = [
    lambda x: x,
    lambda x: torch.flip(x, dims=[3]),
    lambda x: torch.flip(x, dims=[2]),
    lambda x: torch.rot90(x, k=1, dims=[2,3]),
    lambda x: torch.rot90(x, k=3, dims=[2,3])
]

preds_all = []
for path in model_paths:
    print(f"\n▶ Loading {path}")
    model.load_state_dict(torch.load(path, map_location=device))
    model.eval()

    preds_fold = []
    for images, _ in tqdm(tst_loader):
        images = images.to(device)
        tta_preds = []

        with torch.no_grad():
            for tta in tta_transforms:
                imgs_tta = tta(images)
                preds = model(imgs_tta)
                tta_preds.append(preds.softmax(dim=1).cpu().numpy())

        avg_preds = np.mean(tta_preds, axis=0)
        preds_fold.append(avg_preds)

    preds_fold = np.concatenate(preds_fold)
    preds_all.append(preds_fold)

# ✅ K-Fold 평균 앙상블
avg_preds = np.mean(preds_all, axis=0)
final_preds = np.argmax(avg_preds, axis=1)

# ✅ 현재 시간 기반 파일명 생성
timestamp = datetime.datetime.now().strftime("%m%d_%H%M")
save_path = f"pred_{timestamp}.csv"

# ✅ 결과 저장
tst_df = pd.read_csv("../data/sample_submission.csv")
tst_df["target"] = final_preds
tst_df.to_csv(save_path, index=False)

print(f"✅ Saved submission: {save_path}")


===== TTA Inference 시작 =====


NameError: name 'ImageDataset' is not defined

In [None]:
# best fold만 가지고 추론하는 방법
best_fold = 2
best_model_path = f"{model_name}_fold{best_fold}_best.pt"

model = timm.create_model(model_name, pretrained=False, num_classes=17).to(device)
model.load_state_dict(torch.load(best_model_path, map_location=device))
model.eval()

# 단일 모델 추론
preds_all = []
for images, _ in tqdm(tst_loader):
    images = images.to(device)
    with torch.no_grad():
        preds = model(images)
        preds_all.append(preds.softmax(dim=1).cpu().numpy())

avg_preds = np.concatenate(preds_all)
final_preds = np.argmax(avg_preds, axis=1)
