# **📄 Image Classification 대회**
> 문서 타입 분류 대회
> EfficientNet 모델을 로드하여, 모델을 학습 및 예측 파일 생성하는 프로세스

## Contents
- Imort Library & Define Functions
- Hyper-tuning
- Load Data
- Train Model
- Inderence & save File

## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드
* 학습 및 추론에 필요한 함수와 클래스를 정의

In [25]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn

from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from timm.data.mixup import Mixup

import math

### 결과의 재현성을 위해 시드를 고정합니다.


In [26]:
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic=True 
torch.backends.cudnn.benchmark=False

### 모델 모니터링 (wandb 사용)

In [27]:
import wandb
!wandb login

#entitiy : 고정 , project : '본인이름_cv' 이런 느낌, name : "첫번쨰 시도" ... etc  
run = wandb.init(project="kimjeongheon_cv", entity = 'CV_대회', name ='TopOfTheWorld')

[34m[1mwandb[0m: Currently logged in as: [33mgimjeongheon38[0m. Use [1m`wandb login --relogin`[0m to force relogin


## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [29]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model config
model_name = 'efficientnet_b4'

# training config
img_size = 380
LR = 0.001717

EPOCHS = 100
BATCH_SIZE = 16
num_workers = 8

#focal loss
loss_alpha = 0.55
loss_gamma = 1.477

#AdamW - optimizer
dropout_ratio = 0.3 

# 증강 데이터 배수 설정
num_augmentations = 10
#aug
alpha = 0.621
mixup_prob =  0.3853

use_amp=True

#gradient
accumulation_steps = 2


wandb.config.update({
    "model_name": model_name,
    "img_size": img_size,
    "LR" : LR,
    "BATCH_SIZE":BATCH_SIZE,
    "num_workers": num_workers,
    "loss_alpha" : loss_alpha,
    "loss_gamma" : loss_gamma,
    "dropout_ratio" : dropout_ratio,
    "num_augmentations": num_augmentations,
    "alpha": alpha,
    "mixup_prob":mixup_prob
    }
)

In [30]:
from torch.cuda.amp import autocast, GradScaler

def train_one_epoch(loader, model, optimizer, loss_fn, device, val_loader=None, epoch=None, mixup_fn=None, accumulation_steps=2, use_amp=True):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    scaler = GradScaler(enabled=use_amp)  # GradScaler 초기화

    pbar = tqdm(loader)

    for i, (image, targets) in enumerate(pbar):
        image = image.to(device)
        targets = targets.to(device)

        if mixup_fn is not None:
            image, targets = mixup_fn(image, targets)

        with autocast(enabled=use_amp):  # autocast 컨텍스트 매니저 사용
            preds = model(image)
            loss = loss_fn(preds, targets)

        loss = loss / accumulation_steps  # 손실 정규화

        scaler.scale(loss).backward()  # 스케일링된 그래디언트 계산

        if (i + 1) % accumulation_steps == 0:
            if use_amp:
                scaler.step(optimizer)  # 스케일링된 그래디언트로 옵티마이저 업데이트
                scaler.update()  # 스케일러 업데이트
            else:
                optimizer.step()
            optimizer.zero_grad()

        train_loss += loss.item() * accumulation_steps
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.argmax(dim=1).detach().cpu().numpy())  # 정수 레이블로 변환

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    if val_loader:
        val_preds, val_targets = [], []
        model.eval()
        with torch.no_grad():
            for images, targets in val_loader:
                images = images.to(device)
                targets = targets.to(device, dtype=torch.long)

                if mixup_fn is not None:
                    images, targets = mixup_fn(images, targets)

                preds = model(images)
                val_preds.extend(preds.argmax(dim=1).cpu().numpy())
                val_targets.extend(targets.argmax(dim=1).cpu().numpy())

        val_loss = loss_fn(preds, targets).item()
        val_acc = accuracy_score(val_targets, val_preds)
        val_f1 = f1_score(val_targets, val_preds, average='macro')

        ret.update({
            "val_loss": val_loss,
            "val_acc": val_acc,
            "val_f1": val_f1,
            "val_preds": val_preds,  # val_preds를 결과 딕셔너리에 추가
            "val_targets": val_targets,  # val_targets를 결과 딕셔너리에 추가
        })
        
        if epoch is not None:
                    epoch_interval = max(1, EPOCHS // 20)  # 전체 epochs의 1/20 간격으로 로그 출력
                    if (epoch + 1) % epoch_interval == 0 or epoch == 0 or epoch == EPOCHS - 1:
                        print(f"Epoch: {epoch+1}/{EPOCHS}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
                            f"Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Train F1: {train_f1:.4f}, Val F1: {val_f1:.4f}")

    return ret

In [31]:
import os
import cv2
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from albumentations import (
    Compose, RandomResizedCrop, Resize, HorizontalFlip, VerticalFlip,
    RandomRotate90, Rotate, GaussianBlur, HueSaturationValue,
    RandomBrightnessContrast, Normalize
)

# 증강 기법 정의
def get_train_augmentation(img_size, mixup_prob=0.5, alpha=1.0):
    augmentation = Compose([
        RandomResizedCrop(height=img_size, width=img_size, scale=(0.8, 1.0), p=0.5),
        Resize(height=img_size, width=img_size),
        HorizontalFlip(p=0.6),
        VerticalFlip(p=0.6),
        RandomRotate90(p=0.5),
        Rotate(limit=(-35, 35), p=0.5),
        GaussianBlur(blur_limit=(3, 7), p=0.5),
        HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
        RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
        
        # 클래스 간 차이 부각을 위한 추가 기법
        A.ImageCompression(quality_lower=60, quality_upper=100, p=0.5),  # 이미지 압축 및 품질 저하
        A.CoarseDropout(max_holes=8, max_height=16, max_width=16, p=0.5), # CoarseDropout 추가
        Normalize(mean=[0.57433558, 0.58330406, 0.58818927],
                    std=[0.18964056, 0.18694252, 0.18506919]),
                    #직접 구함
        ToTensorV2()
    ])
    
    # Wandb에 증강 기법 기록
    wandb.config.update({"augmentation": str(augmentation)})
    return augmentation

# 테스트 데이터 변환 기법 정의
def get_test_augmentation(img_size):
    return Compose([
        Resize(height=img_size, width=img_size),
        Normalize(mean=[0.57433558, 0.58330406, 0.58818927],
                    std=[0.18964056, 0.18694252, 0.18506919]),
                    #직접 구함
        ToTensorV2()
    ])
    
class ImageDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None, num_augmentations=1):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.num_augmentations = num_augmentations

    def __len__(self):
        return len(self.image_paths) * self.num_augmentations

    def __getitem__(self, idx):
        image_idx = idx // self.num_augmentations
        image_path = self.image_paths[image_idx]
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.labels[image_idx]

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
            if 'mixup' in augmented:
                label = augmented['mixup']['target']

        return image, label

In [32]:
class EarlyStopping:
    """조기 종료(Early stopping) 을 위한 클래스"""
    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_f1_max = np.Inf
        self.delta = delta

    def __call__(self, val_f1, model):
        score = val_f1

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_f1, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_f1, model)
            self.counter = 0

    def save_checkpoint(self, val_f1, model):
        """모델의 매개변수를 저장합니다"""
        if self.verbose:
            print(f'Validation F1 score increased ({self.val_f1_max:.6f} --> {val_f1:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_f1_max = val_f1

In [33]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = nn.CrossEntropyLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = (self.alpha * (1 - pt) ** self.gamma * ce_loss).mean()

        if self.reduction == 'none':
            return focal_loss
        elif self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()


In [34]:
import numpy as np
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score

train_df = pd.read_csv("/data/ephemeral/home/data/train.csv", usecols=['ID', 'target'])
train_image_paths = [f"/data/ephemeral/home/data/train/{fname}" for fname in train_df['ID']]
train_labels = train_df['target'].values

test_df = pd.read_csv("/data/ephemeral/home/data/sample_submission.csv")
test_image_paths = test_df['ID'].apply(lambda x: f"/data/ephemeral/home/data/test/{x}").tolist()
test_labels = [0] * len(test_image_paths)  # 테스트 데이터셋에는 레이블이 없으므로 더미 레이블 사용

tst_dataset = ImageDataset(test_image_paths, test_labels, transform=get_test_augmentation(img_size))
tst_loader = DataLoader(tst_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=True)


In [35]:

# Stratified K-Fold Cross Validation 설정
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
wandb.config.update({"n_splits": str(n_splits)})


fold_scores = []

# 5개의 Fold에 대한 예측 확률을 저장할 리스트
pred_probs = []

# 모델 초기화
model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)

loss_fn = FocalLoss(alpha = loss_alpha, gamma=loss_gamma)
model.dropout = nn.Dropout(p=dropout_ratio)

# AdamW 옵티마이저 사용
optimizer = optim.AdamW(model.parameters(), lr=LR,weight_decay=0.03994)
wandb.config.update({"optimizer": str(optimizer)})

# 학습률 스케줄러 설정
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)

# 조기 종료 설정
early_stopping = EarlyStopping(patience=5, verbose=True)

# 각 fold에 대해 모델 학습 및 평가
for fold, (train_idx, val_idx) in enumerate(skf.split(train_image_paths, train_labels)):
    print(f'Fold {fold+1}/{n_splits}')

    # 학습 데이터와 검증 데이터 분리
    train_paths = [train_image_paths[i] for i in train_idx]
    train_labels_ = [train_labels[i] for i in train_idx]
    val_paths = [train_image_paths[i] for i in val_idx]
    val_labels = [train_labels[i] for i in val_idx]

    # 데이터셋 및 데이터로더 생성
    train_dataset = ImageDataset(train_paths, train_labels_, transform=get_train_augmentation(img_size, mixup_prob=0.5, alpha=1.0), num_augmentations=num_augmentations)
    val_dataset = ImageDataset(val_paths, val_labels, transform=get_train_augmentation(img_size, mixup_prob=0.5, alpha=1.0), num_augmentations=num_augmentations)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers)


    wandb.watch(model, log='all')

    # 모델 학습
    for epoch in range(EPOCHS):
        mixup_fn = Mixup(mixup_alpha=alpha, cutmix_alpha=0.0, 
                         prob=mixup_prob, switch_prob=0.0, mode='batch', label_smoothing=0.0, num_classes=17)
        results = train_one_epoch(train_loader, model, optimizer, loss_fn, device, val_loader, epoch, mixup_fn)

        # 학습률 스케줄러 업데이트
        scheduler.step(results['val_f1'])
        wandb.log({"epoch": epoch, "train_loss": results["train_loss"], "train_acc": results["train_acc"], "train_f1": results["train_f1"],
                   "val_loss": results["val_loss"], "val_acc": results["val_acc"], "val_f1": results["val_f1"]})
        
        # 조기 종료 확인
        early_stopping(results['val_f1'], model)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # 모델 평가
    val_preds = results['val_preds']
    val_targets = results['val_targets']
    val_f1 = f1_score(val_targets, val_preds, average='macro')
    fold_scores.append(val_f1)
    wandb.log({"val_f1": val_f1})

    
    # 현재 Fold 모델의 테스트 데이터에 대한 예측 확률 저장
    model.eval()
    with torch.no_grad():
        fold_pred_probs = []
        for image, _ in tqdm(tst_loader):
            image = image.to(device)
            preds = model(image)
            fold_pred_probs.extend(preds.softmax(dim=1).detach().cpu().numpy())
        pred_probs.append(fold_pred_probs)

Fold 1/5


Loss: 0.0553: 100%|██████████| 785/785 [01:16<00:00, 10.26it/s]


Epoch: 1/100, Train Loss: 0.2346, Val Loss: 0.1185, Train Acc: 0.8455, Val Acc: 0.8736, Train F1: 0.8365, Val F1: 0.8507
Validation F1 score increased (inf --> 0.850680).  Saving model ...


Loss: 0.0125: 100%|██████████| 785/785 [01:17<00:00, 10.18it/s]


Validation F1 score increased (0.850680 --> 0.877809).  Saving model ...


Loss: 0.0002: 100%|██████████| 785/785 [01:18<00:00, 10.00it/s]


Validation F1 score increased (0.877809 --> 0.898740).  Saving model ...


Loss: 0.2785: 100%|██████████| 785/785 [01:18<00:00, 10.04it/s]


EarlyStopping counter: 1 out of 5


Loss: 0.0044: 100%|██████████| 785/785 [01:17<00:00, 10.07it/s]


Epoch: 5/100, Train Loss: 0.1074, Val Loss: 0.0146, Train Acc: 0.9305, Val Acc: 0.8854, Train F1: 0.9287, Val F1: 0.8784
EarlyStopping counter: 2 out of 5


Loss: 0.0021: 100%|██████████| 785/785 [01:18<00:00, 10.04it/s]


EarlyStopping counter: 3 out of 5


Loss: 0.1973: 100%|██████████| 785/785 [01:17<00:00, 10.09it/s]


Validation F1 score increased (0.898740 --> 0.899823).  Saving model ...


Loss: 0.0083: 100%|██████████| 785/785 [01:18<00:00, 10.05it/s]


EarlyStopping counter: 1 out of 5


Loss: 0.0003: 100%|██████████| 785/785 [01:17<00:00, 10.13it/s]


EarlyStopping counter: 2 out of 5


Loss: 0.0022: 100%|██████████| 785/785 [01:18<00:00, 10.05it/s]


Epoch: 10/100, Train Loss: 0.0848, Val Loss: 0.1421, Train Acc: 0.9409, Val Acc: 0.9194, Train F1: 0.9402, Val F1: 0.9177
Validation F1 score increased (0.899823 --> 0.917740).  Saving model ...


Loss: 0.0000: 100%|██████████| 785/785 [01:17<00:00, 10.09it/s]


EarlyStopping counter: 1 out of 5


Loss: 0.0004: 100%|██████████| 785/785 [01:39<00:00,  7.92it/s]


EarlyStopping counter: 2 out of 5


Loss: 0.0001: 100%|██████████| 785/785 [02:28<00:00,  5.29it/s]


EarlyStopping counter: 3 out of 5


Loss: 0.0002: 100%|██████████| 785/785 [02:26<00:00,  5.35it/s]


Epoch 00014: reducing learning rate of group 0 to 8.5850e-04.
EarlyStopping counter: 4 out of 5


Loss: 0.0367: 100%|██████████| 785/785 [02:28<00:00,  5.30it/s]


Epoch: 15/100, Train Loss: 0.0698, Val Loss: 1.5060, Train Acc: 0.9469, Val Acc: 0.9115, Train F1: 0.9468, Val F1: 0.9098
EarlyStopping counter: 5 out of 5
Early stopping


100%|██████████| 197/197 [00:31<00:00,  6.33it/s]


Fold 2/5


Loss: 0.0172: 100%|██████████| 785/785 [02:27<00:00,  5.33it/s]


Epoch: 1/100, Train Loss: 0.0657, Val Loss: 0.0006, Train Acc: 0.9505, Val Acc: 0.9726, Train F1: 0.9494, Val F1: 0.9710
Validation F1 score increased (0.917740 --> 0.970975).  Saving model ...
Early stopping


100%|██████████| 197/197 [00:34<00:00,  5.79it/s]


Fold 3/5


Loss: 0.0001: 100%|██████████| 785/785 [02:29<00:00,  5.26it/s]


Epoch: 1/100, Train Loss: 0.0656, Val Loss: 0.0002, Train Acc: 0.9449, Val Acc: 0.9675, Train F1: 0.9443, Val F1: 0.9693
EarlyStopping counter: 1 out of 5
Early stopping


100%|██████████| 197/197 [00:33<00:00,  5.80it/s]


Fold 4/5


Loss: 0.0000: 100%|██████████| 785/785 [02:31<00:00,  5.18it/s]


Epoch: 1/100, Train Loss: 0.0597, Val Loss: 0.0000, Train Acc: 0.9505, Val Acc: 0.9758, Train F1: 0.9496, Val F1: 0.9756
Validation F1 score increased (0.970975 --> 0.975580).  Saving model ...
Early stopping


100%|██████████| 197/197 [00:31<00:00,  6.17it/s]


Fold 5/5


Loss: 0.0001: 100%|██████████| 785/785 [02:33<00:00,  5.11it/s]


Epoch: 1/100, Train Loss: 0.0605, Val Loss: 0.0001, Train Acc: 0.9531, Val Acc: 0.9739, Train F1: 0.9522, Val F1: 0.9734
EarlyStopping counter: 1 out of 5
Early stopping


100%|██████████| 197/197 [00:32<00:00,  6.03it/s]


In [36]:
# 전체 평균 점수 계산
mean_score = np.mean(fold_scores)
print(f'Mean Macro F1 Score: {mean_score:.4f}')
wandb.log({"mean_score": mean_score})

pred_probs_mean = np.mean(pred_probs, axis=0)
preds_list_argmax = np.argmax(pred_probs_mean, axis=1)
wandb.log({"pred_probs": pred_probs})
wandb.log({"preds_list": preds_list_argmax})

submission_df = pd.read_csv("/data/ephemeral/home/data/sample_submission.csv")
submission_df['target'] = preds_list_argmax
submission_df.to_csv("submission1.csv", index=False)

Mean Macro F1 Score: 0.9598
