In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
import torch
import torch.nn as nn
from torch.optim import Adam
from tqdm import tqdm
from sklearn.metrics import f1_score
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from itertools import combinations

# Dataset that loads the images listed in a DataFrame from a directory.
class ImageDataset(Dataset):
    """Map-style dataset yielding ``(image, target)`` pairs.

    Args:
        df: DataFrame whose rows unpack into exactly two values: the image
            file name followed by its target.
        path: Directory that the file names are relative to.
        transform: Optional callable invoked as ``transform(image=img)``; the
            ``'image'`` entry of its result replaces the loaded array.
    """

    def __init__(self, df, path, transform=None):
        # Keep the rows as a NumPy array so samples can be fetched by position.
        self.df = df.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        The image is returned as an RGB array, or as whatever ``transform``
        produces when a transform was supplied.
        """
        name, target = self.df[idx]
        # The context manager releases the file handle deterministically.
        # Converting to RGB guarantees a 3-channel array even for grayscale,
        # palette or RGBA files, which would otherwise not match 3-channel
        # normalisation statistics.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert("RGB"))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# Runs one training epoch.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for a single pass over ``loader``.

    Args:
        loader: Iterable of ``(image, targets)`` batches; must support ``len``.
        model: Module called as ``model(image)``; put into training mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable invoked as ``loss_fn(preds, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        dict with ``"train_loss"`` (sum of per-batch loss values divided by
        ``len(loader)``) and ``"train_f1"`` (macro F1 of the argmax
        predictions against the targets).
    """
    model.train()

    batch_losses = []
    predicted_labels = []
    true_labels = []

    progress = tqdm(loader)
    for image, targets in progress:
        image, targets = image.to(device), targets.to(device)

        optimizer.zero_grad()
        logits = model(image)
        loss = loss_fn(logits, targets)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the total and the bar.
        batch_loss = loss.item()
        batch_losses.append(batch_loss)

        # The predicted class is the index of the largest value along dim 1.
        predicted_labels.extend(logits.argmax(dim=1).detach().cpu().numpy())
        true_labels.extend(targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {batch_loss:.4f}")

    return {
        "train_loss": sum(batch_losses) / len(loader),
        "train_f1": f1_score(true_labels, predicted_labels, average='macro'),
    }

# Runs one validation epoch.
def validate_one_epoch(loader, model, loss_fn, device):
    """Evaluate ``model`` on every batch of ``loader`` without gradients.

    Args:
        loader: Iterable of ``(image, targets)`` batches; must support ``len``.
        model: Module called as ``model(image)``; put into eval mode here.
        loss_fn: Callable invoked as ``loss_fn(preds, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        dict with ``"val_loss"`` (sum of per-batch loss values divided by
        ``len(loader)``) and ``"val_f1"`` (macro F1 of the argmax predictions
        against the targets).
    """
    model.eval()

    batch_losses = []
    predicted_labels = []
    true_labels = []

    with torch.no_grad():
        for image, targets in tqdm(loader):
            image, targets = image.to(device), targets.to(device)

            logits = model(image)
            batch_losses.append(loss_fn(logits, targets).item())

            # The predicted class is the index of the largest value along dim 1.
            predicted_labels.extend(logits.argmax(dim=1).detach().cpu().numpy())
            true_labels.extend(targets.detach().cpu().numpy())

    return {
        "val_loss": sum(batch_losses) / len(loader),
        "val_f1": f1_score(true_labels, predicted_labels, average='macro'),
    }

# Early-stopping helper.
class EarlyStopping:
    """Signal when a higher-is-better score has stopped improving.

    Call the instance with the newest score after each epoch. A score counts
    as an improvement unless it is strictly below ``best_score + delta``.
    After ``patience`` consecutive non-improving calls ``early_stop`` becomes
    ``True`` and is never reset.

    Args:
        patience: Consecutive non-improving calls tolerated before stopping.
        delta: Margin added to the best score when testing for improvement.
    """

    def __init__(self, patience=3, delta=0):
        self.patience, self.delta = patience, delta
        self.best_score = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, score):
        """Update the tracker with the latest ``score``."""
        # The first observation only establishes the baseline.
        if self.best_score is None:
            self.best_score = score
            return

        stalled = score < self.best_score + self.delta
        if not stalled:
            # Improvement (or a tie when delta is 0): new baseline, reset count.
            self.best_score, self.counter = score, 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# Device: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data config: root directory of the dataset.
data_path = '/root/JY/data/'

# Names of the timm models to evaluate.
model_names = [
    'swin_base_patch4_window7_224',
]

# Training config.
img_size = 224  # height and width images are resized to
LR = 1e-4  # learning rate passed to Adam
EPOCHS = 5  # upper bound on epochs per fold
BATCH_SIZE = 32
num_workers = 4  # DataLoader worker count
n_splits = 5  # number of stratified cross-validation folds
patience = 3  # non-improving epochs tolerated by EarlyStopping

# Candidate augmentations. Every non-empty combination of these is evaluated,
# and str() of each transform is used to build the combination's name, so both
# the order and the arguments here affect the keys stored in `results`.
augmentation_list = [
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=15, p=0.5),
    A.RandomResizedCrop(height=img_size, width=img_size, scale=(0.8, 1.0), ratio=(0.9, 1.1), p=0.5),
    A.ToGray(p=0.1),
    A.ElasticTransform(p=0.1),
    A.GaussNoise(p=0.1),
    A.MotionBlur(blur_limit=5, p=0.2),
]

# Results: model name -> {augmentation combination name -> mean validation F1}.
results = {}

# Evaluate every non-empty augmentation combination for each model using
# stratified K-fold cross-validation, storing the mean validation macro-F1.

# Inputs that depend on neither the model nor the augmentation combination are
# prepared once here instead of being rebuilt for every combination.
train_df = pd.read_csv(os.path.join(data_path, "train.csv"))
# NOTE(review): test_df is not used in this section; it is still loaded in case
# later code relies on it — confirm and drop if unused.
test_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
train_img_dir = os.path.join(data_path, "train/")

# Validation pipeline: resize and normalise only, no random augmentation.
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# With a fixed integer random_state the split is identical on every call, so
# the fold indices are computed once and shared by all combinations. Folds are
# stratified on the second column of train_df.
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
folds = list(skf.split(train_df, train_df.iloc[:, 1]))

for model_name in model_names:
    print(f'Testing model: {model_name}')
    results[model_name] = {}
    for i in range(1, len(augmentation_list) + 1):
        for combo in combinations(augmentation_list, i):
            aug_name = '+'.join(str(aug) for aug in combo)
            print(f'Testing augmentation combination: {aug_name}')

            # Training pipeline: resize, the augmentations under test,
            # then normalise and convert to a tensor.
            trn_transform = A.Compose([
                A.Resize(height=img_size, width=img_size),
                *combo,
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2(),
            ])

            fold_val_f1_scores = []

            for fold, (train_idx, val_idx) in enumerate(folds):
                print(f'Fold {fold + 1}/{n_splits}')

                train_subset = train_df.iloc[train_idx]
                val_subset = train_df.iloc[val_idx]

                trn_dataset = ImageDataset(
                    train_subset,
                    train_img_dir,
                    transform=trn_transform
                )
                val_dataset = ImageDataset(
                    val_subset,
                    train_img_dir,
                    transform=val_transform
                )

                trn_loader = DataLoader(
                    trn_dataset,
                    batch_size=BATCH_SIZE,
                    shuffle=True,
                    num_workers=num_workers,
                    pin_memory=True,
                    drop_last=False
                )
                val_loader = DataLoader(
                    val_dataset,
                    batch_size=BATCH_SIZE,
                    shuffle=False,
                    num_workers=num_workers,
                    pin_memory=True
                )

                # A fresh pretrained model per fold so folds do not share weights.
                model = timm.create_model(
                    model_name,
                    pretrained=True,
                    num_classes=17
                ).to(device)

                loss_fn = nn.CrossEntropyLoss()
                optimizer = Adam(model.parameters(), lr=LR)

                early_stopping = EarlyStopping(patience=patience)

                for epoch in range(EPOCHS):
                    train_ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device)
                    val_ret = validate_one_epoch(val_loader, model, loss_fn, device=device)

                    log = f"Fold {fold + 1}, Epoch {epoch + 1}\n"
                    log += f"Train Loss: {train_ret['train_loss']:.4f}, Train F1: {train_ret['train_f1']:.4f}\n"
                    log += f"Val Loss: {val_ret['val_loss']:.4f}, Val F1: {val_ret['val_f1']:.4f}\n"
                    print(log)

                    early_stopping(val_ret['val_f1'])
                    if early_stopping.early_stop:
                        print(f"Early stopping at epoch {epoch + 1}")
                        break

                # NOTE(review): this records the F1 of the last epoch that ran,
                # not the best F1 seen during the fold — confirm that is the
                # intended comparison metric.
                fold_val_f1_scores.append(val_ret['val_f1'])

            avg_val_f1_score = np.mean(fold_val_f1_scores)
            results[model_name][aug_name] = avg_val_f1_score
            print(f'Model: {model_name}, Augmentation combination: {aug_name}, Average Val F1: {avg_val_f1_score:.4f}')

# Report the best-scoring augmentation combination for each model.
for model_name in model_names:
    model_results = results.get(model_name)
    if not model_results:
        # max() raises ValueError on an empty mapping, which happens when the
        # search ended before any combination finished for this model.
        print(f'Model: {model_name}, no completed augmentation results')
        continue
    best_augmentation = max(model_results, key=model_results.get)
    print(f'Model: {model_name}, Best augmentation combination: {best_augmentation}, F1 Score: {model_results[best_augmentation]:.4f}')


Testing model: swin_base_patch4_window7_224
Testing augmentation combination: RandomBrightnessContrast(always_apply=False, p=0.5, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), brightness_by_max=True)
Fold 1/5


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Loss: 0.5607: 100%|██████████| 40/40 [00:12<00:00,  3.32it/s]
100%|██████████| 10/10 [00:01<00:00,  8.45it/s]


Fold 1, Epoch 1
Train Loss: 0.9855, Train F1: 0.6654
Val Loss: 0.3021, Val F1: 0.8412



Loss: 0.0029: 100%|██████████| 40/40 [00:11<00:00,  3.60it/s]
100%|██████████| 10/10 [00:01<00:00,  8.50it/s]


Fold 1, Epoch 2
Train Loss: 0.2622, Train F1: 0.8853
Val Loss: 0.3638, Val F1: 0.8462



Loss: 0.1729:  57%|█████▊    | 23/40 [00:06<00:04,  3.41it/s]


KeyboardInterrupt: 