#### code
- stratified k-fold split
    - 각 폴드별로 학습하여 앙상블(평균)-soft voting(평균 앙상블)
- earlystopping
#### data
- csv파일에 label 변경
- 1, 13, 14 클래스에 대해서 오버샘플링하여 class imbalance 완화
- 7번 클래스를 7, 17로 나눈 후 예측 후 17을 7로 변경
- 7번 17번 클래스가 약 6:4정도의 비율을 가지고 있어서 다시 class imbalance 발생, 회전, 크롭으로 오버샘플링하여 class imbalance 완화

In [1]:
import os
import time
import random

import torch
import pandas as pd
import numpy as np
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import StratifiedKFold

from dotenv import load_dotenv
from datetime import datetime
from zoneinfo import ZoneInfo
import wandb

import pytesseract
import matplotlib.pyplot as plt

import timm
import albumentations as A
import torch.nn as nn
from albumentations.pytorch import ToTensorV2


In [2]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [3]:
# 학습에 사용할 장치를 설정합니다.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# wandb 연동
load_dotenv()
api_key = os.getenv('WANDB_API_KEY')

wandb.login(key=api_key)

train_time = datetime.fromtimestamp(time.time(), tz=ZoneInfo("Asia/Seoul")).strftime("%Y%m%d-%H%M%S")



Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: alvlalvl92 (alvlalvl). Use `wandb login --relogin` to force relogin


In [5]:
# data config
data_path = '../data/'

# model config
model_name = 'efficientnet_b5' # 'resnet50' 'efficientnet-b0', ...

In [6]:
# training config
img_size = 128
LR = 1e-5
EPOCHS = 10
BATCH_SIZE = 4
num_workers = 0
num_folds = 3
augment_ratio = 2
early_stopping_patience = 5  # Early Stopping 설정

In [7]:
# 데이터셋 클래스를 수정합니다.
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None, augment_ratio=1):
        self.df = pd.read_csv(csv)
        self.path = path
        self.transform = transform
        self.augment_ratio = augment_ratio

        # 데이터 증강
        if self.augment_ratio > 1:
            self.df = self.df.loc[self.df.index.repeat(self.augment_ratio)].reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df.iloc[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [8]:
# one epoch 학습을 위한 함수입니다.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader) # ,leave=False
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        model.zero_grad(set_to_none=True)

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    return ret

In [9]:
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        pbar = tqdm(loader) # leave=False
        for image, targets in pbar:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())
            targets_list.extend(targets.cpu().numpy())

            pbar.set_description(f"Val Loss: {loss.item():.4f}")

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "val_loss": val_loss,
        "val_acc": val_acc,
        "val_f1": val_f1,
    }

    return ret

In [10]:
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_score is None:
            self.best_score = val_loss
        elif val_loss > self.best_score - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = val_loss
            self.counter = 0

In [11]:
# # augmentation을 위한 transform 코드
# trn_transform = A.Compose([
#     # 이미지 크기 조정
#     A.Resize(height=img_size, width=img_size),
#     A.OneOf([
#         A.GaussNoise(var_limit=(10.0, 800.0), p=0.75),
#         A.GaussianBlur(blur_limit=(1, 7), p=0.5)
#     ], p=0.75),
#     A.RandomRotate90(p=0.5),
#     A.HorizontalFlip(p=0.75),
#     A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
#     A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=30, p=0.25),
#     A.CoarseDropout(max_holes=6, max_height=32, max_width=32, p=0.5),
#     A.ElasticTransform(alpha=1, sigma=30, alpha_affine=30, p=0.5),
#     A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
#     A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
#     A.Rotate(limit=30, p=0.75),
#     A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.5),
#     A.MotionBlur(blur_limit=5, p=0.5),
#     A.OpticalDistortion(p=0.5),
#     A.Transpose(p=0.5),
#     A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#     ToTensorV2(),
# ])

import albumentations as A
from albumentations.pytorch import ToTensorV2

# 이미지 크기 조정
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    
    # 레이아웃 관련 변환
    A.OneOf([
        A.RandomCrop(height=img_size, width=img_size, p=1.0),  # Random crop을 통해 다양한 크기의 이미지 생성
        A.CenterCrop(height=img_size, width=img_size, p=1.0)   # Center crop을 통해 중심 부분을 강조
    ], p=0.5),

    # 기존 변환
    A.OneOf([
        A.GaussNoise(var_limit=(10.0, 800.0), p=0.75),
        A.GaussianBlur(blur_limit=(1, 7), p=0.5)
    ], p=0.75),
    A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.75),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=30, p=0.25),
    A.CoarseDropout(max_holes=6, max_height=32, max_width=32, p=0.5),
    A.ElasticTransform(alpha=1, sigma=30, alpha_affine=30, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.Rotate(limit=30, p=0.75),
    A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.5),
    A.MotionBlur(blur_limit=5, p=0.5),
    A.OpticalDistortion(p=0.5),
    A.Transpose(p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])


# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [12]:
# Dataset 정의
trn_dataset = ImageDataset(
    f"{data_path}train.csv",
    f"{data_path}train/",
    transform=trn_transform,
    augment_ratio=augment_ratio  # 증강 비율 추가
)
tst_dataset = ImageDataset(
    f"{data_path}sample_submission.csv",
    f"{data_path}test/",
    transform=tst_transform
)
print(len(trn_dataset), len(tst_dataset))

3140 3140


In [13]:
# Get targets for stratification
targets = trn_dataset.df['target'].values

# Stratified K-Fold Cross Validation
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=SEED)


In [14]:

best_models = []
best_val_accuracies = []

for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(trn_dataset) // augment_ratio), targets[::augment_ratio])):
    print(f'Fold {fold + 1}/{num_folds}')

    wandb.init(
        project=f"{train_time}_one_vs_all",
        name=f"{train_time}_fold{fold+1}_{model_name}",
        config={
            "learning_rate": LR,
            "architecture": model_name,
            "dataset": "custom-dataset",
            "epochs": EPOCHS,
            "batch_size": BATCH_SIZE,
            "image_size": img_size,
            "num_workers": num_workers,
            'augment_ratio': augment_ratio,
            'early_stopping_patience': early_stopping_patience
        }
    )


    # 증강된 데이터에 맞게 인덱스 조정
    train_idx = np.concatenate([train_idx * augment_ratio + i for i in range(augment_ratio)])
    val_idx = np.concatenate([val_idx * augment_ratio + i for i in range(augment_ratio)])
    
    train_subset = Subset(trn_dataset, train_idx)
    val_subset = Subset(trn_dataset, val_idx)
    
    train_loader = DataLoader(
        train_subset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )
    val_loader = DataLoader(
        val_subset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    
    # load model
    model = timm.create_model(
        model_name,
        pretrained=True,
        num_classes=17
    ).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=LR)

    best_val_accuracy = 0
    best_model_state = None
    early_stopping = EarlyStopping(patience=early_stopping_patience)

    for epoch in range(EPOCHS):
        train_results = train_one_epoch(train_loader, model, optimizer, loss_fn, device=device)
        
        # 검증 성능 평가 추가
        val_results = validate(val_loader, model, loss_fn, device)
        
        # 결과 합치기
        results = {**train_results, **val_results, 'epoch': epoch, 'fold': fold}

        wandb.log(results)

        log = ""
        for k, v in results.items():
            log += f"{k}: {v:.4f}\n"
        print(log)

        # 최고 성능 모델 저장
        if val_results['val_acc'] > best_val_accuracy:
            best_val_accuracy = val_results['val_acc']
            best_model_state = model.state_dict()

        # Early stopping 체크
        early_stopping(val_results['val_loss'])
        if early_stopping.early_stop:
            print(f"Early stopping triggered at epoch {epoch+1}")
            break

    # 폴드의 최고 성능 모델 저장
    torch.save(best_model_state, f'best_model_fold_{fold}.pth')
    best_models.append(best_model_state)
    best_val_accuracies.append(best_val_accuracy)

    wandb.finish()

print("Training completed.")

Fold 1/3


Loss: 2.5159: 100%|██████████| 523/523 [00:57<00:00,  9.02it/s] 
Val Loss: 1.9777: 100%|██████████| 262/262 [00:14<00:00, 17.97it/s]       


train_loss: 4.3587
train_acc: 0.1209
train_f1: 0.1134
val_loss: 456852.1068
val_acc: 0.1307
val_f1: 0.1258
epoch: 0.0000
fold: 0.0000



Loss: 2.9237: 100%|██████████| 523/523 [00:58<00:00,  8.91it/s]
Val Loss: 3.6075: 100%|██████████| 262/262 [00:15<00:00, 16.86it/s]        


train_loss: 2.5583
train_acc: 0.2304
train_f1: 0.2057
val_loss: 10825374.6443
val_acc: 0.2099
val_f1: 0.1980
epoch: 1.0000
fold: 0.0000



Loss: 2.3303: 100%|██████████| 523/523 [00:57<00:00,  9.08it/s]
Val Loss: 2.7961: 100%|██████████| 262/262 [00:15<00:00, 17.24it/s]          


train_loss: 2.2779
train_acc: 0.3050
train_f1: 0.2709
val_loss: 1252438573.0387
val_acc: 0.2490
val_f1: 0.2312
epoch: 2.0000
fold: 0.0000



Loss: 1.9027: 100%|██████████| 523/523 [00:57<00:00,  9.12it/s]
Val Loss: 131.1782: 100%|██████████| 262/262 [00:15<00:00, 17.32it/s]       


train_loss: 2.0830
train_acc: 0.3480
train_f1: 0.3144
val_loss: 13927376.1014
val_acc: 0.2605
val_f1: 0.2540
epoch: 3.0000
fold: 0.0000



Loss: 2.6174: 100%|██████████| 523/523 [00:57<00:00,  9.09it/s]
Val Loss: 3.1442: 100%|██████████| 262/262 [00:15<00:00, 17.36it/s]        


train_loss: 1.9377
train_acc: 0.3886
train_f1: 0.3458
val_loss: 4072885.4884
val_acc: 0.3473
val_f1: 0.3449
epoch: 4.0000
fold: 0.0000



Loss: 2.2587: 100%|██████████| 523/523 [00:57<00:00,  9.11it/s]
Val Loss: 1.6092: 100%|██████████| 262/262 [00:14<00:00, 17.62it/s]     


train_loss: 1.8021
train_acc: 0.4168
train_f1: 0.3791
val_loss: 14906.4307
val_acc: 0.4113
val_f1: 0.3889
epoch: 5.0000
fold: 0.0000



Loss: 1.3340: 100%|██████████| 523/523 [00:57<00:00,  9.15it/s]
Val Loss: 3.4415: 100%|██████████| 262/262 [00:15<00:00, 17.26it/s]        


train_loss: 1.7034
train_acc: 0.4565
train_f1: 0.4158
val_loss: 9734612.4643
val_acc: 0.4523
val_f1: 0.4203
epoch: 6.0000
fold: 0.0000



Loss: 2.3905: 100%|██████████| 523/523 [00:56<00:00,  9.22it/s]
Val Loss: 13.4730: 100%|██████████| 262/262 [00:14<00:00, 17.71it/s]      


train_loss: 1.6052
train_acc: 0.4919
train_f1: 0.4532
val_loss: 5135617.5313
val_acc: 0.4160
val_f1: 0.3885
epoch: 7.0000
fold: 0.0000



Loss: 0.6028: 100%|██████████| 523/523 [00:56<00:00,  9.21it/s]
Val Loss: 2.4384: 100%|██████████| 262/262 [00:14<00:00, 17.77it/s]       


train_loss: 1.5261
train_acc: 0.5096
train_f1: 0.4703
val_loss: 1981752.9201
val_acc: 0.4971
val_f1: 0.4711
epoch: 8.0000
fold: 0.0000



Loss: 1.2721: 100%|██████████| 523/523 [00:56<00:00,  9.18it/s]
Val Loss: 2.2212: 100%|██████████| 262/262 [00:14<00:00, 18.05it/s]       


train_loss: 1.4397
train_acc: 0.5335
train_f1: 0.4955
val_loss: 82598.9382
val_acc: 0.5181
val_f1: 0.4861
epoch: 9.0000
fold: 0.0000



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
fold,▁▁▁▁▁▁▁▁▁▁
train_acc,▁▃▄▅▆▆▇▇██
train_f1,▁▃▄▅▅▆▇▇██
train_loss,█▄▃▃▂▂▂▁▁▁
val_acc,▁▂▃▃▅▆▇▆██
val_f1,▁▂▃▃▅▆▇▆██
val_loss,▁▁█▁▁▁▁▁▁▁

0,1
epoch,9.0
fold,0.0
train_acc,0.53346
train_f1,0.49547
train_loss,1.43968
val_acc,0.51813
val_f1,0.48611
val_loss,82598.93815


Fold 2/3


Loss: 2.3875: 100%|██████████| 524/524 [00:58<00:00,  9.00it/s]
Val Loss: 3.7917: 100%|██████████| 262/262 [00:15<00:00, 17.43it/s]        


train_loss: 4.2691
train_acc: 0.1313
train_f1: 0.1240
val_loss: 2018417.1926
val_acc: 0.1195
val_f1: 0.1036
epoch: 0.0000
fold: 1.0000



Loss: 2.1777: 100%|██████████| 524/524 [00:57<00:00,  9.10it/s]
Val Loss: 1.7731: 100%|██████████| 262/262 [00:15<00:00, 17.30it/s]        


train_loss: 2.6525
train_acc: 0.2297
train_f1: 0.2005
val_loss: 8780623.7581
val_acc: 0.1979
val_f1: 0.1612
epoch: 1.0000
fold: 1.0000



Loss: 2.3970: 100%|██████████| 524/524 [00:57<00:00,  9.19it/s]
Val Loss: 64.4234: 100%|██████████| 262/262 [00:14<00:00, 17.49it/s]        


train_loss: 2.2960
train_acc: 0.2884
train_f1: 0.2525
val_loss: 13779500.8648
val_acc: 0.2543
val_f1: 0.2216
epoch: 2.0000
fold: 1.0000



Loss: 3.6406: 100%|██████████| 524/524 [00:57<00:00,  9.18it/s]
Val Loss: 2.2141: 100%|██████████| 262/262 [00:14<00:00, 17.53it/s]      


train_loss: 2.0842
train_acc: 0.3524
train_f1: 0.3129
val_loss: 83131.8305
val_acc: 0.3222
val_f1: 0.2808
epoch: 3.0000
fold: 1.0000



Loss: 1.4983: 100%|██████████| 524/524 [00:57<00:00,  9.14it/s]
Val Loss: 2.5608: 100%|██████████| 262/262 [00:15<00:00, 17.39it/s]       


train_loss: 1.8917
train_acc: 0.4054
train_f1: 0.3678
val_loss: 171170.9134
val_acc: 0.3480
val_f1: 0.3037
epoch: 4.0000
fold: 1.0000



Loss: 2.5151: 100%|██████████| 524/524 [00:57<00:00,  9.17it/s]
Val Loss: 2.9953: 100%|██████████| 262/262 [00:15<00:00, 17.12it/s]        


train_loss: 1.7872
train_acc: 0.4351
train_f1: 0.3935
val_loss: 12454434.0894
val_acc: 0.4216
val_f1: 0.3831
epoch: 5.0000
fold: 1.0000



Loss: 2.5739: 100%|██████████| 524/524 [00:57<00:00,  9.09it/s]
Val Loss: 1.8028: 100%|██████████| 262/262 [00:14<00:00, 17.78it/s]       


train_loss: 1.6674
train_acc: 0.4632
train_f1: 0.4244
val_loss: 503122.5414
val_acc: 0.4426
val_f1: 0.4082
epoch: 6.0000
fold: 1.0000



Loss: 0.9743: 100%|██████████| 524/524 [00:57<00:00,  9.10it/s]
Val Loss: 1.2024: 100%|██████████| 262/262 [00:14<00:00, 17.58it/s]       


train_loss: 1.6460
train_acc: 0.4733
train_f1: 0.4430
val_loss: 215939.1698
val_acc: 0.4857
val_f1: 0.4484
epoch: 7.0000
fold: 1.0000



Loss: 2.9598: 100%|██████████| 524/524 [00:56<00:00,  9.21it/s]
Val Loss: 2.2884: 100%|██████████| 262/262 [00:14<00:00, 17.50it/s]        


train_loss: 1.5280
train_acc: 0.5148
train_f1: 0.4750
val_loss: 13124289.0478
val_acc: 0.5096
val_f1: 0.4612
epoch: 8.0000
fold: 1.0000

Early stopping triggered at epoch 9


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▄▅▅▆▇█
fold,▁▁▁▁▁▁▁▁▁
train_acc,▁▃▄▅▆▇▇▇█
train_f1,▁▃▄▅▆▆▇▇█
train_loss,█▄▃▂▂▂▁▁▁
val_acc,▁▂▃▅▅▆▇██
val_f1,▁▂▃▄▅▆▇██
val_loss,▂▅█▁▁▇▁▁█

0,1
epoch,8.0
fold,1.0
train_acc,0.5148
train_f1,0.47495
train_loss,1.52804
val_acc,0.50956
val_f1,0.46121
val_loss,13124289.04777


Fold 3/3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Loss: 2.0266: 100%|██████████| 524/524 [00:57<00:00,  9.15it/s]
Val Loss: 2.6673: 100%|██████████| 262/262 [00:15<00:00, 17.30it/s]       


train_loss: 4.3713
train_acc: 0.1323
train_f1: 0.1218
val_loss: 142619.1560
val_acc: 0.1205
val_f1: 0.1142
epoch: 0.0000
fold: 2.0000



Loss: 2.8978: 100%|██████████| 524/524 [00:57<00:00,  9.11it/s]
Val Loss: 3.4750: 100%|██████████| 262/262 [00:15<00:00, 17.43it/s]        


train_loss: 2.6714
train_acc: 0.2273
train_f1: 0.2053
val_loss: 5502073.0724
val_acc: 0.2055
val_f1: 0.1851
epoch: 1.0000
fold: 2.0000



Loss: 1.3648: 100%|██████████| 524/524 [00:57<00:00,  9.12it/s]
Val Loss: 1.6926: 100%|██████████| 262/262 [00:15<00:00, 17.44it/s]         


train_loss: 2.3100
train_acc: 0.2918
train_f1: 0.2611
val_loss: 11489279.7839
val_acc: 0.2055
val_f1: 0.1985
epoch: 2.0000
fold: 2.0000



Loss: 3.0180: 100%|██████████| 524/524 [00:57<00:00,  9.09it/s]
Val Loss: 1.8093: 100%|██████████| 262/262 [00:14<00:00, 17.85it/s]        


train_loss: 2.0951
train_acc: 0.3553
train_f1: 0.3171
val_loss: 2913682.3868
val_acc: 0.3317
val_f1: 0.3135
epoch: 3.0000
fold: 2.0000



Loss: 3.2995: 100%|██████████| 524/524 [01:24<00:00,  6.17it/s]
Val Loss: 2.9374: 100%|██████████| 262/262 [00:13<00:00, 18.75it/s]         


train_loss: 1.8848
train_acc: 0.4007
train_f1: 0.3641
val_loss: 6988615.9077
val_acc: 0.3231
val_f1: 0.3117
epoch: 4.0000
fold: 2.0000



Loss: 2.1565: 100%|██████████| 524/524 [00:54<00:00,  9.63it/s]
Val Loss: 0.3520: 100%|██████████| 262/262 [00:14<00:00, 18.38it/s]     


train_loss: 1.7640
train_acc: 0.4542
train_f1: 0.4171
val_loss: 977.2811
val_acc: 0.4340
val_f1: 0.4078
epoch: 5.0000
fold: 2.0000



Loss: 0.6564: 100%|██████████| 524/524 [00:54<00:00,  9.66it/s]
Val Loss: 0.2651: 100%|██████████| 262/262 [00:14<00:00, 18.50it/s]        


train_loss: 1.6733
train_acc: 0.4642
train_f1: 0.4261
val_loss: 674513.2906
val_acc: 0.4761
val_f1: 0.4568
epoch: 6.0000
fold: 2.0000



Loss: 3.5986: 100%|██████████| 524/524 [00:54<00:00,  9.66it/s]
Val Loss: 0.0180: 100%|██████████| 262/262 [00:13<00:00, 18.75it/s]        


train_loss: 1.5965
train_acc: 0.4971
train_f1: 0.4623
val_loss: 746766.0323
val_acc: 0.3948
val_f1: 0.3839
epoch: 7.0000
fold: 2.0000



Loss: 0.7465: 100%|██████████| 524/524 [00:54<00:00,  9.61it/s]
Val Loss: 21.3326: 100%|██████████| 262/262 [00:14<00:00, 18.25it/s]     


train_loss: 1.4644
train_acc: 0.5282
train_f1: 0.4953
val_loss: 115996.7093
val_acc: 0.4675
val_f1: 0.4490
epoch: 8.0000
fold: 2.0000



Loss: 1.5604: 100%|██████████| 524/524 [00:53<00:00,  9.79it/s]
Val Loss: 0.0140: 100%|██████████| 262/262 [00:14<00:00, 18.33it/s]    


train_loss: 1.3909
train_acc: 0.5540
train_f1: 0.5191
val_loss: 201.1882
val_acc: 0.5182
val_f1: 0.4857
epoch: 9.0000
fold: 2.0000



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
fold,▁▁▁▁▁▁▁▁▁▁
train_acc,▁▃▄▅▅▆▇▇██
train_f1,▁▂▃▄▅▆▆▇██
train_loss,█▄▃▃▂▂▂▁▁▁
val_acc,▁▂▂▅▅▇▇▆▇█
val_f1,▁▂▃▅▅▇▇▆▇█
val_loss,▁▄█▃▅▁▁▁▁▁

0,1
epoch,9.0
fold,2.0
train_acc,0.55396
train_f1,0.51908
train_loss,1.39086
val_acc,0.51816
val_f1,0.48567
val_loss,201.18816


Training completed.


In [15]:
# DataLoader 정의
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

In [16]:
# 모든 폴드 완료 후 앙상블 예측
preds_list = []
for fold, model_state in enumerate(best_models):
    model.load_state_dict(model_state)
    model.eval()
    fold_preds = []
    for image, _ in tqdm(tst_loader):
        image = image.to(device)
        with torch.no_grad():
            preds = model(image)
        fold_preds.extend(preds.detach().cpu().numpy())
    preds_list.append(fold_preds)


100%|██████████| 785/785 [00:28<00:00, 27.66it/s]
100%|██████████| 785/785 [00:27<00:00, 28.23it/s]
100%|██████████| 785/785 [00:28<00:00, 28.02it/s]


In [17]:
# 앙상블 (평균)
final_preds = np.mean(preds_list, axis=0).argmax(axis=1) # 함수는 주어진 축(axis)을 따라 가장 큰 값의 인덱스를 반환


In [18]:
# 예측 결과를 데이터프레임으로 만들기
pred_df = pd.DataFrame({
    'ID': tst_dataset.df['ID'],  # ID 열
    'target': final_preds  # 앙상블 예측 결과
})

In [19]:
# sample_submission.csv와 비교 확인
sample_submission_df = pd.read_csv(f"{data_path}/sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [20]:
# 결과 저장
pred_df.to_csv("pred.csv", index=False)

In [21]:
pred_df

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,6
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,10
4,00901f504008d884.jpg,2
...,...,...
3135,ffb4b6f619fb60ea.jpg,6
3136,ffb54299b1ad4159.jpg,10
3137,ffc2c91dff8cf2c0.jpg,8
3138,ffc4e330a5353a2a.jpg,0
