In [1]:
import os
import time
import optuna
import gc

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn

from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from torch.optim.lr_scheduler import CosineAnnealingLR

from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

import matplotlib.pyplot as plt
import seaborn as sns

import wandb

In [2]:
# Compute device: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# wandb project that receives every logged run
WANDB_PROJECT_NAME = 'cv_competition_optuna'

# Label CSVs (training set with gaussian augmentation, validation set, submission template)
TRAIN_AUG_CSV_PATH = '/upstage-cv-classification-cv2/data/train_aug_gaussian.csv'
VALID_CSV_PATH = '/upstage-cv-classification-cv2/data/valid.csv'
TEST_CSV_PATH = '/upstage-cv-classification-cv2/data/sample_submission.csv'

# Image directories matching the CSVs above
TRAIN_AUG_IMAGE_PATH = '/upstage-cv-classification-cv2/data/train_aug_gaussian'
VALID_IMAGE_PATH = '/upstage-cv-classification-cv2/data/valid'
TEST_IMAGE_PATH = '/upstage-cv-classification-cv2/data/test'

# Directory where the prediction CSV is written
RESULT_CSV_PATH = '/upstage-cv-classification-cv2'

# 1. DATA LOAD

In [3]:
class ImageDataset(Dataset):
    """Dataset of ``(image, target)`` pairs described by a two-column CSV.

    Each CSV row is unpacked as ``(file name, target)``; the file name is
    resolved relative to ``path``.

    Args:
        csv: Path of the CSV file; it is read once in the constructor.
        path: Directory that contains the image files named in the CSV.
        transform: Optional callable invoked as ``transform(image=array)``
            whose result is indexed with ``['image']`` (the calling
            convention the rest of this file uses with albumentations).
    """

    def __init__(self, csv, path, transform=None):
        # Keep the rows as a plain array so indexing in __getitem__ is cheap.
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return ``(image, target)``."""
        name, target = self.df[idx]
        # The context manager closes the underlying file as soon as the pixels
        # are copied out, instead of leaving the handle to the garbage collector.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            # Force three channels so grayscale / RGBA / palette files produce
            # the same (H, W, 3) layout as ordinary RGB images.
            img = np.array(pil_img.convert('RGB'))

        if self.transform is not None:
            img = self.transform(image=img)['image']

        return img, target

    def get_labels(self):
        """Return the second CSV column (the targets) for every row."""
        return self.df[:, 1]
    
    

# 2. Model Train

In [4]:
# Train for one epoch
def train_one_epoch(loader, model, optimizer, loss_fn, device, epoch):
    """Run one training pass over ``loader`` and log the metrics to wandb.

    Args:
        loader: Sized iterable of ``(image, targets)`` batches.
        model: Network to train; switched to train mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to.
        epoch: Epoch index, used to compute the global step for logging.

    Returns:
        dict with keys ``"model"``, ``"train_epoch"``, ``"train_loss"`` (mean
        of the per-batch losses), ``"train_acc"`` and ``"train_f1"`` (macro
        average). The misspelled ``"tarin_acc"`` key is kept as an alias of
        ``"train_acc"`` for callers that still read it.
    """
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for step, (image, targets) in enumerate(pbar):
        image = image.to(device)
        targets = targets.to(device)

        model.zero_grad(set_to_none=True)

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the running sum, the progress
        # bar and the wandb log instead of calling .item() three times.
        step_loss = loss.item()
        train_loss += step_loss

        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {step_loss:.4f}")

        wandb.log({
            "train_step": epoch * len(loader) + step,
            "train_loss_step": step_loss
        })

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "model": model,
        "train_epoch": epoch,
        "train_loss": train_loss,
        "train_acc": train_acc,
        # Legacy misspelling, kept so existing readers of this key keep working.
        "tarin_acc": train_acc,
        "train_f1": train_f1
    }

    wandb.log({
        "train_epoch": epoch,
        "train_loss_epoch": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1
    })

    return ret

In [5]:
def valid_one_epoch(loader, model, loss_fn, device, epoch):
    """Evaluate ``model`` on ``loader`` without gradients and log to wandb.

    Args:
        loader: Sized iterable of ``(image, targets)`` batches.
        model: Network to evaluate; switched to eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to.
        epoch: Epoch index, used to compute the global step for logging.

    Returns:
        dict with keys ``"epoch"``, ``"valid_loss"`` (mean of the per-batch
        losses), ``"valid_acc"`` and ``"valid_f1"`` (macro average). Note that
        the wandb epoch log uses ``val_*`` key names instead.
    """
    model.eval()

    running_loss = 0
    all_preds = []
    all_targets = []
    n_batches = len(loader)

    with torch.no_grad():
        progress = tqdm(loader)
        for step, (image, targets) in enumerate(progress):
            image, targets = image.to(device), targets.to(device)

            logits = model(image)
            batch_loss = loss_fn(logits, targets).item()
            running_loss += batch_loss

            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(targets.detach().cpu().numpy())

            progress.set_description(f"Loss: {batch_loss:.4f}")

            wandb.log({
                "valid_step": epoch * n_batches + step,
                "valid_loss_step": batch_loss
            })

    valid_loss = running_loss / n_batches
    valid_acc = accuracy_score(all_targets, all_preds)
    valid_f1 = f1_score(all_targets, all_preds, average='macro')

    wandb.log({
        "valid_epoch": epoch,
        "val_loss_epoch": valid_loss,
        "val_acc": valid_acc,
        "val_f1": valid_f1
    })

    return {
        "epoch": epoch,
        "valid_loss": valid_loss,
        "valid_acc": valid_acc,
        "valid_f1": valid_f1
    }

# 3. Optuna Hyperparameter Search

In [8]:

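# Requires train_one_epoch and valid_one_epoch to be defined (see the cells above).
# Example: from your_module import train_one_epoch, valid_one_epoch

# Datasets and data loaders are also required; in this notebook they are built inside objective() for each trial.
# Example: from your_module import trn_loader, val_loader

# Define the Optuna objective function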
def objective(trial):
    """Optuna objective: train one model and return its best validation loss.

    Samples ``img_size`` (224-380) and ``BATCH_SIZE`` (16-32) from ``trial``,
    trains an ``efficientnet_b4`` classifier with early stopping on the
    validation loss, and logs the run to wandb as ``trial_<number>``.

    Model, optimizer, data loaders and the wandb run are released in a
    ``finally`` block so that a trial which raises (for example a CUDA
    out-of-memory error) does not leave GPU memory or an open wandb run
    behind for later trials.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The lowest validation loss that counted as an improvement
        (``float('inf')`` if no epoch improved).
    """
    # Hyperparameters sampled by Optuna
    img_size = trial.suggest_int('img_size', 224, 380)
    BATCH_SIZE = trial.suggest_int('BATCH_SIZE', 16, 32)

    # Fixed training settings
    LR = 0.001
    patience = 5        # epochs without improvement before stopping
    min_delta = 0.001   # minimum loss decrease that counts as improvement

    num_workers = 0
    EPOCHS = 100

    data_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ])

    trn_dataset = ImageDataset(
        TRAIN_AUG_CSV_PATH,
        TRAIN_AUG_IMAGE_PATH,
        transform=data_transform
    )

    val_dataset = ImageDataset(
        VALID_CSV_PATH,
        VALID_IMAGE_PATH,
        transform=data_transform
    )

    # DataLoader
    trn_loader = DataLoader(
        trn_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )

    loss_fn = nn.CrossEntropyLoss()

    # Bound before the try block so the finally clause can always `del` them,
    # even when model creation itself fails.
    model = None
    optimizer = None
    run = None

    patience_counter = 0
    best_valid_loss = float('inf')

    try:
        # Model and optimizer
        model = timm.create_model('efficientnet_b4', pretrained=True, num_classes=17).to(device)
        optimizer = Adam(model.parameters(), lr=LR)

        # wandb run for this trial; the sampled hyperparameters go into the
        # run config so runs can be compared.
        os.environ['WANDB_SILENT'] = 'true'
        run = wandb.init(
            project=WANDB_PROJECT_NAME,
            name=f"trial_{trial.number}",
            config=dict(trial.params)
        )

        for epoch in range(EPOCHS):
            print(f"{epoch} epoch")
            train_one_epoch(trn_loader, model, optimizer, loss_fn, device, epoch)
            val_ret = valid_one_epoch(val_loader, model, loss_fn, device, epoch)

            print(f"valid loss : {val_ret['valid_loss']}")
            print(f"valid f1 : {val_ret['valid_f1']}")

            if val_ret['valid_loss'] < best_valid_loss - min_delta:
                # Improved by more than min_delta: record it and reset patience.
                best_valid_loss = val_ret['valid_loss']
                patience_counter = 0
            else:
                # No sufficient improvement this epoch.
                patience_counter += 1

            # Stop once `patience` consecutive epochs failed to improve.
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break
    finally:
        if run is not None:
            wandb.finish()

        # Drop the references explicitly: when a trial raises, the traceback
        # keeps this frame alive, and its locals would otherwise keep the
        # model alive with it.
        del model, optimizer, trn_loader, val_loader
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return best_valid_loss

# Create the Optuna study and run the search.
# direction='minimize' because objective() returns a validation loss.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Print the best trial's objective value and its sampled hyperparameters
print('Best trial:')
trial = study.best_trial
print('  Value: ', trial.value)
print('  Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')


[I 2024-08-05 16:52:35,718] A new study created in memory with name: no-name-a6e23d23-1d02-479d-80c6-e0fff4cf56c6
INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/efficientnet_b4.ra2_in1k)
INFO:timm.models._hub:[timm/efficientnet_b4.ra2_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
INFO:timm.models._builder:Missing keys (classifier.weight, classifier.bias) discovered while loading pretrained weights. This is expected if model is being adapted.
[W 2024-08-05 16:52:36,157] Trial 0 failed with parameters: {'img_size': 258, 'BATCH_SIZE': 19} because of the following error: RuntimeError('CUDA error: out of memory\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n').
Traceback (most recent call la

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


# 4. Best Model

In [None]:
# Preprocessing pipeline for inference.
# `img_size` only ever existed as a local inside objective(), so this cell
# raised NameError on a fresh kernel; read it from the best Optuna trial.
img_size = study.best_params['img_size']

data_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# 5. Test

In [None]:
# `BATCH_SIZE` only ever existed as a local inside objective(), so this cell
# raised NameError on a fresh kernel; reuse the best trial's batch size.
BATCH_SIZE = study.best_params['BATCH_SIZE']

# Test set: file names come from the submission template CSV.
tst_dataset = ImageDataset(
    TEST_CSV_PATH,
    TEST_IMAGE_PATH,
    transform=data_transform
)

# shuffle=False keeps predictions in the same order as the CSV rows.
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

In [None]:
# Predict a class for every test image and write the submission CSV.
# NOTE(review): `best_model` is not assigned in any cell visible in this
# notebook — confirm where it is supposed to come from before running.
preds_list = []

best_model.eval()

# One no_grad context around the whole loop instead of re-entering it per batch.
with torch.no_grad():
    for image, _ in tqdm(tst_loader):
        image = image.to(device)
        preds = best_model(image)
        preds_list.extend(preds.argmax(dim=1).cpu().numpy())

# Reuse the rows of the submission template and overwrite the target column.
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target']).assign(target=preds_list)
pred_df.to_csv(f"{RESULT_CSV_PATH}/base_gaussian.csv", index=False)

100%|██████████| 99/99 [00:08<00:00, 11.10it/s]
