## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

https://timm.fast.ai/


In [178]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

In [179]:
from dotenv import load_dotenv
from datetime import datetime
from zoneinfo import ZoneInfo
import wandb

load_dotenv()
api_key = os.getenv('WANDB_API_KEY')

wandb.login(key=api_key)

train_time = datetime.fromtimestamp(time.time(), tz=ZoneInfo("Asia/Seoul")).strftime("%Y%m%d-%H%M%S")
wandb.init(project="competition2-cv", name=f"run-{train_time}")

print(train_time)



VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
final_train_accuracy,▁▁
final_train_f1,▁▁
final_train_loss,▁█
final_valid_accuracy,▁▁
final_valid_f1,▁▁
final_valid_loss,▁▁
train_acc,▁
train_f1,▁
train_loss,▁

0,1
final_train_accuracy,0.43767
final_train_f1,0.36213
final_train_loss,2.0884
final_valid_accuracy,0.3121
final_valid_f1,0.26917
final_valid_loss,2.26323
train_acc,0.19745
train_f1,0.171
train_loss,2.62205


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112309040294753, max=1.0…

20240801-210548


In [180]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [181]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [182]:
# one epoch 학습을 위한 함수입니다.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        model.zero_grad(set_to_none=True)

        preds = model(image)
        loss = loss_fn(preds, targets)
        
        #print(f"------ preds's shape = {preds.shape},  targets's shape = {targets.shape}")
        #print(f"---------- preds = {preds},  targets = {targets}")
        
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }
    
    # wandb에 학습 과정 로그
    wandb.log(ret)

    return ret

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [183]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
data_path = 'datasets_fin/'

# model config
model_name = 'resnet34' # 'resnet50' 'efficientnet-b0', ...

# training config
img_size = 32
LR = 1e-3
EPOCHS = 1
BATCH_SIZE = 32
num_workers = 0

wandb.config.update({
    "learning_rate": LR,
    "architecture": model_name,
    "dataset": "custom-dataset",
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "image_size": img_size
})

In [184]:
device

device(type='cuda')

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [185]:
# augmentation을 위한 transform 코드
trn_transform = A.Compose([
    # 이미지 크기 조정
    A.Resize(height=img_size, width=img_size),
    # images normalization
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # numpy 이미지나 PIL 이미지를 PyTorch 텐서로 변환
    ToTensorV2(),
])

# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [186]:
# TODO target 분포를 고려해서 분할되도록..

def dataset_split(train_csv_path, img_dir, trn_transform, tst_transform, train_size=0.7, random_state=42):
    # CSV 파일 읽기
    train_df = pd.read_csv(train_csv_path)
    
    # 훈련 세트와 검증 세트로 분할
    train_df, val_df = train_test_split(
        train_df, 
        train_size=train_size, 
        random_state=random_state
    )
    
    print(f"훈련 세트: {len(train_df)} 샘플")
    print(f"검증 세트: {len(val_df)} 샘플")
    
    # 각 데이터프레임을 임시 CSV 파일로 저장
    train_df.to_csv('temp_train.csv', index=False)
    val_df.to_csv('temp_val.csv', index=False)
    
    # ImageDataset 생성
    train_dataset = ImageDataset('temp_train.csv', img_dir, transform=trn_transform)
    val_dataset = ImageDataset('temp_val.csv', img_dir, transform=tst_transform)
    
    # 임시 파일 삭제
    os.remove('temp_train.csv')
    os.remove('temp_val.csv')
    
    return train_dataset, val_dataset

In [187]:
# Dataset 정의

trn_dataset, val_dataset = dataset_split(
    "datasets_fin/train.csv",
    "datasets_fin/train/",
    trn_transform,
    tst_transform
)

tst_dataset = ImageDataset(
    "datasets_fin/sample_submission.csv",
    "datasets_fin/test/",
    transform=tst_transform
)


print(len(trn_dataset), len(val_dataset), len(tst_dataset))

훈련 세트: 1099 샘플
검증 세트: 471 샘플
1099 471 3140


In [188]:
trn_dataset.df

array([['d8324d4abf1bc4d3.jpg', 0],
       ['3400e4d2f3772cdd.jpg', 6],
       ['897eba0b45163f57.jpg', 0],
       ...,
       ['8626caa67beae2f0.jpg', 8],
       ['ebbe01aaff327255.jpg', 14],
       ['b41de9491b14ca00.jpg', 5]], dtype=object)

In [189]:
val_dataset.df

array([['f0d11155d48758de.jpg', 1],
       ['9d538d69133f4a12.jpg', 4],
       ['4620f6e53442f3b6.jpg', 3],
       ['99d7241ecca09eaf.jpg', 3],
       ['bbb3c7c897a279c6.jpg', 3],
       ['a4d5f12e10bc3f77.jpg', 2],
       ['fea6ad156a141a37.jpg', 8],
       ['957a9861a894c253.jpg', 6],
       ['f9ebb85e2929b388.jpg', 16],
       ['5a18fb6fb6906ba3.jpg', 2],
       ['09213af8d64b7e04.jpg', 10],
       ['45ec22e6b1b60734.jpg', 14],
       ['0808c7453316041c.jpg', 3],
       ['b809e8d859f82012.jpg', 6],
       ['a6468b3816c1325c.jpg', 11],
       ['513df60c6920cc57.jpg', 2],
       ['a1d70a5778fc75b2.jpg', 6],
       ['2db21acd1d1402a7.jpg', 10],
       ['f5ea433b1cf5757d.jpg', 13],
       ['2e5acd2c86fc6ad3.jpg', 3],
       ['045ddbf696513987.jpg', 15],
       ['f7b8a56acc3d85c7.jpg', 12],
       ['f15534042b2fe647.jpg', 2],
       ['04320d1d34f005ca.jpg', 14],
       ['1370c2d3fc1dbaa6.jpg', 16],
       ['1c4f24cbd0c3b67f.jpg', 5],
       ['926cb3040efcf9d4.jpg', 4],
       ['85abfc517

In [190]:
tst_dataset.df

array([['0008fdb22ddce0ce.jpg', 0],
       ['00091bffdffd83de.jpg', 0],
       ['00396fbc1f6cc21d.jpg', 0],
       ...,
       ['ffc2c91dff8cf2c0.jpg', 0],
       ['ffc4e330a5353a2a.jpg', 0],
       ['ffc71fed753d90c1.jpg', 0]], dtype=object)

In [191]:
# DataLoader 정의
trn_loader = DataLoader(
    trn_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,               # TODO ??????
    num_workers=num_workers,    # TODO ??????
    pin_memory=True,            # TODO ??????
    drop_last=False
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [192]:
len(timm.list_models(pretrained=True))

1298

In [193]:
timm.list_models('efficientnet*', pretrained=True)

['efficientnet_b0.ra_in1k',
 'efficientnet_b1.ft_in1k',
 'efficientnet_b1_pruned.in1k',
 'efficientnet_b2.ra_in1k',
 'efficientnet_b2_pruned.in1k',
 'efficientnet_b3.ra2_in1k',
 'efficientnet_b3_pruned.in1k',
 'efficientnet_b4.ra2_in1k',
 'efficientnet_b5.sw_in12k',
 'efficientnet_b5.sw_in12k_ft_in1k',
 'efficientnet_el.ra_in1k',
 'efficientnet_el_pruned.in1k',
 'efficientnet_em.ra2_in1k',
 'efficientnet_es.ra_in1k',
 'efficientnet_es_pruned.in1k',
 'efficientnet_lite0.ra_in1k',
 'efficientnetv2_rw_m.agc_in1k',
 'efficientnetv2_rw_s.ra2_in1k',
 'efficientnetv2_rw_t.ra2_in1k']

In [194]:
# load model
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=17
).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)

In [195]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act1): ReLU(inplace=True)
      (aa): Identity()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act2): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, m

In [196]:
for epoch in range(EPOCHS):
    ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device)
    ret['epoch'] = epoch

    log = ""
    for k, v in ret.items():
      log += f"{k}: {v:.4f}\n"
    print(log)

Loss: 2.4235: 100%|██████████| 35/35 [00:04<00:00,  8.06it/s]

train_loss: 2.6063
train_acc: 0.2084
train_f1: 0.1806
epoch: 0.0000






# 평가

In [197]:
def evaluate(loader, model, loss_fn, device=device):
    model.eval()
    total_loss = 0
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for image, targets in tqdm(loader, desc="Evaluating"):
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            total_loss += loss.item()
            all_preds.extend(preds.argmax(dim=1).cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    avg_loss = total_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    f1 = f1_score(all_targets, all_preds, average='macro')

    # wandb에 평가 메트릭 로깅
    results = {
        "loss": avg_loss,
        "accuracy": accuracy,
        "f1": f1
    }
    #wandb.log(results)

    return avg_loss, accuracy, f1

train_results = evaluate(trn_loader, model, loss_fn)
valid_results = evaluate(val_loader, model, loss_fn)

# 평가 결과 로깅
log_dict = {
    "final_train_loss": train_results[0],
    "final_train_accuracy": train_results[1],
    "final_train_f1": train_results[2],
    "final_valid_loss": valid_results[0],
    "final_valid_accuracy": valid_results[1],
    "final_valid_f1": valid_results[2],
}

wandb.log(log_dict)

print()
for k, v in log_dict.items():
    print(f'{k}: {v}')

Evaluating: 100%|██████████| 35/35 [00:03<00:00,  9.63it/s]
Evaluating: 100%|██████████| 15/15 [00:01<00:00,  9.77it/s]


final_train_loss: 2.0664304631096977
final_train_accuracy: 0.41583257506824384
final_train_f1: 0.34215674508996574
final_valid_loss: 2.2318623860677085
final_valid_accuracy: 0.3630573248407643
final_valid_f1: 0.32455932778205576





# 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [198]:
preds_list = []

model.eval()
for image, _ in tqdm(tst_loader):
    image = image.to(device)

    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:10<00:00,  9.26it/s]


In [199]:
preds_list

[2,
 6,
 9,
 11,
 2,
 16,
 0,
 8,
 10,
 9,
 5,
 3,
 4,
 9,
 11,
 11,
 12,
 9,
 1,
 11,
 4,
 6,
 16,
 11,
 0,
 9,
 12,
 9,
 1,
 12,
 11,
 12,
 1,
 2,
 8,
 16,
 1,
 3,
 2,
 2,
 3,
 9,
 1,
 9,
 0,
 16,
 9,
 0,
 11,
 10,
 6,
 12,
 10,
 12,
 3,
 16,
 5,
 5,
 3,
 2,
 1,
 11,
 11,
 9,
 4,
 12,
 9,
 9,
 0,
 11,
 1,
 9,
 11,
 3,
 10,
 3,
 12,
 12,
 11,
 9,
 8,
 16,
 9,
 12,
 9,
 9,
 5,
 11,
 11,
 0,
 12,
 12,
 9,
 3,
 10,
 9,
 1,
 11,
 9,
 9,
 7,
 11,
 5,
 11,
 0,
 9,
 16,
 10,
 16,
 9,
 11,
 16,
 4,
 10,
 11,
 16,
 16,
 7,
 10,
 0,
 16,
 8,
 5,
 9,
 0,
 2,
 12,
 11,
 12,
 10,
 16,
 6,
 11,
 10,
 16,
 16,
 8,
 11,
 12,
 9,
 1,
 9,
 10,
 4,
 12,
 16,
 0,
 7,
 3,
 3,
 12,
 12,
 4,
 8,
 11,
 9,
 9,
 7,
 12,
 8,
 9,
 9,
 9,
 10,
 3,
 4,
 4,
 12,
 4,
 9,
 16,
 9,
 8,
 9,
 9,
 9,
 9,
 9,
 4,
 7,
 11,
 4,
 9,
 9,
 16,
 0,
 9,
 12,
 9,
 0,
 9,
 9,
 0,
 9,
 12,
 9,
 5,
 11,
 0,
 7,
 12,
 16,
 12,
 16,
 9,
 10,
 0,
 9,
 3,
 12,
 8,
 12,
 11,
 9,
 11,
 16,
 0,
 16,
 0,
 9,
 11,
 11,
 16,
 2,
 11,
 9,
 2,


In [200]:
tst_dataset.df

array([['0008fdb22ddce0ce.jpg', 0],
       ['00091bffdffd83de.jpg', 0],
       ['00396fbc1f6cc21d.jpg', 0],
       ...,
       ['ffc2c91dff8cf2c0.jpg', 0],
       ['ffc4e330a5353a2a.jpg', 0],
       ['ffc71fed753d90c1.jpg', 0]], dtype=object)

In [201]:
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list

In [202]:
pred_df

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,6
2,00396fbc1f6cc21d.jpg,9
3,00471f8038d9c4b6.jpg,11
4,00901f504008d884.jpg,2
...,...,...
3135,ffb4b6f619fb60ea.jpg,15
3136,ffb54299b1ad4159.jpg,12
3137,ffc2c91dff8cf2c0.jpg,8
3138,ffc4e330a5353a2a.jpg,0


In [203]:
sample_submission_df = pd.read_csv("datasets_fin/sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [204]:
pred_df.to_csv("pred.csv", index=False)

In [205]:
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,6
2,00396fbc1f6cc21d.jpg,9
3,00471f8038d9c4b6.jpg,11
4,00901f504008d884.jpg,2


In [206]:
# wandb 실행 종료
wandb.finish()

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
final_train_accuracy,▁
final_train_f1,▁
final_train_loss,▁
final_valid_accuracy,▁
final_valid_f1,▁
final_valid_loss,▁
train_acc,▁
train_f1,▁
train_loss,▁

0,1
final_train_accuracy,0.41583
final_train_f1,0.34216
final_train_loss,2.06643
final_valid_accuracy,0.36306
final_valid_f1,0.32456
final_valid_loss,2.23186
train_acc,0.20837
train_f1,0.1806
train_loss,2.60633
