In [1]:
import os
import random
import time

import albumentations as A
import numpy as np
import pandas as pd
import timm
import torch
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [8]:
# Dataset class definition
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs listed in a two-column table.

    Args:
        csv: Either a ``pandas.DataFrame`` or a path handed to ``pd.read_csv``.
            Each row is unpacked as ``(file name, target)``, so the table must
            have exactly two columns.
        path: Directory that every file name is joined onto.
        transform: Optional callable invoked as ``transform(image=img)``; the
            ``'image'`` entry of its result replaces the loaded array.
    """

    def __init__(self, csv, path, transform=None):
        # Keep only the raw row values; rows are unpacked positionally later.
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load row ``idx`` and return ``(image, target)``.

        The image is always decoded as 3-channel RGB so that grayscale,
        palette or RGBA files still match the 3-channel normalisation applied
        by the transforms, and the file is closed as soon as it has been read.
        """
        name, target = self.df[idx]
        # The context manager releases the file handle deterministically
        # instead of leaving it to garbage collection.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img, target

In [9]:
# Training function definition
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one optimisation pass over ``loader`` and report epoch metrics.

    Args:
        loader: Iterable of ``(image, targets)`` batches that supports ``len()``.
        model: Module called as ``model(image)``; switched to train mode.
        optimizer: Optimizer zeroed and stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device both tensors of every batch are moved to.

    Returns:
        ``(train_loss, train_acc, train_f1)``: the sum of per-batch losses
        divided by ``len(loader)``, the accuracy and the macro-averaged F1
        computed over every sample seen during the pass.
    """
    model.train()
    running_loss = 0
    epoch_preds, epoch_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value

        # Keep hard predictions and labels on the CPU for the epoch metrics.
        predicted = logits.argmax(dim=1)
        epoch_preds.extend(predicted.detach().cpu().numpy())
        epoch_targets.extend(batch_targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {loss_value:.4f}")

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(epoch_targets, epoch_preds)
    macro_f1 = f1_score(epoch_targets, epoch_preds, average='macro')

    return mean_loss, accuracy, macro_f1

In [10]:
# Validation function definition
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        loader: Iterable of ``(image, targets)`` batches that supports ``len()``.
        model: Module called as ``model(image)``; switched to eval mode.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device both tensors of every batch are moved to.

    Returns:
        ``(val_loss, val_acc, val_f1)``: the sum of per-batch losses divided by
        ``len(loader)``, the accuracy and the macro-averaged F1 over all
        evaluated samples.
    """
    model.eval()
    total_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            total_loss += loss_fn(logits, batch_targets).item()

            # Collect hard predictions and labels on the CPU for the metrics.
            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(batch_targets.detach().cpu().numpy())

    mean_loss = total_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')

    return mean_loss, accuracy, macro_f1

In [11]:
# Hyperparameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = 'datasets_fin/'
model_name = 'efficientnet_b0'
img_size = 384
LR = 1e-3
EPOCHS = 30
BATCH_SIZE = 32
num_workers = 4
SEED = 42

# Seed the Python, NumPy and PyTorch generators so that a Restart & Run All
# repeats the same shuffling and random draws as far as these libraries allow.
# GPU kernels and DataLoader worker processes may still add nondeterminism.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Per-channel normalisation constants shared by both pipelines (the commonly
# used ImageNet statistics), defined once so train and validation cannot drift.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training augmentation: resize, random flips / 90-degree rotations and a mild
# shift-scale-rotate, followed by normalisation and conversion to a tensor.
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ToTensorV2(),
])

# Validation / test pipeline: deterministic resize, normalise, to tensor.
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ToTensorV2(),
])

In [14]:
# Load the labelled table and make a stratified 80/20 train / validation split
df = pd.read_csv("../data/train.csv")
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['target'],
)

# Train and validation images share one directory; only the transform differs.
# The test set takes its file list from the sample submission file.
train_dataset = ImageDataset(train_df, "../data/train_preprocessed/", transform=train_transform)
val_dataset = ImageDataset(val_df, "../data/train_preprocessed/", transform=val_transform)
test_dataset = ImageDataset(
    "../data/sample_submission.csv",
    "../data/test_preprocessed/",
    transform=val_transform,
)

# Options common to every loader; only the training loader shuffles.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=num_workers, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

In [15]:
# Model, loss, optimizer and learning-rate schedule
model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Training loop
# Start below any attainable F1 so the first epoch always writes a checkpoint.
# Starting at 0 with a strict ">" would never save when every epoch scores
# exactly 0, and the load below would then fail or silently pick up a stale
# best_model.pth left behind by an earlier run.
best_val_f1 = float('-inf')
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
    # The schedule advances once per epoch, after the optimizer updates.
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    # Keep only the weights of the epoch with the best validation macro F1.
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")

# Test-set inference with the best checkpoint
# map_location lets the checkpoint load even when the current device differs
# from the one the weights were saved on (for example GPU -> CPU).
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()
preds_list = []

with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds = model(image)
        preds_list.extend(preds.argmax(dim=1).cpu().numpy())

# Save results: reuse the IDs from the test table and overwrite its
# placeholder targets with the predicted class indices.
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

Loss: 0.1130: 100%|██████████| 40/40 [00:05<00:00,  7.45it/s]


Epoch 1/30
Train Loss: 0.9060, Train Acc: 0.7174, Train F1: 0.6925
Val Loss: 0.6001, Val Acc: 0.8153, Val F1: 0.7782


Loss: 0.1416: 100%|██████████| 40/40 [00:04<00:00,  8.37it/s]


Epoch 2/30
Train Loss: 0.4140, Train Acc: 0.8495, Train F1: 0.8396
Val Loss: 0.3469, Val Acc: 0.8822, Val F1: 0.8628


Loss: 0.3600: 100%|██████████| 40/40 [00:04<00:00,  8.39it/s]


Epoch 3/30
Train Loss: 0.2603, Train Acc: 0.9037, Train F1: 0.8929
Val Loss: 0.3150, Val Acc: 0.8726, Val F1: 0.8753


Loss: 0.1306: 100%|██████████| 40/40 [00:04<00:00,  8.36it/s]


Epoch 4/30
Train Loss: 0.2103, Train Acc: 0.9252, Train F1: 0.9186
Val Loss: 0.3561, Val Acc: 0.8854, Val F1: 0.8746


Loss: 0.4894: 100%|██████████| 40/40 [00:04<00:00,  8.34it/s]


Epoch 5/30
Train Loss: 0.2157, Train Acc: 0.9260, Train F1: 0.9206
Val Loss: 0.3772, Val Acc: 0.8694, Val F1: 0.8661


Loss: 0.0141: 100%|██████████| 40/40 [00:04<00:00,  8.36it/s]


Epoch 6/30
Train Loss: 0.2505, Train Acc: 0.9108, Train F1: 0.9001
Val Loss: 0.2373, Val Acc: 0.9172, Val F1: 0.9110


Loss: 0.0061: 100%|██████████| 40/40 [00:04<00:00,  8.30it/s]


Epoch 7/30
Train Loss: 0.1309, Train Acc: 0.9546, Train F1: 0.9538
Val Loss: 0.3174, Val Acc: 0.8854, Val F1: 0.8789


Loss: 1.0385: 100%|██████████| 40/40 [00:04<00:00,  8.36it/s]


Epoch 8/30
Train Loss: 0.1872, Train Acc: 0.9411, Train F1: 0.9359
Val Loss: 0.1808, Val Acc: 0.9331, Val F1: 0.9320


Loss: 0.1450: 100%|██████████| 40/40 [00:04<00:00,  8.40it/s]


Epoch 9/30
Train Loss: 0.1285, Train Acc: 0.9530, Train F1: 0.9522
Val Loss: 0.2793, Val Acc: 0.9363, Val F1: 0.9321


Loss: 0.0389: 100%|██████████| 40/40 [00:04<00:00,  8.40it/s]


Epoch 10/30
Train Loss: 0.1008, Train Acc: 0.9586, Train F1: 0.9560
Val Loss: 0.2669, Val Acc: 0.9427, Val F1: 0.9370


Loss: 0.0446: 100%|██████████| 40/40 [00:04<00:00,  8.39it/s]


Epoch 11/30
Train Loss: 0.0743, Train Acc: 0.9705, Train F1: 0.9688
Val Loss: 0.1879, Val Acc: 0.9490, Val F1: 0.9444


Loss: 0.0410: 100%|██████████| 40/40 [00:04<00:00,  8.38it/s]


Epoch 12/30
Train Loss: 0.0324, Train Acc: 0.9912, Train F1: 0.9901
Val Loss: 0.2511, Val Acc: 0.9268, Val F1: 0.9242


Loss: 0.2802: 100%|██████████| 40/40 [00:04<00:00,  8.33it/s]


Epoch 13/30
Train Loss: 0.0279, Train Acc: 0.9920, Train F1: 0.9919
Val Loss: 0.2367, Val Acc: 0.9268, Val F1: 0.9258


Loss: 0.0516: 100%|██████████| 40/40 [00:04<00:00,  8.40it/s]


Epoch 14/30
Train Loss: 0.0377, Train Acc: 0.9849, Train F1: 0.9846
Val Loss: 0.2276, Val Acc: 0.9395, Val F1: 0.9379


Loss: 0.0086: 100%|██████████| 40/40 [00:04<00:00,  8.33it/s]


Epoch 15/30
Train Loss: 0.0215, Train Acc: 0.9928, Train F1: 0.9930
Val Loss: 0.2322, Val Acc: 0.9331, Val F1: 0.9324


Loss: 0.0789: 100%|██████████| 40/40 [00:04<00:00,  8.35it/s]


Epoch 16/30
Train Loss: 0.0168, Train Acc: 0.9952, Train F1: 0.9956
Val Loss: 0.2281, Val Acc: 0.9427, Val F1: 0.9403


Loss: 0.0026: 100%|██████████| 40/40 [00:04<00:00,  8.37it/s]


Epoch 17/30
Train Loss: 0.0122, Train Acc: 0.9968, Train F1: 0.9967
Val Loss: 0.2909, Val Acc: 0.9204, Val F1: 0.9209


Loss: 0.0008: 100%|██████████| 40/40 [00:04<00:00,  8.24it/s]


Epoch 18/30
Train Loss: 0.0095, Train Acc: 0.9960, Train F1: 0.9953
Val Loss: 0.2420, Val Acc: 0.9331, Val F1: 0.9299


Loss: 0.1733: 100%|██████████| 40/40 [00:04<00:00,  8.33it/s]


Epoch 19/30
Train Loss: 0.0150, Train Acc: 0.9960, Train F1: 0.9956
Val Loss: 0.2472, Val Acc: 0.9363, Val F1: 0.9355


Loss: 0.0091: 100%|██████████| 40/40 [00:04<00:00,  8.36it/s]


Epoch 20/30
Train Loss: 0.0159, Train Acc: 0.9952, Train F1: 0.9945
Val Loss: 0.2077, Val Acc: 0.9331, Val F1: 0.9326


Loss: 0.1013: 100%|██████████| 40/40 [00:04<00:00,  8.34it/s]


Epoch 21/30
Train Loss: 0.0132, Train Acc: 0.9968, Train F1: 0.9963
Val Loss: 0.2194, Val Acc: 0.9331, Val F1: 0.9300


Loss: 0.0219: 100%|██████████| 40/40 [00:04<00:00,  8.40it/s]


Epoch 22/30
Train Loss: 0.0046, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2410, Val Acc: 0.9363, Val F1: 0.9327


Loss: 0.0002: 100%|██████████| 40/40 [00:04<00:00,  8.29it/s]


Epoch 23/30
Train Loss: 0.0074, Train Acc: 0.9976, Train F1: 0.9974
Val Loss: 0.2486, Val Acc: 0.9331, Val F1: 0.9310


Loss: 0.0099: 100%|██████████| 40/40 [00:04<00:00,  8.27it/s]


Epoch 24/30
Train Loss: 0.0049, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.2403, Val Acc: 0.9331, Val F1: 0.9296


Loss: 0.0272: 100%|██████████| 40/40 [00:04<00:00,  8.31it/s]


Epoch 25/30
Train Loss: 0.0042, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.2467, Val Acc: 0.9363, Val F1: 0.9325


Loss: 0.0313: 100%|██████████| 40/40 [00:04<00:00,  8.22it/s]


Epoch 26/30
Train Loss: 0.0073, Train Acc: 0.9984, Train F1: 0.9978
Val Loss: 0.2604, Val Acc: 0.9331, Val F1: 0.9316


Loss: 0.0069: 100%|██████████| 40/40 [00:04<00:00,  8.33it/s]


Epoch 27/30
Train Loss: 0.0059, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.2480, Val Acc: 0.9299, Val F1: 0.9284


Loss: 0.0111: 100%|██████████| 40/40 [00:04<00:00,  8.35it/s]


Epoch 28/30
Train Loss: 0.0027, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2428, Val Acc: 0.9363, Val F1: 0.9323


Loss: 0.0001: 100%|██████████| 40/40 [00:04<00:00,  8.35it/s]


Epoch 29/30
Train Loss: 0.0046, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.2480, Val Acc: 0.9395, Val F1: 0.9368


Loss: 0.0149: 100%|██████████| 40/40 [00:04<00:00,  8.31it/s]


Epoch 30/30
Train Loss: 0.0053, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.2419, Val Acc: 0.9363, Val F1: 0.9339


100%|██████████| 99/99 [00:03<00:00, 28.18it/s]

Prediction completed and saved to pred.csv



