# CNN 기반 모델

In [66]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

## Simple EfficientNet-B0

In [67]:
# Dataset class definition
class ImageDataset(Dataset):
    """Image dataset backed by a table of ``(file name, target)`` rows.

    Args:
        csv: Either a ``pandas.DataFrame`` or something ``pandas.read_csv``
            can read. Every row is unpacked as ``(name, target)``, so the
            table must have exactly two columns.
        path: Directory that each file name is joined onto.
        transform: Optional callable invoked as ``transform(image=img)``;
            the ``'image'`` entry of its result becomes the sample.
    """

    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        The image is an ``H x W x 3`` array before ``transform`` is applied.
        """
        name, target = self.df[idx]
        # Force three channels: a grayscale, palette or RGBA file would
        # otherwise produce an array whose channel count does not match the
        # 3-value mean/std passed to the Normalize transforms in this file.
        # The context manager closes the underlying file deterministically.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [68]:
# Training routine: one full pass over the training loader
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one epoch and report loss, accuracy and macro F1.

    Args:
        loader: Iterable of ``(image, targets)`` batches; also needs ``len()``.
        model: Network called as ``model(image)``; put into train mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        ``(train_loss, train_acc, train_f1)`` where the loss is the mean of
        the per-batch loss values and the predicted class is the argmax over
        dimension 1 of the model output.
    """
    model.train()
    running_loss = 0
    epoch_preds, epoch_targets = [], []

    progress = tqdm(loader)
    for image, targets in progress:
        image, targets = image.to(device), targets.to(device)

        optimizer.zero_grad()
        logits = model(image)
        batch_loss = loss_fn(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # Read the scalar once; it feeds both the running sum and the bar.
        batch_loss_value = batch_loss.item()
        running_loss += batch_loss_value
        epoch_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        epoch_targets.extend(targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {batch_loss_value:.4f}")

    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(epoch_targets, epoch_preds),
        f1_score(epoch_targets, epoch_preds, average='macro'),
    )

In [69]:
# Validation routine: evaluate the model without updating it
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` and report loss, accuracy and macro F1.

    Args:
        loader: Iterable of ``(image, targets)`` batches; also needs ``len()``.
        model: Network called as ``model(image)``; put into eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        ``(val_loss, val_acc, val_f1)`` where the loss is the mean of the
        per-batch loss values.
    """
    model.eval()
    total_loss = 0
    all_preds, all_targets = [], []

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for image, targets in loader:
            image, targets = image.to(device), targets.to(device)
            logits = model(image)
            total_loss += loss_fn(logits, targets).item()
            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(targets.detach().cpu().numpy())

    mean_loss = total_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(all_targets, all_preds),
        f1_score(all_targets, all_preds, average='macro'),
    )

In [70]:
# Hyperparameters and run settings
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
data_path = '../data/'
model_name = 'efficientnet_b0'  # model identifier handed to timm.create_model
img_size = 384                  # images are resized to img_size x img_size
LR = 1e-3                       # initial learning rate
EPOCHS = 30                     # number of epochs (also the scheduler's T_max)
BATCH_SIZE = 32
num_workers = 4                 # DataLoader worker processes

# Validation/test pipeline: resize, normalize, convert to tensor (no augmentation)
# NOTE(review): the mean/std values look like the usual ImageNet statistics - confirm.
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Training pipeline: same as above plus random flips, 90-degree rotations
# and a small shift/scale/rotate, each applied with probability 0.5
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=15,
        p=0.5,
    ),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [71]:
# Load the labels and make a stratified 80/20 train/validation split
df = pd.read_csv("../data/train.csv")
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['target'],
)

# Train and validation share one image directory; only the transform differs.
train_dataset = ImageDataset(
    train_df,
    "../data/train_preprocessed/",
    transform=train_transform,
)
val_dataset = ImageDataset(
    val_df,
    "../data/train_preprocessed/",
    transform=val_transform,
)
# The test set is listed by the sample submission file.
test_dataset = ImageDataset(
    "../data/sample_submission.csv",
    "../data/test_preprocessed/",
    transform=val_transform,
)

# Only the training loader shuffles; validation/test keep file order.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

In [72]:
# Model, loss, optimizer and learning-rate schedule
model = timm.create_model(model_name, pretrained=True, num_classes=17)
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)
# Cosine annealing over the whole run (T_max equals the epoch count).
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

In [73]:
from torchsummary import summary

# Helper that prints the model structure
def print_model_summary(model, input_size):
    """Print a summary of ``model`` via ``torchsummary.summary``.

    Args:
        model: The network to summarize.
        input_size: Forwarded unchanged to ``summary``; the call in this
            notebook passes ``(3, img_size, img_size)``.
    """
    summary(model, input_size=input_size)
    
# Print the layer-by-layer summary
print(f"\nModel structure of {model_name}:")
print_model_summary(model, input_size=(3, img_size, img_size))

# Print the model architecture (the module's own repr)
print("\nModel architecture:", model, sep="\n")


Model structure of efficientnet_b0:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 192, 192]             864
          Identity-2         [-1, 32, 192, 192]               0
              SiLU-3         [-1, 32, 192, 192]               0
    BatchNormAct2d-4         [-1, 32, 192, 192]              64
            Conv2d-5         [-1, 32, 192, 192]             288
          Identity-6         [-1, 32, 192, 192]               0
              SiLU-7         [-1, 32, 192, 192]               0
    BatchNormAct2d-8         [-1, 32, 192, 192]              64
            Conv2d-9              [-1, 8, 1, 1]             264
             SiLU-10              [-1, 8, 1, 1]               0
           Conv2d-11             [-1, 32, 1, 1]             288
          Sigmoid-12             [-1, 32, 1, 1]               0
    SqueezeExcite-13         [-1, 32, 192, 192]               0
  

In [17]:
# Training loop
# Start below any attainable F1 so the first epoch always writes a checkpoint.
# With an initial value of 0 and the strict ">" test below, a run whose
# validation F1 never rises above 0 would leave no best_model.pth (or a stale
# one from an earlier run) for the inference cell to load.
best_val_f1 = float('-inf')
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
    # One scheduler step per epoch, after that epoch's optimizer updates.
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    # Keep only the weights with the best validation macro F1 so far.
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")

Loss: 0.5717: 100%|██████████| 40/40 [00:05<00:00,  7.09it/s]


Epoch 1/30
Train Loss: 0.9814, Train Acc: 0.6998, Train F1: 0.6748
Val Loss: 0.6068, Val Acc: 0.8248, Val F1: 0.8014


Loss: 0.2583: 100%|██████████| 40/40 [00:04<00:00,  8.28it/s]


Epoch 2/30
Train Loss: 0.3801, Train Acc: 0.8774, Train F1: 0.8633
Val Loss: 0.5170, Val Acc: 0.8599, Val F1: 0.8494


Loss: 0.2990: 100%|██████████| 40/40 [00:04<00:00,  8.40it/s]


Epoch 3/30
Train Loss: 0.3013, Train Acc: 0.8854, Train F1: 0.8729
Val Loss: 0.3384, Val Acc: 0.8917, Val F1: 0.8819


Loss: 0.1913: 100%|██████████| 40/40 [00:04<00:00,  8.44it/s]


Epoch 4/30
Train Loss: 0.2596, Train Acc: 0.9005, Train F1: 0.8936
Val Loss: 0.2837, Val Acc: 0.9076, Val F1: 0.9004


Loss: 0.0928: 100%|██████████| 40/40 [00:04<00:00,  8.39it/s]


Epoch 5/30
Train Loss: 0.1830, Train Acc: 0.9299, Train F1: 0.9253
Val Loss: 0.3366, Val Acc: 0.8981, Val F1: 0.8972


Loss: 0.2278: 100%|██████████| 40/40 [00:04<00:00,  8.40it/s]


Epoch 6/30
Train Loss: 0.2071, Train Acc: 0.9268, Train F1: 0.9223
Val Loss: 0.4114, Val Acc: 0.8885, Val F1: 0.8767


Loss: 0.2854: 100%|██████████| 40/40 [00:04<00:00,  8.29it/s]


Epoch 7/30
Train Loss: 0.1562, Train Acc: 0.9530, Train F1: 0.9498
Val Loss: 0.3123, Val Acc: 0.9013, Val F1: 0.8946


Loss: 0.0753: 100%|██████████| 40/40 [00:04<00:00,  8.37it/s]


Epoch 8/30
Train Loss: 0.1782, Train Acc: 0.9411, Train F1: 0.9375
Val Loss: 0.2348, Val Acc: 0.9363, Val F1: 0.9331


Loss: 0.0269: 100%|██████████| 40/40 [00:04<00:00,  8.35it/s]


Epoch 9/30
Train Loss: 0.0971, Train Acc: 0.9658, Train F1: 0.9649
Val Loss: 0.3019, Val Acc: 0.9013, Val F1: 0.8947


Loss: 0.0446: 100%|██████████| 40/40 [00:04<00:00,  8.34it/s]


Epoch 10/30
Train Loss: 0.0920, Train Acc: 0.9682, Train F1: 0.9656
Val Loss: 0.2258, Val Acc: 0.9236, Val F1: 0.9172


Loss: 0.0209: 100%|██████████| 40/40 [00:04<00:00,  8.36it/s]


Epoch 11/30
Train Loss: 0.0641, Train Acc: 0.9801, Train F1: 0.9809
Val Loss: 0.2959, Val Acc: 0.9172, Val F1: 0.9142


Loss: 0.0052: 100%|██████████| 40/40 [00:04<00:00,  8.36it/s]


Epoch 12/30
Train Loss: 0.0577, Train Acc: 0.9809, Train F1: 0.9800
Val Loss: 0.2169, Val Acc: 0.9268, Val F1: 0.9249


Loss: 0.0020: 100%|██████████| 40/40 [00:04<00:00,  8.32it/s]


Epoch 13/30
Train Loss: 0.0424, Train Acc: 0.9857, Train F1: 0.9844
Val Loss: 0.2254, Val Acc: 0.9459, Val F1: 0.9411


Loss: 0.0041: 100%|██████████| 40/40 [00:04<00:00,  8.39it/s]


Epoch 14/30
Train Loss: 0.0198, Train Acc: 0.9928, Train F1: 0.9922
Val Loss: 0.2187, Val Acc: 0.9363, Val F1: 0.9296


Loss: 0.0009: 100%|██████████| 40/40 [00:04<00:00,  8.33it/s]


Epoch 15/30
Train Loss: 0.0213, Train Acc: 0.9936, Train F1: 0.9920
Val Loss: 0.2422, Val Acc: 0.9395, Val F1: 0.9353


Loss: 0.0615: 100%|██████████| 40/40 [00:04<00:00,  8.34it/s]


Epoch 16/30
Train Loss: 0.0148, Train Acc: 0.9952, Train F1: 0.9956
Val Loss: 0.2497, Val Acc: 0.9363, Val F1: 0.9334


Loss: 0.0003: 100%|██████████| 40/40 [00:04<00:00,  8.24it/s]


Epoch 17/30
Train Loss: 0.0127, Train Acc: 0.9960, Train F1: 0.9952
Val Loss: 0.2342, Val Acc: 0.9522, Val F1: 0.9504


Loss: 0.0056: 100%|██████████| 40/40 [00:04<00:00,  8.32it/s]


Epoch 18/30
Train Loss: 0.0084, Train Acc: 0.9960, Train F1: 0.9959
Val Loss: 0.2615, Val Acc: 0.9363, Val F1: 0.9372


Loss: 0.0001: 100%|██████████| 40/40 [00:04<00:00,  8.35it/s]


Epoch 19/30
Train Loss: 0.0085, Train Acc: 0.9968, Train F1: 0.9967
Val Loss: 0.2679, Val Acc: 0.9395, Val F1: 0.9388


Loss: 0.0309: 100%|██████████| 40/40 [00:04<00:00,  8.27it/s]


Epoch 20/30
Train Loss: 0.0111, Train Acc: 0.9968, Train F1: 0.9967
Val Loss: 0.2587, Val Acc: 0.9395, Val F1: 0.9378


Loss: 0.0079: 100%|██████████| 40/40 [00:04<00:00,  8.29it/s]


Epoch 21/30
Train Loss: 0.0076, Train Acc: 0.9976, Train F1: 0.9974
Val Loss: 0.2453, Val Acc: 0.9490, Val F1: 0.9465


Loss: 0.0425: 100%|██████████| 40/40 [00:04<00:00,  8.33it/s]


Epoch 22/30
Train Loss: 0.0051, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2542, Val Acc: 0.9490, Val F1: 0.9468


Loss: 0.0113: 100%|██████████| 40/40 [00:04<00:00,  8.36it/s]


Epoch 23/30
Train Loss: 0.0127, Train Acc: 0.9960, Train F1: 0.9963
Val Loss: 0.2502, Val Acc: 0.9459, Val F1: 0.9441


Loss: 0.0999: 100%|██████████| 40/40 [00:04<00:00,  8.34it/s]


Epoch 24/30
Train Loss: 0.0095, Train Acc: 0.9968, Train F1: 0.9964
Val Loss: 0.2298, Val Acc: 0.9459, Val F1: 0.9434


Loss: 0.0020: 100%|██████████| 40/40 [00:04<00:00,  8.33it/s]


Epoch 25/30
Train Loss: 0.0046, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.2431, Val Acc: 0.9459, Val F1: 0.9462


Loss: 0.0016: 100%|██████████| 40/40 [00:04<00:00,  8.34it/s]


Epoch 26/30
Train Loss: 0.0056, Train Acc: 0.9976, Train F1: 0.9967
Val Loss: 0.2382, Val Acc: 0.9427, Val F1: 0.9424


Loss: 0.2913: 100%|██████████| 40/40 [00:04<00:00,  8.31it/s]


Epoch 27/30
Train Loss: 0.0131, Train Acc: 0.9976, Train F1: 0.9971
Val Loss: 0.2442, Val Acc: 0.9427, Val F1: 0.9430


Loss: 0.0001: 100%|██████████| 40/40 [00:04<00:00,  8.33it/s]


Epoch 28/30
Train Loss: 0.0028, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2432, Val Acc: 0.9427, Val F1: 0.9424


Loss: 0.0032: 100%|██████████| 40/40 [00:04<00:00,  8.34it/s]


Epoch 29/30
Train Loss: 0.0035, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.2408, Val Acc: 0.9427, Val F1: 0.9424


Loss: 0.0001: 100%|██████████| 40/40 [00:04<00:00,  8.32it/s]


Epoch 30/30
Train Loss: 0.0019, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2418, Val Acc: 0.9427, Val F1: 0.9433


In [None]:
# Test-set inference
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be restored on a CPU-only machine.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()
preds_list = []

# A single no_grad scope covers the whole loop instead of re-entering it per batch.
with torch.no_grad():
    # The second element of each batch is ignored here.
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds = model(image)
        preds_list.extend(preds.argmax(dim=1).cpu().numpy())

In [None]:
# Save the results: reuse the test table's rows and overwrite the
# 'target' column with the predicted class indices
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target']).assign(target=preds_list)
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

## EfficientNet-B4

In [2]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# Dataset class definition
class ImageDataset(Dataset):
    """Image dataset backed by a table of ``(file name, target)`` rows.

    Args:
        csv: Either a ``pandas.DataFrame`` or something ``pandas.read_csv``
            can read. Every row is unpacked as ``(name, target)``.
        path: Directory that each file name is joined onto.
        transform: Optional callable invoked as ``transform(image=img)``;
            the ``'image'`` entry of its result becomes the sample.
    """

    def __init__(self, csv, path, transform=None):
        table = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = table.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        A file that does not exist is reported with a printed warning and
        replaced by an all-zero 380x380 RGB image instead of raising.
        """
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((380, 380, 3), dtype=np.uint8)

        if not self.transform:
            return img, target
        return self.transform(image=img)['image'], target

# Training function
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one training epoch and return ``(loss, accuracy, macro F1)``.

    Args:
        loader: Iterable of ``(image, targets)`` batches; also needs ``len()``.
        model: Network called as ``model(image)``; switched to train mode.
        optimizer: Optimizer zeroed and stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        The mean of the per-batch loss values, plus accuracy and macro F1
        computed over every sample seen in the epoch.
    """
    model.train()
    loss_sum = 0
    predicted, expected = [], []

    bar = tqdm(loader)
    for image, targets in bar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(image)
        step_loss = loss_fn(outputs, targets)
        step_loss.backward()
        optimizer.step()

        loss_sum += step_loss.item()
        # The predicted class is the argmax over dimension 1 of the output.
        predicted.extend(outputs.argmax(dim=1).detach().cpu().numpy())
        expected.extend(targets.detach().cpu().numpy())
        bar.set_description(f"Loss: {step_loss.item():.4f}")

    train_loss = loss_sum / len(loader)
    train_acc = accuracy_score(expected, predicted)
    train_f1 = f1_score(expected, predicted, average='macro')
    return train_loss, train_acc, train_f1

# Validation function
def validate(loader, model, loss_fn, device):
    """Evaluate the model and return ``(loss, accuracy, macro F1)``.

    Args:
        loader: Iterable of ``(image, targets)`` batches; also needs ``len()``.
        model: Network called as ``model(image)``; switched to eval mode.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        The mean of the per-batch loss values, plus accuracy and macro F1
        computed over every sample in ``loader``.
    """
    model.eval()
    loss_sum = 0
    predicted, expected = [], []

    # No parameter updates happen here, so gradient tracking is switched off.
    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            outputs = model(image)
            loss_sum += loss_fn(outputs, targets).item()
            predicted.extend(outputs.argmax(dim=1).detach().cpu().numpy())
            expected.extend(targets.detach().cpu().numpy())

    val_loss = loss_sum / len(loader)
    val_acc = accuracy_score(expected, predicted)
    val_f1 = f1_score(expected, predicted, average='macro')
    return val_loss, val_acc, val_f1

# Helper that prints the model structure
def print_model_summary(model, input_size):
    """Print a summary of ``model`` via ``torchsummary.summary``.

    Args:
        model: The network to summarize.
        input_size: Forwarded unchanged to ``summary``; the script below
            passes ``(3, img_size, img_size)``.
    """
    summary(model, input_size=input_size)

# Main script: train EfficientNet-B4, keep the best checkpoint, predict the test set
if __name__ == "__main__":
    # Settings
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'efficientnet_b4'
    img_size = 380  # image size suited to EfficientNet-B4
    LR = 5e-4  # adjusted learning rate
    EPOCHS = 30
    BATCH_SIZE = 16  # reduced batch size
    num_workers = 4

    # Data augmentation settings
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Validation/test images are only resized and normalized (no augmentation).
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and make a stratified 80/20 train/validation split
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Model, loss, optimizer and learning-rate schedule
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # Print the model structure
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # Training loop
    # Start below any attainable F1 so the first epoch always writes a
    # checkpoint. With an initial value of 0 and the strict ">" test below,
    # a run whose validation F1 never rises above 0 would leave no
    # best_model.pth (or a stale one from an earlier run) to load afterwards.
    best_val_f1 = float('-inf')
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        # Keep only the weights with the best validation macro F1 so far.
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "best_model.pth")

    # Test-set inference
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU can still be restored on a CPU-only machine.
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model.eval()
    preds_list = []

    # A single no_grad scope covers the whole loop.
    with torch.no_grad():
        for image, _ in tqdm(test_loader):
            image = image.to(device)
            preds = model(image)
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())

    # Save the results: overwrite the 'target' column with the predictions
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")

model.safetensors:   0%|          | 0.00/77.9M [00:00<?, ?B/s]


Model structure of efficientnet_b4:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 48, 190, 190]           1,296
          Identity-2         [-1, 48, 190, 190]               0
              SiLU-3         [-1, 48, 190, 190]               0
    BatchNormAct2d-4         [-1, 48, 190, 190]              96
            Conv2d-5         [-1, 48, 190, 190]             432
          Identity-6         [-1, 48, 190, 190]               0
              SiLU-7         [-1, 48, 190, 190]               0
    BatchNormAct2d-8         [-1, 48, 190, 190]              96
            Conv2d-9             [-1, 12, 1, 1]             588
             SiLU-10             [-1, 12, 1, 1]               0
           Conv2d-11             [-1, 48, 1, 1]             624
          Sigmoid-12             [-1, 48, 1, 1]               0
    SqueezeExcite-13         [-1, 48, 190, 190]               0
  

Loss: 0.3497: 100%|██████████| 79/79 [00:13<00:00,  6.01it/s]


Epoch 1/30
Train Loss: 1.1318, Train Acc: 0.6553, Train F1: 0.6248
Val Loss: 0.4453, Val Acc: 0.8567, Val F1: 0.7981


Loss: 1.3697: 100%|██████████| 79/79 [00:12<00:00,  6.32it/s]


Epoch 2/30
Train Loss: 0.3227, Train Acc: 0.8941, Train F1: 0.8863
Val Loss: 0.2545, Val Acc: 0.9076, Val F1: 0.9035


Loss: 0.0292: 100%|██████████| 79/79 [00:12<00:00,  6.32it/s]


Epoch 3/30
Train Loss: 0.2107, Train Acc: 0.9275, Train F1: 0.9217
Val Loss: 0.2675, Val Acc: 0.8981, Val F1: 0.8948


Loss: 0.1284: 100%|██████████| 79/79 [00:12<00:00,  6.32it/s]


Epoch 4/30
Train Loss: 0.1355, Train Acc: 0.9538, Train F1: 0.9504
Val Loss: 0.2736, Val Acc: 0.8949, Val F1: 0.8829


Loss: 0.0188: 100%|██████████| 79/79 [00:12<00:00,  6.30it/s]


Epoch 5/30
Train Loss: 0.1085, Train Acc: 0.9594, Train F1: 0.9569
Val Loss: 0.3185, Val Acc: 0.9045, Val F1: 0.8976


Loss: 0.1565: 100%|██████████| 79/79 [00:12<00:00,  6.29it/s]


Epoch 6/30
Train Loss: 0.1098, Train Acc: 0.9650, Train F1: 0.9619
Val Loss: 0.2225, Val Acc: 0.9299, Val F1: 0.9259


Loss: 0.2154: 100%|██████████| 79/79 [00:12<00:00,  6.29it/s]


Epoch 7/30
Train Loss: 0.0748, Train Acc: 0.9753, Train F1: 0.9743
Val Loss: 0.2327, Val Acc: 0.9204, Val F1: 0.9157


Loss: 0.0337: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 8/30
Train Loss: 0.0514, Train Acc: 0.9769, Train F1: 0.9762
Val Loss: 0.2508, Val Acc: 0.9140, Val F1: 0.9106


Loss: 0.0258: 100%|██████████| 79/79 [00:12<00:00,  6.26it/s]


Epoch 9/30
Train Loss: 0.0599, Train Acc: 0.9801, Train F1: 0.9805
Val Loss: 0.2796, Val Acc: 0.9172, Val F1: 0.9117


Loss: 0.0517: 100%|██████████| 79/79 [00:12<00:00,  6.28it/s]


Epoch 10/30
Train Loss: 0.0374, Train Acc: 0.9904, Train F1: 0.9895
Val Loss: 0.2305, Val Acc: 0.9459, Val F1: 0.9446


Loss: 0.0350: 100%|██████████| 79/79 [00:12<00:00,  6.28it/s]


Epoch 11/30
Train Loss: 0.0282, Train Acc: 0.9896, Train F1: 0.9888
Val Loss: 0.2369, Val Acc: 0.9299, Val F1: 0.9262


Loss: 0.0249: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 12/30
Train Loss: 0.0381, Train Acc: 0.9881, Train F1: 0.9865
Val Loss: 0.3124, Val Acc: 0.9140, Val F1: 0.9083


Loss: 0.2908: 100%|██████████| 79/79 [00:12<00:00,  6.26it/s]


Epoch 13/30
Train Loss: 0.0244, Train Acc: 0.9920, Train F1: 0.9914
Val Loss: 0.2506, Val Acc: 0.9363, Val F1: 0.9304


Loss: 0.0009: 100%|██████████| 79/79 [00:12<00:00,  6.29it/s]


Epoch 14/30
Train Loss: 0.0287, Train Acc: 0.9912, Train F1: 0.9915
Val Loss: 0.2410, Val Acc: 0.9204, Val F1: 0.9136


Loss: 0.0005: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 15/30
Train Loss: 0.0075, Train Acc: 0.9976, Train F1: 0.9975
Val Loss: 0.2818, Val Acc: 0.9363, Val F1: 0.9292


Loss: 0.0000: 100%|██████████| 79/79 [00:12<00:00,  6.26it/s]


Epoch 16/30
Train Loss: 0.0065, Train Acc: 0.9976, Train F1: 0.9978
Val Loss: 0.2787, Val Acc: 0.9363, Val F1: 0.9323


Loss: 0.0670: 100%|██████████| 79/79 [00:12<00:00,  6.26it/s]


Epoch 17/30
Train Loss: 0.0107, Train Acc: 0.9968, Train F1: 0.9966
Val Loss: 0.3376, Val Acc: 0.9331, Val F1: 0.9271


Loss: 0.0272: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 18/30
Train Loss: 0.0241, Train Acc: 0.9912, Train F1: 0.9901
Val Loss: 0.2828, Val Acc: 0.9299, Val F1: 0.9257


Loss: 0.0000: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 19/30
Train Loss: 0.0138, Train Acc: 0.9960, Train F1: 0.9960
Val Loss: 0.3082, Val Acc: 0.9395, Val F1: 0.9339


Loss: 0.0074: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 20/30
Train Loss: 0.0074, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2741, Val Acc: 0.9331, Val F1: 0.9296


Loss: 0.0008: 100%|██████████| 79/79 [00:12<00:00,  6.28it/s]


Epoch 21/30
Train Loss: 0.0034, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2727, Val Acc: 0.9268, Val F1: 0.9207


Loss: 0.0008: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 22/30
Train Loss: 0.0071, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2840, Val Acc: 0.9331, Val F1: 0.9279


Loss: 0.0016: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 23/30
Train Loss: 0.0050, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.3020, Val Acc: 0.9268, Val F1: 0.9209


Loss: 0.0082: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 24/30
Train Loss: 0.0033, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2958, Val Acc: 0.9331, Val F1: 0.9270


Loss: 0.0002: 100%|██████████| 79/79 [00:12<00:00,  6.25it/s]


Epoch 25/30
Train Loss: 0.0017, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3029, Val Acc: 0.9268, Val F1: 0.9218


Loss: 0.0001: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 26/30
Train Loss: 0.0036, Train Acc: 0.9984, Train F1: 0.9978
Val Loss: 0.2988, Val Acc: 0.9299, Val F1: 0.9214


Loss: 0.0721: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 27/30
Train Loss: 0.0021, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2997, Val Acc: 0.9331, Val F1: 0.9265


Loss: 0.0000: 100%|██████████| 79/79 [00:12<00:00,  6.28it/s]


Epoch 28/30
Train Loss: 0.0039, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3103, Val Acc: 0.9299, Val F1: 0.9255


Loss: 0.0001: 100%|██████████| 79/79 [00:12<00:00,  6.28it/s]


Epoch 29/30
Train Loss: 0.0018, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3224, Val Acc: 0.9268, Val F1: 0.9205


Loss: 0.0012: 100%|██████████| 79/79 [00:12<00:00,  6.27it/s]


Epoch 30/30
Train Loss: 0.0019, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3243, Val Acc: 0.9299, Val F1: 0.9229


100%|██████████| 197/197 [00:08<00:00, 22.79it/s]

Prediction completed and saved to pred.csv





## ConvNext V2 Large 모델

In [3]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# Dataset class definition
class ImageDataset(Dataset):
    """Image dataset backed by a table of ``(file name, target)`` rows.

    Args:
        csv: Either a ``pandas.DataFrame`` or something ``pandas.read_csv``
            can read. Every row is unpacked as ``(name, target)``.
        path: Directory that each file name is joined onto.
        transform: Optional callable invoked as ``transform(image=img)``;
            the ``'image'`` entry of its result becomes the sample.
    """

    def __init__(self, csv, path, transform=None):
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        A file that does not exist is reported with a printed warning and
        replaced by an all-zero 224x224 RGB image instead of raising.
        """
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)

        if not self.transform:
            return img, target
        return self.transform(image=img)['image'], target

# Training function
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one optimization pass over ``loader``.

    Args:
        loader: Iterable of ``(image, targets)`` batches; also needs ``len()``.
        model: Network called as ``model(image)``; switched to train mode.
        optimizer: Optimizer zeroed and stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        ``(train_loss, train_acc, train_f1)``: the mean of the per-batch loss
        values, the accuracy and the macro-averaged F1 over the epoch.
    """
    model.train()
    cumulative_loss = 0
    seen_preds, seen_targets = [], []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image, targets = image.to(device), targets.to(device)

        optimizer.zero_grad()
        scores = model(image)
        loss = loss_fn(scores, targets)
        loss.backward()
        optimizer.step()

        current = loss.item()
        cumulative_loss += current
        # Class prediction = index of the largest score along dimension 1.
        seen_preds.extend(scores.argmax(dim=1).detach().cpu().numpy())
        seen_targets.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {current:.4f}")

    return (
        cumulative_loss / len(loader),
        accuracy_score(seen_targets, seen_preds),
        f1_score(seen_targets, seen_preds, average='macro'),
    )

# Validation function
def validate(loader, model, loss_fn, device):
    """Score ``model`` on ``loader`` without updating its parameters.

    Args:
        loader: Iterable of ``(image, targets)`` batches; also needs ``len()``.
        model: Network called as ``model(image)``; switched to eval mode.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        ``(val_loss, val_acc, val_f1)``: the mean of the per-batch loss
        values, the accuracy and the macro-averaged F1 over ``loader``.
    """
    model.eval()
    cumulative_loss = 0
    seen_preds, seen_targets = [], []

    with torch.no_grad():
        for image, targets in loader:
            image, targets = image.to(device), targets.to(device)

            scores = model(image)
            cumulative_loss += loss_fn(scores, targets).item()
            seen_preds.extend(scores.argmax(dim=1).detach().cpu().numpy())
            seen_targets.extend(targets.detach().cpu().numpy())

    return (
        cumulative_loss / len(loader),
        accuracy_score(seen_targets, seen_preds),
        f1_score(seen_targets, seen_preds, average='macro'),
    )

# Helper that prints the model structure
def print_model_summary(model, input_size):
    """Print a summary of ``model`` via ``torchsummary.summary``.

    Args:
        model: The network to summarize.
        input_size: Forwarded unchanged to ``summary``.
    """
    summary(model, input_size=input_size)

# Main script: fine-tune ConvNeXt V2 Large, keep the checkpoint with the best
# validation macro-F1, then predict on the test set and write pred.csv.
if __name__ == "__main__":
    # Settings
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'convnextv2_large'
    img_size = 224  # image size used for ConvNeXt V2 Large
    LR = 1e-4  # learning rate
    EPOCHS = 30
    BATCH_SIZE = 32  # batch size
    num_workers = 4

    # Training-time augmentation
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Validation/test preprocessing: resize and normalise only
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and make a stratified 80/20 train/validation split
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Model, loss, optimizer and LR schedule
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # Print the model structure
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # Training loop
    # Start below any reachable F1 so the first epoch always writes a
    # checkpoint; otherwise the load below could hit a missing or stale file.
    best_val_f1 = float("-inf")
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "convNext_model.pth")

    # Test-set inference with the best checkpoint
    # map_location keeps the load working when the checkpoint was written on a
    # different device than the one currently available.
    model.load_state_dict(torch.load("convNext_model.pth", map_location=device))
    model.eval()
    preds_list = []

    for image, _ in tqdm(test_loader):
        image = image.to(device)
        with torch.no_grad():
            preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

    # Save the predictions
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")

model.safetensors:   0%|          | 0.00/792M [00:00<?, ?B/s]


Model structure of convnextv2_large:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 192, 56, 56]           9,408
       LayerNorm2d-2          [-1, 192, 56, 56]             384
          Identity-3          [-1, 192, 56, 56]               0
            Conv2d-4          [-1, 192, 56, 56]           9,600
         LayerNorm-5          [-1, 56, 56, 192]             384
            Linear-6          [-1, 56, 56, 768]         148,224
              GELU-7          [-1, 56, 56, 768]               0
           Dropout-8          [-1, 56, 56, 768]               0
GlobalResponseNorm-9          [-1, 56, 56, 768]           1,536
           Linear-10          [-1, 56, 56, 192]         147,648
          Dropout-11          [-1, 56, 56, 192]               0
GlobalResponseNormMlp-12          [-1, 56, 56, 192]               0
         Identity-13          [-1, 192, 56, 56]              

Loss: 0.2964: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 1/30
Train Loss: 0.9600, Train Acc: 0.6959, Train F1: 0.6740
Val Loss: 0.4322, Val Acc: 0.8631, Val F1: 0.8098


Loss: 0.2529: 100%|██████████| 40/40 [00:29<00:00,  1.33it/s]


Epoch 2/30
Train Loss: 0.3619, Train Acc: 0.8646, Train F1: 0.8471
Val Loss: 0.2831, Val Acc: 0.8854, Val F1: 0.8503


Loss: 0.1453: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 3/30
Train Loss: 0.2100, Train Acc: 0.9156, Train F1: 0.9062
Val Loss: 0.2002, Val Acc: 0.9045, Val F1: 0.8900


Loss: 0.1877: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 4/30
Train Loss: 0.1640, Train Acc: 0.9355, Train F1: 0.9296
Val Loss: 0.2879, Val Acc: 0.9013, Val F1: 0.8905


Loss: 0.1187: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 5/30
Train Loss: 0.1153, Train Acc: 0.9514, Train F1: 0.9525
Val Loss: 0.1906, Val Acc: 0.9299, Val F1: 0.9265


Loss: 0.0913: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 6/30
Train Loss: 0.1035, Train Acc: 0.9594, Train F1: 0.9576
Val Loss: 0.2150, Val Acc: 0.9204, Val F1: 0.9159


Loss: 0.0023: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 7/30
Train Loss: 0.0806, Train Acc: 0.9682, Train F1: 0.9666
Val Loss: 0.2323, Val Acc: 0.9140, Val F1: 0.9072


Loss: 0.0121: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 8/30
Train Loss: 0.0602, Train Acc: 0.9777, Train F1: 0.9782
Val Loss: 0.1830, Val Acc: 0.9363, Val F1: 0.9342


Loss: 0.0047: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 9/30
Train Loss: 0.0543, Train Acc: 0.9785, Train F1: 0.9783
Val Loss: 0.2719, Val Acc: 0.9045, Val F1: 0.8931


Loss: 0.0010: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 10/30
Train Loss: 0.0550, Train Acc: 0.9761, Train F1: 0.9752
Val Loss: 0.2324, Val Acc: 0.9108, Val F1: 0.8992


Loss: 0.2174: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 11/30
Train Loss: 0.0490, Train Acc: 0.9849, Train F1: 0.9856
Val Loss: 0.1743, Val Acc: 0.9363, Val F1: 0.9340


Loss: 0.6106: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 12/30
Train Loss: 0.0573, Train Acc: 0.9849, Train F1: 0.9850
Val Loss: 0.1767, Val Acc: 0.9395, Val F1: 0.9392


Loss: 0.1281: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 13/30
Train Loss: 0.0872, Train Acc: 0.9745, Train F1: 0.9751
Val Loss: 0.2215, Val Acc: 0.9172, Val F1: 0.9072


Loss: 0.0006: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 14/30
Train Loss: 0.0412, Train Acc: 0.9889, Train F1: 0.9886
Val Loss: 0.2170, Val Acc: 0.9331, Val F1: 0.9309


Loss: 0.0009: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 15/30
Train Loss: 0.0256, Train Acc: 0.9912, Train F1: 0.9908
Val Loss: 0.2424, Val Acc: 0.9299, Val F1: 0.9262


Loss: 0.0707: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 16/30
Train Loss: 0.0138, Train Acc: 0.9952, Train F1: 0.9946
Val Loss: 0.2308, Val Acc: 0.9268, Val F1: 0.9233


Loss: 0.0005: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 17/30
Train Loss: 0.0098, Train Acc: 0.9976, Train F1: 0.9978
Val Loss: 0.2616, Val Acc: 0.9204, Val F1: 0.9159


Loss: 0.0017: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 18/30
Train Loss: 0.0083, Train Acc: 0.9976, Train F1: 0.9975
Val Loss: 0.2414, Val Acc: 0.9236, Val F1: 0.9230


Loss: 0.0002: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 19/30
Train Loss: 0.0101, Train Acc: 0.9968, Train F1: 0.9967
Val Loss: 0.2392, Val Acc: 0.9268, Val F1: 0.9243


Loss: 0.0020: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 20/30
Train Loss: 0.0043, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.2168, Val Acc: 0.9331, Val F1: 0.9353


Loss: 0.0002: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 21/30
Train Loss: 0.0036, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.2205, Val Acc: 0.9299, Val F1: 0.9298


Loss: 0.0004: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 22/30
Train Loss: 0.0039, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2330, Val Acc: 0.9268, Val F1: 0.9257


Loss: 0.0010: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 23/30
Train Loss: 0.0026, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.2459, Val Acc: 0.9268, Val F1: 0.9246


Loss: 0.0006: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 24/30
Train Loss: 0.0016, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2468, Val Acc: 0.9268, Val F1: 0.9246


Loss: 0.0017: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 25/30
Train Loss: 0.0010, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2471, Val Acc: 0.9299, Val F1: 0.9289


Loss: 0.0002: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 26/30
Train Loss: 0.0015, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2464, Val Acc: 0.9268, Val F1: 0.9260


Loss: 0.0005: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 27/30
Train Loss: 0.0014, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2477, Val Acc: 0.9268, Val F1: 0.9260


Loss: 0.0004: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 28/30
Train Loss: 0.0011, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2466, Val Acc: 0.9268, Val F1: 0.9260


Loss: 0.0003: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 29/30
Train Loss: 0.0017, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2470, Val Acc: 0.9268, Val F1: 0.9260


Loss: 0.0003: 100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


Epoch 30/30
Train Loss: 0.0008, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2472, Val Acc: 0.9268, Val F1: 0.9260


100%|██████████| 99/99 [00:18<00:00,  5.30it/s]

Prediction completed and saved to pred.csv





## Hyperparameter Tuning With CNN-Based Model

### ConvNeXt V2 Large 모델 + Optuna

In [20]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import optuna

# Dataset class definition
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs listed in a CSV file or DataFrame.

    Each row must unpack into exactly two values: the image file name
    (relative to ``path``) and its target.

    Args:
        csv: a ``pandas.DataFrame`` or a path readable by ``pandas.read_csv``.
        path: directory containing the image files.
        transform: optional callable invoked as ``transform(image=img)`` whose
            result is indexed with ``['image']``.
    """

    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        A missing image file is reported with a warning and replaced by an
        all-zero 224x224 RGB placeholder so that iteration can continue.
        """
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            # Close the file handle as soon as the pixels are copied instead
            # of leaving it to garbage collection.
            with Image.open(img_path) as handle:
                img = np.array(handle.convert('RGB'))

        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# Training function definition
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one optimisation pass over ``loader`` and report epoch metrics.

    The model is switched to training mode; for every batch the gradients are
    reset, a forward/backward pass is executed and the optimizer is stepped.

    Args:
        loader: iterable yielding ``(image, targets)`` batches; must support ``len``.
        model: network being trained.
        optimizer: optimizer updating ``model``'s parameters.
        loss_fn: callable computing the loss from ``(preds, targets)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, accuracy, macro F1)`` over the epoch.
    """
    model.train()
    running_loss = 0
    predicted_labels, true_labels = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        predicted_labels.extend(logits.argmax(dim=1).detach().cpu().numpy())
        true_labels.extend(batch_targets.detach().cpu().numpy())

        # Show the latest batch loss on the progress bar.
        progress.set_description(f"Loss: {loss_value:.4f}")

    # The loss is averaged per batch, not per sample.
    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(true_labels, predicted_labels),
        f1_score(true_labels, predicted_labels, average='macro'),
    )

# Validation function definition
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        loader: iterable yielding ``(image, targets)`` batches; must support ``len``.
        model: network to evaluate (switched to eval mode here).
        loss_fn: callable computing the loss from ``(preds, targets)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, accuracy, macro F1)`` over the loader.
    """
    model.eval()
    total_loss = 0
    predicted_labels, true_labels = [], []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            total_loss += loss_fn(logits, batch_targets).item()

            predicted_labels.extend(logits.argmax(dim=1).detach().cpu().numpy())
            true_labels.extend(batch_targets.detach().cpu().numpy())

    # The loss is averaged per batch, not per sample.
    mean_loss = total_loss / len(loader)
    accuracy = accuracy_score(true_labels, predicted_labels)
    macro_f1 = f1_score(true_labels, predicted_labels, average='macro')
    return mean_loss, accuracy, macro_f1

# Model structure printing helper
def print_model_summary(model, input_size):
    """Delegate to ``summary`` to describe ``model``.

    Args:
        model: network to describe.
        input_size: forwarded unchanged to ``summary``; callers in this file
            pass ``(3, img_size, img_size)``.
    """
    summary(model, input_size=input_size)

# Objective function for Optuna hyperparameter optimisation
def objective(trial):
    """Train one hyperparameter configuration and return its best validation macro-F1.

    Samples learning rate, batch size and weight decay from ``trial``, trains a
    freshly created pretrained model for ``EPOCHS`` epochs and tracks the
    highest validation F1 seen.

    Relies on module-level names defined by the driver cell: ``train_dataset``,
    ``val_dataset``, ``num_workers``, ``model_name``, ``device`` and ``EPOCHS``.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        The best validation macro-F1 over all epochs (0 if none exceeded 0).
    """
    # Search space. suggest_float(..., log=True) is the non-deprecated
    # equivalent of suggest_loguniform.
    lr = trial.suggest_float('lr', 1e-5, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [8, 16, 32])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    # Data loaders depend on the sampled batch size, so build them per trial.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Model, loss, optimizer and LR schedule
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # Train and validate, keeping only the best validation F1.
    best_val_f1 = 0
    for _ in range(EPOCHS):
        train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        _, _, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        best_val_f1 = max(best_val_f1, val_f1)

    return best_val_f1

In [None]:
# Driver cell: search hyperparameters with Optuna, retrain with the best ones,
# then predict on the test set. The names defined here (device, model_name,
# EPOCHS, num_workers, train_dataset, val_dataset) are read by objective().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'
model_name = 'convnextv2_large'
img_size = 224  # image size used for ConvNeXt V2
EPOCHS = 30
num_workers = 4

# Training-time augmentation
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Validation/test preprocessing: resize and normalise only
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Load the labels and make a stratified 80/20 train/validation split
df = pd.read_csv(os.path.join(data_path, "train.csv"))
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

# Hyperparameter optimisation with Optuna (maximise validation F1)
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train the final model with the best hyperparameters
best_lr = best_params['lr']
best_batch_size = best_params['batch_size']
best_weight_decay = best_params['weight_decay']

train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=best_lr, weight_decay=best_weight_decay)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Print the model structure
print(f"\nModel structure of {model_name}:")
print_model_summary(model, (3, img_size, img_size))

# Training loop
# Start below any reachable F1 so the first epoch always writes a checkpoint;
# otherwise the load below could hit a missing or stale file.
best_val_f1 = float("-inf")
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")

# Test-set inference with the best checkpoint
# map_location keeps the load working when the checkpoint was written on a
# different device than the one currently available.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()
preds_list = []

for image, _ in tqdm(test_loader):
    image = image.to(device)
    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# Save the predictions
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

### ConvNeXt V2 Large + WandB Sweep
- pip install wandb
- wandb login

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import wandb
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# 데이터셋 클래스 정의
#class ImageDataset(Dataset):


# Training function definition
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one optimisation pass over ``loader`` and report epoch metrics.

    The model is switched to training mode; for every batch the gradients are
    reset, a forward/backward pass is executed and the optimizer is stepped.

    Args:
        loader: iterable yielding ``(image, targets)`` batches; must support ``len``.
        model: network being trained.
        optimizer: optimizer updating ``model``'s parameters.
        loss_fn: callable computing the loss from ``(preds, targets)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, accuracy, macro F1)`` over the epoch.
    """
    model.train()
    running_loss = 0
    predicted_labels, true_labels = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        predicted_labels.extend(logits.argmax(dim=1).detach().cpu().numpy())
        true_labels.extend(batch_targets.detach().cpu().numpy())

        # Show the latest batch loss on the progress bar.
        progress.set_description(f"Loss: {loss_value:.4f}")

    # The loss is averaged per batch, not per sample.
    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(true_labels, predicted_labels),
        f1_score(true_labels, predicted_labels, average='macro'),
    )

# 검증 함수 정의
#def validate(loader, model, loss_fn, device):

# 모델 구조 출력 함수
#def print_model_summary(model, input_size):


# Training function executed once per wandb sweep run
def train():
    """Train one sweep configuration and log per-epoch metrics to wandb.

    Hyperparameters (``model_name``, ``img_size``, ``learning_rate``,
    ``epochs``, ``batch_size``, ``weight_decay``) are read from the run's
    config. After every epoch the train/validation metrics are logged, and the
    weights are written to ``best_model.pth`` whenever validation macro-F1
    improves within this run.

    Relies on ``ImageDataset``, ``validate`` and ``print_model_summary`` being
    defined elsewhere in the session.
    """
    # Using the run as a context manager ensures it is finished even when
    # training raises, instead of leaving the run open.
    # Set the entity to your own wandb entity name.
    with wandb.init(entity="cho") as run:
        config = run.config

        # Settings
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        data_path = '../data/'
        model_name = config.model_name
        img_size = config.img_size
        LR = config.learning_rate
        EPOCHS = config.epochs
        BATCH_SIZE = config.batch_size
        num_workers = 4

        # Training-time augmentation
        train_transform = A.Compose([
            A.Resize(height=img_size, width=img_size),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2(),
        ])

        # Validation preprocessing: resize and normalise only
        val_transform = A.Compose([
            A.Resize(height=img_size, width=img_size),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2(),
        ])

        # Load the labels and make a stratified 80/20 train/validation split
        df = pd.read_csv(os.path.join(data_path, "train.csv"))
        train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

        train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
        val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

        # Model, loss, optimizer and LR schedule
        model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=LR, weight_decay=config.weight_decay)
        scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

        # Print the model structure
        print(f"\nModel structure of {model_name}:")
        print_model_summary(model, (3, img_size, img_size))

        # Training loop
        best_val_f1 = 0
        for epoch in range(EPOCHS):
            train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
            val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
            scheduler.step()

            print(f"Epoch {epoch+1}/{EPOCHS}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

            # Log the epoch metrics to wandb. The learning rate is read after
            # scheduler.step(), so it is the value for the next epoch.
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "train_acc": train_acc,
                "train_f1": train_f1,
                "val_loss": val_loss,
                "val_acc": val_acc,
                "val_f1": val_f1,
                "learning_rate": optimizer.param_groups[0]['lr']
            })

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                # NOTE(review): every sweep run writes to this same file, so it
                # ends up holding the latest run's best weights, not
                # necessarily those of the best run of the whole sweep.
                torch.save(model.state_dict(), "best_model.pth")
                run.summary["best_val_f1"] = best_val_f1

# wandb sweep configuration: the search method, the metric to optimise and the
# hyperparameter values that train() reads from its run config.
sweep_config = {
    'method': 'random',
    'metric': {
        # Same key that train() logs after every epoch.
        'name': 'val_f1',
        'goal': 'maximize'
    },
    'parameters': {
        'model_name': {
            'values': ['convnextv2_large', 'efficientnet_b4']
        },
        'learning_rate': {
            'distribution': 'uniform',
            'min': 1e-5,
            'max': 1e-3
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'img_size': {
            'values': [224, 256, 288]
        },
        'weight_decay': {
            'values': [1e-5, 1e-4, 1e-3]
        },
        'epochs': {
            # Fixed value, not searched.
            'value': 30
        }
    }
}

# Register the sweep and run train() through a wandb agent (count=30) to
# search for the best-performing model.
sweep_id = wandb.sweep(sweep_config, project="cvmodel",entity="cho")
wandb.agent(sweep_id, train, count=30)

In [None]:

# Fetch the configuration of the best-performing sweep run.
# The sweep is created with entity "cho" and project "cvmodel", so it has to
# be looked up under that same path. Set the entity to your own wandb entity.
WANDB_ENTITY = "cho"
WANDB_PROJECT = "cvmodel"

api = wandb.Api()
sweep = api.sweep(f"{WANDB_ENTITY}/{WANDB_PROJECT}/{sweep_id}")
best_run = sweep.best_run()
best_config = best_run.config

# train() keeps these as locals, so define them here for inference
# (same values as used during training).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'

# Use the settings of the best-performing run
model_name = best_config['model_name']
img_size = best_config['img_size']
BATCH_SIZE = best_config['batch_size']
num_workers = 4

# Test-time preprocessing
test_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Test dataset and data loader
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# Rebuild the model and load the saved weights.
# NOTE(review): train() writes "best_model.pth" from every sweep run, so the
# file may not belong to best_run (and may not even match model_name) --
# confirm before trusting these predictions.
model = timm.create_model(model_name, pretrained=False, num_classes=17).to(device)
# map_location keeps the load working when the checkpoint was written on a
# different device than the one currently available.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()

# Test-set inference
preds_list = []

for image, _ in tqdm(test_loader):
    image = image.to(device)
    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# Save the predictions
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

# Upload the result to wandb
wandb.init(project=WANDB_PROJECT, name="best_model_prediction", entity=WANDB_ENTITY)
wandb.config.update(best_config)
wandb.save("pred.csv")
wandb.finish()

# Transformer 기반 모델

## Swin Transformers

In [4]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# Dataset class definition
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs listed in a CSV file or DataFrame.

    Each row must unpack into exactly two values: the image file name
    (relative to ``path``) and its target.

    Args:
        csv: a ``pandas.DataFrame`` or a path readable by ``pandas.read_csv``.
        path: directory containing the image files.
        transform: optional callable invoked as ``transform(image=img)`` whose
            result is indexed with ``['image']``.
    """

    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        A missing image file is reported with a warning and replaced by an
        all-zero 224x224 RGB placeholder so that iteration can continue.
        """
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            # Close the file handle as soon as the pixels are copied instead
            # of leaving it to garbage collection.
            with Image.open(img_path) as handle:
                img = np.array(handle.convert('RGB'))

        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# Training function definition
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one optimisation pass over ``loader`` and report epoch metrics.

    The model is switched to training mode; for every batch the gradients are
    reset, a forward/backward pass is executed and the optimizer is stepped.

    Args:
        loader: iterable yielding ``(image, targets)`` batches; must support ``len``.
        model: network being trained.
        optimizer: optimizer updating ``model``'s parameters.
        loss_fn: callable computing the loss from ``(preds, targets)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, accuracy, macro F1)`` over the epoch.
    """
    model.train()
    running_loss = 0
    predicted_labels, true_labels = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        predicted_labels.extend(logits.argmax(dim=1).detach().cpu().numpy())
        true_labels.extend(batch_targets.detach().cpu().numpy())

        # Show the latest batch loss on the progress bar.
        progress.set_description(f"Loss: {loss_value:.4f}")

    # The loss is averaged per batch, not per sample.
    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(true_labels, predicted_labels),
        f1_score(true_labels, predicted_labels, average='macro'),
    )

# Validation function definition
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        loader: iterable yielding ``(image, targets)`` batches; must support ``len``.
        model: network to evaluate (switched to eval mode here).
        loss_fn: callable computing the loss from ``(preds, targets)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, accuracy, macro F1)`` over the loader.
    """
    model.eval()
    total_loss = 0
    predicted_labels, true_labels = [], []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            total_loss += loss_fn(logits, batch_targets).item()

            predicted_labels.extend(logits.argmax(dim=1).detach().cpu().numpy())
            true_labels.extend(batch_targets.detach().cpu().numpy())

    # The loss is averaged per batch, not per sample.
    mean_loss = total_loss / len(loader)
    accuracy = accuracy_score(true_labels, predicted_labels)
    macro_f1 = f1_score(true_labels, predicted_labels, average='macro')
    return mean_loss, accuracy, macro_f1

# Model structure printing helper
def print_model_summary(model, input_size):
    """Delegate to ``summary`` to describe ``model``.

    Args:
        model: network to describe.
        input_size: forwarded unchanged to ``summary``; callers in this file
            pass ``(3, img_size, img_size)``.
    """
    summary(model, input_size=input_size)

# Main entry point: train a Swin-Large classifier and write test predictions
if __name__ == "__main__":
    # Configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'swin_large_patch4_window7_224'  # Swin Transformer Large
    img_size = 224  # input resolution; matches the "_224" model variant
    LR = 1e-4  # learning rate
    EPOCHS = 30
    BATCH_SIZE = 32
    num_workers = 4

    # Training-time augmentation pipeline
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Validation / test pipeline: resize and normalise only
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and make a stratified 80/20 train/validation split
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Model, loss, optimizer and learning-rate schedule
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # Print the model structure
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # Training loop. Start from -inf so the first epoch always writes a
    # checkpoint; with 0 and a strict ">" a run whose F1 never rises above 0
    # would later load a stale file left over from a previous run.
    best_val_f1 = float('-inf')
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        # Keep only the weights with the best validation macro-F1.
        # NOTE: the file name says "swin_t" although the model is Swin-Large;
        # it is kept because the soft-voting cell loads this exact path.
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "swin_t_model.pth")

    # Test-set inference with the best checkpoint.
    # map_location makes the load independent of the device it was saved from.
    model.load_state_dict(torch.load("swin_t_model.pth", map_location=device))
    model.eval()
    preds_list = []

    with torch.no_grad():
        for image, _ in tqdm(test_loader):
            image = image.to(device)
            preds = model(image)
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

    # Save the predictions in the sample-submission layout
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("pred_swin.csv", index=False)
    print("Prediction completed and saved to pred_swin.csv")


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


model.safetensors:   0%|          | 0.00/788M [00:00<?, ?B/s]


Model structure of swin_large_patch4_window7_224:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 192, 56, 56]           9,408
         LayerNorm-2          [-1, 56, 56, 192]             384
        PatchEmbed-3          [-1, 56, 56, 192]               0
          Identity-4          [-1, 56, 56, 192]               0
         LayerNorm-5          [-1, 56, 56, 192]             384
            Linear-6              [-1, 49, 576]         111,168
           Softmax-7            [-1, 6, 49, 49]               0
           Dropout-8            [-1, 6, 49, 49]               0
            Linear-9              [-1, 49, 192]          37,056
          Dropout-10              [-1, 49, 192]               0
  WindowAttention-11              [-1, 49, 192]               0
         Identity-12          [-1, 56, 56, 192]               0
        LayerNorm-13            [-1, 3136, 192]     

Loss: 0.4612: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s]


Epoch 1/30
Train Loss: 1.0722, Train Acc: 0.6481, Train F1: 0.6209
Val Loss: 0.3306, Val Acc: 0.8631, Val F1: 0.8431


Loss: 0.1875: 100%|██████████| 40/40 [00:20<00:00,  1.94it/s]


Epoch 2/30
Train Loss: 0.3223, Train Acc: 0.8814, Train F1: 0.8692
Val Loss: 0.2665, Val Acc: 0.8981, Val F1: 0.8756


Loss: 0.0757: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 3/30
Train Loss: 0.2419, Train Acc: 0.9061, Train F1: 0.8955
Val Loss: 0.2920, Val Acc: 0.8758, Val F1: 0.8568


Loss: 0.0435: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 4/30
Train Loss: 0.1863, Train Acc: 0.9307, Train F1: 0.9238
Val Loss: 0.2198, Val Acc: 0.9172, Val F1: 0.8998


Loss: 0.0631: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 5/30
Train Loss: 0.1419, Train Acc: 0.9451, Train F1: 0.9424
Val Loss: 0.2341, Val Acc: 0.9172, Val F1: 0.9120


Loss: 0.0065: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 6/30
Train Loss: 0.1203, Train Acc: 0.9506, Train F1: 0.9470
Val Loss: 0.2091, Val Acc: 0.9363, Val F1: 0.9265


Loss: 0.0180: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 7/30
Train Loss: 0.1162, Train Acc: 0.9530, Train F1: 0.9516
Val Loss: 0.2048, Val Acc: 0.9268, Val F1: 0.9151


Loss: 0.0006: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 8/30
Train Loss: 0.0998, Train Acc: 0.9602, Train F1: 0.9608
Val Loss: 0.2000, Val Acc: 0.9363, Val F1: 0.9220


Loss: 0.0749: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 9/30
Train Loss: 0.0846, Train Acc: 0.9658, Train F1: 0.9626
Val Loss: 0.2467, Val Acc: 0.9172, Val F1: 0.9086


Loss: 0.0144: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 10/30
Train Loss: 0.0530, Train Acc: 0.9793, Train F1: 0.9795
Val Loss: 0.2025, Val Acc: 0.9331, Val F1: 0.9259


Loss: 0.1492: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 11/30
Train Loss: 0.0527, Train Acc: 0.9817, Train F1: 0.9815
Val Loss: 0.2261, Val Acc: 0.9268, Val F1: 0.9172


Loss: 0.0052: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 12/30
Train Loss: 0.0810, Train Acc: 0.9658, Train F1: 0.9652
Val Loss: 0.2181, Val Acc: 0.9236, Val F1: 0.9159


Loss: 0.0006: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 13/30
Train Loss: 0.0455, Train Acc: 0.9825, Train F1: 0.9830
Val Loss: 0.2441, Val Acc: 0.9363, Val F1: 0.9295


Loss: 0.0008: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 14/30
Train Loss: 0.0300, Train Acc: 0.9896, Train F1: 0.9893
Val Loss: 0.2459, Val Acc: 0.9204, Val F1: 0.9119


Loss: 0.0009: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 15/30
Train Loss: 0.0431, Train Acc: 0.9857, Train F1: 0.9840
Val Loss: 0.3954, Val Acc: 0.8917, Val F1: 0.8851


Loss: 0.0023: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 16/30
Train Loss: 0.0323, Train Acc: 0.9920, Train F1: 0.9909
Val Loss: 0.2798, Val Acc: 0.9268, Val F1: 0.9196


Loss: 0.0013: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 17/30
Train Loss: 0.0217, Train Acc: 0.9912, Train F1: 0.9901
Val Loss: 0.2950, Val Acc: 0.9299, Val F1: 0.9231


Loss: 0.0069: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 18/30
Train Loss: 0.0322, Train Acc: 0.9849, Train F1: 0.9842
Val Loss: 0.2850, Val Acc: 0.9172, Val F1: 0.9107


Loss: 0.0016: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 19/30
Train Loss: 0.0200, Train Acc: 0.9952, Train F1: 0.9956
Val Loss: 0.2581, Val Acc: 0.9299, Val F1: 0.9223


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 20/30
Train Loss: 0.0074, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2754, Val Acc: 0.9331, Val F1: 0.9269


Loss: 0.0015: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 21/30
Train Loss: 0.0083, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2848, Val Acc: 0.9363, Val F1: 0.9306


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 22/30
Train Loss: 0.0053, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.2906, Val Acc: 0.9299, Val F1: 0.9244


Loss: 0.0005: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 23/30
Train Loss: 0.0047, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2852, Val Acc: 0.9331, Val F1: 0.9282


Loss: 0.0016: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 24/30
Train Loss: 0.0048, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.2906, Val Acc: 0.9331, Val F1: 0.9273


Loss: 0.0030: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 25/30
Train Loss: 0.0045, Train Acc: 0.9984, Train F1: 0.9979
Val Loss: 0.2823, Val Acc: 0.9363, Val F1: 0.9301


Loss: 0.0057: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 26/30
Train Loss: 0.0043, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2812, Val Acc: 0.9363, Val F1: 0.9310


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 27/30
Train Loss: 0.0027, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.2809, Val Acc: 0.9363, Val F1: 0.9310


Loss: 0.0006: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 28/30
Train Loss: 0.0045, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.2802, Val Acc: 0.9363, Val F1: 0.9310


Loss: 0.0023: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 29/30
Train Loss: 0.0032, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2809, Val Acc: 0.9395, Val F1: 0.9342


Loss: 0.0002: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 30/30
Train Loss: 0.0024, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.2809, Val Acc: 0.9395, Val F1: 0.9342


100%|██████████| 99/99 [00:18<00:00,  5.42it/s]

Prediction completed and saved to pred.csv





## Swin-B clustering & classification
- 이미지를 유사한 이미지끼리 클러스터링(기본값 5개, 아래 실행에서는 3개)한 뒤, 클러스터별로 분류 모델을 학습하고 분석하는 모델

In [7]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Load the Swin-B model
def load_swin_b_model(num_classes=None):
    """Create a pretrained ``swin_base_patch4_window7_224`` model through timm.

    Args:
        num_classes: Forwarded to ``timm.create_model`` as ``num_classes``.

    Returns:
        The model object returned by ``timm.create_model``.
    """
    return timm.create_model(
        'swin_base_patch4_window7_224',
        pretrained=True,
        num_classes=num_classes,
    )

# Feature extraction
def extract_features(img_path, model):
    """Return the ``forward_features`` output of ``model`` for one image.

    The image is converted to RGB, resized to 224x224, normalised and turned
    into a batch of one before being passed to ``model.forward_features``.

    Args:
        img_path: Path of the image file to read.
        model: Model exposing ``forward_features``; assumed to be a
            ``torch.nn.Module`` (its first parameter decides the device).

    Returns:
        NumPy array holding the squeezed feature tensor.
    """
    transform = A.Compose([
        A.Resize(224, 224),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Use a context manager so the underlying file is closed deterministically.
    with Image.open(img_path) as handle:
        img = np.array(handle.convert('RGB'))
    img = transform(image=img)['image'].unsqueeze(0)

    # Run on whatever device the model lives on; the original kept the input
    # on CPU, which fails as soon as the model has been moved to a GPU.
    try:
        target_device = next(model.parameters()).device
    except StopIteration:
        target_device = torch.device('cpu')

    with torch.no_grad():
        features = model.forward_features(img.to(target_device))

    # .cpu() is a no-op for CPU tensors and required before .numpy() otherwise.
    return features.squeeze().cpu().numpy()

# Image clustering
def cluster_images(data_path, n_clusters=5):
    """Cluster the images in ``data_path`` by their Swin-B features.

    Every ``.jpg`` / ``.png`` file directly inside ``data_path`` is passed
    through ``extract_features``; the flattened features are standardised and
    grouped with KMeans.

    Args:
        data_path: Directory containing the image files.
        n_clusters: Number of KMeans clusters.

    Returns:
        Dict mapping each image file name to its cluster label.

    Raises:
        ValueError: If ``data_path`` contains no ``.jpg`` / ``.png`` file.
    """
    feature_extractor = load_swin_b_model(num_classes=None)
    feature_extractor.eval()

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order fed to KMeans (and therefore the cluster ids) reproducible.
    image_files = sorted(
        f for f in os.listdir(data_path) if f.endswith(('.jpg', '.png'))
    )
    if not image_files:
        raise ValueError(f"No .jpg/.png images found in {data_path}")

    features = []
    for img_file in tqdm(image_files, desc="Extracting features"):
        img_path = os.path.join(data_path, img_file)
        feature = extract_features(img_path, feature_extractor)
        features.append(feature.reshape(-1))  # flatten to one vector per image

    features = np.array(features)
    features_scaled = StandardScaler().fit_transform(features)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(features_scaled)

    return dict(zip(image_files, clusters))

# Dataset definition
class ImageDataset(Dataset):
    """Image dataset that also reports a per-image cluster id.

    Rows are taken from a DataFrame (or a CSV path) and unpacked as
    ``(file name, target)``. Images are read from ``path``; a missing file is
    replaced by an all-zero 224x224x3 uint8 image after printing a warning.
    """

    def __init__(self, csv, path, transform=None, cluster_dict=None):
        # Accept either an in-memory DataFrame or a path readable by pandas.
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform
        self.cluster_dict = cluster_dict

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target, cluster)`` for row ``idx``."""
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)

        if self.transform:
            img = self.transform(image=img)['image']

        # -1 marks "no cluster known" (no mapping given, or name not in it).
        if self.cluster_dict:
            cluster = self.cluster_dict.get(name, -1)
        else:
            cluster = -1
        return img, target, cluster

# Early-stopping helper
class EarlyStopping:
    """Track validation loss and flag when it has stopped improving.

    Call the instance once per epoch with the validation loss. A call counts
    as an improvement unless ``-val_loss < best_score + min_delta``; after
    ``patience`` non-improving calls since the last improvement,
    ``early_stop`` becomes True.
    """

    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0          # non-improving calls since the last improvement
        self.best_score = None    # highest negated loss accepted so far
        self.early_stop = False

    def __call__(self, val_loss):
        # Work with the negated loss so that "larger is better".
        current = -val_loss

        if self.best_score is None:
            self.best_score = current
            return

        if current < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            return

        self.best_score = current
        self.counter = 0

# Training function
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one optimisation pass over ``loader`` and return epoch metrics.

    Args:
        loader: Iterable yielding ``(image, targets, cluster)`` batches; the
            third element is ignored.
        model: Network to train; switched to train mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy, macro_f1)`` where ``loss`` is the sum of the
        per-batch losses divided by ``len(loader)``.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets, _ in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        # Predicted class = index of the largest output along dim 1.
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(batch_targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {loss_value:.4f}")

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

# Validation function
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` with gradients disabled.

    Args:
        loader: Iterable yielding ``(image, targets, cluster)`` batches; the
            third element is ignored.
        model: Network to evaluate; switched to eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy, macro_f1)`` where ``loss`` is the sum of the
        per-batch losses divided by ``len(loader)``.
    """
    model.eval()
    running_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for batch_images, batch_targets, _ in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            running_loss += loss_fn(logits, batch_targets).item()

            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(batch_targets.detach().cpu().numpy())

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

# Helper that prints the model structure
def print_model_summary(model, input_size):
    """Print a summary of ``model`` by delegating to ``summary``.

    Args:
        model: Model to summarise.
        input_size: Input size forwarded unchanged to ``summary``; the caller
            in this script passes ``(3, img_size, img_size)``.
    """
    summary(model, input_size)

# Main entry point: cluster the training images, train one Swin-B classifier
# per cluster, then average the per-cluster model outputs on the test set
if __name__ == "__main__":
    # Local import: only this script section needs Subset.
    from torch.utils.data import Subset

    # Configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    img_size = 224
    LR = 2e-5
    EPOCHS = 30
    BATCH_SIZE = 32
    num_workers = 4
    n_clusters = 3

    # Run the clustering step
    print("Clustering images...")
    cluster_dict = cluster_images(os.path.join(data_path, "train_preprocessed/"), n_clusters)

    # Training-time augmentation pipeline
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Validation / test pipeline: resize and normalise only
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and make a stratified 80/20 train/validation split
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform, cluster_dict=cluster_dict)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform, cluster_dict=cluster_dict)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Train one model per cluster; remember which clusters produced a checkpoint
    trained_clusters = []
    for cluster in range(n_clusters):
        print(f"\nTraining model for cluster {cluster}")

        # Select rows by file name instead of iterating the dataset: iterating
        # decodes every image once per cluster and freezes the random
        # augmentation to a single sample for the whole training run.
        train_idx = [i for i, row in enumerate(train_dataset.df) if cluster_dict.get(row[0], -1) == cluster]
        val_idx = [i for i, row in enumerate(val_dataset.df) if cluster_dict.get(row[0], -1) == cluster]

        if not train_idx or not val_idx:
            print(f"Skipping cluster {cluster} due to insufficient data")
            continue

        train_cluster_loader = DataLoader(Subset(train_dataset, train_idx), batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
        val_cluster_loader = DataLoader(Subset(val_dataset, val_idx), batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

        # Swin-B model, loss, optimizer and learning-rate schedule
        model = load_swin_b_model(num_classes=17).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
        scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

        # Early stopping on the validation loss
        early_stopping = EarlyStopping(patience=5, min_delta=0.001)

        # Print the model structure
        print(f"\nModel structure of Swin-B for cluster {cluster}:")
        print_model_summary(model, (3, img_size, img_size))

        # Training loop. Start from -inf so the first epoch always writes a
        # checkpoint for this cluster instead of leaving a stale/missing file.
        best_val_f1 = float('-inf')
        for epoch in range(EPOCHS):
            train_loss, train_acc, train_f1 = train_one_epoch(train_cluster_loader, model, optimizer, loss_fn, device)
            val_loss, val_acc, val_f1 = validate(val_cluster_loader, model, loss_fn, device)
            scheduler.step()

            print(f"Epoch {epoch+1}/{EPOCHS}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                torch.save(model.state_dict(), f"swin_b_model_cluster_{cluster}.pth")

            # Early-stopping check
            early_stopping(val_loss)
            if early_stopping.early_stop:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

        trained_clusters.append(cluster)

    # Test-set inference
    print("\nPerforming inference on test data")
    if not trained_clusters:
        raise RuntimeError("No cluster model was trained; cannot run inference")

    test_preds = []

    # Only clusters that were actually trained have a checkpoint to load;
    # looping over range(n_clusters) would fail for skipped clusters.
    for cluster in trained_clusters:
        model = load_swin_b_model(num_classes=17).to(device)
        model.load_state_dict(torch.load(f"swin_b_model_cluster_{cluster}.pth", map_location=device))
        model.eval()

        cluster_preds = []
        with torch.no_grad():
            for image, _, _ in tqdm(test_loader, desc=f"Predicting cluster {cluster}"):
                image = image.to(device)
                preds = model(image)
                cluster_preds.extend(preds.detach().cpu().numpy())

        test_preds.append(cluster_preds)

    # Combine the models: average their raw outputs, then take the arg-max
    final_preds = np.mean(test_preds, axis=0)
    final_preds = np.argmax(final_preds, axis=1)

    # Save the predictions in the sample-submission layout
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = final_preds
    pred_df.to_csv("swin_pred.csv", index=False)
    # The message now names the file that is actually written.
    print("Prediction completed and saved to swin_pred.csv")


Clustering images...


Extracting features:  65%|██████▍   | 1015/1570 [03:49<02:08,  4.33it/s]

# 앙상블
CNN 모델(ConvNeXt V2 Large)과 Transformer 모델(ViT Large)을 결합한 앙상블 모델
- CNN 모델로 'convnextv2_large'를 사용합니다.
- Transformer 모델로 'vit_large_patch16_224'를 사용합니다.

## convnext v2 + vit

In [22]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# Dataset definition
class ImageDataset(Dataset):
    """Image dataset backed by a DataFrame or a CSV file.

    Rows are unpacked as ``(file name, target)``. Images are read from
    ``path``; a missing file is replaced by an all-zero 224x224x3 uint8 image
    after printing a warning.
    """

    def __init__(self, csv, path, transform=None):
        # Accept either an in-memory DataFrame or a path readable by pandas.
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``."""
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)

        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# Ensemble model definition
class EnsembleModel(nn.Module):
    """Wrap two models and return the element-wise mean of their outputs."""

    def __init__(self, model1, model2):
        super().__init__()
        # Registered as sub-modules so their parameters are exposed together.
        self.model1 = model1
        self.model2 = model2

    def forward(self, x):
        """Feed ``x`` to both models and average the two outputs."""
        summed = self.model1(x) + self.model2(x)
        return summed / 2

# Training function
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one optimisation pass over ``loader`` and return epoch metrics.

    Args:
        loader: Iterable yielding ``(image, targets)`` batches.
        model: Network to train; switched to train mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy, macro_f1)`` where ``loss`` is the sum of the
        per-batch losses divided by ``len(loader)``.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        # Predicted class = index of the largest output along dim 1.
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(batch_targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {loss_value:.4f}")

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

# Validation function
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` with gradients disabled.

    Args:
        loader: Iterable yielding ``(image, targets)`` batches.
        model: Network to evaluate; switched to eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy, macro_f1)`` where ``loss`` is the sum of the
        per-batch losses divided by ``len(loader)``.
    """
    model.eval()
    running_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            running_loss += loss_fn(logits, batch_targets).item()

            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(batch_targets.detach().cpu().numpy())

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

In [23]:
# Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'
img_size = 224
EPOCHS = 30
BATCH_SIZE = 16
LR = 1e-4
num_workers = 4

# Training-time augmentation pipeline
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Validation / test pipeline: resize and normalise only
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Load the labels and make a stratified 80/20 train/validation split.
# NOTE(review): the Swin experiments in this notebook read
# "train_correct_labeling.csv"; confirm that "train.csv" is intended here.
df = pd.read_csv(os.path.join(data_path, "train.csv"))
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# The two backbones that are trained jointly through EnsembleModel
model1 = timm.create_model('convnextv2_large', pretrained=True, num_classes=17)
model2 = timm.create_model('vit_large_patch16_224', pretrained=True, num_classes=17)

ensemble_model = EnsembleModel(model1, model2).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(ensemble_model.parameters(), lr=LR, weight_decay=1e-5)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Training loop. Start from -inf so the first epoch always writes a
# checkpoint; with 0 and a strict ">" a run whose F1 never rises above 0
# would later load a stale file left over from a previous run.
best_val_f1 = float('-inf')
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, ensemble_model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, ensemble_model, loss_fn, device)
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    # Keep only the weights with the best validation macro-F1
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(ensemble_model.state_dict(), "best_ensemble_model.pth")

# Test-set inference with the best checkpoint.
# map_location makes the load independent of the device it was saved from.
ensemble_model.load_state_dict(torch.load("best_ensemble_model.pth", map_location=device))
ensemble_model.eval()
preds_list = []

with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds = ensemble_model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# Save the predictions in the sample-submission layout
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("ensemble_pred.csv", index=False)
print("Prediction completed and saved to ensemble_pred.csv")

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Loss: 0.4874: 100%|██████████| 79/79 [01:03<00:00,  1.24it/s]


Epoch 1/30
Train Loss: 1.2447, Train Acc: 0.6210, Train F1: 0.5802
Val Loss: 0.6365, Val Acc: 0.7930, Val F1: 0.7082


Loss: 0.4748: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 2/30
Train Loss: 0.4485, Train Acc: 0.8432, Train F1: 0.8210
Val Loss: 0.3519, Val Acc: 0.8535, Val F1: 0.8308


Loss: 0.0189: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 3/30
Train Loss: 0.3265, Train Acc: 0.8734, Train F1: 0.8555
Val Loss: 0.3140, Val Acc: 0.8854, Val F1: 0.8341


Loss: 0.2191: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 4/30
Train Loss: 0.2187, Train Acc: 0.9037, Train F1: 0.8929
Val Loss: 0.2905, Val Acc: 0.8949, Val F1: 0.8817


Loss: 0.4078: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 5/30
Train Loss: 0.2068, Train Acc: 0.9236, Train F1: 0.9159
Val Loss: 0.3957, Val Acc: 0.8726, Val F1: 0.8509


Loss: 0.2037: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 6/30
Train Loss: 0.2214, Train Acc: 0.9132, Train F1: 0.9023
Val Loss: 0.3843, Val Acc: 0.8790, Val F1: 0.8583


Loss: 0.0819: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 7/30
Train Loss: 0.2072, Train Acc: 0.9204, Train F1: 0.9188
Val Loss: 0.2120, Val Acc: 0.9268, Val F1: 0.9185


Loss: 0.1178: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 8/30
Train Loss: 0.1417, Train Acc: 0.9371, Train F1: 0.9341
Val Loss: 0.2074, Val Acc: 0.9268, Val F1: 0.9205


Loss: 0.0011: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 9/30
Train Loss: 0.1199, Train Acc: 0.9538, Train F1: 0.9516
Val Loss: 0.1952, Val Acc: 0.9236, Val F1: 0.9202


Loss: 0.0478: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 10/30
Train Loss: 0.1205, Train Acc: 0.9554, Train F1: 0.9560
Val Loss: 0.1598, Val Acc: 0.9331, Val F1: 0.9301


Loss: 0.0004: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 11/30
Train Loss: 0.0810, Train Acc: 0.9682, Train F1: 0.9671
Val Loss: 0.2216, Val Acc: 0.9172, Val F1: 0.9040


Loss: 0.0742: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 12/30
Train Loss: 0.0840, Train Acc: 0.9689, Train F1: 0.9682
Val Loss: 0.1806, Val Acc: 0.9459, Val F1: 0.9405


Loss: 0.0168: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 13/30
Train Loss: 0.0546, Train Acc: 0.9793, Train F1: 0.9786
Val Loss: 0.1571, Val Acc: 0.9554, Val F1: 0.9496


Loss: 0.0002: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 14/30
Train Loss: 0.0565, Train Acc: 0.9809, Train F1: 0.9805
Val Loss: 0.1615, Val Acc: 0.9331, Val F1: 0.9276


Loss: 0.0007: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 15/30
Train Loss: 0.0368, Train Acc: 0.9857, Train F1: 0.9849
Val Loss: 0.2248, Val Acc: 0.9268, Val F1: 0.9219


Loss: 0.3899: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 16/30
Train Loss: 0.0380, Train Acc: 0.9873, Train F1: 0.9871
Val Loss: 0.1591, Val Acc: 0.9427, Val F1: 0.9399


Loss: 0.0028: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 17/30
Train Loss: 0.0209, Train Acc: 0.9944, Train F1: 0.9949
Val Loss: 0.1868, Val Acc: 0.9363, Val F1: 0.9316


Loss: 0.4257: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 18/30
Train Loss: 0.0212, Train Acc: 0.9936, Train F1: 0.9934
Val Loss: 0.1774, Val Acc: 0.9395, Val F1: 0.9348


Loss: 0.0012: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 19/30
Train Loss: 0.0289, Train Acc: 0.9896, Train F1: 0.9892
Val Loss: 0.1960, Val Acc: 0.9427, Val F1: 0.9372


Loss: 0.0004: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 20/30
Train Loss: 0.0120, Train Acc: 0.9976, Train F1: 0.9978
Val Loss: 0.1968, Val Acc: 0.9268, Val F1: 0.9216


Loss: 0.0600: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 21/30
Train Loss: 0.0219, Train Acc: 0.9920, Train F1: 0.9918
Val Loss: 0.1881, Val Acc: 0.9395, Val F1: 0.9340


Loss: 0.0003: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 22/30
Train Loss: 0.0185, Train Acc: 0.9944, Train F1: 0.9945
Val Loss: 0.1885, Val Acc: 0.9427, Val F1: 0.9389


Loss: 0.0012: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 23/30
Train Loss: 0.0113, Train Acc: 0.9968, Train F1: 0.9967
Val Loss: 0.2007, Val Acc: 0.9363, Val F1: 0.9315


Loss: 0.0018: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 24/30
Train Loss: 0.0130, Train Acc: 0.9944, Train F1: 0.9945
Val Loss: 0.2242, Val Acc: 0.9363, Val F1: 0.9308


Loss: 0.0013: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 25/30
Train Loss: 0.0063, Train Acc: 0.9984, Train F1: 0.9978
Val Loss: 0.1974, Val Acc: 0.9363, Val F1: 0.9305


Loss: 0.0042: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 26/30
Train Loss: 0.0088, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.1919, Val Acc: 0.9427, Val F1: 0.9383


Loss: 0.0152: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 27/30
Train Loss: 0.0079, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.1917, Val Acc: 0.9459, Val F1: 0.9416


Loss: 0.0004: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 28/30
Train Loss: 0.0062, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.1923, Val Acc: 0.9427, Val F1: 0.9383


Loss: 0.0033: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 29/30
Train Loss: 0.0037, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.1917, Val Acc: 0.9427, Val F1: 0.9383


Loss: 0.0006: 100%|██████████| 79/79 [01:04<00:00,  1.23it/s]


Epoch 30/30
Train Loss: 0.0050, Train Acc: 0.9976, Train F1: 0.9967
Val Loss: 0.1915, Val Acc: 0.9427, Val F1: 0.9383


100%|██████████| 197/197 [00:46<00:00,  4.27it/s]

Prediction completed and saved to ensemble_pred.csv





## 앙상블 모델 II - 리더 보드 제출용
CNN 모델(ConvNeXt V2 Large)과 Transformer 모델(Swin Transformers)을 결합한 앙상블 모델
- 하이퍼파라미터 튜닝(Hyperparameter tuning)이 전혀 되어 있지 않은 기본 모델 : 향후 최적화 필요
- CNN 모델로 'convnextv2_large'를 사용합니다.
- Transformer 모델로 'swin_large_patch4_window7_224'를 사용합니다.
- soft voting(기존 저장된 pth 사용)

In [5]:
import os
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader

# Soft-voting ensemble of two previously trained classifiers.
# Relies on names defined by earlier cells: timm, device, data_path,
# ImageDataset, val_transform, BATCH_SIZE and num_workers.

# Rebuild both architectures (17 output classes) without pretrained weights;
# the actual weights come from the checkpoints loaded below.
model1 = timm.create_model('convnextv2_large', pretrained=False, num_classes=17).to(device)
model2 = timm.create_model('swin_large_patch4_window7_224', pretrained=False, num_classes=17).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only machine.
model1.load_state_dict(torch.load('convNext_model.pth', map_location=device))
model2.load_state_dict(torch.load('swin_t_model.pth', map_location=device))

# Inference only: switch both models to evaluation mode.
model1.eval()
model2.eval()

# Test data: file names are read from sample_submission.csv and the images
# from test_preprocessed/; the validation transform is reused.
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# Soft voting: average the two models' class probabilities per sample and
# predict the class with the highest mean probability.
preds_list = []
with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds1 = model1(image)
        preds2 = model2(image)

        preds_avg = (torch.softmax(preds1, dim=1) + torch.softmax(preds2, dim=1)) / 2
        preds_list.extend(preds_avg.argmax(dim=1).detach().cpu().numpy())

# Write the submission file: keep the ID column, overwrite the target column.
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("ensemble_pred.csv", index=False)
print("Ensemble prediction completed and saved to ensemble_pred.csv")


100%|██████████| 99/99 [00:36<00:00,  2.74it/s]

Ensemble prediction completed and saved to ensemble_pred.csv





# layoutLMv3 모델
- 테스트용.

In [None]:
!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

In [None]:
!pip install git+https://github.com/facebookresearch/segment-anything.git

In [1]:
import gc
import torch
# Free memory left over from earlier cells before loading the next model:
# run a Python garbage-collection pass first, then ask PyTorch to empty its
# CUDA cache.
gc.collect()
torch.cuda.empty_cache()

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from transformers import LayoutLMv3Processor, LayoutLMv3ForSequenceClassification
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
import torch.nn as nn

class DocumentDataset(Dataset):
    """Dataset that runs document images through a processor.

    Args:
        csv: Either a ``pandas.DataFrame`` or a path to a CSV file. It must
            have an ``ID`` column holding image file names; an optional
            ``target_encoded`` column supplies the integer class label.
        path: Directory containing the image files.
        processor: Callable applied to each RGB PIL image with
            ``return_tensors="pt"`` (the caller in this file passes a
            ``LayoutLMv3Processor``).
        label_encoder: Stored on the instance; not used by the dataset
            itself.
        max_length: Sequence length used for padding and truncation.
    """

    def __init__(self, csv, path, processor, label_encoder=None, max_length=512):
        self.df = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.path = path
        self.processor = processor
        self.label_encoder = label_encoder
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the processor encoding for sample ``idx`` plus ``labels``.

        ``labels`` is the row's ``target_encoded`` value when that column
        exists, otherwise ``-1`` (used for the unlabeled test set).
        """
        row = self.df.iloc[idx]
        img_path = os.path.join(self.path, row['ID'])
        # Close the file handle deterministically; convert() returns a new,
        # fully loaded image that stays valid after the file is closed.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        encoding = self.processor(image, return_tensors="pt", max_length=self.max_length, padding="max_length", truncation=True)
        # Drop only the leading dimension (presumably the size-1 batch
        # dimension added by return_tensors="pt"). A bare squeeze() would
        # also collapse any other dimension that happens to have size 1.
        for key in list(encoding.keys()):
            encoding[key] = encoding[key].squeeze(0)

        label = row['target_encoded'] if 'target_encoded' in row else -1
        encoding['labels'] = torch.tensor(label, dtype=torch.long)
        return encoding

class CustomLayoutLMv3(nn.Module):
    """LayoutLMv3 encoder followed by a dropout + linear classification head.

    The original version wrapped ``LayoutLMv3ForSequenceClassification`` and
    read ``outputs.pooler_output``. That model returns a sequence-classifier
    output (loss / logits / hidden states / attentions) with no
    ``pooler_output`` field, so ``forward`` raised ``AttributeError``. This
    version uses the bare encoder and pools the first token itself.

    Args:
        num_labels: Number of target classes (size of the output logits).
    """

    def __init__(self, num_labels):
        super().__init__()
        # Local import: the file-level import only brings in the processor
        # and the sequence-classification wrapper.
        from transformers import LayoutLMv3Model

        self.layoutlmv3 = LayoutLMv3Model.from_pretrained("microsoft/layoutlmv3-base")
        self.dropout = nn.Dropout(0.1)
        # Size the head from the encoder config instead of hard-coding 768.
        self.classifier = nn.Linear(self.layoutlmv3.config.hidden_size, num_labels)

    def forward(self, **inputs):
        """Return classification logits of shape ``(batch, num_labels)``.

        Args:
            **inputs: Keyword tensors produced by the processor
                (e.g. ``input_ids``, ``bbox``, ``pixel_values``). A
                ``labels`` entry is accepted and ignored, because the
                training loops pass the whole batch and compute the loss
                outside the model.
        """
        # The bare encoder does not accept ``labels``; ``inputs`` is a fresh
        # kwargs dict, so popping does not affect the caller's batch.
        inputs.pop("labels", None)
        outputs = self.layoutlmv3(**inputs)
        # Use the first token's final hidden state as the document embedding.
        pooled_output = outputs.last_hidden_state[:, 0]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

def train_one_epoch(loader, model, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable of batches; each batch is a mapping of tensors that
            includes a ``'labels'`` entry.
        model: Module called as ``model(**batch)``, returning logits.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable applied to ``(logits, labels)``.
        device: Device every tensor in the batch is moved to.

    Returns:
        Tuple ``(mean_loss, accuracy, macro_f1)`` over the epoch, where the
        loss is averaged over the number of batches.
    """
    model.train()
    running_loss = 0
    epoch_preds, epoch_targets = [], []

    progress = tqdm(loader)
    for batch in progress:
        batch = {key: tensor.to(device) for key, tensor in batch.items()}
        labels = batch['labels']

        optimizer.zero_grad()
        logits = model(**batch)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_preds.extend(logits.argmax(dim=-1).cpu().numpy())
        epoch_targets.extend(labels.cpu().numpy())

        # Show the latest batch loss on the progress bar.
        progress.set_description(f"Loss: {batch_loss:.4f}")

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(epoch_targets, epoch_preds)
    macro_f1 = f1_score(epoch_targets, epoch_preds, average='macro')
    return mean_loss, accuracy, macro_f1

def validate(loader, model, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        loader: Iterable of batches; each batch is a mapping of tensors that
            includes a ``'labels'`` entry.
        model: Module called as ``model(**batch)``, returning logits.
        criterion: Loss callable applied to ``(logits, labels)``.
        device: Device every tensor in the batch is moved to.

    Returns:
        Tuple ``(mean_loss, accuracy, macro_f1)``, where the loss is averaged
        over the number of batches.
    """
    model.eval()
    running_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for batch in loader:
            batch = {key: tensor.to(device) for key, tensor in batch.items()}
            labels = batch['labels']
            logits = model(**batch)

            running_loss += criterion(logits, labels).item()
            all_preds.extend(logits.argmax(dim=-1).cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

if __name__ == "__main__":
    # End-to-end script: fine-tune the LayoutLMv3 classifier, keep the
    # checkpoint with the best validation macro-F1, then predict on the test
    # set and write pred.csv.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    LR = 2e-5
    EPOCHS = 30
    BATCH_SIZE = 8
    num_workers = 4

    processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")

    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))

    # Label encoding: map the raw targets to contiguous integer ids.
    label_encoder = LabelEncoder()
    df['target_encoded'] = label_encoder.fit_transform(df['target'])
    num_labels = len(df['target'].unique())

    print(f"Number of unique classes: {num_labels}")

    # Stratified 80/20 split so both parts keep the class proportions.
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = DocumentDataset(train_df, os.path.join(data_path, "train_preprocessed/"), processor, label_encoder)
    val_dataset = DocumentDataset(val_df, os.path.join(data_path, "train_preprocessed/"), processor, label_encoder)
    test_dataset = DocumentDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), processor)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers)

    model = CustomLayoutLMv3(num_labels=num_labels)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=0.01)
    # mode='max' because the scheduler is stepped with validation F1.
    # NOTE(review): `verbose` is deprecated in recent PyTorch releases;
    # confirm against the installed version before upgrading.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True)

    # Start below any reachable F1 so the first epoch always writes
    # best_model.pth. With an initial 0 and a strict '>' comparison, a run
    # whose validation F1 stays at 0 never saved a checkpoint, and the reload
    # below then failed or picked up a stale file from an earlier run.
    best_val_f1 = float("-inf")
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, criterion, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, criterion, device)
        scheduler.step(val_f1)

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "best_model.pth")

    # Restore the best checkpoint; map_location keeps the load working when
    # the current device differs from the one the tensors were saved on.
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model.eval()
    preds_list = []

    with torch.no_grad():
        for batch in tqdm(test_loader):
            # Test batches carry placeholder labels (-1); do not pass them on.
            batch = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
            logits = model(**batch)
            preds = logits.argmax(dim=-1)
            preds_list.extend(preds.cpu().numpy())

    # Map the predicted ids back to the original labels for the submission.
    pred_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
    pred_df['target'] = label_encoder.inverse_transform(preds_list)
    pred_df.to_csv("pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")

background 실행을 위해 .py 파일로 분리
- image_text.py
- image_bert.py
- layoutlmv3.py

# 3가지 모델 앙상블