# CNN 기반 모델

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

## Simple EfficientNet-B0

In [None]:
import gc
gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [None]:
# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

In [None]:
# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

In [None]:
# 하이퍼파라미터 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'
model_name = 'efficientnet_b0'
img_size = 384
LR = 1e-3
EPOCHS = 30
BATCH_SIZE = 32
num_workers = 4

# 데이터 증강 설정
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [None]:
# 데이터 로드 및 분할
df = pd.read_csv("../data/train.csv")
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

train_dataset = ImageDataset(train_df, "../data/train_preprocessed/", transform=train_transform)
val_dataset = ImageDataset(val_df, "../data/train_preprocessed/", transform=val_transform)
test_dataset = ImageDataset("../data/sample_submission.csv", "../data/test_preprocessed/", transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

In [None]:
# 모델 설정
model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

In [None]:
from torchsummary import summary

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)
    
# 모델 구조 출력
print(f"\nModel structure of {model_name}:")
print_model_summary(model, (3, img_size, img_size))

# 모델 아키텍처 출력
print("\nModel architecture:")
print(model)

In [None]:
# 학습 루프
best_val_f1 = 0
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")

In [None]:
# 테스트 데이터 추론
model.load_state_dict(torch.load("best_model.pth"))
model.eval()
preds_list = []

for image, _ in tqdm(test_loader):
    image = image.to(device)
    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

In [None]:
# 결과 저장
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

## EfficientNet-B4

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import gc

gc.collect()
torch.cuda.empty_cache()


# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((380, 380, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'efficientnet_b4'
    img_size = 380  # EfficientNet-B4에 적합한 이미지 크기
    LR = 5e-4  # 학습률 조정
    EPOCHS = 30
    BATCH_SIZE = 16  # 배치 크기 감소
    num_workers = 4

    # 데이터 증강 설정
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # 데이터 로드 및 분할
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # 모델 설정
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # 모델 구조 출력
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # 학습 루프
    best_val_f1 = 0
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "best_model.pth")

    # 테스트 데이터 추론
    model.load_state_dict(torch.load("best_model.pth"))
    model.eval()
    preds_list = []

    for image, _ in tqdm(test_loader):
        image = image.to(device)
        with torch.no_grad():
            preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

    # 결과 저장
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")

## ConvNext V2 Large 모델

In [1]:
import torch
import gc

gc.collect()
torch.cuda.empty_cache()

In [None]:
import os
import timm
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary


import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

# Early Stopping 클래스
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
        self.best_f1 = -np.Inf

    def __call__(self, val_loss, f1_score, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, f1_score, model):
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
        self.best_f1 = f1_score


# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'convnextv2_large'
    img_size = 224  # ConvNeXt V2 Large에 적합한 이미지 크기
    LR = 1e-4  # 학습률 조정
    EPOCHS = 100
    BATCH_SIZE = 32  # 배치 크기 조정
    num_workers = 4

    # 데이터 증강 설정
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # 데이터 로드 및 분할
    df = pd.read_csv(os.path.join(data_path, "augmented_train.csv"))
    train_df, val_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "augmented_train/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "augmented_train/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # 모델 설정
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    early_stopping = EarlyStopping(patience=7, verbose=True, delta=0.001, path='aug_conv_model.pth')

    # 모델 구조 출력
    # print(f"\nModel structure of {model_name}:")
    # print_model_summary(model, (3, img_size, img_size))

    # 학습 루프
    best_val_f1 = 0
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "aug_conv_model.pth")
        
        early_stopping(val_loss, val_f1, model)
        if early_stopping.early_stop:
            print(f"Early stopping. Best validation loss: {early_stopping.val_loss_min:.6f}, "
                  f"Best F1 score: {early_stopping.best_f1:.6f}")
            break

    # 최종 모델 저장
    torch.save(model.state_dict(), "aug_conv_model_final.pth")

    # 테스트 데이터 추론
    model.load_state_dict(torch.load("aug_conv_model_final.pth"))
    model.eval()
    preds_list = []

    for image, _ in tqdm(test_loader):
        image = image.to(device)
        with torch.no_grad():
            preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

    # 결과 저장
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("conv_pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")


## convNext v2 + fine tunning

In [3]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import gc

gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []
    misclassified = []

    with torch.no_grad():
        for i, (image, targets) in enumerate(loader):
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_np = preds.argmax(dim=1).detach().cpu().numpy()
            targets_np = targets.detach().cpu().numpy()
            preds_list.extend(preds_np)
            targets_list.extend(targets_np)

            # 오분류된 데이터의 인덱스 저장
            misclassified.extend(np.where(preds_np != targets_np)[0] + i * loader.batch_size)

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1, misclassified

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)

# EarlyStopping 클래스 정의
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0

# 메인 실행 코드
if __name__ == "__main__":
    # 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'convnextv2_large'
    img_size = 224
    LR = 1e-4
    EPOCHS = 30
    BATCH_SIZE = 32
    num_workers = 4

    # 데이터 증강 설정
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # 데이터 로드 및 분할
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # 모델 설정
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # 모델 구조 출력
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # EarlyStopping 초기화
    early_stopping = EarlyStopping(patience=5, min_delta=0.001)

    # 학습 루프
    best_val_f1 = 0
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1, misclassified = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "convNext_model.pth")

        # Early Stopping 체크
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    # 오분류된 데이터로 fine-tuning
    misclassified_df = val_df.iloc[misclassified]
    misclassified_dataset = ImageDataset(misclassified_df, os.path.join(data_path, "train/"), transform=train_transform)
    misclassified_loader = DataLoader(misclassified_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)

    print("\nFine-tuning with misclassified data")
    for epoch in range(10):  # 10 에폭 동안 fine-tuning
        train_loss, train_acc, train_f1 = train_one_epoch(misclassified_loader, model, optimizer, loss_fn, device)
        print(f"Fine-tuning Epoch {epoch+1}/5")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")

    # 최종 모델 저장
    torch.save(model.state_dict(), "convNext_model_final.pth")

    # 테스트 데이터 추론
    model.load_state_dict(torch.load("convNext_model_final.pth"))
    model.eval()
    preds_list = []

    for image, _ in tqdm(test_loader):
        image = image.to(device)
        with torch.no_grad():
            preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

    # 결과 저장
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("conv_fine_pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")


Model structure of convnextv2_large:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 192, 56, 56]           9,408
       LayerNorm2d-2          [-1, 192, 56, 56]             384
          Identity-3          [-1, 192, 56, 56]               0
            Conv2d-4          [-1, 192, 56, 56]           9,600
         LayerNorm-5          [-1, 56, 56, 192]             384
            Linear-6          [-1, 56, 56, 768]         148,224
              GELU-7          [-1, 56, 56, 768]               0
           Dropout-8          [-1, 56, 56, 768]               0
GlobalResponseNorm-9          [-1, 56, 56, 768]           1,536
           Linear-10          [-1, 56, 56, 192]         147,648
          Dropout-11          [-1, 56, 56, 192]               0
GlobalResponseNormMlp-12          [-1, 56, 56, 192]               0
         Identity-13          [-1, 192, 56, 56]              

Loss: 0.2666: 100%|██████████| 40/40 [00:29<00:00,  1.36it/s]


Epoch 1/30
Train Loss: 0.9142, Train Acc: 0.7142, Train F1: 0.6951
Val Loss: 0.3429, Val Acc: 0.8758, Val F1: 0.8352


Loss: 0.0162: 100%|██████████| 40/40 [00:29<00:00,  1.35it/s]


Epoch 2/30
Train Loss: 0.2789, Train Acc: 0.8941, Train F1: 0.8816
Val Loss: 0.3312, Val Acc: 0.8662, Val F1: 0.8275


Loss: 0.0052: 100%|██████████| 40/40 [00:29<00:00,  1.35it/s]


Epoch 3/30
Train Loss: 0.2074, Train Acc: 0.9188, Train F1: 0.9083
Val Loss: 0.2308, Val Acc: 0.9204, Val F1: 0.9089


Loss: 0.5555: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 4/30
Train Loss: 0.1710, Train Acc: 0.9347, Train F1: 0.9280
Val Loss: 0.2320, Val Acc: 0.9140, Val F1: 0.9007


Loss: 0.2525: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 5/30
Train Loss: 0.1384, Train Acc: 0.9443, Train F1: 0.9392
Val Loss: 0.1470, Val Acc: 0.9459, Val F1: 0.9418


Loss: 0.0816: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 6/30
Train Loss: 0.0985, Train Acc: 0.9642, Train F1: 0.9626
Val Loss: 0.2139, Val Acc: 0.9268, Val F1: 0.9174


Loss: 0.0811: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 7/30
Train Loss: 0.0926, Train Acc: 0.9697, Train F1: 0.9683
Val Loss: 0.2329, Val Acc: 0.8981, Val F1: 0.8919


Loss: 0.2371: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 8/30
Train Loss: 0.0625, Train Acc: 0.9809, Train F1: 0.9812
Val Loss: 0.1600, Val Acc: 0.9459, Val F1: 0.9410


Loss: 0.0015: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 9/30
Train Loss: 0.0628, Train Acc: 0.9793, Train F1: 0.9774
Val Loss: 0.2908, Val Acc: 0.9045, Val F1: 0.8956


Loss: 0.0145: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 10/30
Train Loss: 0.0458, Train Acc: 0.9785, Train F1: 0.9777
Val Loss: 0.2150, Val Acc: 0.9395, Val F1: 0.9314
Early stopping triggered

Fine-tuning with misclassified data


Loss: 2.2694: 100%|██████████| 1/1 [00:00<00:00,  1.35it/s]


Fine-tuning Epoch 1/5
Train Loss: 2.2694, Train Acc: 0.2632, Train F1: 0.1238


Loss: 2.2706: 100%|██████████| 1/1 [00:00<00:00,  1.37it/s]


Fine-tuning Epoch 2/5
Train Loss: 2.2706, Train Acc: 0.4211, Train F1: 0.3792


Loss: 0.8660: 100%|██████████| 1/1 [00:00<00:00,  1.37it/s]


Fine-tuning Epoch 3/5
Train Loss: 0.8660, Train Acc: 0.7368, Train F1: 0.5988


Loss: 0.3658: 100%|██████████| 1/1 [00:00<00:00,  1.36it/s]


Fine-tuning Epoch 4/5
Train Loss: 0.3658, Train Acc: 0.8947, Train F1: 0.8084


Loss: 0.3274: 100%|██████████| 1/1 [00:00<00:00,  1.36it/s]


Fine-tuning Epoch 5/5
Train Loss: 0.3274, Train Acc: 0.8947, Train F1: 0.8639


Loss: 0.3086: 100%|██████████| 1/1 [00:00<00:00,  1.37it/s]


Fine-tuning Epoch 6/5
Train Loss: 0.3086, Train Acc: 0.8947, Train F1: 0.8639


Loss: 0.1315: 100%|██████████| 1/1 [00:00<00:00,  1.40it/s]


Fine-tuning Epoch 7/5
Train Loss: 0.1315, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.2255: 100%|██████████| 1/1 [00:00<00:00,  1.35it/s]


Fine-tuning Epoch 8/5
Train Loss: 0.2255, Train Acc: 0.9474, Train F1: 0.9737


Loss: 0.1177: 100%|██████████| 1/1 [00:00<00:00,  1.38it/s]


Fine-tuning Epoch 9/5
Train Loss: 0.1177, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.1283: 100%|██████████| 1/1 [00:00<00:00,  1.38it/s]


Fine-tuning Epoch 10/5
Train Loss: 0.1283, Train Acc: 0.9474, Train F1: 0.9481


  model.load_state_dict(torch.load("convNext_model_final.pth"))
100%|██████████| 99/99 [00:18<00:00,  5.29it/s]

Prediction completed and saved to pred.csv





## convNext v2 + new augmentaion + K-Fold finetunning + ensemble

In [2]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import gc

gc.collect()
torch.cuda.empty_cache()

class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []
    misclassified = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_np = preds.argmax(dim=1).detach().cpu().numpy()
            targets_np = targets.detach().cpu().numpy()
            preds_list.extend(preds_np)
            targets_list.extend(targets_np)

            misclassified.extend(np.where(preds_np != targets_np)[0])

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1, misclassified

def print_model_summary(model, input_size):
    summary(model, input_size)

class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0

if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'convnextv2_large'
    img_size = 224
    LR = 1e-4
    EPOCHS = 30
    BATCH_SIZE = 32
    num_workers = 4
    n_splits = 5

    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.OneOf([
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.RandomGamma(gamma_limit=(80, 120), p=0.5),
        ], p=0.5),
        A.OneOf([
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
            A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.5),
        ], p=0.5),
        A.OneOf([
            A.MotionBlur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
            A.GaussianBlur(blur_limit=3, p=0.5),
        ], p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=5, border_mode=0, p=0.5),
        A.CoarseDropout(max_holes=8, max_height=img_size//20, max_width=img_size//20, min_holes=5, fill_value=255, p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['target']), 1):
        print(f"\nFold {fold}")

        train_df = df.iloc[train_idx].reset_index(drop=True)
        val_df = df.iloc[val_idx].reset_index(drop=True)

        train_dataset = ImageDataset(train_df, os.path.join(data_path, "train/"), transform=train_transform)
        val_dataset = ImageDataset(val_df, os.path.join(data_path, "train/"), transform=val_transform)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

        model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
        scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

        if fold == 1:
            print(f"\nModel structure of {model_name}:")
            print_model_summary(model, (3, img_size, img_size))

        best_val_f1 = 0
        misclassified_data = []
        early_stopping = EarlyStopping(patience=5, min_delta=0.001)

        for epoch in range(EPOCHS):
            train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
            val_loss, val_acc, val_f1, misclassified = validate(val_loader, model, loss_fn, device)
            scheduler.step()

            print(f"Epoch {epoch+1}/{EPOCHS}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                torch.save(model.state_dict(), f"convNext_model_fold{fold}.pth")

            misclassified_data.extend(val_df.iloc[misclassified].index)

            early_stopping(val_loss)
            if early_stopping.early_stop:
                print("Early stopping")
                break

        misclassified_df = df.loc[misclassified_data]
        misclassified_dataset = ImageDataset(misclassified_df, os.path.join(data_path, "train/"), transform=train_transform)
        misclassified_loader = DataLoader(misclassified_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)

        print("\nFine-tuning with misclassified data")
        for epoch in range(5):
            train_loss, train_acc, train_f1 = train_one_epoch(misclassified_loader, model, optimizer, loss_fn, device)
            print(f"Epoch {epoch+1}/5")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")

        torch.save(model.state_dict(), f"convNext_model_fold{fold}_final.pth")

    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test/"), transform=val_transform)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    ensemble_preds = []
    for fold in range(1, n_splits + 1):
        model.load_state_dict(torch.load(f"convNext_model_fold{fold}_final.pth"))
        model.eval()
        fold_preds = []

        for image, _ in tqdm(test_loader, desc=f"Predicting Fold {fold}"):
            image = image.to(device)
            with torch.no_grad():
                preds = model(image)
            fold_preds.extend(preds.softmax(dim=1).detach().cpu().numpy())

        ensemble_preds.append(fold_preds)

    final_preds = np.mean(ensemble_preds, axis=0).argmax(axis=1)

    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = final_preds
    pred_df.to_csv("pred_ensemble.csv", index=False)
    print("Ensemble prediction completed and saved to pred_ensemble.csv")




Fold 1

Model structure of convnextv2_large:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 192, 56, 56]           9,408
       LayerNorm2d-2          [-1, 192, 56, 56]             384
          Identity-3          [-1, 192, 56, 56]               0
            Conv2d-4          [-1, 192, 56, 56]           9,600
         LayerNorm-5          [-1, 56, 56, 192]             384
            Linear-6          [-1, 56, 56, 768]         148,224
              GELU-7          [-1, 56, 56, 768]               0
           Dropout-8          [-1, 56, 56, 768]               0
GlobalResponseNorm-9          [-1, 56, 56, 768]           1,536
           Linear-10          [-1, 56, 56, 192]         147,648
          Dropout-11          [-1, 56, 56, 192]               0
GlobalResponseNormMlp-12          [-1, 56, 56, 192]               0
         Identity-13          [-1, 192, 56, 56]      

Loss: 0.3000: 100%|██████████| 40/40 [00:29<00:00,  1.36it/s]


Epoch 1/30
Train Loss: 0.8191, Train Acc: 0.7524, Train F1: 0.7394
Val Loss: 0.3038, Val Acc: 0.8790, Val F1: 0.8583


Loss: 0.0397: 100%|██████████| 40/40 [00:29<00:00,  1.35it/s]


Epoch 2/30
Train Loss: 0.2373, Train Acc: 0.9053, Train F1: 0.8968
Val Loss: 0.2177, Val Acc: 0.9045, Val F1: 0.8850


Loss: 0.0291: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 3/30
Train Loss: 0.1529, Train Acc: 0.9395, Train F1: 0.9360
Val Loss: 0.1973, Val Acc: 0.9108, Val F1: 0.9005


Loss: 0.0148: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 4/30
Train Loss: 0.0879, Train Acc: 0.9674, Train F1: 0.9661
Val Loss: 0.1966, Val Acc: 0.9331, Val F1: 0.9209


Loss: 0.0007: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 5/30
Train Loss: 0.0640, Train Acc: 0.9777, Train F1: 0.9784
Val Loss: 0.1732, Val Acc: 0.9490, Val F1: 0.9404


Loss: 0.0009: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 6/30
Train Loss: 0.0454, Train Acc: 0.9841, Train F1: 0.9843
Val Loss: 0.1889, Val Acc: 0.9522, Val F1: 0.9460


Loss: 0.0022: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 7/30
Train Loss: 0.0230, Train Acc: 0.9928, Train F1: 0.9934
Val Loss: 0.2811, Val Acc: 0.9363, Val F1: 0.9257


Loss: 0.0028: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 8/30
Train Loss: 0.0569, Train Acc: 0.9801, Train F1: 0.9795
Val Loss: 0.3390, Val Acc: 0.9045, Val F1: 0.8940


Loss: 0.0018: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 9/30
Train Loss: 0.0415, Train Acc: 0.9833, Train F1: 0.9827
Val Loss: 0.2686, Val Acc: 0.9299, Val F1: 0.9218


Loss: 0.0004: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 10/30
Train Loss: 0.0239, Train Acc: 0.9904, Train F1: 0.9909
Val Loss: 0.2953, Val Acc: 0.9140, Val F1: 0.9039
Early stopping

Fine-tuning with misclassified data


Loss: 0.0003: 100%|██████████| 8/8 [00:06<00:00,  1.28it/s]


Epoch 1/5
Train Loss: 0.0044, Train Acc: 0.9960, Train F1: 0.9962


Loss: 0.0002: 100%|██████████| 8/8 [00:06<00:00,  1.29it/s]


Epoch 2/5
Train Loss: 0.0010, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0007: 100%|██████████| 8/8 [00:06<00:00,  1.29it/s]


Epoch 3/5
Train Loss: 0.0009, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0003: 100%|██████████| 8/8 [00:06<00:00,  1.29it/s]


Epoch 4/5
Train Loss: 0.0003, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0003: 100%|██████████| 8/8 [00:06<00:00,  1.29it/s]


Epoch 5/5
Train Loss: 0.0004, Train Acc: 1.0000, Train F1: 1.0000

Fold 2


Loss: 0.2489: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 1/30
Train Loss: 0.9736, Train Acc: 0.6990, Train F1: 0.6796
Val Loss: 0.4136, Val Acc: 0.8503, Val F1: 0.8294


Loss: 0.0315: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 2/30
Train Loss: 0.2272, Train Acc: 0.9156, Train F1: 0.9069
Val Loss: 0.1553, Val Acc: 0.9268, Val F1: 0.9180


Loss: 0.0192: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 3/30
Train Loss: 0.1267, Train Acc: 0.9498, Train F1: 0.9486
Val Loss: 0.2288, Val Acc: 0.9140, Val F1: 0.9112


Loss: 0.0719: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 4/30
Train Loss: 0.1010, Train Acc: 0.9602, Train F1: 0.9585
Val Loss: 0.1379, Val Acc: 0.9459, Val F1: 0.9460


Loss: 0.0226: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 5/30
Train Loss: 0.0870, Train Acc: 0.9650, Train F1: 0.9625
Val Loss: 0.1803, Val Acc: 0.9395, Val F1: 0.9383


Loss: 0.1696: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 6/30
Train Loss: 0.0623, Train Acc: 0.9745, Train F1: 0.9749
Val Loss: 0.1711, Val Acc: 0.9268, Val F1: 0.9167


Loss: 0.2599: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 7/30
Train Loss: 0.0503, Train Acc: 0.9785, Train F1: 0.9782
Val Loss: 0.1470, Val Acc: 0.9363, Val F1: 0.9358


Loss: 0.0261: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 8/30
Train Loss: 0.0366, Train Acc: 0.9881, Train F1: 0.9881
Val Loss: 0.1707, Val Acc: 0.9459, Val F1: 0.9425


Loss: 0.0006: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 9/30
Train Loss: 0.0233, Train Acc: 0.9896, Train F1: 0.9893
Val Loss: 0.2952, Val Acc: 0.9140, Val F1: 0.9135
Early stopping

Fine-tuning with misclassified data


Loss: 0.0355: 100%|██████████| 7/7 [00:05<00:00,  1.26it/s]


Epoch 1/5
Train Loss: 0.0431, Train Acc: 0.9773, Train F1: 0.8854


Loss: 0.0015: 100%|██████████| 7/7 [00:05<00:00,  1.26it/s]


Epoch 2/5
Train Loss: 0.0030, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0031: 100%|██████████| 7/7 [00:05<00:00,  1.26it/s]


Epoch 3/5
Train Loss: 0.0017, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0005: 100%|██████████| 7/7 [00:05<00:00,  1.25it/s]


Epoch 4/5
Train Loss: 0.0009, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0004: 100%|██████████| 7/7 [00:05<00:00,  1.26it/s]


Epoch 5/5
Train Loss: 0.0004, Train Acc: 1.0000, Train F1: 1.0000

Fold 3


Loss: 0.3387: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 1/30
Train Loss: 0.8628, Train Acc: 0.7357, Train F1: 0.7151
Val Loss: 0.2835, Val Acc: 0.9013, Val F1: 0.8785


Loss: 0.2889: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 2/30
Train Loss: 0.2508, Train Acc: 0.8981, Train F1: 0.8829
Val Loss: 0.2340, Val Acc: 0.9045, Val F1: 0.8899


Loss: 0.0812: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 3/30
Train Loss: 0.1691, Train Acc: 0.9363, Train F1: 0.9296
Val Loss: 0.1590, Val Acc: 0.9459, Val F1: 0.9470


Loss: 0.0016: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 4/30
Train Loss: 0.1134, Train Acc: 0.9498, Train F1: 0.9447
Val Loss: 0.2332, Val Acc: 0.9076, Val F1: 0.9008


Loss: 0.0057: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 5/30
Train Loss: 0.0588, Train Acc: 0.9817, Train F1: 0.9810
Val Loss: 0.1414, Val Acc: 0.9459, Val F1: 0.9432


Loss: 0.0006: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 6/30
Train Loss: 0.0285, Train Acc: 0.9873, Train F1: 0.9871
Val Loss: 0.1545, Val Acc: 0.9522, Val F1: 0.9537


Loss: 0.0006: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 7/30
Train Loss: 0.0322, Train Acc: 0.9881, Train F1: 0.9874
Val Loss: 0.2753, Val Acc: 0.9236, Val F1: 0.9242


Loss: 0.0030: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 8/30
Train Loss: 0.0268, Train Acc: 0.9920, Train F1: 0.9919
Val Loss: 0.1844, Val Acc: 0.9490, Val F1: 0.9537


Loss: 0.0182: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 9/30
Train Loss: 0.0242, Train Acc: 0.9928, Train F1: 0.9930
Val Loss: 0.2028, Val Acc: 0.9490, Val F1: 0.9383


Loss: 0.0004: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 10/30
Train Loss: 0.0158, Train Acc: 0.9920, Train F1: 0.9915
Val Loss: 0.2649, Val Acc: 0.9427, Val F1: 0.9465
Early stopping

Fine-tuning with misclassified data


Loss: 0.0233: 100%|██████████| 7/7 [00:05<00:00,  1.29it/s]


Epoch 1/5
Train Loss: 0.0192, Train Acc: 0.9953, Train F1: 0.9937


Loss: 0.0121: 100%|██████████| 7/7 [00:05<00:00,  1.29it/s]


Epoch 2/5
Train Loss: 0.0242, Train Acc: 0.9953, Train F1: 0.9964


Loss: 0.0006: 100%|██████████| 7/7 [00:05<00:00,  1.29it/s]


Epoch 3/5
Train Loss: 0.0079, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0005: 100%|██████████| 7/7 [00:05<00:00,  1.28it/s]


Epoch 4/5
Train Loss: 0.0006, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0017: 100%|██████████| 7/7 [00:05<00:00,  1.29it/s]


Epoch 5/5
Train Loss: 0.0009, Train Acc: 1.0000, Train F1: 1.0000

Fold 4


Loss: 0.5893: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 1/30
Train Loss: 1.1355, Train Acc: 0.6425, Train F1: 0.6094
Val Loss: 0.3313, Val Acc: 0.8822, Val F1: 0.8322


Loss: 0.2319: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 2/30
Train Loss: 0.3021, Train Acc: 0.8822, Train F1: 0.8674
Val Loss: 0.2439, Val Acc: 0.9108, Val F1: 0.8810


Loss: 0.0319: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 3/30
Train Loss: 0.1494, Train Acc: 0.9403, Train F1: 0.9340
Val Loss: 0.2276, Val Acc: 0.9236, Val F1: 0.9050


Loss: 0.0891: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 4/30
Train Loss: 0.1041, Train Acc: 0.9554, Train F1: 0.9537
Val Loss: 0.1598, Val Acc: 0.9331, Val F1: 0.9349


Loss: 0.1453: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 5/30
Train Loss: 0.0634, Train Acc: 0.9737, Train F1: 0.9744
Val Loss: 0.1860, Val Acc: 0.9427, Val F1: 0.9272


Loss: 0.0079: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 6/30
Train Loss: 0.0479, Train Acc: 0.9817, Train F1: 0.9810
Val Loss: 0.2045, Val Acc: 0.9299, Val F1: 0.9256


Loss: 0.0026: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 7/30
Train Loss: 0.0210, Train Acc: 0.9920, Train F1: 0.9916
Val Loss: 0.5239, Val Acc: 0.9108, Val F1: 0.9026


Loss: 0.0023: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 8/30
Train Loss: 0.0996, Train Acc: 0.9674, Train F1: 0.9665
Val Loss: 0.2299, Val Acc: 0.9204, Val F1: 0.9104


Loss: 0.0442: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 9/30
Train Loss: 0.0272, Train Acc: 0.9912, Train F1: 0.9918
Val Loss: 0.2198, Val Acc: 0.9363, Val F1: 0.9295
Early stopping

Fine-tuning with misclassified data


Loss: 0.0025: 100%|██████████| 7/7 [00:05<00:00,  1.23it/s]


Epoch 1/5
Train Loss: 0.0456, Train Acc: 0.9910, Train F1: 0.9940


Loss: 0.0018: 100%|██████████| 7/7 [00:05<00:00,  1.23it/s]


Epoch 2/5
Train Loss: 0.0036, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0010: 100%|██████████| 7/7 [00:05<00:00,  1.23it/s]


Epoch 3/5
Train Loss: 0.0012, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0003: 100%|██████████| 7/7 [00:05<00:00,  1.23it/s]


Epoch 4/5
Train Loss: 0.0027, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0004: 100%|██████████| 7/7 [00:05<00:00,  1.24it/s]


Epoch 5/5
Train Loss: 0.0003, Train Acc: 1.0000, Train F1: 1.0000

Fold 5


Loss: 0.4951: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 1/30
Train Loss: 0.8277, Train Acc: 0.7532, Train F1: 0.7310
Val Loss: 0.3096, Val Acc: 0.8949, Val F1: 0.8761


Loss: 0.0037: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 2/30
Train Loss: 0.2414, Train Acc: 0.9053, Train F1: 0.8950
Val Loss: 0.3961, Val Acc: 0.8854, Val F1: 0.8565


Loss: 0.0087: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 3/30
Train Loss: 0.1802, Train Acc: 0.9275, Train F1: 0.9228
Val Loss: 0.2693, Val Acc: 0.9013, Val F1: 0.8795


Loss: 0.0332: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 4/30
Train Loss: 0.1195, Train Acc: 0.9459, Train F1: 0.9434
Val Loss: 0.2757, Val Acc: 0.9013, Val F1: 0.8908


Loss: 0.0636: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 5/30
Train Loss: 0.0597, Train Acc: 0.9761, Train F1: 0.9766
Val Loss: 0.1685, Val Acc: 0.9363, Val F1: 0.9290


Loss: 0.0007: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 6/30
Train Loss: 0.0498, Train Acc: 0.9777, Train F1: 0.9785
Val Loss: 0.2059, Val Acc: 0.9299, Val F1: 0.9291


Loss: 0.0080: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 7/30
Train Loss: 0.0233, Train Acc: 0.9928, Train F1: 0.9927
Val Loss: 0.2588, Val Acc: 0.9331, Val F1: 0.9229


Loss: 0.0006: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 8/30
Train Loss: 0.0178, Train Acc: 0.9944, Train F1: 0.9949
Val Loss: 0.3170, Val Acc: 0.9076, Val F1: 0.8968


Loss: 0.0477: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 9/30
Train Loss: 0.0291, Train Acc: 0.9920, Train F1: 0.9920
Val Loss: 0.1705, Val Acc: 0.9459, Val F1: 0.9411


Loss: 0.0031: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Epoch 10/30
Train Loss: 0.0265, Train Acc: 0.9881, Train F1: 0.9880
Val Loss: 0.2058, Val Acc: 0.9331, Val F1: 0.9272
Early stopping

Fine-tuning with misclassified data


Loss: 0.0030: 100%|██████████| 9/9 [00:06<00:00,  1.37it/s]


Epoch 1/5
Train Loss: 0.0170, Train Acc: 0.9962, Train F1: 0.9966


Loss: 0.0005: 100%|██████████| 9/9 [00:06<00:00,  1.38it/s]


Epoch 2/5
Train Loss: 0.0013, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0006: 100%|██████████| 9/9 [00:06<00:00,  1.37it/s]


Epoch 3/5
Train Loss: 0.0008, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0006: 100%|██████████| 9/9 [00:06<00:00,  1.37it/s]


Epoch 4/5
Train Loss: 0.0005, Train Acc: 1.0000, Train F1: 1.0000


Loss: 0.0003: 100%|██████████| 9/9 [00:06<00:00,  1.37it/s]


Epoch 5/5
Train Loss: 0.0006, Train Acc: 1.0000, Train F1: 1.0000


  model.load_state_dict(torch.load(f"convNext_model_fold{fold}_final.pth"))
Predicting Fold 1: 100%|██████████| 99/99 [00:18<00:00,  5.29it/s]
Predicting Fold 2: 100%|██████████| 99/99 [00:18<00:00,  5.28it/s]
Predicting Fold 3: 100%|██████████| 99/99 [00:18<00:00,  5.28it/s]
Predicting Fold 4: 100%|██████████| 99/99 [00:18<00:00,  5.28it/s]
Predicting Fold 5: 100%|██████████| 99/99 [00:18<00:00,  5.28it/s]

Ensemble prediction completed and saved to pred_ensemble.csv





## Hyper Parameters Tunning With CNN Based Model

### ConvNeXt V2 Large 모델 + Optuna

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import optuna
import gc

gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)

# Optuna를 이용한 하이퍼파라미터 최적화 함수
def objective(trial):
    # 하이퍼파라미터 탐색 공간 정의
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
    batch_size = trial.suggest_categorical('batch_size', [8, 16, 32])
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)

    # 데이터 로더 생성
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

    # 모델, 손실 함수, 옵티마이저 설정
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # 학습 및 검증
    best_val_f1 = 0
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1

    return best_val_f1

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'
model_name = 'convnextv2_large'
img_size = 224  # ConvNeXt V2에 적합한 이미지 크기
EPOCHS = 30
num_workers = 4

# 데이터 증강 설정
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# 데이터 로드 및 분할
df = pd.read_csv(os.path.join(data_path, "train.csv"))
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

# Optuna를 이용한 하이퍼파라미터 최적화
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_params = study.best_params
print("Best hyperparameters:", best_params)

# 최적의 하이퍼파라미터로 최종 모델 학습
best_lr = best_params['lr']
best_batch_size = best_params['batch_size']
best_weight_decay = best_params['weight_decay']

train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=best_lr, weight_decay=best_weight_decay)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

# 모델 구조 출력
print(f"\nModel structure of {model_name}:")
print_model_summary(model, (3, img_size, img_size))

# 학습 루프
best_val_f1 = 0
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")

# 테스트 데이터 추론
model.load_state_dict(torch.load("best_model.pth"))
model.eval()
preds_list = []

for image, _ in tqdm(test_loader):
    image = image.to(device)
    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# 결과 저장
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

### ConvNext V2 Large + WanDB Sweep
- pip install wandb
- wandb login

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import wandb
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# 데이터셋 클래스 정의
#class ImageDataset(Dataset):


# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
#def validate(loader, model, loss_fn, device):

# 모델 구조 출력 함수
#def print_model_summary(model, input_size):


# wandb sweep을 위한 학습 함수
def train():
    # wandb 초기화
    run = wandb.init(entity="cho") #사용자에 따라 자신의 도메인 네임 설정!!!
    config = wandb.config

    # 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = config.model_name
    img_size = config.img_size
    LR = config.learning_rate
    EPOCHS = config.epochs
    BATCH_SIZE = config.batch_size
    num_workers = 4

    # 데이터 증강 설정
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # 데이터 로드 및 분할
    df = pd.read_csv(os.path.join(data_path, "train.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # 모델 설정
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=config.weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # 모델 구조 출력
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # 학습 루프
    best_val_f1 = 0
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        # wandb에 로그 기록
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "train_f1": train_f1,
            "val_loss": val_loss,
            "val_acc": val_acc,
            "val_f1": val_f1,
            "learning_rate": optimizer.param_groups[0]['lr']
        })

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "best_model.pth")
            wandb.run.summary["best_val_f1"] = best_val_f1

    wandb.finish()

# wandb sweep 설정
sweep_config = {
    'method': 'random',
    'metric': {
        'name': 'val_f1',
        'goal': 'maximize'
    },
    'parameters': {
        'model_name': {
            'values': ['convnextv2_large', 'efficientnet_b4']
        },
        'learning_rate': {
            'distribution': 'uniform',
            'min': 1e-5,
            'max': 1e-3
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'img_size': {
            'values': [224, 256, 288]
        },
        'weight_decay': {
            'values': [1e-5, 1e-4, 1e-3]
        },
        'epochs': {
            'value': 30
        }
    }
}

# wandb sweep 실행 및 최고 성능 모델 찾기
sweep_id = wandb.sweep(sweep_config, project="cvmodel",entity="cho")
wandb.agent(sweep_id, train, count=30)

In [None]:

# 최고 성능 모델의 설정 가져오기
api = wandb.Api()
sweep = api.sweep(f"dl-12/cvmodel/{sweep_id}")
best_run = sweep.best_run()
best_config = best_run.config

  
# 최고 성능 모델의 설정 사용
model_name = best_config['model_name']
img_size = best_config['img_size']
BATCH_SIZE = best_config['batch_size']
num_workers = 4

# 테스트 데이터 변환
test_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# 테스트 데이터셋 및 데이터로더 생성
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# 최고 성능 모델 생성
model = timm.create_model(model_name, pretrained=False, num_classes=17).to(device)
model.load_state_dict(torch.load("best_model.pth"))
model.eval()

# 테스트 데이터 추론
preds_list = []

for image, _ in tqdm(test_loader):
    image = image.to(device)
    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# 결과 저장
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

# wandb에 결과 업로드
wandb.init(project="cvmodel", name="best_model_prediction", entity="cho")
wandb.config.update(best_config)
wandb.save("pred.csv")
wandb.finish()

# Transformer 기반 모델

## Swin Transformers

In [13]:
import torch
import gc

gc.collect()
torch.cuda.empty_cache()

In [14]:
import os
import gc
import timm
import albumentations as A
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

# Early Stopping 클래스
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
        self.best_f1 = -np.Inf

    def __call__(self, val_loss, f1_score, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, f1_score, model):
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
        self.best_f1 = f1_score

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'swin_large_patch4_window7_224'  # Swin Transformer Large 모델
    img_size = 224  # Swin Transformer에 적합한 이미지 크기
    LR = 2e-5  # 학습률
    EPOCHS = 100  # 에포크 수
    BATCH_SIZE = 32  # 배치 크기
    num_workers = 4

    # 데이터 증강 설정
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # 데이터 로드 및 분할
    df = pd.read_csv(os.path.join(data_path, "augmented_train.csv"))
    train_df, val_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "augmented_train/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "augmented_train/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # 모델 설정
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    early_stopping = EarlyStopping(patience=7, verbose=True, delta=0.001, path='aug_swin_model.pth')

    # 모델 구조 출력
    print_model_summary(model, (3, img_size, img_size))

    # 학습 루프
    best_val_f1 = 0
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "aug_swin_model.pth")
        
        early_stopping(val_loss, val_f1, model)
        if early_stopping.early_stop:
            print(f"Early stopping. Best validation loss: {early_stopping.val_loss_min:.6f}, "
                  f"Best F1 score: {early_stopping.best_f1:.6f}")
            break

    # 테스트 데이터 추론
    model.load_state_dict(torch.load("aug_swin_model.pth"))
    model.eval()
    preds_list = []

    for image, _ in tqdm(test_loader):
        image = image.to(device)
        with torch.no_grad():
            preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

    # 결과 저장
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("pred_swin.csv", index=False)
    print("Prediction completed and saved to pred_swin.csv")


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 192, 56, 56]           9,408
         LayerNorm-2          [-1, 56, 56, 192]             384
        PatchEmbed-3          [-1, 56, 56, 192]               0
          Identity-4          [-1, 56, 56, 192]               0
         LayerNorm-5          [-1, 56, 56, 192]             384
            Linear-6              [-1, 49, 576]         111,168
           Softmax-7            [-1, 6, 49, 49]               0
           Dropout-8            [-1, 6, 49, 49]               0
            Linear-9              [-1, 49, 192]          37,056
          Dropout-10              [-1, 49, 192]               0
  WindowAttention-11              [-1, 49, 192]               0
         Identity-12          [-1, 56, 56, 192]               0
        LayerNorm-13            [-1, 3136, 192]             384
           Linear-14            [-1, 31

Loss: 0.1804: 100%|██████████| 259/259 [02:14<00:00,  1.93it/s]


Epoch 1/100
Train Loss: 0.7277, Train Acc: 0.7804, Train F1: 0.7628
Val Loss: 0.2358, Val Acc: 0.9108, Val F1: 0.8978
Validation loss decreased (inf --> 0.235835).  Saving model ...


Loss: 0.0452: 100%|██████████| 259/259 [02:15<00:00,  1.92it/s]


Epoch 2/100
Train Loss: 0.2097, Train Acc: 0.9219, Train F1: 0.9141
Val Loss: 0.1192, Val Acc: 0.9531, Val F1: 0.9451
Validation loss decreased (0.235835 --> 0.119184).  Saving model ...


Loss: 0.1793: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 3/100
Train Loss: 0.1320, Train Acc: 0.9513, Train F1: 0.9480
Val Loss: 0.0792, Val Acc: 0.9695, Val F1: 0.9677
Validation loss decreased (0.119184 --> 0.079231).  Saving model ...


Loss: 0.0017: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 4/100
Train Loss: 0.0794, Train Acc: 0.9729, Train F1: 0.9719
Val Loss: 0.0617, Val Acc: 0.9754, Val F1: 0.9744
Validation loss decreased (0.079231 --> 0.061746).  Saving model ...


Loss: 0.0002: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 5/100
Train Loss: 0.0581, Train Acc: 0.9802, Train F1: 0.9799
Val Loss: 0.0360, Val Acc: 0.9870, Val F1: 0.9861
Validation loss decreased (0.061746 --> 0.036009).  Saving model ...


Loss: 0.0316: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 6/100
Train Loss: 0.0386, Train Acc: 0.9880, Train F1: 0.9877
Val Loss: 0.0275, Val Acc: 0.9912, Val F1: 0.9910
Validation loss decreased (0.036009 --> 0.027513).  Saving model ...


Loss: 0.0039: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 7/100
Train Loss: 0.0311, Train Acc: 0.9895, Train F1: 0.9889
Val Loss: 0.0357, Val Acc: 0.9893, Val F1: 0.9882
EarlyStopping counter: 1 out of 7


Loss: 0.0101: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 8/100
Train Loss: 0.0263, Train Acc: 0.9920, Train F1: 0.9916
Val Loss: 0.0382, Val Acc: 0.9879, Val F1: 0.9866
EarlyStopping counter: 2 out of 7


Loss: 0.0002: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 9/100
Train Loss: 0.0237, Train Acc: 0.9921, Train F1: 0.9922
Val Loss: 0.0357, Val Acc: 0.9896, Val F1: 0.9891
EarlyStopping counter: 3 out of 7


Loss: 0.0005: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 10/100
Train Loss: 0.0202, Train Acc: 0.9933, Train F1: 0.9932
Val Loss: 0.0211, Val Acc: 0.9912, Val F1: 0.9907
Validation loss decreased (0.027513 --> 0.021077).  Saving model ...


Loss: 0.0009: 100%|██████████| 259/259 [02:15<00:00,  1.92it/s]


Epoch 11/100
Train Loss: 0.0202, Train Acc: 0.9947, Train F1: 0.9944
Val Loss: 0.0348, Val Acc: 0.9887, Val F1: 0.9868
EarlyStopping counter: 1 out of 7


Loss: 0.0015: 100%|██████████| 259/259 [02:15<00:00,  1.92it/s]


Epoch 12/100
Train Loss: 0.0139, Train Acc: 0.9953, Train F1: 0.9951
Val Loss: 0.0268, Val Acc: 0.9912, Val F1: 0.9905
EarlyStopping counter: 2 out of 7


Loss: 0.0003: 100%|██████████| 259/259 [02:15<00:00,  1.92it/s]


Epoch 13/100
Train Loss: 0.0144, Train Acc: 0.9954, Train F1: 0.9954
Val Loss: 0.0311, Val Acc: 0.9898, Val F1: 0.9892
EarlyStopping counter: 3 out of 7


Loss: 0.0008: 100%|██████████| 259/259 [02:15<00:00,  1.92it/s]


Epoch 14/100
Train Loss: 0.0126, Train Acc: 0.9958, Train F1: 0.9954
Val Loss: 0.0224, Val Acc: 0.9932, Val F1: 0.9928
EarlyStopping counter: 4 out of 7


Loss: 0.0001: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 15/100
Train Loss: 0.0144, Train Acc: 0.9956, Train F1: 0.9951
Val Loss: 0.0197, Val Acc: 0.9932, Val F1: 0.9934
Validation loss decreased (0.021077 --> 0.019676).  Saving model ...


Loss: 0.0002: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 16/100
Train Loss: 0.0072, Train Acc: 0.9979, Train F1: 0.9977
Val Loss: 0.0482, Val Acc: 0.9884, Val F1: 0.9877
EarlyStopping counter: 1 out of 7


Loss: 0.0007: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 17/100
Train Loss: 0.0161, Train Acc: 0.9952, Train F1: 0.9950
Val Loss: 0.0243, Val Acc: 0.9921, Val F1: 0.9919
EarlyStopping counter: 2 out of 7


Loss: 0.0000: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 18/100
Train Loss: 0.0130, Train Acc: 0.9959, Train F1: 0.9959
Val Loss: 0.0258, Val Acc: 0.9921, Val F1: 0.9918
EarlyStopping counter: 3 out of 7


Loss: 0.0068: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 19/100
Train Loss: 0.0068, Train Acc: 0.9977, Train F1: 0.9977
Val Loss: 0.0216, Val Acc: 0.9932, Val F1: 0.9931
EarlyStopping counter: 4 out of 7


Loss: 0.0000: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 20/100
Train Loss: 0.0089, Train Acc: 0.9977, Train F1: 0.9975
Val Loss: 0.0194, Val Acc: 0.9949, Val F1: 0.9948
EarlyStopping counter: 5 out of 7


Loss: 0.0156: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 21/100
Train Loss: 0.0095, Train Acc: 0.9966, Train F1: 0.9964
Val Loss: 0.0202, Val Acc: 0.9944, Val F1: 0.9940
EarlyStopping counter: 6 out of 7


Loss: 0.0009: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 22/100
Train Loss: 0.0080, Train Acc: 0.9976, Train F1: 0.9976
Val Loss: 0.0179, Val Acc: 0.9955, Val F1: 0.9953
Validation loss decreased (0.019676 --> 0.017897).  Saving model ...


Loss: 0.0004: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 23/100
Train Loss: 0.0124, Train Acc: 0.9959, Train F1: 0.9957
Val Loss: 0.0238, Val Acc: 0.9932, Val F1: 0.9933
EarlyStopping counter: 1 out of 7


Loss: 0.0041: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 24/100
Train Loss: 0.0102, Train Acc: 0.9969, Train F1: 0.9967
Val Loss: 0.0161, Val Acc: 0.9960, Val F1: 0.9961
Validation loss decreased (0.017897 --> 0.016115).  Saving model ...


Loss: 0.0001: 100%|██████████| 259/259 [02:15<00:00,  1.92it/s]


Epoch 25/100
Train Loss: 0.0113, Train Acc: 0.9966, Train F1: 0.9967
Val Loss: 0.0123, Val Acc: 0.9966, Val F1: 0.9968
Validation loss decreased (0.016115 --> 0.012313).  Saving model ...


Loss: 0.0039: 100%|██████████| 259/259 [02:15<00:00,  1.92it/s]


Epoch 26/100
Train Loss: 0.0028, Train Acc: 0.9994, Train F1: 0.9993
Val Loss: 0.0116, Val Acc: 0.9975, Val F1: 0.9975
EarlyStopping counter: 1 out of 7


Loss: 0.0000: 100%|██████████| 259/259 [02:15<00:00,  1.91it/s]


Epoch 27/100
Train Loss: 0.0041, Train Acc: 0.9990, Train F1: 0.9990
Val Loss: 0.0130, Val Acc: 0.9960, Val F1: 0.9959
EarlyStopping counter: 2 out of 7


Loss: 0.0002: 100%|██████████| 259/259 [02:16<00:00,  1.90it/s]


Epoch 28/100
Train Loss: 0.0049, Train Acc: 0.9983, Train F1: 0.9984
Val Loss: 0.0162, Val Acc: 0.9958, Val F1: 0.9956
EarlyStopping counter: 3 out of 7


Loss: 0.0002: 100%|██████████| 259/259 [02:15<00:00,  1.92it/s]


Epoch 29/100
Train Loss: 0.0067, Train Acc: 0.9979, Train F1: 0.9978
Val Loss: 0.0123, Val Acc: 0.9955, Val F1: 0.9954
EarlyStopping counter: 4 out of 7


Loss: 0.0000: 100%|██████████| 259/259 [02:17<00:00,  1.88it/s]


Epoch 30/100
Train Loss: 0.0047, Train Acc: 0.9984, Train F1: 0.9983
Val Loss: 0.0124, Val Acc: 0.9946, Val F1: 0.9945
EarlyStopping counter: 5 out of 7


Loss: 0.0001: 100%|██████████| 259/259 [02:19<00:00,  1.86it/s]


Epoch 31/100
Train Loss: 0.0018, Train Acc: 0.9994, Train F1: 0.9993
Val Loss: 0.0122, Val Acc: 0.9963, Val F1: 0.9962
EarlyStopping counter: 6 out of 7


Loss: 0.0000: 100%|██████████| 259/259 [02:23<00:00,  1.80it/s]


Epoch 32/100
Train Loss: 0.0028, Train Acc: 0.9993, Train F1: 0.9992
Val Loss: 0.0116, Val Acc: 0.9977, Val F1: 0.9978
EarlyStopping counter: 7 out of 7
Early stopping. Best validation loss: 0.012313, Best F1 score: 0.996774


100%|██████████| 99/99 [00:20<00:00,  4.78it/s]

Prediction completed and saved to pred_swin.csv





### test data Denoising Autoencoder
- 테스트 데이터를 autoencoder 방식의 이미지를 가지고 평가

In [None]:
import torch
import pandas as pd
from tqdm import tqdm
from torch.utils.data import DataLoader

# 데이터셋 클래스 정의(독립실행으로 가정하고 중복 정의)
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 테스트 데이터 경로 설정
data_path = '../data/'

# 테스트 데이터셋 및 데이터 로더 생성
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "denoise_test/"), transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# 모델 로드 및 평가 모드 설정
model.load_state_dict(torch.load("aug_swin_model.pth"))
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

preds_list = []

for image, _ in tqdm(test_loader):
    image = image.to(device)
    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# 결과 저장
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred_swin.csv", index=False)
print("Prediction completed and saved to pred_swin.csv")

## Stratified k-fold cross validation 

In [30]:
import torch
import gc

gc.collect()
torch.cuda.empty_cache()

In [31]:
import os
import time
import timm

import albumentations as A
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, df, path, transform=None):
        self.df = df.values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

# Early Stopping 클래스
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0, min_f1_score=0.9):
        self.patience = patience
        self.min_delta = min_delta
        self.min_f1_score = min_f1_score
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, val_f1):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience and val_f1 >= self.min_f1_score:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0
            
# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'swin_large_patch4_window7_224'  # Swin Transformer Large 모델
    img_size = 224  # Swin Transformer에 적합한 이미지 크기
    LR = 2e-5  # 1e-4 에서 학습률 조정 : 기존이 더 좋음
    EPOCHS = 100 # 30에서 조정 : 기존이 더 좋음
    BATCH_SIZE = 32  # 배치 크기 조정
    num_workers = 4
    n_splits = 5  # Number of K-fold splits

    # 데이터 증강 설정
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # 데이터 로드
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))

    # Stratified K-Fold 설정
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # 전체 결과 저장
    final_preds_list = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['target'])):
        print(f"Fold {fold+1}/{n_splits}")
        
        train_df = df.iloc[train_idx]
        val_df = df.iloc[val_idx]

        train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
        val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
        test_dataset = ImageDataset(pd.read_csv(os.path.join(data_path, "sample_submission.csv")), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

        # 모델 설정
        model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
        scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
        # Early stopping 설정
        early_stopping = EarlyStopping(patience=5, min_delta=0.001, min_f1_score=0.93)

        # 모델 구조 출력
        # print(f"\nModel structure of {model_name}:")
        # print_model_summary(model, (3, img_size, img_size))

        # 학습 루프
        best_val_f1 = 0
        for epoch in range(EPOCHS):
            train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
            val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
            scheduler.step()

            print(f"Epoch {epoch+1}/{EPOCHS}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                torch.save(model.state_dict(), f"swin_t_model_fold_{fold}.pth")
            
            # Early stopping 체크
            early_stopping(val_loss, val_f1)
            if early_stopping.early_stop:
                print(f"Early stopping triggered at epoch {epoch+1} with F1 score: {val_f1:.4f}")
                break

        # 테스트 데이터 추론
        model.load_state_dict(torch.load(f"swin_t_model_fold_{fold}.pth"))
        model.eval()
        fold_preds_list = []

        for image, _ in tqdm(test_loader):
            image = image.to(device)
            with torch.no_grad():
                preds = model(image)
            fold_preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

        # 결과 저장
        final_preds_list.append(fold_preds_list)

    # 결과 앙상블
    final_preds = np.mean(np.array(final_preds_list), axis=0).astype(int)

    pred_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
    pred_df['target'] = final_preds
    pred_df.to_csv("pred_swin_kfold.csv", index=False)
    print("Prediction completed and saved to pred_swin.csv")


Fold 1/5


Loss: 1.1379: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s]


Epoch 1/100
Train Loss: 1.9977, Train Acc: 0.4865, Train F1: 0.4503
Val Loss: 0.8827, Val Acc: 0.7739, Val F1: 0.6936


Loss: 0.4577: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s]


Epoch 2/100
Train Loss: 0.6732, Train Acc: 0.8049, Train F1: 0.7707
Val Loss: 0.4043, Val Acc: 0.8631, Val F1: 0.8179


Loss: 0.3563: 100%|██████████| 40/40 [00:20<00:00,  1.94it/s]


Epoch 3/100
Train Loss: 0.3740, Train Acc: 0.8814, Train F1: 0.8610
Val Loss: 0.2678, Val Acc: 0.8822, Val F1: 0.8490


Loss: 0.3622: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 4/100
Train Loss: 0.2812, Train Acc: 0.8989, Train F1: 0.8843
Val Loss: 0.2281, Val Acc: 0.8949, Val F1: 0.8587


Loss: 0.1351: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 5/100
Train Loss: 0.2306, Train Acc: 0.9092, Train F1: 0.8988
Val Loss: 0.3207, Val Acc: 0.8885, Val F1: 0.8724


Loss: 0.4066: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 6/100
Train Loss: 0.1993, Train Acc: 0.9307, Train F1: 0.9236
Val Loss: 0.2139, Val Acc: 0.9108, Val F1: 0.8802


Loss: 0.0564: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 7/100
Train Loss: 0.1616, Train Acc: 0.9411, Train F1: 0.9367
Val Loss: 0.2266, Val Acc: 0.9236, Val F1: 0.9109


Loss: 0.0156: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 8/100
Train Loss: 0.1497, Train Acc: 0.9427, Train F1: 0.9381
Val Loss: 0.1785, Val Acc: 0.9076, Val F1: 0.8937


Loss: 0.1676: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 9/100
Train Loss: 0.1273, Train Acc: 0.9570, Train F1: 0.9535
Val Loss: 0.2308, Val Acc: 0.9236, Val F1: 0.9100


Loss: 0.2569: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 10/100
Train Loss: 0.1152, Train Acc: 0.9562, Train F1: 0.9555
Val Loss: 0.2009, Val Acc: 0.9299, Val F1: 0.9218


Loss: 0.0047: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 11/100
Train Loss: 0.0833, Train Acc: 0.9737, Train F1: 0.9735
Val Loss: 0.2476, Val Acc: 0.9236, Val F1: 0.9079


Loss: 0.0017: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 12/100
Train Loss: 0.1018, Train Acc: 0.9586, Train F1: 0.9557
Val Loss: 0.1968, Val Acc: 0.9331, Val F1: 0.9255


Loss: 0.2389: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 13/100
Train Loss: 0.1085, Train Acc: 0.9578, Train F1: 0.9558
Val Loss: 0.2119, Val Acc: 0.9076, Val F1: 0.8933


Loss: 0.0059: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 14/100
Train Loss: 0.0823, Train Acc: 0.9705, Train F1: 0.9685
Val Loss: 0.1973, Val Acc: 0.9268, Val F1: 0.9177


Loss: 0.0007: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 15/100
Train Loss: 0.0565, Train Acc: 0.9825, Train F1: 0.9822
Val Loss: 0.1846, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0499: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 16/100
Train Loss: 0.0534, Train Acc: 0.9817, Train F1: 0.9814
Val Loss: 0.2300, Val Acc: 0.9268, Val F1: 0.9204


Loss: 0.0983: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 17/100
Train Loss: 0.0583, Train Acc: 0.9833, Train F1: 0.9821
Val Loss: 0.1958, Val Acc: 0.9363, Val F1: 0.9308
Early stopping triggered at epoch 17 with F1 score: 0.9308


100%|██████████| 99/99 [00:18<00:00,  5.43it/s]


Fold 2/5


Loss: 1.7443: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 1/100
Train Loss: 2.0241, Train Acc: 0.4682, Train F1: 0.4419
Val Loss: 0.9525, Val Acc: 0.7516, Val F1: 0.7064


Loss: 0.5209: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 2/100
Train Loss: 0.7057, Train Acc: 0.7986, Train F1: 0.7692
Val Loss: 0.4094, Val Acc: 0.8408, Val F1: 0.8119


Loss: 0.3512: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 3/100
Train Loss: 0.3797, Train Acc: 0.8838, Train F1: 0.8663
Val Loss: 0.3192, Val Acc: 0.8631, Val F1: 0.8498


Loss: 0.1091: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 4/100
Train Loss: 0.2739, Train Acc: 0.9037, Train F1: 0.8893
Val Loss: 0.3723, Val Acc: 0.8694, Val F1: 0.8489


Loss: 0.6342: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 5/100
Train Loss: 0.2704, Train Acc: 0.9005, Train F1: 0.8886
Val Loss: 0.2797, Val Acc: 0.8854, Val F1: 0.8749


Loss: 0.1422: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 6/100
Train Loss: 0.2116, Train Acc: 0.9172, Train F1: 0.9051
Val Loss: 0.2317, Val Acc: 0.8949, Val F1: 0.8831


Loss: 0.0532: 100%|██████████| 40/40 [00:20<00:00,  1.91it/s]


Epoch 7/100
Train Loss: 0.1665, Train Acc: 0.9482, Train F1: 0.9401
Val Loss: 0.2309, Val Acc: 0.8949, Val F1: 0.8818


Loss: 0.0940: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 8/100
Train Loss: 0.1499, Train Acc: 0.9530, Train F1: 0.9486
Val Loss: 0.2127, Val Acc: 0.9108, Val F1: 0.9076


Loss: 0.5626: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 9/100
Train Loss: 0.1320, Train Acc: 0.9562, Train F1: 0.9528
Val Loss: 0.2164, Val Acc: 0.8949, Val F1: 0.8864


Loss: 0.0135: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 10/100
Train Loss: 0.1205, Train Acc: 0.9546, Train F1: 0.9499
Val Loss: 0.1943, Val Acc: 0.9045, Val F1: 0.8985


Loss: 0.1353: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 11/100
Train Loss: 0.0913, Train Acc: 0.9745, Train F1: 0.9730
Val Loss: 0.2765, Val Acc: 0.9013, Val F1: 0.8873


Loss: 0.0312:  78%|███████▊  | 31/40 [00:16<00:04,  1.91it/s]

## Swin-T clustering & classification
- 이미지를 유사한 이미지로 5개로 그룹핑 하고 분석하는 모델
- early stoping 코드에서 강제적으로 f1 score boundary 를 줄수 있게 변경(분류하면 과소적합이 되는 구간이 있음.)

In [1]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import gc

gc.collect()
torch.cuda.empty_cache()

# Swin-B 모델 로드
def load_swin_b_model(num_classes=None):
    model = timm.create_model('swin_base_patch4_window7_224', pretrained=True, num_classes=num_classes)
    return model

# 특성 추출 함수
def extract_features(img_path, model):
    transform = A.Compose([
        A.Resize(224, 224),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])
    img = Image.open(img_path).convert('RGB')
    img = np.array(img)
    img = transform(image=img)['image']
    img = img.unsqueeze(0)
    
    with torch.no_grad():
        features = model.forward_features(img)
    return features.squeeze().numpy()

# 이미지 클러스터링 함수
def cluster_images(data_path, n_clusters=5):
    feature_extractor = load_swin_b_model(num_classes=None)
    feature_extractor.eval()
    
    image_files = [f for f in os.listdir(data_path) if f.endswith('.jpg') or f.endswith('.png')]
    features = []
    for img_file in tqdm(image_files, desc="Extracting features"):
        img_path = os.path.join(data_path, img_file)
        feature = extract_features(img_path, feature_extractor)
        features.append(feature.reshape(-1))  # Flatten the feature array
    
    features = np.array(features)
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)
    
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(features_scaled)
    
    return dict(zip(image_files, clusters))

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None, cluster_dict=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform
        self.cluster_dict = cluster_dict

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        
        cluster = self.cluster_dict.get(name, -1) if self.cluster_dict else -1
        return img, target, cluster

# Early Stopping 클래스
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
        self.best_f1 = -np.Inf

    def __call__(self, val_loss, f1_score, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, f1_score, model):
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). '
                            f'F1 score: {f1_score:.6f}. Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
        self.best_f1 = f1_score

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets, _ in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets, _ in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    img_size = 224
    LR = 2e-5
    EPOCHS = 100
    BATCH_SIZE = 32
    num_workers = 4
    n_clusters = 2

    # 클러스터링 수행
    print("Clustering images...")
    cluster_dict = cluster_images(os.path.join(data_path, "train_preprocessed/"), n_clusters)

    # 데이터 증강 설정
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # 데이터 로드 및 분할
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform, cluster_dict=cluster_dict)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform, cluster_dict=cluster_dict)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # 각 클러스터에 대한 모델 학습
    for cluster in range(n_clusters):
        print(f"\nTraining model for cluster {cluster}")
        
        # 클러스터에 해당하는 데이터만 선택
        train_cluster = [data for data in train_dataset if data[2] == cluster]
        val_cluster = [data for data in val_dataset if data[2] == cluster]
        
        if len(train_cluster) == 0 or len(val_cluster) == 0:
            print(f"Skipping cluster {cluster} due to insufficient data")
            continue
        
        train_cluster_loader = DataLoader(train_cluster, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
        val_cluster_loader = DataLoader(val_cluster, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

        # Swin-B 모델 설정
        model = load_swin_b_model(num_classes=17).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
        scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

        # Early stopping 설정
        early_stopping = EarlyStopping(patience=7, verbose=True, delta=0.001, path='cluster_model.pth')


        # 모델 구조 출력
        # print(f"\nModel structure of Swin-B for cluster {cluster}:")
        # print_model_summary(model, (3, img_size, img_size))

        # 학습 루프
        best_val_f1 = 0
        for epoch in range(EPOCHS):
            train_loss, train_acc, train_f1 = train_one_epoch(train_cluster_loader, model, optimizer, loss_fn, device)
            val_loss, val_acc, val_f1 = validate(val_cluster_loader, model, loss_fn, device)
            scheduler.step()

            print(f"Epoch {epoch+1}/{EPOCHS}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                torch.save(model.state_dict(), f"swin_b_model_cluster_{cluster}.pth")
                print(f"New best F1 score: {best_val_f1:.4f}")

            #조기 종료 체크 (validation 에러 기준)
            early_stopping(val_loss, val_f1, model)
            if early_stopping.early_stop:
                print(f"Early stopping. Best validation loss: {early_stopping.val_loss_min:.6f}, "
                    f"Best F1 score: {early_stopping.best_f1:.6f}")
                break
            

    # 테스트 데이터 추론
    print("\nPerforming inference on test data")
    test_preds = []
    
    for cluster in range(n_clusters):
        model = load_swin_b_model(num_classes=17).to(device)
        model.load_state_dict(torch.load(f"swin_b_model_cluster_{cluster}.pth"))
        model.eval()
        
        cluster_preds = []
        for image, _, _ in tqdm(test_loader, desc=f"Predicting cluster {cluster}"):
            image = image.to(device)
            with torch.no_grad():
                preds = model(image)
            cluster_preds.extend(preds.detach().cpu().numpy())
        
        test_preds.append(cluster_preds)
    
    # 모든 클러스터의 예측을 결합
    final_preds = np.mean(test_preds, axis=0)
    final_preds = np.argmax(final_preds, axis=1)

    # 결과 저장
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = final_preds
    pred_df.to_csv("swin_pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")


Clustering images...


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Extracting features: 100%|██████████| 1570/1570 [08:21<00:00,  3.13it/s]
  super()._check_params_vs_input(X, default_n_init=10)



Training model for cluster 0


Loss: 1.9624: 100%|██████████| 23/23 [00:07<00:00,  3.12it/s]


Epoch 1/100
Train Loss: 2.3894, Train Acc: 0.2272, Train F1: 0.1160
Val Loss: 1.7668, Val Acc: 0.4709, Val F1: 0.3021
New best F1 score: 0.3021
Validation loss decreased (inf --> 1.766814). F1 score: 0.302139. Saving model ...


Loss: 1.1065: 100%|██████████| 23/23 [00:06<00:00,  3.49it/s]


Epoch 2/100
Train Loss: 1.3509, Train Acc: 0.6027, Train F1: 0.4836
Val Loss: 0.8510, Val Acc: 0.7302, Val F1: 0.6464
New best F1 score: 0.6464
Validation loss decreased (1.766814 --> 0.851001). F1 score: 0.646374. Saving model ...


Loss: 0.6082: 100%|██████████| 23/23 [00:06<00:00,  3.45it/s]


Epoch 3/100
Train Loss: 0.7066, Train Acc: 0.7891, Train F1: 0.7322
Val Loss: 0.6014, Val Acc: 0.7778, Val F1: 0.6829
New best F1 score: 0.6829
Validation loss decreased (0.851001 --> 0.601394). F1 score: 0.682854. Saving model ...


Loss: 0.3245: 100%|██████████| 23/23 [00:06<00:00,  3.47it/s]


Epoch 4/100
Train Loss: 0.4193, Train Acc: 0.8721, Train F1: 0.8133
Val Loss: 0.4649, Val Acc: 0.8095, Val F1: 0.7426
New best F1 score: 0.7426
Validation loss decreased (0.601394 --> 0.464942). F1 score: 0.742565. Saving model ...


Loss: 0.2456: 100%|██████████| 23/23 [00:06<00:00,  3.47it/s]


Epoch 5/100
Train Loss: 0.3084, Train Acc: 0.9034, Train F1: 0.8855
Val Loss: 0.4378, Val Acc: 0.7989, Val F1: 0.8113
New best F1 score: 0.8113
Validation loss decreased (0.464942 --> 0.437803). F1 score: 0.811311. Saving model ...


Loss: 0.2351: 100%|██████████| 23/23 [00:06<00:00,  3.45it/s]


Epoch 6/100
Train Loss: 0.2114, Train Acc: 0.9374, Train F1: 0.9182
Val Loss: 0.4343, Val Acc: 0.8148, Val F1: 0.8266
New best F1 score: 0.8266
Validation loss decreased (0.437803 --> 0.434306). F1 score: 0.826585. Saving model ...


Loss: 0.1550: 100%|██████████| 23/23 [00:06<00:00,  3.44it/s]


Epoch 7/100
Train Loss: 0.1245, Train Acc: 0.9782, Train F1: 0.9810
Val Loss: 0.4091, Val Acc: 0.8307, Val F1: 0.8408
New best F1 score: 0.8408
Validation loss decreased (0.434306 --> 0.409088). F1 score: 0.840833. Saving model ...


Loss: 0.0210: 100%|██████████| 23/23 [00:06<00:00,  3.46it/s]


Epoch 8/100
Train Loss: 0.0843, Train Acc: 0.9823, Train F1: 0.9584
Val Loss: 0.4157, Val Acc: 0.8254, Val F1: 0.8502
New best F1 score: 0.8502
EarlyStopping counter: 1 out of 7


Loss: 0.1330: 100%|██████████| 23/23 [00:06<00:00,  3.45it/s]


Epoch 9/100
Train Loss: 0.0708, Train Acc: 0.9932, Train F1: 0.9947
Val Loss: 0.4342, Val Acc: 0.8360, Val F1: 0.8642
New best F1 score: 0.8642
EarlyStopping counter: 2 out of 7


Loss: 0.0200: 100%|██████████| 23/23 [00:06<00:00,  3.45it/s]


Epoch 10/100
Train Loss: 0.0580, Train Acc: 0.9891, Train F1: 0.9914
Val Loss: 0.5063, Val Acc: 0.8148, Val F1: 0.8343
EarlyStopping counter: 3 out of 7


Loss: 0.1000: 100%|██████████| 23/23 [00:06<00:00,  3.45it/s]


Epoch 11/100
Train Loss: 0.0391, Train Acc: 0.9959, Train F1: 0.9965
Val Loss: 0.5177, Val Acc: 0.8148, Val F1: 0.8329
EarlyStopping counter: 4 out of 7


Loss: 0.0433: 100%|██████████| 23/23 [00:06<00:00,  3.45it/s]


Epoch 12/100
Train Loss: 0.0328, Train Acc: 0.9973, Train F1: 0.9981
Val Loss: 0.5408, Val Acc: 0.8201, Val F1: 0.8485
EarlyStopping counter: 5 out of 7


Loss: 0.0346: 100%|██████████| 23/23 [00:06<00:00,  3.44it/s]


Epoch 13/100
Train Loss: 0.0216, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.5339, Val Acc: 0.8307, Val F1: 0.8589
EarlyStopping counter: 6 out of 7


Loss: 0.0366: 100%|██████████| 23/23 [00:06<00:00,  3.44it/s]


Epoch 14/100
Train Loss: 0.0146, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.5482, Val Acc: 0.8571, Val F1: 0.8697
New best F1 score: 0.8697
EarlyStopping counter: 7 out of 7
Early stopping. Best validation loss: 0.409088, Best F1 score: 0.840833

Training model for cluster 1


Loss: 0.5359: 100%|██████████| 17/17 [00:04<00:00,  3.53it/s]


Epoch 1/100
Train Loss: 1.7745, Train Acc: 0.5969, Train F1: 0.2815
Val Loss: 0.5390, Val Acc: 0.9600, Val F1: 0.8432
New best F1 score: 0.8432
Validation loss decreased (inf --> 0.539037). F1 score: 0.843164. Saving model ...


Loss: 0.5383: 100%|██████████| 17/17 [00:04<00:00,  3.55it/s]


Epoch 2/100
Train Loss: 0.3359, Train Acc: 0.9731, Train F1: 0.7593
Val Loss: 0.0748, Val Acc: 0.9920, Val F1: 0.8684
New best F1 score: 0.8684
Validation loss decreased (0.539037 --> 0.074824). F1 score: 0.868421. Saving model ...


Loss: 0.0186: 100%|██████████| 17/17 [00:04<00:00,  3.54it/s]


Epoch 3/100
Train Loss: 0.0658, Train Acc: 0.9942, Train F1: 0.8629
Val Loss: 0.0176, Val Acc: 1.0000, Val F1: 1.0000
New best F1 score: 1.0000
Validation loss decreased (0.074824 --> 0.017606). F1 score: 1.000000. Saving model ...


Loss: 0.0084: 100%|██████████| 17/17 [00:04<00:00,  3.54it/s]


Epoch 4/100
Train Loss: 0.0263, Train Acc: 0.9981, Train F1: 0.8877
Val Loss: 0.0055, Val Acc: 1.0000, Val F1: 1.0000
Validation loss decreased (0.017606 --> 0.005532). F1 score: 1.000000. Saving model ...


Loss: 0.0006: 100%|██████████| 17/17 [00:04<00:00,  3.54it/s]


Epoch 5/100
Train Loss: 0.0147, Train Acc: 0.9981, Train F1: 0.8877
Val Loss: 0.0039, Val Acc: 1.0000, Val F1: 1.0000
Validation loss decreased (0.005532 --> 0.003931). F1 score: 1.000000. Saving model ...


Loss: 0.0066: 100%|██████████| 17/17 [00:04<00:00,  3.53it/s]


Epoch 6/100
Train Loss: 0.0122, Train Acc: 0.9981, Train F1: 0.8877
Val Loss: 0.0028, Val Acc: 1.0000, Val F1: 1.0000
Validation loss decreased (0.003931 --> 0.002810). F1 score: 1.000000. Saving model ...


Loss: 0.0056: 100%|██████████| 17/17 [00:04<00:00,  3.54it/s]


Epoch 7/100
Train Loss: 0.0107, Train Acc: 0.9981, Train F1: 0.8881
Val Loss: 0.0017, Val Acc: 1.0000, Val F1: 1.0000
Validation loss decreased (0.002810 --> 0.001662). F1 score: 1.000000. Saving model ...


Loss: 0.0091: 100%|██████████| 17/17 [00:04<00:00,  3.52it/s]


Epoch 8/100
Train Loss: 0.0080, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0016, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 1 out of 7


Loss: 0.0008: 100%|██████████| 17/17 [00:04<00:00,  3.53it/s]


Epoch 9/100
Train Loss: 0.0027, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0008, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 2 out of 7


Loss: 0.0065: 100%|██████████| 17/17 [00:04<00:00,  3.51it/s]


Epoch 10/100
Train Loss: 0.0042, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0007, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 3 out of 7


Loss: 0.0014: 100%|██████████| 17/17 [00:04<00:00,  3.52it/s]


Epoch 11/100
Train Loss: 0.0027, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0006, Val Acc: 1.0000, Val F1: 1.0000
Validation loss decreased (0.001662 --> 0.000624). F1 score: 1.000000. Saving model ...


Loss: 0.0005: 100%|██████████| 17/17 [00:04<00:00,  3.54it/s]


Epoch 12/100
Train Loss: 0.0024, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0005, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 1 out of 7


Loss: 0.0005: 100%|██████████| 17/17 [00:04<00:00,  3.51it/s]


Epoch 13/100
Train Loss: 0.0020, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0004, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 2 out of 7


Loss: 0.0002: 100%|██████████| 17/17 [00:04<00:00,  3.54it/s]


Epoch 14/100
Train Loss: 0.0026, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0005, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 3 out of 7


Loss: 0.0002: 100%|██████████| 17/17 [00:04<00:00,  3.53it/s]


Epoch 15/100
Train Loss: 0.0012, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0004, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 4 out of 7


Loss: 0.0018: 100%|██████████| 17/17 [00:04<00:00,  3.53it/s]


Epoch 16/100
Train Loss: 0.0048, Train Acc: 0.9981, Train F1: 0.8881
Val Loss: 0.0004, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 5 out of 7


Loss: 0.0019: 100%|██████████| 17/17 [00:04<00:00,  3.50it/s]


Epoch 17/100
Train Loss: 0.0014, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0004, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 6 out of 7


Loss: 0.0004: 100%|██████████| 17/17 [00:04<00:00,  3.52it/s]


Epoch 18/100
Train Loss: 0.0008, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.0004, Val Acc: 1.0000, Val F1: 1.0000
EarlyStopping counter: 7 out of 7
Early stopping. Best validation loss: 0.000624, Best F1 score: 1.000000

Performing inference on test data


  model.load_state_dict(torch.load(f"swin_b_model_cluster_{cluster}.pth"))
Predicting cluster 0: 100%|██████████| 99/99 [00:10<00:00,  9.82it/s]
Predicting cluster 1: 100%|██████████| 99/99 [00:10<00:00,  9.88it/s]

Prediction completed and saved to pred.csv





# 앙상블
CNN 모델(ConvNeXt V2 Large)과 Transformer 모델(ViT Large)을 결합한 앙상블 모델
- CNN 모델로 'convnextv2_large'를 사용합니다.
- Transformer 모델로 'vit_large_patch16_224'를 사용합니다.

## convnext v2 + vit

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import gc

gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 앙상블 모델 클래스 정의
class EnsembleModel(nn.Module):
    def __init__(self, model1, model2):
        super(EnsembleModel, self).__init__()
        self.model1 = model1
        self.model2 = model2

    def forward(self, x):
        out1 = self.model1(x)
        out2 = self.model2(x)
        return (out1 + out2) / 2

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

In [None]:
# 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'
img_size = 224
EPOCHS = 30
BATCH_SIZE = 16
LR = 1e-4
num_workers = 4

# 데이터 증강 설정
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# 데이터 로드 및 분할
df = pd.read_csv(os.path.join(data_path, "train.csv"))
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# 모델 설정
model1 = timm.create_model('convnextv2_large', pretrained=True, num_classes=17)
model2 = timm.create_model('vit_large_patch16_224', pretrained=True, num_classes=17)

ensemble_model = EnsembleModel(model1, model2).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(ensemble_model.parameters(), lr=LR, weight_decay=1e-5)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

# 학습 루프
best_val_f1 = 0
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, ensemble_model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, ensemble_model, loss_fn, device)
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(ensemble_model.state_dict(), "best_ensemble_model.pth")

# 테스트 데이터 추론
ensemble_model.load_state_dict(torch.load("best_ensemble_model.pth"))
ensemble_model.eval()
preds_list = []

for image, _ in tqdm(test_loader):
    image = image.to(device)
    with torch.no_grad():
        preds = ensemble_model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# 결과 저장
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("ensemble_pred.csv", index=False)
print("Prediction completed and saved to ensemble_pred.csv")

## 앙상블 모델 II - 리더 보드 제출용
CNN 모델(ConvNeXt V2 Large)과 Transformer 모델(Swin Transformers)을 결합한 앙상블 모델
- Hyper Parameter tunning이 전혀 되어 있지 않는 기본 모델 : 향후 최적화 필요
- CNN 모델로 'convnextv2_large'를 사용합니다.
- Transformer 모델로 'swin_large_patch4_window7_224'를 사용합니다.
- software voting(기존 저장된 pth 사용)

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader
import gc

gc.collect()
torch.cuda.empty_cache()

# 첫 번째 모델과 두 번째 모델 로드
model1 = timm.create_model('convnextv2_large', pretrained=False, num_classes=17).to(device)
model2 = timm.create_model('swin_large_patch4_window7_224', pretrained=False, num_classes=17).to(device)

# 모델 가중치 로드
model1.load_state_dict(torch.load('convNext_model.pth'))
model2.load_state_dict(torch.load('swin_t_model.pth'))

model1.eval()
model2.eval()

# 테스트 데이터 로드
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# 소프트 보팅을 통한 예측
preds_list = []
with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds1 = model1(image)
        preds2 = model2(image)
        
        # 소프트 보팅: 예측 확률의 평균
        preds_avg = (torch.softmax(preds1, dim=1) + torch.softmax(preds2, dim=1)) / 2
        preds_list.extend(preds_avg.argmax(dim=1).detach().cpu().numpy())

# 결과 저장
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("ensemble_pred.csv", index=False)
print("Ensemble prediction completed and saved to ensemble_pred.csv")


# Bert OCR 모델

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from transformers import AutoTokenizer, AutoModel
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tqdm import tqdm
import numpy as np
from PIL import Image

import gc
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

gc.collect()
torch.cuda.empty_cache()

# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BERT 모델 정의
class BERTModel(nn.Module):
    def __init__(self, num_classes):
        super(BERTModel, self).__init__()
        self.bert = AutoModel.from_pretrained('klue/bert-base')
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        x = self.dropout(pooled_output)
        x = self.fc(x)
        return x

# 데이터셋 클래스
class TextDataset(Dataset):
    def __init__(self, df, csv_text_data, tokenizer=None, max_len=512):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.csv_text_data = csv_text_data

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx, 0]
        text = self.csv_text_data.get(img_name, "")
        
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.df.iloc[idx, 1], dtype=torch.long) if 'target' in self.df.columns else torch.tensor(0)
        }

# 학습 함수
def train(model, loader, criterion, optimizer, scheduler, device):
    model.train()
    total_loss = 0
    for batch in tqdm(loader, desc="Training"):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    scheduler.step()
    return total_loss / len(loader)

# 평가 함수
def evaluate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    predictions = []
    true_labels = []
    
    with torch.no_grad():
        for batch in tqdm(loader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    
    avg_loss = total_loss / len(loader)
    f1 = f1_score(true_labels, predictions, average='weighted')
    
    return avg_loss, f1, predictions, true_labels

# CSV에서 텍스트 데이터 로드
def load_text_from_csv(csv_path):
    df = pd.read_csv(csv_path)
    return {row['image']: ' '.join([text for text in eval(row['texts']) if text != '<extra_id_0>']) for _, row in df.iterrows()}

# 메인 함수
def main():
    # 데이터 준비
    data_path = '../data/'
    train_csv_path = './corrected_train_texts.csv'
    test_csv_path = './corrected_test_texts.csv' 

    train_csv_text_data = load_text_from_csv(train_csv_path)
    test_csv_text_data = load_text_from_csv(test_csv_path)

    df = pd.read_csv(f"{data_path}/train_correct_labeling.csv")
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    # 토크나이저 준비
    tokenizer = AutoTokenizer.from_pretrained('klue/bert-base')

    # 데이터셋 및 데이터로더 준비
    train_dataset = TextDataset(train_df, train_csv_text_data, tokenizer)
    val_dataset = TextDataset(val_df, train_csv_text_data, tokenizer)

    BATCH_SIZE = 32
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    num_classes = len(df['target'].unique())
    criterion = nn.CrossEntropyLoss()

    # BERT 모델 학습
    bert_model = BERTModel(num_classes).to(device)
    bert_optimizer = torch.optim.AdamW(bert_model.parameters(), lr=2e-5, weight_decay=0.01)
    bert_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(bert_optimizer, T_max=30)

    best_val_f1 = 0
    for epoch in range(30):
        train_loss = train(bert_model, train_loader, criterion, bert_optimizer, bert_scheduler, device)
        val_loss, val_f1, _, _ = evaluate(bert_model, val_loader, criterion, device)
        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}")
        
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(bert_model.state_dict(), 'best_bert_model.pth')
            print(f"New best model saved with F1 score: {best_val_f1:.4f}")

    # 테스트 데이터 준비
    test_df = pd.read_csv(f"{data_path}/sample_submission.csv")
    test_dataset = TextDataset(test_df, test_csv_text_data, tokenizer)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    # 테스트 데이터 추론
    bert_model.load_state_dict(torch.load('best_bert_model.pth'))
    bert_model.eval()
    test_predictions = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Predicting test data"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = bert_model(input_ids, attention_mask)
            _, predicted = torch.max(outputs, 1)
            test_predictions.extend(predicted.cpu().numpy())

    # 결과 저장
    submission_df = pd.DataFrame({'ID': test_df['ID'], 'target': test_predictions})
    submission_df.to_csv("bert_text_pred.csv", index=False)
    print("Test predictions saved to bert_text_pred.csv")

if __name__ == "__main__":
    main()

# 2가지 모델 앙상블

- 앙상블 파라미터 최적화(mento comments)

In [None]:
import os
import timm
import albumentations as A
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, log_loss
from scipy.optimize import minimize
import torch.nn.functional as F
import gc

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# 컨피던스 임계값 설정
CONFIDENCE_THRESHOLD = 0.9

# CUDA 메모리 캐시 정리 함수
def clear_cuda_memory():
    gc.collect()
    torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        if isinstance(csv, pd.DataFrame):
            self.df = csv.values
        else:
            self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)
        if not os.path.exists(img_path):
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            img = np.array(Image.open(img_path).convert('RGB'))
        
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device, scaler):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
            preds = model(image)
            loss = loss_fn(preds, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

        # 메모리 정리
        del image, targets, preds, loss
        clear_cuda_memory()

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for image, targets in loader:
            image = image.to(device)
            targets = targets.to(device)

            with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
                preds = model(image)
                loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

            # 메모리 정리
            del image, targets, preds, loss
            clear_cuda_memory()

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

# Early Stopping 클래스
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
        self.best_f1 = -np.Inf

    def __call__(self, val_loss, f1_score, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, f1_score, model):
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
        self.best_f1 = f1_score

# 모델 훈련 함수 (메모리 최적화)
def train_model(model_name, train_loader, val_loader, device, epochs, lr, initial_batch_size):
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs)
    early_stopping = EarlyStopping(patience=5, verbose=True, delta=0.001, path=f'aug_{model_name}_model.pth')
    scaler = torch.amp.GradScaler() 

    best_val_f1 = 0
    current_batch_size = initial_batch_size

    for epoch in range(epochs):
        try:
            train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device, scaler)
            val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
            scheduler.step()

            print(f"Epoch {epoch+1}/{epochs}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                torch.save(model.state_dict(), f"aug_{model_name}_model.pth")
            
            early_stopping(val_loss, val_f1, model)
            if early_stopping.early_stop:
                print(f"Early stopping. Best validation loss: {early_stopping.val_loss_min:.6f}, "
                      f"Best F1 score: {early_stopping.best_f1:.6f}")
                break

        except RuntimeError as e:
            if "out of memory" in str(e):
                clear_cuda_memory()
                current_batch_size = max(current_batch_size // 2, 1)
                print(f"CUDA out of memory. Reducing batch size to {current_batch_size}")
                train_loader = DataLoader(train_loader.dataset, batch_size=current_batch_size, shuffle=True, num_workers=train_loader.num_workers, pin_memory=True)
                val_loader = DataLoader(val_loader.dataset, batch_size=current_batch_size, shuffle=False, num_workers=val_loader.num_workers, pin_memory=True)
            else:
                raise e

    return model

# 예측 확률 계산 함수
def get_predictions(model, loader, device):
    model.eval()
    all_probs = []
    all_confidences = []
    
    with torch.no_grad():
        for images, _ in loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            confidences, _ = torch.max(probs, dim=1)
            all_probs.append(probs.cpu().numpy())
            all_confidences.append(confidences.cpu().numpy())
    
    return np.concatenate(all_probs), np.concatenate(all_confidences)

# 앙상블 최적화 함수
def optimize_ensemble(pred1_prob, pred2_prob, true_labels):
    def objective(weights):
        ensemble_pred_prob = weights[0] * pred1_prob + weights[1] * pred2_prob
        return log_loss(true_labels, ensemble_pred_prob)

    initial_weights = [0.5, 0.5]
    constraints = {'type': 'eq', 'fun': lambda w: 1 - sum(w)}
    bounds = [(0, 1), (0, 1)]

    result = minimize(objective, initial_weights, bounds=bounds, constraints=constraints)
    return result.x

# 기존 모델 로드 할때 필요한 함수
# def load_model(model_name, model_path, device):
#     if model_name == 'swin_large_patch4_window7_224':
#         model = timm.create_model(model_name, pretrained=False, num_classes=17)
#     elif model_name == 'convnextv2_large':
#         model = timm.create_model(model_name, pretrained=False, num_classes=17)
#     else:
#         raise ValueError(f"Unsupported model: {model_name}")
    
#     model.load_state_dict(torch.load(model_path, map_location=device))
#     model = model.to(device)
#     model.eval()
#     return model

# # 메인 실행 코드 내에서
# if __name__ == "__main__":

#     # 훈련된 모델 불러오기
#     swin_model = load_model('swin_large_patch4_window7_224', 'aug_swin_model.pth', device)
#     convnext_model = load_model('convnextv2_large', 'aug_conv_model_final.pth', device)


# 메인 실행 코드
if __name__ == "__main__":
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    img_size = 224
    INITIAL_BATCH_SIZE = 32
    num_workers = 4

    # 데이터 증강 및 변환 설정
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

# 데이터 로드 및 분할
    df = pd.read_csv(os.path.join(data_path, "augmented_train.csv"))
    train_df, val_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "augmented_train/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "augmented_train/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=INITIAL_BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=INITIAL_BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=INITIAL_BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # 모델 훈련
    swin_model = train_model('swin_large_patch4_window7_224', train_loader, val_loader, device, epochs=100, lr=2e-5, initial_batch_size=INITIAL_BATCH_SIZE)
    clear_cuda_memory()
    convnext_model = train_model('convnextv2_large', train_loader, val_loader, device, epochs=100, lr=1e-4, initial_batch_size=INITIAL_BATCH_SIZE)

    torch.save(swin_model.state_dict(), "aug_swin_model.pth")
    torch.save(convnext_model.state_dict(), "aug_conv_model.pth")

    # 검증 데이터에 대한 예측
    swin_preds = get_predictions(swin_model, val_loader, device)
    convnext_preds = get_predictions(convnext_model, val_loader, device)

    # 최적의 가중치 계산
    true_labels = val_df['target'].values
    optimized_weights = optimize_ensemble(swin_preds, convnext_preds, true_labels)
    print("Optimized Weights:", optimized_weights)

    # 앙상블 예측
    ensemble_preds = optimized_weights[0] * swin_preds + optimized_weights[1] * convnext_preds
    # 정규화 추가
    epsilon = 1e-7
    ensemble_preds = (ensemble_preds + epsilon) / np.sum(ensemble_preds + epsilon, axis=1, keepdims=True)
    ensemble_classes = np.argmax(ensemble_preds, axis=1)

    # 성능 평가
    accuracy = accuracy_score(true_labels, ensemble_classes)
    f1 = f1_score(true_labels, ensemble_classes, average='macro')
    print(f"Ensemble Accuracy: {accuracy:.4f}")
    print(f"Ensemble F1 Score: {f1:.4f}")

    # 테스트 데이터 추론
    swin_test_preds = get_predictions(swin_model, test_loader, device)
    convnext_test_preds = get_predictions(convnext_model, test_loader, device)
    ensemble_test_preds = optimized_weights[0] * swin_test_preds + optimized_weights[1] * convnext_test_preds
    ensemble_test_classes = np.argmax(ensemble_test_preds, axis=1)

    # 결과 저장
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = ensemble_test_classes
    pred_df.to_csv("ensemble_pred.csv", index=False)
    print("Prediction completed and saved to ensemble_pred.csv")

### 우선 모델 선택 (성능 향상 없음...)

In [10]:
from tqdm import tqdm
import torch.nn.functional as F

# 컨피던스 임계값 설정
CONFIDENCE_THRESHOLD = 0.95

def get_predictions(model, loader, device):
    model.eval()
    all_probs = []
    all_confidences = []
    
    with torch.no_grad():
        for images, _ in loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            confidences, _ = torch.max(probs, dim=1)
            all_probs.append(probs.cpu().numpy())
            all_confidences.append(confidences.cpu().numpy())
    
    return np.concatenate(all_probs), np.concatenate(all_confidences)

def optimize_ensemble(pred1_prob, pred2_prob, true_labels):
    def objective(weights):
        ensemble_pred_prob = weights[0] * pred1_prob + weights[1] * pred2_prob
        return log_loss(true_labels, ensemble_pred_prob)

    initial_weights = [0.5, 0.5]
    constraints = {'type': 'eq', 'fun': lambda w: 1 - sum(w)}
    bounds = [(0, 1), (0, 1)]

    result = minimize(objective, initial_weights, bounds=bounds, constraints=constraints)
    return result.x

# 검증 데이터에 대한 예측
print("Predicting on validation data...")
convnext_preds, convnext_confidences = get_predictions(convnext_model, tqdm(val_loader, desc="ConvNeXt predictions"), device)
swin_preds, swin_confidences = get_predictions(swin_model, tqdm(val_loader, desc="Swin predictions"), device)

print("Calculating optimal weights...")
true_labels = val_df['target'].values
optimized_weights = optimize_ensemble(convnext_preds, swin_preds, true_labels)
print("Optimized Weights:", optimized_weights)

print("Performing ensemble prediction...")
ensemble_preds = np.zeros_like(swin_preds)
for i in range(len(swin_preds)):
    if swin_confidences[i] > CONFIDENCE_THRESHOLD:
        ensemble_preds[i] = swin_preds[i]
    else:
        ensemble_preds[i] = optimized_weights[0] * swin_preds[i] + optimized_weights[1] * convnext_preds[i]

# 정규화
epsilon = 1e-7
ensemble_preds = (ensemble_preds + epsilon) / np.sum(ensemble_preds + epsilon, axis=1, keepdims=True)
ensemble_classes = np.argmax(ensemble_preds, axis=1)

# 성능 평가
print("Evaluating ensemble performance...")
accuracy = accuracy_score(true_labels, ensemble_classes)
f1 = f1_score(true_labels, ensemble_classes, average='macro')
print(f"Ensemble Accuracy: {accuracy:.4f}")
print(f"Ensemble F1 Score: {f1:.4f}")

# 테스트 데이터 추론
print("Predicting on test data...")
swin_test_preds, swin_test_confidences = get_predictions(swin_model, tqdm(test_loader, desc="Swin test predictions"), device)
convnext_test_preds, convnext_test_confidences = get_predictions(convnext_model, tqdm(test_loader, desc="ConvNeXt test predictions"), device)

ensemble_test_preds = np.zeros_like(swin_test_preds)
for i in range(len(swin_test_preds)):
    if swin_test_confidences[i] > CONFIDENCE_THRESHOLD:
        ensemble_test_preds[i] = swin_test_preds[i]
    else:
        ensemble_test_preds[i] = optimized_weights[0] * swin_test_preds[i] + optimized_weights[1] * convnext_test_preds[i]

# 정규화
ensemble_test_preds = (ensemble_test_preds + epsilon) / np.sum(ensemble_test_preds + epsilon, axis=1, keepdims=True)
ensemble_test_classes = np.argmax(ensemble_test_preds, axis=1)

# 결과 저장
print("Saving predictions...")
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = ensemble_test_classes
pred_df.to_csv("ensemble_pred_3.csv", index=False)
print("Prediction completed and saved to ensemble_pred.csv")

Predicting on validation data...


ConvNeXt predictions: 100%|██████████| 111/111 [00:20<00:00,  5.34it/s]
Swin predictions: 100%|██████████| 111/111 [00:20<00:00,  5.43it/s]


Calculating optimal weights...
Optimized Weights: [0.5 0.5]
Performing ensemble prediction...
Evaluating ensemble performance...
Ensemble Accuracy: 0.9983
Ensemble F1 Score: 0.9979
Predicting on test data...


Swin test predictions: 100%|██████████| 99/99 [00:18<00:00,  5.43it/s]
ConvNeXt test predictions: 100%|██████████| 99/99 [00:18<00:00,  5.29it/s]

Saving predictions...
Prediction completed and saved to ensemble_pred.csv





# 3가지 모델 앙상블

## LayoutLM v3 Model
- 설치가 조금 까다로울수 있음
- g++ 설치 : 소스 받아서, "pip install -e ."

In [1]:
import os
import gc
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.nn.utils.rnn import pad_sequence
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import transformers
from transformers import AutoModelForSequenceClassification, AutoProcessor
from torchsummary import summary
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)
transformers.logging.set_verbosity_error()

gc.collect()
torch.cuda.empty_cache()

# LayoutLM 데이터셋 클래스
class LayoutLMDataset(Dataset):
    def __init__(self, csv, img_dir, processor, max_length=512, is_test=False):
        if isinstance(csv, pd.DataFrame):
            self.df = csv
        else:
            self.df = pd.read_csv(csv)
        self.img_dir = img_dir
        self.processor = processor
        self.max_length = max_length
        self.is_test = is_test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.df.iloc[idx]['ID'])
        
        image = Image.open(img_name).convert("RGB")
        
        encoding = self.processor(image, return_tensors="pt", truncation=True, max_length=self.max_length)
        
        for key in ['input_ids', 'attention_mask', 'bbox']:
            encoding[key] = encoding[key].squeeze(0)
        
        encoding['pixel_values'] = encoding['pixel_values'].squeeze(0)
        
        if not self.is_test:
            label = self.df.iloc[idx]['target']
            encoding['labels'] = torch.tensor(label, dtype=torch.long)
        else:
            encoding['labels'] = torch.tensor(0, dtype=torch.long)  # dummy label for test set
        
        return encoding

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for batch in pbar:
        batch = {k: v.to(device) for k, v in batch.items()}
        
        optimizer.zero_grad()

        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(outputs.logits.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(batch['labels'].detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return train_loss, train_acc, train_f1

def custom_collate_fn(batch):
    input_ids = [item['input_ids'] for item in batch]
    attention_mask = [item['attention_mask'] for item in batch]
    bbox = [item['bbox'] for item in batch]
    pixel_values = [item['pixel_values'] for item in batch]
    labels = [item['labels'] for item in batch]

    # Pad sequences
    input_ids = pad_sequence(input_ids, batch_first=True, padding_value=processor.tokenizer.pad_token_id)
    attention_mask = pad_sequence(attention_mask, batch_first=True, padding_value=0)
    bbox = pad_sequence(bbox, batch_first=True, padding_value=0)

    # Stack other tensors
    pixel_values = torch.stack(pixel_values)
    labels = torch.stack(labels)

    return {
        'pixel_values': pixel_values,
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'bbox': bbox,
        'labels': labels,
    }

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}

            outputs = model(**batch)
            loss = outputs.loss

            val_loss += loss.item()
            preds_list.extend(outputs.logits.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(batch['labels'].detach().cpu().numpy())

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    return val_loss, val_acc, val_f1

# Early Stopping 클래스
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
        self.best_f1 = -np.Inf

    def __call__(self, val_loss, f1_score, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, f1_score, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, f1_score, model):
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
        self.best_f1 = f1_score

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    summary(model, input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = "microsoft/layoutlmv3-base"
    global processor  # processor를 전역 변수로 선언
    LR = 1e-5
    EPOCHS = 2
    BATCH_SIZE = 8
    num_workers = 4

    # 모델과 프로세서 초기화
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=17)
    processor = AutoProcessor.from_pretrained(model_name)
    model = model.to(device)


    # 데이터 로드 및 분할
    df = pd.read_csv(os.path.join(data_path, "augmented_train.csv"))
    train_df, val_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df['target'])

    train_dataset = LayoutLMDataset(train_df, os.path.join(data_path, "augmented_train/"), processor)
    val_dataset = LayoutLMDataset(val_df, os.path.join(data_path, "augmented_train/"), processor)
    test_dataset = LayoutLMDataset(os.path.join(data_path, "sample_submission.csv"), 
                                os.path.join(data_path, "test/"), 
                                processor, 
                                is_test=True)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True, collate_fn=custom_collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True, collate_fn=custom_collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True, collate_fn=custom_collate_fn)
    # 모델 설정
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    early_stopping = EarlyStopping(patience=7, verbose=True, delta=0.001, path='aug_layoutlmv3_model.pth')

    # 학습 루프
    best_val_f1 = 0
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "aug_layoutlmv3_model.pth")
        
        early_stopping(val_loss, val_f1, model)
        if early_stopping.early_stop:
            print(f"Early stopping. Best validation loss: {early_stopping.val_loss_min:.6f}, "
                  f"Best F1 score: {early_stopping.best_f1:.6f}")
            break

    # 테스트 데이터 추론
    model.load_state_dict(torch.load("aug_layoutlmv3_model.pth"))
    model.eval()
    preds_list = []

    for batch in tqdm(test_loader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        preds_list.extend(outputs.logits.argmax(dim=1).detach().cpu().numpy())

    # 결과 저장
    pred_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
    pred_df['target'] = preds_list
    pred_df.to_csv("pred_layoutlmv3.csv", index=False)
    print("Prediction completed and saved to pred_layoutlmv3.csv")

Loss: 2.9540:   6%|▌         | 57/1031 [00:24<04:56,  3.29it/s]

## OCR 멀티모달

In [8]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from timm import create_model
import pandas as pd
from tqdm import tqdm
from PIL import Image

# 멀티모달 데이터셋 클래스
class MultimodalDataset(Dataset):
    def __init__(self, df, image_dir, csv_text_data, transform=None, tokenizer=None, max_len=512):
        self.df = df
        self.image_dir = image_dir
        self.transform = transform
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.csv_text_data = csv_text_data

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx, 0]
        img_path = f"{self.image_dir}/{img_name}"
        
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        text = self.csv_text_data.get(img_name, "")
        
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        
        return {
            'image': image,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'text': text
        }

# CSV에서 텍스트 데이터 로드
def load_text_from_csv(csv_path):
    df = pd.read_csv(csv_path)
    return {row['image']: ' '.join([text for text in eval(row['texts']) if text != '<extra_id_0>']) for _, row in df.iterrows()}

# state_dict 키 변경 함수
def rename_keys(state_dict, key_map):
    for old_key in list(state_dict.keys()):
        if old_key in key_map:
            state_dict[key_map[old_key]] = state_dict.pop(old_key)
    return state_dict

# 테스트 데이터 추론
def predict_test_data():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # 데이터 준비
    data_path = '../data/'
    test_csv_path = './corrected_test_texts.csv'
    test_csv_text_data = load_text_from_csv(test_csv_path)
    test_df = pd.read_csv(f"{data_path}/sample_submission.csv")

    # 모델 로드
    num_classes = 17  # 클래스 수에 맞게 조정
    bert_model = AutoModelForSequenceClassification.from_pretrained('klue/bert-base', num_labels=num_classes)
    state_dict = torch.load('best_bert_model.pth')
    
    # 키 변경 맵 정의
    key_map = {
        "fc.weight": "classifier.weight",
        "fc.bias": "classifier.bias"
    }
    
    # state_dict 키 변경
    state_dict = rename_keys(state_dict, key_map)
    
    # state_dict 로드
    bert_model.load_state_dict(state_dict)
    bert_model.to(device)
    bert_model.eval()

    swin_model = create_model('swin_large_patch4_window7_224', pretrained=False, num_classes=num_classes)
    swin_model.load_state_dict(torch.load('swin_large.pt'))
    swin_model.to(device)
    swin_model.eval()

    convnext_model = create_model('convnextv2_large', pretrained=False, num_classes=num_classes)
    convnext_model.load_state_dict(torch.load('convnextv2_large.pt'))
    convnext_model.to(device)
    convnext_model.eval()

    # 토크나이저 및 변환 준비
    tokenizer = AutoTokenizer.from_pretrained('klue/bert-base')
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # 데이터셋 및 데이터로더 준비
    test_dataset = MultimodalDataset(test_df, f"{data_path}/test_preprocessed", test_csv_text_data, transform, tokenizer)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

    test_predictions = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Predicting test data"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            images = batch['image'].to(device)
            texts = batch['text']

            bert_outputs = bert_model(input_ids, attention_mask).logits
            swin_outputs = swin_model(images)
            convnext_outputs = convnext_model(images)

            for i in range(len(texts)):
                word_count = len(texts[i].split())
                if word_count >= 20:
                    ensemble_outputs = 0.4 * bert_outputs[i] + 0.4 * swin_outputs[i] + 0.2 * convnext_outputs[i]
                else:
                    ensemble_outputs = 0.7 * swin_outputs[i] + 0.3 * convnext_outputs[i]

                _, predicted = torch.max(ensemble_outputs, 0)
                test_predictions.append(predicted.item())

    # 결과 저장
    submission_df = pd.DataFrame({'ID': test_df['ID'], 'target': test_predictions})
    submission_df.to_csv("ensemble_pred.csv", index=False)
    print("Test predictions saved to ensemble_pred.csv")

if __name__ == "__main__":
    predict_test_data()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at klue/bert-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Predicting test data: 100%|██████████| 99/99 [00:58<00:00,  1.70it/s]

Test predictions saved to ensemble_pred.csv



