# CNN 기반 모델

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

## Simple EfficientNet-B0

In [None]:
import gc
# Collect unreachable Python objects first, then ask PyTorch's CUDA caching
# allocator to hand back memory blocks it is holding but not using.
gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs listed in a two-column table.

    Args:
        csv: Either a ``pandas.DataFrame`` or a path readable by
            ``pandas.read_csv``. Each row is unpacked as ``(name, target)``.
        path: Directory that every file name is joined onto.
        transform: Optional callable invoked as ``transform(image=img)``;
            the ``'image'`` entry of its result replaces the raw array.
    """

    def __init__(self, csv, path, transform=None):
        # The rows are stored as a NumPy array under ``df``; other cells read
        # ``dataset.df`` directly, so the attribute name must stay as is.
        table = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = table.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return ``(image, target)``."""
        name, target = self.df[idx]
        # Force three channels: the transforms used with this dataset
        # normalise with 3-element mean/std, so grayscale, palette or RGBA
        # files would otherwise fail or be normalised incorrectly.
        img = np.array(Image.open(os.path.join(self.path, name)).convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [None]:
# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader`` and report epoch metrics.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to optimise; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)`` for the epoch.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for images, labels in progress:
        images, labels = images.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(labels.detach().cpu().numpy())

        # Show the most recent batch loss on the progress bar.
        progress.set_description(f"Loss: {batch_loss:.4f}")

    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(all_targets, all_preds),
        f1_score(all_targets, all_preds, average='macro'),
    )

In [None]:
# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to evaluate; switched to eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)``.
    """
    model.eval()
    total_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            total_loss += loss_fn(logits, labels).item()

            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(labels.detach().cpu().numpy())

    mean_loss = total_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

In [None]:
# Hyperparameter configuration
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'
model_name = 'efficientnet_b0'  # identifier later handed to timm.create_model
img_size = 384  # square side length used by the Resize transforms below
LR = 1e-3
EPOCHS = 30
BATCH_SIZE = 32
num_workers = 4

# Augmentation pipeline for training images
# NOTE(review): the mean/std values look like the usual ImageNet statistics.
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Validation/test pipeline: resize and normalise only, no random augmentation.
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [None]:
# Load the labels and split them into train/validation subsets.
# All paths are derived from ``data_path`` so that changing the configured
# data directory actually takes effect (previously the literal '../data/'
# was repeated here and the setting was ignored).
df = pd.read_csv(os.path.join(data_path, "train.csv"))
# Stratify on the label column so both subsets keep the class proportions;
# the fixed random_state makes the split reproducible.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

train_image_dir = os.path.join(data_path, "train_preprocessed/")
test_image_dir = os.path.join(data_path, "test_preprocessed/")

train_dataset = ImageDataset(train_df, train_image_dir, transform=train_transform)
val_dataset = ImageDataset(val_df, train_image_dir, transform=val_transform)
# The submission template supplies the test file names.
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), test_image_dir, transform=val_transform)

# Only the training loader shuffles; validation/test order stays fixed so
# predictions line up with the rows of the underlying tables.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

In [None]:
# Model, loss, optimizer and learning-rate schedule
# The classifier head is created with 17 outputs; pretrained=True asks timm
# for pretrained weights.
model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)
# T_max equals the epoch count and scheduler.step() is called once per epoch
# in the training loop, so one cosine half-period covers the whole run.
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

In [None]:
from torchsummary import summary

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    """Print a summary of ``model`` for one input of shape ``input_size``.

    Thin wrapper around ``torchsummary.summary``.
    """
    summary(model, input_size=input_size)
    
# Print the summary produced by torchsummary for one 3-channel input
print(f"\nModel structure of {model_name}:")
print_model_summary(model, (3, img_size, img_size))

# Print the module tree (the model's own repr)
print("\nModel architecture:")
print(model)

In [None]:
# Training loop
# Start below every attainable F1 so the first epoch always writes a
# checkpoint. With the previous sentinel of 0 and a strict ">" comparison, a
# run whose validation F1 never rose above 0 saved nothing, and the inference
# cell would then fail or pick up a stale "best_model.pth" from another run.
best_val_f1 = float("-inf")
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
    # The scheduler is advanced once per epoch (T_max was set to EPOCHS).
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    # Keep only the weights with the best validation macro F1 seen so far.
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")

In [None]:
# Test-set inference
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written during a GPU run can also be restored on a CPU-only box.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()
preds_list = []

# The second element of each batch is the target column of the submission
# template (presumably a placeholder) and is ignored.
# One no_grad scope around the whole loop instead of re-entering it per batch.
with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

In [None]:
# Save the results
# Rebuild a two-column frame from the rows of the test dataset, then overwrite
# the 'target' column with the predicted class indices. The test loader does
# not shuffle, so predictions are in the same order as the rows.
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

## EfficientNet-B4

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import gc

# Collect unreachable Python objects first, then ask PyTorch's CUDA caching
# allocator to hand back memory blocks it is holding but not using.
gc.collect()
torch.cuda.empty_cache()


# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs listed in a two-column table.

    Args:
        csv: Either a ``pandas.DataFrame`` or a path readable by
            ``pandas.read_csv``. Each row is unpacked as ``(name, target)``.
        path: Directory that every file name is joined onto.
        transform: Optional callable invoked as ``transform(image=img)``;
            the ``'image'`` entry of its result replaces the raw array.
    """

    def __init__(self, csv, path, transform=None):
        # Other code reads ``dataset.df`` directly, so the rows stay under
        # that attribute name as a NumPy array.
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        A file that does not exist is reported on stdout and replaced by an
        all-zero 380x380 RGB image instead of raising.
        """
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((380, 380, 3), dtype=np.uint8)

        if not self.transform:
            return img, target
        return self.transform(image=img)['image'], target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader`` and report epoch metrics.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to optimise; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)`` for the epoch.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for images, labels in progress:
        images, labels = images.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(labels.detach().cpu().numpy())

        # Show the most recent batch loss on the progress bar.
        progress.set_description(f"Loss: {batch_loss:.4f}")

    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(all_targets, all_preds),
        f1_score(all_targets, all_preds, average='macro'),
    )

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to evaluate; switched to eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)``.
    """
    model.eval()
    total_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            total_loss += loss_fn(logits, labels).item()

            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(labels.detach().cpu().numpy())

    mean_loss = total_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    """Print a summary of ``model`` for one input of shape ``input_size``.

    Thin wrapper around ``torchsummary.summary``.
    """
    summary(model, input_size=input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # End-to-end EfficientNet-B4 run: configure, train with validation-based
    # checkpointing, predict on the test set and write pred.csv.

    # Configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'efficientnet_b4'
    img_size = 380  # input size suited to EfficientNet-B4
    LR = 5e-4  # adjusted learning rate
    EPOCHS = 30
    BATCH_SIZE = 16  # reduced batch size
    num_workers = 4

    # Augmentation pipeline for training images
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Validation/test pipeline: resize and normalise only.
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and make a reproducible, label-stratified 80/20 split.
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    # Only the training loader shuffles, so test predictions keep row order.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Model, loss, optimizer and per-epoch cosine schedule
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # Print the model summary
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # Training loop
    # Start below every attainable F1 so the first epoch always writes a
    # checkpoint; with a sentinel of 0 and a strict ">" a run stuck at F1 == 0
    # saved nothing and inference then loaded a missing or stale file.
    best_val_f1 = float("-inf")
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        # Keep only the weights with the best validation macro F1 so far.
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "best_model.pth")

    # Test-set inference with the best checkpoint.
    # map_location remaps stored tensors onto the active device.
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model.eval()
    preds_list = []

    # One no_grad scope around the whole loop; the second batch element (the
    # template's target column) is ignored.
    with torch.no_grad():
        for image, _ in tqdm(test_loader):
            image = image.to(device)
            preds = model(image)
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

    # Save the results: reuse the test rows and overwrite the target column.
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")

## ConvNext V2 Large 모델

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import gc

# Collect unreachable Python objects first, then ask PyTorch's CUDA caching
# allocator to hand back memory blocks it is holding but not using.
gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs listed in a two-column table.

    Args:
        csv: Either a ``pandas.DataFrame`` or a path readable by
            ``pandas.read_csv``. Each row is unpacked as ``(name, target)``.
        path: Directory that every file name is joined onto.
        transform: Optional callable invoked as ``transform(image=img)``;
            the ``'image'`` entry of its result replaces the raw array.
    """

    def __init__(self, csv, path, transform=None):
        # Other code reads ``dataset.df`` directly, so the rows stay under
        # that attribute name as a NumPy array.
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        A file that does not exist is reported on stdout and replaced by an
        all-zero 224x224 RGB image instead of raising.
        """
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)

        if not self.transform:
            return img, target
        return self.transform(image=img)['image'], target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader`` and report epoch metrics.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to optimise; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)`` for the epoch.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for images, labels in progress:
        images, labels = images.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(labels.detach().cpu().numpy())

        # Show the most recent batch loss on the progress bar.
        progress.set_description(f"Loss: {batch_loss:.4f}")

    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(all_targets, all_preds),
        f1_score(all_targets, all_preds, average='macro'),
    )

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to evaluate; switched to eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)``.
    """
    model.eval()
    total_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            total_loss += loss_fn(logits, labels).item()

            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(labels.detach().cpu().numpy())

    mean_loss = total_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    """Print a summary of ``model`` for one input of shape ``input_size``.

    Thin wrapper around ``torchsummary.summary``.
    """
    summary(model, input_size=input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # End-to-end ConvNeXt V2 Large run: configure, train with validation-based
    # checkpointing, predict on the test set and write pred.csv.

    # Configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'convnextv2_large'
    img_size = 224  # input size suited to ConvNeXt V2 Large
    LR = 1e-4  # adjusted learning rate
    EPOCHS = 30
    BATCH_SIZE = 32  # adjusted batch size
    num_workers = 4

    # Augmentation pipeline for training images
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Validation/test pipeline: resize and normalise only.
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and make a reproducible, label-stratified 80/20 split.
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    # Only the training loader shuffles, so test predictions keep row order.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Model, loss, optimizer and per-epoch cosine schedule
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # Print the model summary
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # Training loop
    # Start below every attainable F1 so the first epoch always writes a
    # checkpoint; with a sentinel of 0 and a strict ">" a run stuck at F1 == 0
    # saved nothing and inference then loaded a missing or stale file.
    best_val_f1 = float("-inf")
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        # Keep only the weights with the best validation macro F1 so far.
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "convNext_model.pth")

    # Test-set inference with the best checkpoint.
    # map_location remaps stored tensors onto the active device.
    model.load_state_dict(torch.load("convNext_model.pth", map_location=device))
    model.eval()
    preds_list = []

    # One no_grad scope around the whole loop; the second batch element (the
    # template's target column) is ignored.
    with torch.no_grad():
        for image, _ in tqdm(test_loader):
            image = image.to(device)
            preds = model(image)
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

    # Save the results: reuse the test rows and overwrite the target column.
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")

## Hyperparameter Tuning with CNN-Based Models

### ConvNeXt V2 Large 모델 + Optuna

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import optuna
import gc

# Collect unreachable Python objects first, then ask PyTorch's CUDA caching
# allocator to hand back memory blocks it is holding but not using.
gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs listed in a two-column table.

    Args:
        csv: Either a ``pandas.DataFrame`` or a path readable by
            ``pandas.read_csv``. Each row is unpacked as ``(name, target)``.
        path: Directory that every file name is joined onto.
        transform: Optional callable invoked as ``transform(image=img)``;
            the ``'image'`` entry of its result replaces the raw array.
    """

    def __init__(self, csv, path, transform=None):
        # Other code reads ``dataset.df`` directly, so the rows stay under
        # that attribute name as a NumPy array.
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        A file that does not exist is reported on stdout and replaced by an
        all-zero 224x224 RGB image instead of raising.
        """
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)

        if not self.transform:
            return img, target
        return self.transform(image=img)['image'], target

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader`` and report epoch metrics.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to optimise; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)`` for the epoch.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for images, labels in progress:
        images, labels = images.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(labels.detach().cpu().numpy())

        # Show the most recent batch loss on the progress bar.
        progress.set_description(f"Loss: {batch_loss:.4f}")

    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(all_targets, all_preds),
        f1_score(all_targets, all_preds, average='macro'),
    )

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to evaluate; switched to eval mode here.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)``.
    """
    model.eval()
    total_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            total_loss += loss_fn(logits, labels).item()

            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(labels.detach().cpu().numpy())

    mean_loss = total_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    """Print a summary of ``model`` for one input of shape ``input_size``.

    Thin wrapper around ``torchsummary.summary``.
    """
    summary(model, input_size=input_size)

# Optuna를 이용한 하이퍼파라미터 최적화 함수
def objective(trial):
    """Optuna objective: train one configuration and return its best val F1.

    Samples a learning rate, batch size and weight decay from ``trial``,
    trains a fresh pretrained model for ``EPOCHS`` epochs and returns the
    highest validation macro F1 observed.

    Relies on module-level ``train_dataset``, ``val_dataset``, ``num_workers``,
    ``model_name``, ``device`` and ``EPOCHS`` being defined before the study
    is run.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        Best validation macro F1 across the epochs of this trial.
    """
    # Search space. suggest_float(..., log=True) is the supported replacement
    # for the deprecated suggest_loguniform and samples the same range.
    lr = trial.suggest_float('lr', 1e-5, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [8, 16, 32])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    # Data loaders depend on the sampled batch size, so build them per trial.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Fresh model, loss, optimizer and schedule for every trial.
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    best_val_f1 = 0
    try:
        for _ in range(EPOCHS):
            train_one_epoch(train_loader, model, optimizer, loss_fn, device)
            _, _, val_f1 = validate(val_loader, model, loss_fn, device)
            scheduler.step()
            best_val_f1 = max(best_val_f1, val_f1)
    finally:
        # Drop this trial's model/optimizer state and return cached CUDA
        # memory, so successive trials (including failed ones) do not pile up
        # GPU allocations.
        del model, optimizer, scheduler
        gc.collect()
        torch.cuda.empty_cache()

    return best_val_f1

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'
model_name = 'convnextv2_large'
img_size = 224  # image size suited to ConvNeXt V2
EPOCHS = 30
num_workers = 4

# Augmentation pipeline for training images
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Validation/test pipeline: resize and normalise only, no random augmentation.
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Load the labels and make a reproducible, label-stratified 80/20 split.
# NOTE(review): this section reads train.csv while the EfficientNet-B4 and
# ConvNeXt sections read train_correct_labeling.csv - confirm which is intended.
df = pd.read_csv(os.path.join(data_path, "train.csv"))
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

# These module-level datasets are the ones objective() reads for every trial.
train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

# Hyperparameter search with Optuna: maximise the value returned by
# objective() (best validation macro F1) over 20 trials.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train the final model with the best hyperparameters found.
best_lr = best_params['lr']
best_batch_size = best_params['batch_size']
best_weight_decay = best_params['weight_decay']

train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=best_lr, weight_decay=best_weight_decay)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Print the model summary
print(f"\nModel structure of {model_name}:")
print_model_summary(model, (3, img_size, img_size))

# Training loop
# Start below every attainable F1 so the first epoch always writes a
# checkpoint; with a sentinel of 0 and a strict ">" a run stuck at F1 == 0
# saved nothing and inference then loaded a missing or stale "best_model.pth".
best_val_f1 = float("-inf")
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    # Keep only the weights with the best validation macro F1 so far.
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")

# Test-set inference with the best checkpoint.
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written during a GPU run can also be restored on a CPU-only box.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()
preds_list = []

# One no_grad scope around the whole loop; the second batch element (the
# submission template's target column) is ignored.
with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# Save the results: reuse the test rows and overwrite the target column with
# the predicted class indices (the test loader does not shuffle).
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

### ConvNeXt V2 Large + W&B (wandb) Sweep
- pip install wandb
- wandb login

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import wandb
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# 데이터셋 클래스 정의
#class ImageDataset(Dataset):


# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader`` and report epoch metrics.

    Args:
        loader: Iterable of ``(image, targets)`` batches; ``len(loader)`` is
            used to average the loss.
        model: Network to optimise; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, macro F1)`` for the epoch.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for images, labels in progress:
        images, labels = images.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(labels.detach().cpu().numpy())

        # Show the most recent batch loss on the progress bar.
        progress.set_description(f"Loss: {batch_loss:.4f}")

    mean_loss = running_loss / len(loader)
    return (
        mean_loss,
        accuracy_score(all_targets, all_preds),
        f1_score(all_targets, all_preds, average='macro'),
    )

# 검증 함수 정의
#def validate(loader, model, loss_fn, device):

# 모델 구조 출력 함수
#def print_model_summary(model, input_size):


# wandb sweep을 위한 학습 함수
def train():
    """Train one W&B sweep configuration and log per-epoch metrics.

    Reads ``model_name``, ``img_size``, ``learning_rate``, ``epochs``,
    ``batch_size`` and ``weight_decay`` from ``wandb.config``, trains with
    validation after every epoch and logs the metrics to the active run.

    Whenever validation macro F1 improves, the weights are written both to a
    per-run file ``best_model_<run id>.pth`` and to the shared
    ``best_model.pth``. The shared file is overwritten by every run of the
    sweep, so only the per-run file is guaranteed to belong to this run.

    NOTE: ``ImageDataset``, ``validate`` and ``print_model_summary`` are not
    defined in this cell; they must already exist from an earlier cell.
    """
    # Initialise the W&B run.
    run = wandb.init(entity="cho")  # set this to your own W&B entity
    config = wandb.config

    # Configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = config.model_name
    img_size = config.img_size
    LR = config.learning_rate
    EPOCHS = config.epochs
    BATCH_SIZE = config.batch_size
    num_workers = 4

    # Augmentation pipeline for training images
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Validation pipeline: resize and normalise only.
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and make a reproducible, label-stratified 80/20 split.
    df = pd.read_csv(os.path.join(data_path, "train.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Model, loss, optimizer and per-epoch cosine schedule
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=config.weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # Print the model summary
    print(f"\nModel structure of {model_name}:")
    print_model_summary(model, (3, img_size, img_size))

    # Checkpoint that belongs to this run only.
    run_checkpoint = f"best_model_{run.id}.pth"

    # Training loop
    best_val_f1 = 0
    for epoch in range(EPOCHS):
        # Read the learning rate before scheduler.step() so the logged value
        # is the one this epoch actually trained with, not the next epoch's.
        epoch_lr = optimizer.param_groups[0]['lr']

        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        # Log the epoch metrics to W&B.
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "train_f1": train_f1,
            "val_loss": val_loss,
            "val_acc": val_acc,
            "val_f1": val_f1,
            "learning_rate": epoch_lr
        })

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            state = model.state_dict()
            torch.save(state, run_checkpoint)
            # Also keep the shared file name that existing cells load.
            torch.save(state, "best_model.pth")
            wandb.run.summary["best_val_f1"] = best_val_f1

    wandb.finish()

# W&B sweep configuration: random search that maximises the logged 'val_f1'.
# Every key under 'parameters' is read from wandb.config inside train().
sweep_config = {
    'method': 'random',
    'metric': {
        'name': 'val_f1',
        'goal': 'maximize'
    },
    'parameters': {
        'model_name': {
            'values': ['convnextv2_large', 'efficientnet_b4']
        },
        'learning_rate': {
            'distribution': 'uniform',
            'min': 1e-5,
            'max': 1e-3
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'img_size': {
            'values': [224, 256, 288]
        },
        'weight_decay': {
            'values': [1e-5, 1e-4, 1e-3]
        },
        'epochs': {
            'value': 30
        }
    }
}

# Register the sweep and run up to 30 configurations through train().
sweep_id = wandb.sweep(sweep_config, project="cvmodel",entity="cho")
wandb.agent(sweep_id, train, count=30)

In [None]:

# Fetch the configuration of the best run in the sweep.
# The sweep was created under entity "cho" / project "cvmodel", so it has to
# be looked up under the same entity (it was previously fetched from "dl-12").
WANDB_ENTITY = "cho"  # set this to your own W&B entity
WANDB_PROJECT = "cvmodel"

api = wandb.Api()
sweep = api.sweep(f"{WANDB_ENTITY}/{WANDB_PROJECT}/{sweep_id}")
best_run = sweep.best_run()
best_config = best_run.config

# Within this section these two names only exist as locals of train(), so
# define them here instead of depending on variables left by other sections.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'

# Reuse the settings of the best run.
model_name = best_config['model_name']
img_size = best_config['img_size']
BATCH_SIZE = best_config['batch_size']
num_workers = 4

# Test-time transform: resize and normalise only.
test_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Test dataset and loader (no shuffling, so predictions keep row order).
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# Prefer a checkpoint named after the best run when one exists. Otherwise fall
# back to the shared "best_model.pth".
# NOTE(review): the shared file is overwritten by every sweep run, so it may
# hold weights from a run other than best_run (possibly another architecture).
checkpoint_path = f"best_model_{best_run.id}.pth"
if not os.path.exists(checkpoint_path):
    checkpoint_path = "best_model.pth"

# Rebuild the best architecture and restore its weights; map_location remaps
# stored tensors onto the active device.
model = timm.create_model(model_name, pretrained=False, num_classes=17).to(device)
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()

# Test-set inference
preds_list = []

with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# Save the results: reuse the test rows and overwrite the target column.
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("pred.csv", index=False)
print("Prediction completed and saved to pred.csv")

# Upload the predictions to W&B in a dedicated run.
wandb.init(project=WANDB_PROJECT, name="best_model_prediction", entity=WANDB_ENTITY)
wandb.config.update(best_config)
wandb.save("pred.csv")
wandb.finish()

# Transformer 기반 모델

## Swin Transformers

In [2]:
import torch
import gc

gc.collect()
torch.cuda.empty_cache()

In [3]:
import os
import time
import timm

import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary


# Dataset class definition
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs listed in a two-column table.

    Args:
        csv: Either a ``pandas.DataFrame`` or anything ``pandas.read_csv``
            accepts; each row is unpacked as ``(file name, target)``.
        path: Directory the file names are joined onto.
        transform: Optional callable invoked as ``transform(image=img)``;
            its ``'image'`` entry replaces the raw array.
    """

    def __init__(self, csv, path, transform=None):
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            # Best-effort fallback: warn and substitute a black 224x224 RGB image.
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)

        if not self.transform:
            return img, target
        return self.transform(image=img)['image'], target

# Training function definition
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader``.

    Each batch is moved to ``device``, followed by a forward pass, a backward
    pass and one optimizer step. Progress is shown through a tqdm bar whose
    description carries the current batch loss.

    Returns:
        Tuple ``(loss, accuracy, macro_f1)`` where ``loss`` is the sum of the
        per-batch losses divided by ``len(loader)`` and the two metrics are
        computed over every prediction made during the epoch.
    """
    model.train()
    running_loss = 0
    predicted, expected = [], []

    progress = tqdm(loader)
    for batch_images, batch_labels in progress:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()

        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        predicted.extend(logits.argmax(dim=1).detach().cpu().numpy())
        expected.extend(batch_labels.detach().cpu().numpy())

        progress.set_description(f"Loss: {loss_value:.4f}")

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(expected, predicted)
    macro_f1 = f1_score(expected, predicted, average='macro')

    return mean_loss, accuracy, macro_f1

# Validation function definition
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` with gradients disabled.

    Returns:
        Tuple ``(loss, accuracy, macro_f1)`` where ``loss`` is the sum of the
        per-batch losses divided by ``len(loader)``.
    """
    model.eval()
    total_loss = 0
    predicted, expected = [], []

    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            total_loss += loss_fn(logits, batch_labels).item()

            predicted.extend(logits.argmax(dim=1).detach().cpu().numpy())
            expected.extend(batch_labels.detach().cpu().numpy())

    mean_loss = total_loss / len(loader)
    accuracy = accuracy_score(expected, predicted)
    macro_f1 = f1_score(expected, predicted, average='macro')

    return mean_loss, accuracy, macro_f1

# Early stopping class
class EarlyStopping:
    """Signal when validation loss has stalled and F1 is high enough.

    The tracked score is the negated validation loss. A call counts as a
    stall when the score is below ``best_score + min_delta``; otherwise the
    best score is replaced and the stall counter is reset. ``early_stop``
    becomes True on a stalled call once the counter has reached ``patience``
    and that call's ``val_f1`` is at least ``min_f1_score``.
    """

    def __init__(self, patience=7, min_delta=0, min_f1_score=0.9):
        self.patience = patience
        self.min_delta = min_delta
        self.min_f1_score = min_f1_score
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, val_f1):
        candidate = -val_loss

        # The very first observation only seeds the baseline.
        if self.best_score is None:
            self.best_score = candidate
            return

        threshold = self.best_score + self.min_delta
        if not candidate < threshold:
            # Enough improvement: adopt the new best and start counting afresh.
            self.best_score = candidate
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience and val_f1 >= self.min_f1_score:
            self.early_stop = True
            
# Model structure printing helper
def print_model_summary(model, input_size):
    """Print the structure of ``model`` by delegating to torchsummary's ``summary``.

    Args:
        model: The model to summarise.
        input_size: Forwarded unchanged to ``summary``; the (commented-out)
            call site in this file passes ``(3, img_size, img_size)``.
    """
    summary(model, input_size)


# Main script: fine-tune a Swin Transformer on a stratified hold-out split,
# keep the checkpoint with the best validation macro-F1, then predict the test set.
if __name__ == "__main__":
    # Configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'swin_large_patch4_window7_224'  # Swin Transformer Large model
    img_size = 224  # image size suitable for this Swin Transformer variant
    LR = 2e-5  # changed from 1e-4 (author's note: the earlier value performed better)
    EPOCHS = 100  # changed from 30 (author's note: the earlier value performed better)
    BATCH_SIZE = 32  # adjusted batch size
    num_workers = 4
    checkpoint_path = "swin_t_model.pth"

    # Data augmentation for training
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Validation / test transform: resize + normalisation only
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and make a stratified 80/20 train/validation split
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
    val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    # shuffle=False keeps predictions aligned with the rows of sample_submission.csv
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Model, loss, optimizer and LR schedule
    model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    # Early stopping configuration
    early_stopping = EarlyStopping(patience=5, min_delta=0.001, min_f1_score=0.93)

    # Print the model structure
    # print(f"\nModel structure of {model_name}:")
    # print_model_summary(model, (3, img_size, img_size))

    # Training loop
    # Start below any reachable F1 so the first epoch always writes a
    # checkpoint; otherwise a run whose F1 never exceeds 0 would leave no file
    # (or a stale file from a previous run) for the inference step to load.
    best_val_f1 = float("-inf")
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        # Keep only the weights with the best validation macro-F1 so far
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), checkpoint_path)

        # Early stopping check
        early_stopping(val_loss, val_f1)
        if early_stopping.early_stop:
            print(f"Early stopping triggered at epoch {epoch+1} with F1 score: {val_f1:.4f}")
            break

    # Test-set inference with the best checkpoint.
    # map_location makes the load work regardless of which device saved the
    # file; weights_only=True restricts unpickling to tensors, which is all a
    # state_dict contains, and avoids torch.load's FutureWarning.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device, weights_only=True))
    model.eval()
    preds_list = []

    with torch.no_grad():
        for image, _ in tqdm(test_loader):
            image = image.to(device)
            preds = model(image)
            preds_list.extend(preds.argmax(dim=1).cpu().numpy())

    # Save the predictions in submission format
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = preds_list
    pred_df.to_csv("pred_swin.csv", index=False)
    print("Prediction completed and saved to pred_swin.csv")


Loss: 1.2255: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 1/100
Train Loss: 2.1225, Train Acc: 0.4387, Train F1: 0.4000
Val Loss: 0.9821, Val Acc: 0.7452, Val F1: 0.6543


Loss: 0.2761: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s]


Epoch 2/100
Train Loss: 0.7152, Train Acc: 0.8089, Train F1: 0.7800
Val Loss: 0.3570, Val Acc: 0.8758, Val F1: 0.8396


Loss: 0.2044: 100%|██████████| 40/40 [00:20<00:00,  1.94it/s]


Epoch 3/100
Train Loss: 0.3883, Train Acc: 0.8718, Train F1: 0.8491
Val Loss: 0.3231, Val Acc: 0.8790, Val F1: 0.8662


Loss: 0.4099: 100%|██████████| 40/40 [00:20<00:00,  1.94it/s]


Epoch 4/100
Train Loss: 0.2965, Train Acc: 0.8949, Train F1: 0.8821
Val Loss: 0.2384, Val Acc: 0.9076, Val F1: 0.8900


Loss: 0.0110: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 5/100
Train Loss: 0.2494, Train Acc: 0.9005, Train F1: 0.8933
Val Loss: 0.2322, Val Acc: 0.9108, Val F1: 0.9017


Loss: 0.2672: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 6/100
Train Loss: 0.2011, Train Acc: 0.9291, Train F1: 0.9252
Val Loss: 0.2229, Val Acc: 0.9108, Val F1: 0.8936


Loss: 0.1630: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 7/100
Train Loss: 0.1676, Train Acc: 0.9403, Train F1: 0.9354
Val Loss: 0.2018, Val Acc: 0.9172, Val F1: 0.9033


Loss: 0.1298: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 8/100
Train Loss: 0.1479, Train Acc: 0.9451, Train F1: 0.9407
Val Loss: 0.2498, Val Acc: 0.9076, Val F1: 0.8998


Loss: 0.1022: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 9/100
Train Loss: 0.1438, Train Acc: 0.9475, Train F1: 0.9438
Val Loss: 0.1896, Val Acc: 0.9204, Val F1: 0.9067


Loss: 0.0310: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 10/100
Train Loss: 0.1052, Train Acc: 0.9618, Train F1: 0.9584
Val Loss: 0.2061, Val Acc: 0.9140, Val F1: 0.9071


Loss: 0.0733: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 11/100
Train Loss: 0.1105, Train Acc: 0.9562, Train F1: 0.9558
Val Loss: 0.2442, Val Acc: 0.9140, Val F1: 0.9016


Loss: 0.3388: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 12/100
Train Loss: 0.1048, Train Acc: 0.9713, Train F1: 0.9706
Val Loss: 0.2352, Val Acc: 0.9204, Val F1: 0.9091


Loss: 0.0013: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 13/100
Train Loss: 0.0934, Train Acc: 0.9658, Train F1: 0.9641
Val Loss: 0.2637, Val Acc: 0.9076, Val F1: 0.8941


Loss: 0.6437: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 14/100
Train Loss: 0.0917, Train Acc: 0.9753, Train F1: 0.9746
Val Loss: 0.1850, Val Acc: 0.9236, Val F1: 0.9178


Loss: 0.0015: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 15/100
Train Loss: 0.0817, Train Acc: 0.9761, Train F1: 0.9754
Val Loss: 0.1790, Val Acc: 0.9299, Val F1: 0.9200


Loss: 0.0182: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 16/100
Train Loss: 0.0501, Train Acc: 0.9825, Train F1: 0.9820
Val Loss: 0.2016, Val Acc: 0.9204, Val F1: 0.9149


Loss: 0.0443: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 17/100
Train Loss: 0.0478, Train Acc: 0.9857, Train F1: 0.9862
Val Loss: 0.2867, Val Acc: 0.9140, Val F1: 0.9079


Loss: 0.1051: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 18/100
Train Loss: 0.0495, Train Acc: 0.9865, Train F1: 0.9863
Val Loss: 0.2206, Val Acc: 0.9236, Val F1: 0.9178


Loss: 0.0077: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 19/100
Train Loss: 0.0449, Train Acc: 0.9833, Train F1: 0.9815
Val Loss: 0.3247, Val Acc: 0.9076, Val F1: 0.8966


Loss: 0.0073: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 20/100
Train Loss: 0.0382, Train Acc: 0.9881, Train F1: 0.9874
Val Loss: 0.3680, Val Acc: 0.8885, Val F1: 0.8835


Loss: 0.0020: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 21/100
Train Loss: 0.0435, Train Acc: 0.9865, Train F1: 0.9868
Val Loss: 0.2642, Val Acc: 0.9268, Val F1: 0.9163


Loss: 0.0558: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 22/100
Train Loss: 0.0300, Train Acc: 0.9912, Train F1: 0.9906
Val Loss: 0.2738, Val Acc: 0.9140, Val F1: 0.9092


Loss: 0.0294: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 23/100
Train Loss: 0.0204, Train Acc: 0.9968, Train F1: 0.9964
Val Loss: 0.2552, Val Acc: 0.9299, Val F1: 0.9243


Loss: 0.0047: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 24/100
Train Loss: 0.0296, Train Acc: 0.9928, Train F1: 0.9923
Val Loss: 0.2686, Val Acc: 0.9299, Val F1: 0.9265


Loss: 0.0081: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 25/100
Train Loss: 0.0191, Train Acc: 0.9960, Train F1: 0.9956
Val Loss: 0.2523, Val Acc: 0.9268, Val F1: 0.9223


Loss: 0.0059: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 26/100
Train Loss: 0.0162, Train Acc: 0.9968, Train F1: 0.9970
Val Loss: 0.2865, Val Acc: 0.9204, Val F1: 0.9107


Loss: 0.0277: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 27/100
Train Loss: 0.0171, Train Acc: 0.9960, Train F1: 0.9956
Val Loss: 0.2908, Val Acc: 0.9236, Val F1: 0.9141


Loss: 0.0019: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 28/100
Train Loss: 0.0179, Train Acc: 0.9928, Train F1: 0.9930
Val Loss: 0.2719, Val Acc: 0.9268, Val F1: 0.9172


Loss: 0.0022: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 29/100
Train Loss: 0.0181, Train Acc: 0.9960, Train F1: 0.9963
Val Loss: 0.2940, Val Acc: 0.9172, Val F1: 0.9105


Loss: 0.0362: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 30/100
Train Loss: 0.0182, Train Acc: 0.9976, Train F1: 0.9974
Val Loss: 0.2784, Val Acc: 0.9331, Val F1: 0.9268


Loss: 0.0245: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 31/100
Train Loss: 0.0142, Train Acc: 0.9976, Train F1: 0.9974
Val Loss: 0.3143, Val Acc: 0.9172, Val F1: 0.9124


Loss: 0.0113: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 32/100
Train Loss: 0.0207, Train Acc: 0.9952, Train F1: 0.9952
Val Loss: 0.2538, Val Acc: 0.9363, Val F1: 0.9330


Loss: 0.0529: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 33/100
Train Loss: 0.0159, Train Acc: 0.9968, Train F1: 0.9971
Val Loss: 0.2646, Val Acc: 0.9140, Val F1: 0.9092


Loss: 0.0164: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 34/100
Train Loss: 0.0151, Train Acc: 0.9968, Train F1: 0.9967
Val Loss: 0.3317, Val Acc: 0.9299, Val F1: 0.9262


Loss: 0.0020: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 35/100
Train Loss: 0.0148, Train Acc: 0.9952, Train F1: 0.9942
Val Loss: 0.3107, Val Acc: 0.9236, Val F1: 0.9195


Loss: 0.0003: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 36/100
Train Loss: 0.0147, Train Acc: 0.9960, Train F1: 0.9956
Val Loss: 0.3082, Val Acc: 0.9268, Val F1: 0.9214


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 37/100
Train Loss: 0.0108, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.3278, Val Acc: 0.9172, Val F1: 0.9077


Loss: 0.0077: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 38/100
Train Loss: 0.0119, Train Acc: 0.9968, Train F1: 0.9971
Val Loss: 0.3541, Val Acc: 0.9268, Val F1: 0.9170


Loss: 0.0129: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 39/100
Train Loss: 0.0153, Train Acc: 0.9968, Train F1: 0.9971
Val Loss: 0.3484, Val Acc: 0.9204, Val F1: 0.9127


Loss: 0.0028: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 40/100
Train Loss: 0.0128, Train Acc: 0.9976, Train F1: 0.9978
Val Loss: 0.3474, Val Acc: 0.9299, Val F1: 0.9243


Loss: 0.0125: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 41/100
Train Loss: 0.0056, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3402, Val Acc: 0.9268, Val F1: 0.9203


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 42/100
Train Loss: 0.0055, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.3320, Val Acc: 0.9268, Val F1: 0.9219


Loss: 0.0112: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 43/100
Train Loss: 0.0056, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.3775, Val Acc: 0.9172, Val F1: 0.9116


Loss: 0.0055: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 44/100
Train Loss: 0.0103, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.3604, Val Acc: 0.9236, Val F1: 0.9195


Loss: 0.0003: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 45/100
Train Loss: 0.0081, Train Acc: 0.9976, Train F1: 0.9978
Val Loss: 0.3985, Val Acc: 0.9204, Val F1: 0.9136


Loss: 0.0004: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 46/100
Train Loss: 0.0197, Train Acc: 0.9936, Train F1: 0.9941
Val Loss: 0.3747, Val Acc: 0.9204, Val F1: 0.9111


Loss: 0.0004: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 47/100
Train Loss: 0.0133, Train Acc: 0.9960, Train F1: 0.9960
Val Loss: 0.3457, Val Acc: 0.9363, Val F1: 0.9296


Loss: 0.0089: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 48/100
Train Loss: 0.0135, Train Acc: 0.9960, Train F1: 0.9960
Val Loss: 0.4326, Val Acc: 0.9076, Val F1: 0.8976


Loss: 0.0010: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 49/100
Train Loss: 0.0065, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3269, Val Acc: 0.9331, Val F1: 0.9274


Loss: 0.0017: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 50/100
Train Loss: 0.0066, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3312, Val Acc: 0.9331, Val F1: 0.9288


Loss: 0.0003: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 51/100
Train Loss: 0.0047, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3688, Val Acc: 0.9172, Val F1: 0.9077


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 52/100
Train Loss: 0.0104, Train Acc: 0.9976, Train F1: 0.9978
Val Loss: 0.3268, Val Acc: 0.9236, Val F1: 0.9169


Loss: 0.0010: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 53/100
Train Loss: 0.0066, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.3180, Val Acc: 0.9299, Val F1: 0.9250


Loss: 0.0049: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 54/100
Train Loss: 0.0031, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3223, Val Acc: 0.9268, Val F1: 0.9200


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 55/100
Train Loss: 0.0039, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3420, Val Acc: 0.9268, Val F1: 0.9223


Loss: 0.0178: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 56/100
Train Loss: 0.0071, Train Acc: 0.9984, Train F1: 0.9985
Val Loss: 0.3553, Val Acc: 0.9331, Val F1: 0.9253


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 57/100
Train Loss: 0.0052, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3424, Val Acc: 0.9299, Val F1: 0.9231


Loss: 0.0009: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 58/100
Train Loss: 0.0040, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3258, Val Acc: 0.9299, Val F1: 0.9205


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 59/100
Train Loss: 0.0030, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3232, Val Acc: 0.9299, Val F1: 0.9217


Loss: 0.0009: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 60/100
Train Loss: 0.0033, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3365, Val Acc: 0.9268, Val F1: 0.9188


Loss: 0.0026: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 61/100
Train Loss: 0.0047, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.3441, Val Acc: 0.9299, Val F1: 0.9233


Loss: 0.0035: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 62/100
Train Loss: 0.0028, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3552, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0004: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 63/100
Train Loss: 0.0024, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3527, Val Acc: 0.9363, Val F1: 0.9294


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 64/100
Train Loss: 0.0028, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3452, Val Acc: 0.9236, Val F1: 0.9160


Loss: 0.0048: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 65/100
Train Loss: 0.0044, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3479, Val Acc: 0.9236, Val F1: 0.9174


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 66/100
Train Loss: 0.0040, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.3735, Val Acc: 0.9363, Val F1: 0.9301


Loss: 0.0006: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 67/100
Train Loss: 0.0025, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3519, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 68/100
Train Loss: 0.0025, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3649, Val Acc: 0.9395, Val F1: 0.9330


Loss: 0.0016: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 69/100
Train Loss: 0.0029, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3491, Val Acc: 0.9331, Val F1: 0.9279


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 70/100
Train Loss: 0.0041, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.3639, Val Acc: 0.9331, Val F1: 0.9265


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 71/100
Train Loss: 0.0025, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3557, Val Acc: 0.9363, Val F1: 0.9311


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 72/100
Train Loss: 0.0021, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3575, Val Acc: 0.9363, Val F1: 0.9311


Loss: 0.0004: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 73/100
Train Loss: 0.0016, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3603, Val Acc: 0.9363, Val F1: 0.9307


Loss: 0.0037: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 74/100
Train Loss: 0.0023, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3558, Val Acc: 0.9331, Val F1: 0.9275


Loss: 0.0002: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 75/100
Train Loss: 0.0017, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3554, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0002: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 76/100
Train Loss: 0.0023, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3593, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 77/100
Train Loss: 0.0017, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3585, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0004: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 78/100
Train Loss: 0.0024, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3571, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0009: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 79/100
Train Loss: 0.0027, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3614, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 80/100
Train Loss: 0.0016, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3604, Val Acc: 0.9331, Val F1: 0.9272


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 81/100
Train Loss: 0.0021, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3574, Val Acc: 0.9331, Val F1: 0.9258


Loss: 0.0015: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 82/100
Train Loss: 0.0012, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3585, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 83/100
Train Loss: 0.0021, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3643, Val Acc: 0.9331, Val F1: 0.9275


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 84/100
Train Loss: 0.0040, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.3703, Val Acc: 0.9299, Val F1: 0.9252


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 85/100
Train Loss: 0.0015, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3706, Val Acc: 0.9299, Val F1: 0.9257


Loss: 0.0002: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 86/100
Train Loss: 0.0039, Train Acc: 0.9984, Train F1: 0.9982
Val Loss: 0.3734, Val Acc: 0.9331, Val F1: 0.9285


Loss: 0.0014: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 87/100
Train Loss: 0.0026, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3696, Val Acc: 0.9268, Val F1: 0.9224


Loss: 0.0002: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 88/100
Train Loss: 0.0021, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3685, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 89/100
Train Loss: 0.0014, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3691, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0006: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 90/100
Train Loss: 0.0019, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3700, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0004: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 91/100
Train Loss: 0.0018, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3707, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0003: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 92/100
Train Loss: 0.0029, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3704, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0007: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 93/100
Train Loss: 0.0030, Train Acc: 0.9992, Train F1: 0.9989
Val Loss: 0.3707, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0002: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 94/100
Train Loss: 0.0014, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3710, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0001: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 95/100
Train Loss: 0.0020, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3709, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0000: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 96/100
Train Loss: 0.0012, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3708, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0017: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 97/100
Train Loss: 0.0014, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3706, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0014: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 98/100
Train Loss: 0.0017, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3705, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0011: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 99/100
Train Loss: 0.0019, Train Acc: 0.9992, Train F1: 0.9993
Val Loss: 0.3706, Val Acc: 0.9363, Val F1: 0.9303


Loss: 0.0004: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 100/100
Train Loss: 0.0013, Train Acc: 1.0000, Train F1: 1.0000
Val Loss: 0.3706, Val Acc: 0.9363, Val F1: 0.9303


  model.load_state_dict(torch.load("swin_t_model.pth"))
100%|██████████| 99/99 [00:18<00:00,  5.43it/s]

Prediction completed and saved to pred_swin.csv





## Stratified k-fold cross validation 

In [30]:
import torch
import gc

gc.collect()
torch.cuda.empty_cache()

In [31]:
import os
import time
import timm

import albumentations as A
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary

# Dataset class definition
class ImageDataset(Dataset):
    """Serve ``(image, target)`` samples described by a two-column DataFrame.

    Args:
        df: DataFrame whose rows unpack as ``(file name, target)``; only its
            ``.values`` array is kept.
        path: Directory that file names are resolved against.
        transform: Optional callable used as ``transform(image=img)['image']``.
    """

    def __init__(self, df, path, transform=None):
        self.df, self.path, self.transform = df.values, path, transform

    def __len__(self):
        return len(self.df)

    def _read_rgb(self, img_path):
        """Return the image at ``img_path`` as an RGB array, or a black placeholder."""
        if os.path.exists(img_path):
            return np.array(Image.open(img_path).convert('RGB'))
        print(f"Warning: Image not found: {img_path}")
        return np.zeros((224, 224, 3), dtype=np.uint8)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = self._read_rgb(os.path.join(self.path, name))
        if self.transform:
            transformed = self.transform(image=img)
            img = transformed['image']
        return img, target

# Training function definition
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one optimisation pass over ``loader`` and report epoch metrics.

    For every batch the gradients are zeroed, the loss is back-propagated and
    the optimizer takes one step; the tqdm bar description shows that batch's
    loss.

    Returns:
        ``(train_loss, train_acc, train_f1)``: summed batch losses divided by
        ``len(loader)``, accuracy, and macro-averaged F1 over all batches.
    """
    model.train()
    batch_losses = []
    all_preds = []
    all_targets = []

    bar = tqdm(loader)
    for inputs, labels in bar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        step_loss = loss_fn(outputs, labels)
        step_loss.backward()
        optimizer.step()

        batch_losses.append(step_loss.item())
        all_preds.extend(outputs.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(labels.detach().cpu().numpy())

        bar.set_description(f"Loss: {batch_losses[-1]:.4f}")

    epoch_loss = sum(batch_losses) / len(loader)
    epoch_acc = accuracy_score(all_targets, all_preds)
    epoch_f1 = f1_score(all_targets, all_preds, average='macro')

    return epoch_loss, epoch_acc, epoch_f1

# Validation function definition
def validate(loader, model, loss_fn, device):
    """Score ``model`` on ``loader`` in eval mode without tracking gradients.

    Returns:
        ``(val_loss, val_acc, val_f1)``: summed batch losses divided by
        ``len(loader)``, accuracy, and macro-averaged F1 over all batches.
    """
    model.eval()
    batch_losses = []
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            step_loss = loss_fn(outputs, labels)

            batch_losses.append(step_loss.item())
            all_preds.extend(outputs.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(labels.detach().cpu().numpy())

    epoch_loss = sum(batch_losses) / len(loader)
    epoch_acc = accuracy_score(all_targets, all_preds)
    epoch_f1 = f1_score(all_targets, all_preds, average='macro')

    return epoch_loss, epoch_acc, epoch_f1

# Early stopping class
class EarlyStopping:
    """Track validation loss and raise ``early_stop`` once training has stalled.

    ``best_score`` stores the negated loss of the best call so far. A call
    whose negated loss is below ``best_score + min_delta`` increments
    ``counter``; any other call (after the first) resets it. The stop flag is
    set on a stalled call when ``counter >= patience`` and the supplied
    ``val_f1`` is at least ``min_f1_score``.
    """

    def __init__(self, patience=7, min_delta=0, min_f1_score=0.9):
        self.patience, self.min_delta, self.min_f1_score = patience, min_delta, min_f1_score
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def _stalled(self, score):
        """Return True when ``score`` falls short of the required improvement."""
        return score < self.best_score + self.min_delta

    def __call__(self, val_loss, val_f1):
        score = -val_loss

        if self.best_score is None:
            # First call: just record the baseline.
            self.best_score = score
        elif self._stalled(score):
            self.counter += 1
            patience_exhausted = self.counter >= self.patience
            if patience_exhausted and val_f1 >= self.min_f1_score:
                self.early_stop = True
        else:
            self.counter = 0
            self.best_score = score
            
# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    """Delegate to ``summary`` to report the structure of ``model``.

    Args:
        model: Model forwarded unchanged to ``summary``.
        input_size: Input size forwarded unchanged to ``summary``.
    """
    # NOTE(review): `summary` must already be in scope when this is called;
    # presumably it is torchsummary's `summary` (imported in a later cell) -
    # confirm this cell imports it.
    summary(model, input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # Stratified K-fold training of a Swin Transformer followed by a
    # soft-voting ensemble of the per-fold checkpoints on the test set.

    # Settings
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    model_name = 'swin_large_patch4_window7_224'  # Swin Transformer Large
    img_size = 224  # input resolution expected by the chosen Swin variant
    LR = 2e-5  # changed from 1e-4 (author's note: the previous value worked better)
    EPOCHS = 100  # changed from 30 (author's note: the previous value worked better)
    BATCH_SIZE = 32
    num_workers = 4
    n_splits = 5  # Number of K-fold splits

    # Training-time augmentation
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Deterministic preprocessing for validation and test images
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the training labels
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))

    # The test set does not depend on the fold, so build it once instead of
    # re-reading the CSV and re-creating the loader in every fold.
    submission_path = os.path.join(data_path, "sample_submission.csv")
    test_dataset = ImageDataset(pd.read_csv(submission_path), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Stratified K-Fold setup
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # One (num_test_images, num_classes) probability array per fold.
    final_preds_list = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['target'])):
        print(f"Fold {fold+1}/{n_splits}")

        train_df = df.iloc[train_idx]
        val_df = df.iloc[val_idx]

        train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
        val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

        # Fresh model, optimizer and schedule for every fold
        model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
        scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
        # Early stopping setup
        early_stopping = EarlyStopping(patience=5, min_delta=0.001, min_f1_score=0.93)

        # Print the model structure
        # print(f"\nModel structure of {model_name}:")
        # print_model_summary(model, (3, img_size, img_size))

        checkpoint_path = f"swin_t_model_fold_{fold}.pth"

        # Training loop. Starting below any attainable F1 guarantees that the
        # first epoch writes a checkpoint, so the load below cannot hit a
        # missing (or stale, from an earlier run) file.
        best_val_f1 = -1.0
        for epoch in range(EPOCHS):
            train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
            val_loss, val_acc, val_f1 = validate(val_loader, model, loss_fn, device)
            scheduler.step()

            print(f"Epoch {epoch+1}/{EPOCHS}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                torch.save(model.state_dict(), checkpoint_path)

            # Early stopping check
            early_stopping(val_loss, val_f1)
            if early_stopping.early_stop:
                print(f"Early stopping triggered at epoch {epoch+1} with F1 score: {val_f1:.4f}")
                break

        # Test-time inference with the best checkpoint of this fold
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        model.eval()
        fold_preds_list = []

        with torch.no_grad():
            for image, _ in tqdm(test_loader):
                image = image.to(device)
                # Keep class probabilities rather than argmax ids: class ids
                # are categorical, so averaging them across folds would
                # produce labels that no fold actually predicted.
                probs = torch.softmax(model(image), dim=1)
                fold_preds_list.append(probs.cpu().numpy())

        # Store this fold's probabilities
        final_preds_list.append(np.concatenate(fold_preds_list, axis=0))

    # Soft-voting ensemble: average probabilities over folds, then pick the
    # most likely class for every test image.
    final_preds = np.mean(np.stack(final_preds_list), axis=0).argmax(axis=1)

    pred_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
    pred_df['target'] = final_preds
    pred_df.to_csv("pred_swin_kfold.csv", index=False)
    print("Prediction completed and saved to pred_swin_kfold.csv")


Fold 1/5


Loss: 1.1379: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s]


Epoch 1/100
Train Loss: 1.9977, Train Acc: 0.4865, Train F1: 0.4503
Val Loss: 0.8827, Val Acc: 0.7739, Val F1: 0.6936


Loss: 0.4577: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s]


Epoch 2/100
Train Loss: 0.6732, Train Acc: 0.8049, Train F1: 0.7707
Val Loss: 0.4043, Val Acc: 0.8631, Val F1: 0.8179


Loss: 0.3563: 100%|██████████| 40/40 [00:20<00:00,  1.94it/s]


Epoch 3/100
Train Loss: 0.3740, Train Acc: 0.8814, Train F1: 0.8610
Val Loss: 0.2678, Val Acc: 0.8822, Val F1: 0.8490


Loss: 0.3622: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 4/100
Train Loss: 0.2812, Train Acc: 0.8989, Train F1: 0.8843
Val Loss: 0.2281, Val Acc: 0.8949, Val F1: 0.8587


Loss: 0.1351: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 5/100
Train Loss: 0.2306, Train Acc: 0.9092, Train F1: 0.8988
Val Loss: 0.3207, Val Acc: 0.8885, Val F1: 0.8724


Loss: 0.4066: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Epoch 6/100
Train Loss: 0.1993, Train Acc: 0.9307, Train F1: 0.9236
Val Loss: 0.2139, Val Acc: 0.9108, Val F1: 0.8802


Loss: 0.0564: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 7/100
Train Loss: 0.1616, Train Acc: 0.9411, Train F1: 0.9367
Val Loss: 0.2266, Val Acc: 0.9236, Val F1: 0.9109


Loss: 0.0156: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 8/100
Train Loss: 0.1497, Train Acc: 0.9427, Train F1: 0.9381
Val Loss: 0.1785, Val Acc: 0.9076, Val F1: 0.8937


Loss: 0.1676: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 9/100
Train Loss: 0.1273, Train Acc: 0.9570, Train F1: 0.9535
Val Loss: 0.2308, Val Acc: 0.9236, Val F1: 0.9100


Loss: 0.2569: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 10/100
Train Loss: 0.1152, Train Acc: 0.9562, Train F1: 0.9555
Val Loss: 0.2009, Val Acc: 0.9299, Val F1: 0.9218


Loss: 0.0047: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 11/100
Train Loss: 0.0833, Train Acc: 0.9737, Train F1: 0.9735
Val Loss: 0.2476, Val Acc: 0.9236, Val F1: 0.9079


Loss: 0.0017: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 12/100
Train Loss: 0.1018, Train Acc: 0.9586, Train F1: 0.9557
Val Loss: 0.1968, Val Acc: 0.9331, Val F1: 0.9255


Loss: 0.2389: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 13/100
Train Loss: 0.1085, Train Acc: 0.9578, Train F1: 0.9558
Val Loss: 0.2119, Val Acc: 0.9076, Val F1: 0.8933


Loss: 0.0059: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 14/100
Train Loss: 0.0823, Train Acc: 0.9705, Train F1: 0.9685
Val Loss: 0.1973, Val Acc: 0.9268, Val F1: 0.9177


Loss: 0.0007: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 15/100
Train Loss: 0.0565, Train Acc: 0.9825, Train F1: 0.9822
Val Loss: 0.1846, Val Acc: 0.9299, Val F1: 0.9248


Loss: 0.0499: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 16/100
Train Loss: 0.0534, Train Acc: 0.9817, Train F1: 0.9814
Val Loss: 0.2300, Val Acc: 0.9268, Val F1: 0.9204


Loss: 0.0983: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 17/100
Train Loss: 0.0583, Train Acc: 0.9833, Train F1: 0.9821
Val Loss: 0.1958, Val Acc: 0.9363, Val F1: 0.9308
Early stopping triggered at epoch 17 with F1 score: 0.9308


100%|██████████| 99/99 [00:18<00:00,  5.43it/s]


Fold 2/5


Loss: 1.7443: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 1/100
Train Loss: 2.0241, Train Acc: 0.4682, Train F1: 0.4419
Val Loss: 0.9525, Val Acc: 0.7516, Val F1: 0.7064


Loss: 0.5209: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 2/100
Train Loss: 0.7057, Train Acc: 0.7986, Train F1: 0.7692
Val Loss: 0.4094, Val Acc: 0.8408, Val F1: 0.8119


Loss: 0.3512: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 3/100
Train Loss: 0.3797, Train Acc: 0.8838, Train F1: 0.8663
Val Loss: 0.3192, Val Acc: 0.8631, Val F1: 0.8498


Loss: 0.1091: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 4/100
Train Loss: 0.2739, Train Acc: 0.9037, Train F1: 0.8893
Val Loss: 0.3723, Val Acc: 0.8694, Val F1: 0.8489


Loss: 0.6342: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 5/100
Train Loss: 0.2704, Train Acc: 0.9005, Train F1: 0.8886
Val Loss: 0.2797, Val Acc: 0.8854, Val F1: 0.8749


Loss: 0.1422: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 6/100
Train Loss: 0.2116, Train Acc: 0.9172, Train F1: 0.9051
Val Loss: 0.2317, Val Acc: 0.8949, Val F1: 0.8831


Loss: 0.0532: 100%|██████████| 40/40 [00:20<00:00,  1.91it/s]


Epoch 7/100
Train Loss: 0.1665, Train Acc: 0.9482, Train F1: 0.9401
Val Loss: 0.2309, Val Acc: 0.8949, Val F1: 0.8818


Loss: 0.0940: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 8/100
Train Loss: 0.1499, Train Acc: 0.9530, Train F1: 0.9486
Val Loss: 0.2127, Val Acc: 0.9108, Val F1: 0.9076


Loss: 0.5626: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 9/100
Train Loss: 0.1320, Train Acc: 0.9562, Train F1: 0.9528
Val Loss: 0.2164, Val Acc: 0.8949, Val F1: 0.8864


Loss: 0.0135: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 10/100
Train Loss: 0.1205, Train Acc: 0.9546, Train F1: 0.9499
Val Loss: 0.1943, Val Acc: 0.9045, Val F1: 0.8985


Loss: 0.1353: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Epoch 11/100
Train Loss: 0.0913, Train Acc: 0.9745, Train F1: 0.9730
Val Loss: 0.2765, Val Acc: 0.9013, Val F1: 0.8873


Loss: 0.0312:  78%|███████▊  | 31/40 [00:16<00:04,  1.91it/s]

## Swin-T clustering & classification
- 이미지를 유사한 이미지로 5개로 그룹핑 하고 분석하는 모델
- early stopping 코드에서 강제적으로 f1 score boundary 를 줄 수 있게 변경(분류하면 과소적합이 되는 구간이 있음.)

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import gc

gc.collect()
torch.cuda.empty_cache()

# Swin-B 모델 로드
def load_swin_b_model(num_classes=None):
    """Create a pretrained ``swin_base_patch4_window7_224`` model via timm.

    Args:
        num_classes: Forwarded to ``timm.create_model`` as ``num_classes``.

    Returns:
        The model object returned by ``timm.create_model``.
    """
    return timm.create_model(
        'swin_base_patch4_window7_224',
        pretrained=True,
        num_classes=num_classes,
    )

# 특성 추출 함수
def extract_features(img_path, model):
    """Run one image through ``model.forward_features`` and return the result.

    The image is loaded as RGB, resized to 224x224, normalized with the
    mean/std constants below and given a leading batch dimension.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing ``forward_features``; the input tensor is not
            moved to any device here.

    Returns:
        The squeezed feature tensor converted to a NumPy array.
    """
    preprocess = A.Compose([
        A.Resize(224, 224),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])
    rgb = np.array(Image.open(img_path).convert('RGB'))
    batch = preprocess(image=rgb)['image'].unsqueeze(0)

    with torch.no_grad():
        feats = model.forward_features(batch)
    return feats.squeeze().numpy()

# 이미지 클러스터링 함수
def cluster_images(data_path, n_clusters=5):
    """Group the images in ``data_path`` into ``n_clusters`` K-means clusters.

    Features come from ``extract_features`` with a model built by
    ``load_swin_b_model``; each feature array is flattened and the feature
    matrix is standardized before clustering.

    Args:
        data_path: Directory that is listed for ``.jpg`` / ``.png`` files.
        n_clusters: Number of K-means clusters.

    Returns:
        Dict mapping each image file name to its cluster label.
    """
    extractor = load_swin_b_model(num_classes=None)
    extractor.eval()

    image_files = [f for f in os.listdir(data_path) if f.endswith(('.jpg', '.png'))]

    # One flattened feature vector per image, in listing order.
    features = np.array([
        extract_features(os.path.join(data_path, fname), extractor).reshape(-1)
        for fname in tqdm(image_files, desc="Extracting features")
    ])

    features_scaled = StandardScaler().fit_transform(features)
    clusters = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(features_scaled)

    return dict(zip(image_files, clusters))

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target, cluster)`` triples.

    Rows are read from a DataFrame (or a CSV path) and unpacked as
    ``(name, target)``. Images are loaded from ``path``; a missing file is
    reported with a printed warning and replaced by a 224x224 black image.
    """

    def __init__(self, csv, path, transform=None, cluster_dict=None):
        # Accept either an in-memory DataFrame or a path to a CSV file.
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform
        self.cluster_dict = cluster_dict

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)

        if self.transform:
            img = self.transform(image=img)['image']

        # -1 marks "no cluster": either no mapping was supplied (or it is
        # empty) or this file name is not present in it.
        if self.cluster_dict:
            cluster = self.cluster_dict.get(name, -1)
        else:
            cluster = -1
        return img, target, cluster

# Early Stopping 클래스
class EarlyStopping:
    """Track validation loss and raise a stop flag once it stalls.

    A call counts as "no improvement" when the negated loss is below
    ``best_score + min_delta``. The ``early_stop`` flag is only set when
    the number of such calls since the last improvement has reached
    ``patience`` AND the current F1 is at least ``min_f1_score``; until
    the F1 condition holds the counter simply keeps growing.
    """

    def __init__(self, patience=7, min_delta=0, min_f1_score=0.9):
        self.patience = patience
        self.min_delta = min_delta
        self.min_f1_score = min_f1_score
        self.best_score = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss, val_f1):
        """Update the tracker with one epoch's validation loss and F1."""
        # Higher is better, so the loss is negated.
        current = -val_loss

        if self.best_score is None:
            # First observation just seeds the baseline.
            self.best_score = current
            return

        stalled = current < self.best_score + self.min_delta
        if not stalled:
            self.best_score = current
            self.counter = 0
            return

        self.counter += 1
        patience_spent = self.counter >= self.patience
        if patience_spent and val_f1 >= self.min_f1_score:
            self.early_stop = True

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable yielding ``(image, targets, cluster)`` batches; the
            third element is ignored here.
        model: Network to train; it is switched to train mode.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable computing the loss from ``(preds, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(train_loss, train_acc, train_f1)``: the per-batch mean loss,
        the accuracy and the macro-averaged F1 over all seen samples.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets, _cluster in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()

        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(batch_targets.detach().cpu().numpy())

        # Show the latest batch loss on the progress bar.
        progress.set_description(f"Loss: {loss_value:.4f}")

    # The loss is averaged over batches, not over individual samples.
    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')

    return mean_loss, accuracy, macro_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        loader: Iterable yielding ``(image, targets, cluster)`` batches; the
            third element is ignored here.
        model: Network to evaluate; it is switched to eval mode.
        loss_fn: Callable computing the loss from ``(preds, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(val_loss, val_acc, val_f1)``: the per-batch mean loss,
        the accuracy and the macro-averaged F1 over all seen samples.
    """
    model.eval()
    running_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for batch_images, batch_targets, _cluster in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            running_loss += loss_fn(logits, batch_targets).item()

            # Keep predicted class ids and labels on the CPU for sklearn.
            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(batch_targets.detach().cpu().numpy())

    # The loss is averaged over batches, not over individual samples.
    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')

    return mean_loss, accuracy, macro_f1

# 모델 구조 출력 함수
def print_model_summary(model, input_size):
    """Delegate to ``summary`` to report the structure of ``model``.

    Args:
        model: Model forwarded unchanged to ``summary``.
        input_size: Input size forwarded unchanged to ``summary``.
    """
    summary(model, input_size)

# 메인 실행 코드
if __name__ == "__main__":
    # Cluster the training images, train one Swin-B classifier per cluster
    # and average the outputs of all trained models on the test set.

    # Settings
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    img_size = 224
    LR = 2e-5
    EPOCHS = 100
    BATCH_SIZE = 32
    num_workers = 4
    n_clusters = 3

    train_dir = os.path.join(data_path, "train_preprocessed/")

    # Run the clustering
    print("Clustering images...")
    cluster_dict = cluster_images(train_dir, n_clusters)

    # Training-time augmentation
    train_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Deterministic preprocessing for validation and test images
    val_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    # Load the labels and split them into train / validation
    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    # Full (unfiltered) datasets and loaders are kept so the names remain
    # available to later notebook cells; training uses the per-cluster ones.
    train_dataset = ImageDataset(train_df, train_dir, transform=train_transform, cluster_dict=cluster_dict)
    val_dataset = ImageDataset(val_df, train_dir, transform=val_transform, cluster_dict=cluster_dict)
    test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Cluster id of every row, looked up by file name (first column, which is
    # what ImageDataset treats as the image name); -1 when the file was not
    # clustered. Doing this on the DataFrame avoids decoding every image
    # just to read its cluster id.
    train_cluster_ids = train_df.iloc[:, 0].map(lambda name: cluster_dict.get(name, -1))
    val_cluster_ids = val_df.iloc[:, 0].map(lambda name: cluster_dict.get(name, -1))

    # Clusters for which a checkpoint was actually written.
    trained_clusters = []

    # Train one model per cluster
    for cluster in range(n_clusters):
        print(f"\nTraining model for cluster {cluster}")

        # Keep only the rows that belong to this cluster
        train_cluster_df = train_df[train_cluster_ids == cluster]
        val_cluster_df = val_df[val_cluster_ids == cluster]

        if train_cluster_df.empty or val_cluster_df.empty:
            print(f"Skipping cluster {cluster} due to insufficient data")
            continue

        # Real datasets (not pre-materialized lists of transformed samples)
        # so the random augmentations are re-drawn every epoch.
        train_cluster = ImageDataset(train_cluster_df, train_dir, transform=train_transform, cluster_dict=cluster_dict)
        val_cluster = ImageDataset(val_cluster_df, train_dir, transform=val_transform, cluster_dict=cluster_dict)

        train_cluster_loader = DataLoader(train_cluster, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
        val_cluster_loader = DataLoader(val_cluster, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

        # Swin-B model setup
        model = load_swin_b_model(num_classes=17).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
        scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

        # Early stopping setup
        early_stopping = EarlyStopping(patience=5, min_delta=0.001, min_f1_score=0.95)

        # Print the model structure
        # print(f"\nModel structure of Swin-B for cluster {cluster}:")
        # print_model_summary(model, (3, img_size, img_size))

        # Training loop. Starting below any attainable F1 guarantees that the
        # first epoch writes a checkpoint for this cluster.
        best_val_f1 = -1.0
        for epoch in range(EPOCHS):
            train_loss, train_acc, train_f1 = train_one_epoch(train_cluster_loader, model, optimizer, loss_fn, device)
            val_loss, val_acc, val_f1 = validate(val_cluster_loader, model, loss_fn, device)
            scheduler.step()

            print(f"Epoch {epoch+1}/{EPOCHS}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                torch.save(model.state_dict(), f"swin_b_model_cluster_{cluster}.pth")

            # Early stopping check
            early_stopping(val_loss, val_f1)
            if early_stopping.early_stop:
                print(f"Early stopping triggered at epoch {epoch+1} with F1 score: {val_f1:.4f}")
                break

        trained_clusters.append(cluster)

    if not trained_clusters:
        raise RuntimeError("No cluster had enough data to train a model; nothing to predict with")

    # Test-time inference
    print("\nPerforming inference on test data")
    test_preds = []

    # Only clusters that were trained have a checkpoint on disk; loading a
    # skipped cluster would fail with a missing-file error.
    for cluster in trained_clusters:
        model = load_swin_b_model(num_classes=17).to(device)
        model.load_state_dict(torch.load(f"swin_b_model_cluster_{cluster}.pth", map_location=device))
        model.eval()

        cluster_preds = []
        with torch.no_grad():
            for image, _, _ in tqdm(test_loader, desc=f"Predicting cluster {cluster}"):
                image = image.to(device)
                preds = model(image)
                cluster_preds.append(preds.detach().cpu().numpy())

        test_preds.append(np.concatenate(cluster_preds, axis=0))

    # Combine the per-cluster models: average their raw outputs, then take
    # the highest-scoring class for each test image.
    final_preds = np.mean(test_preds, axis=0)
    final_preds = np.argmax(final_preds, axis=1)

    # Save the result
    pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
    pred_df['target'] = final_preds
    pred_df.to_csv("swin_pred.csv", index=False)
    print("Prediction completed and saved to swin_pred.csv")


# 앙상블
CNN 모델(ConvNeXt V2 Large)과 Transformer 모델(ViT Large)을 결합한 앙상블 모델
- CNN 모델로 'convnextv2_large'를 사용합니다.
- Transformer 모델로 'vit_large_patch16_224'를 사용합니다.

## convnext v2 + vit

In [None]:
import os
import time
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torchsummary import summary
import gc

gc.collect()
torch.cuda.empty_cache()

# 데이터셋 클래스 정의
class ImageDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs.

    Rows are read from a DataFrame (or a CSV path) and unpacked as
    ``(name, target)``. Images are loaded from ``path``; a missing file is
    reported with a printed warning and replaced by a 224x224 black image.
    """

    def __init__(self, csv, path, transform=None):
        # Accept either an in-memory DataFrame or a path to a CSV file.
        frame = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.df = frame.values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        if os.path.exists(img_path):
            img = np.array(Image.open(img_path).convert('RGB'))
        else:
            print(f"Warning: Image not found: {img_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)

        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# 앙상블 모델 클래스 정의
class EnsembleModel(nn.Module):
    """Wrap two models and return the element-wise mean of their outputs.

    Both sub-models are registered as ``model1`` and ``model2``, so their
    parameters are part of this module.
    """

    def __init__(self, model1, model2):
        super().__init__()
        self.model1 = model1
        self.model2 = model2

    def forward(self, x):
        """Feed ``x`` to both sub-models and average the two results."""
        first = self.model1(x)
        second = self.model2(x)
        combined = first + second
        return combined / 2

# 학습 함수 정의
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable yielding ``(image, targets)`` batches.
        model: Network to train; it is switched to train mode.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable computing the loss from ``(preds, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(train_loss, train_acc, train_f1)``: the per-batch mean loss,
        the accuracy and the macro-averaged F1 over all seen samples.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()

        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(batch_targets.detach().cpu().numpy())

        # Show the latest batch loss on the progress bar.
        progress.set_description(f"Loss: {loss_value:.4f}")

    # The loss is averaged over batches, not over individual samples.
    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')

    return mean_loss, accuracy, macro_f1

# 검증 함수 정의
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        loader: Iterable yielding ``(image, targets)`` batches.
        model: Network to evaluate; it is switched to eval mode.
        loss_fn: Callable computing the loss from ``(preds, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(val_loss, val_acc, val_f1)``: the per-batch mean loss,
        the accuracy and the macro-averaged F1 over all seen samples.
    """
    model.eval()
    running_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            running_loss += loss_fn(logits, batch_targets).item()

            # Keep predicted class ids and labels on the CPU for sklearn.
            all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
            all_targets.extend(batch_targets.detach().cpu().numpy())

    # The loss is averaged over batches, not over individual samples.
    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')

    return mean_loss, accuracy, macro_f1

In [None]:
# Settings for the jointly trained ConvNeXt V2 + ViT ensemble.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = '../data/'
img_size = 224
EPOCHS = 30
BATCH_SIZE = 16
LR = 1e-4
num_workers = 4

# Training-time augmentation
train_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Deterministic preprocessing for validation and test images
val_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Load the labels and make a stratified 80/20 train/validation split
# NOTE(review): this cell reads "train.csv" while the other cells in view
# read "train_correct_labeling.csv" - confirm which file is intended.
df = pd.read_csv(os.path.join(data_path, "train.csv"))
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

train_dataset = ImageDataset(train_df, os.path.join(data_path, "train_preprocessed/"), transform=train_transform)
val_dataset = ImageDataset(val_df, os.path.join(data_path, "train_preprocessed/"), transform=val_transform)
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# Model setup: two pretrained backbones, each with a 17-class head
model1 = timm.create_model('convnextv2_large', pretrained=True, num_classes=17)
model2 = timm.create_model('vit_large_patch16_224', pretrained=True, num_classes=17)

# EnsembleModel averages the two outputs; both sub-models are optimized
# together through the single optimizer below.
ensemble_model = EnsembleModel(model1, model2).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(ensemble_model.parameters(), lr=LR, weight_decay=1e-5)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Training loop
best_val_f1 = 0
for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_one_epoch(train_loader, ensemble_model, optimizer, loss_fn, device)
    val_loss, val_acc, val_f1 = validate(val_loader, ensemble_model, loss_fn, device)
    scheduler.step()

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

    # Checkpoint whenever the validation macro-F1 strictly improves.
    # NOTE(review): if val_f1 never exceeds 0, no file is written and the
    # load below reads a missing or stale checkpoint.
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(ensemble_model.state_dict(), "best_ensemble_model.pth")

# Test-time inference with the best checkpoint
ensemble_model.load_state_dict(torch.load("best_ensemble_model.pth"))
ensemble_model.eval()
preds_list = []

for image, _ in tqdm(test_loader):
    image = image.to(device)
    with torch.no_grad():
        preds = ensemble_model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# Save the result
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("ensemble_pred.csv", index=False)
print("Prediction completed and saved to ensemble_pred.csv")

## 앙상블 모델 II - 리더 보드 제출용
CNN 모델(ConvNeXt V2 Large)과 Transformer 모델(Swin Transformers)을 결합한 앙상블 모델
- Hyperparameter tuning이 전혀 되어 있지 않은 기본 모델 : 향후 최적화 필요
- CNN 모델로 'convnextv2_large'를 사용합니다.
- Transformer 모델로 'swin_large_patch4_window7_224'를 사용합니다.
- soft voting(기존 저장된 pth 사용)

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader
import gc

gc.collect()
torch.cuda.empty_cache()

# Build the first and second model (architectures only, no pretrained weights)
# NOTE(review): this cell does not import `timm` and does not define `device`,
# `data_path`, `ImageDataset`, `val_transform`, `BATCH_SIZE` or `num_workers`;
# presumably they come from earlier notebook cells - confirm before running
# this cell on a fresh kernel.
model1 = timm.create_model('convnextv2_large', pretrained=False, num_classes=17).to(device)
model2 = timm.create_model('swin_large_patch4_window7_224', pretrained=False, num_classes=17).to(device)

# Load the previously saved weights for each model
model1.load_state_dict(torch.load('convNext_model.pth'))
model2.load_state_dict(torch.load('swin_t_model.pth'))

model1.eval()
model2.eval()

# Load the test data
test_dataset = ImageDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=True)

# Prediction via soft voting
preds_list = []
with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)
        preds1 = model1(image)
        preds2 = model2(image)
        
        # Soft voting: average the two models' softmax probabilities, then
        # take the most likely class per image
        preds_avg = (torch.softmax(preds1, dim=1) + torch.softmax(preds2, dim=1)) / 2
        preds_list.extend(preds_avg.argmax(dim=1).detach().cpu().numpy())

# Save the result
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv("ensemble_pred.csv", index=False)
print("Ensemble prediction completed and saved to ensemble_pred.csv")


# layoutLMv3 모델
- 테스트용.

In [None]:
!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

In [None]:
!pip install git+https://github.com/facebookresearch/segment-anything.git

In [None]:
import gc
import torch
gc.collect()
torch.cuda.empty_cache()

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from transformers import AutoProcessor, AutoModelForSequenceClassification
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
import torch.nn as nn

class DocumentDataset(Dataset):
    """Dataset that runs each document image through ``processor``.

    Rows come from a DataFrame (or a CSV path). The ``ID`` column is joined
    with ``path`` to locate the image; the ``target_encoded`` column, when
    present, supplies the label.
    """

    def __init__(self, csv, path, processor, label_encoder=None, max_length=512):
        # Accept either an in-memory DataFrame or a path to a CSV file.
        self.df = csv if isinstance(csv, pd.DataFrame) else pd.read_csv(csv)
        self.path = path
        self.processor = processor
        self.label_encoder = label_encoder
        self.max_length = max_length

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.path, row['ID'])
        image = Image.open(img_path).convert("RGB")

        encoding = self.processor(
            image,
            return_tensors="pt",
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
        )
        # Squeeze every returned tensor in place (drops size-1 dimensions).
        for key in list(encoding.keys()):
            encoding[key] = encoding[key].squeeze()

        # Rows without an encoded target (the test set) get the label -1.
        has_label = 'target_encoded' in row
        label_value = row['target_encoded'] if has_label else -1
        encoding['labels'] = torch.tensor(label_value, dtype=torch.long)
        return encoding

class CustomLayoutLMv3(nn.Module):
    """Wrapper around the ``microsoft/layoutlmv3-base`` classifier.

    ``forward`` returns only the logits of the wrapped model. The
    ``dropout`` and ``classifier`` layers are created but not used by
    ``forward``.
    """

    def __init__(self, num_labels):
        super().__init__()
        self.layoutlmv3 = AutoModelForSequenceClassification.from_pretrained(
            "microsoft/layoutlmv3-base",
            num_labels=num_labels,
        )
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(768, num_labels)

    def forward(self, **inputs):
        """Pass all keyword inputs to the wrapped model and return its logits."""
        return self.layoutlmv3(**inputs).logits

def train_one_epoch(loader, model, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable yielding dict batches that contain a ``'labels'``
            entry; every value is moved to ``device``.
        model: Network called as ``model(**batch)``; switched to train mode.
        optimizer: Optimizer stepped once per batch.
        criterion: Callable computing the loss from ``(logits, labels)``.
        device: Device the batch values are moved to.

    Returns:
        Tuple ``(train_loss, train_acc, train_f1)``: the per-batch mean loss,
        the accuracy and the macro-averaged F1 over all seen samples.
    """
    model.train()
    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for batch in progress:
        batch = {key: value.to(device) for key, value in batch.items()}
        labels = batch['labels']

        optimizer.zero_grad()
        logits = model(**batch)
        step_loss = criterion(logits, labels)
        step_loss.backward()
        optimizer.step()

        loss_value = step_loss.item()
        running_loss += loss_value
        all_preds.extend(logits.argmax(dim=-1).cpu().numpy())
        all_targets.extend(labels.cpu().numpy())

        # Show the latest batch loss on the progress bar.
        progress.set_description(f"Loss: {loss_value:.4f}")

    # The loss is averaged over batches, not over individual samples.
    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')

    return mean_loss, accuracy, macro_f1

def validate(loader, model, criterion, device):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        loader: Iterable of dict-like batches; each must contain a ``labels``
            entry and every value must support ``.to(device)``.
        model: Module called as ``model(**batch)`` that returns logits.
        criterion: Loss function called as ``criterion(logits, labels)``.
        device: Device every batch tensor is moved to.

    Returns:
        Tuple ``(mean_loss, accuracy, macro_f1)`` where the loss is averaged
        over the number of batches.
    """
    model.eval()
    running_loss = 0
    all_preds, all_targets = [], []

    with torch.no_grad():
        for batch in loader:
            batch = {key: tensor.to(device) for key, tensor in batch.items()}
            labels = batch['labels']

            logits = model(**batch)
            running_loss += criterion(logits, labels).item()

            # Collect per-sample predictions and targets for the metrics.
            all_preds.extend(logits.argmax(dim=-1).cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

    mean_loss = running_loss / len(loader)
    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')
    return mean_loss, accuracy, macro_f1

if __name__ == "__main__":
    # Script entry point: fine-tune the LayoutLMv3 classifier on the labelled
    # training CSV, keep the checkpoint with the best validation macro-F1,
    # then predict the test set and write the result to pred.csv.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_path = '../data/'
    LR = 2e-5
    EPOCHS = 30
    BATCH_SIZE = 8
    num_workers = 4

    processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")

    df = pd.read_csv(os.path.join(data_path, "train_correct_labeling.csv"))

    # Label encoding
    label_encoder = LabelEncoder()
    df['target_encoded'] = label_encoder.fit_transform(df['target'])
    num_labels = len(df['target'].unique())

    print(f"Number of unique classes: {num_labels}")

    # Stratified 80/20 split so both parts keep the class distribution.
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['target'])

    train_dataset = DocumentDataset(train_df, os.path.join(data_path, "train_preprocessed/"), processor, label_encoder)
    val_dataset = DocumentDataset(val_df, os.path.join(data_path, "train_preprocessed/"), processor, label_encoder)
    test_dataset = DocumentDataset(os.path.join(data_path, "sample_submission.csv"), os.path.join(data_path, "test_preprocessed/"), processor)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers)

    model = CustomLayoutLMv3(num_labels=num_labels)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=0.01)
    # mode='max' because the scheduler is stepped with validation F1.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True)

    # Start below any reachable F1 so the first epoch always writes a
    # checkpoint. With an initial value of 0 and a strict '>' comparison, a
    # run whose validation F1 stays at 0.0 would never save, and the load
    # below would fail or pick up a stale file from an earlier run.
    best_val_f1 = float("-inf")
    for epoch in range(EPOCHS):
        train_loss, train_acc, train_f1 = train_one_epoch(train_loader, model, optimizer, criterion, device)
        val_loss, val_acc, val_f1 = validate(val_loader, model, criterion, device)
        scheduler.step(val_f1)

        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), "best_model.pth")

    # Reload the best checkpoint; map_location keeps the tensors on the
    # device chosen above regardless of where they were saved from.
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model.eval()
    preds_list = []

    with torch.no_grad():
        for batch in tqdm(test_loader):
            # Drop 'labels' before calling the model on the test batches.
            batch = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
            logits = model(**batch)
            preds = logits.argmax(dim=-1)
            preds_list.extend(preds.cpu().numpy())

    pred_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
    # Map encoded class indices back to the original target values.
    pred_df['target'] = label_encoder.inverse_transform(preds_list)
    pred_df.to_csv("pred.csv", index=False)
    print("Prediction completed and saved to pred.csv")


background 실행을 위해 .py 파일로 분리
- image_text.py
- image_bert.py
- layoutlmv3.py

# 3가지 모델 앙상블