모델: swin_tiny_patch4_window7_224

데이터 100% + 온라인 어그멘테이션 배치에 따라 적용

이미지 사이즈 224

폴드 5개

에폭 10번

In [15]:
#필요한 라이브러리 임포트
 
import os
import time
import random
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import random_split

In [16]:
# 데이터셋 클래스를 정의

class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values # CSV 파일에서 데이터 로드
        self.path = path # 이미지 파일 경로
        self.transform = transform # 데이터 변환 함수

    def __len__(self):
        return len(self.df) # 데이터셋의 길이 반환

    def __getitem__(self, idx):
        name, target = self.df[idx] # 이미지 이름과 타겟 레이블 추출
        img = np.array(Image.open(os.path.join(self.path, name))) # 이미지 로드
        if self.transform:
            img = self.transform(image=img)['image'] # 변환 적용
        return img, target # 이미지와 레이블 반환

In [20]:
# 랜덤 시드 고정: 결과 재현성을 위해 시드를 고정

SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED) # 해시 시드 고정
random.seed(SEED) # 파이썬 랜덤 시드 고정
np.random.seed(SEED) # NumPy 랜덤 시드 고정
torch.manual_seed(SEED) # PyTorch CPU 시드 고정
torch.cuda.manual_seed(SEED) # PyTorch CUDA 시드 고정
torch.cuda.manual_seed_all(SEED) # 모든 GPU에 대한 PyTorch CUDA 시드 고정
torch.backends.cudnn.benchmark = True # CUDA의 성능 최적화

# 이미지 크기 및 변환 설정
img_size = 224

# augmentation을 위한 transform 코드
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

#데이터 로딩 최적화
from torch.utils.data import DataLoader, SubsetRandomSampler
from sklearn.model_selection import StratifiedKFold
import numpy as np

def create_data_loaders(full_dataset, indices, batch_size=8, num_workers=4):
    sampler = SubsetRandomSampler(indices)
    return DataLoader(
        full_dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        pin_memory=True  # GPU 메모리 전송 속도 향상
    )

def get_subset_indices(total_size, percentage):
    """데이터셋의 특정 비율만큼의 인덱스를 반환"""
    indices = np.random.permutation(total_size)
    subset_size = int(total_size * percentage)
    return indices[:subset_size]

"""
온라인 어그멘테이션을 위한 작업
"""


#온라인 어그멘테이션
def mixup_data(x, y, alpha=1.0):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam
    
def mixup_loss(loss_fn, pred, labels_a, labels_b, lam):
    return lam * loss_fn(pred, labels_a) + (1 - lam) * loss_fn(pred, labels_b)


def cutout(x, n_holes=1, length=50):
    """
    텐서에 cutout을 적용합니다.
    Args:
        x: 입력 텐서 (B, C, H, W)
        n_holes: 구멍의 개수
        length: 구멍의 길이
    """
    x = x.clone()
    b, c, h, w = x.shape
    
    for i in range(b):
        # 각 이미지마다 mask 생성
        mask = torch.ones((h, w), device=x.device)
        
        for _ in range(n_holes):
            # 랜덤 위치 선택
            y = torch.randint(h, (1,), device=x.device)
            x_pos = torch.randint(w, (1,), device=x.device)
            
            # 영역 계산
            y1 = torch.clamp(y - length // 2, 0, h)
            y2 = torch.clamp(y + length // 2, 0, h)
            x1 = torch.clamp(x_pos - length // 2, 0, w)
            x2 = torch.clamp(x_pos + length // 2, 0, w)
            
            # 마스크에 구멍 뚫기
            mask[y1:y2, x1:x2] = 0
        
        # 모든 채널에 마스크 적용
        mask = mask.expand(c, h, w)
        x[i] = x[i] * mask
    
    return x

def cutmix(x, y, beta=1.0):
    """배치 단위로 cutmix를 적용합니다."""
    batch_size = x.size()[0]
    lam = np.random.beta(beta, beta)
    
    # 랜덤하게 이미지 인덱스를 섞음
    rand_index = torch.randperm(batch_size, device=x.device)
    
    # target a와 b
    y_a = y
    y_b = y[rand_index]
    
    # 이미지 크기
    _, _, h, w = x.size()
    
    # random 영역 선택
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(w * cut_rat)
    cut_h = int(h * cut_rat)
    
    # 랜덤 중심점
    cx = torch.randint(w, (1,), device=x.device)
    cy = torch.randint(h, (1,), device=x.device)
    
    # 영역 좌표
    x1 = torch.clamp(cx - cut_w // 2, 0, w)
    x2 = torch.clamp(cx + cut_w // 2, 0, w)
    y1 = torch.clamp(cy - cut_h // 2, 0, h)
    y2 = torch.clamp(cy + cut_h // 2, 0, h)
    
    # 이미지 혼합
    mixed_x = x.clone()
    mixed_x[:, :, y1:y2, x1:x2] = x[rand_index, :, y1:y2, x1:x2]
    
    # 면적 비율 계산
    lam = 1 - ((x2 - x1) * (y2 - y1) / (w * h))
    
    return mixed_x, y_a, y_b, lam









# 에폭당 학습을 위한 함수
criterion = nn.CrossEntropyLoss() 

def train_one_epoch(loader, model, optimizer, loss_fn, device):
    model.train() # 모델을 학습 모드로 설정
    train_loss = 0
    preds_list = [] # 예측 결과 리스트 초기화
    targets_list = [] # 타겟 리스트 초기화

    pbar = tqdm(loader) # 진행 상황을 표시하기 위한 tqdm    
    for batch_idx, (image, targets) in enumerate(pbar): 
        image = image.to(device) # 이미지 텐서를 지정한 장치로 이동
        targets = targets.to(device) # 타겟 텐서를 지정한 장치로 이동

        model.zero_grad(set_to_none=True) # 그래디언트 초기화

        # augmentation 선택: 15배치마다 순환 (5: mixup, 10: cutout, 15: cutmix)
        if (batch_idx + 1) % 9 == 3:  # mixup
            mixed_images, targets_a, targets_b, lam = mixup_data(image, targets)
            preds = model(mixed_images)
            loss = mixup_loss(criterion, preds, targets_a, targets_b, lam)
                
            # 평가를 위한 원본 이미지 예측
            with torch.no_grad():
                real_preds = model(image)
                preds_list.extend(real_preds.argmax(dim=1).detach().cpu().numpy())
                targets_list.extend(targets.detach().cpu().numpy())

        elif (batch_idx + 1) % 9 == 6:  # cutout
            cutout_images = cutout(image, n_holes=1, length=30)
            preds = model(cutout_images)
            loss = criterion(preds, targets)
                
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

        elif (batch_idx + 1) % 9 == 0:  # cutmix
            mixed_images, targets_a, targets_b, lam = cutmix(image, targets)
            preds = model(mixed_images)
            loss = mixup_loss(criterion, preds, targets_a, targets_b, lam)
                
            # 평가를 위한 원본 이미지 예측
            with torch.no_grad():
                real_preds = model(image)
                preds_list.extend(real_preds.argmax(dim=1).detach().cpu().numpy())
                targets_list.extend(targets.detach().cpu().numpy())

        # 일반적인 학습
        else:
            preds = model(image) # 모델 예측
            loss = loss_fn(preds, targets) # 손실 계산
                   
        loss.backward() # 역전파
        optimizer.step() # 최적화 단계

        train_loss += loss.item() # 손실 누적
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy()) # 예측 결과 추가
        targets_list.extend(targets.detach().cpu().numpy()) # 타겟 추가

        pbar.set_description(f"Loss: {loss.item():.4f}") # 진행 바에 손실 출력

    train_loss /= len(loader) # 평균 손실 계산
    train_acc = accuracy_score(targets_list, preds_list) # 정확도 계산
    train_f1 = f1_score(targets_list, preds_list, average='macro') # F1 점수 계산

    ret = {
        "train_loss": train_loss, # 평균 손실
        "train_acc": train_acc, # 정확도
        "train_f1": train_f1, # F1 점수
    }

    return ret # 결과 반환

def validation(model, val_loader, loss_fn):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []
    
    with torch.no_grad():
        for val_images, val_labels in val_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)
            val_outputs = model(val_images)
            loss = loss_fn(val_outputs, val_labels)
            val_loss += loss.item()
            preds_list.extend(val_outputs.argmax(dim=1).cpu().numpy())
            targets_list.extend(val_labels.cpu().numpy())
    
    val_loss /= len(val_loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')
    
    return val_loss, val_acc, val_f1


In [24]:
import mlflow
import mlflow.pytorch
from torch.optim.lr_scheduler import ReduceLROnPlateau

# 경로 설정
csv_path = '/data/ephemeral/home/exp5/data/train_with_aug.csv'
img_path = '/data/ephemeral/home/exp5/data/train'

# MLflow 관련 파라미터 설정
model_name = 'swin_tiny_patch4_window7_224'
run_name = 'swin_tiny_augmentation'  # MLflow run 이름
# learning_rate = 1e-3  # 학습률
pretrained = True  # 사전학습 모델 사용 여부
epochs = 10
# BATCH_SIZE = 8

# 학습 파라미터 설정
num_workers = 4
dropout = 0.2


mlflow.set_tracking_uri('/data/ephemeral/home/exp/mlruns')
mlflow.set_experiment('Docu classification5')

def run_kfold_experiment(full_dataset, data_percentage, n_splits=5,  min_val_acc=0.5):
    """K-fold 교차 검증을 수행하는 함수"""
    # 전체 데이터셋 크기의 특정 비율만 사용
    total_size = len(full_dataset)
    subset_indices = get_subset_indices(total_size, data_percentage)
    
    # 라벨 추출 (full_dataset의 DataFrame 사용)
    labels = full_dataset.df[subset_indices, 1].astype(int)  # 두 번째 열이 라벨
    
    # StratifiedKFold 설정
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    fold_results = []
          
    with mlflow.start_run(run_name=run_name) as run:
        # Log parameters
        mlflow.log_params({
                "model_name": model_name,
                "learning_rate": learning_rate,
                "epochs": epochs,
                "batch_size": BATCH_SIZE,
                "img_size": img_size,
                "pretrained": pretrained,
            })    


        for fold, (train_idx, val_idx) in enumerate(skf.split(subset_indices, labels), 1):
            print(f"\nFold {fold}/{n_splits}")
            
            # 데이터 로더 생성
            train_loader = create_data_loaders(full_dataset, subset_indices[train_idx], batch_size=BATCH_SIZE)
            val_loader = create_data_loaders(full_dataset, subset_indices[val_idx], batch_size=BATCH_SIZE)
            
            best_val_loss = float('inf')
            patience = 5
            early_stop_counter = 0
            fold_metrics = []
            
            for epoch in range(EPOCHS):
                # 학습
                train_results = train_one_epoch(train_loader, model, optimizer, loss_fn, device)
                # 검증
                val_loss, val_accuracy, val_f1 = validation(model, val_loader, loss_fn)
                
                # 첫 에폭에서 최소 성능 체크
                if epoch == 0 and val_accuracy < min_val_acc:
                    print(f"Initial validation accuracy ({val_accuracy:.4f}) below threshold ({min_val_acc})")
                    print("Stopping training for this fold...")
                    break

                # 현재 에폭 결과 저장
                metrics = {
                    'epoch': epoch + 1,
                    'train_loss': train_results['train_loss'],
                    'train_acc': train_results['train_acc'],
                    'train_f1': train_results['train_f1'],
                    'val_loss': val_loss,
                    'val_acc': val_accuracy,
                    'val_f1': val_f1
                }
                fold_metrics.append(metrics)
                
                # 결과 출력
                print(f"Epoch [{epoch + 1}/{EPOCHS}]")
                print(f"Train Loss: {train_results['train_loss']:.4f}, "
                    f"Train Acc: {train_results['train_acc']:.4f}, "
                    f"Train F1: {train_results['train_f1']:.4f}")
                print(f"Val Loss: {val_loss:.4f}, "
                    f"Val Acc: {val_accuracy:.4f}, "
                    f"Val F1: {val_f1:.4f}")
                
                # Early stopping 체크
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    torch.save(model.state_dict(), f"model_fold{fold}_best.pth")
                    early_stop_counter = 0
                else:
                    early_stop_counter += 1
                    
                if early_stop_counter >= patience:
                    print(f"Early stopping triggered at epoch {epoch + 1}")
                    break
            
            fold_results.append(fold_metrics)
    
    return fold_results


In [25]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

model = timm.create_model(
        model_name, 
        pretrained=True, 
        num_classes=17,
        drop_rate=dropout
    ).to(device)

# 학습 관련 파라미터
learning_rate = 1e-4  # 1e-3에서 낮춤
BATCH_SIZE = 32  # 8에서 증가
weight_decay = 1e-5  # L2 정규화 추가

loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

#데이터셋 생성
print("Creating dataset...")
full_dataset = ImageDataset(csv=csv_path, path=img_path, transform=trn_transform)
print(f"Dataset created with {len(full_dataset)} samples")

results = run_kfold_experiment(full_dataset, data_percentage=50, min_val_acc=0.5) 
torch.save(model.state_dict(), f"./model_swin_tiny_augup.pth")


Creating dataset...
Dataset created with 147616 samples

Fold 1/5


Loss: 0.0109: 100%|██████████| 3691/3691 [07:38<00:00,  8.06it/s]


Epoch [1/10]
Train Loss: 0.3885, Train Acc: 0.8763, Train F1: 0.8758
Val Loss: 0.0539, Val Acc: 0.9825, Val F1: 0.9826


Loss: 0.0045: 100%|██████████| 3691/3691 [07:38<00:00,  8.05it/s]


Epoch [2/10]
Train Loss: 0.2391, Train Acc: 0.9234, Train F1: 0.9233
Val Loss: 0.0311, Val Acc: 0.9895, Val F1: 0.9895


Loss: 0.0023: 100%|██████████| 3691/3691 [07:38<00:00,  8.04it/s]


Epoch [3/10]
Train Loss: 0.2104, Train Acc: 0.9299, Train F1: 0.9298
Val Loss: 0.0249, Val Acc: 0.9928, Val F1: 0.9928


Loss: 0.2446: 100%|██████████| 3691/3691 [07:38<00:00,  8.05it/s]


Epoch [4/10]
Train Loss: 0.2007, Train Acc: 0.9312, Train F1: 0.9311
Val Loss: 0.0211, Val Acc: 0.9938, Val F1: 0.9938


Loss: 0.0019: 100%|██████████| 3691/3691 [07:38<00:00,  8.05it/s]


Epoch [5/10]
Train Loss: 0.1868, Train Acc: 0.9375, Train F1: 0.9374
Val Loss: 0.0277, Val Acc: 0.9907, Val F1: 0.9908


Loss: 0.0077: 100%|██████████| 3691/3691 [07:38<00:00,  8.05it/s]


Epoch [6/10]
Train Loss: 0.1858, Train Acc: 0.9363, Train F1: 0.9363
Val Loss: 0.0135, Val Acc: 0.9962, Val F1: 0.9962


Loss: 0.0011: 100%|██████████| 3691/3691 [07:39<00:00,  8.04it/s]


Epoch [7/10]
Train Loss: 0.1790, Train Acc: 0.9369, Train F1: 0.9369
Val Loss: 0.0133, Val Acc: 0.9964, Val F1: 0.9964


Loss: 0.0016: 100%|██████████| 3691/3691 [07:39<00:00,  8.03it/s]


Epoch [8/10]
Train Loss: 0.1785, Train Acc: 0.9352, Train F1: 0.9351
Val Loss: 0.0179, Val Acc: 0.9942, Val F1: 0.9942


Loss: 0.0010: 100%|██████████| 3691/3691 [07:39<00:00,  8.03it/s]


Epoch [9/10]
Train Loss: 0.1720, Train Acc: 0.9394, Train F1: 0.9393
Val Loss: 0.0118, Val Acc: 0.9959, Val F1: 0.9959


Loss: 0.0023: 100%|██████████| 3691/3691 [07:39<00:00,  8.04it/s]


Epoch [10/10]
Train Loss: 0.1703, Train Acc: 0.9385, Train F1: 0.9385
Val Loss: 0.0170, Val Acc: 0.9951, Val F1: 0.9951

Fold 2/5


Loss: 0.0445: 100%|██████████| 3691/3691 [07:37<00:00,  8.06it/s]


Epoch [1/10]
Train Loss: 0.1712, Train Acc: 0.9411, Train F1: 0.9410
Val Loss: 0.0051, Val Acc: 0.9983, Val F1: 0.9983


Loss: 0.0016: 100%|██████████| 3691/3691 [07:44<00:00,  7.95it/s]


Epoch [2/10]
Train Loss: 0.1688, Train Acc: 0.9420, Train F1: 0.9420
Val Loss: 0.0035, Val Acc: 0.9987, Val F1: 0.9987


Loss: 0.0013: 100%|██████████| 3691/3691 [07:39<00:00,  8.04it/s]


Epoch [3/10]
Train Loss: 0.1674, Train Acc: 0.9452, Train F1: 0.9451
Val Loss: 0.0041, Val Acc: 0.9991, Val F1: 0.9991


Loss: 0.0016:  70%|███████   | 2585/3691 [05:22<02:14,  8.21it/s]

In [None]:
# 테스트 데이터셋 로드
tst_dataset = ImageDataset("/data/ephemeral/home/exp5/data/sample_submission.csv", 
"/data/ephemeral/home/exp5/data/test", transform=tst_transform)
tst_loader = DataLoader(tst_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers)

# 베스트 모델 로드
model.load_state_dict(torch.load("/data/ephemeral/home/exp6/model_fold1_best.pth"))  # 저장한 모델 로드
model.to(device)  # 모델을 장치로 이동

# 예측 리스트 초기화
preds_list = []

# 모델을 평가 모드로 설정
model.eval()
for image, _ in tqdm(tst_loader):
    image = image.to(device)

    with torch.no_grad():
        preds = model(image)

    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

# 결과 DataFrame 생성
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list

# 샘플 제출 파일 로드 및 확인
sample_submission_df = pd.read_csv("/data/ephemeral/home/dataset/data/sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all()

# 예측 결과를 CSV 파일로 저장
pred_df.to_csv("fold1_result.csv", index=False)