# 1. Import Libraries

In [None]:
import os
import time
import random
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import cv2
import easyocr
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam,NAdam
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from torch.amp import GradScaler,autocast
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.model_selection import StratifiedKFold

import augraphy
from augraphy import AugraphyPipeline, NoiseTexturize,DirtyDrum,InkBleed,LightingGradient,SubtleNoise,BleedThrough, BadPhotoCopy

import pickle

import torch.nn.functional as F
from torchvision import datasets, transforms
from timm import create_model

from datetime import datetime
import wandb

# 2. Parameter and Constant Settings

In [None]:
# Fix every random seed so that runs are repeatable.
SEED = 2024
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark=True lets cuDNN auto-tune its convolution algorithms, and the
# selection can differ from run to run, which undermines the seeding above.
# Request deterministic kernels and disable the auto-tuner instead.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Device: use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Data config
data_path = '../data/'
# Model config
model_name = 'efficientnetv2_rw_m' # 'resnet50' 'efficientnet-b0', ...
# Training config
img_size = 480
LR = 1e-4
EPOCHS = 5
BATCH_SIZE = 16
# Up to 64 DataLoader workers, but never more than the CPUs this host reports:
# asking for more workers than CPUs oversubscribes the machine.
num_workers = min(64, os.cpu_count() or 1)

# 3. Class Definitions

In [None]:
class ImageDataset(Dataset):
    """Image dataset described by a CSV file with ``ID`` and ``target`` columns.

    Each item is an ``(image, label)`` pair. The image is read from
    ``path/ID`` with OpenCV, converted to RGB, optionally passed through
    ``pipeline`` and then through the albumentations-style ``transform``.

    Args:
        csv: Path of the CSV file. Rows must hold exactly two values
            (file name, label) because ``__getitem__`` unpacks them that way.
        path: Directory that contains the image files.
        pipeline: Optional callable applied to the loaded image before
            ``transform`` (e.g. an augraphy pipeline).
        transform: Optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``['image']``.
        is_train: Stored on the instance; not otherwise used by this class.
    """

    # Known label corrections: image ID -> corrected target.
    _LABEL_FIXES = {
        '45f0d2dfc7e47c03.jpg': 7,   # was 3
        'aec62dced7af97cd.jpg': 14,  # was 3
        '8646f2c3280a4f49.jpg': 3,   # was 7
        '1ec14a14bbe633db.jpg': 7,   # was 14
    }

    def __init__(self, csv, path, pipeline = None, transform=None, is_train = True):
        # Read the CSV once and apply the label corrections once; both the row
        # array and the label vector are derived from the same frame.
        frame = self._fix_train_dataframe(pd.read_csv(csv))
        self.df = frame.values                  # rows as a numpy array
        self.targets = frame['target'].values   # labels, used for stratified splits
        self.path = path
        self.pipeline = pipeline
        self.transform = transform
        self.is_train = is_train

    def _fix_train_dataframe(self, df):
        """Overwrite the known wrong labels in ``df`` in place and return it."""
        for image_id, target in self._LABEL_FIXES.items():
            df.loc[df['ID'] == image_id, 'target'] = target
        return df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_name, label = self.df[idx]
        img_path = os.path.join(self.path, img_name)

        # cv2.COLOR_BGR2RGB is a cvtColor conversion code, not an imread flag:
        # passing it to imread never converted the channels. Load with the
        # default flag (3-channel BGR) and convert explicitly.
        image = cv2.imread(img_path)
        if image is None:
            # imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Optional augraphy-style pipeline.
        # NOTE(review): the pipeline now receives an RGB image - confirm it has
        # no channel-order-dependent effects.
        if self.pipeline:
            image = self.pipeline(image)
        # Albumentations-style transforms are called with the image= keyword.
        if self.transform:
            image = self.transform(image=np.array(image))['image']

        return image, label

In [None]:
# Training-time augmentation pipeline (albumentations). The steps run in the
# listed order: padding, random augmentations, padding again, resize to
# img_size x img_size, normalization, conversion to a tensor.
# NOTE(review): A.Cutout and the always_apply argument are deprecated in newer
# albumentations releases - confirm against the installed version.
trn_transform = A.Compose([
    #A.Resize(height=img_size, width=img_size),
    # Pad with the constant value (255,255,255) until both sides are at least img_size.
    A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=cv2.BORDER_CONSTANT, value=(255,255,255)),
    # Random augmentations, each applied with probability 0.5.
    A.Transpose(always_apply=False, p=0.5),
    A.Rotate(limit=90, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), p=0.5),
    A.RGBShift(p=0.5),
    A.ChannelShuffle(p=0.5),
    A.GlassBlur( sigma = 0.7 , max_delta = 4 , iterations = 2 , always_apply = False , mode = 'fast' , p = 0.5 ),
    A.Cutout(p=0.5, num_holes=8, max_h_size=48, max_w_size=48),
    # Same padding step repeated after the augmentations, then the final resize.
    A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=cv2.BORDER_CONSTANT, value=(255,255,255)),
    A.Resize(height=img_size, width=img_size),

    # NOTE(review): these look like the usual ImageNet mean/std in RGB order -
    # confirm they match the pretrained weights and the channel order of the images.
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Transform for test images: resize, normalize and convert to a tensor only.
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [None]:
# Build the training and test datasets.
# The augraphy pipeline is currently disabled, so `pipeline` keeps its default.
trn_dataset = ImageDataset(
    csv="../data/train_augmented.csv",
    path="../data/train_augmented/",
    transform=trn_transform,
    is_train=True,
)

# The test set reuses the sample submission file as its list of image IDs.
tst_dataset = ImageDataset(
    csv="../data/sample_submission.csv",
    path="../data/test/",
    transform=tst_transform,
    is_train=False,
)

# Report the number of samples in each dataset.
print(*(len(dataset) for dataset in (trn_dataset, tst_dataset)))

In [None]:
# Build the DataLoaders. Only the training loader shuffles its samples.
trn_loader = DataLoader(
    dataset=trn_dataset,
    batch_size=BATCH_SIZE,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=True,
    drop_last=False,
)

# The test loader keeps the dataset order so predictions line up with the IDs.
tst_loader = DataLoader(
    dataset=tst_dataset,
    batch_size=BATCH_SIZE,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=False,
)

# 4. Function Definitions

In [None]:

def save_model(model, save_path, filename):
    """Save a timm model's weights and metadata as a pickle file.

    The file holds a dict with the keys ``model_name`` (taken from
    ``model.default_cfg['architecture']``), ``state_dict`` and
    ``num_classes``.

    Args:
        model: Model to save; must expose ``default_cfg``, ``state_dict()``
            and ``num_classes``.
        save_path: Directory to write into; created if it does not exist.
        filename: Name of the file inside ``save_path``.
    """
    os.makedirs(save_path, exist_ok=True)
    full_path = os.path.join(save_path, filename)

    # Move every tensor to the CPU before pickling. Tensors pickled while on a
    # GPU are restored onto that GPU, so the file could not be read on a
    # CPU-only machine. Values are replaced in the dict returned by
    # state_dict(), which keeps its type and metadata; the model is untouched.
    state = model.state_dict()
    for key, value in state.items():
        if torch.is_tensor(value):
            state[key] = value.detach().cpu()

    save_dict = {
        'model_name': model.default_cfg['architecture'],  # timm architecture name
        'state_dict': state,
        'num_classes': model.num_classes,
    }

    with open(full_path, 'wb') as f:
        pickle.dump(save_dict, f)

    print(f"Model saved successfully to {full_path}")

def load_model(load_path, filename, device='cuda'):
    """Rebuild a timm model from a pickle file written by ``save_model``.

    Args:
        load_path: Directory that contains the file.
        filename: Name of the pickle file.
        device: Device the restored model is moved to.

    Returns:
        The model with the stored weights loaded, placed on ``device``.
    """
    checkpoint_file = os.path.join(load_path, filename)

    # NOTE: pickle.load can execute arbitrary code - only load trusted files.
    with open(checkpoint_file, 'rb') as handle:
        checkpoint = pickle.load(handle)

    # Recreate the architecture without pretrained weights, then restore the
    # stored parameters and move the result to the requested device.
    network = timm.create_model(
        checkpoint['model_name'],
        pretrained=False,
        num_classes=checkpoint['num_classes'],
    )
    network.load_state_dict(checkpoint['state_dict'])
    network = network.to(device)

    print(f"Model loaded successfully from {checkpoint_file}")
    return network

In [None]:
# Mixup augmentation
def mixup_data(x, y, alpha=0.8):
    """Blend every sample of a batch with a randomly chosen partner sample.

    Args:
        x: Batch of inputs; the first dimension is the batch dimension.
        y: Labels of the batch.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            weight is drawn from; a value <= 0 fixes the weight at 1.

    Returns:
        ``(mixed_x, y_a, y_b, lam)``: the blended inputs, the original labels,
        the partner labels and the mixing weight.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    # Random pairing of the samples inside the batch.
    partner = torch.randperm(x.size()[0]).to(x.device)

    blended = lam * x + (1 - lam) * x[partner, :]
    return blended, y, y[partner], lam

# CutMix augmentation
def cutmix_data(x, y, alpha=1.0):
    """Paste a random box from a partner sample into every sample of a batch.

    Args:
        x: 4-D batch of inputs; the box is cut along dimensions 2 and 3.
        y: Labels of the batch.
        alpha: Parameter of the Beta(alpha, alpha) distribution the initial
            mixing weight is drawn from; a value <= 0 fixes it at 1 (no box).

    Returns:
        ``(mixed_x, y_a, y_b, lam)``: the mixed inputs, the original labels,
        the partner labels, and the fraction of each sample that still comes
        from the original image. ``x`` itself is not modified.
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)

    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)

    # Work on a copy so the caller's batch is not modified in place.
    mixed_x = x.clone()
    mixed_x[:, :, bbx1:bbx2, bby1:bby2] = x[index, :, bbx1:bbx2, bby1:bby2]

    # The box is clipped at the borders and its sides are rounded, so the
    # pasted area generally differs from the sampled 1 - lam. Recompute lam
    # from the real box so the label weights match the pixels.
    box_area = (bbx2 - bbx1) * (bby2 - bby1)
    lam = 1.0 - box_area / (x.size(2) * x.size(3))

    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def rand_bbox(size, lam):
    """Sample a random box covering roughly ``1 - lam`` of the spatial area.

    Args:
        size: Shape of the batch; ``size[2]`` and ``size[3]`` are the extents
            of the two dimensions the box is cut along.
        lam: Mixing weight; the box sides scale with ``sqrt(1 - lam)``.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)`` as Python ints: start/end along dimension
        2 (``bbx``) and along dimension 3 (``bby``), clipped to the extents.
    """
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # Random centre of the box.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # Clip so the box never leaves the image.
    bbx1 = int(np.clip(cx - cut_w // 2, 0, W))
    bby1 = int(np.clip(cy - cut_h // 2, 0, H))
    bbx2 = int(np.clip(cx + cut_w // 2, 0, W))
    bby2 = int(np.clip(cy + cut_h // 2, 0, H))

    return bbx1, bby1, bbx2, bby2

# Loss for Mixup/CutMix: weight the loss of each label set by the mixing weight.
def mixup_cutmix_criterion(criterion, pred, y_a, y_b, lam):
    """Return ``lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)``.

    Args:
        criterion: Callable ``criterion(pred, target)`` returning a loss.
        pred: Model output passed to ``criterion``.
        y_a: First set of targets, weighted by ``lam``.
        y_b: Second set of targets, weighted by ``1 - lam``.
        lam: Mixing weight.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

In [None]:
def train_one_epoch(loader, model, optimizer, loss_fn, device, scaler=None, accumulation_steps=1, augmentation_func=None):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable of ``(image, targets)`` batches that supports ``len``.
        model: Model to train; switched to train mode.
        optimizer: Optimizer that updates the model parameters.
        loss_fn: Callable ``loss_fn(preds, targets)``.
        device: Device the batches are moved to.
        scaler: Optional gradient scaler; when given, the forward pass runs
            under autocast and the scaler drives backward/step.
        accumulation_steps: Number of batches whose gradients are accumulated
            before one optimizer step.
        augmentation_func: Optional callable ``f(image, targets)`` returning
            ``(images, targets_a, targets_b, lam)`` (Mixup/CutMix style).

    Returns:
        Dict with ``train_loss`` (mean un-scaled batch loss), ``train_acc``
        and ``train_f1`` (macro), computed against the original targets.
    """
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    use_amp = scaler is not None
    # Match autocast to the device actually used instead of assuming CUDA.
    device_type = torch.device(device).type
    num_batches = len(loader)

    # Start the epoch from clean gradients.
    optimizer.zero_grad(set_to_none=True)

    pbar = tqdm(loader, total=num_batches)
    for step, (image, targets) in enumerate(pbar):
        image = image.to(device)
        targets = targets.to(device)

        with autocast(device_type=device_type, enabled=use_amp):
            if augmentation_func:
                # Mixup / CutMix: the loss is a weighted sum over both label sets.
                images, targets_a, targets_b, lam = augmentation_func(image, targets)
                preds = model(images)
                loss = mixup_cutmix_criterion(loss_fn, preds, targets_a, targets_b, lam)
            else:
                preds = model(image)
                loss = loss_fn(preds, targets)
            # Scale the loss (never the logits) so accumulated gradients
            # average over the accumulation window.
            loss = loss / accumulation_steps

        batch_loss = loss.item() * accumulation_steps
        train_loss += batch_loss

        if use_amp:
            scaler.scale(loss).backward()
        else:
            loss.backward()

        # Step once per accumulation window, and also on the last batch so a
        # trailing partial window is not silently dropped.
        if (step + 1) % accumulation_steps == 0 or (step + 1) == num_batches:
            if use_amp:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad(set_to_none=True)

        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        # Progress bar with the current and running average loss.
        avg_loss = train_loss / (step + 1)
        pbar.set_description(f"Batch Loss: {batch_loss:.4f}, Avg Loss: {avg_loss:.4f}")

    # Final metrics
    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    return {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

In [None]:
# Validation pass
def validate_one_epoch(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        loader: Iterable of ``(images, targets)`` batches that supports ``len``.
        model: Model to evaluate; switched to eval mode.
        loss_fn: Callable ``loss_fn(logits, targets)``.
        device: Device the batches are moved to.

    Returns:
        Dict with ``val_loss`` (mean batch loss), ``val_acc`` and ``val_f1``
        (macro).
    """
    model.eval()
    running_loss = 0
    predicted = []
    expected = []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            output = model(batch_images)
            # Some models wrap their output in an object with a `.logits` field.
            logits = getattr(output, 'logits', output)
            running_loss += loss_fn(logits, batch_targets).item()

            predicted.extend(logits.argmax(dim=1).cpu().numpy())
            expected.extend(batch_targets.cpu().numpy())

    # Mean loss over the batches plus the classification metrics.
    running_loss /= len(loader)
    return {
        "val_loss": running_loss,
        "val_acc": accuracy_score(expected, predicted),
        "val_f1": f1_score(expected, predicted, average='macro'),
    }

In [None]:
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    A loss counts as an improvement only if it is lower than the best loss
    seen so far by more than ``min_delta``. Calling the instance returns
    ``True`` once ``patience`` consecutive calls brought no improvement.
    """

    def __init__(self, patience=3, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0        # consecutive calls without improvement
        self.best_score = None  # lowest loss accepted so far

    def __call__(self, val_loss):
        improved = (
            self.best_score is None
            or val_loss < self.best_score - self.min_delta
        )
        if improved:
            # New best: remember it and restart the patience counter.
            self.best_score = val_loss
            self.counter = 0
            return False

        self.counter += 1
        return self.counter >= self.patience


In [None]:
import copy

# Test-set predictions of every fold; combined by the hard-voting cell.
all_predictions = []

normal_score_list = []
best_score = 0
best_normal_score = 0
save_path = '../output/models'
# Directory for the per-fold prediction CSVs. Created up front because
# DataFrame.to_csv does not create missing directories.
pred_dir = '../output/output'
os.makedirs(pred_dir, exist_ok=True)


def _predict_labels(model, loader, device):
    """Return the argmax class index for every sample yielded by ``loader``."""
    labels = []
    model.eval()
    with torch.no_grad():
        for images, _ in loader:  # the second element (targets) is unused
            images = images.to(device)
            labels.extend(model(images).argmax(dim=1).cpu().numpy())
    return labels


# Validation view of the training data: same rows and labels, but with the
# deterministic test transform. Validating through the random training
# augmentations makes val_loss noisy, which also disturbs early stopping.
val_dataset = copy.copy(trn_dataset)
val_dataset.transform = tst_transform

# K-fold cross-validation loop
# NOTE(review): if the training CSV holds several augmented copies of one
# source image, a plain stratified split can put copies of the same image in
# both train and validation - confirm whether a grouped split is needed.
k_folds = 3
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=2024)

for fold, (train_idx, val_idx) in enumerate(skf.split(trn_dataset, trn_dataset.targets)):
    print(f'Fold {fold + 1}/{k_folds}')
    best_score = 0
    best_normal_score = 0
    current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"fold_{fold+1}_{current_time}"  # e.g. fold_1_20231105_153045

    # Train/validation subsets for this fold
    train_subset = Subset(trn_dataset, train_idx)
    val_subset = Subset(val_dataset, val_idx)

    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers)

    # Fresh model, optimizer, loss and schedulers for every fold
    model = timm.create_model(model_name,pretrained=True,num_classes=17).to(device)

    optimizer = Adam(model.parameters(), lr=LR, weight_decay=1e-5, amsgrad=True)
    loss_fn = nn.CrossEntropyLoss()
    # NOTE(review): eta_min equals LR (both 1e-4), so this cosine schedule
    # keeps the learning rate constant - confirm the intended eta_min/T_max.
    scheduler = CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-4)
    early_stopping = EarlyStopping(patience=3, min_delta=0.01)

    wandb.init(project=f"{model_name}_batch{BATCH_SIZE}_worker{num_workers}",
               name=run_name,  # fold number plus date and time
               group=f"{model_name}_kfold",
               config={
                   "fold": fold + 1,
                   "model_name": model_name,
                   "img_size": img_size,
                   "epochs": EPOCHS,
                   "batch_size": BATCH_SIZE,
                   "learning_rate": LR,
                   "optimizer": "Adam",
                   "scheduler": "CosineAnnealingLR",
                   "loss_function": "CrossEntropyLoss",
               })

    # Training and validation loop
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch + 1}: Applying Mixup")
        augmentation_func = mixup_data

        ret = train_one_epoch(train_loader, model, optimizer, loss_fn, device=device, augmentation_func=augmentation_func)
        ret['epoch'] = epoch

        val_metrics = validate_one_epoch(val_loader, model, loss_fn, device=device)
        val_metrics['epoch'] = epoch

        # Per-epoch scheduler step (CosineAnnealingLR); a ReduceLROnPlateau
        # scheduler would need scheduler.step(val_metrics['val_loss']) instead.
        scheduler.step()

        log = f"Fold {fold + 1}, Epoch {epoch+1}/{EPOCHS}\n"
        for k, v in ret.items():
            log += f"Train {k}: {v:.4f}\n"
        for k, v in val_metrics.items():
            log += f"Val {k}: {v:.4f}\n"
        print(log)

        # The original file names were f-strings without placeholders, so every
        # fold and epoch overwrote one file. Name them per model and per fold.
        save_model(model, save_path, f'{model_name}_fold{fold + 1}.pkl')

        # Snapshot of this fold's current test predictions.
        preds_list = _predict_labels(model, tqdm(tst_loader), device)

        pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
        pred_df['target'] = preds_list

        pred_df.to_csv(os.path.join(pred_dir, f'{model_name}_fold{fold + 1}_pred.csv'), index=False)

        # NOTE(review): best_normal_score is never updated, so
        # "best_custom_score" is always logged as 0 - confirm the intent.
        wandb.log({
            "train_loss": ret['train_loss'],
            "train_acc": ret['train_acc'],
            "train_f1": ret['train_f1'],
            "valid_loss": val_metrics['val_loss'],
            "valid_acc": val_metrics['val_acc'],
            "valid_f1": val_metrics['val_f1'],
            "epoch": epoch,
            "best_custom_score": best_normal_score
        })
        # Early-stopping check on the validation loss
        if early_stopping(val_metrics["val_loss"]):
            print(f"Early stopping triggered for fold {fold + 1}")
            break

    # After the fold finishes, predict the whole test set with its final model.
    fold_predictions = _predict_labels(model, tst_loader, device)

    # Keep this fold's test predictions for the ensemble vote.
    all_predictions.append(fold_predictions)
    wandb.finish()
    print(f"Fold {fold + 1} completed\n\n")

In [None]:
from collections import Counter

# Hard voting: for every test sample keep the class most folds predicted.
final_predictions = []

for sample_idx in range(len(all_predictions[0])):
    # Count the votes in fold order, so ties can be resolved by position.
    votes = Counter(fold_preds[sample_idx] for fold_preds in all_predictions)
    top_count = max(votes.values())

    # Among the classes tied for the highest count, take the one that was
    # first voted for latest (the last of the tied entries).
    tied = [label for label, count in votes.items() if count == top_count]
    final_predictions.append(tied[-1])

# Sanity check: should equal the number of test samples
# (3140 according to the original note).
print(f"Final predictions length: {len(final_predictions)}")

In [None]:

# Build the submission frame from the test dataset rows.
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])  # reuse the ID and target columns of the test dataset
pred_df['target'] = final_predictions  # overwrite 'target' with the voted predictions

# Optional check: the IDs must match sample_submission.csv row by row.
sample_submission_df = pd.read_csv("../data/sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all(), "ID가 일치하지 않습니다!"

# Write the result as CSV, creating the output directory if needed.
output_path = "../output/output"
os.makedirs(output_path, exist_ok=True)
pred_df.to_csv(os.path.join(output_path, "pred_result.csv"), index=False)
print("예측 결과가 '../output/output/pred_result.csv'에 저장되었습니다.")