In [None]:
import os
import time
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from torch.optim.lr_scheduler import ReduceLROnPlateau
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import StratifiedShuffleSplit
import random

import wandb

import timm
from dotenv import load_dotenv
from datetime import datetime
from zoneinfo import ZoneInfo

In [None]:
# List every model architecture name that timm can create.
model_names = timm.list_models()
print(model_names)


1. **Lightweight MobileNets and GhostNets**:
   - `mobilenetv2_035`, `mobilenetv2_050`, `mobilenetv2_075`, `mobilenetv2_100`, `mobilenetv3_small_050`, `mobilenetv3_small_075`, `mobilenetv3_small_100`, `ghostnet_050`, `ghostnet_100`, `ghostnetv2_100`, `ghostnetv2_130`, `ghostnetv2_160`

2. **EfficientNets (Lite models)**:
   - `efficientnet_lite0`, `efficientnet_lite1`, `efficientnet_lite2`, `efficientnet_lite3`, `efficientnet_lite4`

3. **Tiny and Small ViT Models**:
   - `vit_tiny_patch16_224`, `vit_tiny_patch16_384`, `vit_small_patch16_224`, `vit_small_patch16_384`

4. **Small EfficientNets and MobileViTs**:
   - `efficientnet_b0`, `efficientnet_b1`, `efficientnet_b2`, `efficientnet_b3`, `efficientnetv2_rw_s`, `mobilevit_xs`, `mobilevit_s`

5. **CoaT Models (Lite variants)**:
   - `coat_lite_tiny`, `coat_lite_mini`, `coat_lite_small`, `coat_lite_medium`

6. **CaiT and ResNet Variants**:
   - `cait_xxs24_224`, `cait_xxs24_384`, `resnet18`, `resnet34`

7. **MixNet and EfficientNets (Base models)**:
   - `mixnet_s`, `mixnet_m`, `mixnet_l`, `efficientnet_b4`, `efficientnet_b5`

8. **DLA Models**:
   - `dla34`, `dla46_c`, `dla46x_c`, `dla60`, `dla60_res2net`, `dla60_res2next`, `dla60x`, `dla60x_c`

9. **RegNet and RexNet Models**:
   - `regnetx_002`, `regnetx_004`, `regnetx_006`, `rexnet_100`, `rexnet_130`, `rexnetr_100`, `rexnetr_130`

10. **ViT Base and Larger EfficientNets**:
    - `vit_base_patch16_224`, `vit_base_patch16_384`, `efficientnet_b6`, `efficientnet_b7`

11. **Larger and Deeper Networks**:
    - `resnet50`, `resnet101`, `resnext50_32x4d`, `resnext101_32x4d`, `densenet121`, `densenet169`

12. **Heavier and State-of-the-Art Models**:
    - `beit_large_patch16_224`, `beit_large_patch16_384`, `beit_large_patch16_512`, `swin_large_patch4_window12_384`, `swinv2_base_window12to24_192to384`


In [None]:
# Fix every random seed so that runs are repeatable.
SEED = 42
# NOTE: only affects subprocesses started later; the hash seed of the
# already-running interpreter cannot be changed from inside it.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cuDNN autotuning (benchmark=True) may select different convolution kernels
# from run to run, which defeats the seeding above. Ask for deterministic
# kernels and disable the autotuner instead.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Root directory of the dataset (CSV files and image folders live below it).
data_path = '../data/'

# timm architecture name used for every classifier in this notebook.
model_name = 'efficientnet_b5'

In [None]:
# Training hyperparameters
img_size = 256  # images are resized to img_size x img_size by the transforms
LR = 1e-5
EPOCHS = 1
BATCH_SIZE = 4
num_workers = 4  # DataLoader worker processes; set to 0 to disable multiprocessing
early_stopping_patience = 5  # epochs without validation-loss improvement before stopping
augment_ratio = 1  # number of extra (duplicated) rows added per original training image

In [None]:
# # wandb 연동
# load_dotenv()
# api_key = os.getenv('WANDB_API_KEY')

# wandb.login(key=api_key)

# train_time = datetime.fromtimestamp(time.time(), tz=ZoneInfo("Asia/Seoul")).strftime("%Y%m%d-%H%M%S")
# wandb.init(project="document_classification-cv", name=f"one_vs_all-{train_time}")

# print(train_time)

In [None]:
# Weights & Biases setup: pull WANDB_API_KEY from the environment (.env) and log in.
load_dotenv()
api_key = os.environ.get('WANDB_API_KEY')
wandb.login(key=api_key)

# Session timestamp in Seoul time; reused in every wandb project/run name.
train_time = datetime.now(tz=ZoneInfo("Asia/Seoul")).strftime("%Y%m%d-%H%M%S")

# wandb 프로젝트 초기화
# wandb.init(
#     project="document_classification-cv",
#     name=f"one_vs_all-{train_time}"
#     config={
#         "learning_rate": LR,
#         "architecture": model_name,
#         "dataset": "custom-dataset",
#         "epochs": EPOCHS,
#         "batch_size": BATCH_SIZE,
#         "image_size": img_size,
#         "num_workers" : num_workers,
#         'augment_ratio' : augment_ratio
#     }
# )

In [None]:
# Sanity check: displays whether a CUDA device is usable in this kernel.
torch.cuda.is_available()

In [None]:
# Image pipelines (albumentations).
# Channel statistics used by Normalize in both pipelines.
_NORMALIZE_STATS = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Noise-or-blur branch of the training pipeline, wrapped in OneOf.
_noise_or_blur = A.OneOf(
    [
        A.GaussNoise(var_limit=(10.0, 800.0), p=0.75),
        A.GaussianBlur(blur_limit=(1, 7), p=0.5),
    ],
    p=0.75,
)

# Training pipeline: resize, random augmentations, normalize, convert to tensor.
_train_steps = [
    A.Resize(height=img_size, width=img_size),
    _noise_or_blur,
    A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.75),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=30, p=0.25),
    A.CoarseDropout(max_holes=6, max_height=32, max_width=32, p=0.5),
    A.ElasticTransform(alpha=1, sigma=30, alpha_affine=30, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.Rotate(limit=30, p=0.75),
    A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.5),
    A.MotionBlur(blur_limit=5, p=0.5),
    A.OpticalDistortion(p=0.5),
    A.Transpose(p=0.5),
    A.Normalize(**_NORMALIZE_STATS),
    ToTensorV2(),
]
trn_transform = A.Compose(_train_steps)

# Evaluation pipeline (validation and test): no random augmentation.
_eval_steps = [
    A.Resize(height=img_size, width=img_size),
    A.Normalize(**_NORMALIZE_STATS),
    ToTensorV2(),
]
tst_transform = A.Compose(_eval_steps)

In [None]:
# Dataset 정의
class ImageDataset(Dataset):
    """Dataset that reads images listed in a two-column (file name, target) frame.

    Args:
        df: DataFrame whose rows are ``(file name, target)``; only ``df.values``
            is kept, so the column order matters.
        path: Directory that the file names are relative to.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry (albumentations style).
    """

    def __init__(self, df, path, transform=None):
        self.df = df.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        The image is loaded as an H x W x 3 uint8 RGB array. If it cannot be
        read, the error is printed and a black ``img_size`` x ``img_size``
        image is substituted so that one bad file does not abort an epoch.
        """
        name, target = self.df[idx]
        img_path = os.path.join(self.path, name)

        try:
            # The context manager closes the file handle promptly (important with
            # many DataLoader workers). convert('RGB') guarantees three channels
            # for grayscale, palette or RGBA files, which the 3-channel Normalize
            # in the transforms requires.
            with Image.open(img_path) as pil_img:
                img = np.array(pil_img.convert('RGB'))
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            img = np.zeros((img_size, img_size, 3), dtype=np.uint8)  # blank placeholder image

        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# Load the training labels (the 'ID' and 'target' columns are used below).
train_df = pd.read_csv(f"{data_path}train.csv")

# Oversampling: add `augment_ratio` extra (ID, target) rows per original image.
# The pixel-level augmentation itself is applied on the fly by the Dataset's
# transform each time a row is read, so nothing needs to be loaded or
# transformed here. The previous loop opened and augmented every image only
# to throw the result away.
augmented_data = [
    (image_id, target)
    for image_id, target in zip(train_df['ID'], train_df['target'])
    for _ in range(augment_ratio)
]

In [None]:
# Turn the duplicated (ID, target) records into a frame and append them
# below the original rows, renumbering the index from zero.
augmented_df = pd.DataFrame(data=augmented_data, columns=['ID', 'target'])
full_train_df = pd.concat(
    objs=[train_df, augmented_df],
    ignore_index=True,
)

In [None]:
# Splitter for a single stratified 80/20 shuffle split, seeded with SEED.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=SEED)

In [None]:
# Perform the single stratified split on the ORIGINAL images.
# full_train_df contains every image more than once (original row plus
# duplicated rows), so splitting it directly puts the same file in both the
# training and the validation set and inflates the validation metrics.
# Instead: choose the validation images from train_df, then keep every row of
# full_train_df whose image is not a validation image as training data.
train_index, val_index = next(sss.split(train_df, train_df['target']))
val_df = train_df.iloc[val_index]
_val_ids = set(val_df['ID'])
train_df_split = full_train_df[~full_train_df['ID'].isin(_val_ids)]

# Guard against leakage: no image may appear on both sides of the split.
assert _val_ids.isdisjoint(train_df_split['ID']), "train/validation image overlap"

In [None]:
# Build the Dataset and DataLoader objects.
_train_img_dir = f"{data_path}train/"
# Options shared by every loader; only `shuffle` differs between them.
_loader_opts = dict(batch_size=BATCH_SIZE, num_workers=num_workers, pin_memory=True)

train_dataset = ImageDataset(train_df_split, _train_img_dir, transform=trn_transform)
val_dataset = ImageDataset(val_df, _train_img_dir, transform=tst_transform)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_opts)

# Test set: the rows of the sample submission file, images read from the test folder.
_submission_rows = pd.read_csv(f"{data_path}sample_submission.csv")
tst_dataset = ImageDataset(_submission_rows, f"{data_path}test/", transform=tst_transform)
tst_loader = DataLoader(tst_dataset, shuffle=False, **_loader_opts)

# Relative class frequencies of the two splits.
for _heading, _split in (
    ("Train set class distribution:", train_df_split),
    ("\nValidation set class distribution:", val_df),
):
    print(_heading)
    print(_split['target'].value_counts(normalize=True))

# Row counts of each data set.
print(f"Original training data count: {len(train_df)}")
print(f"Augmented training data count: {len(train_df_split)}")
print(f"Validation data count: {len(val_df)}")
print(f"Test data count: {len(tst_dataset)}")

In [None]:

# # 시각화 함수
# def visualize_augmented_images(dataset, num_images=5, augmentations_per_image=4, data_gap=1570):
#     fig, axes = plt.subplots(num_images, augmentations_per_image + 1, figsize=(20, num_images * 4))
    
#     for i in range(num_images):
#         # 원본 이미지 인덱스
#         original_img_index = random.randint(0,ori_traindata_num)
        
#         # 원본 이미지 로드
#         original_img_path = os.path.join(dataset.path, dataset.df[original_img_index][0])
#         original_img = np.array(Image.open(original_img_path))
        
#         # 원본 이미지 표시
#         axes[i, 0].imshow(original_img)
#         axes[i, 0].set_title("Original Image")
#         axes[i, 0].axis("off")
        
#         # 증강된 이미지 최대 4개 표시
#         for j in range(1, augmentations_per_image + 1):
#             # 증강된 이미지를 올바르게 가져오기 위해, 증강 비율을 사용하여 인덱스를 계산
#             augmented_idx = original_img_index + data_gap * (j - 1)
#             augmented_img, _ = dataset[augmented_idx]
#             augmented_img = augmented_img.permute(1, 2, 0).numpy()  # CHW -> HWC
            
#             # Normalize 된 이미지를 다시 원래 범위로 변환
#             mean = np.array([0.485, 0.456, 0.406])
#             std = np.array([0.229, 0.224, 0.225])
#             augmented_img = std * augmented_img + mean
#             augmented_img = np.clip(augmented_img, 0, 1)
            
#             axes[i, j].imshow(augmented_img)
#             axes[i, j].set_title(f"Augmented Image {j}")
#             axes[i, j].axis("off")
    
#     plt.tight_layout()
#     plt.show()

# # 트레이닝 데이터 시각화
# visualize_augmented_images(trn_dataset)

In [None]:

# trn_loader = DataLoader(
#     trn_dataset,
#     batch_size=BATCH_SIZE,
#     shuffle=True,
#     num_workers=num_workers,
#     pin_memory=True,
#     drop_last=False
# )

# tst_loader = DataLoader(
#     tst_dataset,
#     batch_size=BATCH_SIZE,
#     shuffle=False,
#     num_workers=num_workers,
#     pin_memory=True
# )


In [None]:

# # 모델을 불러옵니다.
# model = timm.create_model(
#     model_name,
#     pretrained=True,
#     num_classes=17,
#     drop_rate=0.2  # Dropout 설정
# ).to(device)

# # 손실 함수를 정의합니다.
# loss_fn = nn.CrossEntropyLoss()

# # 옵티마이저를 정의합니다.
# optimizer = AdamW(model.parameters(), lr=LR)

# # Learning Rate Scheduler를 정의합니다.
# scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

# # Early Stopping을 위한 변수 초기화
# best_loss = float('inf')
# early_stopping_counter = 0


In [None]:
def _run_one_vs_rest_epoch(model, loader, class_num, loss_fn, optimizer=None, desc=None):
    """Run one pass over `loader` for the binary task "is the target `class_num`?".

    Trains when `optimizer` is given, otherwise evaluates without gradients.

    Returns:
        (mean batch loss, accuracy, binary F1) over the whole pass.
    """
    is_train = optimizer is not None
    model.train(is_train)

    total_loss = 0.0
    all_preds, all_targets = [], []

    with torch.set_grad_enabled(is_train):
        for image, targets in tqdm(loader, desc=desc):
            image = image.to(device)
            # Collapse the multi-class label to 1 (this class) / 0 (any other class).
            targets = (targets == class_num).long().to(device)

            if is_train:
                optimizer.zero_grad()
            logits = model(image)
            loss = loss_fn(logits, targets)
            if is_train:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            all_preds.extend(logits.argmax(dim=1).cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    mean_loss = total_loss / len(loader)
    acc = accuracy_score(all_targets, all_preds)
    # zero_division=0 keeps the value sklearn would return anyway (0.0) but
    # silences the warning when an epoch has no positive predictions/labels.
    f1 = f1_score(all_targets, all_preds, average='binary', zero_division=0)
    return mean_loss, acc, f1


def train_model_for_class(class_num):
    """Train a binary (one-vs-rest) classifier for `class_num` and return it.

    Uses the module-level `train_loader` / `val_loader`, logs per-epoch metrics
    to a dedicated wandb run, reduces the learning rate when the validation
    loss plateaus, and stops early after `early_stopping_patience` epochs
    without improvement. The weights with the lowest validation loss are saved
    to ``{model_name}_class{class_num}_best_model.pth``.

    Args:
        class_num: Target value treated as the positive class.

    Returns:
        The trained model, carrying the best (lowest validation loss) weights
        rather than whatever the final epoch produced.
    """
    wandb.init(
        project=f"{train_time}_one_vs_all",
        name=f"{train_time}_class_{class_num}",
        config={
            "learning_rate": LR,
            "architecture": model_name,
            "dataset": "custom-dataset",
            "epochs": EPOCHS,
            "batch_size": BATCH_SIZE,
            "image_size": img_size,
            "num_workers": num_workers,
            'augment_ratio': augment_ratio,
            "class_num": class_num
        }
    )

    best_model_path = f"{model_name}_class{class_num}_best_model.pth"

    # try/finally: always close the wandb run, even if training raises, so the
    # next class does not start inside a dangling run.
    try:
        model = timm.create_model(model_name, pretrained=True, num_classes=2, drop_rate=0.2).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=LR)
        # `verbose=True` is deprecated in recent PyTorch releases; learning-rate
        # reductions are reported explicitly after scheduler.step() below.
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

        best_val_loss = float('inf')
        early_stopping_counter = 0
        checkpoint_saved = False

        for epoch in range(EPOCHS):
            train_loss, train_acc, train_f1 = _run_one_vs_rest_epoch(
                model, train_loader, class_num, loss_fn,
                optimizer=optimizer,
                desc=f"Epoch {epoch+1}/{EPOCHS} - Training",
            )
            val_loss, val_acc, val_f1 = _run_one_vs_rest_epoch(
                model, val_loader, class_num, loss_fn,
                desc=f"Epoch {epoch+1}/{EPOCHS} - Validation",
            )

            print(f"Class {class_num} - Epoch {epoch+1}/{EPOCHS}")
            print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}, F1: {train_f1:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, F1: {val_f1:.4f}")

            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "train_f1_score": train_f1,
                "val_loss": val_loss,
                "val_accuracy": val_acc,
                "val_f1_score": val_f1
            })

            lr_before = optimizer.param_groups[0]['lr']
            scheduler.step(val_loss)
            lr_after = optimizer.param_groups[0]['lr']
            if lr_after < lr_before:
                print(f"Reducing learning rate from {lr_before:.4e} to {lr_after:.4e}.")

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                early_stopping_counter = 0
                torch.save(model.state_dict(), best_model_path)
                checkpoint_saved = True
            else:
                early_stopping_counter += 1
                if early_stopping_counter >= early_stopping_patience:
                    print(f"Early stopping triggered for class {class_num} after {epoch+1} epochs.")
                    break

        # Hand back the best weights, not the (possibly worse) last-epoch ones.
        # The flag ensures a stale checkpoint from an earlier session is never loaded.
        if checkpoint_saved:
            model.load_state_dict(torch.load(best_model_path, map_location=device))
    finally:
        wandb.finish()

    return model


In [None]:
# Train one binary classifier per class (targets 0..16); the position of a
# model in `models` is the class it detects.
models = []
for class_num in range(17):
    print(f"Training model for class {class_num}")
    models.append(train_model_for_class(class_num))

In [None]:
# Ensemble prediction over the one-vs-rest models
def ensemble_predict(models, loader):
    """Predict a class index for every sample in `loader`.

    Each model in `models` is a two-output classifier whose index 1 means
    "belongs to my class". For every sample the model with the highest
    positive-class confidence wins, and its position in `models` is returned
    as the prediction.

    Args:
        models: Sequence of two-output models; position i scores class i.
        loader: Iterable of ``(image_batch, _)`` pairs; the second item is ignored.

    Returns:
        List with one predicted class index per sample, in loader order.
    """
    # Switch to inference mode once, not once per batch.
    for model in models:
        model.eval()

    predictions = []
    with torch.no_grad():
        for image, _ in tqdm(loader):
            image = image.to(device)
            class_scores = []
            for model in models:
                logits = model(image)
                # A raw positive logit is not comparable across independently
                # trained models because it ignores the negative logit. The
                # log-odds (logit[1] - logit[0]) orders samples exactly like the
                # softmax probability of the positive class, and unlike the
                # probability it does not saturate to 1.0 and create ties.
                class_scores.append((logits[:, 1] - logits[:, 0]).cpu().numpy())
            # (n_models, batch) -> (batch, n_models)
            class_scores = np.array(class_scores).T
            predictions.extend(np.argmax(class_scores, axis=1))
    return predictions

In [None]:
# Run the ensemble over the test loader to get one predicted class per test row.
ensemble_preds = ensemble_predict(models, tst_loader)

In [None]:
# Rebuild an (ID, target) frame from the test dataset rows and overwrite the
# placeholder targets with the ensemble predictions.
pred_df = (
    pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
    .assign(target=ensemble_preds)
)


In [None]:
# Re-read the submission template and check that the prediction rows carry
# the same IDs in the same order.
sample_submission_df = pd.read_csv(f"{data_path}sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all()

# Write the predictions to a CSV file (without the index column) and preview them.
pred_df.to_csv(f"{model_name}_ensemble_pred.csv", index=False)
print(pred_df.head())