<a href="https://colab.research.google.com/github/YeongjaeSSHIN/SW.AI_DL_17/blob/main/DR_OnDAT_swin_2_ipynb_%EC%B5%9C%EC%A2%85.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
'''class0 1805장
class1 370장
class2 999장
class3 193장
class4 295장'''

In [None]:
#!pip install torch==2.0.1 torchvision==0.15.2 timm optuna albumentations

# 1) Colab 런타임을 재시작한 뒤 실행할 셀
!pip install numpy opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations optuna

In [3]:
'''swin transformer optuna'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations optuna

import os
import random
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.model_selection import StratifiedShuffleSplit
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler
import optuna

# -------------------------------
# 0) Reproducibility: Seed 고정
# -------------------------------
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# -------------------------------
# 1) 전처리 함수 정의
# -------------------------------
def preprocess_fundus_image(img_path, output_size=(384,384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h,w = img_bgr.shape[:2]
    if contours and cv2.contourArea(max(contours, key=cv2.contourArea)) >= h*w*0.1:
        cnt = max(contours, key=cv2.contourArea)
        (cx,cy),r = cv2.minEnclosingCircle(cnt)
        cx,cy,r = map(int,(cx,cy,r))
        mask = np.zeros_like(gray); cv2.circle(mask,(cx,cy),r,255,-1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        img_bgr = masked[max(cy-r,0):min(cy+r,h), max(cx-r,0):min(cx+r,w)]
    lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    l,a_,b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l = clahe.apply(l)
    lab = cv2.merge([l,a_,b])
    img_bgr = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
    if output_size:
        img_bgr = cv2.resize(img_bgr, output_size, interpolation=cv2.INTER_AREA)
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# -------------------------------
# 2) Dataset 정의
# -------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        fname = row['image']
        if not fname.lower().endswith('.png'):
            fname += '.png'
        img = np.array(preprocess_fundus_image(
            os.path.join(self.img_dir, fname),
            output_size=self.preprocess_size))
        label = int(row['label'])
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# -------------------------------
# 3) 데이터 로드 & Split
# -------------------------------
ROOT = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
df_all = pd.read_csv(os.path.join(ROOT, 'train.csv')).rename(columns={'diagnosis':'label'})
df_all = df_all[df_all['image'].apply(
    lambda f: os.path.isfile(os.path.join(ROOT,'train_images',
        f if f.lower().endswith('.png') else f+'.png'))
)].reset_index(drop=True)

sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
tr_idx, hd_idx = next(sss1.split(df_all, df_all['label']))
df_train = df_all.iloc[tr_idx].reset_index(drop=True)
df_hold  = df_all.iloc[hd_idx].reset_index(drop=True)
sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# -------------------------------
# 4) Sampler & Loader 함수
# -------------------------------
def make_sampler(df):
    counts = df['label'].value_counts().sort_index().values
    weights = 1.0 / counts
    sample_w = df['label'].apply(lambda x: weights[x]).tolist()
    return WeightedRandomSampler(sample_w, num_samples=len(sample_w), replacement=True)

def make_loaders(batch_size):
    sampler = make_sampler(df_train)
    train_ds = APTOSDataset(df_train, os.path.join(ROOT,'train_images'))
    val_ds   = APTOSDataset(df_val,   os.path.join(ROOT,'train_images'))
    test_ds  = APTOSDataset(df_test,  os.path.join(ROOT,'train_images'))
    return (
        DataLoader(train_ds, batch_size=batch_size, sampler=sampler,
                   num_workers=4, pin_memory=True),
        DataLoader(val_ds,   batch_size=batch_size, shuffle=False,
                   num_workers=4, pin_memory=True),
        DataLoader(test_ds,  batch_size=batch_size, shuffle=False,
                   num_workers=4, pin_memory=True),
    )

# -------------------------------
# 5) Optuna Objective (빠른 탐색용)
# -------------------------------
EPOCH_TRIAL = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def objective(trial):
    dp_rate    = trial.suggest_float('drop_path_rate', 0.0, 0.3)
    lr         = trial.suggest_float('max_lr', 1e-4, 1e-2, log=True)
    wd         = trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16, 32])

    train_loader, valid_loader, _ = make_loaders(batch_size)

    model = timm.create_model(
        'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
        pretrained=True,
        drop_path_rate=dp_rate,
        num_classes=5
    ).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = OneCycleLR(optimizer, max_lr=lr,
                          steps_per_epoch=len(train_loader),
                          epochs=EPOCH_TRIAL, pct_start=0.1, div_factor=25.0)
    scaler    = GradScaler()
    criterion = nn.CrossEntropyLoss()

    for _ in range(EPOCH_TRIAL):
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast():
                loss = criterion(model(imgs), labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

    model.eval()
    correct = total = 0
    with torch.no_grad():
        for imgs, labels in valid_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            pred = model(imgs).argmax(1)
            correct += (pred == labels).sum().item()
            total   += labels.size(0)
    return correct / total

# -------------------------------
# 6) Optuna 최적화 실행 & 결과
# -------------------------------
if __name__ == '__main__':
    study = optuna.create_study(
        direction='maximize',
        pruner=optuna.pruners.SuccessiveHalvingPruner()
    )
    study.optimize(objective, n_trials=5, show_progress_bar=True)
    print("Best params:", study.best_params)
    print("Best validation accuracy:", study.best_value)

    # -------------------------------
    # 7) 최적 파라미터로 전체 50 epoch 재학습 & 테스트 평가
    # -------------------------------
    best = study.best_params
    train_loader, valid_loader, test_loader = make_loaders(best['batch_size'])

    model = timm.create_model(
        'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
        pretrained=True,
        drop_path_rate=best['drop_path_rate'],
        num_classes=5
    ).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=best['max_lr'], weight_decay=best['weight_decay'])
    scheduler = OneCycleLR(optimizer, max_lr=best['max_lr'],
                          steps_per_epoch=len(train_loader), epochs=50,
                          pct_start=0.1, div_factor=25.0)
    scaler    = GradScaler()
    criterion = nn.CrossEntropyLoss()

    for epoch in range(1, 51):
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast():
                loss = criterion(model(imgs), labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

    model.eval()
    t_correct = t_total = 0
    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            pred = model(imgs).argmax(1)
            t_correct += (pred == labels).sum().item()
            t_total   += labels.size(0)
    test_acc = t_correct / t_total
    print(f"Test Accuracy: {test_acc:.4f}")

[I 2025-06-12 00:44:44,680] A new study created in memory with name: no-name-c7d3b26b-7c2e-4753-8f41-fa259f712192


  0%|          | 0/5 [00:00<?, ?it/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/801M [00:00<?, ?B/s]

  scaler    = GradScaler()
  with autocast():


[I 2025-06-12 01:00:30,333] Trial 0 finished with value: 0.863013698630137 and parameters: {'drop_path_rate': 0.15299544194888767, 'max_lr': 0.00019221751697071296, 'weight_decay': 0.0001160015683094429, 'batch_size': 32}. Best is trial 0 with value: 0.863013698630137.
[I 2025-06-12 01:11:41,944] Trial 1 finished with value: 0.8794520547945206 and parameters: {'drop_path_rate': 0.07785408634722764, 'max_lr': 0.00021160391896617083, 'weight_decay': 0.009990978364401213, 'batch_size': 16}. Best is trial 1 with value: 0.8794520547945206.
[I 2025-06-12 01:22:29,376] Trial 2 finished with value: 0.8191780821917808 and parameters: {'drop_path_rate': 0.14367705464594271, 'max_lr': 0.00044622715815366773, 'weight_decay': 9.636006850607332e-05, 'batch_size': 16}. Best is trial 1 with value: 0.8794520547945206.
[I 2025-06-12 01:33:04,492] Trial 3 finished with value: 0.821917808219178 and parameters: {'drop_path_rate': 0.2672695550866221, 'max_lr': 0.00027591348381561965, 'weight_decay': 0.00042

  scaler    = GradScaler()
  with autocast():


KeyboardInterrupt: 

In [None]:
'''efficientnet_b4, best parameter'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations optuna

import os
import random
import numpy as np
import pandas as pd
from PIL import Image
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

import optuna

# -------------------------------
# 0) Reproducibility: Seed 고정
# -------------------------------
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# -------------------------------
# 1) 전처리 함수 정의
# -------------------------------
def preprocess_fundus_image(img_path, output_size=(300,300)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"Cannot read image: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h,w = img_bgr.shape[:2]
    # crop circle if large enough
    if contours and cv2.contourArea(max(contours, key=cv2.contourArea)) >= h*w*0.1:
        cnt = max(contours, key=cv2.contourArea)
        (cx,cy), r = cv2.minEnclosingCircle(cnt)
        cx,cy,r = map(int,(cx,cy,r))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx,cy), r, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        img_bgr = masked[max(cy-r,0):min(cy+r,h), max(cx-r,0):min(cx+r,w)]
    # CLAHE in LAB
    lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    l,a_,b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l = clahe.apply(l)
    lab = cv2.merge([l,a_,b])
    img_bgr = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
    # resize
    if output_size:
        img_bgr = cv2.resize(img_bgr, output_size, interpolation=cv2.INTER_AREA)
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# -------------------------------
# 2) Dataset 정의
# -------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(300,300)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        fname = row['image']
        if not fname.lower().endswith('.png'):
            fname += '.png'
        img = np.array(preprocess_fundus_image(
            os.path.join(self.img_dir, fname),
            output_size=self.preprocess_size))
        label = int(row['label'])
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# -------------------------------
# 3) 데이터 로드 & Split
# -------------------------------
ROOT = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
df_all = pd.read_csv(os.path.join(ROOT, 'train.csv')).rename(columns={'diagnosis':'label'})
df_all = df_all[df_all['image'].apply(
    lambda f: os.path.isfile(os.path.join(ROOT,'train_images',
        f if f.lower().endswith('.png') else f+'.png'))
)].reset_index(drop=True)

sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
tr_idx, hd_idx = next(sss1.split(df_all, df_all['label']))
df_train = df_all.iloc[tr_idx].reset_index(drop=True)
df_hold  = df_all.iloc[hd_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# -------------------------------
# 4) Sampler & Loader 함수
# -------------------------------
def make_sampler(df):
    counts = df['label'].value_counts().sort_index().values
    weights = 1.0 / counts
    sample_w = df['label'].apply(lambda x: weights[x]).tolist()
    return WeightedRandomSampler(sample_w, num_samples=len(sample_w), replacement=True)

def make_loaders(batch_size):
    sampler    = make_sampler(df_train)
    train_ds   = APTOSDataset(df_train, os.path.join(ROOT,'train_images'))
    val_ds     = APTOSDataset(df_val,   os.path.join(ROOT,'train_images'))
    test_ds    = APTOSDataset(df_test,  os.path.join(ROOT,'train_images'))
    return (
        DataLoader(train_ds, batch_size=batch_size, sampler=sampler,   num_workers=4, pin_memory=True),
        DataLoader(val_ds,   batch_size=batch_size, shuffle=False,     num_workers=4, pin_memory=True),
        DataLoader(test_ds,  batch_size=batch_size, shuffle=False,     num_workers=4, pin_memory=True),
    )

# -------------------------------
# 5) Optuna Objective (빠른 탐색용)
# -------------------------------
EPOCH_TRIAL = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def objective(trial):
    lr         = trial.suggest_float('max_lr',        1e-4, 1e-2, log=True)
    wd         = trial.suggest_float('weight_decay',  1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16,32])

    train_loader, valid_loader, _ = make_loaders(batch_size)

    model = timm.create_model(
        'efficientnet_b3',
        pretrained=True,
        num_classes=5
    ).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = OneCycleLR(optimizer, max_lr=lr,
                          steps_per_epoch=len(train_loader),
                          epochs=EPOCH_TRIAL, pct_start=0.1, div_factor=25.0)
    scaler    = GradScaler()
    criterion = nn.CrossEntropyLoss()

    # 빠른 학습
    for _ in range(EPOCH_TRIAL):
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast():
                loss = criterion(model(imgs), labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

    # 검증 정확도
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for imgs, labels in valid_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            pred = model(imgs).argmax(1)
            correct += (pred==labels).sum().item()
            total   += labels.size(0)
    return correct/total

# -------------------------------
# 6) Optuna 최적화 실행 & 결과
# -------------------------------
if __name__ == '__main__':
    study = optuna.create_study(direction='maximize',
                                pruner=optuna.pruners.SuccessiveHalvingPruner())
    study.optimize(objective, n_trials=5, show_progress_bar=True)
    print("Best params:", study.best_params)
    print("Best validation accuracy:", study.best_value)

    # -------------------------------
    # 7) 최적 파라미터로 전체 50 epoch 재학습 & 테스트 평가
    # -------------------------------
    best = study.best_params
    train_loader, valid_loader, test_loader = make_loaders(best['batch_size'])

    model = timm.create_model(
        'efficientnet_b3',
        pretrained=True,
        num_classes=5
    ).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=best['max_lr'],      weight_decay=best['weight_decay'])
    scheduler = OneCycleLR(optimizer, max_lr=best['max_lr'],
                          steps_per_epoch=len(train_loader), epochs=50,
                          pct_start=0.1, div_factor=25.0)
    scaler    = GradScaler()
    criterion = nn.CrossEntropyLoss()

    for epoch in range(1,51):
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast():
                loss = criterion(model(imgs), labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

'''    # Test 성능
    model.eval()
    t_correct = t_total = 0
    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            pred = model(imgs).argmax(1)
            t_correct += (pred==labels).sum().item()
            t_total   += labels.size(0)
    test_acc = t_correct / t_total
    print(f"Test Accuracy: {test_acc:.4f}")'''


In [None]:
'''ConvNeXt-Base pretrained, best parameter'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations optuna

import os
import random
import numpy as np
import pandas as pd
from PIL import Image
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

import optuna

# -------------------------------
# 0) Reproducibility: Seed 고정
# -------------------------------
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# -------------------------------
# 1) 전처리 함수 정의
# -------------------------------
def preprocess_fundus_image(img_path, output_size=(224,224)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"Cannot read image: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h,w = img_bgr.shape[:2]
    if contours and cv2.contourArea(max(contours, key=cv2.contourArea)) >= h*w*0.1:
        cnt = max(contours, key=cv2.contourArea)
        (cx,cy), r = cv2.minEnclosingCircle(cnt)
        cx,cy,r = map(int,(cx,cy,r))
        mask = np.zeros_like(gray)
        cv2.circle(mask,(cx,cy),r,255,-1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        img_bgr = masked[max(cy-r,0):min(cy+r,h), max(cx-r,0):min(cx+r,w)]
    lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    l,a_,b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l = clahe.apply(l)
    lab = cv2.merge([l,a_,b])
    img_bgr = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
    if output_size:
        img_bgr = cv2.resize(img_bgr, output_size, interpolation=cv2.INTER_AREA)
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# -------------------------------
# 2) Dataset 정의
# -------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(224,224)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        if not fname.lower().endswith('.png'):
            fname += '.png'
        img = np.array(preprocess_fundus_image(
            os.path.join(self.img_dir, fname),
            output_size=self.preprocess_size))
        label = int(row['label'])
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# -------------------------------
# 3) 데이터 로드 & Split
# -------------------------------
ROOT = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
df_all = pd.read_csv(os.path.join(ROOT, 'train.csv')).rename(columns={'diagnosis':'label'})
df_all = df_all[df_all['image'].apply(
    lambda f: os.path.isfile(os.path.join(ROOT,'train_images',
        f if f.lower().endswith('.png') else f+'.png'))
)].reset_index(drop=True)

sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
tr_idx, hd_idx = next(sss1.split(df_all, df_all['label']))
df_train = df_all.iloc[tr_idx].reset_index(drop=True)
df_hold  = df_all.iloc[hd_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# -------------------------------
# 4) Sampler & Loader 함수
# -------------------------------
def make_sampler(df):
    counts   = df['label'].value_counts().sort_index().values
    weights  = 1.0 / counts
    sample_w = df['label'].apply(lambda x: weights[x]).tolist()
    return WeightedRandomSampler(sample_w, num_samples=len(sample_w), replacement=True)

def make_loaders(batch_size):
    sampler  = make_sampler(df_train)
    tr_ds     = APTOSDataset(df_train, os.path.join(ROOT,'train_images'))
    v_ds      = APTOSDataset(df_val,   os.path.join(ROOT,'train_images'))
    te_ds     = APTOSDataset(df_test,  os.path.join(ROOT,'train_images'))
    return (
        DataLoader(tr_ds, batch_size=batch_size, sampler=sampler, num_workers=4, pin_memory=True),
        DataLoader(v_ds,  batch_size=batch_size, shuffle=False,    num_workers=4, pin_memory=True),
        DataLoader(te_ds, batch_size=batch_size, shuffle=False,    num_workers=4, pin_memory=True),
    )

# -------------------------------
# 5) Optuna Objective (빠른 탐색용)
# -------------------------------
EPOCH_TRIAL = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def objective(trial):
    lr         = trial.suggest_float('max_lr',        1e-4, 1e-2, log=True)
    wd         = trial.suggest_float('weight_decay',  1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16,32])

    train_loader, valid_loader, _ = make_loaders(batch_size)

    model = timm.create_model(
        'convnext_base',
        pretrained=True,
        num_classes=5
    ).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = OneCycleLR(optimizer, max_lr=lr,
                          steps_per_epoch=len(train_loader),
                          epochs=EPOCH_TRIAL, pct_start=0.1, div_factor=25.0)
    scaler    = GradScaler()
    criterion = nn.CrossEntropyLoss()

    # 빠른 학습 루프
    for _ in range(EPOCH_TRIAL):
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast():
                loss = criterion(model(imgs), labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

    # 검증 정확도 계산
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for imgs, labels in valid_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            pred = model(imgs).argmax(1)
            correct += (pred==labels).sum().item()
            total   += labels.size(0)
    return correct/total

# -------------------------------
# 6) Optuna 최적화 실행 & 결과
# -------------------------------
if __name__ == '__main__':
    study = optuna.create_study(
        direction='maximize',
        pruner=optuna.pruners.SuccessiveHalvingPruner()
    )
    study.optimize(objective, n_trials=5, show_progress_bar=True)
    print("Best params:", study.best_params)
    print("Best validation accuracy:", study.best_value)

    # -------------------------------
    # 7) 최적 파라미터로 전체 50 epoch 재학습 & 테스트 평가
    # -------------------------------
    best = study.best_params
    train_loader, valid_loader, test_loader = make_loaders(best['batch_size'])

    model = timm.create_model(
        'convnext_base',
        pretrained=True,
        num_classes=5
    ).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=best['max_lr'],      weight_decay=best['weight_decay'])
    scheduler = OneCycleLR(optimizer, max_lr=best['max_lr'],
                          steps_per_epoch=len(train_loader), epochs=50,
                          pct_start=0.1, div_factor=25.0)
    scaler    = GradScaler()
    criterion = nn.CrossEntropyLoss()

    for epoch in range(1,51):
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast():
                loss = criterion(model(imgs), labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

'''    # Test 성능
    model.eval()
    t_correct = t_total = 0
    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            pred = model(imgs).argmax(1)
            t_correct += (pred==labels).sum().item()
            t_total   += labels.size(0)
    test_acc = t_correct / t_total
    print(f"Test Accuracy: {test_acc:.4f}")'''

In [4]:
'''1. Swin transformer, best parameter
seed 고정 APTOS 데이터만 사용, StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 추가
optuna best parameter 적용
# 나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
# APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
# OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# -------------------------------
# 실험 재현성 위한 Seed 고정 함수 추가
# -------------------------------
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 코드 제일 앞에서 실행!

# -------------------------------
# 이하 필요한 라이브러리 임포트
# -------------------------------
import cv2
import pandas as pd
from PIL import Image

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10) - random_state 고정!
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용, batch_size=16)
# ----------------------------------------
BATCH_SIZE = 16  # Optuna 결과 반영
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅 (Optuna 최고 파라미터 적용)
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    drop_path_rate=0.07785408634722764,  # Optuna 최적값
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    model.parameters(),
    lr=0.00021160391896617083,         # Optuna 최적 max_lr
    weight_decay=0.009990978364401213  # Optuna 최적 weight_decay
)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=0.00021160391896617083,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc
        no_improve   = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos_aug_final.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos_aug_final.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:23<00:00,  1.11s/it]



Epoch 001 — Train Loss: 0.9566, Acc: 0.6011
  Class counts: [554, 583, 616, 584, 581]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 001 — Valid Loss: 0.5716, Acc: 0.7699
  P: 0.6613, R: 0.7129, F1: 0.6525
  CM:
 [[169   9   1   0   0]
 [  2  30   5   0   0]
 [  2   5  52  33   8]
 [  0   0   2  15   2]
 [  0   1   5   9  15]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:19<00:00,  1.09s/it]



Epoch 002 — Train Loss: 0.5430, Acc: 0.7875
  Class counts: [595, 604, 578, 578, 563]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 002 — Valid Loss: 0.8337, Acc: 0.6959
  P: 0.6447, R: 0.6903, F1: 0.5992
  CM:
 [[156  23   0   0   0]
 [  0  37   0   0   0]
 [  1  47  31  12   9]
 [  0   1   2  14   2]
 [  0   6   3   5  16]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:23<00:00,  1.11s/it]



Epoch 003 — Train Loss: 0.4877, Acc: 0.8074
  Class counts: [582, 577, 562, 612, 585]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 003 — Valid Loss: 0.4814, Acc: 0.8329
  P: 0.7165, R: 0.7792, F1: 0.7286
  CM:
 [[170   9   0   0   0]
 [  0  36   1   0   0]
 [  1   6  67  19   7]
 [  0   0   4  14   1]
 [  0   1   8   4  17]]
>> Best model saved.


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:28<00:00,  1.14s/it]



Epoch 004 — Train Loss: 0.4402, Acc: 0.8328
  Class counts: [568, 554, 607, 588, 601]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 004 — Valid Loss: 0.4281, Acc: 0.8329
  P: 0.7088, R: 0.7398, F1: 0.7154
  CM:
 [[177   2   0   0   0]
 [  3  31   3   0   0]
 [  3   6  66  16   9]
 [  0   0   5  11   3]
 [  0   0   5   6  19]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 005 — Train Loss: 0.3675, Acc: 0.8619
  Class counts: [597, 565, 578, 580, 598]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 005 — Valid Loss: 0.4984, Acc: 0.8301
  P: 0.7015, R: 0.6651, F1: 0.6805
  CM:
 [[174   5   0   0   0]
 [  4  23  10   0   0]
 [  3   2  83   7   5]
 [  0   0   6   7   6]
 [  1   0  11   2  16]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 006 — Train Loss: 0.2979, Acc: 0.8931
  Class counts: [580, 572, 570, 582, 614]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 006 — Valid Loss: 0.4650, Acc: 0.8384
  P: 0.7044, R: 0.7520, F1: 0.7223
  CM:
 [[173   5   1   0   0]
 [  0  31   5   0   1]
 [  1   6  71  13   9]
 [  0   0   4  11   4]
 [  0   1   5   4  20]]
>> Best model saved.


  with autocast():
Train Epoch 7: 100%|██████████| 183/183 [03:27<00:00,  1.13s/it]



Epoch 007 — Train Loss: 0.2656, Acc: 0.9092
  Class counts: [593, 555, 604, 547, 619]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 007 — Valid Loss: 0.5117, Acc: 0.8384
  P: 0.7216, R: 0.6916, F1: 0.7002
  CM:
 [[173   6   0   0   0]
 [  5  29   3   0   0]
 [  2   4  82   9   3]
 [  0   0   9   8   2]
 [  0   0  13   3  14]]


  with autocast():
Train Epoch 8: 100%|██████████| 183/183 [03:27<00:00,  1.13s/it]



Epoch 008 — Train Loss: 0.1968, Acc: 0.9291
  Class counts: [578, 599, 561, 597, 583]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 008 — Valid Loss: 0.5162, Acc: 0.8356
  P: 0.7012, R: 0.7372, F1: 0.7164
  CM:
 [[171   8   0   0   0]
 [  2  29   5   0   1]
 [  0   3  74  11  12]
 [  0   0   7   9   3]
 [  0   0   6   2  22]]


  with autocast():
Train Epoch 9: 100%|██████████| 183/183 [03:29<00:00,  1.14s/it]



Epoch 009 — Train Loss: 0.2062, Acc: 0.9318
  Class counts: [579, 547, 634, 590, 568]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 009 — Valid Loss: 0.7142, Acc: 0.8219
  P: 0.7268, R: 0.6332, F1: 0.6561
  CM:
 [[173   4   2   0   0]
 [  0  14  23   0   0]
 [  0   0  90   8   2]
 [  0   0   9   8   2]
 [  0   0  11   4  15]]


  with autocast():
Train Epoch 10: 100%|██████████| 183/183 [03:23<00:00,  1.11s/it]



Epoch 010 — Train Loss: 0.1481, Acc: 0.9534
  Class counts: [604, 529, 614, 601, 570]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 010 — Valid Loss: 0.5716, Acc: 0.8767
  P: 0.7951, R: 0.7578, F1: 0.7724
  CM:
 [[174   5   0   0   0]
 [  3  30   4   0   0]
 [  2   3  88   4   3]
 [  0   0   8  10   1]
 [  0   0   9   3  18]]
>> Best model saved.


  with autocast():
Train Epoch 11: 100%|██████████| 183/183 [03:23<00:00,  1.11s/it]



Epoch 011 — Train Loss: 0.1426, Acc: 0.9465
  Class counts: [609, 551, 582, 630, 546]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 011 — Valid Loss: 0.7272, Acc: 0.8219
  P: 0.7210, R: 0.6352, F1: 0.6609
  CM:
 [[177   2   0   0   0]
 [ 10  17   8   0   2]
 [  5   1  84   8   2]
 [  0   0   9   8   2]
 [  1   0  10   5  14]]


  with autocast():
Train Epoch 12: 100%|██████████| 183/183 [03:22<00:00,  1.11s/it]



Epoch 012 — Train Loss: 0.1314, Acc: 0.9520
  Class counts: [564, 604, 600, 579, 571]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 012 — Valid Loss: 0.5719, Acc: 0.8274
  P: 0.7339, R: 0.6988, F1: 0.7142
  CM:
 [[176   3   0   0   0]
 [ 11  21   5   0   0]
 [  7   6  75   7   5]
 [  0   0   8  10   1]
 [  0   1   7   2  20]]


  with autocast():
Train Epoch 13: 100%|██████████| 183/183 [03:23<00:00,  1.11s/it]



Epoch 013 — Train Loss: 0.1611, Acc: 0.9387
  Class counts: [566, 581, 604, 586, 581]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 013 — Valid Loss: 0.5344, Acc: 0.8438
  P: 0.7452, R: 0.7119, F1: 0.7243
  CM:
 [[169   9   1   0   0]
 [  4  25   8   0   0]
 [  0   1  88   5   6]
 [  0   0   9  10   0]
 [  0   0  11   3  16]]


  with autocast():
Train Epoch 14: 100%|██████████| 183/183 [03:20<00:00,  1.09s/it]



Epoch 014 — Train Loss: 0.1175, Acc: 0.9599
  Class counts: [588, 580, 572, 590, 588]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 014 — Valid Loss: 0.7278, Acc: 0.8082
  P: 0.7126, R: 0.6627, F1: 0.6641
  CM:
 [[173   5   1   0   0]
 [  2  16  15   0   4]
 [  0   0  76   3  21]
 [  0   0   5   8   6]
 [  0   0   7   1  22]]


  with autocast():
Train Epoch 15: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 015 — Train Loss: 0.1022, Acc: 0.9640
  Class counts: [591, 605, 597, 576, 549]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 015 — Valid Loss: 0.6812, Acc: 0.8356
  P: 0.7028, R: 0.6828, F1: 0.6737
  CM:
 [[173   6   0   0   0]
 [  1  35   1   0   0]
 [  0  11  80   6   3]
 [  0   0  11   7   1]
 [  1   1  13   5  10]]
Early stopping.


Test: 100%|██████████| 23/23 [00:25<00:00,  1.10s/it]


=== Test Set Performance ===
Test Loss : 0.4473
Test Acc  : 0.8849
Precision : 0.7629
Recall    : 0.7656
F1-score  : 0.7641
Confusion Matrix:
[[179   1   0   0   0]
 [  1  30   5   0   1]
 [  0   7  84   6   2]
 [  0   0   5   9   6]
 [  0   0   4   4  21]]





In [None]:
''' StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만
WeightedRandomsampler 추가
Optuna'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations optuna

import os
import random
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler
import optuna

# -------------------------------
# 0) Reproducibility: Seed 고정
# -------------------------------
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# -------------------------------
# 1) 전처리 함수 정의
# -------------------------------
def preprocess_fundus_image(img_path, output_size=(384,384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h,w = img_bgr.shape[:2]
    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx,cy),radius = cv2.minEnclosingCircle(cnt)
        cx,cy,radius = map(int,(cx,cy,radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask,(cx,cy),radius,255,-1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1,y1 = max(cx-radius,0), max(cy-radius,0)
        x2,y2 = min(cx+radius,w), min(cy+radius,h)
        cropped = masked[y1:y2,x1:x2]
    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l,a_,b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe,a_,b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)
    if output_size:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)
    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# -------------------------------
# 2) Dataset 정의
# -------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15,p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        fname = row['image']
        if not fname.lower().endswith('.png'):
            fname += '.png'
        pil = preprocess_fundus_image(os.path.join(self.img_dir,fname), output_size=self.preprocess_size)
        img = np.array(pil)
        label = int(row['label'])
        if label>=2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# -------------------------------
# 3) 데이터 로드 & Split
# -------------------------------
ROOT = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
df_all = pd.read_csv(os.path.join(ROOT,'train.csv')).rename(columns={'diagnosis':'label'})
df_all = df_all[df_all['image'].apply(
    lambda f: os.path.isfile(os.path.join(ROOT,'train_images', f if f.lower().endswith('.png') else f+'.png'))
)].reset_index(drop=True)

sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
tr_idx,hold_idx = next(sss1.split(df_all, df_all['label']))
df_train = df_all.iloc[tr_idx].reset_index(drop=True)
df_hold  = df_all.iloc[hold_idx].reset_index(drop=True)
sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# -------------------------------
# 4) Sampler & Loader 함수
# -------------------------------
def make_sampler(df):
    counts = df['label'].value_counts().sort_index().values
    weights = 1.0/counts
    sample_w = df['label'].apply(lambda x:weights[x]).tolist()
    return WeightedRandomSampler(sample_w, num_samples=len(sample_w), replacement=True)

def make_loaders(bs):
    samp = make_sampler(df_train)
    tr_ds = APTOSDataset(df_train, os.path.join(ROOT,'train_images'))
    v_ds  = APTOSDataset(df_val,   os.path.join(ROOT,'train_images'))
    te_ds = APTOSDataset(df_test,  os.path.join(ROOT,'train_images'))
    return (
      DataLoader(tr_ds, batch_size=bs, sampler=samp, num_workers=4, pin_memory=True),
      DataLoader(v_ds,  batch_size=bs, shuffle=False, num_workers=4, pin_memory=True),
      DataLoader(te_ds, batch_size=bs, shuffle=False, num_workers=4, pin_memory=True)
    )

# -------------------------------
# 5) Optuna Objective
# -------------------------------
EPOCH_TRIAL = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def objective(trial):
    window      = trial.suggest_categorical('window_size',[7,9,11])
    dp_rate     = trial.suggest_float('drop_path_rate',0.0,0.3)
    mlp_r       = trial.suggest_float('mlp_ratio',2.0,4.0)
    lr          = trial.suggest_loguniform('max_lr',1e-4,1e-2)
    wd          = trial.suggest_loguniform('weight_decay',1e-5,1e-2)
    batch_size  = trial.suggest_categorical('batch_size',[16,32])

    train_loader, valid_loader, _ = make_loaders(batch_size)
    model = timm.create_model(
        f'swin_large_patch4_window{window}_384.ms_in22k_ft_in1k',
        pretrained=True,
        drop_path_rate=dp_rate,
        mlp_ratio=mlp_r,
        num_classes=5
    ).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = OneCycleLR(
      optimizer, max_lr=lr,
      steps_per_epoch=len(train_loader),
      epochs=EPOCH_TRIAL, pct_start=0.1, div_factor=25.0
    )
    scaler = GradScaler()
    criterion = nn.CrossEntropyLoss()

    # 짧은 루프
    for epoch in range(EPOCH_TRIAL):
        model.train()
        for imgs,labels in train_loader:
            imgs,labels=imgs.to(device),labels.to(device)
            optimizer.zero_grad()
            with autocast():
                out  = model(imgs)
                loss = criterion(out, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer); scaler.update()
            scheduler.step()

    # Validation 정확도
    model.eval()
    correct=total=0
    with torch.no_grad():
        for imgs,labels in valid_loader:
            imgs,labels=imgs.to(device),labels.to(device)
            pred = model(imgs).argmax(1)
            correct += (pred==labels).sum().item()
            total   += labels.size(0)
    return correct/total

# -------------------------------
# 6) 최적화 실행 & 결과 출력
# -------------------------------
if __name__=='__main__':
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=20)
    print("Best params:", study.best_params)
    print("Best validation accuracy:", study.best_value)

# 7) 최적 파라미터로 전체 50 epoch 재학습 & Test 평가
best = study.best_params
train_loader, valid_loader, test_loader = make_loaders(best['batch_size'])

# 7-1) 모델 초기화 (Optuna로 찾은 params 반영)
model = timm.create_model(
    f"swin_large_patch4_window{best['window_size']}_384.ms_in22k_ft_in1k",
    pretrained=True,
    drop_path_rate=best['drop_path_rate'],
    mlp_ratio=best['mlp_ratio'],
    num_classes=5
).to(device)

optimizer = optim.AdamW(model.parameters(), lr=best['max_lr'], weight_decay=best['weight_decay'])
scheduler = OneCycleLR(optimizer, max_lr=best['max_lr'],
                      steps_per_epoch=len(train_loader), epochs=50, pct_start=0.1, div_factor=25.0)
scaler    = GradScaler()
criterion = nn.CrossEntropyLoss()

# 7-2) 50 에폭 학습 루프 (기존 코드 재사용)
for epoch in range(1, 51):
    model.train()
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            loss = criterion(model(imgs), labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

# 7-3) Test 성능 출력
model.eval()
t_correct = t_total = 0
all_p = all_l = []
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        out = model(imgs)
        pred = out.argmax(1)
        t_correct += (pred==labels).sum().item()
        t_total   += labels.size(0)
test_acc = t_correct / t_total
print(f"Test Accuracy: {test_acc:.4f}")

In [None]:
# Colab에서 실행 시, 필요 라이브러리 설치 (Albumentations 예시)
#!pip install albumentations opencv-python

# 만약 torchvision 0.8 이하로 설치되어 있으면, 최신 버전으로 업그레이드하세요.
#!pip install --upgrade torchvision

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
'''1차시, 증강 없음 !!!! APTOS 데이터만 사용, StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (전처리만, 증강 없음)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil_img = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img_np = np.array(pil_img)
        img_t = self.transform(image=img_np)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 “존재하지 않는 파일” 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})

# 파일 존재 여부 체크 함수
def file_exists(fname):
    if not fname.lower().endswith('.png'):
        fname = f"{fname}.png"
    return os.path.isfile(os.path.join(img_dir, fname))

# 실제 파일이 있는 행만 남깁니다.
full_df = full_df[ full_df['image'].apply(file_exists) ].reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, holdout_idx = next(sss1.split(full_df, full_df['label']))
df_train   = full_df.iloc[train_idx].reset_index(drop=True)
df_holdout = full_df.iloc[holdout_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_holdout, df_holdout['label']))
df_val  = df_holdout.iloc[val_idx].reset_index(drop=True)
df_test = df_holdout.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) Dataset & DataLoader 준비
# ----------------------------------------
train_ds = APTOSDataset(df_train, img_dir)
val_ds   = APTOSDataset(df_val,   img_dir)
test_ds  = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 6) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 7) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    # — Train —
    model.train()
    t_loss = t_correct = t_total = 0
    class_counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            class_counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    for cls in range(num_classes):
        print(f"  Class {cls}: {class_counts[cls]} samples")

    # — Validate —
    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu())
            all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  Confusion Matrix:")
    print(v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc
        no_improve   = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping.")
            break

# ----------------------------------------
# 8) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu())
        all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/801M [00:00<?, ?B/s]

  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [08:00<00:00,  2.63s/it]



Epoch 001 — Train Loss: 0.6145, Acc: 0.7721
  Class 0: 1437 samples
  Class 1: 295 samples
  Class 2: 796 samples
  Class 3: 154 samples
  Class 4: 236 samples


Valid: 100%|██████████| 23/23 [01:05<00:00,  2.84s/it]



Epoch 001 — Valid Loss: 0.4989, Acc: 0.8164
  P: 0.8164, R: 0.5662, F1: 0.5760
  Confusion Matrix:
[[172   7   0   0   0]
 [  0  21  16   0   0]
 [  0   4  95   0   1]
 [  0   0  17   1   1]
 [  0   2  19   0   9]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:02<00:00,  1.00it/s]



Epoch 002 — Train Loss: 0.5608, Acc: 0.7913
  Class 0: 1437 samples
  Class 1: 295 samples
  Class 2: 796 samples
  Class 3: 154 samples
  Class 4: 236 samples


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 002 — Valid Loss: 0.6270, Acc: 0.7671
  P: 0.5390, R: 0.5044, F1: 0.4990
  Confusion Matrix:
[[175   3   1   0   0]
 [  9   9  19   0   0]
 [  6   2  88   4   0]
 [  0   0  11   8   0]
 [  0   1  27   2   0]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:03<00:00,  1.00s/it]



Epoch 003 — Train Loss: 0.6524, Acc: 0.7546
  Class 0: 1437 samples
  Class 1: 295 samples
  Class 2: 796 samples
  Class 3: 154 samples
  Class 4: 236 samples


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 003 — Valid Loss: 1.1828, Acc: 0.6877
  P: 0.3311, R: 0.3678, F1: 0.3446
  Confusion Matrix:
[[179   0   0   0   0]
 [ 26   7   4   0   0]
 [ 23  12  65   0   0]
 [  0   1  18   0   0]
 [  5   5  20   0   0]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:02<00:00,  1.00it/s]



Epoch 004 — Train Loss: 1.1130, Acc: 0.5822
  Class 0: 1437 samples
  Class 1: 295 samples
  Class 2: 796 samples
  Class 3: 154 samples
  Class 4: 236 samples


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 004 — Valid Loss: 1.3414, Acc: 0.2740
  P: 0.0548, R: 0.2000, F1: 0.0860
  Confusion Matrix:
[[  0   0 179   0   0]
 [  0   0  37   0   0]
 [  0   0 100   0   0]
 [  0   0  19   0   0]
 [  0   0  30   0   0]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [02:55<00:00,  1.04it/s]



Epoch 005 — Train Loss: 1.3139, Acc: 0.4856
  Class 0: 1437 samples
  Class 1: 295 samples
  Class 2: 796 samples
  Class 3: 154 samples
  Class 4: 236 samples


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 005 — Valid Loss: 1.3110, Acc: 0.4904
  P: 0.0981, R: 0.2000, F1: 0.1316
  Confusion Matrix:
[[179   0   0   0   0]
 [ 37   0   0   0   0]
 [100   0   0   0   0]
 [ 19   0   0   0   0]
 [ 30   0   0   0   0]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:01<00:00,  1.01it/s]



Epoch 006 — Train Loss: 1.3075, Acc: 0.4880
  Class 0: 1437 samples
  Class 1: 295 samples
  Class 2: 796 samples
  Class 3: 154 samples
  Class 4: 236 samples


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 006 — Valid Loss: 1.3104, Acc: 0.4904
  P: 0.0981, R: 0.2000, F1: 0.1316
  Confusion Matrix:
[[179   0   0   0   0]
 [ 37   0   0   0   0]
 [100   0   0   0   0]
 [ 19   0   0   0   0]
 [ 30   0   0   0   0]]
Early stopping.


Test: 100%|██████████| 23/23 [01:17<00:00,  3.37s/it]


=== Test Set Performance ===
Test Loss : 0.4578
Test Acc  : 0.8466
Precision : 0.7875
Recall    : 0.6089
F1-score  : 0.6396
Confusion Matrix:
[[180   0   0   0   0]
 [  1  18  17   0   1]
 [  0   4  95   0   0]
 [  0   2  13   3   2]
 [  0   2  13   1  13]]





In [None]:
'''(추가 !!!!!!) 1차시, 증강 없음, APTOS 데이터만 사용
train_loader에 WeightedRandomSampler를 적용해서 학습 배치마다 클래스 비율을 균등하게 뽑음
손실 함수는 nn.CrossEntropyLoss(weight=weights_tensor)를 사용해 클래스별 가중치를 반영한 Weighted CrossEntropyLoss가 적용
StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR (pct_start=0.3, max_lr=5e-4) + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (전처리만, 증강 없음)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil_img = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img_np = np.array(pil_img)
        img_t = self.transform(image=img_np)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[ full_df['image'].apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png'))) ]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, holdout_idx = next(sss1.split(full_df, full_df['label']))
df_train   = full_df.iloc[train_idx].reset_index(drop=True)
df_holdout = full_df.iloc[holdout_idx].reset_index(drop=True)
sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_holdout, df_holdout['label']))
df_val  = df_holdout.iloc[val_idx].reset_index(drop=True)
df_test = df_holdout.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = [1.0 / c for c in class_counts]
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) DataLoader 준비
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)
train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler, num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 손실함수 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True, num_classes=5
).to(device)

# 1) Weighted CrossEntropyLoss
weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
NUM_EPOCHS = 50
scaler     = GradScaler()
# Scheduler 재조정: pct_start=0.3, max_lr=5e-4
scheduler  = OneCycleLR(
    optimizer,
    max_lr=5e-4,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.3,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    # — Train —
    model.train()
    t_loss = t_correct = t_total = 0
    epoch_class_counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            epoch_class_counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Epoch Sampling Counts by Class:", epoch_class_counts)

    # — Validate —
    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu())
            all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  Confusion Matrix:")
    print(v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc
        no_improve   = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping.")
            break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k', pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu())
        all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)


총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:45<00:00,  1.23s/it]



Epoch 001 — Train Loss: 0.8389, Acc: 0.5569
  Epoch Sampling Counts by Class: [590, 572, 554, 622, 580]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 001 — Valid Loss: 0.8920, Acc: 0.6658
  P: 0.5937, R: 0.6642, F1: 0.5331
  Confusion Matrix:
[[163  15   0   0   1]
 [  1  28   2   1   5]
 [  1  13  15  21  50]
 [  0   0   0  14   5]
 [  0   3   0   4  23]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:31<00:00,  1.15s/it]



Epoch 002 — Train Loss: 0.4090, Acc: 0.7899
  Epoch Sampling Counts by Class: [616, 543, 619, 558, 582]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 002 — Valid Loss: 0.8794, Acc: 0.7890
  P: 0.6573, R: 0.7141, F1: 0.6719
  Confusion Matrix:
[[166  13   0   0   0]
 [  0  30   7   0   0]
 [  1  14  62  15   8]
 [  0   0   4  11   4]
 [  0   4   5   2  19]]
>> Best model saved.


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:35<00:00,  1.18s/it]



Epoch 003 — Train Loss: 0.3047, Acc: 0.8492
  Epoch Sampling Counts by Class: [561, 586, 569, 589, 613]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 003 — Valid Loss: 1.0611, Acc: 0.7644
  P: 0.6809, R: 0.6806, F1: 0.6426
  Confusion Matrix:
[[172   7   0   0   0]
 [  1  35   1   0   0]
 [  1  42  47   9   1]
 [  0   0   6  10   3]
 [  0   6   6   3  15]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 004 — Train Loss: 0.2990, Acc: 0.8413
  Epoch Sampling Counts by Class: [562, 609, 604, 580, 563]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 004 — Valid Loss: 0.8251, Acc: 0.7397
  P: 0.6065, R: 0.6706, F1: 0.5974
  Confusion Matrix:
[[172   7   0   0   0]
 [  1  33   2   0   1]
 [  2  27  36  14  21]
 [  0   0   3   9   7]
 [  0   5   2   3  20]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:27<00:00,  1.13s/it]



Epoch 005 — Train Loss: 0.3089, Acc: 0.8516
  Epoch Sampling Counts by Class: [602, 544, 593, 550, 629]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 005 — Valid Loss: 1.2213, Acc: 0.7534
  P: 0.6865, R: 0.6881, F1: 0.6204
  Confusion Matrix:
[[172   7   0   0   0]
 [  2  35   0   0   0]
 [  2  37  43  17   1]
 [  0   0   4  14   1]
 [  0   7   4   8  11]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 006 — Train Loss: 0.2344, Acc: 0.8831
  Epoch Sampling Counts by Class: [593, 566, 566, 591, 602]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 006 — Valid Loss: 1.2340, Acc: 0.7781
  P: 0.6021, R: 0.6355, F1: 0.6138
  Confusion Matrix:
[[175   3   1   0   0]
 [  1  28   7   0   1]
 [  2  14  58  11  15]
 [  0   0  10   5   4]
 [  0   3   6   3  18]]


  with autocast():
Train Epoch 7: 100%|██████████| 183/183 [03:22<00:00,  1.10s/it]



Epoch 007 — Train Loss: 0.2112, Acc: 0.8982
  Epoch Sampling Counts by Class: [614, 568, 566, 581, 589]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 007 — Valid Loss: 1.0701, Acc: 0.7699
  P: 0.6449, R: 0.7203, F1: 0.6483
  Confusion Matrix:
[[173   3   1   0   2]
 [  2  22   7   0   6]
 [  0   9  47  20  24]
 [  0   0   3  14   2]
 [  0   0   1   4  25]]
Early stopping.


Test: 100%|██████████| 23/23 [01:25<00:00,  3.74s/it]


=== Test Set Performance ===
Test Loss : 0.9141
Test Acc  : 0.8219
Precision : 0.6685
Recall    : 0.7132
F1-score  : 0.6838
Confusion Matrix:
[[178   2   0   0   0]
 [  0  28   8   0   1]
 [  0  15  64  11   9]
 [  0   0   3   9   8]
 [  0   1   3   4  21]]





In [None]:
'''(추가!!!!!!!) 1차시, 증강 없음 !!!! APTOS 데이터만 사용
Focal loss 적용 - 파라미터 좀 바꿈
max_lr=5e-4
pct_start=0.2 로 전체 스케줄의 20% 구간(첫 10 epochs 중 약 2 epoch)을 워밍업으로 사용
StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler


# ----------------------------------------
# 0) Focal Loss 구현 (gamma, alpha 지원)
# ----------------------------------------
class FocalLoss(nn.Module):
    def __init__(self, gamma=1.0, alpha=None, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha  # tensor of shape [C] or None
        self.reduction = reduction

    def forward(self, input, target):
        log_p = nn.functional.log_softmax(input, dim=1)  # [B, C]
        p = torch.exp(log_p)                             # [B, C]
        target_one_hot = nn.functional.one_hot(target, num_classes=input.size(1)).float().to(input.device)  # [B, C]

        # focal term
        focal_term = (1 - p) ** self.gamma
        loss = -target_one_hot * focal_term * log_p      # [B, C]

        if self.alpha is not None:
            # α 조정: 각 클래스별 가중치 곱하기
            loss = loss * self.alpha.unsqueeze(0)

        loss = loss.sum(dim=1)  # [B]
        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:
            return loss


# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h * w * 0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)


# ----------------------------------------
# 2) Dataset 정의 (전처리만, 증강 없음)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil_img = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img_np = np.array(pil_img)
        img_t = self.transform(image=img_np)['image']
        return img_t, torch.tensor(label, dtype=torch.long)


# ----------------------------------------
# 3) APTOS train.csv 로드 및 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image'].apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")


# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)


# ----------------------------------------
# 5) DataLoader 준비
# ----------------------------------------
train_ds = APTOSDataset(df_train, img_dir)
val_ds   = APTOSDataset(df_val,   img_dir)
test_ds  = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True,  num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False, num_workers=4, pin_memory=True)


# ----------------------------------------
# 6) 모델 · 손실함수 · 옵티마이저 · 스케줄러 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True, num_classes=5
).to(device)

# class frequency 기반 alpha 계산 (역수 비율)
class_counts = df_train['label'].value_counts().sort_index().values
inv_freq = 1.0 / class_counts
alpha = torch.tensor(inv_freq / inv_freq.sum(), device=device)  # 합 = 1

criterion = FocalLoss(gamma=1.0, alpha=alpha, reduction='mean')

optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=5e-4,           # max_lr 낮춤
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.2,         # 워밍업 구간 늘림
    div_factor=25.0
)


# ----------------------------------------
# 7) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    class_counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f"Train {epoch}"):
        for l in labels.cpu().tolist():
            class_counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:02d} Train Loss:{train_loss:.4f} Acc:{train_acc:.4f}")
    print("  Class counts:", class_counts)

    # Validation
    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []
    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc="Valid"):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu())
            all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:02d} Valid Loss:{valid_loss:.4f} Acc:{valid_acc:.4f}")
    print(f"  P:{v_prec:.4f} R:{v_rec:.4f} F1:{v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc
        no_improve   = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos_focal_alpha.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping.")
            break


# ----------------------------------------
# 8) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos_focal_alpha.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []
with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc="Test"):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu())
        all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


  scaler     = GradScaler()
  with autocast():
Train 1: 100%|██████████| 183/183 [02:48<00:00,  1.08it/s]



Epoch 01 Train Loss:0.0794 Acc:0.6611
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 01 Valid Loss:0.0577 Acc:0.7370
  P:0.7026 R:0.6776 F1:0.6066
  CM:
 [[167  12   0   0   0]
 [  0  28   9   0   0]
 [  1  12  47  40   0]
 [  0   0   2  17   0]
 [  0   3   3  14  10]]
>> Best model saved.


  with autocast():
Train 2: 100%|██████████| 183/183 [02:49<00:00,  1.08it/s]



Epoch 02 Train Loss:0.0563 Acc:0.7738
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 02 Valid Loss:0.0570 Acc:0.8110
  P:0.7070 R:0.6890 F1:0.6650
  CM:
 [[178   1   0   0   0]
 [  4  22  11   0   0]
 [  1   8  70  19   2]
 [  0   0   3  15   1]
 [  0   2   8   9  11]]
>> Best model saved.


  with autocast():
Train 3: 100%|██████████| 183/183 [02:49<00:00,  1.08it/s]



Epoch 03 Train Loss:0.0502 Acc:0.7937
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 03 Valid Loss:0.0692 Acc:0.7973
  P:0.6975 R:0.6622 F1:0.6191
  CM:
 [[175   3   1   0   0]
 [  4  27   6   0   0]
 [  1  14  70  14   1]
 [  0   0   5  14   0]
 [  0   4   9  12   5]]


  with autocast():
Train 4: 100%|██████████| 183/183 [02:49<00:00,  1.08it/s]



Epoch 04 Train Loss:0.0463 Acc:0.8050
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 04 Valid Loss:0.0651 Acc:0.6767
  P:0.6090 R:0.6525 F1:0.5968
  CM:
 [[129  44   6   0   0]
 [  0  28   9   0   0]
 [  0  15  62  18   5]
 [  0   0   4  12   3]
 [  0   5   5   4  16]]


  with autocast():
Train 5: 100%|██████████| 183/183 [02:51<00:00,  1.07it/s]



Epoch 05 Train Loss:0.0506 Acc:0.7995
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 05 Valid Loss:0.0591 Acc:0.7178
  P:0.6087 R:0.6687 F1:0.5782
  CM:
 [[173   6   0   0   0]
 [  6  31   0   0   0]
 [  5  29  26  20  20]
 [  0   0   2  11   6]
 [  0   5   1   3  21]]


  with autocast():
Train 6: 100%|██████████| 183/183 [02:48<00:00,  1.09it/s]



Epoch 06 Train Loss:0.0525 Acc:0.7875
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 06 Valid Loss:0.0769 Acc:0.7425
  P:0.5989 R:0.6166 F1:0.5709
  CM:
 [[176   3   0   0   0]
 [  4  26   7   0   0]
 [  5  21  42   5  27]
 [  0   0   3   4  12]
 [  0   6   0   1  23]]


  with autocast():
Train 7: 100%|██████████| 183/183 [02:49<00:00,  1.08it/s]



Epoch 07 Train Loss:0.0556 Acc:0.7735
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 07 Valid Loss:0.0705 Acc:0.6000
  P:0.5917 R:0.6150 F1:0.5427
  CM:
 [[118  60   0   0   1]
 [  0  34   3   0   0]
 [  0  38  39  15   8]
 [  0   1   3   9   6]
 [  0   6   5   0  19]]
Early stopping.


Test: 100%|██████████| 23/23 [00:26<00:00,  1.13s/it]


=== Test Set Performance ===
Test Loss : 0.0561
Test Acc  : 0.8192
Precision : 0.6984
Recall    : 0.6878
F1-score  : 0.6770
Confusion Matrix:
[[179   1   0   0   0]
 [  2  22  12   0   1]
 [  1   8  71  19   0]
 [  0   1   2  13   4]
 [  0   1   7   7  14]]





In [None]:
'''**** (추가 !!!!!!!) APTOS 데이터만 사용, StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 추가
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)


# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)


# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")


# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)


# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)


# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)


# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)


# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos_aug.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break


# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos_aug.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:22<00:00,  1.11s/it]



Epoch 001 — Train Loss: 0.8893, Acc: 0.6158
  Class counts: [606, 571, 567, 603, 571]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 001 — Valid Loss: 0.4689, Acc: 0.8384
  P: 0.7465, R: 0.7540, F1: 0.7208
  CM:
 [[169   9   1   0   0]
 [  1  34   2   0   0]
 [  0   9  77  13   1]
 [  0   0   4  14   1]
 [  0   1  12   5  12]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 002 — Train Loss: 0.7185, Acc: 0.7193
  Class counts: [567, 599, 594, 569, 589]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 002 — Valid Loss: 0.6557, Acc: 0.7315
  P: 0.6569, R: 0.6560, F1: 0.5900
  CM:
 [[172   6   1   0   0]
 [  1  28   8   0   0]
 [  1  11  42  44   2]
 [  0   0   2  16   1]
 [  0   1   5  15   9]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:23<00:00,  1.11s/it]



Epoch 003 — Train Loss: 0.8859, Acc: 0.6419
  Class counts: [610, 604, 517, 597, 590]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 003 — Valid Loss: 0.6796, Acc: 0.6986
  P: 0.5729, R: 0.6041, F1: 0.5558
  CM:
 [[169   6   4   0   0]
 [  2  19  15   1   0]
 [  0   2  37  20  41]
 [  0   0   0  10   9]
 [  0   0   4   6  20]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 004 — Train Loss: 0.8073, Acc: 0.6659
  Class counts: [606, 587, 576, 583, 566]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 004 — Valid Loss: 0.8012, Acc: 0.7123
  P: 0.6030, R: 0.6373, F1: 0.5780
  CM:
 [[143  36   0   0   0]
 [  1  33   3   0   0]
 [  0  19  65  14   2]
 [  0   0   3  11   5]
 [  0   3  10   9   8]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:26<00:00,  1.13s/it]



Epoch 005 — Train Loss: 1.5387, Acc: 0.2728
  Class counts: [588, 585, 614, 572, 559]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 005 — Valid Loss: 1.5268, Acc: 0.4904
  P: 0.0981, R: 0.2000, F1: 0.1316
  CM:
 [[179   0   0   0   0]
 [ 37   0   0   0   0]
 [100   0   0   0   0]
 [ 19   0   0   0   0]
 [ 30   0   0   0   0]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 006 — Train Loss: 1.6211, Acc: 0.2005
  Class counts: [591, 563, 561, 608, 595]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 006 — Valid Loss: 1.6237, Acc: 0.0521
  P: 0.0104, R: 0.2000, F1: 0.0198
  CM:
 [[  0   0   0 179   0]
 [  0   0   0  37   0]
 [  0   0   0 100   0]
 [  0   0   0  19   0]
 [  0   0   0  30   0]]
Early stopping.


Test: 100%|██████████| 23/23 [00:26<00:00,  1.14s/it]


=== Test Set Performance ===
Test Loss : 0.4118
Test Acc  : 0.8767
Precision : 0.7439
Recall    : 0.7571
F1-score  : 0.7466
Confusion Matrix:
[[179   1   0   0   0]
 [  0  34   3   0   0]
 [  0  12  79   6   2]
 [  0   0   7   7   6]
 [  0   1   4   3  21]]





In [None]:
'''**** 재탕 APTOS 데이터만 사용, StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 추가
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)


# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)


# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")


# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)


# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)


# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)


# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)


# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos_aug.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break


# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos_aug.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:25<00:00,  1.13s/it]



Epoch 001 — Train Loss: 0.8335, Acc: 0.6559
  Class counts: [555, 566, 568, 612, 617]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 001 — Valid Loss: 0.4387, Acc: 0.8548
  P: 0.7420, R: 0.7542, F1: 0.7404
  CM:
 [[175   4   0   0   0]
 [  2  33   2   0   0]
 [  1   8  77   9   5]
 [  0   1   5  12   1]
 [  0   1  11   3  15]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 002 — Train Loss: 0.6713, Acc: 0.7395
  Class counts: [570, 578, 573, 555, 642]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 002 — Valid Loss: 0.6077, Acc: 0.7288
  P: 0.7109, R: 0.6383, F1: 0.5679
  CM:
 [[172   7   0   0   0]
 [  4  20  11   2   0]
 [  2   4  49  45   0]
 [  0   0   0  19   0]
 [  0   1   7  16   6]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 003 — Train Loss: 0.8535, Acc: 0.6343
  Class counts: [560, 546, 609, 607, 596]


Valid: 100%|██████████| 23/23 [00:24<00:00,  1.05s/it]



Epoch 003 — Valid Loss: 0.7694, Acc: 0.7808
  P: 0.6467, R: 0.6017, F1: 0.6159
  CM:
 [[178   1   0   0   0]
 [ 18  16   3   0   0]
 [  8   5  68   9  10]
 [  0   1   5   7   6]
 [  3   0   8   3  16]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:27<00:00,  1.13s/it]



Epoch 004 — Train Loss: 1.1510, Acc: 0.4983
  Class counts: [544, 584, 639, 589, 562]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 004 — Valid Loss: 1.5804, Acc: 0.2411
  P: 0.1667, R: 0.2212, F1: 0.1510
  CM:
 [[ 53  15 111   0   0]
 [  5  27   5   0   0]
 [ 23  69   8   0   0]
 [  7  12   0   0   0]
 [  2  26   2   0   0]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 005 — Train Loss: 1.5989, Acc: 0.2574
  Class counts: [550, 598, 611, 593, 566]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 005 — Valid Loss: 1.4606, Acc: 0.5479
  P: 0.1842, R: 0.3307, F1: 0.2293
  CM:
 [[175   4   0   0   0]
 [ 12  25   0   0   0]
 [ 43  56   0   0   1]
 [ 10   9   0   0   0]
 [  8  22   0   0   0]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:20<00:00,  1.09s/it]



Epoch 006 — Train Loss: 1.5574, Acc: 0.3283
  Class counts: [622, 615, 558, 565, 558]


Valid: 100%|██████████| 23/23 [00:24<00:00,  1.04s/it]



Epoch 006 — Valid Loss: 1.3979, Acc: 0.5151
  P: 0.2779, R: 0.3302, F1: 0.2383
  CM:
 [[156  19   4   0   0]
 [ 10  27   0   0   0]
 [ 32  63   5   0   0]
 [  5  12   2   0   0]
 [  5  25   0   0   0]]
Early stopping.


Test: 100%|██████████| 23/23 [00:26<00:00,  1.13s/it]


=== Test Set Performance ===
Test Loss : 0.3698
Test Acc  : 0.8575
Precision : 0.7097
Recall    : 0.7378
F1-score  : 0.7201
Confusion Matrix:
[[179   1   0   0   0]
 [  1  33   2   0   1]
 [  1  10  74   9   5]
 [  0   1   5   8   6]
 [  0   2   3   5  19]]





In [None]:
'''**** seed 고정 APTOS 데이터만 사용, StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 추가
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# -------------------------------
# 실험 재현성 위한 Seed 고정 함수 추가
# -------------------------------
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 코드 제일 앞에서 실행!

# -------------------------------
# 이하 기존 코드
# -------------------------------

import cv2
import pandas as pd
from PIL import Image

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)


# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        # class2, 3, 4만 증강, 나머지는 base transform만 적용
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)


# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10) - random_state 고정!
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos_aug_2.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos_aug_2.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/801M [00:00<?, ?B/s]

  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [06:03<00:00,  1.98s/it]



Epoch 001 — Train Loss: 0.8756, Acc: 0.6364
  Class counts: [554, 583, 616, 584, 581]


Valid: 100%|██████████| 23/23 [01:04<00:00,  2.82s/it]



Epoch 001 — Valid Loss: 0.5249, Acc: 0.7863
  P: 0.6954, R: 0.7232, F1: 0.6578
  CM:
 [[171   8   0   0   0]
 [  0  33   4   0   0]
 [  1   8  56  32   3]
 [  0   0   2  16   1]
 [  0   2   5  12  11]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [04:25<00:00,  1.45s/it]



Epoch 002 — Train Loss: 0.6594, Acc: 0.7341
  Class counts: [595, 604, 578, 578, 563]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 002 — Valid Loss: 0.6391, Acc: 0.7260
  P: 0.6566, R: 0.6974, F1: 0.6138
  CM:
 [[159  20   0   0   0]
 [  0  35   2   0   0]
 [  1   9  43  43   4]
 [  0   0   3  15   1]
 [  0   1   5  11  13]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:58<00:00,  1.31s/it]



Epoch 003 — Train Loss: 0.8150, Acc: 0.6782
  Class counts: [582, 577, 562, 612, 585]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 003 — Valid Loss: 0.7267, Acc: 0.6630
  P: 0.5850, R: 0.5979, F1: 0.5202
  CM:
 [[162  15   0   0   2]
 [  3  28   0   3   3]
 [  1   4  24  21  50]
 [  0   0   1   8  10]
 [  0   0   1   9  20]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:46<00:00,  1.24s/it]



Epoch 004 — Train Loss: 1.3832, Acc: 0.3537
  Class counts: [568, 554, 607, 588, 601]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 004 — Valid Loss: 1.4001, Acc: 0.4904
  P: 0.0984, R: 0.2000, F1: 0.1319
  CM:
 [[179   0   0   0   0]
 [ 37   0   0   0   0]
 [ 99   0   0   0   1]
 [ 19   0   0   0   0]
 [ 30   0   0   0   0]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:45<00:00,  1.23s/it]



Epoch 005 — Train Loss: 1.6484, Acc: 0.1988
  Class counts: [597, 565, 578, 580, 598]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 005 — Valid Loss: 1.6210, Acc: 0.0822
  P: 0.0164, R: 0.2000, F1: 0.0304
  CM:
 [[  0   0   0   0 179]
 [  0   0   0   0  37]
 [  0   0   0   0 100]
 [  0   0   0   0  19]
 [  0   0   0   0  30]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:36<00:00,  1.18s/it]



Epoch 006 — Train Loss: 1.6188, Acc: 0.2019
  Class counts: [580, 572, 570, 582, 614]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 006 — Valid Loss: 1.6634, Acc: 0.0822
  P: 0.0164, R: 0.2000, F1: 0.0304
  CM:
 [[  0   0   0   0 179]
 [  0   0   0   0  37]
 [  0   0   0   0 100]
 [  0   0   0   0  19]
 [  0   0   0   0  30]]
Early stopping.


Test: 100%|██████████| 23/23 [01:19<00:00,  3.44s/it]


=== Test Set Performance ===
Test Loss : 0.4875
Test Acc  : 0.8247
Precision : 0.7153
Recall    : 0.7519
F1-score  : 0.7097
Confusion Matrix:
[[175   5   0   0   0]
 [  0  32   5   0   0]
 [  0  14  63  20   2]
 [  0   1   2  14   3]
 [  0   0   3   9  17]]





In [None]:
'''**** (재탕 !!!!) seed 고정 APTOS 데이터만 사용, StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 추가
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# -------------------------------
# 실험 재현성 위한 Seed 고정 함수 추가
# -------------------------------
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 코드 제일 앞에서 실행!

# -------------------------------
# 이하 기존 코드
# -------------------------------

import cv2
import pandas as pd
from PIL import Image

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)


# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        # class2, 3, 4만 증강, 나머지는 base transform만 적용
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)


# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10) - random_state 고정!
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos_aug_3.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos_aug_3.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 001 — Train Loss: 0.8716, Acc: 0.6395
  Class counts: [554, 583, 616, 584, 581]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 001 — Valid Loss: 0.7366, Acc: 0.7014
  P: 0.6534, R: 0.6350, F1: 0.5629
  CM:
 [[178   1   0   0   0]
 [ 12  23   1   1   0]
 [  6  17  25  49   3]
 [  0   1   1  16   1]
 [  0   3   2  11  14]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:21<00:00,  1.10s/it]



Epoch 002 — Train Loss: 0.6572, Acc: 0.7474
  Class counts: [595, 604, 578, 578, 563]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 002 — Valid Loss: 0.4716, Acc: 0.8247
  P: 0.6998, R: 0.7487, F1: 0.7172
  CM:
 [[164  15   0   0   0]
 [  0  32   5   0   0]
 [  0   7  75   6  12]
 [  0   0   4  11   4]
 [  0   1   7   3  19]]
>> Best model saved.


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 003 — Train Loss: 0.7734, Acc: 0.6981
  Class counts: [582, 577, 562, 612, 585]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 003 — Valid Loss: 0.7525, Acc: 0.6904
  P: 0.5939, R: 0.6685, F1: 0.5659
  CM:
 [[165  13   0   0   1]
 [  5  27   4   0   1]
 [  5   7  24  44  20]
 [  0   0   0  13   6]
 [  0   1   2   4  23]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:26<00:00,  1.13s/it]



Epoch 004 — Train Loss: 0.8501, Acc: 0.6672
  Class counts: [568, 554, 607, 588, 601]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 004 — Valid Loss: 2.4428, Acc: 0.1945
  P: 0.3147, R: 0.2741, F1: 0.1474
  CM:
 [[ 1 41 60 77  0]
 [ 0  9 17 11  0]
 [ 0  6 49 45  0]
 [ 0  0  7 12  0]
 [ 0  1 10 19  0]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:26<00:00,  1.13s/it]



Epoch 005 — Train Loss: 0.8913, Acc: 0.6374
  Class counts: [597, 565, 578, 580, 598]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 005 — Valid Loss: 0.5969, Acc: 0.7726
  P: 0.6866, R: 0.6459, F1: 0.6133
  CM:
 [[172   7   0   0   0]
 [  6  29   2   0   0]
 [  6  11  62  21   0]
 [  0   1   5  12   1]
 [  0   3  14   6   7]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 006 — Train Loss: 0.7511, Acc: 0.6947
  Class counts: [580, 572, 570, 582, 614]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 006 — Valid Loss: 1.7454, Acc: 0.4192
  P: 0.5631, R: 0.3875, F1: 0.3178
  CM:
 [[102   0   2  15  60]
 [  2   7   3   5  20]
 [  0   0  14  14  72]
 [  0   0   1   2  16]
 [  0   0   2   0  28]]


  with autocast():
Train Epoch 7: 100%|██████████| 183/183 [03:32<00:00,  1.16s/it]



Epoch 007 — Train Loss: 0.8140, Acc: 0.6614
  Class counts: [593, 555, 604, 547, 619]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 007 — Valid Loss: 0.8635, Acc: 0.7288
  P: 0.6266, R: 0.5730, F1: 0.5855
  CM:
 [[173   5   1   0   0]
 [ 12  22   3   0   0]
 [ 25   7  53  11   4]
 [  2   1   5   9   2]
 [  5   3  13   0   9]]
Early stopping.


Test: 100%|██████████| 23/23 [00:26<00:00,  1.16s/it]


=== Test Set Performance ===
Test Loss : 0.3906
Test Acc  : 0.8603
Precision : 0.7506
Recall    : 0.7507
F1-score  : 0.7283
Confusion Matrix:
[[173   7   0   0   0]
 [  0  35   1   0   1]
 [  0   6  78   2  13]
 [  0   0   6   6   8]
 [  0   0   6   1  22]]





In [None]:
'''**** (재탕 !!!!) seed 고정 APTOS 데이터만 사용, StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 추가
손실 함수를 nn.CrossEntropyLoss() 대신 Focal Loss로 교체
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# -------------------------------
# 실험 재현성 위한 Seed 고정 함수 추가
# -------------------------------
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 코드 제일 앞에서 실행!

# -------------------------------
# 이하 기존 코드 + Focal Loss 정의
# -------------------------------

import cv2
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# Focal Loss 구현
# ----------------------------------------
class FocalLoss(nn.Module):
    def __init__(self, gamma: float = 2.0, alpha: float = 0.25, reduction: str = "mean"):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction
        self.ce = nn.CrossEntropyLoss(reduction="none")

    def forward(self, inputs, targets):
        logpt = -self.ce(inputs, targets)
        pt = torch.exp(logpt)
        focal_term = (1 - pt) ** self.gamma
        loss = -self.alpha * focal_term * logpt
        if self.reduction == "mean":
            return loss.mean()
        elif self.reduction == "sum":
            return loss.sum()
        else:
            return loss

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        # class2, 3, 4만 증강, 나머지는 base transform만 적용
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10) - random_state 고정!
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    num_classes=5
).to(device)

criterion = FocalLoss(gamma=2.0, alpha=0.25, reduction="mean")
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos_focal.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos_focal.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/801M [00:00<?, ?B/s]

  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [05:53<00:00,  1.93s/it]



Epoch 001 — Train Loss: 0.1071, Acc: 0.6501
  Class counts: [554, 583, 616, 584, 581]


Valid: 100%|██████████| 23/23 [01:06<00:00,  2.88s/it]



Epoch 001 — Valid Loss: 0.0742, Acc: 0.7315
  P: 0.6865, R: 0.6140, F1: 0.5687
  CM:
 [[179   0   0   0   0]
 [ 13  15   9   0   0]
 [  2   9  47  41   1]
 [  0   0   2  17   0]
 [  0   0   4  17   9]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [04:40<00:00,  1.53s/it]



Epoch 002 — Train Loss: 0.0786, Acc: 0.7330
  Class counts: [595, 604, 578, 578, 563]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 002 — Valid Loss: 0.0775, Acc: 0.7014
  P: 0.6681, R: 0.6169, F1: 0.5147
  CM:
 [[175   4   0   0   0]
 [  4  32   0   1   0]
 [  2  19  30  48   1]
 [  0   1   2  16   0]
 [  0   1   2  24   3]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:53<00:00,  1.27s/it]



Epoch 003 — Train Loss: 0.0867, Acc: 0.7077
  Class counts: [582, 577, 562, 612, 585]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]



Epoch 003 — Valid Loss: 0.1536, Acc: 0.5479
  P: 0.3734, R: 0.5297, F1: 0.3935
  CM:
 [[153  23   0   1   2]
 [  3  16   0  11   7]
 [  3   4   0  83  10]
 [  0   0   0  17   2]
 [  0   0   0  16  14]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:51<00:00,  1.26s/it]



Epoch 004 — Train Loss: 0.1226, Acc: 0.6080
  Class counts: [568, 554, 607, 588, 601]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 004 — Valid Loss: 0.1256, Acc: 0.5562
  P: 0.6416, R: 0.4843, F1: 0.3969
  CM:
 [[130  48   0   0   1]
 [  2  35   0   0   0]
 [  2  35  23   0  40]
 [  0   4   1   1  13]
 [  0  10   6   0  14]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:45<00:00,  1.23s/it]



Epoch 005 — Train Loss: 0.2551, Acc: 0.2481
  Class counts: [597, 565, 578, 580, 598]


Valid: 100%|██████████| 23/23 [00:24<00:00,  1.05s/it]



Epoch 005 — Valid Loss: 0.2281, Acc: 0.4904
  P: 0.2217, R: 0.2933, F1: 0.2400
  CM:
 [[159  19   0   0   1]
 [ 23  14   0   0   0]
 [ 51  39   0   0  10]
 [ 12   6   0   0   1]
 [ 11  13   0   0   6]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:38<00:00,  1.19s/it]



Epoch 006 — Train Loss: 0.2570, Acc: 0.2186
  Class counts: [580, 572, 570, 582, 614]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 006 — Valid Loss: 0.2826, Acc: 0.0822
  P: 0.0164, R: 0.2000, F1: 0.0304
  CM:
 [[  0   0   0   0 179]
 [  0   0   0   0  37]
 [  0   0   0   0 100]
 [  0   0   0   0  19]
 [  0   0   0   0  30]]
Early stopping.


Test: 100%|██████████| 23/23 [01:03<00:00,  2.78s/it]


=== Test Set Performance ===
Test Loss : 0.0659
Test Acc  : 0.7342
Precision : 0.6768
Recall    : 0.6157
F1-score  : 0.5717
Confusion Matrix:
[[180   0   0   0   0]
 [ 14  12  11   0   0]
 [  1   3  47  44   4]
 [  0   0   0  18   2]
 [  0   0   2  16  11]]





In [None]:
'''EfficientNet-B3
seed 고정 APTOS 데이터만 사용
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 사용
preprocess_size를 (300, 300)
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# -------------------------------
# 실험 재현성 위한 Seed 고정 함수 추가
# -------------------------------
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 코드 제일 앞에서 실행!

# -------------------------------
# 이하 기존 코드 (변경된 부분만 주석 처리)
# -------------------------------

import cv2
import pandas as pd
from PIL import Image
import numpy as np

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(300, 300)):  # 384→300
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(300,300)):  # 384→300
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        # class2, 3, 4만 증강, 나머지는 base transform만 적용
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT     = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path = os.path.join(ROOT, 'train.csv')
img_dir  = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10) - random_state 고정!
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'efficientnet_b3',            # Swin → EfficientNet-B3
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_efficientnetb3_aptos.pth')
        print(">> Best EfficientNet-B3 model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'efficientnet_b3',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_efficientnetb3_aptos.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance (EfficientNet-B3) ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)


총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:40<00:00,  1.20s/it]



Epoch 001 — Train Loss: 1.2037, Acc: 0.5894
  Class counts: [584, 601, 581, 574, 578]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 001 — Valid Loss: 0.8082, Acc: 0.7425
  P: 0.5969, R: 0.6343, F1: 0.6001
  CM:
 [[166   9   3   1   0]
 [  3  25   6   2   1]
 [  7   8  57  20   8]
 [  0   0   4  12   3]
 [  0   2  12   5  11]]
>> Best EfficientNet-B3 model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:42<00:00,  1.22s/it]



Epoch 002 — Train Loss: 0.5631, Acc: 0.8112
  Class counts: [585, 592, 551, 618, 572]


Valid: 100%|██████████| 23/23 [00:24<00:00,  1.07s/it]



Epoch 002 — Valid Loss: 1.0666, Acc: 0.6795
  P: 0.5755, R: 0.6257, F1: 0.5664
  CM:
 [[151  26   2   0   0]
 [  2  23  12   0   0]
 [  4   8  46  35   7]
 [  0   0   4  14   1]
 [  0   2   9   5  14]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:47<00:00,  1.24s/it]



Epoch 003 — Train Loss: 0.5481, Acc: 0.8249
  Class counts: [539, 612, 615, 523, 629]


Valid: 100%|██████████| 23/23 [00:24<00:00,  1.08s/it]



Epoch 003 — Valid Loss: 0.9191, Acc: 0.7781
  P: 0.6642, R: 0.6128, F1: 0.6132
  CM:
 [[173   5   1   0   0]
 [ 10  17   9   0   1]
 [  9   2  74  12   3]
 [  0   0   6  12   1]
 [  2   0  13   7   8]]
>> Best EfficientNet-B3 model saved.


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:38<00:00,  1.19s/it]



Epoch 004 — Train Loss: 0.4864, Acc: 0.8437
  Class counts: [609, 578, 589, 570, 572]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 004 — Valid Loss: 0.7046, Acc: 0.7973
  P: 0.6749, R: 0.7016, F1: 0.6796
  CM:
 [[172   5   0   0   2]
 [  6  22   6   0   3]
 [  5   6  64   9  16]
 [  0   0   4  11   4]
 [  0   0   6   2  22]]
>> Best EfficientNet-B3 model saved.


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:40<00:00,  1.21s/it]



Epoch 005 — Train Loss: 0.3637, Acc: 0.8807
  Class counts: [579, 595, 568, 566, 610]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 005 — Valid Loss: 0.6732, Acc: 0.8164
  P: 0.6805, R: 0.6807, F1: 0.6741
  CM:
 [[174   5   0   0   0]
 [  2  26   8   0   1]
 [  2   7  75  11   5]
 [  0   0   6  11   2]
 [  0   1  13   4  12]]
>> Best EfficientNet-B3 model saved.


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:32<00:00,  1.16s/it]



Epoch 006 — Train Loss: 0.2993, Acc: 0.9023
  Class counts: [527, 596, 610, 602, 583]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 006 — Valid Loss: 0.8998, Acc: 0.7534
  P: 0.6112, R: 0.6661, F1: 0.6266
  CM:
 [[150  28   0   0   1]
 [  2  29   4   0   2]
 [  2   4  73   9  12]
 [  0   0   6  11   2]
 [  0   2  10   6  12]]


  with autocast():
Train Epoch 7: 100%|██████████| 183/183 [03:40<00:00,  1.21s/it]



Epoch 007 — Train Loss: 0.2111, Acc: 0.9291
  Class counts: [569, 604, 574, 596, 575]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 007 — Valid Loss: 0.5687, Acc: 0.8493
  P: 0.7355, R: 0.7060, F1: 0.7106
  CM:
 [[174   4   1   0   0]
 [  2  30   5   0   0]
 [  1   5  84   7   3]
 [  0   0   9   9   1]
 [  0   1  10   6  13]]
>> Best EfficientNet-B3 model saved.


  with autocast():
Train Epoch 8: 100%|██████████| 183/183 [03:37<00:00,  1.19s/it]



Epoch 008 — Train Loss: 0.2019, Acc: 0.9335
  Class counts: [562, 570, 588, 589, 609]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 008 — Valid Loss: 0.7479, Acc: 0.8247
  P: 0.6995, R: 0.7081, F1: 0.7012
  CM:
 [[169  10   0   0   0]
 [  3  23  11   0   0]
 [  2   4  81   5   8]
 [  0   0   6  12   1]
 [  1   1   5   7  16]]


  with autocast():
Train Epoch 9: 100%|██████████| 183/183 [03:40<00:00,  1.20s/it]



Epoch 009 — Train Loss: 0.2066, Acc: 0.9304
  Class counts: [558, 565, 643, 563, 589]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 009 — Valid Loss: 0.7665, Acc: 0.8274
  P: 0.7306, R: 0.6639, F1: 0.6879
  CM:
 [[176   3   0   0   0]
 [  6  19  12   0   0]
 [  3   3  83   6   5]
 [  0   0   8  10   1]
 [  0   1  13   2  14]]


  with autocast():
Train Epoch 10: 100%|██████████| 183/183 [03:34<00:00,  1.17s/it]



Epoch 010 — Train Loss: 0.2137, Acc: 0.9287
  Class counts: [570, 595, 578, 582, 593]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 010 — Valid Loss: 0.6356, Acc: 0.8438
  P: 0.7217, R: 0.7338, F1: 0.7266
  CM:
 [[173   6   0   0   0]
 [  2  27   7   0   1]
 [  1   4  78   7  10]
 [  0   0   4  10   5]
 [  0   1   8   1  20]]


  with autocast():
Train Epoch 11: 100%|██████████| 183/183 [03:35<00:00,  1.18s/it]



Epoch 011 — Train Loss: 0.1824, Acc: 0.9400
  Class counts: [592, 560, 586, 588, 592]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 011 — Valid Loss: 0.7320, Acc: 0.8329
  P: 0.7386, R: 0.6599, F1: 0.6841
  CM:
 [[174   5   0   0   0]
 [  4  25   8   0   0]
 [  2   6  85   4   3]
 [  0   0  11   7   1]
 [  0   2  14   1  13]]


  with autocast():
Train Epoch 12: 100%|██████████| 183/183 [03:36<00:00,  1.18s/it]



Epoch 012 — Train Loss: 0.1432, Acc: 0.9548
  Class counts: [571, 615, 571, 598, 563]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.01s/it]



Epoch 012 — Valid Loss: 0.7989, Acc: 0.7973
  P: 0.6888, R: 0.7096, F1: 0.6818
  CM:
 [[174   5   0   0   0]
 [  6  21   9   0   1]
 [  2   0  61   9  28]
 [  0   0   4  12   3]
 [  0   1   3   3  23]]
Early stopping.


Test: 100%|██████████| 23/23 [00:24<00:00,  1.07s/it]


=== Test Set Performance (EfficientNet-B3) ===
Test Loss : 0.5490
Test Acc  : 0.8630
Precision : 0.6960
Recall    : 0.6858
F1-score  : 0.6805
Confusion Matrix:
[[179   1   0   0   0]
 [  1  34   2   0   0]
 [  1   9  84   4   1]
 [  0   3   8   3   6]
 [  0   2   9   3  15]]





In [None]:
'''DenseNet-121
seed 고정 APTOS 데이터만 사용
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 사용
preprocess_size를 (224, 224)
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# -------------------------------
# 실험 재현성 위한 Seed 고정 함수 추가
# -------------------------------
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 코드 제일 앞에서 실행!

# -------------------------------
# 이하 기존 코드 (변경된 부분만 주석 처리)
# -------------------------------

import cv2
import pandas as pd
from PIL import Image
import numpy as np

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(224, 224)):  # 300→224
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(224,224)):  # 300→224
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        # class2, 3, 4만 증강, 나머지는 base transform만 적용
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT     = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path = os.path.join(ROOT, 'train.csv')
img_dir  = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10) - random_state 고정!
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'densenet121',            # EfficientNet → DenseNet-121
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_densenet121_aptos.pth')
        print(">> Best DenseNet-121 model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'densenet121',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_densenet121_aptos.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance (DenseNet-121) ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


model.safetensors:   0%|          | 0.00/32.3M [00:00<?, ?B/s]

  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:41<00:00,  1.21s/it]



Epoch 001 — Train Loss: 1.1593, Acc: 0.5446
  Class counts: [585, 549, 605, 580, 599]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 001 — Valid Loss: 0.6712, Acc: 0.7425
  P: 0.6236, R: 0.6438, F1: 0.6097
  CM:
 [[170   5   4   0   0]
 [  8  19   7   1   2]
 [  6   1  51  19  23]
 [  0   0   4  11   4]
 [  0   0   4   6  20]]
>> Best DenseNet-121 model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:32<00:00,  1.16s/it]



Epoch 002 — Train Loss: 0.6792, Acc: 0.7416
  Class counts: [572, 561, 579, 617, 589]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 002 — Valid Loss: 0.6643, Acc: 0.7342
  P: 0.5921, R: 0.6196, F1: 0.5799
  CM:
 [[172   6   1   0   0]
 [  8  24   4   0   1]
 [  3   6  49  27  15]
 [  0   0   1  12   6]
 [  0   1   6  12  11]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:33<00:00,  1.17s/it]



Epoch 003 — Train Loss: 0.6160, Acc: 0.7683
  Class counts: [582, 604, 592, 562, 578]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 003 — Valid Loss: 0.4801, Acc: 0.8164
  P: 0.7449, R: 0.6806, F1: 0.6839
  CM:
 [[172   7   0   0   0]
 [  8  27   2   0   0]
 [  5   9  73   1  12]
 [  1   1   5   6   6]
 [  0   2   8   0  20]]
>> Best DenseNet-121 model saved.


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:37<00:00,  1.19s/it]



Epoch 004 — Train Loss: 0.5911, Acc: 0.7766
  Class counts: [558, 597, 579, 550, 634]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 004 — Valid Loss: 0.7943, Acc: 0.7315
  P: 0.5900, R: 0.6415, F1: 0.5771
  CM:
 [[171   8   0   0   0]
 [  5  31   1   0   0]
 [  4  15  43  28  10]
 [  0   3   2  13   1]
 [  0   3  10   8   9]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:39<00:00,  1.20s/it]



Epoch 005 — Train Loss: 0.5322, Acc: 0.7999
  Class counts: [607, 562, 567, 603, 579]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 005 — Valid Loss: 0.6664, Acc: 0.7452
  P: 0.6263, R: 0.6305, F1: 0.6000
  CM:
 [[173   5   1   0   0]
 [  9  21   5   1   1]
 [  3   2  52  31  12]
 [  0   0   1  12   6]
 [  0   0   6  10  14]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:38<00:00,  1.19s/it]



Epoch 006 — Train Loss: 0.4797, Acc: 0.8136
  Class counts: [591, 588, 563, 609, 567]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.00s/it]



Epoch 006 — Valid Loss: 0.7171, Acc: 0.7890
  P: 0.6350, R: 0.6588, F1: 0.6440
  CM:
 [[168   7   2   1   1]
 [  6  25   6   0   0]
 [  0   4  72  12  12]
 [  0   2   4  10   3]
 [  0   1  12   4  13]]


  with autocast():
Train Epoch 7: 100%|██████████| 183/183 [03:34<00:00,  1.17s/it]



Epoch 007 — Train Loss: 0.4046, Acc: 0.8454
  Class counts: [547, 630, 549, 634, 558]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 007 — Valid Loss: 0.5939, Acc: 0.7973
  P: 0.6776, R: 0.6785, F1: 0.6659
  CM:
 [[175   4   0   0   0]
 [  7  29   1   0   0]
 [  4  14  61   3  18]
 [  0   2   5   8   4]
 [  0   2   8   2  18]]


  with autocast():
Train Epoch 8: 100%|██████████| 183/183 [03:42<00:00,  1.22s/it]



Epoch 008 — Train Loss: 0.3599, Acc: 0.8646
  Class counts: [572, 594, 563, 600, 589]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 008 — Valid Loss: 0.8355, Acc: 0.7616
  P: 0.5842, R: 0.5842, F1: 0.5774
  CM:
 [[171   7   0   0   1]
 [  5  28   4   0   0]
 [  2  14  66   5  13]
 [  0   0   9   6   4]
 [  0   3  18   2   7]]
Early stopping.


Test: 100%|██████████| 23/23 [00:24<00:00,  1.06s/it]


=== Test Set Performance (DenseNet-121) ===
Test Loss : 0.4896
Test Acc  : 0.8110
Precision : 0.6522
Recall    : 0.6497
F1-score  : 0.6264
Confusion Matrix:
[[178   2   0   0   0]
 [  3  31   3   0   0]
 [  4  11  66   1  17]
 [  1   0   9   2   8]
 [  0   0   8   2  19]]





In [None]:
'''ViT (Vision Transformer)
seed 고정 APTOS 데이터만 사용
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 사용
preprocess_size를 (224, 224)
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# -------------------------------
# 실험 재현성 위한 Seed 고정 함수 추가
# -------------------------------
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 코드 제일 앞에서 실행!

# -------------------------------
# 이하 기존 코드 (ViT 적용을 위해 변경된 부분만 주석 처리)
# -------------------------------

import cv2
import pandas as pd
from PIL import Image
import numpy as np

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(224, 224)):  # ViT 기본 입력 크기
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(224,224)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        # class2, 3, 4만 증강, 나머지는 base transform만 적용
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT     = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path = os.path.join(ROOT, 'train.csv')
img_dir  = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10) - random_state 고정!
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'vit_base_patch16_224',  # ViT Base patch16 224×224
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_vit_base_patch16_224_aptos.pth')
        print(">> Best ViT-Base model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'vit_base_patch16_224',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_vit_base_patch16_224_aptos.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance (ViT Base Patch16 224) ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:20<00:00,  1.10s/it]



Epoch 001 — Train Loss: 1.0339, Acc: 0.5562
  Class counts: [604, 575, 585, 573, 581]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 001 — Valid Loss: 0.8228, Acc: 0.6493
  P: 0.5828, R: 0.5708, F1: 0.4924
  CM:
 [[149  23   5   2   0]
 [  4  19  14   0   0]
 [  2   1  48  48   1]
 [  0   0   1  17   1]
 [  0   0   6  20   4]]
>> Best ViT-Base model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 002 — Train Loss: 0.9054, Acc: 0.6234
  Class counts: [613, 582, 570, 592, 561]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 002 — Valid Loss: 1.4204, Acc: 0.4740
  P: 0.3845, R: 0.4105, F1: 0.3183
  CM:
 [[129   3   6   0  41]
 [  5  14   5   0  13]
 [  3   3   2   0  92]
 [  0   0   0   0  19]
 [  1   0   1   0  28]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:18<00:00,  1.09s/it]



Epoch 003 — Train Loss: 1.1823, Acc: 0.5041
  Class counts: [574, 595, 580, 611, 558]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.03it/s]



Epoch 003 — Valid Loss: 1.0656, Acc: 0.5671
  P: 0.2790, R: 0.4976, F1: 0.3302
  CM:
 [[164  12   0   3   0]
 [  8  27   0   2   0]
 [ 18  18   0  64   0]
 [  0   3   0  16   0]
 [  2  10   0  18   0]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:21<00:00,  1.10s/it]



Epoch 004 — Train Loss: 1.2401, Acc: 0.4777
  Class counts: [624, 548, 593, 562, 591]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.02it/s]



Epoch 004 — Valid Loss: 1.3333, Acc: 0.6164
  P: 0.4076, R: 0.3807, F1: 0.3561
  CM:
 [[121   2  56   0   0]
 [  5  11  21   0   0]
 [  4   3  93   0   0]
 [  0   0  19   0   0]
 [  1   0  29   0   0]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:23<00:00,  1.11s/it]



Epoch 005 — Train Loss: 1.4190, Acc: 0.3811
  Class counts: [575, 593, 579, 595, 576]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 005 — Valid Loss: 1.5456, Acc: 0.2603
  P: 0.1399, R: 0.2191, F1: 0.1270
  CM:
 [[83  0  1 95  0]
 [ 5  0  0 32  0]
 [26  0  0 74  0]
 [ 7  0  0 12  0]
 [ 7  0  0 23  0]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:21<00:00,  1.10s/it]



Epoch 006 — Train Loss: 1.5742, Acc: 0.2711
  Class counts: [555, 543, 616, 609, 595]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 006 — Valid Loss: 1.8096, Acc: 0.1233
  P: 0.2664, R: 0.2211, F1: 0.0830
  CM:
 [[  0 159  20   0   0]
 [  0  36   1   0   0]
 [  0  92   8   0   0]
 [  0  13   5   1   0]
 [  0  28   2   0   0]]
Early stopping.


Test: 100%|██████████| 23/23 [00:24<00:00,  1.08s/it]


=== Test Set Performance (ViT Base Patch16 224) ===
Test Loss : 0.7469
Test Acc  : 0.6685
Precision : 0.5998
Recall    : 0.5926
F1-score  : 0.5254
Confusion Matrix:
[[147  24   9   0   0]
 [  1  24  10   2   0]
 [  2   2  52  43   0]
 [  0   1   1  16   2]
 [  0   0  13  11   5]]





In [None]:
'''ConvNeXt-Base
seed 고정 APTOS 데이터만 사용
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler 사용
preprocess_size를 (224, 224)
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# -------------------------------
# 실험 재현성 위한 Seed 고정 함수 추가
# -------------------------------
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 코드 제일 앞에서 실행!

# -------------------------------
# 필요한 라이브러리 임포트
# -------------------------------
import cv2
import pandas as pd
from PIL import Image
import numpy as np

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(224, 224)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(224,224)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        # class2, 3, 4만 증강, 나머지는 base transform만 적용
        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT     = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path = os.path.join(ROOT, 'train.csv')
img_dir  = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")

# ----------------------------------------
# 4) Stratified Split (80/10/10) - random_state 고정!
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler  = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (Sampler 적용)
# ----------------------------------------
train_ds     = APTOSDataset(df_train, img_dir)
val_ds       = APTOSDataset(df_val,   img_dir)
test_ds      = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'convnext_base',   # ConvNeXt-Base 사용
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_convnext_base_aptos.pth')
        print(">> Best ConvNeXt-Base model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'convnext_base',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_convnext_base_aptos.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance (ConvNeXt-Base) ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


model.safetensors:   0%|          | 0.00/354M [00:00<?, ?B/s]

  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 001 — Train Loss: 0.8184, Acc: 0.6576
  Class counts: [603, 608, 524, 580, 603]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 001 — Valid Loss: 0.5370, Acc: 0.8000
  P: 0.6978, R: 0.6990, F1: 0.6637
  CM:
 [[173   6   0   0   0]
 [  2  31   4   0   0]
 [  3  10  64  21   2]
 [  0   0   5  13   1]
 [  0   1   8  10  11]]
>> Best ConvNeXt-Base model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 002 — Train Loss: 0.5667, Acc: 0.7862
  Class counts: [576, 638, 587, 566, 551]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 002 — Valid Loss: 0.4771, Acc: 0.8274
  P: 0.7947, R: 0.5979, F1: 0.6401
  CM:
 [[179   0   0   0   0]
 [ 10  12  15   0   0]
 [  2   0  93   2   3]
 [  0   0   9   7   3]
 [  1   0  17   1  11]]
>> Best ConvNeXt-Base model saved.


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [03:24<00:00,  1.12s/it]



Epoch 003 — Train Loss: 0.5107, Acc: 0.8060
  Class counts: [568, 618, 615, 560, 557]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]



Epoch 003 — Valid Loss: 0.6212, Acc: 0.8055
  P: 0.7065, R: 0.6478, F1: 0.6693
  CM:
 [[178   1   0   0   0]
 [ 15  19   3   0   0]
 [  6   8  71   4  11]
 [  0   0   8   8   3]
 [  1   2   9   0  18]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:25<00:00,  1.12s/it]



Epoch 004 — Train Loss: 0.5284, Acc: 0.8029
  Class counts: [579, 572, 594, 584, 589]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 004 — Valid Loss: 0.7494, Acc: 0.7479
  P: 0.6349, R: 0.6424, F1: 0.5893
  CM:
 [[172   7   0   0   0]
 [  7  30   0   0   0]
 [  3  20  43   3  31]
 [  0   1   4   4  10]
 [  0   2   3   1  24]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:22<00:00,  1.11s/it]



Epoch 005 — Train Loss: 0.4999, Acc: 0.8033
  Class counts: [601, 580, 615, 551, 571]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 005 — Valid Loss: 0.6491, Acc: 0.7726
  P: 0.6048, R: 0.6477, F1: 0.6194
  CM:
 [[165  10   0   0   4]
 [  1  32   4   0   0]
 [  1  12  65  10  12]
 [  0   0   7   7   5]
 [  0   1  12   4  13]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:23<00:00,  1.11s/it]



Epoch 006 — Train Loss: 0.4467, Acc: 0.8338
  Class counts: [607, 577, 577, 586, 571]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 006 — Valid Loss: 0.6099, Acc: 0.8219
  P: 0.7232, R: 0.6854, F1: 0.6771
  CM:
 [[171   7   1   0   0]
 [  7  24   5   0   1]
 [  3   7  76   1  13]
 [  0   0   6   5   8]
 [  0   2   3   1  24]]


  with autocast():
Train Epoch 7: 100%|██████████| 183/183 [03:21<00:00,  1.10s/it]



Epoch 007 — Train Loss: 0.3658, Acc: 0.8653
  Class counts: [575, 601, 576, 588, 578]


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.01it/s]



Epoch 007 — Valid Loss: 0.5312, Acc: 0.8137
  P: 0.6994, R: 0.7261, F1: 0.6945
  CM:
 [[172   7   0   0   0]
 [  2  33   2   0   0]
 [  1  20  66   7   6]
 [  0   1   4  13   1]
 [  0   4   9   4  13]]
Early stopping.


Test: 100%|██████████| 23/23 [00:24<00:00,  1.07s/it]


=== Test Set Performance (ConvNeXt-Base) ===
Test Loss : 0.4714
Test Acc  : 0.8329
Precision : 0.7533
Recall    : 0.5894
F1-score  : 0.6118
Confusion Matrix:
[[180   0   0   0   0]
 [  6  10  21   0   0]
 [  0   0  95   2   2]
 [  0   0   8   4   8]
 [  0   0  12   2  15]]





In [None]:
!mv /content/best_efficientnetb3_aptos.pth /content/drive/MyDrive/DL_Project_17/YJ/APTOS/best_efficientnetb3_aptos.pth
!mv /content/best_convnext_base_aptos.pth /content/drive/MyDrive/DL_Project_17/YJ/APTOS/best_convnext_base_aptos.pth
!mv /content/best_vit_base_patch16_224_aptos.pth /content/drive/MyDrive/DL_Project_17/YJ/APTOS/best_vit_base_patch16_224_aptos.pth
!mv /content/best_densenet121_aptos.pth /content/drive/MyDrive/DL_Project_17/YJ/APTOS/best_densenet121_aptos.pth

In [None]:
'''(추가 !!!!!!!) 잘못된 방법 APTOS 데이터만 사용, StratifiedShuffleSplit을 두 단계로 사용해 전체 APTOS를 80/10/10으로 나눴고,
class2·3·4만 On-DAT 증강을 매 배치마다 자동으로 받고, class0·1은 원래 전처리만 거치게 됩니다.
WeightedRandomsampler를 쓰니 학습은 균일 분포로 하는데 test는 불균형 분포를 그대로 해서 train은 안나오고 test는 잘나오는 현상이 생김
그래서 WeightRamdomsampler는 제거, Sampler 대신 Loss 함수에 클래스별 가중치를 주는 방법으로 전환 - CrossEntropyLoss(weight=weights_tensor) 적용
나머지 학습 루프(optimizer, scheduler, loss)는 기본 shuffle + nn.CrossEntropyLoss() 세팅 그대로
APTOSDataset 클래스에서 .png 확장자를 붙여 이미지를 로드합니다.
OneCycleLR + AMP 학습 루프, 검증, 최종 테스트까지 포함'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler


# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)


# ----------------------------------------
# 2) Dataset 정의 (On-DAT 클래스별 증강)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384)):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size

        self.base_transform = A.Compose([
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])
        self.aug_transform = A.Compose([
            A.Rotate(limit=15, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img = np.array(pil)

        if label >= 2:
            img_t = self.aug_transform(image=img)['image']
        else:
            img_t = self.base_transform(image=img)['image']

        return img_t, torch.tensor(label, dtype=torch.long)


# ----------------------------------------
# 3) APTOS train.csv 로드 및 파일 존재 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[full_df['image']
    .apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png')))]
full_df = full_df.reset_index(drop=True)
print(f"총 {len(full_df)}개 중, 실제 파일이 있는 {len(full_df)}개만 사용합니다.")


# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, hold_idx = next(sss1.split(full_df, full_df['label']))
df_train = full_df.iloc[train_idx].reset_index(drop=True)
df_hold  = full_df.iloc[hold_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_hold, df_hold['label']))
df_val  = df_hold.iloc[val_idx].reset_index(drop=True)
df_test = df_hold.iloc[test_idx].reset_index(drop=True)


# ----------------------------------------
# 5) 클래스 가중치 기반 Loss 세팅
# ----------------------------------------
class_counts   = df_train['label'].value_counts().sort_index().values
class_weights  = 1.0 / class_counts
weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to('cuda' if torch.cuda.is_available() else 'cpu')

criterion = nn.CrossEntropyLoss(weight=weights_tensor)


# ----------------------------------------
# 6) Dataset & DataLoader 준비 (불균형 shuffle)
# ----------------------------------------
train_ds = APTOSDataset(df_train, img_dir)
val_ds   = APTOSDataset(df_val,   img_dir)
test_ds  = APTOSDataset(df_test,  img_dir)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)


# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    num_classes=5
).to(device)

optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)


# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    t_loss = t_correct = t_total = 0
    counts = [0]*num_classes

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist():
            counts[l] += 1

        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"\nEpoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print("  Class counts:", counts)

    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"\nEpoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  CM:\n", v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc; no_improve = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos_weighted.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping."); break


# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos_weighted.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)


총 3648개 중, 실제 파일이 있는 3648개만 사용합니다.


  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [02:47<00:00,  1.09it/s]



Epoch 001 — Train Loss: 0.9807, Acc: 0.7032
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 001 — Valid Loss: 0.8099, Acc: 0.6548
  P: 0.5609, R: 0.6098, F1: 0.5028
  CM:
 [[163  15   1   0   0]
 [  3  31   1   2   0]
 [  0  12  16  25  47]
 [  0   1   0   9   9]
 [  0   2   1   7  20]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [02:50<00:00,  1.08it/s]



Epoch 002 — Train Loss: 0.8796, Acc: 0.7502
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 002 — Valid Loss: 0.9974, Acc: 0.6575
  P: 0.5191, R: 0.5114, F1: 0.4210
  CM:
 [[179   0   0   0   0]
 [ 16  20   1   0   0]
 [  4  13  15   0  68]
 [  0   1   0   0  18]
 [  0   4   0   0  26]]
>> Best model saved.


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [02:50<00:00,  1.07it/s]



Epoch 003 — Train Loss: 1.0702, Acc: 0.6439
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 003 — Valid Loss: 1.2353, Acc: 0.5753
  P: 0.4080, R: 0.4365, F1: 0.3778
  CM:
 [[149  27   0   0   3]
 [  5  30   0   0   2]
 [ 16  29  22   5  28]
 [  0   5   6   1   7]
 [  3   7   9   3   8]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [02:50<00:00,  1.08it/s]



Epoch 004 — Train Loss: 1.2913, Acc: 0.5651
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 004 — Valid Loss: 1.4644, Acc: 0.6164
  P: 0.3418, R: 0.3947, F1: 0.3355
  CM:
 [[163  16   0   0   0]
 [ 10  26   1   0   0]
 [ 11  53  36   0   0]
 [  1   8  10   0   0]
 [  2  17  11   0   0]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [02:47<00:00,  1.09it/s]



Epoch 005 — Train Loss: 1.5880, Acc: 0.2947
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.04s/it]



Epoch 005 — Valid Loss: 1.6166, Acc: 0.2740
  P: 0.0548, R: 0.2000, F1: 0.0860
  CM:
 [[  0   0 179   0   0]
 [  0   0  37   0   0]
 [  0   0 100   0   0]
 [  0   0  19   0   0]
 [  0   0  30   0   0]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [02:49<00:00,  1.08it/s]



Epoch 006 — Train Loss: 1.6230, Acc: 0.2526
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]



Epoch 006 — Valid Loss: 1.6184, Acc: 0.1014
  P: 0.0203, R: 0.2000, F1: 0.0368
  CM:
 [[  0 179   0   0   0]
 [  0  37   0   0   0]
 [  0 100   0   0   0]
 [  0  19   0   0   0]
 [  0  30   0   0   0]]


  with autocast():
Train Epoch 7: 100%|██████████| 183/183 [02:49<00:00,  1.08it/s]



Epoch 007 — Train Loss: 1.6183, Acc: 0.3561
  Class counts: [1437, 295, 796, 154, 236]


Valid: 100%|██████████| 23/23 [00:24<00:00,  1.05s/it]



Epoch 007 — Valid Loss: 1.6119, Acc: 0.2740
  P: 0.0548, R: 0.2000, F1: 0.0860
  CM:
 [[  0   0 179   0   0]
 [  0   0  37   0   0]
 [  0   0 100   0   0]
 [  0   0  19   0   0]
 [  0   0  30   0   0]]
Early stopping.


Test: 100%|██████████| 23/23 [00:26<00:00,  1.14s/it]


=== Test Set Performance ===
Test Loss : 0.9346
Test Acc  : 0.6904
Precision : 0.5754
Recall    : 0.5709
F1-score  : 0.4741
Confusion Matrix:
[[179   1   0   0   0]
 [ 11  26   0   0   0]
 [  1   8  19   0  71]
 [  0   1   0   0  19]
 [  0   1   0   0  28]]





In [None]:
#!mv /content/best_swin_large384_aptos_aug_2.pth /content/drive/MyDrive/DL_Project_17/YJ/APTOS/best_swin_large384_aptos_aug_2.pth
!mv /content/best_swin_large384_aptos_aug_3.pth /content/drive/MyDrive/DL_Project_17/YJ/APTOS/best_swin_large384_aptos_aug_3.pth



---



---



---



---



---




In [None]:
'''APTOS 전체 데이터, OnDAT 1차, WeightedRamdomSampler 적용
RandomRotate
RandomBrightnessContrast
GaussianNoise
HueSaturationValue
Cutout (RandomErasing)
(참고: Albumentations 라이브러리 기반, 각 증강 강도는 안저(fundus) 영상 문헌에서 자주 쓰이는 범위로 설정)'''

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a_chan, b_chan = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a_chan, b_chan])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (증강 포함)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384), train=True):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.train = train

        if self.train:
            self.aug = A.Compose([
                A.RandomRotate90(p=0.5),
                A.HorizontalFlip(p=0.5),
                A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15, p=0.5),
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
                A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=0.5),
                A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
                A.CoarseDropout(              # 수정: Cutout -> CoarseDropout
                    min_holes=1,
                    max_holes=8,
                    max_height=32,
                    max_width=32,
                    fill_value=0,
                    p=0.5
                ),
                A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
                ToTensorV2()
            ])
        else:
            self.aug = A.Compose([
                A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
                ToTensorV2()
            ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil_img = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img_np  = np.array(pil_img)
        img_t   = self.aug(image=img_np)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 필터링
# ----------------------------------------
ROOT     = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path = os.path.join(ROOT, 'train.csv')
img_dir  = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})

def file_exists(fname):
    if not fname.lower().endswith('.png'):
        fname = f"{fname}.png"
    return os.path.isfile(os.path.join(img_dir, fname))

full_df = full_df[ full_df['image'].apply(file_exists) ].reset_index(drop=True)

# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, holdout_idx = next(sss1.split(full_df, full_df['label']))
df_train   = full_df.iloc[train_idx].reset_index(drop=True)
df_holdout = full_df.iloc[holdout_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_holdout, df_holdout['label']))
df_val  = df_holdout.iloc[val_idx].reset_index(drop=True)
df_test = df_holdout.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 준비
# ----------------------------------------
class_counts = df_train['label'].value_counts().sort_index().values
class_weights = 1.0 / class_counts
sample_weights = class_weights[df_train['label'].values]
sampler = WeightedRandomSampler(weights=sample_weights,
                                num_samples=len(sample_weights),
                                replacement=True)

# ----------------------------------------
# 6) Dataset & DataLoader 준비 (sampler 사용)
# ----------------------------------------
train_ds = APTOSDataset(df_train, img_dir, train=True)
val_ds   = APTOSDataset(df_val,   img_dir, train=False)
test_ds  = APTOSDataset(df_test,  img_dir, train=False)

train_loader = DataLoader(train_ds, batch_size=16, sampler=sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True,
    num_classes=5
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0

for epoch in range(1, NUM_EPOCHS+1):
    # — Train —
    model.train()
    t_loss = t_correct = t_total = 0

    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss/t_total
    train_acc  = t_correct/t_total
    print(f"Epoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")

    # — Validate —
    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu())
            all_l.append(labels.cpu())

    valid_loss = v_loss/v_total
    valid_acc  = v_correct/v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"Epoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  Confusion Matrix:")
    print(v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc
        no_improve   = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping.")
            break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=False, num_classes=5
).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu())
        all_l.append(labels.cpu())

test_loss = t_loss/t_total
test_acc  = t_correct/t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

  original_init(self, **validated_kwargs)
  A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
  A.CoarseDropout(              # 수정: Cutout -> CoarseDropout
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/801M [00:00<?, ?B/s]

  scaler     = GradScaler()
  with autocast():
Train Epoch 1: 100%|██████████| 183/183 [06:13<00:00,  2.04s/it]


Epoch 001 — Train Loss: 1.2116, Acc: 0.4517


Valid: 100%|██████████| 23/23 [01:18<00:00,  3.40s/it]


Epoch 001 — Valid Loss: 0.7155, Acc: 0.7370
  P: 0.6144, R: 0.6867, F1: 0.6151
  Confusion Matrix:
[[167  11   1   0   0]
 [  1  27   9   0   0]
 [  1  14  42  30  13]
 [  0   0   3  13   3]
 [  0   0   0  10  20]]
>> Best model saved.


  with autocast():
Train Epoch 2: 100%|██████████| 183/183 [04:40<00:00,  1.53s/it]


Epoch 002 — Train Loss: 1.0654, Acc: 0.5480


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]


Epoch 002 — Valid Loss: 0.7595, Acc: 0.7288
  P: 0.6562, R: 0.6054, F1: 0.5797
  Confusion Matrix:
[[171   7   1   0   0]
 [  4  31   2   0   0]
 [  1  45  46   8   0]
 [  0   1   7   9   2]
 [  0   9   9   3   9]]


  with autocast():
Train Epoch 3: 100%|██████████| 183/183 [04:12<00:00,  1.38s/it]


Epoch 003 — Train Loss: 1.1855, Acc: 0.5048


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]


Epoch 003 — Valid Loss: 1.1260, Acc: 0.6137
  P: 0.4776, R: 0.5107, F1: 0.4554
  Confusion Matrix:
[[138  25   4   0  12]
 [  2  27   3   1   4]
 [  1   8  40   8  43]
 [  0   0   2   1  16]
 [  0   1   9   2  18]]


  with autocast():
Train Epoch 4: 100%|██████████| 183/183 [03:55<00:00,  1.29s/it]


Epoch 004 — Train Loss: 1.2658, Acc: 0.4383


Valid: 100%|██████████| 23/23 [00:22<00:00,  1.00it/s]


Epoch 004 — Valid Loss: 0.9424, Acc: 0.5836
  P: 0.3625, R: 0.5326, F1: 0.3478
  Confusion Matrix:
[[165  10   2   2   0]
 [  6  29   0   2   0]
 [  5  32   1  62   0]
 [  0   1   0  18   0]
 [  0   5   0  25   0]]


  with autocast():
Train Epoch 5: 100%|██████████| 183/183 [03:56<00:00,  1.29s/it]


Epoch 005 — Train Loss: 1.2610, Acc: 0.4489


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]


Epoch 005 — Valid Loss: 0.8103, Acc: 0.6740
  P: 0.5177, R: 0.5326, F1: 0.4896
  Confusion Matrix:
[[171   7   1   0   0]
 [  7  23   6   1   0]
 [  8  25  36  27   4]
 [  0   0   6  10   3]
 [  1  10   9   4   6]]


  with autocast():
Train Epoch 6: 100%|██████████| 183/183 [03:44<00:00,  1.23s/it]


Epoch 006 — Train Loss: 1.1577, Acc: 0.4774


Valid: 100%|██████████| 23/23 [00:23<00:00,  1.02s/it]


Epoch 006 — Valid Loss: 0.7933, Acc: 0.6932
  P: 0.4496, R: 0.5203, F1: 0.4596
  Confusion Matrix:
[[173   6   0   0   0]
 [ 10  14  12   1   0]
 [ 15   5  52  28   0]
 [  1   0   4  14   0]
 [  1   3  16  10   0]]
Early stopping.


Test: 100%|██████████| 23/23 [01:24<00:00,  3.66s/it]


=== Test Set Performance ===
Test Loss : 0.6614
Test Acc  : 0.7671
Precision : 0.6127
Recall    : 0.6571
F1-score  : 0.6178
Confusion Matrix:
[[173   7   0   0   0]
 [  0  22   9   3   3]
 [  0   9  55  22  13]
 [  0   0   4   9   7]
 [  0   1   1   6  21]]





In [None]:
'''
1. 증강 강도 먼저 줄여보기
  - OnDAT의 블러 반경, 밝기·대비·색조 변화 범위 등을 절반 정도로 낮추고, 전체 적용 확률 p를 1.0 → 0.5로 줄여보세요.
    이렇게 하면 모델이 여전히 원본 특징을 어느 정도 학습하면서도 약간의 변형에 대응하도록 훈련됩니다.
2. Sampler 세팅 약간 튜닝하기
  - replacement=True일 때 소수 클래스 샘플이 반복해서 뽑히는 게 부담스럽다면
    num_samples=len(train_ds) 대신 num_samples = 0.5 * len(train_ds) 등으로 줄여 본다거나
    replacement=False로 바꿔서 “각 이미지는 최대 1번” 뽑히게 해보세요.
    이렇게 하면 소수 클래스가 과도하게 중복되지 않습니다.
3. 클래스별 증강 비율 다르게 주기
  - 소수 클래스(예: 클래스 1, 3)에는 강한 증강을 70% 확률로, 다수 클래스(예: 클래스 0, 2)에는 30% 확률로만 적용하도록 분기 처리해 보세요.
    클래스별 sampler와 증강을 결합하면, 양쪽 효과를 더 세밀하게 조절할 수 있습니다.
4. 스케줄 증강
  - 1–5 epoch: 증강 거의 없이 학습
  - 6–15 epoch: 가벼운 증강
  - 16–최종: 강한 증강
이렇게 나눠서, 처음에는 안정적으로 수렴시키고 점차 일반화 능력을 끌어올립니다.
5. 지표 추적 & 비교
  - 매 실험마다 train/val loss, accuracy, macro-F1, 클래스별 recall 변화를 표로 정리하세요.
    어떤 조합이 가장 밸런스 있게 성능을 뽑아내는지 한눈에 비교하기 좋습니다.


1. 베이스라인 확인
Augmentation: OFF
Sampler: OFF
목표: 기존 성능(Acc≈0.77, Val Acc≈0.82) 확인

2. 약한 증강 적용
Augmentation: ON (강도 ↓, 적용 확률 p=0.5)
블러·밝기·대비 등 파라미터 절반
Sampler: OFF
목표: 약한 증강만으로 일반화 개선 여부

3. WeightedSampler 단독 적용
Augmentation: OFF
Sampler: ON (replacement=True, num_samples=len(train))
목표: 소수 클래스 오버샘플링 효과 확인

4. 증강 + Sampler (기본)
Augmentation: ON (기본 OnDAT 세팅)
Sampler: ON (replacement=True)
목표: 두 기법 결합 시 시너지/부작용

5. Sampler 튜닝
Augmentation: OFF
Sampler: ON
replacement=False
또는 num_samples = 0.5 * len(train)
목표: 중복 샘플링 줄인 버전과 비교

6. 약한 증강 + Sampler 튜닝
Augmentation: ON (강도 ↓, p=0.5)
Sampler: ON (replacement=False or num_samples=0.5*len)
목표: 과도한 증강/샘플링 모두 완화

7. 클래스별 맞춤 증강
Augmentation: ON
소수 클래스 p=0.7, 다수 클래스 p=0.3
Sampler: ON (replacement=False)
목표: 클래스별 증강 차별화 효과

8. 스케줄형 증강
Epoch 1–5: Aug OFF
Epoch 6–15: Aug ON (약하게)
Epoch 16+: Aug ON (강하게)
Sampler: ON (replacement=False)
목표: 단계별 적응학습 효과'''

'APTOS 전체 데이터, OnDAT, WeightedRamdomSampler 적용\nRandomRotate\nRandomBrightnessContrast\nGaussianNoise\nHueSaturationValue\nCutout (RandomErasing)\n(참고: Albumentations 라이브러리 기반, 각 증강 강도는 안저(fundus) 영상 문헌에서 자주 쓰이는 범위로 설정)'

In [None]:
'''sampler off, OnDAT 약하게 적용'''

# Colab에서 실행 시 필요한 라이브러리 설치
# !pip install opencv-python timm torch torchvision pandas scikit-learn tqdm albumentations

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a, b])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (약한 증강 포함)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384), augment=False):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        if augment:
            # 약한 증강 설정
            self.transform = A.Compose([
                A.RandomRotate90(p=0.3),
                A.HorizontalFlip(p=0.3),
                A.ShiftScaleRotate(
                    shift_limit=0.03,
                    scale_limit=0.05,
                    rotate_limit=8,
                    p=0.3
                ),
                A.RandomBrightnessContrast(
                    brightness_limit=0.1,
                    contrast_limit=0.1,
                    p=0.3
                ),
                A.GaussNoise(
                    var_limit=(5.0, 25.0),
                    p=0.3
                ),
                A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
                ToTensorV2(),
            ])
        else:
            self.transform = A.Compose([
                A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
                ToTensorV2(),
            ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)

        pil_img = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img_np = np.array(pil_img)
        img_t = self.transform(image=img_np)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS 데이터 로드 및 필터링
# ----------------------------------------
ROOT    = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path= os.path.join(ROOT, 'train.csv')
img_dir = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
def file_exists(fname):
    if not fname.lower().endswith('.png'):
        fname = f"{fname}.png"
    return os.path.isfile(os.path.join(img_dir, fname))
full_df = full_df[ full_df['image'].apply(file_exists) ].reset_index(drop=True)
print(f"총 {len(full_df)}개 사용")

# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, holdout_idx = next(sss1.split(full_df, full_df['label']))
df_train   = full_df.iloc[train_idx].reset_index(drop=True)
df_holdout = full_df.iloc[holdout_idx].reset_index(drop=True)
sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_holdout, df_holdout['label']))
df_val  = df_holdout.iloc[val_idx].reset_index(drop=True)
df_test = df_holdout.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 설정
# ----------------------------------------
class_counts = df_train['label'].value_counts().sort_index().tolist()
class_weights = [1.0 / c for c in class_counts]
sample_weights = df_train['label'].apply(lambda x: class_weights[x]).tolist()
train_sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# ----------------------------------------
# 6) 데이터로더 준비
# ----------------------------------------
batch_size = 16
train_ds = APTOSDataset(df_train, img_dir, augment=True)
val_ds   = APTOSDataset(df_val,   img_dir, augment=False)
test_ds  = APTOSDataset(df_test,  img_dir, augment=False)
train_loader = DataLoader(train_ds, batch_size=batch_size, sampler=train_sampler,
                          num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=batch_size, shuffle=False,
                          num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False,
                          num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 옵티마이저 · OneCycleLR · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = timm.create_model(
    'swin_large_patch4_window12_384.ms_in22k_ft_in1k',
    pretrained=True, num_classes=5
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(
    optimizer, max_lr=1e-3,
    steps_per_epoch=len(train_loader), epochs=NUM_EPOCHS,
    pct_start=0.1, div_factor=25.0
)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0
num_classes  = 5

for epoch in range(1, NUM_EPOCHS+1):
    # Train
    model.train()
    t_loss = t_correct = t_total = 0
    epoch_counts = [0]*num_classes
    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch}'):
        for l in labels.cpu().tolist(): epoch_counts[l] += 1
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast(): out = model(imgs); loss = criterion(out, labels)
        scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update(); scheduler.step()
        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
    print(f"Epoch {epoch:03d} — Train Loss: {t_loss/t_total:.4f}, Acc: {t_correct/t_total:.4f}")
    print("  Epoch Sampling Counts by Class:", epoch_counts)

    # Validate
    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []
    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out = model(imgs); loss = criterion(out, labels)
            v_loss    += loss.item()*imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu()); all_l.append(labels.cpu())
    all_p = torch.cat(all_p).numpy(); all_l = torch.cat(all_l).numpy()
    print(f"Epoch {epoch:03d} — Valid Loss: {v_loss/v_total:.4f}, Acc: {v_correct/v_total:.4f}")
    print(f"  P: {precision_score(all_l, all_p, average='macro', zero_division=0):.4f}","
          f" R: {recall_score(all_l, all_p, average='macro', zero_division=0):.4f}","
          f" F1: {f1_score(all_l, all_p, average='macro', zero_division=0):.4f}")
    print("  Confusion Matrix:"); print(confusion_matrix(all_l, all_p))
    if (v_correct/v_total) > best_val_acc:
        best_val_acc = v_correct/v_total; no_improve=0; torch.save(model.state_dict(), 'best_model.pth'); print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience: print("Early stopping."); break

# ----------------------------------------
# 9) Test 평가
# ----------------------------------------
best_model = timm.create_model('swin_large_patch4_window12_384.ms_in22k_ft_in1k', pretrained=False, num_classes=5).to(device)
best_model.load_state_dict(torch.load('best_model.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []
with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out = best_model(imgs); loss = criterion(out, labels)
        t_loss    += loss.item()*imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu()); all_l.append(labels.cpu())
all_p = torch.cat(all_p).numpy(); all_l = torch.cat(all_l).numpy()
print("=== Test Performance ===")
print(f"Acc: {t_correct/t_total:.4f}","
      f" P: {precision_score(all_l, all_p, average='macro', zero_division=0):.4f}","
      f" R: {recall_score(all_l, all_p, average='macro', zero_division=0):.4f}","
      f" F1: {f1_score(all_l, all_p, average='macro', zero_division=0):.4f}")
print("Confusion Matrix:"); print(confusion_matrix(all_l, all_p))


SyntaxError: unterminated string literal (detected at line 231) (<ipython-input-8-a1c371b9b97e>, line 231)

In [None]:
'''APTOS 전체 데이터, OnDAT, WeightedRamdomSampler 적용, Loss weighting (CrossEntropyLoss(weight))
RandomRotate
RandomBrightnessContrast
GaussianNoise
HueSaturationValue
Cutout (RandomErasing)
(참고: Albumentations 라이브러리 기반, 각 증강 강도는 안저(fundus) 영상 문헌에서 자주 쓰이는 범위로 설정)'''

# 필요한 라이브러리 설치 (Colab)
#!pip install --upgrade albumentations opencv-python timm torch torchvision pandas scikit-learn tqdm

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler

# ----------------------------------------
# 1) 전처리 함수 정의 (원판 크롭 + CLAHE + 리사이즈)
# ----------------------------------------
def preprocess_fundus_image(img_path, output_size=(384, 384)):
    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {img_path}")
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    binary_closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(binary_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h, w = img_bgr.shape[:2]

    if not contours or cv2.contourArea(max(contours, key=cv2.contourArea)) < h*w*0.1:
        cropped = img_bgr.copy()
    else:
        cnt = max(contours, key=cv2.contourArea)
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cx, cy, radius = map(int, (cx, cy, radius))
        mask = np.zeros_like(gray)
        cv2.circle(mask, (cx, cy), radius, 255, -1)
        masked = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
        x1, y1 = max(cx-radius, 0), max(cy-radius, 0)
        x2, y2 = min(cx+radius, w), min(cy+radius, h)
        cropped = masked[y1:y2, x1:x2]

    lab = cv2.cvtColor(cropped, cv2.COLOR_BGR2LAB)
    l, a_chan, b_chan = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge([l_clahe, a_chan, b_chan])
    bgr_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2BGR)

    if output_size is not None:
        bgr_clahe = cv2.resize(bgr_clahe, output_size, interpolation=cv2.INTER_AREA)

    img_rgb = cv2.cvtColor(bgr_clahe, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)

# ----------------------------------------
# 2) Dataset 정의 (증강 포함)
# ----------------------------------------
class APTOSDataset(Dataset):
    def __init__(self, df, img_dir, preprocess_size=(384,384), train=True):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.preprocess_size = preprocess_size
        self.train = train

        if self.train:
            self.aug = A.Compose([
                A.RandomRotate90(p=0.5),
                A.HorizontalFlip(p=0.5),
                A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15, p=0.5),
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
                A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=0.5),
                A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
                A.CoarseDropout(min_holes=1, max_holes=8, max_height=32, max_width=32, fill_value=0, p=0.5),
                A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
                ToTensorV2()
            ])
        else:
            self.aug = A.Compose([
                A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
                ToTensorV2()
            ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        fname = row['image']
        label = int(row['label'])
        if not fname.lower().endswith('.png'):
            fname = f"{fname}.png"
        img_path = os.path.join(self.img_dir, fname)
        pil_img = preprocess_fundus_image(img_path, output_size=self.preprocess_size)
        img_np  = np.array(pil_img)
        img_t   = self.aug(image=img_np)['image']
        return img_t, torch.tensor(label, dtype=torch.long)

# ----------------------------------------
# 3) APTOS train.csv 로드 및 필터링
# ----------------------------------------
ROOT     = '/content/drive/MyDrive/DL_Project_17/YJ/APTOS'
csv_path = os.path.join(ROOT, 'train.csv')
img_dir  = os.path.join(ROOT, 'train_images')

full_df = pd.read_csv(csv_path).rename(columns={'diagnosis':'label'})
full_df = full_df[ full_df['image'].apply(lambda f: os.path.isfile(os.path.join(img_dir, f if f.lower().endswith('.png') else f+'.png'))) ].reset_index(drop=True)

# ----------------------------------------
# 4) Stratified Split (80/10/10)
# ----------------------------------------
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, holdout_idx = next(sss1.split(full_df, full_df['label']))
df_train   = full_df.iloc[train_idx].reset_index(drop=True)
df_holdout = full_df.iloc[holdout_idx].reset_index(drop=True)

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(sss2.split(df_holdout, df_holdout['label']))
df_val  = df_holdout.iloc[val_idx].reset_index(drop=True)
df_test = df_holdout.iloc[test_idx].reset_index(drop=True)

# ----------------------------------------
# 5) WeightedRandomSampler 준비
# ----------------------------------------
class_counts    = df_train['label'].value_counts().sort_index().values
class_weights_s = 1.0 / class_counts
sample_weights  = class_weights_s[df_train['label'].values]
sampler         = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

# ----------------------------------------
# 6) Dataset & DataLoader 준비
# ----------------------------------------
train_ds = APTOSDataset(df_train, img_dir, train=True)
val_ds   = APTOSDataset(df_val,   img_dir, train=False)
test_ds  = APTOSDataset(df_test,  img_dir, train=False)

train_loader = DataLoader(train_ds, batch_size=16, sampler=sampler, num_workers=4, pin_memory=True)
valid_loader = DataLoader(val_ds,   batch_size=16, shuffle=False,   num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False,   num_workers=4, pin_memory=True)

# ----------------------------------------
# 7) 모델 · 손실함수(가중치) · 옵티마이저 · LR 스케줄러 · AMP 세팅
# ----------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model  = timm.create_model('swin_large_patch4_window12_384.ms_in22k_ft_in1k', pretrained=True, num_classes=5).to(device)

# ---- 여기에 Loss Weighting 적용 ----
class_weights_tensor = torch.tensor(class_weights_s, dtype=torch.float).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
NUM_EPOCHS = 50
scaler     = GradScaler()
scheduler  = OneCycleLR(optimizer, max_lr=1e-3, steps_per_epoch=len(train_loader),
                        epochs=NUM_EPOCHS, pct_start=0.1, div_factor=25.0)

# ----------------------------------------
# 8) Training + Validation 루프
# ----------------------------------------
best_val_acc = 0.0
patience     = 5
no_improve   = 0

for epoch in range(1, NUM_EPOCHS+1):
    # — Train —
    model.train()
    t_loss = t_correct = t_total = 0
    for imgs, labels in tqdm(train_loader, desc=f'Train Epoch {epoch:03d}'):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            out  = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        t_loss    += loss.item() * imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)

    train_loss = t_loss / t_total
    train_acc  = t_correct / t_total
    print(f"Epoch {epoch:03d} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")

    # — Validate —
    model.eval()
    v_loss = v_correct = v_total = 0
    all_p, all_l = [], []
    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader, desc='Valid'):
            imgs, labels = imgs.to(device), labels.to(device)
            out  = model(imgs)
            loss = criterion(out, labels)

            v_loss    += loss.item() * imgs.size(0)
            preds      = out.argmax(dim=1)
            v_correct += (preds==labels).sum().item()
            v_total   += labels.size(0)
            all_p.append(preds.cpu())
            all_l.append(labels.cpu())

    valid_loss = v_loss / v_total
    valid_acc  = v_correct / v_total
    all_p = torch.cat(all_p).numpy()
    all_l = torch.cat(all_l).numpy()
    v_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
    v_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
    v_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
    v_cm   = confusion_matrix(all_l, all_p)

    print(f"Epoch {epoch:03d} — Valid Loss: {valid_loss:.4f}, Acc: {valid_acc:.4f}")
    print(f"  P: {v_prec:.4f}, R: {v_rec:.4f}, F1: {v_f1:.4f}")
    print("  Confusion Matrix:")
    print(v_cm)

    if valid_acc > best_val_acc:
        best_val_acc = valid_acc
        no_improve   = 0
        torch.save(model.state_dict(), 'best_swin_large384_aptos.pth')
        print(">> Best model saved.")
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping.")
            break

# ----------------------------------------
# 9) Test Set 평가
# ----------------------------------------
best_model = timm.create_model('swin_large_patch4_window12_384.ms_in22k_ft_in1k', pretrained=False, num_classes=5).to(device)
best_model.load_state_dict(torch.load('best_swin_large384_aptos.pth', map_location=device))
best_model.eval()

t_loss = t_correct = t_total = 0
all_p, all_l = [], []
with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc='Test'):
        imgs, labels = imgs.to(device), labels.to(device)
        out  = best_model(imgs)
        loss = criterion(out, labels)

        t_loss    += loss.item() * imgs.size(0)
        preds      = out.argmax(dim=1)
        t_correct += (preds==labels).sum().item()
        t_total   += labels.size(0)
        all_p.append(preds.cpu())
        all_l.append(labels.cpu())

test_loss = t_loss / t_total
test_acc  = t_correct / t_total
all_p = torch.cat(all_p).numpy()
all_l = torch.cat(all_l).numpy()
t_prec = precision_score(all_l, all_p, average='macro', zero_division=0)
t_rec  = recall_score(all_l, all_p, average='macro', zero_division=0)
t_f1   = f1_score(all_l, all_p, average='macro', zero_division=0)
t_cm   = confusion_matrix(all_l, all_p)

print("\n=== Test Set Performance ===")
print(f"Test Loss : {test_loss:.4f}")
print(f"Test Acc  : {test_acc:.4f}")
print(f"Precision : {t_prec:.4f}")
print(f"Recall    : {t_rec:.4f}")
print(f"F1-score  : {t_f1:.4f}")
print("Confusion Matrix:")
print(t_cm)

In [None]:
!mv /content/best_swin_large384_3.pth /content/drive/MyDrive/DL_Project_17/.pth