### 싱글모델로 분류

In [1]:

import torch 
import argparse
import yaml
import time
import multiprocessing as mp
import torch.nn.functional as F
from tabulate import tabulate
from tqdm import tqdm
from torch.utils.data import DataLoader
from pathlib import Path
#from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import GradScaler, autocast
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DistributedSampler, RandomSampler
from torch import distributed as dist
from nmc.models import *
from nmc.datasets import * 
from nmc.augmentations import get_train_augmentation, get_val_augmentation
from nmc.losses import get_loss
from nmc.schedulers import get_scheduler
from nmc.optimizers import get_optimizer
from nmc.utils.utils import fix_seeds, setup_cudnn, cleanup_ddp, setup_ddp
from tools.val import evaluate_epi
from nmc.utils.episodic_utils import * 
from scipy.cluster import hierarchy
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from torchvision import models
import torch.nn as nn
from torch.optim import lr_scheduler
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mutual_info_score
from scipy.cluster import hierarchy
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, hamming_loss
from torch.utils.data import Dataset, DataLoader, Sampler
from torch.utils.data import Subset
import torch.optim as optim
from torchvision import transforms
from PIL import Image
import cv2
import random

In [2]:
with open('../configs/APTOS.yaml') as f:
    cfg = yaml.load(f, Loader=yaml.SafeLoader)
print(cfg)
fix_seeds(3407)
setup_cudnn()
gpu = setup_ddp()
save_dir = Path(cfg['SAVE_DIR'])
save_dir.mkdir(exist_ok=True)
cleanup_ddp()

{'DEVICE': 'cuda:0', 'SAVE_DIR': 'output', 'MODEL': {'NAME': 'EfficientNetV2MModel', 'BACKBONE': 'EfficientNetV2', 'PRETRAINED': '/workspace/jhmoon/nmc_2024/checkpoints/pretrained/tf_efficientnetv2_m_weights.pth', 'UNFREEZE': 'full', 'VERSION': '384_32'}, 'DATASET': {'NAME': 'APTOSDataset', 'ROOT': '/data/public_data/aptos', 'TRAIN_RATIO': 0.7, 'VALID_RATIO': 0.15, 'TEST_RATIO': 0.15}, 'TRAIN': {'IMAGE_SIZE': [384, 384], 'BATCH_SIZE': 32, 'EPOCHS': 100, 'EVAL_INTERVAL': 25, 'AMP': False, 'DDP': False}, 'LOSS': {'NAME': 'CrossEntropy', 'CLS_WEIGHTS': False}, 'OPTIMIZER': {'NAME': 'adamw', 'LR': 0.001, 'WEIGHT_DECAY': 0.01}, 'SCHEDULER': {'NAME': 'warmuppolylr', 'POWER': 0.9, 'WARMUP': 10, 'WARMUP_RATIO': 0.1}, 'EVAL': {'MODEL_PATH': 'checkpoints/pretrained/FGMaxxVit/FGMaxxVit.FGMaxxVit.APTOS.pth', 'IMAGE_SIZE': [384, 384]}, 'TEST': {'MODEL_PATH': 'checkpoints/pretrained/FGMaxxVit/FGMaxxVit.FGMaxxVit.APTOS.pth', 'FILE': 'assests/ade', 'IMAGE_SIZE': [384, 384], 'OVERLAY': True}}


In [3]:
# Early Stopping
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_score):
        if self.best_score is None:
            self.best_score = val_score
        elif val_score < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = val_score
            self.counter = 0

In [4]:
def get_train_augmentation(size):
    return transforms.Compose([
        transforms.Resize(size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.Lambda(lambda x: x.float() if x.dtype == torch.uint8 else x),
        transforms.Lambda(lambda x: x / 255.0 if x.max() > 1.0 else x),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

def get_val_test_transform(size):
    return transforms.Compose([
        transforms.Resize(size),
        transforms.Lambda(lambda x: x.float() if x.dtype == torch.uint8 else x),
        transforms.Lambda(lambda x: x / 255.0 if x.max() > 1.0 else x),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


In [5]:
# 모든 라벨 비율은 배치내에서 1대1이 되도록 조정
class BinaryBalancedBatchSampler(Sampler):
    def __init__(self, dataset, batch_size, target_class):
        self.dataset = dataset
        self.batch_size = batch_size
        self.target_class = target_class
        
        # 데이터셋에서 레이블 추출
        if hasattr(dataset, 'labels'):
            self.labels = dataset.labels
            if isinstance(self.labels, np.ndarray):
                self.labels = torch.from_numpy(self.labels)
        elif hasattr(dataset, 'targets'):
            self.labels = dataset.targets
            if isinstance(self.labels, np.ndarray):
                self.labels = torch.from_numpy(self.labels)
        else:
            try:
                self.labels = [sample[1] for sample in dataset]
                if isinstance(self.labels[0], np.ndarray):
                    self.labels = torch.from_numpy(np.array(self.labels))
                else:
                    self.labels = torch.tensor(self.labels)
            except:
                raise ValueError("Cannot access labels from dataset")
        
        # 타겟 클래스와 나머지 클래스의 인덱스 저장
        if len(self.labels.shape) > 1:
            self.target_indices = torch.where(self.labels[:, target_class] == 1)[0]
            self.other_indices = torch.where(self.labels[:, target_class] == 0)[0]
        else:
            self.target_indices = torch.where(self.labels == target_class)[0]
            self.other_indices = torch.where(self.labels != target_class)[0]
        
        # 각 배치에서의 샘플 수 계산
        self.samples_per_class = batch_size // 2  # 1:1 비율
        
        self.n_batches = len(self.dataset) // batch_size
        if len(self.dataset) % batch_size != 0:
            self.n_batches += 1
    
    def __iter__(self):
        for _ in range(self.n_batches):
            batch_indices = []
            
            # 타겟 클래스에서 샘플링
            target_selected = self.target_indices[
                torch.randint(len(self.target_indices), 
                            (self.samples_per_class,))
            ]
            batch_indices.extend(target_selected.tolist())
            
            # 다른 클래스들에서 샘플링
            other_selected = self.other_indices[
                torch.randint(len(self.other_indices), 
                            (self.samples_per_class,))
            ]
            batch_indices.extend(other_selected.tolist())
            
            # 배치 셔플
            random.shuffle(batch_indices)
            
            # 배치 크기에 맞게 자르기
            if len(batch_indices) > self.batch_size:
                batch_indices = batch_indices[:self.batch_size]
            
            yield batch_indices
    
    def __len__(self):
        return self.n_batches

In [6]:
def train_epoch(model, dataloader, criterion, optimizer, scaler, device, target_label_idx):
    model.train()
    total_loss = 0
    for images, labels in tqdm(dataloader, desc="Training"):
        images = images.to(device)
        # 특정 라벨만 추출
        target_labels = labels[:, target_label_idx].float().to(device)
        
        optimizer.zero_grad()
        
        with autocast(enabled=scaler is not None):
            outputs = model(images)
            # 차원 처리
            if len(outputs.shape) == 2:
                outputs = outputs.squeeze(1)  # [batch_size, 1] -> [batch_size]
            loss = criterion(outputs, target_labels)
        
        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

In [7]:
def evaluate(model, dataloader, device, target_label_idx):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Evaluating"):
            images = images.to(device)
            # 특정 라벨만 추출
            target_labels = labels[:, target_label_idx].to(device)
            
            outputs = model(images)
            
            # 차원을 명시적으로 처리
            if len(outputs.shape) == 2:
                outputs = outputs.squeeze(1)  # [batch_size, 1] -> [batch_size]
            
            # 예측값 계산 (배치 차원 유지)
            preds = (torch.sigmoid(outputs) > 0.5).float()
            
            # 배치 단위로 저장하되 차원 명시적 처리
            all_preds.append(preds.cpu().numpy().reshape(-1))  # 1차원으로 펼치기
            all_labels.append(target_labels.cpu().numpy().reshape(-1))
    
    # 배치 단위의 리스트를 하나의 numpy array로 변환
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)
    
    # 메트릭 계산
    f1 = f1_score(all_labels, all_preds, average='binary')
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    
    return f1, accuracy, precision, recall

In [8]:
def train_and_evaluate(model, train_loader, val_loader, criterion, optimizer, scaler, device, epochs, target_label_idx):
    best_f1 = 0.0
    early_stopping = EarlyStopping(patience=10, min_delta=0.001)
    
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        
        train_loss = train_epoch(model, train_loader, criterion, optimizer, scaler, device, target_label_idx)
        val_f1, val_acc, val_prec, val_rec = evaluate(model, val_loader, device, target_label_idx)
        
        print(f"Training Loss: {train_loss:.4f}")
        print(f"Validation Metrics:")
        print(f"  F1 Score: {val_f1:.4f}")
        print(f"  Accuracy: {val_acc:.4f}")
        print(f"  Precision: {val_prec:.4f}")
        print(f"  Recall: {val_rec:.4f}")
        
        scheduler.step(val_f1)
        
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), f'model/singlelabel_finetuning/best_model_label_{target_label_idx}_aptos_cnn.pth')
            print("New best model saved!")
        
        early_stopping(val_f1)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break
        
        print()
    
    return best_f1

In [10]:
ncm_aptos_labels = [[0],[2],[1],[1,2],[5,6]]
for target_label_idx in range(5):
    
    start = time.time()
    best_mf1 = 0.0
    device = torch.device(cfg['DEVICE'])
    print("device : ", device)
    num_workers = mp.cpu_count()
    train_cfg, eval_cfg = cfg['TRAIN'], cfg['EVAL']
    dataset_cfg, model_cfg = cfg['DATASET'], cfg['MODEL']
    loss_cfg, optim_cfg, sched_cfg = cfg['LOSS'], cfg['OPTIMIZER'], cfg['SCHEDULER']
    epochs, lr = train_cfg['EPOCHS'], optim_cfg['LR']

    image_size = [256,256]
    image_dir = Path(dataset_cfg['ROOT']) / 'train_images'
    train_transform = get_train_augmentation(image_size)
    val_test_transform = get_val_test_transform(image_size)
    batch_size = 32


    dataset = eval(dataset_cfg['NAME'])(
        dataset_cfg['ROOT'] + '/combined_images',
        dataset_cfg['TRAIN_RATIO'],
        dataset_cfg['VALID_RATIO'],
        dataset_cfg['TEST_RATIO'],
        transform=None
    )
    trainset, valset, testset = dataset.get_splits()
    trainset.transform = train_transform
    valset.transform = val_test_transform
    testset.transform = val_test_transform

    # DataLoader 수정
    trainloader = DataLoader(
        trainset, 
        batch_sampler=BinaryBalancedBatchSampler(trainset, batch_size=batch_size,target_class = target_label_idx),
        num_workers=num_workers,
        pin_memory=True
    )
    #trainloader = DataLoader(trainset, batch_size=batch_size, num_workers=num_workers, drop_last=True, pin_memory=True)
    valloader = DataLoader(valset, batch_size=1, num_workers=1, pin_memory=True)
    testloader = DataLoader(testset, batch_size=1, num_workers=1, pin_memory=True)

    # Model definition (changed to binary classification)
    model = models.efficientnet_v2_m(pretrained=True)
    num_ftrs = model.classifier[1].in_features
    num_targets = len(ncm_aptos_labels[target_label_idx])
    
    if num_targets == 1:
        # 단일 레이블 케이스 (기존 코드와 동일)
        model.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Linear(num_ftrs, 1)
        )
        model.load_state_dict(torch.load(f'model/singlelabel/best_model_label_{ncm_aptos_labels[target_label_idx][0]}_nmc_cnn.pth'))
    else:
        # 다중 레이블 케이스
        model.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Linear(num_ftrs, num_targets)
        )
        model.load_state_dict(torch.load(f'model/singlelabel/best_model_labels_{"-".join(map(str,ncm_aptos_labels[target_label_idx]))}_nmc_cnn.pth'))
    
    model.classifier = nn.Sequential(
        nn.BatchNorm1d(num_ftrs),
        nn.Linear(num_ftrs, 1)
    )
    model = model.to(device)

    # L2 regularization
    weight_decay = 1e-4
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001, weight_decay=weight_decay)
    criterion = nn.BCEWithLogitsLoss()
    scaler = GradScaler(enabled=train_cfg['AMP'])
    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5, verbose=True)

    # Main execution code
    epochs = 100
    # 학습하고자 하는 라벨 인덱스


    best_f1 = train_and_evaluate(
        model, 
        trainloader, 
        valloader, 
        criterion, 
        optimizer, 
        scaler, 
        device, 
        epochs,
        target_label_idx
    )

    print(f"Training completed. Best F1 Score: {best_f1:.4f}")





device :  cuda:0
/data/public_data/aptos/combined_images
0    1263
2     699
1     259
4     207
3     135
Name: diagnosis, dtype: int64
Train size: 2563
0    271
2    150
1     55
4     44
3     29
Name: diagnosis, dtype: int64
Validation size: 549
0    271
2    150
1     56
4     44
3     29
Name: diagnosis, dtype: int64
Test size: 550


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Epoch 1/100


Training: 100%|██████████| 81/81 [01:00<00:00,  1.34it/s]
Evaluating: 100%|██████████| 549/549 [01:35<00:00,  5.74it/s]


Training Loss: 0.0630
Validation Metrics:
  F1 Score: 0.9870
  Accuracy: 0.9872
  Precision: 0.9925
  Recall: 0.9815
New best model saved!

Epoch 2/100


Training: 100%|██████████| 81/81 [00:48<00:00,  1.69it/s]
Evaluating: 100%|██████████| 549/549 [01:33<00:00,  5.87it/s]


Training Loss: 0.0293
Validation Metrics:
  F1 Score: 0.9907
  Accuracy: 0.9909
  Precision: 1.0000
  Recall: 0.9815
New best model saved!

Epoch 3/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.55it/s]
Evaluating: 100%|██████████| 549/549 [01:37<00:00,  5.63it/s]


Training Loss: 0.0114
Validation Metrics:
  F1 Score: 0.9607
  Accuracy: 0.9599
  Precision: 0.9308
  Recall: 0.9926

Epoch 4/100


Training: 100%|██████████| 81/81 [00:47<00:00,  1.69it/s]
Evaluating: 100%|██████████| 549/549 [01:43<00:00,  5.32it/s]


Training Loss: 0.0287
Validation Metrics:
  F1 Score: 0.9850
  Accuracy: 0.9854
  Precision: 1.0000
  Recall: 0.9705

Epoch 5/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.55it/s]
Evaluating: 100%|██████████| 549/549 [01:33<00:00,  5.84it/s]


Training Loss: 0.0097
Validation Metrics:
  F1 Score: 0.9871
  Accuracy: 0.9872
  Precision: 0.9889
  Recall: 0.9852

Epoch 6/100


Training: 100%|██████████| 81/81 [00:47<00:00,  1.71it/s]
Evaluating: 100%|██████████| 549/549 [01:38<00:00,  5.59it/s]


Training Loss: 0.0072
Validation Metrics:
  F1 Score: 0.9796
  Accuracy: 0.9800
  Precision: 0.9851
  Recall: 0.9742

Epoch 7/100


Training: 100%|██████████| 81/81 [00:50<00:00,  1.60it/s]
Evaluating: 100%|██████████| 549/549 [01:36<00:00,  5.69it/s]


Training Loss: 0.0069
Validation Metrics:
  F1 Score: 0.9796
  Accuracy: 0.9800
  Precision: 0.9851
  Recall: 0.9742

Epoch 8/100


Training: 100%|██████████| 81/81 [00:49<00:00,  1.64it/s]
Evaluating: 100%|██████████| 549/549 [01:41<00:00,  5.40it/s]


Training Loss: 0.0050
Validation Metrics:
  F1 Score: 0.9754
  Accuracy: 0.9763
  Precision: 1.0000
  Recall: 0.9520
Epoch 00008: reducing learning rate of group 0 to 1.0000e-05.

Epoch 9/100


Training: 100%|██████████| 81/81 [00:53<00:00,  1.50it/s]
Evaluating: 100%|██████████| 549/549 [01:30<00:00,  6.08it/s]


Training Loss: 0.0078
Validation Metrics:
  F1 Score: 0.9832
  Accuracy: 0.9836
  Precision: 0.9925
  Recall: 0.9742

Epoch 10/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.54it/s]
Evaluating: 100%|██████████| 549/549 [01:42<00:00,  5.34it/s]


Training Loss: 0.0023
Validation Metrics:
  F1 Score: 0.9870
  Accuracy: 0.9872
  Precision: 0.9962
  Recall: 0.9779

Epoch 11/100


Training: 100%|██████████| 81/81 [00:46<00:00,  1.73it/s]
Evaluating: 100%|██████████| 549/549 [01:39<00:00,  5.49it/s]


Training Loss: 0.0037
Validation Metrics:
  F1 Score: 0.9815
  Accuracy: 0.9818
  Precision: 0.9851
  Recall: 0.9779

Epoch 12/100


Training: 100%|██████████| 81/81 [00:51<00:00,  1.56it/s]
Evaluating: 100%|██████████| 549/549 [01:42<00:00,  5.37it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Training Loss: 0.0025
Validation Metrics:
  F1 Score: 0.9851
  Accuracy: 0.9854
  Precision: 0.9925
  Recall: 0.9779
Early stopping triggered
Training completed. Best F1 Score: 0.9907
device :  cuda:0
/data/public_data/aptos/combined_images
0    1263
2     699
1     259
4     207
3     135
Name: diagnosis, dtype: int64
Train size: 2563
0    271
2    150
1     55
4     44
3     29
Name: diagnosis, dtype: int64
Validation size: 549
0    271
2    150
1     56
4     44
3     29
Name: diagnosis, dtype: int64
Test size: 550
Epoch 1/100


Training: 100%|██████████| 81/81 [00:51<00:00,  1.58it/s]
Evaluating: 100%|██████████| 549/549 [01:28<00:00,  6.18it/s]


Training Loss: 0.2854
Validation Metrics:
  F1 Score: 0.6184
  Accuracy: 0.8944
  Precision: 0.4845
  Recall: 0.8545
New best model saved!

Epoch 2/100


Training: 100%|██████████| 81/81 [00:56<00:00,  1.44it/s]
Evaluating: 100%|██████████| 549/549 [01:25<00:00,  6.44it/s]


Training Loss: 0.1031
Validation Metrics:
  F1 Score: 0.6016
  Accuracy: 0.9107
  Precision: 0.5441
  Recall: 0.6727

Epoch 3/100


Training: 100%|██████████| 81/81 [00:46<00:00,  1.75it/s]
Evaluating: 100%|██████████| 549/549 [01:22<00:00,  6.65it/s]


Training Loss: 0.0675
Validation Metrics:
  F1 Score: 0.5849
  Accuracy: 0.9199
  Precision: 0.6078
  Recall: 0.5636

Epoch 4/100


Training: 100%|██████████| 81/81 [00:48<00:00,  1.66it/s]
Evaluating: 100%|██████████| 549/549 [01:31<00:00,  6.00it/s]


Training Loss: 0.0702
Validation Metrics:
  F1 Score: 0.5192
  Accuracy: 0.9089
  Precision: 0.5510
  Recall: 0.4909

Epoch 5/100


Training: 100%|██████████| 81/81 [00:59<00:00,  1.36it/s]
Evaluating: 100%|██████████| 549/549 [01:23<00:00,  6.59it/s]


Training Loss: 0.0344
Validation Metrics:
  F1 Score: 0.5926
  Accuracy: 0.9199
  Precision: 0.6038
  Recall: 0.5818

Epoch 6/100


Training: 100%|██████████| 81/81 [00:49<00:00,  1.65it/s]
Evaluating: 100%|██████████| 549/549 [01:30<00:00,  6.03it/s]


Training Loss: 0.0324
Validation Metrics:
  F1 Score: 0.5607
  Accuracy: 0.9144
  Precision: 0.5769
  Recall: 0.5455

Epoch 7/100


Training: 100%|██████████| 81/81 [00:53<00:00,  1.51it/s]
Evaluating: 100%|██████████| 549/549 [01:21<00:00,  6.72it/s]


Training Loss: 0.0268
Validation Metrics:
  F1 Score: 0.5664
  Accuracy: 0.9107
  Precision: 0.5517
  Recall: 0.5818
Epoch 00007: reducing learning rate of group 0 to 1.0000e-05.

Epoch 8/100


Training: 100%|██████████| 81/81 [00:54<00:00,  1.48it/s]
Evaluating: 100%|██████████| 549/549 [01:31<00:00,  5.98it/s]


Training Loss: 0.0125
Validation Metrics:
  F1 Score: 0.5794
  Accuracy: 0.9180
  Precision: 0.5962
  Recall: 0.5636

Epoch 9/100


Training: 100%|██████████| 81/81 [00:46<00:00,  1.74it/s]
Evaluating: 100%|██████████| 549/549 [01:32<00:00,  5.94it/s]


Training Loss: 0.0095
Validation Metrics:
  F1 Score: 0.5872
  Accuracy: 0.9180
  Precision: 0.5926
  Recall: 0.5818

Epoch 10/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.54it/s]
Evaluating: 100%|██████████| 549/549 [01:36<00:00,  5.67it/s]


Training Loss: 0.0098
Validation Metrics:
  F1 Score: 0.5872
  Accuracy: 0.9180
  Precision: 0.5926
  Recall: 0.5818

Epoch 11/100


Training: 100%|██████████| 81/81 [00:47<00:00,  1.71it/s]
Evaluating: 100%|██████████| 549/549 [01:27<00:00,  6.28it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Training Loss: 0.0099
Validation Metrics:
  F1 Score: 0.6000
  Accuracy: 0.9199
  Precision: 0.6000
  Recall: 0.6000
Early stopping triggered
Training completed. Best F1 Score: 0.6184
device :  cuda:0
/data/public_data/aptos/combined_images
0    1263
2     699
1     259
4     207
3     135
Name: diagnosis, dtype: int64
Train size: 2563
0    271
2    150
1     55
4     44
3     29
Name: diagnosis, dtype: int64
Validation size: 549
0    271
2    150
1     56
4     44
3     29
Name: diagnosis, dtype: int64
Test size: 550
Epoch 1/100


Training: 100%|██████████| 81/81 [00:53<00:00,  1.52it/s]
Evaluating: 100%|██████████| 549/549 [01:35<00:00,  5.76it/s]


Training Loss: 0.3091
Validation Metrics:
  F1 Score: 0.7433
  Accuracy: 0.8251
  Precision: 0.6205
  Recall: 0.9267
New best model saved!

Epoch 2/100


Training: 100%|██████████| 81/81 [00:50<00:00,  1.59it/s]
Evaluating: 100%|██████████| 549/549 [01:40<00:00,  5.44it/s]


Training Loss: 0.1897
Validation Metrics:
  F1 Score: 0.7762
  Accuracy: 0.8561
  Precision: 0.6749
  Recall: 0.9133
New best model saved!

Epoch 3/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.55it/s]
Evaluating: 100%|██████████| 549/549 [01:29<00:00,  6.14it/s]


Training Loss: 0.1362
Validation Metrics:
  F1 Score: 0.7701
  Accuracy: 0.8597
  Precision: 0.6973
  Recall: 0.8600

Epoch 4/100


Training: 100%|██████████| 81/81 [00:56<00:00,  1.43it/s]
Evaluating: 100%|██████████| 549/549 [01:45<00:00,  5.21it/s]


Training Loss: 0.1093
Validation Metrics:
  F1 Score: 0.7664
  Accuracy: 0.8634
  Precision: 0.7193
  Recall: 0.8200

Epoch 5/100


Training: 100%|██████████| 81/81 [00:46<00:00,  1.76it/s]
Evaluating: 100%|██████████| 549/549 [01:34<00:00,  5.83it/s]


Training Loss: 0.0814
Validation Metrics:
  F1 Score: 0.7716
  Accuracy: 0.8652
  Precision: 0.7184
  Recall: 0.8333

Epoch 6/100


Training: 100%|██████████| 81/81 [01:03<00:00,  1.28it/s]
Evaluating: 100%|██████████| 549/549 [01:34<00:00,  5.81it/s]


Training Loss: 0.0615
Validation Metrics:
  F1 Score: 0.7834
  Accuracy: 0.8670
  Precision: 0.7059
  Recall: 0.8800
New best model saved!

Epoch 7/100


Training: 100%|██████████| 81/81 [00:50<00:00,  1.61it/s]
Evaluating: 100%|██████████| 549/549 [01:42<00:00,  5.36it/s]


Training Loss: 0.0764
Validation Metrics:
  F1 Score: 0.7732
  Accuracy: 0.8707
  Precision: 0.7423
  Recall: 0.8067

Epoch 8/100


Training: 100%|██████████| 81/81 [00:57<00:00,  1.42it/s]
Evaluating: 100%|██████████| 549/549 [01:35<00:00,  5.77it/s]


Training Loss: 0.0431
Validation Metrics:
  F1 Score: 0.7750
  Accuracy: 0.8689
  Precision: 0.7294
  Recall: 0.8267

Epoch 9/100


Training: 100%|██████████| 81/81 [00:48<00:00,  1.65it/s]
Evaluating: 100%|██████████| 549/549 [01:42<00:00,  5.33it/s]


Training Loss: 0.0413
Validation Metrics:
  F1 Score: 0.7628
  Accuracy: 0.8652
  Precision: 0.7346
  Recall: 0.7933

Epoch 10/100


Training: 100%|██████████| 81/81 [00:58<00:00,  1.39it/s]
Evaluating: 100%|██████████| 549/549 [01:35<00:00,  5.73it/s]


Training Loss: 0.0512
Validation Metrics:
  F1 Score: 0.7677
  Accuracy: 0.8743
  Precision: 0.7755
  Recall: 0.7600

Epoch 11/100


Training: 100%|██████████| 81/81 [00:48<00:00,  1.66it/s]
Evaluating: 100%|██████████| 549/549 [01:43<00:00,  5.30it/s]


Training Loss: 0.0387
Validation Metrics:
  F1 Score: 0.7566
  Accuracy: 0.8652
  Precision: 0.7468
  Recall: 0.7667

Epoch 12/100


Training: 100%|██████████| 81/81 [01:00<00:00,  1.33it/s]
Evaluating: 100%|██████████| 549/549 [01:37<00:00,  5.65it/s]


Training Loss: 0.0416
Validation Metrics:
  F1 Score: 0.7286
  Accuracy: 0.8616
  Precision: 0.7846
  Recall: 0.6800
Epoch 00012: reducing learning rate of group 0 to 1.0000e-05.

Epoch 13/100


Training: 100%|██████████| 81/81 [00:49<00:00,  1.63it/s]
Evaluating: 100%|██████████| 549/549 [01:40<00:00,  5.44it/s]


Training Loss: 0.0334
Validation Metrics:
  F1 Score: 0.7818
  Accuracy: 0.8780
  Precision: 0.7643
  Recall: 0.8000

Epoch 14/100


Training: 100%|██████████| 81/81 [01:00<00:00,  1.33it/s]
Evaluating: 100%|██████████| 549/549 [01:41<00:00,  5.42it/s]


Training Loss: 0.0212
Validation Metrics:
  F1 Score: 0.7682
  Accuracy: 0.8725
  Precision: 0.7632
  Recall: 0.7733

Epoch 15/100


Training: 100%|██████████| 81/81 [00:50<00:00,  1.61it/s]
Evaluating: 100%|██████████| 549/549 [01:46<00:00,  5.16it/s]


Training Loss: 0.0194
Validation Metrics:
  F1 Score: 0.7708
  Accuracy: 0.8743
  Precision: 0.7682
  Recall: 0.7733

Epoch 16/100


Training: 100%|██████████| 81/81 [00:54<00:00,  1.47it/s]
Evaluating: 100%|██████████| 549/549 [01:33<00:00,  5.89it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Training Loss: 0.0168
Validation Metrics:
  F1 Score: 0.7682
  Accuracy: 0.8725
  Precision: 0.7632
  Recall: 0.7733
Early stopping triggered
Training completed. Best F1 Score: 0.7834
device :  cuda:0
/data/public_data/aptos/combined_images
0    1263
2     699
1     259
4     207
3     135
Name: diagnosis, dtype: int64
Train size: 2563
0    271
2    150
1     55
4     44
3     29
Name: diagnosis, dtype: int64
Validation size: 549
0    271
2    150
1     56
4     44
3     29
Name: diagnosis, dtype: int64
Test size: 550
Epoch 1/100


Training: 100%|██████████| 81/81 [00:48<00:00,  1.66it/s]
Evaluating: 100%|██████████| 549/549 [01:31<00:00,  6.02it/s]


Training Loss: 0.1856
Validation Metrics:
  F1 Score: 0.3636
  Accuracy: 0.9362
  Precision: 0.3846
  Recall: 0.3448
New best model saved!

Epoch 2/100


Training: 100%|██████████| 81/81 [00:58<00:00,  1.39it/s]
Evaluating: 100%|██████████| 549/549 [01:38<00:00,  5.55it/s]


Training Loss: 0.0714
Validation Metrics:
  F1 Score: 0.3704
  Accuracy: 0.9381
  Precision: 0.4000
  Recall: 0.3448
New best model saved!

Epoch 3/100


Training: 100%|██████████| 81/81 [00:49<00:00,  1.62it/s]
Evaluating: 100%|██████████| 549/549 [01:31<00:00,  6.00it/s]


Training Loss: 0.0632
Validation Metrics:
  F1 Score: 0.3077
  Accuracy: 0.9344
  Precision: 0.3478
  Recall: 0.2759

Epoch 4/100


Training: 100%|██████████| 81/81 [00:55<00:00,  1.46it/s]
Evaluating: 100%|██████████| 549/549 [01:36<00:00,  5.72it/s]


Training Loss: 0.0348
Validation Metrics:
  F1 Score: 0.4333
  Accuracy: 0.9381
  Precision: 0.4194
  Recall: 0.4483
New best model saved!

Epoch 5/100


Training: 100%|██████████| 81/81 [00:49<00:00,  1.64it/s]
Evaluating: 100%|██████████| 549/549 [01:36<00:00,  5.66it/s]


Training Loss: 0.0332
Validation Metrics:
  F1 Score: 0.4127
  Accuracy: 0.9326
  Precision: 0.3824
  Recall: 0.4483

Epoch 6/100


Training: 100%|██████████| 81/81 [00:55<00:00,  1.45it/s]
Evaluating: 100%|██████████| 549/549 [01:36<00:00,  5.71it/s]


Training Loss: 0.0291
Validation Metrics:
  F1 Score: 0.4400
  Accuracy: 0.9490
  Precision: 0.5238
  Recall: 0.3793
New best model saved!

Epoch 7/100


Training: 100%|██████████| 81/81 [00:51<00:00,  1.59it/s]
Evaluating: 100%|██████████| 549/549 [01:35<00:00,  5.73it/s]


Training Loss: 0.0273
Validation Metrics:
  F1 Score: 0.2128
  Accuracy: 0.9326
  Precision: 0.2778
  Recall: 0.1724

Epoch 8/100


Training: 100%|██████████| 81/81 [00:50<00:00,  1.59it/s]
Evaluating: 100%|██████████| 549/549 [01:39<00:00,  5.53it/s]


Training Loss: 0.0263
Validation Metrics:
  F1 Score: 0.2222
  Accuracy: 0.9490
  Precision: 0.5714
  Recall: 0.1379

Epoch 9/100


Training: 100%|██████████| 81/81 [00:58<00:00,  1.37it/s]
Evaluating: 100%|██████████| 549/549 [01:34<00:00,  5.79it/s]


Training Loss: 0.0155
Validation Metrics:
  F1 Score: 0.3077
  Accuracy: 0.9508
  Precision: 0.6000
  Recall: 0.2069

Epoch 10/100


Training: 100%|██████████| 81/81 [00:49<00:00,  1.62it/s]
Evaluating: 100%|██████████| 549/549 [01:39<00:00,  5.51it/s]


Training Loss: 0.0228
Validation Metrics:
  F1 Score: 0.2927
  Accuracy: 0.9472
  Precision: 0.5000
  Recall: 0.2069

Epoch 11/100


Training: 100%|██████████| 81/81 [00:57<00:00,  1.41it/s]
Evaluating: 100%|██████████| 549/549 [01:31<00:00,  6.02it/s]


Training Loss: 0.0232
Validation Metrics:
  F1 Score: 0.3111
  Accuracy: 0.9435
  Precision: 0.4375
  Recall: 0.2414

Epoch 12/100


Training: 100%|██████████| 81/81 [00:50<00:00,  1.60it/s]
Evaluating: 100%|██████████| 549/549 [01:39<00:00,  5.49it/s]


Training Loss: 0.0192
Validation Metrics:
  F1 Score: 0.3415
  Accuracy: 0.9508
  Precision: 0.5833
  Recall: 0.2414
Epoch 00012: reducing learning rate of group 0 to 1.0000e-05.

Epoch 13/100


Training: 100%|██████████| 81/81 [01:03<00:00,  1.27it/s]
Evaluating: 100%|██████████| 549/549 [01:33<00:00,  5.90it/s]


Training Loss: 0.0136
Validation Metrics:
  F1 Score: 0.3333
  Accuracy: 0.9490
  Precision: 0.5385
  Recall: 0.2414

Epoch 14/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.55it/s]
Evaluating: 100%|██████████| 549/549 [01:33<00:00,  5.86it/s]


Training Loss: 0.0148
Validation Metrics:
  F1 Score: 0.3000
  Accuracy: 0.9490
  Precision: 0.5455
  Recall: 0.2069

Epoch 15/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.55it/s]
Evaluating: 100%|██████████| 549/549 [01:36<00:00,  5.70it/s]


Training Loss: 0.0095
Validation Metrics:
  F1 Score: 0.2857
  Accuracy: 0.9454
  Precision: 0.4615
  Recall: 0.2069

Epoch 16/100


Training: 100%|██████████| 81/81 [00:51<00:00,  1.59it/s]
Evaluating: 100%|██████████| 549/549 [01:39<00:00,  5.53it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Training Loss: 0.0119
Validation Metrics:
  F1 Score: 0.2857
  Accuracy: 0.9454
  Precision: 0.4615
  Recall: 0.2069
Early stopping triggered
Training completed. Best F1 Score: 0.4400
device :  cuda:0
/data/public_data/aptos/combined_images
0    1263
2     699
1     259
4     207
3     135
Name: diagnosis, dtype: int64
Train size: 2563
0    271
2    150
1     55
4     44
3     29
Name: diagnosis, dtype: int64
Validation size: 549
0    271
2    150
1     56
4     44
3     29
Name: diagnosis, dtype: int64
Test size: 550
Epoch 1/100


Training: 100%|██████████| 81/81 [00:54<00:00,  1.48it/s]
Evaluating: 100%|██████████| 549/549 [01:36<00:00,  5.72it/s]


Training Loss: 0.3001
Validation Metrics:
  F1 Score: 0.5490
  Accuracy: 0.9162
  Precision: 0.4828
  Recall: 0.6364
New best model saved!

Epoch 2/100


Training: 100%|██████████| 81/81 [00:48<00:00,  1.68it/s]
Evaluating: 100%|██████████| 549/549 [01:42<00:00,  5.38it/s]


Training Loss: 0.1124
Validation Metrics:
  F1 Score: 0.5641
  Accuracy: 0.9381
  Precision: 0.6471
  Recall: 0.5000
New best model saved!

Epoch 3/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.55it/s]
Evaluating: 100%|██████████| 549/549 [01:29<00:00,  6.10it/s]


Training Loss: 0.0562
Validation Metrics:
  F1 Score: 0.6341
  Accuracy: 0.9454
  Precision: 0.6842
  Recall: 0.5909
New best model saved!

Epoch 4/100


Training: 100%|██████████| 81/81 [00:53<00:00,  1.51it/s]
Evaluating: 100%|██████████| 549/549 [01:43<00:00,  5.29it/s]


Training Loss: 0.0628
Validation Metrics:
  F1 Score: 0.5747
  Accuracy: 0.9326
  Precision: 0.5814
  Recall: 0.5682

Epoch 5/100


Training: 100%|██████████| 81/81 [00:49<00:00,  1.63it/s]
Evaluating: 100%|██████████| 549/549 [01:38<00:00,  5.56it/s]


Training Loss: 0.0464
Validation Metrics:
  F1 Score: 0.6582
  Accuracy: 0.9508
  Precision: 0.7429
  Recall: 0.5909
New best model saved!

Epoch 6/100


Training: 100%|██████████| 81/81 [00:53<00:00,  1.51it/s]
Evaluating: 100%|██████████| 549/549 [01:37<00:00,  5.65it/s]


Training Loss: 0.0352
Validation Metrics:
  F1 Score: 0.5789
  Accuracy: 0.9417
  Precision: 0.6875
  Recall: 0.5000

Epoch 7/100


Training: 100%|██████████| 81/81 [00:49<00:00,  1.63it/s]
Evaluating: 100%|██████████| 549/549 [01:46<00:00,  5.17it/s]


Training Loss: 0.0364
Validation Metrics:
  F1 Score: 0.6667
  Accuracy: 0.9508
  Precision: 0.7297
  Recall: 0.6136
New best model saved!

Epoch 8/100


Training: 100%|██████████| 81/81 [00:51<00:00,  1.56it/s]
Evaluating: 100%|██████████| 549/549 [01:38<00:00,  5.55it/s]


Training Loss: 0.0196
Validation Metrics:
  F1 Score: 0.6667
  Accuracy: 0.9508
  Precision: 0.7297
  Recall: 0.6136

Epoch 9/100


Training: 100%|██████████| 81/81 [00:50<00:00,  1.61it/s]
Evaluating: 100%|██████████| 549/549 [01:44<00:00,  5.24it/s]


Training Loss: 0.0246
Validation Metrics:
  F1 Score: 0.5263
  Accuracy: 0.9344
  Precision: 0.6250
  Recall: 0.4545

Epoch 10/100


Training: 100%|██████████| 81/81 [00:56<00:00,  1.45it/s]
Evaluating: 100%|██████████| 549/549 [01:35<00:00,  5.77it/s]


Training Loss: 0.0298
Validation Metrics:
  F1 Score: 0.6667
  Accuracy: 0.9490
  Precision: 0.7000
  Recall: 0.6364

Epoch 11/100


Training: 100%|██████████| 81/81 [00:48<00:00,  1.67it/s]
Evaluating: 100%|██████████| 549/549 [01:44<00:00,  5.26it/s]


Training Loss: 0.0277
Validation Metrics:
  F1 Score: 0.6234
  Accuracy: 0.9472
  Precision: 0.7273
  Recall: 0.5455

Epoch 12/100


Training: 100%|██████████| 81/81 [00:51<00:00,  1.57it/s]
Evaluating: 100%|██████████| 549/549 [01:33<00:00,  5.85it/s]


Training Loss: 0.0128
Validation Metrics:
  F1 Score: 0.6234
  Accuracy: 0.9472
  Precision: 0.7273
  Recall: 0.5455

Epoch 13/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.54it/s]
Evaluating: 100%|██████████| 549/549 [01:42<00:00,  5.38it/s]


Training Loss: 0.0252
Validation Metrics:
  F1 Score: 0.6500
  Accuracy: 0.9490
  Precision: 0.7222
  Recall: 0.5909
Epoch 00013: reducing learning rate of group 0 to 1.0000e-05.

Epoch 14/100


Training: 100%|██████████| 81/81 [00:58<00:00,  1.38it/s]
Evaluating: 100%|██████████| 549/549 [01:36<00:00,  5.67it/s]


Training Loss: 0.0124
Validation Metrics:
  F1 Score: 0.6154
  Accuracy: 0.9454
  Precision: 0.7059
  Recall: 0.5455

Epoch 15/100


Training: 100%|██████████| 81/81 [00:54<00:00,  1.47it/s]
Evaluating: 100%|██████████| 549/549 [01:45<00:00,  5.22it/s]


Training Loss: 0.0201
Validation Metrics:
  F1 Score: 0.6329
  Accuracy: 0.9472
  Precision: 0.7143
  Recall: 0.5682

Epoch 16/100


Training: 100%|██████████| 81/81 [00:56<00:00,  1.44it/s]
Evaluating: 100%|██████████| 549/549 [01:38<00:00,  5.55it/s]


Training Loss: 0.0097
Validation Metrics:
  F1 Score: 0.6154
  Accuracy: 0.9454
  Precision: 0.7059
  Recall: 0.5455

Epoch 17/100


Training: 100%|██████████| 81/81 [00:52<00:00,  1.54it/s]
Evaluating: 100%|██████████| 549/549 [01:45<00:00,  5.22it/s]

Training Loss: 0.0109
Validation Metrics:
  F1 Score: 0.6410
  Accuracy: 0.9490
  Precision: 0.7353
  Recall: 0.5682
Early stopping triggered
Training completed. Best F1 Score: 0.6667





In [11]:
start = time.time()
best_mf1 = 0.0
device = torch.device(cfg['DEVICE'])
print("device : ", device)
num_workers = mp.cpu_count()
train_cfg, eval_cfg = cfg['TRAIN'], cfg['EVAL']
dataset_cfg, model_cfg = cfg['DATASET'], cfg['MODEL']
loss_cfg, optim_cfg, sched_cfg = cfg['LOSS'], cfg['OPTIMIZER'], cfg['SCHEDULER']
epochs, lr = train_cfg['EPOCHS'], optim_cfg['LR']

image_size = [256,256]
image_dir = Path(dataset_cfg['ROOT']) / 'train_images'
train_transform = get_train_augmentation(image_size)
val_test_transform = get_val_test_transform(image_size)
batch_size = 32


dataset = eval(dataset_cfg['NAME'])(
    dataset_cfg['ROOT'] + '/combined_images',
    dataset_cfg['TRAIN_RATIO'],
    dataset_cfg['VALID_RATIO'],
    dataset_cfg['TEST_RATIO'],
    transform=None
)
trainset, valset, testset = dataset.get_splits()
trainset.transform = train_transform
valset.transform = val_test_transform
testset.transform = val_test_transform

#trainloader = DataLoader(trainset, batch_size=batch_size, num_workers=num_workers, drop_last=True, pin_memory=True)
valloader = DataLoader(valset, batch_size=1, num_workers=1, pin_memory=True)
testloader = DataLoader(testset, batch_size=1, num_workers=1, pin_memory=True)

device :  cuda:0
/data/public_data/aptos/combined_images
0    1263
2     699
1     259
4     207
3     135
Name: diagnosis, dtype: int64
Train size: 2563
0    271
2    150
1     55
4     44
3     29
Name: diagnosis, dtype: int64
Validation size: 549
0    271
2    150
1     56
4     44
3     29
Name: diagnosis, dtype: int64
Test size: 550


In [13]:
for target_label_idx in range(5):
    # Model definition (changed to binary classification)
    efficientnet = models.efficientnet_v2_m(pretrained=True)
    num_ftrs = efficientnet.classifier[1].in_features
    
    efficientnet.classifier = nn.Sequential(
        nn.BatchNorm1d(num_ftrs),
        nn.Linear(num_ftrs, 1)
    )
    efficientnet = efficientnet.to(device)
    
    # Final evaluation on test set
    efficientnet.load_state_dict(torch.load(f'model/singlelabel_finetuning/best_model_label_{target_label_idx}_aptos_cnn.pth'))
    test_f1, test_acc, test_prec, test_rec = evaluate(efficientnet, testloader, device, target_label_idx)
    print(f"Test Results:")
    print(f"  F1 Score: {test_f1:.4f}")
    print(f"  Accuracy: {test_acc:.4f}")
    print(f"  Precision: {test_prec:.4f}")
    print(f"  Recall: {test_rec:.4f}")

Evaluating: 100%|██████████| 550/550 [01:32<00:00,  5.94it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Test Results:
  F1 Score: 0.9851
  Accuracy: 0.9855
  Precision: 0.9962
  Recall: 0.9742


Evaluating: 100%|██████████| 550/550 [01:36<00:00,  5.72it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Test Results:
  F1 Score: 0.5417
  Accuracy: 0.8800
  Precision: 0.4432
  Recall: 0.6964


Evaluating: 100%|██████████| 550/550 [01:28<00:00,  6.25it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Test Results:
  F1 Score: 0.7600
  Accuracy: 0.8473
  Precision: 0.6650
  Recall: 0.8867


Evaluating: 100%|██████████| 550/550 [01:29<00:00,  6.16it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Test Results:
  F1 Score: 0.4483
  Accuracy: 0.9418
  Precision: 0.4483
  Recall: 0.4483


Evaluating: 100%|██████████| 550/550 [01:28<00:00,  6.21it/s]

Test Results:
  F1 Score: 0.6000
  Accuracy: 0.9418
  Precision: 0.6667
  Recall: 0.5455



