### 싱글모델로 분류

In [1]:

import torch 
import argparse
import yaml
import time
import multiprocessing as mp
import torch.nn.functional as F
from tabulate import tabulate
from tqdm import tqdm
from torch.utils.data import DataLoader
from pathlib import Path
#from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import GradScaler, autocast
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DistributedSampler, RandomSampler
from torch import distributed as dist
from nmc.models import *
from nmc.datasets import * 
from nmc.augmentations import get_train_augmentation, get_val_augmentation
from nmc.losses import get_loss
from nmc.schedulers import get_scheduler
from nmc.optimizers import get_optimizer
from nmc.utils.utils import fix_seeds, setup_cudnn, cleanup_ddp, setup_ddp
from tools.val import evaluate_epi
from nmc.utils.episodic_utils import * 
from scipy.cluster import hierarchy
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from torchvision import models
import torch.nn as nn
from torch.optim import lr_scheduler
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mutual_info_score
from scipy.cluster import hierarchy
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, hamming_loss
from torch.utils.data import Dataset, DataLoader, Sampler
from torch.utils.data import Subset
import torch.optim as optim
from torchvision import transforms
from PIL import Image
import cv2
import random

In [2]:
with open('../configs/NMC.yaml') as f:
    cfg = yaml.load(f, Loader=yaml.SafeLoader)
print(cfg)
fix_seeds(3407)
setup_cudnn()
gpu = setup_ddp()
save_dir = Path(cfg['SAVE_DIR'])
save_dir.mkdir(exist_ok=True)
cleanup_ddp()

{'DEVICE': 'cuda:0', 'SAVE_DIR': 'output', 'MODEL': {'NAME': 'EfficientNetV2MModelMulti', 'BACKBONE': 'EfficientNetV2', 'PRETRAINED': '/workspace/jhmoon/nmc_2024/checkpoints/pretrained/tf_efficientnetv2_m_weights.pth', 'UNFREEZE': 'full', 'VERSION': "384_32_loss'"}, 'DATASET': {'NAME': 'NMCDataset', 'ROOT': '/datas/fundus_dataset/NMC', 'TRAIN_RATIO': 0.7, 'VALID_RATIO': 0.15, 'TEST_RATIO': 0.15}, 'TRAIN': {'IMAGE_SIZE': [384, 384], 'BATCH_SIZE': 32, 'EPOCHS': 100, 'EVAL_INTERVAL': 1, 'AMP': False, 'DDP': False}, 'LOSS': {'NAME': 'BCEWithLogitsLoss', 'CLS_WEIGHTS': False}, 'OPTIMIZER': {'NAME': 'adamw', 'LR': 0.1, 'WEIGHT_DECAY': 0.01}, 'SCHEDULER': {'NAME': 'warmuppolylr', 'POWER': 0.9, 'WARMUP': 10, 'WARMUP_RATIO': 0.1}, 'EVAL': {'MODEL_PATH': 'checkpoints/pretrained/FGMaxxVit/FGMaxxVit.FGMaxxVit.NMC.pth', 'IMAGE_SIZE': [384, 384]}, 'TEST': {'MODEL_PATH': 'checkpoints/pretrained/FGMaxxVit/FGMaxxVit.FGMaxxVit.NMC.pth', 'FILE': 'assests/ade', 'IMAGE_SIZE': [384, 384], 'OVERLAY': True}}


In [3]:
# Early Stopping
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_score):
        if self.best_score is None:
            self.best_score = val_score
        elif val_score < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = val_score
            self.counter = 0

In [4]:
def get_train_augmentation(size):
    return transforms.Compose([
        transforms.Resize(size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.Lambda(lambda x: x.float() if x.dtype == torch.uint8 else x),
        transforms.Lambda(lambda x: x / 255.0 if x.max() > 1.0 else x),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

def get_val_test_transform(size):
    return transforms.Compose([
        transforms.Resize(size),
        transforms.Lambda(lambda x: x.float() if x.dtype == torch.uint8 else x),
        transforms.Lambda(lambda x: x / 255.0 if x.max() > 1.0 else x),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


In [5]:
class MultiTargetBalancedBatchSampler(Sampler):
    def __init__(self, dataset, batch_size, target_classes):
        self.dataset = dataset
        self.batch_size = batch_size
        self.target_classes = target_classes
        
        # 데이터셋에서 레이블 추출
        if hasattr(dataset, 'labels'):
            self.labels = dataset.labels
            if isinstance(self.labels, np.ndarray):
                self.labels = torch.from_numpy(self.labels)
        elif hasattr(dataset, 'targets'):
            self.labels = dataset.targets
            if isinstance(self.labels, np.ndarray):
                self.labels = torch.from_numpy(self.labels)
        else:
            try:
                self.labels = [sample[1] for sample in dataset]
                if isinstance(self.labels[0], np.ndarray):
                    self.labels = torch.from_numpy(np.array(self.labels))
                else:
                    self.labels = torch.tensor(self.labels)
            except:
                raise ValueError("Cannot access labels from dataset")
        
        # 각 타겟 클래스와 나머지 클래스의 인덱스 저장
        self.target_indices = {}
        for target in target_classes:
            if len(self.labels.shape) > 1:
                self.target_indices[target] = torch.where(self.labels[:, target] == 1)[0]
            else:
                self.target_indices[target] = torch.where(self.labels == target)[0]
        
        # 나머지 클래스의 인덱스 저장
        if len(self.labels.shape) > 1:
            self.other_indices = torch.where(
                torch.sum(self.labels[:, target_classes], dim=1) == 0)[0]
        else:
            mask = torch.ones_like(self.labels, dtype=torch.bool)
            for target in target_classes:
                mask &= (self.labels != target)
            self.other_indices = torch.where(mask)[0]
        
        # 각 그룹당 샘플 수 계산
        n_groups = len(target_classes) + 1  # 타겟 클래스들 + 나머지
        self.samples_per_group = batch_size // n_groups
        
        self.n_batches = len(self.dataset) // batch_size
        if len(self.dataset) % batch_size != 0:
            self.n_batches += 1
    
    def __iter__(self):
        for _ in range(self.n_batches):
            batch_indices = []
            
            # 각 타겟 클래스에서 샘플링
            for target in self.target_classes:
                target_selected = self.target_indices[target][
                    torch.randint(len(self.target_indices[target]), 
                                (self.samples_per_group,))
                ]
                batch_indices.extend(target_selected.tolist())
            
            # 나머지 클래스들에서 샘플링
            other_selected = self.other_indices[
                torch.randint(len(self.other_indices), 
                            (self.samples_per_group,))
            ]
            batch_indices.extend(other_selected.tolist())
            
            # 배치 셔플
            random.shuffle(batch_indices)
            
            # 배치 크기에 맞게 자르기 (나누어 떨어지지 않는 경우 처리)
            if len(batch_indices) > self.batch_size:
                batch_indices = batch_indices[:self.batch_size]
            
            yield batch_indices
    
    def __len__(self):
        return self.n_batches

In [6]:
def train_epoch(model, dataloader, criterion, optimizer, scaler, device, target_label_idx):
    model.train()
    running_loss = 0.0
    num_targets = len(target_label_idx)

    for images, labels in tqdm(dataloader, desc="Training"):
        images = images.to(device)
        
        if num_targets == 1:
            # 단일 레이블 케이스
            target_labels = labels[:, target_label_idx].to(device)
            
            optimizer.zero_grad()
            
            with autocast(enabled=True):
                outputs = model(images)
                # 차원을 맞춰줌
                outputs = outputs.view(-1)  # or outputs.squeeze()
                target_labels = target_labels.view(-1)  # or target_labels.squeeze()
                loss = criterion(outputs, target_labels)
        else:
            # 다중 레이블 케이스
            target_labels = labels[:, target_label_idx].to(device)
            
            optimizer.zero_grad()
            
            with autocast(enabled=True):
                outputs = model(images)
                loss = criterion(outputs, target_labels)

        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        running_loss += loss.item()

    return running_loss / len(dataloader)

In [7]:
def evaluate(model, dataloader, device, target_label_idx):
   model.eval()
   all_preds = []
   all_labels = []
   num_targets = len(target_label_idx)
   
   with torch.no_grad():
       for images, labels in tqdm(dataloader, desc="Evaluating"):
           images = images.to(device)
           
           if num_targets == 1:
               # 단일 레이블 케이스
               target_labels = labels[:, target_label_idx].to(device)
               outputs = model(images)
               
               # 차원 처리
               if len(outputs.shape) == 2:
                   outputs = outputs.squeeze(1)
               
               preds = (torch.sigmoid(outputs) > 0.5).float()
               
               all_preds.append(preds.cpu().numpy().reshape(-1))
               all_labels.append(target_labels.cpu().numpy().reshape(-1))
           
           else:
               # 다중 레이블 케이스
               target_labels = labels[:, target_label_idx].to(device)
               outputs = model(images)
               
               # 각 레이블에 대한 예측
               preds = (torch.sigmoid(outputs) > 0.5).float()
               
               all_preds.append(preds.cpu().numpy())
               all_labels.append(target_labels.cpu().numpy())
   
   # numpy array로 변환
   all_preds = np.concatenate(all_preds)
   all_labels = np.concatenate(all_labels)
   
   if num_targets == 1:
       # 단일 레이블 메트릭
       f1 = f1_score(all_labels, all_preds, average='binary')
       accuracy = accuracy_score(all_labels, all_preds)
       precision = precision_score(all_labels, all_preds)
       recall = recall_score(all_labels, all_preds)
       
       return f1, accuracy, precision, recall
   else:
       # 다중 레이블 메트릭
       f1 = f1_score(all_labels, all_preds, average='macro')
       accuracy = accuracy_score(all_labels, all_preds)
       precision = precision_score(all_labels, all_preds, average='macro')
       recall = recall_score(all_labels, all_preds, average='macro')
       
       return f1, accuracy, precision, recall

In [8]:
def train_and_evaluate(model, train_loader, val_loader, criterion, optimizer, scaler, device, epochs, target_label_idx):
    best_f1 = 0.0
    early_stopping = EarlyStopping(patience=10, min_delta=0.001)
    num_targets = len(target_label_idx)
    
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        
        train_loss = train_epoch(model, train_loader, criterion, optimizer, scaler, device, target_label_idx)
        
        if num_targets == 1:
            val_f1, val_acc, val_prec, val_rec = evaluate(model, val_loader, device, target_label_idx)
            
            print(f"Training Loss: {train_loss:.4f}")
            print(f"Validation Metrics:")
            print(f"  F1 Score: {val_f1:.4f}")
            print(f"  Accuracy: {val_acc:.4f}") 
            print(f"  Precision: {val_prec:.4f}")
            print(f"  Recall: {val_rec:.4f}")
            
            scheduler.step(val_f1)
            
            if val_f1 > best_f1:
                best_f1 = val_f1
                torch.save(model.state_dict(), f'model/singlelabel/best_model_label_{target_label_idx[0]}_nmc_cnn.pth')
                print("New best model saved!")
        else:
            # 모든 메트릭을 받아서 f1만 사용
            val_f1, val_acc, val_prec, val_rec = evaluate(model, val_loader, device, target_label_idx)
            
            print(f"Training Loss: {train_loss:.4f}")
            print(f"Validation Metrics:")
            print(f"  Macro F1 Score: {val_f1:.4f}")
            print(f"  Macro Accuracy: {val_acc:.4f}")
            print(f"  Macro Precision: {val_prec:.4f}")
            print(f"  Macro Recall: {val_rec:.4f}")
            
            scheduler.step(val_f1)
            
            if val_f1 > best_f1:
                best_f1 = val_f1
                torch.save(model.state_dict(), f'model/singlelabel/best_model_labels_{"-".join(map(str,target_label_idx))}_nmc_cnn.pth')
                print("New best model saved!")
        
        early_stopping(val_f1)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break
            
        print()
    
    return best_f1

In [10]:
ncm_aptos_labels = [[0],[2],[1],[1,2],[5,6]]
for target_label_idx in ncm_aptos_labels:
    if target_label_idx == [0] or target_label_idx == [2] or target_label_idx == [1]:
        continue
    start = time.time()
    best_mf1 = 0.0
    device = torch.device(cfg['DEVICE'])
    print("device : ", device)
    num_workers = mp.cpu_count()
    train_cfg, eval_cfg = cfg['TRAIN'], cfg['EVAL']
    dataset_cfg, model_cfg = cfg['DATASET'], cfg['MODEL']
    loss_cfg, optim_cfg, sched_cfg = cfg['LOSS'], cfg['OPTIMIZER'], cfg['SCHEDULER']
    epochs, lr = train_cfg['EPOCHS'], optim_cfg['LR']

    image_size = [256,256]
    image_dir = Path(dataset_cfg['ROOT']) / 'train_images'
    train_transform = get_train_augmentation(image_size)
    val_test_transform = get_val_test_transform(image_size)
    batch_size = 32


    dataset = eval(dataset_cfg['NAME'])(
        dataset_cfg['ROOT'] + '/cropped_images',
        dataset_cfg['TRAIN_RATIO'],
        dataset_cfg['VALID_RATIO'],
        dataset_cfg['TEST_RATIO'],
        transform=None
    )
    trainset, valset, testset = dataset.get_splits()
    trainset.transform = train_transform
    valset.transform = val_test_transform
    testset.transform = val_test_transform



    # DataLoader 수정
    trainloader = DataLoader(
        trainset, 
        batch_sampler=MultiTargetBalancedBatchSampler(trainset, batch_size=batch_size, target_classes =target_label_idx),
        num_workers=num_workers,
        pin_memory=True
    )
    # trainloader = DataLoader(trainset, batch_size=batch_size, num_workers=num_workers, drop_last=True, pin_memory=True)
    valloader = DataLoader(valset, batch_size=1, num_workers=1, pin_memory=True)
    testloader = DataLoader(testset, batch_size=1, num_workers=1, pin_memory=True)
    
    # Model definition (changed to binary classification)
    efficientnet = models.efficientnet_v2_m(pretrained=True)
    num_ftrs = efficientnet.classifier[1].in_features
    num_targets = len(target_label_idx)
    
    
    if num_targets == 1:
        # 단일 레이블 케이스 (기존 코드와 동일)
        efficientnet.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Linear(num_ftrs, 1)
        )
    else:
        # 다중 레이블 케이스
        efficientnet.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Linear(num_ftrs, num_targets)
        )
    efficientnet = efficientnet.to(device)
    
    # L2 regularization
    weight_decay = 1e-4
    optimizer = torch.optim.AdamW(efficientnet.parameters(), lr=0.0001, weight_decay=weight_decay)
    criterion = nn.BCEWithLogitsLoss()
    scaler = GradScaler(enabled=train_cfg['AMP'])
    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5, verbose=True)

    # Main execution code
    epochs = 100


    # best_f1 = train_and_evaluate(
    #     efficientnet, 
    #     trainloader, 
    #     valloader, 
    #     criterion, 
    #     optimizer, 
    #     scaler, 
    #     device, 
    #     epochs,
    #     target_label_idx
    # )

    print(f"Training completed. Best F1 Score: {best_f1:.4f}")

device :  cuda:0
/datas/fundus_dataset/NMC/cropped_images
label
(0,)               1935
(3,)                538
(1, 2, 3)           533
(1, 2)              288
(2,)                229
(1, 2, 3, 4)        186
(2, 3)              164
(1,)                151
(4,)                 40
(1, 3)               32
(1, 2, 4)            26
(3, 4)               26
(1, 4)               13
(2, 4)               11
(1, 2, 3, 4, 5)      11
(5,)                  9
(2, 3, 4)             9
(1, 2, 3, 5)          8
(1, 2, 5)             7
(1, 2, 3, 5, 6)       5
(1, 2, 3, 6)          4
(1, 3, 4)             2
(1, 3, 6)             1
(6,)                  1
(1, 2, 6)             1
(1, 2, 3, 4, 6)       1
Name: count, dtype: int64
train size: 4231
label
(0,)            415
(1, 2, 3)       112
(3,)            112
(1, 2)           69
(1, 2, 3, 4)     45
(2,)             43
(2, 3)           39
(1,)             26
(1, 3)           10
(4,)              9
(3, 4)            5
(5,)              5
(2, 4)            5
(1,



NameError: name 'best_f1' is not defined

In [13]:
def evaluate(model, dataloader, device, target_label_idx):
   model.eval()
   all_preds = []
   all_labels = []
   num_targets = len(target_label_idx)
   
   with torch.no_grad():
       for images, labels in tqdm(dataloader, desc="Evaluating"):
           images = images.to(device)
           
           if num_targets == 1:
               # 단일 레이블 케이스
               target_labels = labels[:, target_label_idx].to(device)
               outputs = model(images)
               
               # 차원 처리
               if len(outputs.shape) == 2:
                   outputs = outputs.squeeze(1)
               
               preds = (torch.sigmoid(outputs) > 0.5).float()
               
               all_preds.append(preds.cpu().numpy().reshape(-1))
               all_labels.append(target_labels.cpu().numpy().reshape(-1))
           
           else:
               # 다중 레이블 케이스
               target_labels = labels[:, target_label_idx].to(device)
               outputs = model(images)
               
               # 각 레이블에 대한 예측
               preds = (torch.sigmoid(outputs) > 0.5).float()
               
               all_preds.append(preds.cpu().numpy())
               all_labels.append(target_labels.cpu().numpy())
   
   # numpy array로 변환
   all_preds = np.concatenate(all_preds)
   all_labels = np.concatenate(all_labels)
   
   if num_targets == 1:
       # 단일 레이블 메트릭
       f1 = f1_score(all_labels, all_preds, average='binary')
       accuracy = accuracy_score(all_labels, all_preds)
       precision = precision_score(all_labels, all_preds)
       recall = recall_score(all_labels, all_preds)
       
       return f1, accuracy, precision, recall
   else:
       # 다중 레이블 메트릭
       # 클래스별 F1 점수 계산
       class_f1s = []
       for i in range(num_targets):
           class_f1 = f1_score(all_labels[:, i], all_preds[:, i], average='binary')
           class_f1s.append(class_f1)
           precision = precision_score(all_labels[:, i], all_preds[:, i])
           recall = recall_score(all_labels[:, i], all_preds[:, i])
           print(f"Class {target_label_idx[i]} F1: {class_f1:.4f}")
           print("precision: ", precision)
           print("recal: ",recall)       
       # 전체 평균 메트릭
       macro_f1 = np.mean(class_f1s)
       accuracy = accuracy_score(all_labels, all_preds)
       precision = precision_score(all_labels, all_preds, average='macro')
       recall = recall_score(all_labels, all_preds, average='macro')
       
       return macro_f1, accuracy, precision, recall

In [15]:
ncm_aptos_labels = [[0],[2],[1],[1,2],[5,6]]
for target_label_idx in ncm_aptos_labels:
    # Model definition (changed to binary classification)
    efficientnet = models.efficientnet_v2_m(pretrained=True)
    num_ftrs = efficientnet.classifier[1].in_features
    num_targets = len(target_label_idx)
    
    
    if num_targets == 1:
        # 단일 레이블 케이스 (기존 코드와 동일)
        efficientnet.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Linear(num_ftrs, 1)
        )
    else:
        # 다중 레이블 케이스
        efficientnet.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Linear(num_ftrs, num_targets)
        )
    efficientnet = efficientnet.to(device)
    
    if len(target_label_idx)==1:
        # Final evaluation on test set
        efficientnet.load_state_dict(torch.load(f'model/singlelabel/best_model_label_{target_label_idx[0]}_nmc_cnn.pth'))
    else:
        efficientnet.load_state_dict(torch.load(f'model/singlelabel/best_model_labels_{"-".join(map(str,target_label_idx))}_nmc_cnn.pth'))
    test_f1, test_acc, test_prec, test_rec = evaluate(efficientnet, testloader, device, target_label_idx)
    print(f"Test Results:")
    print(f"  F1 Score: {test_f1:.4f}")
    print(f"  Accuracy: {test_acc:.4f}")
    print(f"  Precision: {test_prec:.4f}")
    print(f"  Recall: {test_rec:.4f}")

Evaluating: 100%|██████████| 907/907 [00:40<00:00, 22.21it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Test Results:
  F1 Score: 0.8921
  Accuracy: 0.9008
  Precision: 0.8878
  Recall: 0.8964


Evaluating: 100%|██████████| 907/907 [00:42<00:00, 21.46it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Test Results:
  F1 Score: 0.8644
  Accuracy: 0.9052
  Precision: 0.8671
  Recall: 0.8616


Evaluating: 100%|██████████| 907/907 [00:42<00:00, 21.48it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Test Results:
  F1 Score: 0.8535
  Accuracy: 0.9107
  Precision: 0.8399
  Recall: 0.8676


Evaluating: 100%|██████████| 907/907 [00:41<00:00, 21.91it/s]
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Class 1 F1: 0.8491
Class 2 F1: 0.8545
Test Results:
  F1 Score: 0.8518
  Accuracy: 0.8379
  Precision: 0.8613
  Recall: 0.8428


Evaluating: 100%|██████████| 907/907 [00:42<00:00, 21.58it/s]


Class 5 F1: 0.4000
Class 6 F1: 0.0000
Test Results:
  F1 Score: 0.2000
  Accuracy: 0.9901
  Precision: 0.5000
  Recall: 0.1250


In [14]:
ncm_aptos_labels = [[0],[2],[1],[1,2],[5,6]]
for target_label_idx in ncm_aptos_labels:
    # Model definition (changed to binary classification)
    efficientnet = models.efficientnet_v2_m(pretrained=True)
    num_ftrs = efficientnet.classifier[1].in_features
    num_targets = len(target_label_idx)
    
    
    if num_targets == 1:
        # 단일 레이블 케이스 (기존 코드와 동일)
        efficientnet.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Linear(num_ftrs, 1)
        )
    else:
        # 다중 레이블 케이스
        efficientnet.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Linear(num_ftrs, num_targets)
        )
    efficientnet = efficientnet.to(device)
    
    if len(target_label_idx)==1:
        # Final evaluation on test set
        efficientnet.load_state_dict(torch.load(f'model/singlelabel/best_model_label_{target_label_idx[0]}_nmc_cnn.pth'))
    else:
        efficientnet.load_state_dict(torch.load(f'model/singlelabel/best_model_labels_{"-".join(map(str,target_label_idx))}_nmc_cnn.pth'))
    test_f1, test_acc, test_prec, test_rec = evaluate(efficientnet, testloader, device, target_label_idx)
    print(f"Test Results:")
    print(f"  F1 Score: {test_f1:.4f}")
    print(f"  Accuracy: {test_acc:.4f}")
    print(f"  Precision: {test_prec:.4f}")
    print(f"  Recall: {test_rec:.4f}")

Evaluating: 100%|██████████| 916/916 [00:30<00:00, 29.71it/s]


Test Results:
  F1 Score: 0.9173
  Accuracy: 0.9258
  Precision: 0.9263
  Recall: 0.9084


Evaluating: 100%|██████████| 916/916 [00:28<00:00, 31.89it/s]


Test Results:
  F1 Score: 0.8858
  Accuracy: 0.9203
  Precision: 0.8816
  Recall: 0.8899


Evaluating:  28%|██▊       | 261/916 [00:08<00:21, 31.06it/s]


KeyboardInterrupt: 