# 🧪 Đánh giá tất cả Models trên Test Set

Notebook này thực hiện đánh giá tất cả các models đã train trên test set:

## 🎯 Models được đánh giá:
- **SegFormer**: Transformer-based segmentation
- **U-Net EfficientNet**: CNN với EfficientNet backbone
- **ViT Segmentation**: Pure Vision Transformer
- **DeepLabV3+ ResNet**: Atrous convolution với ResNet

## 📊 Metrics:
- **Dice Coefficient**: Overlap measure
- **Jaccard Index (IoU)**: Intersection over Union
- **Test Loss**: Combined BCE + Dice loss
- **Statistical Analysis**: Mean, std, distribution

## 📈 Output:
- **JSON Results**: Detailed metrics cho từng model
- **Comparison Table**: So sánh performance
- **Visualizations**: Sample predictions và charts

In [1]:
# Import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import os
import json
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Deep learning libraries
import timm
import segmentation_models_pytorch as smp
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

# Data loading
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2

print("✅ All libraries imported successfully!")

✅ All libraries imported successfully!


In [2]:
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU
has_cuda = torch.cuda.is_available()
device = torch.device('cuda') if has_cuda else torch.device('cpu')
print(f"Using device: {device}")

# Report the name and total memory of GPU 0 when running on CUDA
if has_cuda:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

Using device: cuda
GPU: NVIDIA GeForce RTX 5070 Ti
Memory: 15.5 GB


In [3]:
# Dataset class (same as training)
class SkinLesionDataset(Dataset):
    """Dataset of RGB images paired with binary segmentation masks.

    Images are every ``.jpg`` / ``.png`` file found in ``images_dir``.
    The mask of ``<stem>.<ext>`` is looked up in ``masks_dir`` as
    ``<stem>_segmentation.png``.

    Args:
        images_dir: Directory containing the input images.
        masks_dir: Directory containing the mask images.
        transform: Optional callable invoked as
            ``transform(image=..., mask=...)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries.
    """

    # Suffix appended to the image stem to build the mask file name.
    MASK_SUFFIX = '_segmentation.png'

    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform

        # os.listdir returns entries in arbitrary order; sort so that sample
        # indices (and any per-sample score list built from them) are
        # reproducible across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png'))
        )

    def __len__(self):
        """Return the number of images found in ``images_dir``."""
        return len(self.image_files)

    def _mask_path(self, img_name):
        """Return the mask path that belongs to image file ``img_name``."""
        stem = os.path.splitext(img_name)[0]
        mask_path = os.path.join(self.masks_dir, stem + self.MASK_SUFFIX)

        # Backward compatibility: the previous naming rule only rewrote a
        # '.jpg' extension, so a '.png' image resolved to a mask with the
        # image's own name. Keep honouring that layout when it is the only
        # file that exists.
        legacy_path = os.path.join(
            self.masks_dir, img_name.replace('.jpg', '_segmentation.png')
        )
        if not os.path.exists(mask_path) and os.path.exists(legacy_path):
            return legacy_path
        return mask_path

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)``.

        Without a transform the image is an RGB array and the mask is a
        float32 array of 0.0 / 1.0 values; with a transform, whatever the
        transform returns under ``'image'`` and ``'mask'``.
        """
        # Load image
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        image = np.array(Image.open(img_path).convert('RGB'))

        # Load mask as single-channel grayscale and binarise it
        mask = np.array(Image.open(self._mask_path(img_name)).convert('L'))
        mask = (mask > 127).astype(np.float32)  # Binary threshold

        # Apply transforms
        if self.transform:
            transformed = self.transform(image=image, mask=mask)
            image = transformed['image']
            mask = transformed['mask']

        return image, mask

# Test-time preprocessing (kept identical to training): resize to 512x512,
# normalise each channel, then convert to tensors.
_test_steps = [
    A.Resize(512, 512),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
]
test_transform = A.Compose(_test_steps)

# Test split: images and masks live in sibling folders under data/test
test_dataset = SkinLesionDataset(
    'data/test/images',
    'data/test/masks',
    transform=test_transform,
)

# No shuffling, so batches follow the dataset's own sample order
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

print(f"✅ Test dataset loaded: {len(test_dataset)} samples")
print(f"   - Test batches: {len(test_loader)}")

✅ Test dataset loaded: 1000 samples
   - Test batches: 250


In [4]:
# Loss functions và metrics (consistent với training)
class DiceLoss(nn.Module):
    """Soft Dice loss computed over every element of the input at once.

    Args:
        smooth: Constant added to numerator and denominator so the ratio
            stays defined when both inputs sum to zero.
    """

    def __init__(self, smooth=1e-6):
        super().__init__()
        self.smooth = smooth

    def forward(self, pred, target):
        """Return ``1 - dice`` for ``pred`` against ``target``.

        Both tensors are flattened first, so the overlap is pooled over the
        whole input rather than averaged per sample.
        """
        flat_pred = pred.reshape(-1)
        flat_target = target.reshape(-1)

        overlap = (flat_pred * flat_target).sum()
        total = flat_pred.sum() + flat_target.sum()

        return 1 - (2. * overlap + self.smooth) / (total + self.smooth)

class CombinedLoss(nn.Module):
    """Weighted sum of binary cross-entropy and Dice loss.

    The result is ``alpha * BCE + (1 - alpha) * Dice``.

    Args:
        alpha: Weight of the BCE term; the Dice term gets ``1 - alpha``.
    """

    def __init__(self, alpha=0.5):
        super().__init__()
        self.alpha = alpha
        # BCEWithLogitsLoss always expects logits; forward() converts
        # probability inputs back to logits before calling it.
        self.bce = nn.BCEWithLogitsLoss()
        self.dice = DiceLoss()

    def forward(self, pred, target):
        """Return the combined loss of ``pred`` against ``target``.

        ``pred`` may hold logits or probabilities. It is treated as logits
        whenever any value lies outside ``[0, 1]``. This is a heuristic:
        logits that all happen to fall inside ``[0, 1]`` are read as
        probabilities.
        """
        # Drop the channel axis from whichever tensor carries the extra one
        if pred.dim() == 4 and target.dim() == 3:
            pred = pred.squeeze(1)
        elif pred.dim() == 3 and target.dim() == 4:
            target = target.squeeze(1)

        looks_like_logits = bool(pred.min() < 0 or pred.max() > 1)
        if looks_like_logits:
            logits = pred
            probs = torch.sigmoid(pred)
        else:
            # Clamp away from 0 and 1 so logit() stays finite
            logits = torch.logit(pred.clamp(1e-7, 1-1e-7))
            probs = pred

        bce_term = self.bce(logits, target)
        dice_term = self.dice(probs, target)

        return self.alpha * bce_term + (1 - self.alpha) * dice_term

def calculate_dice_batch(pred, target, threshold=0.5):
    """Calculate the Dice coefficient of a thresholded prediction.

    The overlap is pooled over every element of the tensors passed in, so
    callers that want a per-sample score pass one sample at a time.

    Args:
        pred: Logits or probabilities. Values are treated as logits (and
            passed through a sigmoid) whenever any of them lies outside
            ``[0, 1]``.
        target: Ground-truth mask of 0/1 values, with or without a channel
            axis at position 1.
        threshold: Probability above which a pixel counts as foreground.

    Returns:
        The Dice coefficient as a Python float. When both the thresholded
        prediction and the target are empty the score is 1.0 (perfect
        agreement) rather than 0.0.
    """
    # Convert to probabilities if needed
    if pred.min() < 0 or pred.max() > 1:
        pred = torch.sigmoid(pred)

    # Handle shape differences
    if pred.dim() == 4 and target.dim() == 3:
        pred = pred.squeeze(1)
    elif pred.dim() == 3 and target.dim() == 4:
        target = target.squeeze(1)

    pred_binary = (pred > threshold).float()
    target_binary = target.float()

    intersection = (pred_binary * target_binary).sum()
    total = pred_binary.sum() + target_binary.sum()

    # Nothing predicted and nothing to find: the prediction is exactly right.
    # Without this guard the ratio below would evaluate to 0 / 1e-6 = 0.
    if total.item() == 0:
        return 1.0

    dice = (2. * intersection) / (total + 1e-6)

    return dice.item()

def calculate_jaccard_batch(pred, target, threshold=0.5):
    """Calculate the Jaccard index (IoU) of a thresholded prediction.

    Intersection and union are pooled over every element of the tensors
    passed in, so callers that want a per-sample score pass one sample at a
    time.

    Args:
        pred: Logits or probabilities. Values are treated as logits (and
            passed through a sigmoid) whenever any of them lies outside
            ``[0, 1]``.
        target: Ground-truth mask of 0/1 values, with or without a channel
            axis at position 1.
        threshold: Probability above which a pixel counts as foreground.

    Returns:
        The IoU as a Python float. When both the thresholded prediction and
        the target are empty the score is 1.0 (perfect agreement) rather
        than 0.0.
    """
    # Convert to probabilities if needed
    if pred.min() < 0 or pred.max() > 1:
        pred = torch.sigmoid(pred)

    # Handle shape differences
    if pred.dim() == 4 and target.dim() == 3:
        pred = pred.squeeze(1)
    elif pred.dim() == 3 and target.dim() == 4:
        target = target.squeeze(1)

    pred_bool = (pred > threshold).bool()
    target_bool = target.bool()

    intersection = (pred_bool & target_bool).float().sum()
    union = (pred_bool | target_bool).float().sum()

    # Empty union means neither mask has foreground: a perfect match.
    # Without this guard the ratio below would evaluate to 0 / 1e-6 = 0.
    if union.item() == 0:
        return 1.0

    jaccard = intersection / (union + 1e-6)
    return jaccard.item()

print("✅ Loss functions và metrics defined!")

✅ Loss functions và metrics defined!


In [5]:
# Evaluation function
def evaluate_model(model, test_loader, model_name='Model'):
    """Evaluate a segmentation model on the test set.

    Runs the model over every batch without gradients, accumulates the
    combined BCE + Dice loss per batch and the Dice / Jaccard scores per
    sample, prints a summary and returns the collected numbers.

    Args:
        model: Module called as ``model(images)``. It may return a tensor of
            logits or probabilities, or an output object exposing the
            tensor as ``.logits`` (as Hugging Face models do).
        test_loader: Iterable yielding ``(images, masks)`` batches.
        model_name: Label used in progress and result messages.

    Returns:
        dict with ``model_name``, ``test_loss`` (mean over batches),
        ``dice_mean`` / ``dice_std``, ``jaccard_mean`` / ``jaccard_std``,
        the per-sample ``dice_scores`` / ``jaccard_scores`` lists and
        ``num_samples``.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()

    test_loss = 0.0
    num_batches = 0
    all_dice_scores = []
    all_jaccard_scores = []

    criterion = CombinedLoss(alpha=0.5)

    print(f'🧪 Evaluating {model_name} on test set...')
    print('=' * 60)

    with torch.no_grad():
        test_pbar = tqdm(test_loader, desc=f'Testing {model_name}')
        for images, masks in test_pbar:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)

            # Hugging Face models return an output object, not a tensor
            if hasattr(outputs, 'logits'):
                outputs = outputs.logits

            # Some heads (e.g. SegFormer) predict at reduced resolution;
            # bring the prediction back to the mask size before scoring.
            if outputs.dim() == 4 and outputs.shape[-2:] != masks.shape[-2:]:
                outputs = F.interpolate(
                    outputs,
                    size=masks.shape[-2:],
                    mode='bilinear',
                    align_corners=False,
                )

            loss = criterion(outputs, masks)

            test_loss += loss.item()
            num_batches += 1

            # Score each sample on its own so mean/std are per-image
            batch_dice = []
            batch_jaccard = []
            for i in range(images.shape[0]):
                # Slicing with i:i+1 keeps the batch axis
                mask_sample = masks[i:i+1]
                output_sample = outputs[i:i+1]

                batch_dice.append(calculate_dice_batch(output_sample, mask_sample))
                batch_jaccard.append(calculate_jaccard_batch(output_sample, mask_sample))

            all_dice_scores.extend(batch_dice)
            all_jaccard_scores.extend(batch_jaccard)

            # Update progress bar with the current batch's numbers
            test_pbar.set_postfix({
                'Loss': f'{loss.item():.4f}',
                'Dice': f'{np.mean(batch_dice):.4f}',
                'IoU': f'{np.mean(batch_jaccard):.4f}'
            })

    if num_batches == 0 or not all_dice_scores:
        raise ValueError(f'No test samples to evaluate for {model_name}')

    # Calculate final metrics (plain floats so the result is JSON-friendly)
    avg_test_loss = test_loss / num_batches
    avg_dice = float(np.mean(all_dice_scores))
    std_dice = float(np.std(all_dice_scores))
    avg_jaccard = float(np.mean(all_jaccard_scores))
    std_jaccard = float(np.std(all_jaccard_scores))

    # Print results
    print(f'\n📊 {model_name} Test Results:')
    print(f'   - Test Loss: {avg_test_loss:.4f}')
    print(f'   - Dice Score: {avg_dice:.4f} ± {std_dice:.4f}')
    print(f'   - Jaccard (IoU): {avg_jaccard:.4f} ± {std_jaccard:.4f}')
    print(f'   - Samples evaluated: {len(all_dice_scores)}')

    return {
        'model_name': model_name,
        'test_loss': avg_test_loss,
        'dice_mean': avg_dice,
        'dice_std': std_dice,
        'jaccard_mean': avg_jaccard,
        'jaccard_std': std_jaccard,
        'dice_scores': all_dice_scores,
        'jaccard_scores': all_jaccard_scores,
        'num_samples': len(all_dice_scores)
    }

print("✅ Evaluation function defined!")

✅ Evaluation function defined!


## 🎯 Model 1: SegFormer

In [6]:
# SegFormer Model Evaluation
# Loads the fine-tuned SegFormer checkpoint if present, evaluates it on the
# test loader and writes the metrics to a JSON file next to the checkpoint.
print("\n" + "="*50)
print("🔍 EVALUATING SEGFORMER MODEL")
print("="*50)

if os.path.exists('models/segformer_model_best.pth'):
    print('🔄 Loading SegFormer model...')

    # Load SegFormer (same as training). The single-label head does not
    # match the pretrained 150-class head, hence ignore_mismatched_sizes.
    segformer_model = SegformerForSemanticSegmentation.from_pretrained(
        "nvidia/segformer-b0-finetuned-ade-512-512",
        num_labels=1,
        ignore_mismatched_sizes=True
    ).to(device)

    # map_location lets a checkpoint saved on a GPU load on whatever device
    # is in use here, including a CPU-only machine.
    state_dict = torch.load('models/segformer_model_best.pth', map_location=device)

    # Handle potential prefix mismatch: the checkpoint may carry an extra
    # leading 'segformer.' on every key (e.g. 'segformer.segformer.encoder…',
    # 'segformer.decode_head…'). Strip exactly that one leading prefix.
    extra_prefix = 'segformer.'
    wrapped_prefixes = ('segformer.segformer.', 'segformer.decode_head.')
    if any(key.startswith('segformer.segformer.') for key in state_dict):
        state_dict = {
            (key[len(extra_prefix):] if key.startswith(wrapped_prefixes) else key): value
            for key, value in state_dict.items()
        }

    segformer_model.load_state_dict(state_dict)
    segformer_model.eval()

    print('✅ SegFormer model loaded successfully!')

    # Evaluate
    segformer_results = evaluate_model(segformer_model, test_loader, 'SegFormer')

    # Build the JSON payload before opening the file so a failure here does
    # not leave an empty results file behind.
    results_to_save = segformer_results.copy()
    results_to_save['dice_scores'] = [float(x) for x in segformer_results['dice_scores']]
    results_to_save['jaccard_scores'] = [float(x) for x in segformer_results['jaccard_scores']]

    # Save results
    with open('models/segformer_test_results.json', 'w') as f:
        json.dump(results_to_save, f, indent=2)

    print('💾 SegFormer results saved!')
else:
    print('❌ SegFormer model not found. Please train it first.')
    segformer_results = None


🔍 EVALUATING SEGFORMER MODEL
🔄 Loading SegFormer model...


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b0-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([1]) in the model instantiated
- decode_head.classifier.weight: found shape torch.Size([150, 256, 1, 1]) in the checkpoint and torch.Size([1, 256, 1, 1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: Error(s) in loading state_dict for SegformerForSemanticSegmentation:
	Missing key(s) in state_dict: "segformer.encoder.patch_embeddings.0.proj.weight", "segformer.encoder.patch_embeddings.0.proj.bias", "segformer.encoder.patch_embeddings.0.layer_norm.weight", "segformer.encoder.patch_embeddings.0.layer_norm.bias", "segformer.encoder.patch_embeddings.1.proj.weight", "segformer.encoder.patch_embeddings.1.proj.bias", "segformer.encoder.patch_embeddings.1.layer_norm.weight", "segformer.encoder.patch_embeddings.1.layer_norm.bias", "segformer.encoder.patch_embeddings.2.proj.weight", "segformer.encoder.patch_embeddings.2.proj.bias", "segformer.encoder.patch_embeddings.2.layer_norm.weight", "segformer.encoder.patch_embeddings.2.layer_norm.bias", "segformer.encoder.patch_embeddings.3.proj.weight", "segformer.encoder.patch_embeddings.3.proj.bias", "segformer.encoder.patch_embeddings.3.layer_norm.weight", "segformer.encoder.patch_embeddings.3.layer_norm.bias", "segformer.encoder.block.0.0.layer_norm_1.weight", "segformer.encoder.block.0.0.layer_norm_1.bias", "segformer.encoder.block.0.0.attention.self.query.weight", "segformer.encoder.block.0.0.attention.self.query.bias", "segformer.encoder.block.0.0.attention.self.key.weight", "segformer.encoder.block.0.0.attention.self.key.bias", "segformer.encoder.block.0.0.attention.self.value.weight", "segformer.encoder.block.0.0.attention.self.value.bias", "segformer.encoder.block.0.0.attention.self.sr.weight", "segformer.encoder.block.0.0.attention.self.sr.bias", "segformer.encoder.block.0.0.attention.self.layer_norm.weight", "segformer.encoder.block.0.0.attention.self.layer_norm.bias", "segformer.encoder.block.0.0.attention.output.dense.weight", "segformer.encoder.block.0.0.attention.output.dense.bias", "segformer.encoder.block.0.0.layer_norm_2.weight", "segformer.encoder.block.0.0.layer_norm_2.bias", "segformer.encoder.block.0.0.mlp.dense1.weight", "segformer.encoder.block.0.0.mlp.dense1.bias", "segformer.encoder.block.0.0.mlp.dwconv.dwconv.weight", 
"segformer.encoder.block.0.0.mlp.dwconv.dwconv.bias", "segformer.encoder.block.0.0.mlp.dense2.weight", "segformer.encoder.block.0.0.mlp.dense2.bias", "segformer.encoder.block.0.1.layer_norm_1.weight", "segformer.encoder.block.0.1.layer_norm_1.bias", "segformer.encoder.block.0.1.attention.self.query.weight", "segformer.encoder.block.0.1.attention.self.query.bias", "segformer.encoder.block.0.1.attention.self.key.weight", "segformer.encoder.block.0.1.attention.self.key.bias", "segformer.encoder.block.0.1.attention.self.value.weight", "segformer.encoder.block.0.1.attention.self.value.bias", "segformer.encoder.block.0.1.attention.self.sr.weight", "segformer.encoder.block.0.1.attention.self.sr.bias", "segformer.encoder.block.0.1.attention.self.layer_norm.weight", "segformer.encoder.block.0.1.attention.self.layer_norm.bias", "segformer.encoder.block.0.1.attention.output.dense.weight", "segformer.encoder.block.0.1.attention.output.dense.bias", "segformer.encoder.block.0.1.layer_norm_2.weight", "segformer.encoder.block.0.1.layer_norm_2.bias", "segformer.encoder.block.0.1.mlp.dense1.weight", "segformer.encoder.block.0.1.mlp.dense1.bias", "segformer.encoder.block.0.1.mlp.dwconv.dwconv.weight", "segformer.encoder.block.0.1.mlp.dwconv.dwconv.bias", "segformer.encoder.block.0.1.mlp.dense2.weight", "segformer.encoder.block.0.1.mlp.dense2.bias", "segformer.encoder.block.1.0.layer_norm_1.weight", "segformer.encoder.block.1.0.layer_norm_1.bias", "segformer.encoder.block.1.0.attention.self.query.weight", "segformer.encoder.block.1.0.attention.self.query.bias", "segformer.encoder.block.1.0.attention.self.key.weight", "segformer.encoder.block.1.0.attention.self.key.bias", "segformer.encoder.block.1.0.attention.self.value.weight", "segformer.encoder.block.1.0.attention.self.value.bias", "segformer.encoder.block.1.0.attention.self.sr.weight", "segformer.encoder.block.1.0.attention.self.sr.bias", "segformer.encoder.block.1.0.attention.self.layer_norm.weight", 
"segformer.encoder.block.1.0.attention.self.layer_norm.bias", "segformer.encoder.block.1.0.attention.output.dense.weight", "segformer.encoder.block.1.0.attention.output.dense.bias", "segformer.encoder.block.1.0.layer_norm_2.weight", "segformer.encoder.block.1.0.layer_norm_2.bias", "segformer.encoder.block.1.0.mlp.dense1.weight", "segformer.encoder.block.1.0.mlp.dense1.bias", "segformer.encoder.block.1.0.mlp.dwconv.dwconv.weight", "segformer.encoder.block.1.0.mlp.dwconv.dwconv.bias", "segformer.encoder.block.1.0.mlp.dense2.weight", "segformer.encoder.block.1.0.mlp.dense2.bias", "segformer.encoder.block.1.1.layer_norm_1.weight", "segformer.encoder.block.1.1.layer_norm_1.bias", "segformer.encoder.block.1.1.attention.self.query.weight", "segformer.encoder.block.1.1.attention.self.query.bias", "segformer.encoder.block.1.1.attention.self.key.weight", "segformer.encoder.block.1.1.attention.self.key.bias", "segformer.encoder.block.1.1.attention.self.value.weight", "segformer.encoder.block.1.1.attention.self.value.bias", "segformer.encoder.block.1.1.attention.self.sr.weight", "segformer.encoder.block.1.1.attention.self.sr.bias", "segformer.encoder.block.1.1.attention.self.layer_norm.weight", "segformer.encoder.block.1.1.attention.self.layer_norm.bias", "segformer.encoder.block.1.1.attention.output.dense.weight", "segformer.encoder.block.1.1.attention.output.dense.bias", "segformer.encoder.block.1.1.layer_norm_2.weight", "segformer.encoder.block.1.1.layer_norm_2.bias", "segformer.encoder.block.1.1.mlp.dense1.weight", "segformer.encoder.block.1.1.mlp.dense1.bias", "segformer.encoder.block.1.1.mlp.dwconv.dwconv.weight", "segformer.encoder.block.1.1.mlp.dwconv.dwconv.bias", "segformer.encoder.block.1.1.mlp.dense2.weight", "segformer.encoder.block.1.1.mlp.dense2.bias", "segformer.encoder.block.2.0.layer_norm_1.weight", "segformer.encoder.block.2.0.layer_norm_1.bias", "segformer.encoder.block.2.0.attention.self.query.weight", 
"segformer.encoder.block.2.0.attention.self.query.bias", "segformer.encoder.block.2.0.attention.self.key.weight", "segformer.encoder.block.2.0.attention.self.key.bias", "segformer.encoder.block.2.0.attention.self.value.weight", "segformer.encoder.block.2.0.attention.self.value.bias", "segformer.encoder.block.2.0.attention.self.sr.weight", "segformer.encoder.block.2.0.attention.self.sr.bias", "segformer.encoder.block.2.0.attention.self.layer_norm.weight", "segformer.encoder.block.2.0.attention.self.layer_norm.bias", "segformer.encoder.block.2.0.attention.output.dense.weight", "segformer.encoder.block.2.0.attention.output.dense.bias", "segformer.encoder.block.2.0.layer_norm_2.weight", "segformer.encoder.block.2.0.layer_norm_2.bias", "segformer.encoder.block.2.0.mlp.dense1.weight", "segformer.encoder.block.2.0.mlp.dense1.bias", "segformer.encoder.block.2.0.mlp.dwconv.dwconv.weight", "segformer.encoder.block.2.0.mlp.dwconv.dwconv.bias", "segformer.encoder.block.2.0.mlp.dense2.weight", "segformer.encoder.block.2.0.mlp.dense2.bias", "segformer.encoder.block.2.1.layer_norm_1.weight", "segformer.encoder.block.2.1.layer_norm_1.bias", "segformer.encoder.block.2.1.attention.self.query.weight", "segformer.encoder.block.2.1.attention.self.query.bias", "segformer.encoder.block.2.1.attention.self.key.weight", "segformer.encoder.block.2.1.attention.self.key.bias", "segformer.encoder.block.2.1.attention.self.value.weight", "segformer.encoder.block.2.1.attention.self.value.bias", "segformer.encoder.block.2.1.attention.self.sr.weight", "segformer.encoder.block.2.1.attention.self.sr.bias", "segformer.encoder.block.2.1.attention.self.layer_norm.weight", "segformer.encoder.block.2.1.attention.self.layer_norm.bias", "segformer.encoder.block.2.1.attention.output.dense.weight", "segformer.encoder.block.2.1.attention.output.dense.bias", "segformer.encoder.block.2.1.layer_norm_2.weight", "segformer.encoder.block.2.1.layer_norm_2.bias", "segformer.encoder.block.2.1.mlp.dense1.weight", 
"segformer.encoder.block.2.1.mlp.dense1.bias", "segformer.encoder.block.2.1.mlp.dwconv.dwconv.weight", "segformer.encoder.block.2.1.mlp.dwconv.dwconv.bias", "segformer.encoder.block.2.1.mlp.dense2.weight", "segformer.encoder.block.2.1.mlp.dense2.bias", "segformer.encoder.block.3.0.layer_norm_1.weight", "segformer.encoder.block.3.0.layer_norm_1.bias", "segformer.encoder.block.3.0.attention.self.query.weight", "segformer.encoder.block.3.0.attention.self.query.bias", "segformer.encoder.block.3.0.attention.self.key.weight", "segformer.encoder.block.3.0.attention.self.key.bias", "segformer.encoder.block.3.0.attention.self.value.weight", "segformer.encoder.block.3.0.attention.self.value.bias", "segformer.encoder.block.3.0.attention.output.dense.weight", "segformer.encoder.block.3.0.attention.output.dense.bias", "segformer.encoder.block.3.0.layer_norm_2.weight", "segformer.encoder.block.3.0.layer_norm_2.bias", "segformer.encoder.block.3.0.mlp.dense1.weight", "segformer.encoder.block.3.0.mlp.dense1.bias", "segformer.encoder.block.3.0.mlp.dwconv.dwconv.weight", "segformer.encoder.block.3.0.mlp.dwconv.dwconv.bias", "segformer.encoder.block.3.0.mlp.dense2.weight", "segformer.encoder.block.3.0.mlp.dense2.bias", "segformer.encoder.block.3.1.layer_norm_1.weight", "segformer.encoder.block.3.1.layer_norm_1.bias", "segformer.encoder.block.3.1.attention.self.query.weight", "segformer.encoder.block.3.1.attention.self.query.bias", "segformer.encoder.block.3.1.attention.self.key.weight", "segformer.encoder.block.3.1.attention.self.key.bias", "segformer.encoder.block.3.1.attention.self.value.weight", "segformer.encoder.block.3.1.attention.self.value.bias", "segformer.encoder.block.3.1.attention.output.dense.weight", "segformer.encoder.block.3.1.attention.output.dense.bias", "segformer.encoder.block.3.1.layer_norm_2.weight", "segformer.encoder.block.3.1.layer_norm_2.bias", "segformer.encoder.block.3.1.mlp.dense1.weight", "segformer.encoder.block.3.1.mlp.dense1.bias", 
"segformer.encoder.block.3.1.mlp.dwconv.dwconv.weight", "segformer.encoder.block.3.1.mlp.dwconv.dwconv.bias", "segformer.encoder.block.3.1.mlp.dense2.weight", "segformer.encoder.block.3.1.mlp.dense2.bias", "segformer.encoder.layer_norm.0.weight", "segformer.encoder.layer_norm.0.bias", "segformer.encoder.layer_norm.1.weight", "segformer.encoder.layer_norm.1.bias", "segformer.encoder.layer_norm.2.weight", "segformer.encoder.layer_norm.2.bias", "segformer.encoder.layer_norm.3.weight", "segformer.encoder.layer_norm.3.bias", "decode_head.linear_c.0.proj.weight", "decode_head.linear_c.0.proj.bias", "decode_head.linear_c.1.proj.weight", "decode_head.linear_c.1.proj.bias", "decode_head.linear_c.2.proj.weight", "decode_head.linear_c.2.proj.bias", "decode_head.linear_c.3.proj.weight", "decode_head.linear_c.3.proj.bias", "decode_head.linear_fuse.weight", "decode_head.batch_norm.weight", "decode_head.batch_norm.bias", "decode_head.batch_norm.running_mean", "decode_head.batch_norm.running_var", "decode_head.classifier.weight", "decode_head.classifier.bias". 
	Unexpected key(s) in state_dict: "segformer.segformer.encoder.patch_embeddings.0.proj.weight", "segformer.segformer.encoder.patch_embeddings.0.proj.bias", "segformer.segformer.encoder.patch_embeddings.0.layer_norm.weight", "segformer.segformer.encoder.patch_embeddings.0.layer_norm.bias", "segformer.segformer.encoder.patch_embeddings.1.proj.weight", "segformer.segformer.encoder.patch_embeddings.1.proj.bias", "segformer.segformer.encoder.patch_embeddings.1.layer_norm.weight", "segformer.segformer.encoder.patch_embeddings.1.layer_norm.bias", "segformer.segformer.encoder.patch_embeddings.2.proj.weight", "segformer.segformer.encoder.patch_embeddings.2.proj.bias", "segformer.segformer.encoder.patch_embeddings.2.layer_norm.weight", "segformer.segformer.encoder.patch_embeddings.2.layer_norm.bias", "segformer.segformer.encoder.patch_embeddings.3.proj.weight", "segformer.segformer.encoder.patch_embeddings.3.proj.bias", "segformer.segformer.encoder.patch_embeddings.3.layer_norm.weight", "segformer.segformer.encoder.patch_embeddings.3.layer_norm.bias", "segformer.segformer.encoder.block.0.0.layer_norm_1.weight", "segformer.segformer.encoder.block.0.0.layer_norm_1.bias", "segformer.segformer.encoder.block.0.0.attention.self.query.weight", "segformer.segformer.encoder.block.0.0.attention.self.query.bias", "segformer.segformer.encoder.block.0.0.attention.self.key.weight", "segformer.segformer.encoder.block.0.0.attention.self.key.bias", "segformer.segformer.encoder.block.0.0.attention.self.value.weight", "segformer.segformer.encoder.block.0.0.attention.self.value.bias", "segformer.segformer.encoder.block.0.0.attention.self.sr.weight", "segformer.segformer.encoder.block.0.0.attention.self.sr.bias", "segformer.segformer.encoder.block.0.0.attention.self.layer_norm.weight", "segformer.segformer.encoder.block.0.0.attention.self.layer_norm.bias", "segformer.segformer.encoder.block.0.0.attention.output.dense.weight", "segformer.segformer.encoder.block.0.0.attention.output.dense.bias", 
"segformer.segformer.encoder.block.0.0.layer_norm_2.weight", "segformer.segformer.encoder.block.0.0.layer_norm_2.bias", "segformer.segformer.encoder.block.0.0.mlp.dense1.weight", "segformer.segformer.encoder.block.0.0.mlp.dense1.bias", "segformer.segformer.encoder.block.0.0.mlp.dwconv.dwconv.weight", "segformer.segformer.encoder.block.0.0.mlp.dwconv.dwconv.bias", "segformer.segformer.encoder.block.0.0.mlp.dense2.weight", "segformer.segformer.encoder.block.0.0.mlp.dense2.bias", "segformer.segformer.encoder.block.0.1.layer_norm_1.weight", "segformer.segformer.encoder.block.0.1.layer_norm_1.bias", "segformer.segformer.encoder.block.0.1.attention.self.query.weight", "segformer.segformer.encoder.block.0.1.attention.self.query.bias", "segformer.segformer.encoder.block.0.1.attention.self.key.weight", "segformer.segformer.encoder.block.0.1.attention.self.key.bias", "segformer.segformer.encoder.block.0.1.attention.self.value.weight", "segformer.segformer.encoder.block.0.1.attention.self.value.bias", "segformer.segformer.encoder.block.0.1.attention.self.sr.weight", "segformer.segformer.encoder.block.0.1.attention.self.sr.bias", "segformer.segformer.encoder.block.0.1.attention.self.layer_norm.weight", "segformer.segformer.encoder.block.0.1.attention.self.layer_norm.bias", "segformer.segformer.encoder.block.0.1.attention.output.dense.weight", "segformer.segformer.encoder.block.0.1.attention.output.dense.bias", "segformer.segformer.encoder.block.0.1.layer_norm_2.weight", "segformer.segformer.encoder.block.0.1.layer_norm_2.bias", "segformer.segformer.encoder.block.0.1.mlp.dense1.weight", "segformer.segformer.encoder.block.0.1.mlp.dense1.bias", "segformer.segformer.encoder.block.0.1.mlp.dwconv.dwconv.weight", "segformer.segformer.encoder.block.0.1.mlp.dwconv.dwconv.bias", "segformer.segformer.encoder.block.0.1.mlp.dense2.weight", "segformer.segformer.encoder.block.0.1.mlp.dense2.bias", "segformer.segformer.encoder.block.1.0.layer_norm_1.weight", 
"segformer.segformer.encoder.block.1.0.layer_norm_1.bias", "segformer.segformer.encoder.block.1.0.attention.self.query.weight", "segformer.segformer.encoder.block.1.0.attention.self.query.bias", "segformer.segformer.encoder.block.1.0.attention.self.key.weight", "segformer.segformer.encoder.block.1.0.attention.self.key.bias", "segformer.segformer.encoder.block.1.0.attention.self.value.weight", "segformer.segformer.encoder.block.1.0.attention.self.value.bias", "segformer.segformer.encoder.block.1.0.attention.self.sr.weight", "segformer.segformer.encoder.block.1.0.attention.self.sr.bias", "segformer.segformer.encoder.block.1.0.attention.self.layer_norm.weight", "segformer.segformer.encoder.block.1.0.attention.self.layer_norm.bias", "segformer.segformer.encoder.block.1.0.attention.output.dense.weight", "segformer.segformer.encoder.block.1.0.attention.output.dense.bias", "segformer.segformer.encoder.block.1.0.layer_norm_2.weight", "segformer.segformer.encoder.block.1.0.layer_norm_2.bias", "segformer.segformer.encoder.block.1.0.mlp.dense1.weight", "segformer.segformer.encoder.block.1.0.mlp.dense1.bias", "segformer.segformer.encoder.block.1.0.mlp.dwconv.dwconv.weight", "segformer.segformer.encoder.block.1.0.mlp.dwconv.dwconv.bias", "segformer.segformer.encoder.block.1.0.mlp.dense2.weight", "segformer.segformer.encoder.block.1.0.mlp.dense2.bias", "segformer.segformer.encoder.block.1.1.layer_norm_1.weight", "segformer.segformer.encoder.block.1.1.layer_norm_1.bias", "segformer.segformer.encoder.block.1.1.attention.self.query.weight", "segformer.segformer.encoder.block.1.1.attention.self.query.bias", "segformer.segformer.encoder.block.1.1.attention.self.key.weight", "segformer.segformer.encoder.block.1.1.attention.self.key.bias", "segformer.segformer.encoder.block.1.1.attention.self.value.weight", "segformer.segformer.encoder.block.1.1.attention.self.value.bias", "segformer.segformer.encoder.block.1.1.attention.self.sr.weight", 
"segformer.segformer.encoder.block.1.1.attention.self.sr.bias", "segformer.segformer.encoder.block.1.1.attention.self.layer_norm.weight", "segformer.segformer.encoder.block.1.1.attention.self.layer_norm.bias", "segformer.segformer.encoder.block.1.1.attention.output.dense.weight", "segformer.segformer.encoder.block.1.1.attention.output.dense.bias", "segformer.segformer.encoder.block.1.1.layer_norm_2.weight", "segformer.segformer.encoder.block.1.1.layer_norm_2.bias", "segformer.segformer.encoder.block.1.1.mlp.dense1.weight", "segformer.segformer.encoder.block.1.1.mlp.dense1.bias", "segformer.segformer.encoder.block.1.1.mlp.dwconv.dwconv.weight", "segformer.segformer.encoder.block.1.1.mlp.dwconv.dwconv.bias", "segformer.segformer.encoder.block.1.1.mlp.dense2.weight", "segformer.segformer.encoder.block.1.1.mlp.dense2.bias", "segformer.segformer.encoder.block.2.0.layer_norm_1.weight", "segformer.segformer.encoder.block.2.0.layer_norm_1.bias", "segformer.segformer.encoder.block.2.0.attention.self.query.weight", "segformer.segformer.encoder.block.2.0.attention.self.query.bias", "segformer.segformer.encoder.block.2.0.attention.self.key.weight", "segformer.segformer.encoder.block.2.0.attention.self.key.bias", "segformer.segformer.encoder.block.2.0.attention.self.value.weight", "segformer.segformer.encoder.block.2.0.attention.self.value.bias", "segformer.segformer.encoder.block.2.0.attention.self.sr.weight", "segformer.segformer.encoder.block.2.0.attention.self.sr.bias", "segformer.segformer.encoder.block.2.0.attention.self.layer_norm.weight", "segformer.segformer.encoder.block.2.0.attention.self.layer_norm.bias", "segformer.segformer.encoder.block.2.0.attention.output.dense.weight", "segformer.segformer.encoder.block.2.0.attention.output.dense.bias", "segformer.segformer.encoder.block.2.0.layer_norm_2.weight", "segformer.segformer.encoder.block.2.0.layer_norm_2.bias", "segformer.segformer.encoder.block.2.0.mlp.dense1.weight", 
"segformer.segformer.encoder.block.2.0.mlp.dense1.bias", "segformer.segformer.encoder.block.2.0.mlp.dwconv.dwconv.weight", "segformer.segformer.encoder.block.2.0.mlp.dwconv.dwconv.bias", "segformer.segformer.encoder.block.2.0.mlp.dense2.weight", "segformer.segformer.encoder.block.2.0.mlp.dense2.bias", "segformer.segformer.encoder.block.2.1.layer_norm_1.weight", "segformer.segformer.encoder.block.2.1.layer_norm_1.bias", "segformer.segformer.encoder.block.2.1.attention.self.query.weight", "segformer.segformer.encoder.block.2.1.attention.self.query.bias", "segformer.segformer.encoder.block.2.1.attention.self.key.weight", "segformer.segformer.encoder.block.2.1.attention.self.key.bias", "segformer.segformer.encoder.block.2.1.attention.self.value.weight", "segformer.segformer.encoder.block.2.1.attention.self.value.bias", "segformer.segformer.encoder.block.2.1.attention.self.sr.weight", "segformer.segformer.encoder.block.2.1.attention.self.sr.bias", "segformer.segformer.encoder.block.2.1.attention.self.layer_norm.weight", "segformer.segformer.encoder.block.2.1.attention.self.layer_norm.bias", "segformer.segformer.encoder.block.2.1.attention.output.dense.weight", "segformer.segformer.encoder.block.2.1.attention.output.dense.bias", "segformer.segformer.encoder.block.2.1.layer_norm_2.weight", "segformer.segformer.encoder.block.2.1.layer_norm_2.bias", "segformer.segformer.encoder.block.2.1.mlp.dense1.weight", "segformer.segformer.encoder.block.2.1.mlp.dense1.bias", "segformer.segformer.encoder.block.2.1.mlp.dwconv.dwconv.weight", "segformer.segformer.encoder.block.2.1.mlp.dwconv.dwconv.bias", "segformer.segformer.encoder.block.2.1.mlp.dense2.weight", "segformer.segformer.encoder.block.2.1.mlp.dense2.bias", "segformer.segformer.encoder.block.3.0.layer_norm_1.weight", "segformer.segformer.encoder.block.3.0.layer_norm_1.bias", "segformer.segformer.encoder.block.3.0.attention.self.query.weight", "segformer.segformer.encoder.block.3.0.attention.self.query.bias", 
"segformer.segformer.encoder.block.3.0.attention.self.key.weight", "segformer.segformer.encoder.block.3.0.attention.self.key.bias", "segformer.segformer.encoder.block.3.0.attention.self.value.weight", "segformer.segformer.encoder.block.3.0.attention.self.value.bias", "segformer.segformer.encoder.block.3.0.attention.output.dense.weight", "segformer.segformer.encoder.block.3.0.attention.output.dense.bias", "segformer.segformer.encoder.block.3.0.layer_norm_2.weight", "segformer.segformer.encoder.block.3.0.layer_norm_2.bias", "segformer.segformer.encoder.block.3.0.mlp.dense1.weight", "segformer.segformer.encoder.block.3.0.mlp.dense1.bias", "segformer.segformer.encoder.block.3.0.mlp.dwconv.dwconv.weight", "segformer.segformer.encoder.block.3.0.mlp.dwconv.dwconv.bias", "segformer.segformer.encoder.block.3.0.mlp.dense2.weight", "segformer.segformer.encoder.block.3.0.mlp.dense2.bias", "segformer.segformer.encoder.block.3.1.layer_norm_1.weight", "segformer.segformer.encoder.block.3.1.layer_norm_1.bias", "segformer.segformer.encoder.block.3.1.attention.self.query.weight", "segformer.segformer.encoder.block.3.1.attention.self.query.bias", "segformer.segformer.encoder.block.3.1.attention.self.key.weight", "segformer.segformer.encoder.block.3.1.attention.self.key.bias", "segformer.segformer.encoder.block.3.1.attention.self.value.weight", "segformer.segformer.encoder.block.3.1.attention.self.value.bias", "segformer.segformer.encoder.block.3.1.attention.output.dense.weight", "segformer.segformer.encoder.block.3.1.attention.output.dense.bias", "segformer.segformer.encoder.block.3.1.layer_norm_2.weight", "segformer.segformer.encoder.block.3.1.layer_norm_2.bias", "segformer.segformer.encoder.block.3.1.mlp.dense1.weight", "segformer.segformer.encoder.block.3.1.mlp.dense1.bias", "segformer.segformer.encoder.block.3.1.mlp.dwconv.dwconv.weight", "segformer.segformer.encoder.block.3.1.mlp.dwconv.dwconv.bias", "segformer.segformer.encoder.block.3.1.mlp.dense2.weight", 
"segformer.segformer.encoder.block.3.1.mlp.dense2.bias", "segformer.segformer.encoder.layer_norm.0.weight", "segformer.segformer.encoder.layer_norm.0.bias", "segformer.segformer.encoder.layer_norm.1.weight", "segformer.segformer.encoder.layer_norm.1.bias", "segformer.segformer.encoder.layer_norm.2.weight", "segformer.segformer.encoder.layer_norm.2.bias", "segformer.segformer.encoder.layer_norm.3.weight", "segformer.segformer.encoder.layer_norm.3.bias", "segformer.decode_head.linear_c.0.proj.weight", "segformer.decode_head.linear_c.0.proj.bias", "segformer.decode_head.linear_c.1.proj.weight", "segformer.decode_head.linear_c.1.proj.bias", "segformer.decode_head.linear_c.2.proj.weight", "segformer.decode_head.linear_c.2.proj.bias", "segformer.decode_head.linear_c.3.proj.weight", "segformer.decode_head.linear_c.3.proj.bias", "segformer.decode_head.linear_fuse.weight", "segformer.decode_head.batch_norm.weight", "segformer.decode_head.batch_norm.bias", "segformer.decode_head.batch_norm.running_mean", "segformer.decode_head.batch_norm.running_var", "segformer.decode_head.batch_norm.num_batches_tracked", "segformer.decode_head.classifier.weight", "segformer.decode_head.classifier.bias". 

## 🎯 Model 2: U-Net EfficientNet

In [None]:
# U-Net EfficientNet Model Evaluation
#
# Rebuilds the U-Net (EfficientNet-B0 encoder) architecture, restores the best
# checkpoint, scores it with evaluate_model on test_loader and writes the
# metrics to JSON. When no checkpoint exists, unet_efficientnet_results is
# set to None so later cells can detect the missing evaluation.
print("\n" + "="*50)
print("🔍 EVALUATING U-NET EFFICIENTNET MODEL")
print("="*50)

if os.path.exists('models/unet_efficientnet_model_best.pth'):
    print('🔄 Loading U-Net EfficientNet model...')

    # Same architecture as training. encoder_weights=None skips the ImageNet
    # download: the strict load_state_dict below replaces every parameter and
    # buffer with the checkpoint values anyway.
    unet_efficientnet_model = smp.Unet(
        encoder_name="efficientnet-b0",
        encoder_weights=None,
        in_channels=3,
        classes=1,
        activation=None  # Model emits logits; sigmoid is applied outside the model
    ).to(device)

    # map_location lets a checkpoint saved on GPU be restored on a CPU-only
    # machine (and vice versa).
    unet_efficientnet_model.load_state_dict(
        torch.load('models/unet_efficientnet_model_best.pth', map_location=device)
    )
    unet_efficientnet_model.eval()

    print('✅ U-Net EfficientNet model loaded successfully!')

    # Evaluate
    unet_efficientnet_results = evaluate_model(unet_efficientnet_model, test_loader, 'U-Net EfficientNet')

    # Build the JSON payload before opening the file so a conversion error
    # cannot leave a truncated results file behind.
    results_to_save = unet_efficientnet_results.copy()
    results_to_save['dice_scores'] = [float(x) for x in unet_efficientnet_results['dice_scores']]
    results_to_save['jaccard_scores'] = [float(x) for x in unet_efficientnet_results['jaccard_scores']]
    with open('models/unet_efficientnet_test_results.json', 'w') as f:
        json.dump(results_to_save, f, indent=2)

    print('💾 U-Net EfficientNet results saved!')
else:
    print('❌ U-Net EfficientNet model not found. Please train it first.')
    unet_efficientnet_results = None

## 🎯 Model 3: ViT Segmentation

In [None]:
# ViT Segmentation Model Definition (same as training)
class ViTSegmentation(nn.Module):
    """Segmentation model that decodes a coarse mask from ViT features.

    The input is resized to the backbone resolution, passed through a timm
    Vision Transformer created without its classification head, and the
    resulting feature vector is mapped by an MLP to a ``grid_size x grid_size``
    logit map per class. That map is bilinearly upsampled back to the spatial
    size of the original input.

    Args:
        vit_model_name: timm model identifier of the backbone.
        num_classes: Number of output channels of the segmentation map.
        pretrained: Whether timm should load pretrained backbone weights.
        input_size: Side length (pixels) the input is resized to before the
            backbone. Defaults to 224, matching the default backbone.
        grid_size: Side length of the coarse logit map produced by the head.
            Defaults to 14 (224 / 16 patches for the default backbone).
    """

    def __init__(self, vit_model_name="vit_tiny_patch16_224", num_classes=1,
                 pretrained=True, *, input_size=224, grid_size=14):
        super().__init__()

        # Load the Vision Transformer from timm
        self.vit = timm.create_model(
            vit_model_name,
            pretrained=pretrained,
            num_classes=0  # Remove classification head
        )

        # Feature dimension reported by the backbone
        self.feature_dim = self.vit.num_features

        self.num_classes = num_classes
        self.input_size = input_size
        self.grid_size = grid_size

        # Simple segmentation head. Layer order (and therefore the state-dict
        # keys seg_head.0 / seg_head.3 / seg_head.6) must stay as in training.
        self.seg_head = nn.Sequential(
            nn.Linear(self.feature_dim, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
            nn.Linear(256, grid_size * grid_size * num_classes)
        )

    def forward(self, x):
        """Return segmentation logits with the same spatial size as ``x``.

        No sigmoid is applied; the output is meant for logit-based losses
        such as BCEWithLogitsLoss.
        """
        # Remember the caller's resolution, then resize to the backbone's
        original_size = x.shape[-2:]
        x_resized = F.interpolate(
            x,
            size=(self.input_size, self.input_size),
            mode='bilinear',
            align_corners=False,
        )

        # Extract global features from the ViT: [batch, feature_dim]
        features = self.vit(x_resized)

        # Generate the flat segmentation map: [batch, grid*grid*num_classes]
        seg_output = self.seg_head(features)

        # Reshape to spatial format
        batch_size = seg_output.shape[0]
        seg_map = seg_output.view(
            batch_size, self.num_classes, self.grid_size, self.grid_size
        )

        # Upsample back to the original input size
        seg_map = F.interpolate(seg_map, size=original_size, mode='bilinear', align_corners=False)

        # Return logits (no sigmoid) for BCEWithLogitsLoss
        return seg_map

print("✅ ViT Segmentation model defined!")

In [None]:
# ViT Segmentation Model Evaluation
#
# Rebuilds the ViTSegmentation architecture, restores the best checkpoint,
# scores it with evaluate_model on test_loader and writes the metrics to JSON.
# When no checkpoint exists, vit_segmentation_results is set to None so later
# cells can detect the missing evaluation.
print("\n" + "="*50)
print("🔍 EVALUATING VIT SEGMENTATION MODEL")
print("="*50)

if os.path.exists('models/unet_vit_model_best.pth'):
    print('🔄 Loading ViT Segmentation model...')

    # Same architecture as training. pretrained=False skips the backbone
    # weight download: the strict load_state_dict below replaces every
    # parameter and buffer with the checkpoint values anyway.
    vit_segmentation_model = ViTSegmentation(
        vit_model_name="vit_tiny_patch16_224",
        num_classes=1,
        pretrained=False
    ).to(device)

    # map_location lets a checkpoint saved on GPU be restored on a CPU-only
    # machine (and vice versa).
    vit_segmentation_model.load_state_dict(
        torch.load('models/unet_vit_model_best.pth', map_location=device)
    )
    vit_segmentation_model.eval()

    print('✅ ViT Segmentation model loaded successfully!')

    # Evaluate
    vit_segmentation_results = evaluate_model(vit_segmentation_model, test_loader, 'ViT Segmentation')

    # Build the JSON payload before opening the file so a conversion error
    # cannot leave a truncated results file behind.
    results_to_save = vit_segmentation_results.copy()
    results_to_save['dice_scores'] = [float(x) for x in vit_segmentation_results['dice_scores']]
    results_to_save['jaccard_scores'] = [float(x) for x in vit_segmentation_results['jaccard_scores']]
    with open('models/vit_segmentation_test_results.json', 'w') as f:
        json.dump(results_to_save, f, indent=2)

    print('💾 ViT Segmentation results saved!')
else:
    print('❌ ViT Segmentation model not found. Please train it first.')
    vit_segmentation_results = None

## 🎯 Model 4: DeepLabV3+ ResNet

In [None]:
# DeepLabV3+ ResNet Model Evaluation
#
# Rebuilds the DeepLabV3+ (ResNet-50 encoder) architecture, restores the best
# checkpoint, scores it with evaluate_model on test_loader and writes the
# metrics to JSON. When no checkpoint exists, deeplabv3_resnet_results is set
# to None so later cells can detect the missing evaluation.
print("\n" + "="*50)
print("🔍 EVALUATING DEEPLABV3+ RESNET MODEL")
print("="*50)

if os.path.exists('models/deeplabv3_resnet_model_best.pth'):
    print('🔄 Loading DeepLabV3+ ResNet model...')

    # Same architecture as training. encoder_weights=None skips the ImageNet
    # download: the strict load_state_dict below replaces every parameter and
    # buffer with the checkpoint values anyway.
    deeplabv3_resnet_model = smp.DeepLabV3Plus(
        encoder_name="resnet50",
        encoder_weights=None,
        in_channels=3,
        classes=1,
        activation=None  # Model emits logits; sigmoid is applied outside the model
    ).to(device)

    # map_location lets a checkpoint saved on GPU be restored on a CPU-only
    # machine (and vice versa).
    deeplabv3_resnet_model.load_state_dict(
        torch.load('models/deeplabv3_resnet_model_best.pth', map_location=device)
    )
    deeplabv3_resnet_model.eval()

    print('✅ DeepLabV3+ ResNet model loaded successfully!')

    # Evaluate
    deeplabv3_resnet_results = evaluate_model(deeplabv3_resnet_model, test_loader, 'DeepLabV3+ ResNet')

    # Build the JSON payload before opening the file so a conversion error
    # cannot leave a truncated results file behind.
    results_to_save = deeplabv3_resnet_results.copy()
    results_to_save['dice_scores'] = [float(x) for x in deeplabv3_resnet_results['dice_scores']]
    results_to_save['jaccard_scores'] = [float(x) for x in deeplabv3_resnet_results['jaccard_scores']]
    with open('models/deeplabv3_resnet_test_results.json', 'w') as f:
        json.dump(results_to_save, f, indent=2)

    print('💾 DeepLabV3+ ResNet results saved!')
else:
    print('❌ DeepLabV3+ ResNet model not found. Please train it first.')
    deeplabv3_resnet_results = None

## 📊 Results Comparison và Analysis

In [None]:
# Gather the per-model metrics that the evaluation cells wrote to disk.
# Display name -> JSON results file; iteration order fixes the order of
# entries in all_results.
model_files = {
    'SegFormer': 'models/segformer_test_results.json',
    'U-Net EfficientNet': 'models/unet_efficientnet_test_results.json',
    'ViT Segmentation': 'models/vit_segmentation_test_results.json',
    'DeepLabV3+ ResNet': 'models/deeplabv3_resnet_test_results.json'
}

all_results = []

for model_name, file_path in model_files.items():
    # Models without a results file are reported and skipped.
    if not os.path.exists(file_path):
        print(f'❌ {model_name} results not found')
        continue

    with open(file_path, 'r') as handle:
        all_results.append(json.load(handle))
    print(f'✅ Loaded {model_name} results')

print(f'\n📊 Total models evaluated: {len(all_results)}')

In [None]:
# Summarise every loaded result in one table, report the winners and
# persist the table as CSV. Does nothing but print a hint when no results
# were loaded.
if not all_results:
    print("❌ No results found for comparison. Please run model evaluations first.")
else:
    print("\n" + "="*80)
    print("📊 MODEL COMPARISON RESULTS")
    print("="*80)

    # One formatted row per model (mean ± std for the overlap metrics)
    comparison_data = [
        {
            'Model': entry['model_name'],
            'Test Loss': f"{entry['test_loss']:.4f}",
            'Dice Score': f"{entry['dice_mean']:.4f} ± {entry['dice_std']:.4f}",
            'Jaccard (IoU)': f"{entry['jaccard_mean']:.4f} ± {entry['jaccard_std']:.4f}",
            'Samples': entry['num_samples'],
        }
        for entry in all_results
    ]

    df_comparison = pd.DataFrame(comparison_data)
    print(df_comparison.to_string(index=False))

    # Index of the highest mean score for each metric
    dice_means_all = [entry['dice_mean'] for entry in all_results]
    jaccard_means_all = [entry['jaccard_mean'] for entry in all_results]
    best_dice_idx = np.argmax(dice_means_all)
    best_jaccard_idx = np.argmax(jaccard_means_all)

    top_dice = all_results[best_dice_idx]
    top_jaccard = all_results[best_jaccard_idx]

    print(f"\n🏆 BEST PERFORMANCE:")
    print(f"   - Best Dice Score: {top_dice['model_name']} ({top_dice['dice_mean']:.4f})")
    print(f"   - Best Jaccard (IoU): {top_jaccard['model_name']} ({top_jaccard['jaccard_mean']:.4f})")

    # Persist the formatted table next to the per-model results
    df_comparison.to_csv('models/model_comparison_results.csv', index=False)
    print(f"\n💾 Comparison table saved to: models/model_comparison_results.csv")

In [None]:
# Visualization: a 2x2 figure with three bar charts (Dice, Jaccard, loss)
# and a box plot of the per-sample Dice scores, saved as a PNG.
def _draw_bar_panel(slot, names, heights, title, ylabel, **bar_options):
    """Draw one annotated bar chart in cell ``slot`` of the 2x2 grid.

    ``bar_options`` is forwarded to ``plt.bar`` (e.g. yerr, capsize, color).
    Each bar gets its value printed just above its top edge.
    """
    plt.subplot(2, 2, slot)
    drawn = plt.bar(names, heights, alpha=0.7, **bar_options)
    plt.title(title, fontsize=14, fontweight='bold')
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)
    plt.grid(axis='y', alpha=0.3)

    # Add value labels on bars
    for rect, height in zip(drawn, heights):
        plt.text(rect.get_x() + rect.get_width()/2, rect.get_height() + 0.01,
                 f'{height:.3f}', ha='center', va='bottom', fontweight='bold')


if not all_results:
    print("❌ No results available for visualization.")
else:
    plt.figure(figsize=(15, 10))

    models = [entry['model_name'] for entry in all_results]

    # Plot 1: Dice Score Comparison (error bars show the std)
    dice_means = [entry['dice_mean'] for entry in all_results]
    dice_stds = [entry['dice_std'] for entry in all_results]
    _draw_bar_panel(1, models, dice_means, 'Dice Score Comparison', 'Dice Score',
                    yerr=dice_stds, capsize=5)

    # Plot 2: Jaccard (IoU) Comparison (error bars show the std)
    jaccard_means = [entry['jaccard_mean'] for entry in all_results]
    jaccard_stds = [entry['jaccard_std'] for entry in all_results]
    _draw_bar_panel(2, models, jaccard_means, 'Jaccard Index (IoU) Comparison', 'Jaccard Index',
                    yerr=jaccard_stds, capsize=5, color='orange')

    # Plot 3: Test Loss Comparison
    test_losses = [entry['test_loss'] for entry in all_results]
    _draw_bar_panel(3, models, test_losses, 'Test Loss Comparison', 'Test Loss',
                    color='red')

    # Plot 4: Dice Score Distribution (Box Plot) over the per-sample scores
    plt.subplot(2, 2, 4)
    dice_distributions = [entry['dice_scores'] for entry in all_results]
    plt.boxplot(dice_distributions, labels=models)
    plt.title('Dice Score Distribution', fontsize=14, fontweight='bold')
    plt.ylabel('Dice Score')
    plt.xticks(rotation=45)
    plt.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.savefig('models/model_comparison_plots.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("💾 Comparison plots saved to: models/model_comparison_plots.png")

## 🎉 Evaluation Complete!

### 📁 Output Files:
- **Individual Results**: `models/*_test_results.json`
- **Comparison Table**: `models/model_comparison_results.csv`
- **Visualization**: `models/model_comparison_plots.png`

### 🔍 Next Steps:
1. **Analyze Results**: Compare performance metrics
2. **Select Best Model**: Based on Dice/IoU scores
3. **Error Analysis**: Investigate failure cases
4. **Model Ensemble**: Combine best performing models