Task 1: Impact of image resolution on the final outcome

Task 1: Impact of Image Resolution on U-Net Segmentation Performance
Dataset: Kvasir-SEG (Polyp Segmentation)
Workflow: Original Image -> Scale to [512, 256, 128, 64] -> Rescale to 256x256 -> U-Net

This notebook investigates how different input resolutions affect segmentation quality.

Metrics reported: Accuracy, IoU, F1, Dice, MCC, precision, and sensitivity (recall).

In [None]:
# ============================================================================
# IMPORTS AND SETUP
# ============================================================================

import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.model_selection import train_test_split
import pandas as pd 
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.functional as TF
from PIL import Image

# Seed every random number generator used below so runs are repeatable
SEED = 55
torch.manual_seed(SEED)
np.random.seed(SEED)

# Prefer the GPU when one is visible; the CUDA generator is seeded only then
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")


Using device: cpu


In [None]:
# ============================================================================
# CONFIGURATION
# ============================================================================

class Config:
    """Central place for every tunable setting used by the notebook."""

    # Dataset paths (update these).
    # A forward slash is used because pathlib resolves it on every OS; the
    # earlier "data\kvasir-seg" literal depended on the invalid escape "\k"
    # and only pointed at a directory on Windows.
    DATASET_PATH = "data/kvasir-seg"
    IMAGE_DIR = "images"
    MASK_DIR = "masks"

    # Experiment parameters
    RESOLUTIONS = [512, 256, 128, 64]  # intermediate sizes each sample is scaled to
    TARGET_SIZE = 256                  # size every sample is rescaled to afterwards

    # Training parameters
    BATCH_SIZE = 8
    NUM_EPOCHS = 25
    LEARNING_RATE = 1e-4
    TRAIN_SPLIT = 0.7  # fraction of the samples used for training

    # Model parameters
    IN_CHANNELS = 3
    OUT_CHANNELS = 1
    FEATURES = [64, 128, 256, 512]

    RESULTS_DIR = "results_task1"

config = Config()
# Make sure the output folder exists before anything tries to write into it
os.makedirs(config.RESULTS_DIR, exist_ok=True)


In [None]:
# ============================================================================
# DATASET CLASS
# ============================================================================

class KvasirDataset(Dataset):
    """Kvasir-SEG image/mask pairs passed through a resolution bottleneck.

    Each sample is first resized to ``resolution`` x ``resolution`` (this is
    where detail is lost) and then resized to ``target_size`` x ``target_size``
    so that every tensor handed to the network has the same spatial size.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths; entry ``i`` must belong to
            ``image_paths[i]``.
        resolution: Side length of the intermediate (degraded) image.
        target_size: Side length of the returned tensors.

    Raises:
        ValueError: If ``image_paths`` and ``mask_paths`` differ in length.
    """

    def __init__(self, image_paths, mask_paths, resolution, target_size=256):
        if len(image_paths) != len(mask_paths):
            raise ValueError(
                f"Got {len(image_paths)} images but {len(mask_paths)} masks; "
                "the two lists must be aligned one-to-one."
            )
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.resolution = resolution
        self.target_size = target_size

    def __len__(self):
        return len(self.image_paths)

    def _degrade(self, img, interpolation):
        """Resize to the test resolution, then to the network input size."""
        img = TF.resize(img, (self.resolution, self.resolution),
                        interpolation=interpolation)
        return TF.resize(img, (self.target_size, self.target_size),
                         interpolation=interpolation)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` tensors for sample ``idx``."""
        # convert() returns a fully loaded copy, so the file can be closed at once
        with Image.open(self.image_paths[idx]) as image_file:
            image = image_file.convert('RGB')
        with Image.open(self.mask_paths[idx]) as mask_file:
            mask = mask_file.convert('L')

        # torchvision expects InterpolationMode members rather than PIL integer
        # constants. Nearest-neighbour keeps the mask free of blended values.
        image = self._degrade(image, TF.InterpolationMode.BILINEAR)
        mask = self._degrade(mask, TF.InterpolationMode.NEAREST)

        # Convert to tensors; the mask is binarised to {0., 1.}
        image = TF.to_tensor(image)
        mask = (TF.to_tensor(mask) > 0.5).float()

        return image, mask

In [None]:
# ============================================================================
# PATH COLLECTION AND VALIDATION
# ============================================================================

try:
    # Collect the real files; sorting keeps image i aligned with mask i
    base_path = Path(config.DATASET_PATH)
    image_paths = sorted((base_path / config.IMAGE_DIR).glob('*.jpg'))
    mask_paths = sorted((base_path / config.MASK_DIR).glob('*.jpg'))

    if not image_paths or not mask_paths:
        raise FileNotFoundError(f"Could not find images or masks in {config.DATASET_PATH}. Simulating.")

except (FileNotFoundError, NotADirectoryError, OSError) as exc:
    # Fallback to simulation if the local path structure is not present.
    # The placeholders only exercise the split logic below - they cannot be
    # opened - so say so loudly instead of failing later with a confusing error.
    print(f"WARNING: {exc}")
    print("WARNING: using placeholder paths; training needs the real dataset.")
    num_samples = 100
    image_paths = [Path(f"/simulated/path/images/{i:03d}.jpg") for i in range(num_samples)]
    mask_paths = [Path(f"/simulated/path/masks/{i:03d}.jpg") for i in range(num_samples)]

# Split the data
if len(image_paths) != len(mask_paths):
    raise ValueError("Error: Number of images and masks do not match.")

# train_size is given directly: `1 - config.TRAIN_SPLIT` evaluates to
# 0.30000000000000004, which train_test_split rounds up to one extra test
# sample (699/301 instead of 700/300 for 1000 files).
train_images, test_images, train_masks, test_masks = train_test_split(
    image_paths,
    mask_paths,
    train_size=config.TRAIN_SPLIT,
    random_state=SEED
)

# --- SIMPLE VALIDATION ---
print("\n--- Dataset Info ---")
print(f"Total samples found: {len(image_paths)}")
print(f"Train samples: {len(train_images)}")
print(f"Test samples: {len(test_images)}")


--- Dataset Info ---
Total samples found: 1000
Train samples: 699
Test samples: 301


In [None]:
# ============================================================================
# U-NET ARCHITECTURE
# ============================================================================

class DoubleConv(nn.Module):
    """Two stacked 3x3 convolutions, each followed by batch norm and ReLU."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # Stage 1 maps in_channels -> out_channels, stage 2 keeps out_channels
        for stage_in in (in_channels, out_channels):
            stages.append(nn.Conv2d(stage_in, out_channels, 3, padding=1, bias=False))
            stages.append(nn.BatchNorm2d(out_channels))
            stages.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv stages to ``x``."""
        return self.conv(x)

class UNet(nn.Module):
    """U-Net for medical image segmentation.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Number of channels of the predicted mask.
        features: Channel width of each encoder level, shallowest first.
            The decoder mirrors these widths in reverse.

    Raises:
        ValueError: If ``features`` is empty.

    The forward pass returns sigmoid probabilities with the same spatial
    size as the input.
    """

    # A tuple default avoids sharing one mutable list between all instances
    def __init__(self, in_channels=3, out_channels=1, features=(64, 128, 256, 512)):
        super().__init__()
        features = list(features)
        if not features:
            raise ValueError("features must contain at least one channel width")

        self.downs = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Bottleneck
        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)

        # Decoder: self.ups alternates [upsample, double conv] per level
        for feature in reversed(features):
            self.ups.append(
                nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2)
            )
            self.ups.append(DoubleConv(feature * 2, feature))

        # Final output
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skip_connections = []

        # Encoder: remember the pre-pooling activation of every level
        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)

        # Bottleneck
        x = self.bottleneck(x)

        # Decoder: the deepest skip connection is consumed first
        for level, skip in enumerate(reversed(skip_connections)):
            x = self.ups[2 * level](x)

            # Pooling floors odd sizes, so the upsampled map can be one pixel
            # short. Only the spatial dims are compared, and the fix-up stays
            # inside torch instead of going through torchvision's image resize.
            if x.shape[2:] != skip.shape[2:]:
                x = nn.functional.interpolate(
                    x, size=skip.shape[2:], mode='bilinear', align_corners=False
                )

            x = self.ups[2 * level + 1](torch.cat((skip, x), dim=1))

        return torch.sigmoid(self.final_conv(x))


In [None]:
# ============================================================================
# METRICS
# ============================================================================

def dice_coefficient(pred, target, smooth=1e-6):
    """Dice coefficient: 2 * |pred ∩ target| / (|pred| + |target|).

    Args:
        pred: Predicted mask (probabilities or binary values), any shape.
        target: Ground-truth mask with the same number of elements.
        smooth: Small constant added to numerator and denominator so that two
            empty masks score 1 instead of dividing by zero.

    Returns:
        Zero-dimensional tensor holding the score.
    """
    # reshape (unlike view) also accepts non-contiguous tensors
    pred = pred.reshape(-1)
    target = target.reshape(-1)
    intersection = (pred * target).sum()
    return (2. * intersection + smooth) / (pred.sum() + target.sum() + smooth)

def iou_score(pred, target, smooth=1e-6):
    """Intersection over Union (Jaccard index).

    Args:
        pred: Predicted mask, any shape.
        target: Ground-truth mask with the same number of elements.
        smooth: Small constant added to numerator and denominator so that two
            empty masks score 1 instead of dividing by zero.

    Returns:
        Zero-dimensional tensor holding the score.
    """
    # reshape (unlike view) also accepts non-contiguous tensors
    pred = pred.reshape(-1)
    target = target.reshape(-1)
    intersection = (pred * target).sum()
    union = pred.sum() + target.sum() - intersection
    return (intersection + smooth) / (union + smooth)

def pixel_accuracy(pred, target):
    """Fraction of elements where ``pred`` equals ``target``.

    Args:
        pred: Predicted binary mask, any shape.
        target: Ground-truth mask with the same number of elements.

    Returns:
        Zero-dimensional float tensor in [0, 1].
    """
    # reshape (unlike view) also accepts non-contiguous tensors
    pred = pred.reshape(-1)
    target = target.reshape(-1)
    correct = (pred == target).sum()
    return correct.float() / target.numel()

def precision_score(pred, target, smooth=1e-6):
    """Precision: TP / (TP + FP).

    Args:
        pred: Predicted binary mask, any shape.
        target: Ground-truth mask with the same number of elements.
        smooth: Small constant added to numerator and denominator so that an
            empty prediction does not divide by zero.

    Returns:
        Zero-dimensional tensor holding the score.
    """
    # reshape (unlike view) also accepts non-contiguous tensors
    pred = pred.reshape(-1)
    target = target.reshape(-1)
    tp = (pred * target).sum()
    # pred.sum() counts every predicted positive, i.e. TP + FP
    return (tp + smooth) / (pred.sum() + smooth)

def recall_score(pred, target, smooth=1e-6):
    """Recall (sensitivity): TP / (TP + FN).

    Args:
        pred: Predicted binary mask, any shape.
        target: Ground-truth mask with the same number of elements.
        smooth: Small constant added to numerator and denominator so that an
            empty ground truth does not divide by zero.

    Returns:
        Zero-dimensional tensor holding the score.
    """
    # reshape (unlike view) also accepts non-contiguous tensors
    pred = pred.reshape(-1)
    target = target.reshape(-1)
    tp = (pred * target).sum()
    # target.sum() counts every actual positive, i.e. TP + FN
    return (tp + smooth) / (target.sum() + smooth)

def f1_score(pred, target, smooth=1e-6):
    """F1 score: harmonic mean of precision and recall.

    ``smooth`` is forwarded to both component scores and is also added to the
    denominator of the harmonic mean.
    """
    precision = precision_score(pred, target, smooth)
    recall = recall_score(pred, target, smooth)
    numerator = 2 * (precision * recall)
    denominator = precision + recall + smooth
    return numerator / denominator

def mcc_score(pred, target, smooth=1e-6):
    """Matthews Correlation Coefficient.

    MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))

    Args:
        pred: Predicted binary mask, any shape (float, integer or bool).
        target: Ground-truth mask with the same number of elements.
        smooth: Small constant added to the denominator; when any factor under
            the square root is zero the score is 0 rather than undefined.

    Returns:
        Zero-dimensional tensor holding the score.
    """
    # reshape (unlike view) also accepts non-contiguous tensors
    pred = pred.reshape(-1)
    target = target.reshape(-1)

    # `1 - x` is not defined for bool tensors and sqrt needs a float dtype
    if not pred.is_floating_point():
        pred = pred.float()
    if not target.is_floating_point():
        target = target.float()

    tp = (pred * target).sum()
    tn = ((1 - pred) * (1 - target)).sum()
    fp = (pred * (1 - target)).sum()
    fn = ((1 - pred) * target).sum()

    numerator = tp * tn - fp * fn
    denominator = torch.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

    return numerator / (denominator + smooth)

def compute_all_metrics(pred, target, threshold=0.5):
    """Binarise ``pred`` at ``threshold`` and score it with every metric.

    Returns a dict of plain Python floats keyed by metric name.
    """
    hard_pred = (pred > threshold).float()

    # (result key, scoring function) in the order the keys appear in the result
    scorers = (
        ('dice', dice_coefficient),
        ('iou', iou_score),
        ('f1', f1_score),
        ('accuracy', pixel_accuracy),
        ('precision', precision_score),
        ('recall', recall_score),
        ('mcc', mcc_score),
    )

    return {name: scorer(hard_pred, target).item() for name, scorer in scorers}


In [None]:
# ============================================================================
# TRAINING AND EVALUATION
# ============================================================================

class DiceBCELoss(nn.Module):
    """Weighted sum of Dice loss (1 - Dice) and binary cross-entropy.

    ``pred`` is passed to ``nn.BCELoss``, so it must already hold probabilities.
    """

    def __init__(self, weight_dice=0.5, weight_bce=0.5):
        super().__init__()
        self.weight_dice, self.weight_bce = weight_dice, weight_bce
        self.bce = nn.BCELoss()

    def forward(self, pred, target):
        """Return ``weight_dice * (1 - dice) + weight_bce * bce``."""
        dice_term = 1 - dice_coefficient(pred, target)
        bce_term = self.bce(pred, target)
        return self.weight_dice * dice_term + self.weight_bce * bce_term

def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Returns:
        ``(mean_loss, mean_dice)`` averaged over the batches; Dice is computed
        on predictions thresholded at 0.5.
    """
    model.train()
    loss_sum, dice_sum = 0, 0

    progress = tqdm(loader, desc='Training')
    for images, masks in progress:
        images, masks = images.to(device), masks.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, masks)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Monitoring only - no gradients needed
        with torch.no_grad():
            batch_dice = dice_coefficient(outputs > 0.5, masks).item()
        batch_loss = loss.item()

        loss_sum += batch_loss
        dice_sum += batch_dice
        progress.set_postfix({'loss': batch_loss, 'dice': batch_dice})

    n_batches = len(loader)
    return loss_sum / n_batches, dice_sum / n_batches

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating it.

    Metrics are computed per sample and then averaged; the loss is averaged
    per batch. Returns a dict with one entry per metric plus ``'loss'``.
    """
    model.eval()
    metric_names = ('dice', 'iou', 'f1', 'accuracy', 'precision', 'recall', 'mcc')
    per_sample = {name: [] for name in metric_names}
    running_loss = 0

    with torch.no_grad():
        for images, masks in tqdm(loader, desc='Validation'):
            images, masks = images.to(device), masks.to(device)

            outputs = model(images)
            running_loss += criterion(outputs, masks).item()

            # Score each sample of the batch on its own
            for sample_pred, sample_mask in zip(outputs, masks):
                for name, value in compute_all_metrics(sample_pred, sample_mask).items():
                    per_sample[name].append(value)

    # Collapse the per-sample lists into means
    summary = {name: np.mean(values) for name, values in per_sample.items()}
    summary['loss'] = running_loss / len(loader)

    return summary

def train_model(model, train_loader, val_loader, num_epochs, device,
                checkpoint_path=None, learning_rate=None):
    """Train ``model`` and evaluate it after every epoch.

    Args:
        model: Network to optimise (updated in place).
        train_loader: Loader yielding ``(images, masks)`` training batches.
        val_loader: Loader yielding ``(images, masks)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to.
        checkpoint_path: Optional file path. When given, the model's
            ``state_dict`` is written there each time the validation Dice
            improves. With the default ``None`` nothing is written.
        learning_rate: Optional Adam learning rate; defaults to
            ``config.LEARNING_RATE``.

    Returns:
        ``(history, best_dice)``: ``history`` maps ``'train_loss'``,
        ``'train_dice'`` and ``'val_<metric>'`` to per-epoch lists, and
        ``best_dice`` is the highest validation Dice seen.
    """
    if learning_rate is None:
        learning_rate = config.LEARNING_RATE

    criterion = DiceBCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # mode='max' because the monitored quantity (validation Dice) should grow
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', patience=5, factor=0.5
    )

    val_keys = ('loss', 'dice', 'iou', 'f1', 'accuracy', 'precision', 'recall', 'mcc')
    history = {'train_loss': [], 'train_dice': []}
    history.update({f'val_{key}': [] for key in val_keys})

    best_dice = 0

    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')

        # Train
        train_loss, train_dice = train_epoch(
            model, train_loader, criterion, optimizer, device
        )

        # Validate
        val_metrics = validate(model, val_loader, criterion, device)

        # Update history
        history['train_loss'].append(train_loss)
        history['train_dice'].append(train_dice)
        for key in val_keys:
            history[f'val_{key}'].append(val_metrics[key])

        # Scheduler step
        scheduler.step(val_metrics['dice'])

        # Print metrics
        print(f'Train Loss: {train_loss:.4f}, Train Dice: {train_dice:.4f}')
        print(f'Val Loss: {val_metrics["loss"]:.4f}')
        print(f'Val Dice: {val_metrics["dice"]:.4f}, IoU: {val_metrics["iou"]:.4f}')
        print(f'Val F1: {val_metrics["f1"]:.4f}, Acc: {val_metrics["accuracy"]:.4f}')
        print(f'Val Precision: {val_metrics["precision"]:.4f}, Recall: {val_metrics["recall"]:.4f}')
        print(f'Val MCC: {val_metrics["mcc"]:.4f}')

        # Track the best epoch and, when a path was supplied, save its weights
        if val_metrics['dice'] > best_dice:
            best_dice = val_metrics['dice']
            print(f'New best Dice: {best_dice:.4f}')
            if checkpoint_path is not None:
                torch.save(model.state_dict(), checkpoint_path)

    return history, best_dice


In [None]:
# ============================================================================
# DATA LOADING
# ============================================================================
def load_dataset_paths(base_path, image_dir, mask_dir):
    """Return sorted lists of ``.jpg``/``.png`` paths for images and masks.

    Both folders are looked up below ``base_path``; the number of files found
    in each is printed.
    """
    root = Path(base_path)

    def _collect(folder):
        # Gather both extensions, then sort the combined list
        found = []
        for pattern in ('*.jpg', '*.png'):
            found.extend(folder.glob(pattern))
        return sorted(found)

    image_files = _collect(root / image_dir)
    mask_files = _collect(root / mask_dir)

    print(f"Found {len(image_files)} images and {len(mask_files)} masks")

    return image_files, mask_files

# Load dataset
image_paths, mask_paths = load_dataset_paths(
    config.DATASET_PATH, config.IMAGE_DIR, config.MASK_DIR
)

# Train/val split (70/30).
# train_size is given directly: `1 - config.TRAIN_SPLIT` evaluates to
# 0.30000000000000004, which train_test_split rounds up to one extra
# validation sample (699/301 instead of 700/300 for 1000 files).
train_images, val_images, train_masks, val_masks = train_test_split(
    image_paths, mask_paths, train_size=config.TRAIN_SPLIT, random_state=SEED
)

print(f"Training samples: {len(train_images)}")
print(f"Validation samples: {len(val_images)}")


Found 1000 images and 1000 masks
Training samples: 699
Validation samples: 301


In [None]:
# ============================================================================
# EXPERIMENT: TEST DIFFERENT RESOLUTIONS
# ============================================================================

# Maps each resolution to its training history, best Dice and last-epoch metrics
results = {}

for resolution in config.RESOLUTIONS:
    print(f"\n{'='*60}")
    print(f"Testing Resolution: {resolution}x{resolution}")
    print(f"{'='*60}")

    # Restart every RNG from the same seed so that all resolutions begin with
    # identical initial weights and the same shuffle order; otherwise those
    # random differences are mixed into the resolution comparison.
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(SEED)

    # Create datasets
    train_dataset = KvasirDataset(
        train_images, train_masks,
        resolution=resolution,
        target_size=config.TARGET_SIZE
    )
    val_dataset = KvasirDataset(
        val_images, val_masks,
        resolution=resolution,
        target_size=config.TARGET_SIZE
    )

    # Create dataloaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.BATCH_SIZE,
        shuffle=True,
        num_workers=0
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=config.BATCH_SIZE,
        shuffle=False,
        num_workers=0
    )

    # Create model
    model = UNet(
        in_channels=config.IN_CHANNELS,
        out_channels=config.OUT_CHANNELS,
        features=config.FEATURES
    ).to(device)

    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Train
    history, best_dice = train_model(
        model, train_loader, val_loader, config.NUM_EPOCHS, device
    )

    # Store results; 'final_metrics' are the validation scores of the last epoch
    results[resolution] = {
        'history': history,
        'best_dice': best_dice,
        'final_metrics': {
            name: history[f'val_{name}'][-1]
            for name in ('dice', 'iou', 'f1', 'accuracy', 'precision', 'recall', 'mcc')
        }
    }

    # Save model (weights as they are after the last epoch)
    torch.save(model.state_dict(),
               f"{config.RESULTS_DIR}/model_res{resolution}.pth")



Testing Resolution: 512x512
Model parameters: 31,037,633

Epoch 1/25


Training:   2%|▏         | 2/88 [00:26<18:55, 13.20s/it, loss=0.769, dice=0.206]


KeyboardInterrupt: 

In [None]:

# ============================================================================
# VISUALIZATION AND ANALYSIS
# ============================================================================

# Only resolutions whose training finished have an entry in `results`; an
# interrupted experiment would otherwise fail here with a bare KeyError.
completed_resolutions = [res for res in config.RESOLUTIONS if res in results]
if not completed_resolutions:
    raise RuntimeError(
        "No training results found - run the experiment cell to completion first."
    )

# Plot training curves for all (completed) resolutions
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# (history key, panel title, y-axis label) per panel, in row-major order
curve_panels = [
    ('train_loss', 'Training Loss', 'Loss'),
    ('val_loss', 'Validation Loss', 'Loss'),
    ('train_dice', 'Training Dice Score', 'Dice'),
    ('val_dice', 'Validation Dice Score', 'Dice'),
]

for ax, (history_key, panel_title, y_label) in zip(axes.ravel(), curve_panels):
    for resolution in completed_resolutions:
        ax.plot(results[resolution]['history'][history_key],
                label=f'{resolution}x{resolution}')
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig(f"{config.RESULTS_DIR}/training_curves.png", dpi=300, bbox_inches='tight')
plt.show()

# Compare all metrics across resolutions (one row per resolution)
metrics_df = pd.DataFrame([
    {
        'Resolution': res,
        'Dice': results[res]['final_metrics']['dice'],
        'IoU': results[res]['final_metrics']['iou'],
        'F1': results[res]['final_metrics']['f1'],
        'Accuracy': results[res]['final_metrics']['accuracy'],
        'Precision': results[res]['final_metrics']['precision'],
        'Recall': results[res]['final_metrics']['recall'],
        'MCC': results[res]['final_metrics']['mcc']
    }
    for res in completed_resolutions
])

print("\n" + "="*60)
print("FINAL RESULTS: Impact of Resolution on Segmentation")
print("="*60)
print(metrics_df.to_string(index=False))

# Save results
metrics_df.to_csv(f"{config.RESULTS_DIR}/metrics_comparison.csv", index=False)

# Heatmap of metrics (rows = metrics, columns = resolutions)
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    metrics_df.set_index('Resolution').T,
    annot=True, fmt='.4f', cmap='RdYlGn',
    vmin=0, vmax=1, ax=ax, cbar_kws={'label': 'Score'}
)
ax.set_title('Segmentation Metrics Across Different Resolutions', fontsize=14, fontweight='bold')
ax.set_xlabel('Resolution', fontsize=12)
ax.set_ylabel('Metric', fontsize=12)
plt.tight_layout()
plt.savefig(f"{config.RESULTS_DIR}/metrics_heatmap.png", dpi=300, bbox_inches='tight')
plt.show()

# Bar plots for each metric
fig, axes = plt.subplots(2, 4, figsize=(20, 10))
axes = axes.ravel()

metric_names = ['Dice', 'IoU', 'F1', 'Accuracy', 'Precision', 'Recall', 'MCC']

for idx, metric in enumerate(metric_names):
    axes[idx].bar(
        metrics_df['Resolution'].astype(str),
        metrics_df[metric],
        color=sns.color_palette('viridis', len(completed_resolutions))
    )
    axes[idx].set_title(metric, fontsize=12, fontweight='bold')
    axes[idx].set_xlabel('Resolution')
    axes[idx].set_ylabel('Score')
    axes[idx].set_ylim([0, 1])
    axes[idx].grid(True, alpha=0.3)

    # Add value labels on bars
    for i, v in enumerate(metrics_df[metric]):
        axes[idx].text(i, v + 0.02, f'{v:.3f}', ha='center', va='bottom', fontsize=9)

# Hide extra subplot (seven metrics on a 2x4 grid)
axes[-1].axis('off')

plt.suptitle('Comparison of Segmentation Metrics Across Resolutions',
             fontsize=16, fontweight='bold', y=1.00)
plt.tight_layout()
plt.savefig(f"{config.RESULTS_DIR}/metrics_barplots.png", dpi=300, bbox_inches='tight')
plt.show()


In [None]:

# ============================================================================
# QUALITATIVE ANALYSIS: Visualize Predictions
# ============================================================================

def visualize_predictions(model, dataset, device, num_samples=5):
    """Plot input, ground truth and prediction for randomly chosen samples.

    Args:
        model: Network returning per-pixel probabilities for a batch of images.
        dataset: Indexable dataset yielding ``(image, mask)`` tensor pairs.
        device: Device the image is moved to before inference.
        num_samples: Number of rows to draw; capped at ``len(dataset)``.

    Returns:
        The matplotlib figure with one row per sample and four columns
        (input, ground truth, probability map, mask thresholded at 0.5).

    Raises:
        ValueError: If there is no sample to draw.
    """
    model.eval()

    # Sampling without replacement cannot return more items than exist
    num_samples = min(num_samples, len(dataset))
    if num_samples < 1:
        raise ValueError("visualize_predictions needs at least one sample")
    indices = np.random.choice(len(dataset), num_samples, replace=False)

    # squeeze=False keeps `axes` two-dimensional even for a single row
    fig, axes = plt.subplots(num_samples, 4, figsize=(16, 4*num_samples),
                             squeeze=False)

    with torch.no_grad():
        for i, idx in enumerate(indices):
            image, mask = dataset[idx]
            image_input = image.unsqueeze(0).to(device)

            pred = model(image_input)
            pred = pred.squeeze().cpu().numpy()
            pred_binary = (pred > 0.5).astype(np.float32)

            # Channels-last layout for imshow
            image_np = image.permute(1, 2, 0).numpy()
            mask_np = mask.squeeze().numpy()

            axes[i, 0].imshow(image_np)
            axes[i, 0].set_title('Input Image')
            axes[i, 0].axis('off')

            axes[i, 1].imshow(mask_np, cmap='gray')
            axes[i, 1].set_title('Ground Truth')
            axes[i, 1].axis('off')

            axes[i, 2].imshow(pred, cmap='gray')
            axes[i, 2].set_title('Prediction (Probability)')
            axes[i, 2].axis('off')

            axes[i, 3].imshow(pred_binary, cmap='gray')
            axes[i, 3].set_title('Prediction (Binary)')
            axes[i, 3].axis('off')

    plt.tight_layout()
    return fig

# Visualize for each resolution
for resolution in config.RESOLUTIONS:
    print(f"\nGenerating visualizations for {resolution}x{resolution}...")

    # Load the weights saved for this resolution
    model = UNet(
        in_channels=config.IN_CHANNELS,
        out_channels=config.OUT_CHANNELS,
        features=config.FEATURES
    ).to(device)
    model.load_state_dict(
        torch.load(f"{config.RESULTS_DIR}/model_res{resolution}.pth",
                   map_location=device)
    )

    # Create dataset
    val_dataset = KvasirDataset(
        val_images, val_masks,
        resolution=resolution,
        target_size=config.TARGET_SIZE
    )

    # Visualize
    fig = visualize_predictions(model, val_dataset, device, num_samples=5)
    fig.suptitle(f'Predictions at {resolution}x{resolution} Resolution',
                 fontsize=16, fontweight='bold')
    plt.savefig(f"{config.RESULTS_DIR}/predictions_res{resolution}.png",
                dpi=300, bbox_inches='tight')
    plt.show()
    # Release the figure; otherwise one large figure per resolution stays in memory
    plt.close(fig)

print("\n" + "="*60)
print("TASK 1 COMPLETE!")
print("="*60)
print(f"All results saved to: {config.RESULTS_DIR}/")
print("\nKey Findings:")
print(f"- Best Resolution: {metrics_df.loc[metrics_df['Dice'].idxmax(), 'Resolution']}")
print(f"- Best Dice Score: {metrics_df['Dice'].max():.4f}")
print(f"- Worst Resolution: {metrics_df.loc[metrics_df['Dice'].idxmin(), 'Resolution']}")
print(f"- Worst Dice Score: {metrics_df['Dice'].min():.4f}")
print(f"\nDice Score Range: {metrics_df['Dice'].min():.4f} - {metrics_df['Dice'].max():.4f}")
print(f"Performance Drop: {(metrics_df['Dice'].max() - metrics_df['Dice'].min()):.4f}")