# COVID-19 Classification - Model Comparison

Compare VGG16, ResNet50, and DenseNet121 performance side-by-side.

In [None]:
# Standard-library and third-party packages used by the cells below
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import json
from pathlib import Path

# Project-local settings module; later cells read CHECKPOINT_DIR,
# RESULTS_DIR and CLASS_NAMES from it
import config

# Printed only if every import above succeeded
print('Setup complete!')

## Load Training History

In [None]:
import torch

# Models to compare; each is expected to have a '<name>_final.pth' checkpoint
models_to_compare = ['vgg16', 'resnet50', 'densenet121']
histories = {}   # model name -> checkpoint['history'] ({} if the key is absent)
test_accs = {}   # model name -> checkpoint['test_acc'] (None if the key is absent)

for model_name in models_to_compare:
    checkpoint_path = os.path.join(config.CHECKPOINT_DIR, f'{model_name}_final.pth')

    if not os.path.exists(checkpoint_path):
        print(f"Warning: Checkpoint not found for {model_name}")
        continue

    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    histories[model_name] = checkpoint.get('history', {})
    test_accs[model_name] = checkpoint.get('test_acc', None)

    # A checkpoint without 'test_acc' yields None, which cannot be formatted
    # with ':.2f'; report it explicitly instead of raising TypeError.
    if test_accs[model_name] is None:
        print(f"Loaded {model_name}: Test Acc = N/A")
    else:
        print(f"Loaded {model_name}: Test Acc = {test_accs[model_name]:.2f}%")

## Training Curves Comparison

In [None]:
# Draw a 2x2 grid of training curves (loss/accuracy x train/validation),
# one line per model, and save the figure to RESULTS_DIR.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Per-model line styling; `colors` is also reused by the test-accuracy bar chart
colors = {'vgg16': 'blue', 'resnet50': 'red', 'densenet121': 'green'}
markers = {'vgg16': 'o', 'resnet50': 's', 'densenet121': '^'}

# (history key, panel title, y-axis label), listed in row-major order so that
# zip(axes.flat, ...) fills [0, 0], [0, 1], [1, 0], [1, 1] in turn
panels = [
    ('train_loss', 'Training Loss Comparison', 'Loss'),
    ('val_loss', 'Validation Loss Comparison', 'Loss'),
    ('train_acc', 'Training Accuracy Comparison', 'Accuracy (%)'),
    ('val_acc', 'Validation Accuracy Comparison', 'Accuracy (%)'),
]

for ax, (metric_key, title, ylabel) in zip(axes.flat, panels):
    has_curve = False

    for model_name, history in histories.items():
        values = history.get(metric_key)
        # A checkpoint saved without a history (or without this metric) has
        # nothing to draw; skip it rather than fail with KeyError.
        if values is None or len(values) == 0:
            print(f"Warning: no '{metric_key}' history for {model_name}; skipping")
            continue

        epochs = range(1, len(values) + 1)
        ax.plot(epochs, values,
                color=colors[model_name], linewidth=2,
                marker=markers[model_name], markersize=4, markevery=5,
                label=model_name.upper())
        has_curve = True

    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    # Calling legend() on an empty panel only produces a warning
    if has_curve:
        ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories
os.makedirs(config.RESULTS_DIR, exist_ok=True)
plt.savefig(os.path.join(config.RESULTS_DIR, 'training_curves_comparison.png'),
            dpi=300, bbox_inches='tight')
plt.show()

## Test Accuracy Comparison

In [None]:
# Bar chart comparing test accuracies
# Checkpoints without a 'test_acc' entry are recorded as None when loaded;
# they can be neither drawn nor ranked, so leave them out of the chart.
scored_accs = {m: acc for m, acc in test_accs.items() if acc is not None}

if scored_accs:
    fig, ax = plt.subplots(figsize=(10, 6))

    model_names_upper = [m.upper() for m in scored_accs]
    accuracies = list(scored_accs.values())
    bar_colors = [colors[m] for m in scored_accs]

    bars = ax.bar(model_names_upper, accuracies, color=bar_colors, alpha=0.7,
                  edgecolor='black', linewidth=1.5)

    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.2f}%',
                ha='center', va='bottom', fontsize=12, fontweight='bold')

    ax.set_ylabel('Test Accuracy (%)', fontsize=12)
    ax.set_title('Model Performance Comparison on Test Set', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')
    # Headroom above 100% so the value labels are not clipped
    ax.set_ylim([0, 105])

    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs(config.RESULTS_DIR, exist_ok=True)
    plt.savefig(os.path.join(config.RESULTS_DIR, 'test_accuracy_comparison.png'),
                dpi=300, bbox_inches='tight')
    plt.show()

    # Find best model
    best_model = max(scored_accs, key=scored_accs.get)
    best_acc = scored_accs[best_model]
    print(f"\n🏆 Best performing model: {best_model.upper()} with {best_acc:.2f}% test accuracy")
else:
    # max() raises ValueError on an empty mapping, so report instead
    print("No test accuracies available to compare.")

## Final Epochs Comparison

In [None]:
# Show final training metrics
def _last_as_str(values):
    """Format the last element of a metric series to 2 decimals, or 'N/A' if missing/empty."""
    if values is None or len(values) == 0:
        return 'N/A'
    return f"{values[-1]:.2f}"


comparison_data = []

for model_name, history in histories.items():
    # .get() tolerates checkpoints saved without a history or a test accuracy
    train_acc = history.get('train_acc')
    test_acc = test_accs.get(model_name)

    comparison_data.append({
        'Model': model_name.upper(),
        'Final Train Acc (%)': _last_as_str(train_acc),
        'Final Val Acc (%)': _last_as_str(history.get('val_acc')),
        'Test Acc (%)': f"{test_acc:.2f}" if test_acc is not None else 'N/A',
        # One 'train_acc' entry per epoch, so its length is the epoch count
        'Epochs Trained': len(train_acc) if train_acc is not None else 0
    })

comparison_df = pd.DataFrame(comparison_data)
print("\nFinal Model Comparison:")
print(comparison_df.to_string(index=False))

# Save comparison
# to_csv does not create missing directories
os.makedirs(config.RESULTS_DIR, exist_ok=True)
comparison_csv_path = os.path.join(config.RESULTS_DIR, 'final_comparison.csv')
comparison_df.to_csv(comparison_csv_path, index=False)
print(f"\nComparison saved to {comparison_csv_path}")

## Load Detailed Metrics

In [None]:
# Collect each model's evaluation metrics from '<model>_metrics.json' in RESULTS_DIR
detailed_metrics = {}

for model_name in models_to_compare:
    metrics_path = Path(config.RESULTS_DIR) / f'{model_name}_metrics.json'

    # Nothing to load until the evaluation notebook has produced the file
    if not metrics_path.exists():
        print(f"Metrics file not found for {model_name}. Run evaluate.ipynb first.")
        continue

    with metrics_path.open('r') as f:
        detailed_metrics[model_name] = json.load(f)
    print(f"Loaded metrics for {model_name}")

## Per-Class Performance Heatmap

In [None]:
if not detailed_metrics:
    print("No detailed metrics available. Run evaluate.ipynb first.")
else:
    # Models that have metrics loaded, kept in the order of models_to_compare
    evaluated_models = [m for m in models_to_compare if m in detailed_metrics]

    # One row per model, one F1-score per class (columns follow config.CLASS_NAMES)
    f1_data = [
        [detailed_metrics[m]['per_class_metrics'][class_name]['f1-score']
         for class_name in config.CLASS_NAMES]
        for m in evaluated_models
    ]

    f1_df = pd.DataFrame(
        f1_data,
        index=[m.upper() for m in evaluated_models],
        columns=config.CLASS_NAMES,
    )

    # Annotated heatmap with the colour scale pinned to the [0, 1] range
    plt.figure(figsize=(10, 6))
    sns.heatmap(f1_df, annot=True, fmt='.3f', cmap='YlGnBu',
                cbar_kws={'label': 'F1-Score'}, vmin=0, vmax=1)
    plt.title('Per-Class F1-Score Heatmap', fontsize=14, fontweight='bold')
    plt.ylabel('Model', fontsize=12)
    plt.xlabel('Class', fontsize=12)
    plt.tight_layout()

    heatmap_path = os.path.join(config.RESULTS_DIR, 'f1_score_heatmap.png')
    plt.savefig(heatmap_path, dpi=300, bbox_inches='tight')
    plt.show()

## Training Time & Efficiency

In [None]:
# Compare training efficiency
# Builds one table row per model that has a checkpoint: training time,
# test accuracy, and accuracy points gained per minute of training.
efficiency_data = []

for model_name in models_to_compare:
    checkpoint_path = os.path.join(config.CHECKPOINT_DIR, f'{model_name}_final.pth')

    if not os.path.exists(checkpoint_path):
        continue

    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    training_time = checkpoint.get('training_time')
    test_acc = checkpoint.get('test_acc')

    # A missing or non-positive time must not be shown as a real "0.00" minutes,
    # and a missing accuracy cannot be formatted with ':.2f'.
    has_time = training_time is not None and training_time > 0
    has_acc = test_acc is not None
    # 'training_time' is divided by 60 for the minutes column (i.e. treated as seconds)
    minutes = training_time / 60 if has_time else None

    efficiency_data.append({
        'Model': model_name.upper(),
        'Training Time (min)': f"{minutes:.2f}" if has_time else 'N/A',
        'Test Accuracy (%)': f"{test_acc:.2f}" if has_acc else 'N/A',
        'Acc per Min': f"{test_acc / minutes:.2f}" if has_time and has_acc else 'N/A'
    })

if efficiency_data:
    efficiency_df = pd.DataFrame(efficiency_data)
    print("\nTraining Efficiency Comparison:")
    print(efficiency_df.to_string(index=False))
else:
    print("Training time information not available.")

## Summary & Recommendations

Based on the comparison above:

1. **Best Overall Model**: The model with the highest test accuracy
2. **Most Efficient**: The model with the best accuracy-to-time ratio (the "Acc per Min" column)
3. **Most Stable**: The model with the smallest gap between training and validation accuracy

Consider these factors when choosing a model for deployment.