# Faster R-CNN Model Evaluation

This notebook evaluates Faster R-CNN models with ResNet-18 and ResNet-50 backbones, compares their performance, and visualizes predictions alongside ground truths.

## Import Required Libraries

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import cv2
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
import time
import tqdm



## Load Pretrained Models

Load our fine-tuned Faster R-CNN models with ResNet-18 and ResNet-50 backbones.

In [2]:
# Device configuration: run on the GPU when PyTorch can see one, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Function to create ResNet18 Faster R-CNN model
def get_fasterrcnn_resnet18(num_classes):
    """Build a Faster R-CNN detector on a single-scale ResNet-18 backbone (no FPN).

    Args:
        num_classes: Number of output classes, including the background class.

    Returns:
        A ``torchvision.models.detection.FasterRCNN`` whose backbone starts from
        ImageNet weights; the RPN and box heads are freshly initialised.
    """
    # 1. Load an ImageNet-pretrained ResNet-18. The `weights=` enum replaces the
    #    deprecated `pretrained=True` flag and is the same API this file already
    #    uses for the ResNet-50 detector.
    backbone = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.DEFAULT)

    # 2. Keep only the convolutional trunk - drop the final avg pool and fc layers
    backbone = torch.nn.Sequential(*list(backbone.children())[:-2])

    # 3. FasterRCNN reads `out_channels` from the backbone; ResNet-18's last
    #    stage produces 512 channels. No FPN is built: the detector works on
    #    this single feature map.
    backbone.out_channels = 512

    # 4. One feature map -> one tuple of anchor sizes and one tuple of aspect ratios
    anchor_generator = torchvision.models.detection.rpn.AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),)
    )

    # 5. RoI pooler over the single feature map, which torchvision exposes
    #    under the name '0' when the backbone returns a plain tensor
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],
        output_size=7,
        sampling_ratio=2
    )

    # 6. Put everything together
    model = torchvision.models.detection.FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler
    )

    return model

# Function to create ResNet50 Faster R-CNN model
def get_fasterrcnn_resnet50(num_classes):
    """Build a Faster R-CNN (ResNet-50 + FPN) whose box head predicts `num_classes` classes.

    Args:
        num_classes: Number of output classes, including the background class.

    Returns:
        The detector built with the default pretrained weights, with its box
        predictor replaced by a new `FastRCNNPredictor` sized for `num_classes`.
    """
    # Start from torchvision's default pretrained detector
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
    )

    # Swap the classification/regression head for one with our class count,
    # keeping the input width of the head that is being replaced
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

# Define number of classes (background + object classes)
num_classes = 3  # background + drone + bird

# Load the trained models
resnet18_model_path = 'e:/ML/VIP_Cup/models/best_detector_resnet18.pth'
resnet50_model_path = 'e:/ML/VIP_Cup/models/best_detector_resnet50.pth'

# Initialize models
resnet18_model = get_fasterrcnn_resnet18(num_classes)
resnet50_model = get_fasterrcnn_resnet50(num_classes)

# Load trained weights. A failed load is reported and then re-raised: carrying on
# would silently evaluate a detector that never received the fine-tuned weights.
# NOTE(review): torch.load unpickles the checkpoint - only load files from a trusted source.
try:
    resnet18_model.load_state_dict(torch.load(resnet18_model_path, map_location=device))
    print(f"Successfully loaded ResNet-18 model from {resnet18_model_path}")
except Exception as e:
    print(f"Error loading ResNet-18 model: {e}")
    raise

try:
    resnet50_model.load_state_dict(torch.load(resnet50_model_path, map_location=device))
    print(f"Successfully loaded ResNet-50 model from {resnet50_model_path}")
except Exception as e:
    print(f"Error loading ResNet-50 model: {e}")
    raise

# Set models to evaluation mode
resnet18_model.to(device).eval()
resnet50_model.to(device).eval()

Using device: cuda




Successfully loaded ResNet-18 model from e:/ML/VIP_Cup/models/best_detector_resnet18.pth
Successfully loaded ResNet-50 model from e:/ML/VIP_Cup/models/best_detector_resnet50.pth


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

## Load Dataset and Ground Truths

Load the RGB dataset from the `releasev1-detection&tracking` folder: images are in the `images` folder and annotations are in the `labels` folder as YOLO-format `.txt` files named after their image.

In [3]:
# Class index -> class name for drone and bird detection (index 0 is background)
class_names = ['background', 'drone', 'bird']

# Dataset location: images and labels live in sibling folders under the RGB root
data_root = 'e:/ML/VIP_Cup/releasev1-detection&tracking/RGB'
img_dir = os.path.join(data_root, 'images')
label_dir = os.path.join(data_root, 'labels')

# Warn (without stopping) about each expected directory that is missing
if os.path.exists(img_dir) is False:
    print(f"Warning: Image directory not found at {img_dir}")
if os.path.exists(label_dir) is False:
    print(f"Warning: Label directory not found at {label_dir}")

# Collect the full path of every image file, in sorted order
image_files = sorted(
    os.path.join(img_dir, entry)
    for entry in os.listdir(img_dir)
    if entry.endswith(('.jpg', '.jpeg', '.png'))
)

print(f"Found {len(image_files)} images in {img_dir}")

# Create a dataset class similar to what was used in training
class DroneDataset(torch.utils.data.Dataset):
    """Detection dataset pairing each image with a YOLO-format ``.txt`` label file.

    Every item is ``(image, target)``. ``image`` is a float CHW tensor divided by
    255; ``target`` is a dict with "boxes" (pixel ``[x_min, y_min, x_max, y_max]``),
    "labels" (YOLO class id + 1, because 0 is background), "image_id" (the
    dataset index), "area", "iscrowd" and "orig_filename".
    """

    def __init__(self, img_dir, label_dir, sample_fraction=1.0):
        """Index the images in `img_dir`, optionally keeping a random subset.

        Args:
            img_dir: Directory containing .jpg / .png / .jpeg images.
            label_dir: Directory containing one ``<image stem>.txt`` per image.
            sample_fraction: When below 1.0, keep only this fraction of the images.
        """
        self.img_dir = img_dir
        self.label_dir = label_dir
        all_img_files = sorted(f for f in os.listdir(img_dir) if f.endswith(('.jpg', '.png', '.jpeg')))

        if sample_fraction < 1.0:
            num_samples = int(len(all_img_files) * sample_fraction)
            # Fixed seed so the same subset is drawn on every run.
            # NOTE: this reseeds NumPy's global RNG as a side effect.
            np.random.seed(42)
            indices = np.random.choice(len(all_img_files), size=num_samples, replace=False)
            self.img_files = [all_img_files[i] for i in indices]
            print(f"Randomly sampled {len(self.img_files)} images from a total of {len(all_img_files)} images")
        else:
            self.img_files = all_img_files

    def __len__(self):
        return len(self.img_files)

    @staticmethod
    def _read_yolo_labels(label_path, width, height):
        """Parse a YOLO label file into pixel boxes and 1-based class labels.

        Args:
            label_path: Path to the label file; a missing file means "no objects".
            width: Image width in pixels, used to de-normalise x coordinates.
            height: Image height in pixels, used to de-normalise y coordinates.

        Returns:
            ``(boxes, labels)`` where boxes is a list of
            ``[x_min, y_min, x_max, y_max]`` and labels a list of ints.
        """
        boxes = []
        labels = []

        if not os.path.exists(label_path):
            return boxes, labels

        with open(label_path, 'r') as f:
            for line in f:
                data = line.split()
                if not data:
                    # Blank line (e.g. a trailing newline) - nothing to parse
                    continue
                class_id = int(data[0])
                # YOLO format: class_id, x_center, y_center, width, height (normalized)
                x_center, y_center, w, h = map(float, data[1:5])

                # Convert to [x_min, y_min, x_max, y_max] in pixels
                x_min = (x_center - w/2) * width
                y_min = (y_center - h/2) * height
                x_max = (x_center + w/2) * width
                y_max = (y_center + h/2) * height

                boxes.append([x_min, y_min, x_max, y_max])
                labels.append(class_id + 1)  # +1 since 0 is background for Faster R-CNN

        return boxes, labels

    def __getitem__(self, idx):
        """Load image `idx` and its annotations as ``(image_tensor, target_dict)``."""
        img_name = self.img_files[idx]
        img_path = os.path.join(self.img_dir, img_name)

        # Load image; cv2.imread reports failure by returning None rather than raising
        image = cv2.imread(img_path)
        if image is None:
            raise OSError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        height, width = image.shape[:2]

        # Load labels from txt file (YOLO format)
        label_name = os.path.splitext(img_name)[0] + '.txt'
        label_path = os.path.join(self.label_dir, label_name)
        boxes, labels = self._read_yolo_labels(label_path, width, height)

        # Handle empty boxes case: keep the (0, 4) shape so downstream code can still index columns
        if len(boxes) == 0:
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros(0, dtype=torch.int64)
        else:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)

        # Convert image from HWC to a CHW float tensor scaled by 1/255
        image = torch.from_numpy(image.transpose(2, 0, 1)).float() / 255.0

        # The image id is the dataset index, not something parsed from the filename
        image_id = torch.from_numpy(np.array([idx])).long()

        # Calculate area
        if len(boxes) > 0:
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        else:
            area = torch.zeros(0, dtype=torch.float32)

        # Create target dictionary
        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": torch.zeros((len(boxes),), dtype=torch.int64),
            "orig_filename": img_name
        }

        return image, target

# Build the evaluation dataset (a 10% random sample of the images) and its dataloader
test_dataset = DroneDataset(img_dir, label_dir, 0.1)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=False,
    # Keep images and targets as tuples instead of stacking them:
    # the number of boxes differs from image to image
    collate_fn=lambda batch: tuple(zip(*batch))
)

# Preview up to three samples
print("Sample data from the dataset:")
for sample_idx in range(min(3, len(test_dataset))):
    image, target = test_dataset[sample_idx]
    sample_labels = target['labels'].tolist()
    print(f"Sample {sample_idx}:")
    print(f"  - Image shape: {image.shape}")
    print(f"  - Number of objects: {len(target['boxes'])}")
    if len(target['boxes']) == 0:
        continue
    print(f"  - Labels: {sample_labels}")
    for obj_idx, label in enumerate(sample_labels):
        if label < len(class_names):
            class_name = class_names[label]
        else:
            class_name = f"Class {label}"
        print(f"    Object {obj_idx}: {class_name}, Box: {target['boxes'][obj_idx].tolist()}")

Found 57580 images in e:/ML/VIP_Cup/releasev1-detection&tracking/RGB\images
Randomly sampled 5758 images from a total of 57580 images
Sample data from the dataset:
Sample 0:
  - Image shape: torch.Size([3, 256, 320])
  - Number of objects: 1
  - Labels: [2]
    Object 0: bird, Box: [181.5, 112.5, 207.5, 131.5]
Sample 1:
  - Image shape: torch.Size([3, 256, 320])
  - Number of objects: 1
  - Labels: [1]
    Object 0: drone, Box: [108.5, 191.5, 125.5, 204.5]
Sample 2:
  - Image shape: torch.Size([3, 256, 320])
  - Number of objects: 1
  - Labels: [2]
    Object 0: bird, Box: [215.0, 139.0, 229.0, 148.0]


## Evaluate Models

Run both models on the dataset and collect their detections, which are the input for evaluation metrics such as mAP (mean Average Precision).

In [None]:

def evaluate_model(model, dataloader, num_images=None, score_threshold=0.5):
    """Run `model` over `dataloader` and collect its detections.

    Args:
        model: Detection model. It is called with a list of image tensors and
            must return one dict per image with 'boxes', 'scores' and 'labels'.
        dataloader: Yields ``(images, targets)`` batches; each target needs an
            "image_id" one-element tensor and an "orig_filename" entry.
        num_images: Optional cap on the number of images to process. ``None``
            (or 0) processes the whole dataloader.
        score_threshold: Detections scoring below this value are discarded.

    Returns:
        A list with one dict per kept detection: 'image_id', 'filename',
        'category_id', 'bbox' as ``[x, y, width, height]`` and 'score'.
        Images without any kept detection contribute no entries.
    """
    results = []
    model.to(device)
    model.eval()

    # Work out how many batches are needed to cover the requested number of images
    total_batches = len(dataloader)
    if num_images:
        batch_size = dataloader.batch_size
        total_batches = min(total_batches, (num_images + batch_size - 1) // batch_size)

    image_count = 0

    # The context manager closes the progress bar even if inference raises
    with tqdm.tqdm(total=total_batches, desc="Evaluating model",
                   position=0, leave=True, ncols=100) as progress_bar, torch.no_grad():
        for batch_idx, (images, targets) in enumerate(dataloader):
            if batch_idx >= total_batches:
                break

            device_images = [img.to(device) for img in images]
            predictions = model(device_images)

            # Process results for each image in the batch
            for prediction, target in zip(predictions, targets):
                if num_images and image_count >= num_images:
                    break

                image_id = target["image_id"].item()
                filename = target["orig_filename"]

                # Keep only detections at or above the score threshold
                keep_indices = prediction['scores'] >= score_threshold
                boxes = prediction['boxes'][keep_indices].cpu().numpy()
                scores = prediction['scores'][keep_indices].cpu().numpy()
                labels = prediction['labels'][keep_indices].cpu().numpy()

                # Store one record per detection, converting corner boxes to [x, y, w, h]
                for (x1, y1, x2, y2), score, label in zip(boxes, scores, labels):
                    results.append({
                        'image_id': image_id,
                        'filename': filename,
                        'category_id': int(label),
                        'bbox': [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
                        'score': float(score)
                    })

                image_count += 1

            progress_bar.update(1)

            # Stop once enough images have been processed
            if num_images and image_count >= num_images:
                break

    print(f"Evaluated {image_count} images, found {len(results)} detections")
    return results



def best_score_by_image(results):
    """Return ``{image_id: highest detection score}`` for a list of detection dicts."""
    best = {}
    for detection in results:
        image_id = detection['image_id']
        best[image_id] = max(detection['score'], best.get(image_id, detection['score']))
    return best


# Evaluate ResNet-18 model on the whole dataloader. Errors are allowed to
# propagate: the comparison below cannot run without both result lists.
print("Evaluating Faster R-CNN with ResNet-18 backbone...")
resnet18_results = evaluate_model(resnet18_model, test_dataloader)
print("ResNet-18 evaluation completed.")

# Evaluate ResNet-50 model on the whole dataloader
print("\nEvaluating Faster R-CNN with ResNet-50 backbone...")
resnet50_results = evaluate_model(resnet50_model, test_dataloader)
print("ResNet-50 evaluation completed.")

# Compare results per image. The result lists hold one entry per detection, so
# entry i of one model is unrelated to entry i of the other; match on image_id
# and compare each model's highest-scoring detection instead.
print("\nComparison of results:")
resnet18_best = best_score_by_image(resnet18_results)
resnet50_best = best_score_by_image(resnet50_results)

for image_id in sorted(set(resnet18_best) | set(resnet50_best)):
    resnet18_score = resnet18_best.get(image_id)
    resnet50_score = resnet50_best.get(image_id)

    if resnet18_score is None or resnet50_score is None:
        silent_model = "ResNet-18" if resnet18_score is None else "ResNet-50"
        print(f"Image ID {image_id}: {silent_model} produced no detections for this image.")
        continue

    print(f"Image ID {image_id}: ResNet-18 Score = {resnet18_score:.4f}, ResNet-50 Score = {resnet50_score:.4f}")
    if resnet50_score > resnet18_score:
        print(f"  -> ResNet-50 performed better on this image.")
    elif resnet18_score > resnet50_score:
        print(f"  -> ResNet-18 performed better on this image.")
    else:
        print(f"  -> Both models performed equally on this image.")

In [11]:
# Number of ResNet-18 detections kept (one list entry per detection, not per image)
print(len(resnet18_results))

8171


## Visualize Predictions and Ground Truths

Plot images with model predictions (bounding boxes and labels) alongside ground truth annotations for comparison.

In [None]:
def visualize_combined(image_path, model_results, ground_truth, model_name="Model", class_names=None):
    """
    Visualize detections and ground truth on the same image with different colored boxes

    Args:
        image_path: Path to the image
        model_results: List of detection dictionaries for this image; each needs
            'bbox' as [x, y, width, height], 'category_id' and 'score'
        ground_truth: Ground truth annotation dictionary for this image with
            "boxes" ([x_min, y_min, x_max, y_max] rows) and "labels" tensors
        model_name: Name to display for the model
        class_names: List of class names indexed by label id. Defaults to
            ['background', 'drone', 'bird']

    Returns:
        The matplotlib figure, or None when the image could not be loaded.
    """
    if class_names is None:
        # Must follow the label ids used by the dataset in this notebook (1 = drone, 2 = bird)
        class_names = ['background', 'drone', 'bird']

    def name_of(class_idx):
        """Look up the display name for a label id, falling back to 'Class <id>'."""
        return class_names[class_idx] if class_idx < len(class_names) else f"Class {class_idx}"

    # Load image (cv2 returns None on failure and reads in BGR order)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not load image at {image_path}")
        return None

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Create figure and display the image
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    ax.imshow(img)
    ax.set_title(f"{model_name} Predictions & Ground Truth - {os.path.basename(image_path)}")

    # Plot ground truth boxes in green; the label goes just above each box
    for box, label_idx in zip(ground_truth["boxes"], ground_truth["labels"]):
        x1, y1, x2, y2 = box.tolist()
        rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                 linewidth=2, edgecolor='green', facecolor='none')
        ax.add_patch(rect)
        ax.text(x1, y1-5, f"GT: {name_of(label_idx.item())}", color='white',
                fontsize=10, bbox=dict(facecolor='green', alpha=0.7))

    # Plot predicted boxes in blue; the label goes just below each box
    for detection in model_results:
        x1, y1, width, height = detection['bbox']
        score = detection['score']
        rect = patches.Rectangle((x1, y1), width, height,
                                 linewidth=2, edgecolor='blue', facecolor='none')
        ax.add_patch(rect)
        ax.text(x1, y1+height+15, f"Pred: {name_of(detection['category_id'])} ({score:.2f})", color='white',
                fontsize=10, bbox=dict(facecolor='blue', alpha=0.7))

    # Add a legend
    legend_elements = [
        patches.Patch(edgecolor='green', facecolor='none', linewidth=2, label='Ground Truth'),
        patches.Patch(edgecolor='blue', facecolor='none', linewidth=2, label='Model Prediction')
    ]
    ax.legend(handles=legend_elements, loc='upper right')

    plt.tight_layout()
    return fig

def create_combined_visualizations(dataset, resnet18_results, resnet50_results, num_samples=5):
    """
    Create combined visualizations showing both ground truth and predictions on the same image

    Only images for which BOTH models produced at least one detection are
    considered. Class names come from the notebook-level `class_names` list.

    Args:
        dataset: Dataset exposing `img_dir` and returning (image, target) per
            index, where target has "orig_filename", "boxes" and "labels"
        resnet18_results: Detection dictionaries from the ResNet-18 model
        resnet50_results: Detection dictionaries from the ResNet-50 model
        num_samples: Maximum number of images to visualize
    """
    def group_by_image(results):
        """Group detection dictionaries by their 'image_id'."""
        grouped = {}
        for result in results:
            grouped.setdefault(result['image_id'], []).append(result)
        return grouped

    resnet18_by_id = group_by_image(resnet18_results)
    resnet50_by_id = group_by_image(resnet50_results)
    models = (("ResNet-18", resnet18_by_id), ("ResNet-50", resnet50_by_id))

    # Find image IDs with detections from both models
    common_ids = list(set(resnet18_by_id) & set(resnet50_by_id))

    # Select random samples to visualize
    if len(common_ids) > num_samples:
        sample_ids = np.random.choice(common_ids, size=num_samples, replace=False)
    else:
        sample_ids = common_ids[:num_samples]

    # Create visualizations for each sample
    for idx, image_id in enumerate(sample_ids):
        # Only the ground truth is needed here; the image is re-read from disk for plotting
        _, target = dataset[image_id]
        filename = target['orig_filename']
        img_path = os.path.join(dataset.img_dir, filename)

        print(f"\nVisualization {idx+1}/{len(sample_ids)}: Image ID {image_id} ({filename})")

        for model_name, by_id in models:
            print(f"{model_name} model predictions with ground truth:")
            fig = visualize_combined(
                img_path,
                by_id[image_id],
                target,
                model_name=model_name,
                class_names=class_names
            )
            plt.show()
            # Release the figure so that a large num_samples does not accumulate open figures
            if fig is not None:
                plt.close(fig)

        print(f"Detection counts - ResNet-18: {len(resnet18_by_id[image_id])}, ResNet-50: {len(resnet50_by_id[image_id])}")


In [None]:
# Render ground truth + predictions for up to 500 images that both models detected something in
print("\nVisualizing results with combined ground truth and predictions...")
create_combined_visualizations(
    test_dataset,
    resnet18_results,
    resnet50_results,
    num_samples=500,
)


## Side-by-Side Results

In [19]:
def visualize_side_by_side(image_path, resnet18_results, resnet50_results, ground_truth, class_names=None):
    """
    Visualize both ResNet-18 and ResNet-50 predictions side by side with ground truth

    Args:
        image_path: Path to the image
        resnet18_results: Detection results from ResNet-18 model; each needs
            'bbox' as [x, y, width, height], 'category_id' and 'score'
        resnet50_results: Detection results from ResNet-50 model (same format)
        ground_truth: Ground truth annotation dictionary with "boxes"
            ([x_min, y_min, x_max, y_max] rows) and "labels" tensors
        class_names: List of class names indexed by label id. Defaults to
            ['background', 'drone', 'bird']

    Returns:
        The matplotlib figure, or None when the image could not be loaded.
    """
    if class_names is None:
        # Must follow the label ids used by the dataset in this notebook (1 = drone, 2 = bird)
        class_names = ['background', 'drone', 'bird']

    def name_of(class_idx):
        """Look up the display name for a label id, falling back to 'Class <id>'."""
        return class_names[class_idx] if class_idx < len(class_names) else f"Class {class_idx}"

    # Load image (cv2 returns None on failure and reads in BGR order)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not load image at {image_path}")
        return None

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Create figure with two subplots side by side
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))
    fig.suptitle(f"Model Comparison - {os.path.basename(image_path)}", fontsize=16)

    gt_boxes = ground_truth["boxes"]
    gt_labels = ground_truth["labels"]

    # Left panel: ResNet-18 in blue. Right panel: ResNet-50 in red.
    panels = (
        (ax1, "ResNet-18", resnet18_results, 'blue'),
        (ax2, "ResNet-50", resnet50_results, 'red'),
    )

    for ax, model_name, detections, color in panels:
        ax.imshow(img)
        ax.set_title(f"{model_name} Predictions")

        # Ground truth boxes in green, labelled just above each box
        for box, label_idx in zip(gt_boxes, gt_labels):
            x1, y1, x2, y2 = box.tolist()
            ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                           linewidth=2, edgecolor='green', facecolor='none'))
            ax.text(x1, y1-5, f"GT: {name_of(label_idx.item())}", color='white',
                    fontsize=9, bbox=dict(facecolor='green', alpha=0.7))

        # Model predictions in the panel colour, labelled just below each box
        for detection in detections:
            x1, y1, width, height = detection['bbox']
            score = detection['score']
            ax.add_patch(patches.Rectangle((x1, y1), width, height,
                                           linewidth=2, edgecolor=color, facecolor='none'))
            ax.text(x1, y1+height+15, f"{name_of(detection['category_id'])}: {score:.2f}", color='white',
                    fontsize=9, bbox=dict(facecolor=color, alpha=0.7))

        # Legend
        ax.legend(handles=[
            patches.Patch(edgecolor='green', facecolor='none', linewidth=2, label='Ground Truth'),
            patches.Patch(edgecolor=color, facecolor='none', linewidth=2, label=f'{model_name} Prediction')
        ], loc='upper right')

        # Remove axis ticks
        ax.set_xticks([])
        ax.set_yticks([])

    plt.tight_layout()
    plt.subplots_adjust(top=0.9)  # Adjust for the suptitle
    return fig

def create_side_by_side_visualizations(dataset, resnet18_results, resnet50_results, num_samples=5):
    """
    Create side-by-side visualizations comparing ResNet-18 and ResNet-50 predictions

    Images where both models produced detections are preferred; if there are
    none, images with detections from either model are used instead. Class
    names come from the notebook-level `class_names` list.

    Args:
        dataset: Dataset exposing `img_dir` and returning (image, target) per
            index, where target has "orig_filename", "boxes" and "labels"
        resnet18_results: Detection dictionaries from the ResNet-18 model
        resnet50_results: Detection dictionaries from the ResNet-50 model
        num_samples: Maximum number of images to visualize
    """
    def group_by_image(results):
        """Group detection dictionaries by their 'image_id'."""
        grouped = {}
        for result in results:
            grouped.setdefault(result['image_id'], []).append(result)
        return grouped

    resnet18_by_id = group_by_image(resnet18_results)
    resnet50_by_id = group_by_image(resnet50_results)

    # Find image IDs where both models made predictions
    common_ids = list(set(resnet18_by_id) & set(resnet50_by_id))

    if len(common_ids) == 0:
        print("No common detections found between models!")
        # Fall back to images with at least one model's predictions
        all_ids = list(set(resnet18_by_id) | set(resnet50_by_id))
        if len(all_ids) == 0:
            return
        print(f"Showing {min(num_samples, len(all_ids))} images with at least one model's predictions")
        common_ids = all_ids

    # Select random samples to visualize
    if len(common_ids) > num_samples:
        sample_ids = np.random.choice(common_ids, size=num_samples, replace=False)
    else:
        sample_ids = common_ids[:num_samples]

    # Create visualizations for each sample
    for idx, image_id in enumerate(sample_ids):
        # Only the ground truth is needed here; the image is re-read from disk for plotting
        _, target = dataset[image_id]
        filename = target['orig_filename']
        img_path = os.path.join(dataset.img_dir, filename)

        # Get results for this image (or empty list if none)
        r18_results = resnet18_by_id.get(image_id, [])
        r50_results = resnet50_by_id.get(image_id, [])

        print(f"\nVisualization {idx+1}/{len(sample_ids)}: Image ID {image_id} ({filename})")
        print(f"  ResNet-18: {len(r18_results)} detections, ResNet-50: {len(r50_results)} detections")

        # Create side-by-side visualization
        fig = visualize_side_by_side(
            img_path,
            r18_results,
            r50_results,
            target,
            class_names=class_names
        )
        plt.show()
        # Release the figure so that repeated iterations do not accumulate open figures
        if fig is not None:
            plt.close(fig)



In [None]:
# Render the two models next to each other for up to 20 images
print("\nCreating side-by-side visualizations of ResNet-18 and ResNet-50 predictions...")
create_side_by_side_visualizations(
    test_dataset,
    resnet18_results,
    resnet50_results,
    num_samples=20,
)

## Summary and Conclusion

Summarize the evaluation results and compare the performance of the ResNet-18 and ResNet-50 Faster R-CNN models.

In [17]:
import json

def save_results_to_json(results, filename):
    """Save detection results to a JSON file

    Args:
        results: JSON-serialisable object (here, a list of detection dicts).
        filename: Destination path. Missing parent directories are created.
    """
    parent_dir = os.path.dirname(filename)
    # A bare filename has no directory part, and os.makedirs('') would raise
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, 'w') as f:
        json.dump(results, f, indent=4)
    print(f"Results saved to {filename}")

# Save each model's detections to its own JSON file
for model_results, output_path in (
    (resnet18_results, 'e:/ML/VIP_Cup/results/resnet18_results.json'),
    (resnet50_results, 'e:/ML/VIP_Cup/results/resnet50_results.json'),
):
    save_results_to_json(model_results, output_path)


Results saved to e:/ML/VIP_Cup/results/resnet18_results.json
Results saved to e:/ML/VIP_Cup/results/resnet50_results.json
