In [2]:
#!/usr/bin/env python3
import os
import time
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import cv2
import requests
from PIL import Image
from io import BytesIO
import pandas as pd
import seaborn as sns
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.transforms import functional as F
from torchvision.utils import draw_bounding_boxes

In [3]:
# COCO class names
COCO_CLASSES = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
    'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]


In [4]:
# Create directory for images and results
os.makedirs('data', exist_ok=True)
os.makedirs('results', exist_ok=True)

In [5]:
def download_coco_sample_images(num_images=5):
    """Download sample images from COCO dataset"""
    # Use more reliable image sources
    image_urls = [
        "https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg",        # Person
        "https://github.com/ultralytics/yolov5/raw/master/data/images/bus.jpg",           # Bus
        "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/horses.jpg", # Horses
        "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/coco.jpg",   # Mixed objects
        "https://github.com/pjreddie/darknet/raw/master/data/dog.jpg"                     # Dog
    ]

    image_paths = []
    for i, url in enumerate(image_urls[:num_images]):
        try:
            print(f"Downloading image {i+1} from {url}")
            response = requests.get(url, stream=True)
            response.raise_for_status()  # Raise exception for HTTP errors

            save_path = f"data/image_{i+1}.jpg"

            # Save the image directly from response content
            with open(save_path, 'wb') as f:
                f.write(response.content)

            # Verify the image can be opened
            try:
                img = Image.open(save_path)
                img.verify()  # Verify it's a valid image
                image_paths.append(save_path)
                print(f"Successfully downloaded image {i+1} to {save_path}")
            except Exception as e:
                print(f"Downloaded file is not a valid image: {e}")
                continue

        except Exception as e:
            print(f"Error downloading image {i+1}: {e}")

    if not image_paths:
        # Fallback to local sample images if all downloads fail
        print("All downloads failed. Using local sample images or creating dummy images.")
        for i in range(num_images):
            # Create a simple colored image with text
            img = np.ones((640, 640, 3), dtype=np.uint8) * 255  # White background
            img = cv2.putText(img, f"Sample Image {i+1}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

            save_path = f"data/image_{i+1}.jpg"
            cv2.imwrite(save_path, img)
            image_paths.append(save_path)
            print(f"Created dummy image: {save_path}")

    return image_paths

def load_yolov5_model():
    """Load YOLOv5 model from PyTorch Hub"""
    print("Loading YOLOv5 model...")
    try:
        # First try to load directly from pytorch hub
        model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, trust_repo=True)
    except Exception as e:
        print(f"Error loading from hub directly: {e}")
        # Fallback to loading from GitHub
        try:
            print("Trying to load from GitHub...")
            model = torch.hub.load('ultralytics/yolov5:master', 'yolov5s', pretrained=True, force_reload=True, trust_repo=True)
        except Exception as e2:
            print(f"Error loading from GitHub: {e2}")
            # Final fallback - clone the repo manually
            print("Fallback: Cloning repository and loading locally...")
            import subprocess
            subprocess.run(["git", "clone", "https://github.com/ultralytics/yolov5.git"], check=True)
            import sys
            sys.path.append("yolov5")
            from models.experimental import attempt_load
            model = attempt_load('yolov5s.pt')

    model.eval()
    return model

def load_faster_rcnn_model():
    """Load Faster R-CNN model from torchvision"""
    print("Loading Faster R-CNN model...")
    try:
        # Try the latest API first
        weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
        model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.7)
    except Exception as e:
        print(f"Warning: Could not load V2 model, falling back to V1: {e}")
        # Fallback to older model version
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        model.roi_heads.score_thresh = 0.7

        # Create a simple transform function
        def transform(image):
            return torchvision.transforms.functional.to_tensor(image)
        return model, transform

    model.eval()
    return model, weights.transforms()

def predict_yolo(model, img_path):
    """Run prediction with YOLOv5 model"""
    try:
        start_time = time.time()
        results = model(img_path)
        inference_time = time.time() - start_time

        # Get predictions
        pred = results.pandas().xyxy[0]
        return pred, inference_time
    except Exception as e:
        print(f"Error in YOLOv5 prediction: {e}")
        # Return empty DataFrame and zero inference time in case of error
        return pd.DataFrame(), 0.0

def predict_faster_rcnn(model, transform, img_path):
    """Run prediction with Faster R-CNN model"""
    try:
        image = Image.open(img_path).convert("RGB")
        tensor_image = transform(image).unsqueeze(0)

        if torch.cuda.is_available():
            tensor_image = tensor_image.to('cuda')
            model = model.to('cuda')

        start_time = time.time()
        with torch.no_grad():
            predictions = model(tensor_image)
        inference_time = time.time() - start_time

        # Convert predictions to DataFrame format similar to YOLOv5
        boxes = predictions[0]['boxes'].cpu().numpy()
        scores = predictions[0]['scores'].cpu().numpy()
        labels = predictions[0]['labels'].cpu().numpy()

        results = []
        for box, score, label in zip(boxes, scores, labels):
            if score > 0.7:  # Filter by confidence
                x1, y1, x2, y2 = box
                label_idx = int(label) - 1  # Adjust for 0-indexing

                # Handle out of range indices
                if label_idx < 0 or label_idx >= len(COCO_CLASSES):
                    class_name = f"unknown_{label_idx}"
                else:
                    class_name = COCO_CLASSES[label_idx]

                results.append({
                    'xmin': x1,
                    'ymin': y1,
                    'xmax': x2,
                    'ymax': y2,
                    'confidence': float(score),
                    'class': label_idx,
                    'name': class_name
                })

        return pd.DataFrame(results), inference_time

    except Exception as e:
        print(f"Error in Faster R-CNN prediction: {e}")
        return pd.DataFrame(), 0.0

def visualize_results(img_path, yolo_pred, rcnn_pred, img_id):
    """Visualize detection results"""
    # Load image
    image = cv2.imread(img_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Create copies for visualization
    img_yolo = image.copy()
    img_rcnn = image.copy()

    # Draw YOLO predictions
    for _, row in yolo_pred.iterrows():
        x1, y1, x2, y2 = int(row['xmin']), int(row['ymin']), int(row['xmax']), int(row['ymax'])
        label = f"{row['name']}: {row['confidence']:.2f}"
        cv2.rectangle(img_yolo, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img_yolo, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Draw Faster R-CNN predictions
    for _, row in rcnn_pred.iterrows():
        x1, y1, x2, y2 = int(row['xmin']), int(row['ymin']), int(row['xmax']), int(row['ymax'])
        label = f"{row['name']}: {row['confidence']:.2f}"
        cv2.rectangle(img_rcnn, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(img_rcnn, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    # Create comparison figure
    plt.figure(figsize=(15, 10))

    plt.subplot(1, 3, 1)
    plt.imshow(image)
    plt.title("Original Image")
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.imshow(img_yolo)
    plt.title("YOLOv5 Detection")
    plt.axis('off')

    plt.subplot(1, 3, 3)
    plt.imshow(img_rcnn)
    plt.title("Faster R-CNN Detection")
    plt.axis('off')

    plt.savefig(f"results/comparison_{img_id}.jpg", bbox_inches='tight')
    plt.close()

def compare_metrics(yolo_results, rcnn_results):
    """Compare performance metrics between models"""
    metrics = {
        'Model': ['YOLOv5', 'Faster R-CNN'],
        'Avg Inference Time (s)': [
            np.mean([result['inference_time'] for result in yolo_results]),
            np.mean([result['inference_time'] for result in rcnn_results])
        ],
        'Avg Objects Detected': [
            np.mean([len(result['predictions']) for result in yolo_results]),
            np.mean([len(result['predictions']) for result in rcnn_results])
        ],
        'Avg Confidence': [
            np.mean([result['predictions']['confidence'].mean() if not result['predictions'].empty else 0
                    for result in yolo_results]),
            np.mean([result['predictions']['confidence'].mean() if not result['predictions'].empty else 0
                    for result in rcnn_results])
        ]
    }

    # Create DataFrame and visualize
    df = pd.DataFrame(metrics)
    print("\nPerformance Comparison:")
    print(df)

    # Generate comparison chart
    plt.figure(figsize=(12, 6))

    # Inference time comparison
    plt.subplot(1, 2, 1)
    sns.barplot(x='Model', y='Avg Inference Time (s)', data=df)
    plt.title('Average Inference Time (seconds)')
    plt.ylabel('Time (s)')

    # Objects detected comparison
    plt.subplot(1, 2, 2)
    sns.barplot(x='Model', y='Avg Objects Detected', data=df)
    plt.title('Average Objects Detected')
    plt.ylabel('Count')

    plt.tight_layout()
    plt.savefig("results/metrics_comparison.jpg")

    return df

def main():
    print("Comparing YOLOv5 and Faster R-CNN for object detection")

    # Check for GPU
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # Download sample images
    try:
        image_paths = download_coco_sample_images()
        if not image_paths:
            print("Failed to download or create any images. Exiting.")
            return
    except Exception as e:
        print(f"Error downloading images: {e}")
        return

    # Load models
    try:
        print("\n--- Loading Models ---")
        yolo_model = load_yolov5_model()
        rcnn_model, rcnn_transform = load_faster_rcnn_model()
    except Exception as e:
        print(f"Error loading models: {e}")
        return

    # Process images with both models
    yolo_results = []
    rcnn_results = []

    print("\n--- Running Inference ---")
    for i, img_path in enumerate(image_paths):
        print(f"\nProcessing image {i+1}: {img_path}")

        try:
            # YOLOv5 prediction
            yolo_pred, yolo_time = predict_yolo(yolo_model, img_path)
            print(f"YOLOv5 detected {len(yolo_pred)} objects in {yolo_time:.4f} seconds")
            yolo_results.append({
                'image_id': i+1,
                'predictions': yolo_pred,
                'inference_time': yolo_time
            })

            # Faster R-CNN prediction
            rcnn_pred, rcnn_time = predict_faster_rcnn(rcnn_model, rcnn_transform, img_path)
            print(f"Faster R-CNN detected {len(rcnn_pred)} objects in {rcnn_time:.4f} seconds")
            rcnn_results.append({
                'image_id': i+1,
                'predictions': rcnn_pred,
                'inference_time': rcnn_time
            })

            # Only visualize if both models produced results
            if not yolo_pred.empty or not rcnn_pred.empty:
                try:
                    visualize_results(img_path, yolo_pred, rcnn_pred, i+1)
                    print(f"Visualization saved for image {i+1}")
                except Exception as e:
                    print(f"Error visualizing results for image {i+1}: {e}")
        except Exception as e:
            print(f"Error processing image {i+1}: {e}")

    # Skip metrics comparison if no results
    if not yolo_results or not rcnn_results:
        print("No valid results to compare. Exiting.")
        return

    print("\n--- Comparing Metrics ---")
    try:
        # Compare metrics
        metrics_df = compare_metrics(yolo_results, rcnn_results)

        if metrics_df['Avg Inference Time (s)'][0] > 0:
            speed_ratio = metrics_df['Avg Inference Time (s)'][1] / metrics_df['Avg Inference Time (s)'][0]
            print(f"\nComparison Summary:")
            print(f"- YOLOv5 is {speed_ratio:.2f}x faster than Faster R-CNN")

        # Calculate class distribution
        yolo_classes = []
        rcnn_classes = []

        for result in yolo_results:
            if not result['predictions'].empty and 'name' in result['predictions'].columns:
                yolo_classes.extend(result['predictions']['name'].tolist())

        for result in rcnn_results:
            if not result['predictions'].empty and 'name' in result['predictions'].columns:
                rcnn_classes.extend(result['predictions']['name'].tolist())

        if yolo_classes:
            print("\nTop classes detected by YOLOv5:", pd.Series(yolo_classes).value_counts().head(5).to_dict())
        if rcnn_classes:
            print("Top classes detected by Faster R-CNN:", pd.Series(rcnn_classes).value_counts().head(5).to_dict())

        print("\nResults saved in the 'results' directory")
    except Exception as e:
        print(f"Error in metrics comparison: {e}")



In [6]:
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"Unhandled error: {e}")

Comparing YOLOv5 and Faster R-CNN for object detection
Using device: cuda
Downloading image 1 from https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg
Successfully downloaded image 1 to data/image_1.jpg
Downloading image 2 from https://github.com/ultralytics/yolov5/raw/master/data/images/bus.jpg
Successfully downloaded image 2 to data/image_2.jpg
Downloading image 3 from https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/horses.jpg
Error downloading image 3: 404 Client Error: Not Found for url: https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/horses.jpg
Downloading image 4 from https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/coco.jpg
Error downloading image 4: 404 Client Error: Not Found for url: https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/coco.jpg
Downloading image 5 from https://github.com/pjreddie/darknet/raw/master/data/dog.jpg
Successfully downloaded image 5 to data/

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2025-4-16 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Loading Faster R-CNN model...


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:01<00:00, 117MB/s]



--- Running Inference ---

Processing image 1: data/image_1.jpg


  with amp.autocast(autocast):


YOLOv5 detected 4 objects in 0.4918 seconds
Faster R-CNN detected 5 objects in 0.5442 seconds
Visualization saved for image 1

Processing image 2: data/image_2.jpg
YOLOv5 detected 5 objects in 0.0620 seconds


  with amp.autocast(autocast):


Faster R-CNN detected 7 objects in 0.1845 seconds
Visualization saved for image 2

Processing image 3: data/image_5.jpg
YOLOv5 detected 3 objects in 0.0420 seconds
Faster R-CNN detected 4 objects in 0.1302 seconds


  with amp.autocast(autocast):


Visualization saved for image 3

--- Comparing Metrics ---

Performance Comparison:
          Model  Avg Inference Time (s)  Avg Objects Detected  Avg Confidence
0        YOLOv5                0.198565              4.000000        0.711262
1  Faster R-CNN                0.286287              5.333333        0.929349

Comparison Summary:
- YOLOv5 is 1.44x faster than Faster R-CNN

Top classes detected by YOLOv5: {'person': 6, 'tie': 2, 'bus': 1, 'dog': 1, 'car': 1}
Top classes detected by Faster R-CNN: {'person': 6, 'snowboard': 4, 'bus': 1, 'traffic light': 1, 'bicycle': 1}

Results saved in the 'results' directory
