In [None]:
import torch
import torchvision
from torchvision.transforms import functional as F
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
# Build the pre-trained Faster R-CNN (ResNet-50 + FPN) detector and switch it to
# inference mode. nn.Module.eval() returns the module itself, so the call can be chained.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).eval()

# Class names indexed by the label ids the torchvision COCO detection models emit.
# The models use the original 91-slot COCO id space, in which several ids are
# unused; those slots are kept as 'N/A' so that list index == label id.
# (Dropping them shifts every name after 'fire hydrant' and makes ids >= 81
# fall off the end of the list.)
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
def detect_objects(image_path, confidence_threshold=0.5):
    """
    Detect objects in an image using the pre-trained Faster R-CNN model.

    Parameters:
    - image_path: Path to the input image.
    - confidence_threshold: Minimum score a detection needs in order to be drawn.

    Returns:
    - A copy of the image (BGR, as loaded by OpenCV) with a green box and a
      "label: score" caption drawn for every kept detection, or None if the
      image could not be loaded.
    """
    # Load image (OpenCV returns the pixels in BGR channel order)
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image from '{image_path}'. Please check the file path and ensure the image exists.")
        return None

    original_image = image.copy()

    # The pre-trained weights expect RGB input, so swap the channels before
    # converting to a float tensor scaled to [0, 1]. Drawing still happens on
    # the untouched BGR copy.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_tensor = F.to_tensor(rgb_image)

    with torch.no_grad():
        predictions = model([image_tensor])
    boxes = predictions[0]['boxes'].cpu().numpy()
    labels = predictions[0]['labels'].cpu().numpy()
    scores = predictions[0]['scores'].cpu().numpy()

    num_names = len(COCO_INSTANCE_CATEGORY_NAMES)
    for box, class_id, score in zip(boxes, labels, scores):
        if score >= confidence_threshold:
            class_id = int(class_id)
            # Fall back to a generic name instead of raising IndexError when the
            # model emits an id the name table does not cover.
            if 0 <= class_id < num_names:
                label = COCO_INSTANCE_CATEGORY_NAMES[class_id]
            else:
                label = f"class {class_id}"

            start_point = (int(box[0]), int(box[1]))
            end_point = (int(box[2]), int(box[3]))
            # Keep the caption baseline inside the image for boxes touching the top edge.
            text_origin = (start_point[0], max(start_point[1], 15))
            cv2.rectangle(original_image, start_point, end_point, (0, 255, 0), 2)
            cv2.putText(original_image, f"{label}: {score:.2f}", text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    return original_image

if __name__ == "__main__":
    # Picture to analyse -- point this at your own file.
    IMAGE_PATH = "/content/cars.jpg"

    # Run detection with the default confidence threshold.
    detected_image = detect_objects(IMAGE_PATH)

    # detect_objects returns None when the image cannot be read, so only
    # render when an annotated image actually came back.
    if not (detected_image is None):
        cv2_imshow(detected_image)


In [None]:
import torch
import torchvision
from torchvision.transforms import functional as F
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
import os

# Pick the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the pre-trained Faster R-CNN, move it to the chosen device and put it
# in inference mode. nn.Module.to() returns the module itself.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model = model.to(device)
model.eval()

# Class names indexed by the label ids the torchvision COCO detection models emit.
# The models use the original 91-slot COCO id space, in which several ids are
# unused; those slots are kept as 'N/A' so that list index == label id.
# (Dropping them shifts every name after 'fire hydrant' and makes ids >= 81
# fall off the end of the list.)
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def detect_objects(image_path, confidence_threshold=0.5, output_path=None, filter_classes=None, resize_to=(800, 800)):
    """
    Detect objects in an image using a pre-trained Faster R-CNN model.

    Parameters:
    - image_path: Path to the input image.
    - confidence_threshold: Confidence score threshold for displaying detections.
    - output_path: Optional path to save the annotated output image.
    - filter_classes: List of class names to filter detections (default: None for all classes).
    - resize_to: (width, height) the image is resized to before detection and
      drawing. The default (800, 800) does not preserve the aspect ratio; pass
      None to keep the image at its original size.

    Returns:
    - Annotated image (BGR, as used by OpenCV) with detected objects drawn on
      it, or None if the image could not be loaded.
    """
    # Load image (OpenCV returns the pixels in BGR channel order)
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image from '{image_path}'. Please check the file path and ensure the image exists.")
        return None

    # Resize image for consistency (optional)
    if resize_to is not None:
        image = cv2.resize(image, tuple(resize_to))
    original_image = image.copy()

    # The pre-trained weights expect RGB input, so swap the channels before
    # converting to a float tensor scaled to [0, 1]. Drawing still happens on
    # the BGR copy so the saved/displayed colours stay correct.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_tensor = F.to_tensor(rgb_image).to(device)

    with torch.no_grad():
        predictions = model([image_tensor])

    boxes = predictions[0]['boxes'].cpu().numpy()
    labels = predictions[0]['labels'].cpu().numpy()
    scores = predictions[0]['scores'].cpu().numpy()

    num_names = len(COCO_INSTANCE_CATEGORY_NAMES)
    class_colors = {}  # class id -> BGR colour, so every instance of a class matches

    for box, class_id, score in zip(boxes, labels, scores):
        if score >= confidence_threshold:
            class_id = int(class_id)
            # Fall back to a generic name instead of raising IndexError when the
            # model emits an id the name table does not cover.
            if 0 <= class_id < num_names:
                label = COCO_INSTANCE_CATEGORY_NAMES[class_id]
            else:
                label = f"class {class_id}"

            # Skip if filtering specific classes
            if filter_classes and label not in filter_classes:
                continue

            # One colour per class, derived from the class id with a private
            # generator: reproducible across runs and leaves NumPy's global
            # random state untouched.
            if class_id not in class_colors:
                color_rng = np.random.default_rng(class_id)
                class_colors[class_id] = tuple(color_rng.integers(0, 256, size=3).tolist())
            color = class_colors[class_id]

            start_point = (int(box[0]), int(box[1]))
            end_point = (int(box[2]), int(box[3]))
            # Keep the caption baseline inside the image for boxes touching the top edge.
            text_origin = (start_point[0], max(start_point[1], 15))
            cv2.rectangle(original_image, start_point, end_point, color, 2)
            cv2.putText(original_image, f"{label}: {score:.2f}", text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Save output image if a path is provided; cv2.imwrite reports failure via
    # its return value, so only announce success when the file was written.
    if output_path:
        if cv2.imwrite(output_path, original_image):
            print(f"Output saved to: {output_path}")
        else:
            print(f"Error: Could not save output image to '{output_path}'.")

    return original_image

if __name__ == "__main__":
    # Input picture and destination for the annotated copy -- adjust both as needed.
    IMAGE_PATH = "/content/cars.jpg"
    OUTPUT_PATH = "/content/cars_detected.jpg"

    # Run detection, keep only cars and trucks scoring at least 0.6, and
    # write the annotated result to OUTPUT_PATH.
    detected_image = detect_objects(IMAGE_PATH, confidence_threshold=0.6, output_path=OUTPUT_PATH, filter_classes=['car', 'truck'])

    # A None result means the image could not be loaded; nothing to show then.
    if not (detected_image is None):
        cv2_imshow(detected_image)





  """
    Detect objects in an image using a pre-trained Faster R-CNN model.

    Parameters:
    - image_path: Path to the input image.
    - confidence_threshold: Confidence score threshold for displaying detections.
    - output_path: Optional path to save the annotated output image.
    - filter_classes: List of class names to filter detections (default: None for all classes).

    Returns:
    - Annotated image with detected objects drawn on it.
    """


In [None]:
# Key Changes:
# GPU Support: The model now uses GPU if available for faster inference (device variable).
# Input Image Resizing: The input image is resized to a fixed 800x800 size before detection so every input is processed at a consistent size (note: the aspect ratio is not preserved).
# Output Image Saving: Option to save the detected image to a specified path.
# Class Filtering: Added support for filtering detections by specific object classes.
# Distinct Visualization: Different colors for bounding boxes make it easier to differentiate objects.
# Improved Logging: Informative messages for errors and successful image saving.