In [None]:
pip install opencv-python numpy

In [None]:
pip show torch

In [None]:
pip install torch torchvision torchaudio

In [None]:
pip install torch torchvision opencv-python

# Real-Time Object Detection and Tracking with Faster R-CNN

## Overview

This project demonstrates the use of **Faster R-CNN**, a deep learning model for object detection, to detect and track objects in real-time video streams. By leveraging a pre-trained Faster R-CNN model, we can identify a variety of objects such as people, cars, animals, and more, with high accuracy and efficiency. The model is capable of processing each frame in real-time to detect and draw bounding boxes around the detected objects.

### Key Features:
- **Real-time object detection** using Faster R-CNN.
- **Pre-trained model** (ResNet-50 with Feature Pyramid Networks).
- **Live webcam or video input** for real-time detection.
- **Multiple object classification** from the COCO dataset (80 categories).
- **Bounding box visualization** with labels for each detected object.

---

## Technology Stack

- **Python**: Primary programming language for implementation.
- **PyTorch**: Deep learning framework for Faster R-CNN model.
- **OpenCV**: Computer vision library for video processing and displaying results.
- **Torchvision**: Provides pre-trained Faster R-CNN model.

---

## Installation

Follow the steps below to set up the project on your local machine:

1. Clone this repository:
   ```bash
   git clone https://github.com/your-username/object-detection.git
   cd object-detection
   ```

## Dependencies

- torch
- torchvision
- opencv-python
- Pillow
- numpy

## Workflow

1. **Input Frames**: Video frames are captured using OpenCV from a webcam or video file.
2. **Preprocessing**: Frames are converted into tensors for model input.
3. **Object Detection**: The Faster R-CNN model detects objects and outputs bounding boxes, labels, and confidence scores.
4. **Visualization**: Detected objects are highlighted with bounding boxes and labels using OpenCV.
5. **Display & Exit**: The processed frames are displayed in real time, and the program can be exited with the Esc key or by closing the window.

## Features

- **Real-Time Detection**: Processes frames sequentially for live object detection.
- **Accuracy**: Uses a model pre-trained on the COCO dataset to detect 80 object classes.
- **Applications**: Useful in surveillance, autonomous vehicles, and smart camera systems.


In [1]:
import cv2
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F

# Load pre-trained Faster R-CNN model
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=`; confirm the installed version before switching.
model = fasterrcnn_resnet50_fpn(pretrained=True).to(device)
# Inference only: put the network in evaluation mode.
model.eval()

# Define COCO dataset classes
# The list is indexed directly by the integer label id returned by the model,
# so its positions must line up with the original COCO category ids (0-90).
# Ids that COCO leaves unused are kept as 'N/A' placeholders; dropping any of
# them shifts every later name by one and makes id 90 fall off the end.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
    'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 'handbag',
    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
    'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
    'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# Function to draw bounding boxes and labels
def draw_boxes(image, boxes, labels, scores, threshold=0.5):
    """Draw a green box and class label for every confident detection.

    Args:
        image: Frame to annotate; OpenCV draws on it in place.
        boxes: Iterable of ``(x1, y1, x2, y2)`` corner coordinates.
        labels: Class ids used as indices into ``COCO_INSTANCE_CATEGORY_NAMES``.
        scores: Confidence score for each box, aligned with ``boxes``.
        threshold: Only detections scoring strictly above this are drawn.

    Returns:
        The same ``image`` object that was passed in, now annotated.
    """
    for box, label_id, score in zip(boxes, labels, scores):
        if not score > threshold:
            continue

        # Fall back to the raw id instead of crashing the video loop when the
        # model reports a class id the name table does not cover.
        label_id = int(label_id)
        if 0 <= label_id < len(COCO_INSTANCE_CATEGORY_NAMES):
            label = COCO_INSTANCE_CATEGORY_NAMES[label_id]
        else:
            label = str(label_id)

        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Keep the caption inside the frame for boxes that touch the top edge;
        # otherwise the text baseline would land at a negative y and vanish.
        text_y = max(y1 - 10, 15)
        cv2.putText(image, label, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    return image

# Open webcam or video file
cap = cv2.VideoCapture(0)  # Use 'filename.mp4' for video file

if not cap.isOpened():
    print("Error: Unable to open video source")
    # Raise SystemExit directly: the `exit()` helper is injected by the `site`
    # module for interactive use and is not guaranteed to exist everywhere.
    # A non-zero status also signals the failure to the caller.
    raise SystemExit(1)

# The try/finally guarantees the camera handle and the display window are
# released even if inference raises or the user interrupts with Ctrl-C.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Unable to read frame from video source")
            break

        # Preprocess the frame.
        # OpenCV delivers frames in BGR channel order, but the torchvision
        # detection model expects RGB; feeding BGR swaps red and blue and
        # degrades detection quality.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = F.to_tensor(rgb_frame).unsqueeze(0).to(device)

        # Perform object detection (no gradients needed for inference)
        with torch.no_grad():
            outputs = model(input_tensor)

        # Parse the outputs: one result dict per input image, batch size is 1
        detections = outputs[0]
        boxes = detections['boxes'].cpu().numpy()
        labels = detections['labels'].cpu().numpy()
        scores = detections['scores'].cpu().numpy()

        # Draw on the original BGR frame, which is what cv2.imshow expects
        frame = draw_boxes(frame, boxes, labels, scores)

        # Display the frame
        cv2.imshow("Object Detection", frame)

        # Close the window on 'Esc' key press or 'X' button click
        key = cv2.waitKey(1) & 0xFF
        if key == 27 or cv2.getWindowProperty("Object Detection", cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    # Release resources and close windows
    cap.release()
    cv2.destroyAllWindows()

