# In [None]:
import cv2
import numpy as np
import torch
import torchvision.models.detection as models_detection
import torchvision.transforms as transforms
from google.colab.patches import cv2_imshow

# Build torchvision's Faster R-CNN detector (ResNet-50 + FPN backbone) with
# pre-trained weights and put it in inference mode in a single expression.
# Module.eval() returns the module itself, so `model` is still the detector.
# NOTE(review): newer torchvision releases favor a `weights=` argument over
# `pretrained=True` — confirm against the installed version.
model = models_detection.fasterrcnn_resnet50_fpn(pretrained=True).eval()

# Create a function to preprocess image frames
def preprocess_image(frame):
    """Turn a single image frame into a batched tensor for the detector.

    The frame is passed through ``transforms.ToTensor()`` and a leading
    batch dimension of size one is then inserted at position 0.

    Args:
        frame: Image accepted by ``transforms.ToTensor`` (the caller in this
            file passes frames read with OpenCV).

    Returns:
        The converted tensor with an extra leading dimension.
    """
    to_tensor = transforms.Compose([transforms.ToTensor()])
    image_tensor = to_tensor(frame)
    batched = image_tensor.unsqueeze(0)
    return batched

# Initialize video capture
cap = cv2.VideoCapture('video.mp4')

# Initialize object tracker (e.g., using OpenCV's built-in tracker)
# NOTE(review): the tracker is created but never used by the loop below;
# it is kept only so the module-level name stays available.
tracker = cv2.TrackerKCF_create()

# Detections whose confidence score is below this value are not drawn.
SCORE_THRESHOLD = 0.5

try:
    # Fail loudly instead of silently skipping the loop when the video
    # cannot be opened (missing file, unsupported codec, ...).
    if not cap.isOpened():
        raise RuntimeError("Could not open video source 'video.mp4'")

    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or read failure): stop processing.
            break

        # OpenCV decodes frames in BGR channel order, while the torchvision
        # detection model expects RGB, so convert before preprocessing.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = preprocess_image(rgb_frame)

        # Run object detection without tracking gradients
        with torch.no_grad():
            output = model(input_tensor)

        # Extract bounding boxes, labels and confidence scores for the single
        # image in the batch; .cpu() keeps this working if the model is ever
        # moved to a GPU.
        detections = output[0]
        boxes = detections['boxes'].cpu().numpy()
        labels = detections['labels'].cpu().numpy()
        scores = detections['scores'].cpu().numpy()

        for box, label, score in zip(boxes, labels, scores):
            if score < SCORE_THRESHOLD:
                continue
            # Boxes are corner coordinates (x1, y1, x2, y2), not
            # (x, y, width, height), so draw between the two corners directly.
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f'Label: {label}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the (original BGR) frame with bounding boxes
        cv2_imshow(frame)

        # Press 'q' to exit
        # NOTE(review): key presses are delivered to OpenCV windows; with
        # cv2_imshow in a notebook there may be none — confirm this is reachable.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release video capture and close windows even if detection or drawing
    # raised part-way through the video.
    cap.release()
    cv2.destroyAllWindows()
