In [None]:
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
import torchvision.transforms as T
import torch
from google.colab.patches import cv2_imshow
import cv2



In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the pretrained Faster R-CNN (ResNet-50 FPN) detector and place it on that device
model = fasterrcnn_resnet50_fpn(pretrained=True).to(device)

# Switch to evaluation mode; as the cell's last expression this also displays the model
model.eval()



In [None]:
# Open the input video for frame-by-frame reading (edit video_path to use another file)
video_path = '/content/traffic.avi'
cap = cv2.VideoCapture(video_path)

# VideoCapture does not raise on a missing or unreadable file; without this check
# the later cells would silently process zero frames and write an empty video.
if not cap.isOpened():
    raise IOError(f'Could not open video file: {video_path}')

In [None]:

# Create the VideoWriter that saves the annotated frames as an .avi file
output_path = '/content/output_video_with_boxes_final.avi'
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Use 'XVID' codec for .avi

# Keep the frame rate as a float: truncating e.g. 29.97 to 29 would change the
# duration of the output video. Fall back to 30 fps when the container reports
# no usable rate (0 or NaN), since a writer opened with fps=0 is unplayable.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0

# Output frames must have exactly the same (width, height) as the input frames
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

# VideoWriter also fails silently (e.g. codec unavailable, bad frame size)
if not out.isOpened():
    raise IOError(f'Could not open video writer for: {output_path}')


In [None]:
# Detect cars in every frame, draw their boxes, and write the annotated video.
# The whole loop lives in a single cell: a loop body cannot continue in a
# following cell, so splitting it would leave the drawing/writing code unrun.

# ToTensor turns an HxWxC uint8 array into a CxHxW float tensor scaled to [0, 1]
transform = T.ToTensor()

confidence_threshold = 0.64  # minimum detection score to keep
car_label_id = 3             # 'car' in the COCO category ids predicted by the model
display_every = 30           # preview one frame in N inline; set to 1 to show every frame

# Process every frame (no frame skipping)
frame_count = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # OpenCV decodes frames as BGR, but the detector expects RGB input
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = transform(rgb_frame).unsqueeze(0).to(device)

        # Perform object detection (single image in the batch -> first result)
        with torch.no_grad():
            detections = model(input_tensor)[0]

        # Keep only confident detections whose predicted class is 'car';
        # without the label check every object would be drawn as "Car".
        keep = (detections['scores'] >= confidence_threshold) & (detections['labels'] == car_label_id)
        boxes = detections['boxes'][keep].int().cpu().tolist()  # plain Python ints for OpenCV
        scores = detections['scores'][keep].cpu().tolist()

        # Draw on the original BGR frame so the written video keeps correct colours
        for (x1, y1, x2, y2), confidence in zip(boxes, scores):
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Clamp the label position so it stays visible for boxes touching the top edge
            cv2.putText(frame, f'Car: {confidence:.2f}', (x1, max(y1 - 10, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Write the frame with bounding boxes to the output video
        out.write(frame)

        # Inline preview, throttled so a long video does not flood the notebook output.
        # (No cv2.waitKey / destroyAllWindows: Colab has no OpenCV GUI window to poll.)
        if (frame_count - 1) % display_every == 0:
            cv2_imshow(frame)
finally:
    # Always release both handles so the output file is finalised even on error
    cap.release()
    out.release()