In [3]:
import cv2
import torch
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn
import time

# Load the pre-trained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Open the video
video_path = 'vids\pexels-keira-burton-6145681 (540p).mp4'  # Replace with your video file path
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise Exception("Error opening video file")

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
out = cv2.VideoWriter('output_video.mp4', fourcc, 30.0, (int(cap.get(3)), int(cap.get(4))))


count_all_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

print(f"Total frames: {count_all_frames}")
# Define the transform to convert frame to tensor
transform = T.Compose([T.ToTensor()])

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    
    time_counter_start = time.time()

    # Convert frame to tensor and move to the device
    frame_tensor = transform(frame).unsqueeze(0).to(device)

    with torch.no_grad():
        predictions = model(frame_tensor)

    # Draw bounding boxes
    for element in range(len(predictions[0]['boxes'])):
        if predictions[0]['labels'][element] == 1:  # Label 1 corresponds to 'person'
            box = predictions[0]['boxes'][element].cpu().numpy()
            score = predictions[0]['scores'][element].cpu().numpy()
            if score > 0.5:  # Consider detections with a confidence score above 0.5
                cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0), 2)

    # Write the processed frame
    out.write(frame)
    
    time_counter_end = time.time()
    total_time = time_counter_end - time_counter_start
    fps = 1 / total_time
    print(f"Processed frame {count_all_frames}/{cap.get(1)}, Time: {total_time:.3f}, FPS: {fps:.3f}")

# Release everything when done
cap.release()
out.release()
cv2.destroyAllWindows()


Total frames: 240
Processed frame 240/1.0, Time: 0.203, FPS: 4.932
Processed frame 240/2.0, Time: 0.159, FPS: 6.282
Processed frame 240/3.0, Time: 0.152, FPS: 6.580
Processed frame 240/4.0, Time: 0.193, FPS: 5.193
Processed frame 240/5.0, Time: 0.160, FPS: 6.251
Processed frame 240/6.0, Time: 0.152, FPS: 6.558
Processed frame 240/7.0, Time: 0.155, FPS: 6.465
Processed frame 240/8.0, Time: 0.149, FPS: 6.722
Processed frame 240/9.0, Time: 0.153, FPS: 6.526
Processed frame 240/10.0, Time: 0.149, FPS: 6.730
Processed frame 240/11.0, Time: 0.149, FPS: 6.714
Processed frame 240/12.0, Time: 0.154, FPS: 6.506
Processed frame 240/13.0, Time: 0.164, FPS: 6.088
Processed frame 240/14.0, Time: 0.156, FPS: 6.422
Processed frame 240/15.0, Time: 0.154, FPS: 6.489
Processed frame 240/16.0, Time: 0.149, FPS: 6.706
Processed frame 240/17.0, Time: 0.160, FPS: 6.257
Processed frame 240/18.0, Time: 0.156, FPS: 6.418
Processed frame 240/19.0, Time: 0.157, FPS: 6.371
Processed frame 240/20.0, Time: 0.157, FP