In [None]:
!pip install ultralytics opencv-python-headless

Collecting ultralytics
  Downloading ultralytics-8.2.93-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.6-py3-none-any.whl.metadata (9.1 kB)
Downloading ultralytics-8.2.93-py3-none-any.whl (871 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m871.6/871.6 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading ultralytics_thop-2.0.6-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.2.93 ultralytics-thop-2.0.6


In [None]:
import cv2
import numpy as np
import torch
from ultralytics import YOLO

# Detect vehicles (car / bus / truck) inside a polygonal region of a video with
# YOLOv8, draw the detections and per-frame counts on each frame, and write the
# annotated frames to a new video file.

# ---- configuration ----------------------------------------------------------
CONF_THRESHOLD = 0.5                      # detections at or below this confidence are ignored
VEHICLE_CLASSES = ('car', 'bus', 'truck') # class names that are counted
FALLBACK_FPS = 30.0                       # used only when the source does not report a frame rate

video_path = '/kaggle/input/task2-mp4/Task2.mp4'
output_path = '/kaggle/working/Task2_output.mp4'

# Region of interest as (x, y) pixel vertices in the input video's coordinates.
polygon_coords = np.array([[472, 2119], [3384, 2103], [2604, 735], [1436, 711]], dtype=np.int32)

# ---- model ------------------------------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

model = YOLO('yolov8n.pt').to(device)

# ---- input video ------------------------------------------------------------
cap = cv2.VideoCapture(video_path)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# NOTE: these totals are the per-frame detection counts summed over every frame,
# so a vehicle visible for N frames contributes N. They are NOT unique-vehicle
# counts; that would require object tracking.
vehicle_count = {name: 0 for name in VEHICLE_CLASSES}

if not cap.isOpened():
    print("Error: Could not open video.")
    cap.release()
else:
    # Write the output at the source frame rate so playback speed is preserved.
    # `not fps > 0` also catches NaN, which some backends report.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = FALLBACK_FPS

    # The writer is created only after the input is known to be readable, so a
    # bad input path does not leave an empty output file / unreleased writer.
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    mask = None  # polygon mask; built on the first frame and reused afterwards

    try:
        if not out.isOpened():
            raise RuntimeError(f"Could not open video writer for: {output_path}")

        # process the video frame by frame
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # The mask depends only on the frame size, so build it once instead
            # of once per frame (rebuilt only if the frame size ever changes).
            if mask is None or mask.shape != frame.shape[:2]:
                mask = np.zeros(frame.shape[:2], dtype=np.uint8)
                cv2.fillPoly(mask, [polygon_coords], 255)

            # Black out everything outside the polygon so only vehicles inside
            # the region are detected. The frame is passed to the model in
            # OpenCV's native BGR order, which is what Ultralytics expects for
            # NumPy inputs; converting to RGB here would swap the channels.
            roi_frame = cv2.bitwise_and(frame, frame, mask=mask)

            # verbose=False suppresses the per-frame log lines.
            results = model(roi_frame, verbose=False)

            frame_count = {name: 0 for name in VEHICLE_CLASSES}

            for result in results:
                for box in result.boxes:
                    confidence = box.conf.item()
                    if confidence <= CONF_THRESHOLD:
                        continue

                    name = result.names[int(box.cls[0])]
                    if name in frame_count:
                        frame_count[name] += 1
                        vehicle_count[name] += 1

                    # bounding box and label (drawn for every confident detection)
                    x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"{name} {confidence:.2f}", (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # display this frame's vehicle counts in the top-left corner
            cv2.putText(frame, f"Cars: {frame_count['car']}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.putText(frame, f"Buses: {frame_count['bus']}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.putText(frame, f"Trucks: {frame_count['truck']}", (50, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            out.write(frame)
    finally:
        # Always release both handles, even if inference or writing raised, so
        # the output container is finalized and the input file is closed.
        cap.release()
        out.release()

    print(f"Output video saved to: {output_path}")
    print(f"Total Cars: {vehicle_count['car']}, Buses: {vehicle_count['bus']}, Trucks: {vehicle_count['truck']}")


Using device: cuda

0: 384x640 8 cars, 1 bus, 1 truck, 10.9ms
Speed: 2.7ms preprocess, 10.9ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 bus, 11.3ms
Speed: 3.6ms preprocess, 11.3ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 bus, 1 truck, 11.6ms
Speed: 3.5ms preprocess, 11.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 1 bus, 12.9ms
Speed: 3.8ms preprocess, 12.9ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 bus, 11.6ms
Speed: 3.6ms preprocess, 11.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 cars, 1 truck, 8.0ms
Speed: 3.3ms preprocess, 8.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 12.1ms
Speed: 4.1ms preprocess, 12.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 8.1ms
Speed: 3.7ms pre