In [2]:
! pip install ultralytics
from ultralytics import YOLO
import cv2

# Load the YOLOv5 model with explicit weight file
model = YOLO("yolov5su.pt")

def detect_and_count_objects(frame, target_classes=None):
    """
    Detect objects in the given frame, count specific classes if specified.
    
    Parameters:
    - frame: image array
    - target_classes: list of class names to count, e.g., ['person', 'car']
    
    Returns:
    - frame with detections drawn
    - dictionary of object counts by class
    """
    results = model(frame)
    detections = results[0].boxes.data  # Extract detection data
    
    # Count occurrences of specified target classes
    counts = {}
    for *box, conf, cls in detections:
        class_name = model.names[int(cls)]  # Get class name
        if target_classes is None or class_name in target_classes:
            counts[class_name] = counts.get(class_name, 0) + 1
            # Draw bounding box and label on frame
            x1, y1, x2, y2 = map(int, box)  # Bounding box coordinates
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{class_name} {conf:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    
    return frame, counts

def process_video(video_path, output_path="output.mp4", target_classes=None):
    cap = cv2.VideoCapture(video_path)
    
    # Get video properties for output
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    
    # Initialize VideoWriter with mp4 codec
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        processed_frame, counts = detect_and_count_objects(frame, target_classes)
        
        # Draw the count overlay on the video frame
        overlay_text = " | ".join([f"{cls}: {cnt}" for cls, cnt in counts.items()])
        cv2.putText(processed_frame, overlay_text, (10, 30), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
        
        # Write the frame to the output video
        out.write(processed_frame)
        
        # Display the processed frame
        cv2.imshow("Video Detections", processed_frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    
    # Release everything
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Example usage
target_classes = ['person', 'car']  # Specify classes you want to count, or use None to detect all classes
process_video("6366_vehicle_transport_transportation_170609ADelhi017720p5000br.mp4", "output.mp4", target_classes)



Collecting ultralytics
  Obtaining dependency information for ultralytics from https://files.pythonhosted.org/packages/9e/20/fcef1ebb10c8eb739d8ae8c3ead86c89d78c0121c23c5df5b85671ef3ef6/ultralytics-8.3.23-py3-none-any.whl.metadata
  Downloading ultralytics-8.3.23-py3-none-any.whl.metadata (35 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Obtaining dependency information for torch>=1.8.0 from https://files.pythonhosted.org/packages/78/18/7a2e56e2dc45a433dea9e1bf46a65e234294c9c470ccb4d4b53025f57b23/torch-2.5.0-cp311-cp311-win_amd64.whl.metadata
  Downloading torch-2.5.0-cp311-cp311-win_amd64.whl.metadata (28 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Obtaining dependency information for torchvision>=0.9.0 from https://files.pythonhosted.org/packages/b2/1b/b8eb51f87626c125cfa81f07488ab277e68e1c021c6cf2750d779eb61358/torchvision-0.20.0-cp311-cp311-win_amd64.whl.metadata
  Downloading torchvision-0.20.0-cp311-cp311-win_amd64.whl.metadata (6.2 kB)
Collecting ultralytics-thop>=

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov5su.pt to 'yolov5su.pt'...


100%|█████████████████████████████████████████████████████████████████████████████| 17.7M/17.7M [00:04<00:00, 3.94MB/s]



0: 384x640 3 persons, 10 cars, 2 motorcycles, 1 train, 1 truck, 379.7ms
Speed: 0.0ms preprocess, 379.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 10 cars, 2 motorcycles, 1 train, 2 trucks, 188.0ms
Speed: 9.9ms preprocess, 188.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 9 cars, 1 motorcycle, 1 train, 1 truck, 162.1ms
Speed: 10.7ms preprocess, 162.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 9 cars, 3 motorcycles, 1 train, 1 truck, 192.0ms
Speed: 0.0ms preprocess, 192.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 10 cars, 3 motorcycles, 1 truck, 223.2ms
Speed: 0.0ms preprocess, 223.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 9 cars, 3 motorcycles, 1 train, 1 truck, 194.0ms
Speed: 2.3ms preprocess, 194.0ms inference, 8.7ms postprocess per image at shape (1


0: 384x640 4 persons, 10 cars, 2 motorcycles, 184.7ms
Speed: 0.9ms preprocess, 184.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 10 cars, 3 motorcycles, 1 truck, 155.4ms
Speed: 1.0ms preprocess, 155.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 9 cars, 2 motorcycles, 1 truck, 177.3ms
Speed: 2.5ms preprocess, 177.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 12 cars, 3 motorcycles, 1 truck, 174.4ms
Speed: 1.8ms preprocess, 174.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 11 cars, 2 motorcycles, 1 truck, 191.3ms
Speed: 0.0ms preprocess, 191.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 12 cars, 2 motorcycles, 1 truck, 127.3ms
Speed: 0.0ms preprocess, 127.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 10 cars, 2 moto


0: 384x640 3 persons, 7 cars, 3 motorcycles, 1 truck, 159.4ms
Speed: 0.0ms preprocess, 159.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 8 cars, 3 motorcycles, 1 truck, 173.2ms
Speed: 2.0ms preprocess, 173.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 8 cars, 2 motorcycles, 1 truck, 166.9ms
Speed: 0.0ms preprocess, 166.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 9 cars, 3 motorcycles, 1 truck, 175.4ms
Speed: 0.0ms preprocess, 175.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 9 cars, 2 motorcycles, 158.2ms
Speed: 0.0ms preprocess, 158.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 10 cars, 2 motorcycles, 143.4ms
Speed: 0.0ms preprocess, 143.4ms inference, 15.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 9 cars, 2 motorcycles, 137.


0: 384x640 1 person, 6 cars, 1 motorcycle, 1 truck, 216.3ms
Speed: 0.0ms preprocess, 216.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 6 cars, 1 motorcycle, 1 truck, 232.4ms
Speed: 0.0ms preprocess, 232.4ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 6 cars, 1 motorcycle, 1 truck, 231.6ms
Speed: 2.0ms preprocess, 231.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 5 cars, 1 motorcycle, 1 truck, 204.4ms
Speed: 2.2ms preprocess, 204.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 6 cars, 1 motorcycle, 1 truck, 257.0ms
Speed: 2.0ms preprocess, 257.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 6 cars, 1 motorcycle, 1 truck, 217.8ms
Speed: 0.0ms preprocess, 217.8ms inference, 12.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 7 cars, 1 motorcycle, 1


0: 384x640 1 person, 6 cars, 1 motorcycle, 224.1ms
Speed: 0.0ms preprocess, 224.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 5 cars, 1 motorcycle, 298.3ms
Speed: 0.0ms preprocess, 298.3ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 5 cars, 1 motorcycle, 332.2ms
Speed: 0.0ms preprocess, 332.2ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 1 motorcycle, 227.2ms
Speed: 2.2ms preprocess, 227.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 1 motorcycle, 234.2ms
Speed: 0.0ms preprocess, 234.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 3 cars, 1 motorcycle, 219.4ms
Speed: 0.0ms preprocess, 219.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 3 cars, 1 motorcycle, 287.1ms
Speed: 0.0ms preprocess, 287.1ms inference, 1.9m


0: 384x640 1 person, 2 cars, 2 motorcycles, 205.3ms
Speed: 0.0ms preprocess, 205.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 motorcycle, 225.8ms
Speed: 0.7ms preprocess, 225.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 2 motorcycles, 277.1ms
Speed: 1.7ms preprocess, 277.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 motorcycle, 292.4ms
Speed: 0.0ms preprocess, 292.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 motorcycle, 239.0ms
Speed: 0.0ms preprocess, 239.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 1 motorcycle, 215.3ms
Speed: 2.0ms preprocess, 215.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 motorcycle, 244.9ms
Speed: 0.0ms preprocess, 244.9ms inference, 0.0ms postpr


0: 384x640 3 cars, 214.7ms
Speed: 2.0ms preprocess, 214.7ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 cars, 312.8ms
Speed: 6.0ms preprocess, 312.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 motorcycle, 256.9ms
Speed: 0.0ms preprocess, 256.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 motorcycle, 246.0ms
Speed: 0.0ms preprocess, 246.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 motorcycle, 279.0ms
Speed: 2.0ms preprocess, 279.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 motorcycle, 236.6ms
Speed: 0.0ms preprocess, 236.6ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 motorcycle, 362.1ms
Speed: 0.0ms preprocess, 362.1ms inference, 15.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 motorcycle, 236.3

Speed: 0.0ms preprocess, 410.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 299.1ms
Speed: 0.0ms preprocess, 299.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 334.4ms
Speed: 0.0ms preprocess, 334.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 363.0ms
Speed: 0.0ms preprocess, 363.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 284.6ms
Speed: 0.0ms preprocess, 284.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 427.4ms
Speed: 0.0ms preprocess, 427.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 355.1ms
Speed: 0.0ms preprocess, 355.1ms inference, 6.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 303.8ms
Speed: 3.8ms preprocess, 303.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 264.2ms



0: 384x640 1 bench, 291.4ms
Speed: 2.1ms preprocess, 291.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bench, 1 potted plant, 294.7ms
Speed: 3.0ms preprocess, 294.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 245.7ms
Speed: 3.0ms preprocess, 245.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 208.3ms
Speed: 2.4ms preprocess, 208.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 potted plant, 214.5ms
Speed: 2.4ms preprocess, 214.5ms inference, 6.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 207.5ms
Speed: 0.9ms preprocess, 207.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 243.4ms
Speed: 2.9ms preprocess, 243.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 potted plant, 274.5ms
Speed: 2.1ms preproc