In [1]:
import cv2
from ultralytics import YOLO

# Load a pre-trained YOLOv8 model
model = YOLO('yolov8n.pt')  # You can choose different versions like yolov8s.pt, yolov8m.pt, etc.

# Initialize video capture
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open video stream.")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break

    # Perform detection
    results = model(frame)

    # Draw bounding boxes
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            score = box.conf[0]
            label = int(box.cls[0])
            
            # Check if the detected object is a person (class label 0 in COCO dataset)
            if label == 0:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Display the resulting frame
    cv2.imshow('Real-Time Person Detection', frame)

    # Break the loop on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close the window
cap.release()
cv2.destroyAllWindows()



0: 480x640 1 person, 55.4ms
Speed: 4.1ms preprocess, 55.4ms inference, 17.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 16.1ms
Speed: 2.4ms preprocess, 16.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 17.0ms
Speed: 4.2ms preprocess, 17.0ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 16.2ms
Speed: 2.2ms preprocess, 16.2ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 16.1ms
Speed: 1.7ms preprocess, 16.1ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 16.2ms
Speed: 2.4ms preprocess, 16.2ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 15.9ms
Speed: 2.1ms preprocess, 15.9ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 16.5ms
Speed: 1.9ms preprocess, 16.5ms inference, 1.1ms postprocess per image at shape (1, 3,