In [1]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load the YOLO model
model = YOLO("yolov8n-seg.pt")  # load an official model

# Open the webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    if not ret:
        print("Error: Failed to capture image.")
        break

    # Perform prediction
    results = model(frame)

    # Get the image with predictions
    img = results[0].plot()

    # Create a heatmap
    heatmap = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)

    # Overlay the prediction masks onto the heatmap
    if results[0].masks is not None:
        for mask in results[0].masks.data:
            mask = mask.cpu().numpy().squeeze()  # Convert mask to numpy array and remove single-dimensional entries
            mask_resized = cv2.resize(mask, (heatmap.shape[1], heatmap.shape[0]))  # Resize mask to match heatmap size
            heatmap = cv2.add(heatmap, mask_resized.astype(np.float32))

    # Normalize the heatmap
    heatmap = cv2.normalize(heatmap, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # Convert heatmap to RGB
    heatmap_img = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)

    # Combine the original image with the heatmap
    overlay_img = cv2.addWeighted(img, 0.6, heatmap_img, 0.4, 0)

    # Display the result
    cv2.imshow('Original Image with Predictions', img)
    cv2.imshow('Heatmap Overlay', overlay_img)

    # Press 'q' to quit the webcam stream
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt to 'yolov8n-seg.pt'...
100%|██████████| 6.73M/6.73M [00:00<00:00, 25.1MB/s]

0: 480x640 (no detections), 97.3ms
Speed: 1.8ms preprocess, 97.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 76.9ms
Speed: 1.4ms preprocess, 76.9ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 71.7ms
Speed: 1.3ms preprocess, 71.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 93.6ms
Speed: 1.2ms preprocess, 93.6ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 63.4ms
Speed: 1.0ms preprocess, 63.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 62.2ms
Speed: 1.2ms preprocess, 62.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 66.7ms
Speed: 1.1ms preprocess, 66.7ms inferenc

KeyboardInterrupt: 

: 