In [1]:
pip install ultralytics opencv-python numpy


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load YOLOv8 segmentation model
yolo_model = YOLO("yolov8n-seg.pt")

# Open webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame")
        break

    # Convert frame to RGB for YOLO processing
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Apply YOLO segmentation
    results = yolo_model(rgb_frame)

    # Create a blank mask (single channel)
    silhouette_mask = np.zeros((frame.shape[0], frame.shape[1]), dtype=np.uint8)

    # Process YOLO segmentation results
    for result in results:
        if result.masks is not None:  
            for seg in result.masks.xy:
                pts = np.array(seg, dtype=np.int32)
                cv2.fillPoly(silhouette_mask, [pts], 255)  # Use 255 for white mask

    # Convert single-channel mask to 3 channels
    silhouette_mask_colored = cv2.cvtColor(silhouette_mask, cv2.COLOR_GRAY2BGR)

    # Apply mask to frame
    silhouette_output = cv2.bitwise_and(frame, silhouette_mask_colored)

    # Display the mask and silhouette output
    cv2.imshow("Mask Overlay", silhouette_mask)
    cv2.imshow("Silhouette Output", silhouette_output)

    # Press 'q' to exit the loop
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()



0: 480x640 2 persons, 517.9ms
Speed: 11.8ms preprocess, 517.9ms inference, 32.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 378.3ms
Speed: 6.2ms preprocess, 378.3ms inference, 12.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 302.4ms
Speed: 0.0ms preprocess, 302.4ms inference, 10.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 332.7ms
Speed: 7.9ms preprocess, 332.7ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 322.6ms
Speed: 0.0ms preprocess, 322.6ms inference, 13.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 377.0ms
Speed: 0.0ms preprocess, 377.0ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 351.3ms
Speed: 4.5ms preprocess, 351.3ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 341.6ms
Speed: 0.0ms preprocess, 341.6ms inference, 10.9ms postproces

In [3]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load YOLOv8 segmentation model
yolo_model = YOLO("yolov8n-seg.pt")  # Ensure you have the correct segmentation model

# Open webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame")
        break

    # Convert frame to RGB for YOLO processing
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Apply YOLO segmentation
    results = yolo_model(rgb_frame, imgsz=640)  # Increase image size for better segmentation

    # Create a blank mask with the same dimensions as the frame
    silhouette_mask = np.zeros((frame.shape[0], frame.shape[1]), dtype=np.uint8)

    # Process YOLO segmentation results
    for result in results:
        if result.masks is not None:  
            for seg in result.masks.xy:
                pts = np.array(seg, dtype=np.int32).reshape((-1, 1, 2))
                cv2.drawContours(silhouette_mask, [pts], -1, 255, thickness=cv2.FILLED)

    # Convert mask to 3 channels to overlay on the original frame
    silhouette_colored = cv2.cvtColor(silhouette_mask, cv2.COLOR_GRAY2BGR)

    # Apply mask: Show silhouettes as a semi-transparent overlay
    alpha = 0.6  # Adjust transparency (0 = invisible, 1 = fully visible)
    overlay = cv2.addWeighted(frame, 1 - alpha, silhouette_colored, alpha, 0)

    # Display the webcam feed with silhouettes
    cv2.imshow("Real-Time Silhouette Detection", overlay)

    # Press 'q' to exit the loop
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()



0: 480x640 1 person, 385.4ms
Speed: 4.8ms preprocess, 385.4ms inference, 15.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 410.4ms
Speed: 6.8ms preprocess, 410.4ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 359.2ms
Speed: 3.6ms preprocess, 359.2ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 337.3ms
Speed: 4.2ms preprocess, 337.3ms inference, 13.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 335.4ms
Speed: 4.4ms preprocess, 335.4ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 350.0ms
Speed: 3.1ms preprocess, 350.0ms inference, 10.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 382.2ms
Speed: 3.6ms preprocess, 382.2ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 333.8ms
Speed: 3.7ms preprocess, 333.8ms inference, 6.6ms postprocess per image

KeyboardInterrupt: 