<a href="https://colab.research.google.com/github/akin-oladejo/scenes/blob/main/instance_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import numpy as np
import time
from ultralytics import YOLO

# --- Configuration ---
# Input to process: a path to a video file (for example "my_test_video.mp4"),
# or the string "0" to capture from the default webcam.
VIDEO_SOURCE = "0"

# Segmentation weights to load; this is the smallest and fastest model,
# which suits real-time use.
MODEL_PATH = "yolov8n-seg.pt"

# Minimum confidence a detection needs in order to be considered.
CONFIDENCE_THRESHOLD = 0.4

# File the processed video is written to.
OUTPUT_FILENAME = "segmented_output.mp4"

# Fixed, bright colors cycled over the people detected in a frame.
# OpenCV frames are BGR, so the tuples below are (blue, green, red).
_MASK_COLORS = (
    (0, 255, 255),  # Yellow
    (255, 0, 255),  # Magenta
    (255, 255, 0),  # Cyan
    (0, 255, 0),    # Green
    (0, 0, 255),    # Red
    (255, 0, 0),    # Blue
)


def _blend_mask(canvas, mask, color, opacity=0.4):
    """Tint, in place, the pixels of float32 *canvas* selected by boolean *mask*."""
    if not mask.any():
        # Nothing selected: skip the arithmetic on an empty slice.
        return
    tint = np.asarray(color, dtype=np.float32)
    canvas[mask] = canvas[mask] * (1.0 - opacity) + tint * opacity


def get_person_masks(frame_source=VIDEO_SOURCE, model_path=MODEL_PATH):
    """
    Run YOLOv8-Seg instance segmentation on a video stream or file.

    Every frame is passed through the model restricted to the 'person' class
    (class ID 0). Each returned mask is tinted with a color from a fixed
    palette (40% color, 60% original frame), the result is shown in an OpenCV
    window and appended to OUTPUT_FILENAME. Processing stops when the source
    runs out of frames or when 'q' is pressed in the window.

    Args:
        frame_source: Path/URL of a video, or a camera index given either as
            an int or as a digit string such as '0'.
        model_path: Path or name of the YOLOv8 segmentation weights.

    Returns:
        None. Errors while loading the model or opening the source are
        reported with print() and the function returns early.
    """

    # --- Task 1: YOLOv8 Model Initialization ---
    try:
        model = YOLO(model_path)
    except Exception as e:
        print(f"Error loading model: {e}")
        print("Please ensure the model file path is correct and ultralytics is installed.")
        return

    # --- Task 2: Basic Video Processing Pipeline Setup ---
    # Accept both 0 and '0' for the webcam; going through str() avoids an
    # AttributeError when the caller passes an int.
    source_text = str(frame_source)
    source = int(source_text) if source_text.isdigit() else frame_source
    cap = cv2.VideoCapture(source)

    if not cap.isOpened():
        print(f"Error: Could not open video source {frame_source}.")
        cap.release()
        return

    # Many webcams report 0 (or NaN) for CAP_PROP_FPS, which would produce an
    # unusable output file; `not fps > 0` also catches NaN.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    # --- Task 5: Video Writer (created lazily, see the loop) ---
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
    writer = None
    writer_ok = False
    frames_written = 0

    # Variables for FPS calculation
    frame_count = 0
    fps_text = None
    start_time = time.time()

    print(f"--- Starting Segmentation ({'Webcam' if source == 0 else source}) ---")
    print(f"Outputting to: {OUTPUT_FILENAME}")

    # try/finally guarantees the capture, writer and window are released even
    # if inference raises or the user interrupts the loop.
    try:
        # --- Main Processing Loop ---
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            frame_count += 1
            frame_h, frame_w = frame.shape[:2]

            if writer is None:
                # Size the writer from a real frame: the capture properties
                # can be 0 or differ from what is actually decoded, and
                # VideoWriter needs frames of exactly the size it was
                # opened with.
                writer = cv2.VideoWriter(OUTPUT_FILENAME, fourcc, fps, (frame_w, frame_h))
                writer_ok = writer.isOpened()
                if not writer_ok:
                    print(f"Warning: Could not open {OUTPUT_FILENAME} for writing; output will not be saved.")

            # --- Task 3: YOLOv8 Inference ---
            # Only class 0 (person) is requested. retina_masks=True makes the
            # returned masks match the original frame size; without it they
            # are at the padded inference size and a plain resize can shift
            # them relative to the image.
            # Runs on the CPU by default; change 'cpu' to 'cuda' for a GPU.
            results = model(
                frame,
                conf=CONFIDENCE_THRESHOLD,
                classes=[0],
                verbose=False,
                device='cpu',
                retina_masks=True,
            )
            res = results[0]

            # Blend in float32 so the weighted sum is not truncated per step.
            processed_frame = frame.astype(np.float32)

            # --- Task 4: Mask Extraction and Processing ---
            if res.masks is not None:
                masks_data = res.masks.data.cpu().numpy()

                # Every mask is a person because of classes=[0].
                for i, mask_data in enumerate(masks_data):
                    mask = mask_data.astype(bool)
                    if mask.shape[:2] != (frame_h, frame_w):
                        # Defensive fallback in case the masks still come
                        # back at a different resolution than the frame.
                        mask = cv2.resize(
                            mask.astype(np.uint8),
                            (frame_w, frame_h),
                            interpolation=cv2.INTER_NEAREST,
                        ).astype(bool)

                    # Colors cycle by index within the frame; they are not
                    # tied to a person's identity across frames.
                    _blend_mask(processed_frame, mask, _MASK_COLORS[i % len(_MASK_COLORS)])

            # Convert back to uint8 for visualization
            processed_frame_uint8 = processed_frame.astype(np.uint8)

            # --- Task 5: Visualization and FPS Calculation ---
            # Refresh the average FPS every 10 frames, but draw the latest
            # value on every frame so the overlay does not flicker.
            if frame_count % 10 == 0:
                elapsed_time = time.time() - start_time
                if elapsed_time > 0:
                    fps_text = f"FPS: {frame_count / elapsed_time:.2f}"

            if fps_text is not None:
                cv2.putText(processed_frame_uint8, fps_text, (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Display the processed frame
            cv2.imshow('YOLOv8 Instance Segmentation', processed_frame_uint8)

            # Write the processed frame to the output file
            if writer_ok:
                writer.write(processed_frame_uint8)
                frames_written += 1

            # Exit condition: press 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # --- Cleanup ---
        cap.release()
        if writer is not None:
            writer.release()
        cv2.destroyAllWindows()

    print("\n--- Segmentation Finished ---")
    if frames_written:
        print(f"Output saved to: {OUTPUT_FILENAME}")
    else:
        print("No frames were written; no output was saved.")

# Script entry point: run the segmentation with the default arguments
# (VIDEO_SOURCE and MODEL_PATH) when this file is executed directly.
if __name__ == "__main__":
    get_person_masks()