# **Segmentation Pre-trained YOLO MODEL**

Image segmentation with YOLO represents one of the most advanced techniques in computer vision, allowing specific objects to be identified and delimited in real time.

In [None]:
# !pip install ultralytics (Don't forget install it)

import cv2
import time
import numpy as np
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file  
View Ultralytics Settings with 'yolo settings' or at 'C:\Users\AlexisBenitez\AppData\Roaming\Ultralytics\settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


For segmentation, we'll use YOLOv11 in its "nano" version, which is the smallest model available. This choice is ideal when working with CPUs instead of GPUs, as it requires fewer computational resources.

#### You can check https://docs.ultralytics.com/es/tasks/segment/ for other models

Un aspecto importante al trabajar con segmentación en tiempo real es medir la latencia, es decir, cuánto tiempo tarda el sistema en procesar cada frame.

In [None]:
#Load the model YOLOv11 for segmentation (or have download 'yolo11n-seg.pt')


model = YOLO("yolo11n-seg")

# Define the source of video (Can be the path o index of cam )
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # To measure time of processing to calculate the latency 
    start_time = time.time()
    
    # Make the detecation and detection on the frame 
    results = model(
        frame, 
        conf=0.7, # Confidence of show over 70 %
        #classes=[0] # "0" for focuss just in person and comment if you wanna detect all categories in the COCO clases.txt of nano model
        )
    latency = (time.time() - start_time) * 1000  # miliseconds in latency

    # Acces to adress (bounding boxes)
    boxes_obj = results[0].boxes
    if boxes_obj is not None and len(boxes_obj) > 0:
        bboxes = boxes_obj.xyxy.cpu().numpy()   # [x1, y1, x2, y2]
        confs = boxes_obj.conf.cpu().numpy()      # Confidence scores (Puntajes de confianza)
        classes = boxes_obj.cls.cpu().numpy()     # Class index (Índices de clase)
        
        for i, box in enumerate(bboxes):
            x1, y1, x2, y2 = map(int, box)
            
            # Get the class name if this exist 
            class_name = model.names[int(classes[i])] if hasattr(model, 'names') else str(int(classes[i]))
            label = f'{class_name} {confs[i]:.2f}'
            # Draw bounding box and label the frame 
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    # PProcess the segmetations: assing random color for every mask detected
    masks_obj = results[0].masks
    if masks_obj is not None and len(masks_obj) > 0:
        # Extract the mask: It's assume that masks_obj.data is a tensor
        masks = masks_obj.data.cpu().numpy() if hasattr(masks_obj.data, 'cpu') else masks_obj.data
        for mask in masks:
            # Become the mask to binary (umbral 0.5) and scale to 0-255
            mask_bin = (mask > 0.5).astype(np.uint8) * 255
            # Reshape the mask for get the same size in frame 
            mask_bin = cv2.resize(mask_bin, (frame.shape[1], frame.shape[0]), interpolation=cv2.INTER_NEAREST)
            
            # Create a boolean mask with 3 channels 
            binary_mask = cv2.threshold(mask_bin, 127, 255, cv2.THRESH_BINARY)[1]
            binary_mask_3c = cv2.merge([binary_mask, binary_mask, binary_mask])
            
            # Generate radom color  (BGR)
            random_color = np.random.randint(0, 256, size=(3,), dtype=np.uint8)
            # Create an image the same size as the frame, filled with the random color
            colored_mask = np.full((frame.shape[0], frame.shape[1], 3), random_color, dtype=np.uint8)
            
            # Combine mask with frame: In regions where the mask is 255, the random color is used
            output_frame = frame.copy()
            output_frame[binary_mask_3c == 255] = colored_mask[binary_mask_3c == 255]
            
            # Update the frame with the colored mask (keeping the natural background)
            frame = output_frame
        
        # Display the number of masks detected
        cv2.putText(frame, f'Masks: {len(masks)}', (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
    
    # Display latency in the frame
    cv2.putText(frame, f'Latency: {latency:.1f}ms', (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

    # Display the processed frame in real time
    cv2.imshow("YOLOv11-Seg - Real Time segementation", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 384x640 1 person, 345.6ms
Speed: 52.2ms preprocess, 345.6ms inference, 50.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 114.4ms
Speed: 3.8ms preprocess, 114.4ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 130.2ms
Speed: 2.2ms preprocess, 130.2ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 125.3ms
Speed: 4.7ms preprocess, 125.3ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 118.4ms
Speed: 1.9ms preprocess, 118.4ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 97.8ms
Speed: 2.3ms preprocess, 97.8ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 103.7ms
Speed: 2.2ms preprocess, 103.7ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 99.3ms
Speed: 2.6ms preprocess, 99.3ms inference, 2.5ms postprocess per image at s

#### Important facts 
    - YOLO comes pre-trained to detect 80 different object categories. We can filter to show only the classes we're interested in

    - We can set a confidence threshold to show only detections with high probability (Could be 0.7 or 70% )

### Real-Time Segmentation and Heatmap Generation Integration

The implemented solution combines two powerful techniques:

- Motion detection: Using background subtraction to identify any changes between frames.
- YOLO segmentation: Applying a specific model to identify and isolate only people.

The result is a refined hitmap that exclusively shows the movement of people, eliminating false positives and providing much cleaner and more useful data for analysis.

In [None]:
# Install dependencies 
# !pip install ultralytics

# Libraries requiered 
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO


In [2]:
# video_path = "Video Useful\store-aisle-detection.mp4"
video_path = "Video Useful\park_detection.avi"

  video_path = "Video Useful\park_detection.avi"


In [None]:
# Open Video
cap = cv2.VideoCapture(video_path)

# Create background subtractor
bg_subtractor = cv2.createBackgroundSubtractorMOG2(
    history=500,          # Number of frames useed for build the background 
    varThreshold=16,      # Sensibility for detect changues 
    detectShadows=True,   # Sahdow Detection
    )

heatmap_refined = None

# Load model YOLOv11 for segementation
model = YOLO("yolo11n-seg")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Initialize the heatmap accumulator on the first frame
    if heatmap_refined is None:
        heatmap_refined = np.zeros(frame.shape[:2], dtype=np.float32)

    # --- Paso 1: Banckgroudn subtraction ---
    fgmask = bg_subtractor.apply(frame)
    # threshold for get a binary mask clean 
    _, fgmask = cv2.threshold(fgmask, 200, 255, cv2.THRESH_BINARY)

    # --- Step 2: Segmentation with YOLO ---
# We perform detection with segmentation on the entire frame.

    results = model(frame, verbose=False)[0]

    # Create an empty mask to accumulate the segmentations of the "person" class
    segmentation_mask = np.zeros(frame.shape[:2], dtype=np.uint8)

    if results.masks is not None:
        # Extract the masks and classes
        masks = results.masks.data.cpu().numpy() if hasattr(results.masks.data, 'cpu') else results.masks.data
        classes = results.boxes.cls.cpu().numpy() if hasattr(results.boxes.cls, 'cpu') else results.boxes.cls

        for mask, cls in zip(masks, classes):
            if int(cls) == 0:  # We filter person detections (in COCO, "person" is class 0)
                mask_bin = (mask > 0.5).astype(np.uint8) * 255
                # Resize mask_bin to the dimensions of the frame (or segmentation_mask)
                mask_bin_resized = cv2.resize(mask_bin, (segmentation_mask.shape[1], segmentation_mask.shape[0]), interpolation=cv2.INTER_NEAREST)
                segmentation_mask = cv2.bitwise_or(segmentation_mask, mask_bin_resized)

# --- Step 3: Mask Combination ---
# An intersection is performed between the motion mask and the person segmentation mask
    refined_mask = cv2.bitwise_and(fgmask, segmentation_mask)

    # We accumulate the refined mask on the heatmap
    heatmap_refined = cv2.add(heatmap_refined, refined_mask.astype(np.float32))

    # intermedia Visualization
    cv2.imshow("Original Frame", frame)
    cv2.imshow("Movement Mask(FG)", fgmask)
    cv2.imshow("Segmentation Mask (Persons)", segmentation_mask)
    cv2.imshow("Refined Mask", refined_mask)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


### What advantages does segmentation offer in traffic analysis?

Elimination of false positives: Moving objects other than people (such as ropes, doors, etc.) are no longer detected.

Increased accuracy: The resulting hitmap shows only human movement.

Cleaner data: The visualization is clearer and easier to interpret.

More focused analysis: Allows you to focus on customer behavior without distractions.


For deploy and implement in google colab use (!wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt)

### Here We just create the refined map

In [6]:
# video_path = "Video Useful\store-aisle-detection.mp4"
video_path = "Video Useful\park_detection.avi"

  video_path = "Video Useful\park_detection.avi"


In [10]:
# Libraries requiered 
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO


In [None]:
# Open Video
cap = cv2.VideoCapture(video_path)

# Create background subtractor
bg_subtractor = cv2.createBackgroundSubtractorMOG2(
    history=500,          # Number of frames useed for build the background 
    varThreshold=16,      # Sensibility for detect changues 
    detectShadows=True,   # Sahdow Detection
    )

heatmap_refined = None

# Load model YOLOv11 for segementation
model = YOLO("yolo11n-seg")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Initialize the heatmap accumulator on the first frame
    if heatmap_refined is None:
        heatmap_refined = np.zeros(frame.shape[:2], dtype=np.float32)

    # --- step 1: Banckgroudn subtraction ---
    fgmask = bg_subtractor.apply(frame)
    # threshold for get a binary mask clean 
    _, fgmask = cv2.threshold(fgmask, 200, 255, cv2.THRESH_BINARY)

    # --- Step 2: Segmentation with YOLO ---
# We perform detection with segmentation on the entire frame.

    results = model(frame, verbose=False)[0]

    # Create an empty mask to accumulate the segmentations of the "person" class
    segmentation_mask = np.zeros(frame.shape[:2], dtype=np.uint8)

    if results.masks is not None:
        # Extract the masks and classes
        masks = results.masks.data.cpu().numpy() if hasattr(results.masks.data, 'cpu') else results.masks.data
        classes = results.boxes.cls.cpu().numpy() if hasattr(results.boxes.cls, 'cpu') else results.boxes.cls

        for mask, cls in zip(masks, classes):
            if int(cls) == 0:  # We filter person detections (in COCO, "person" is class 0)
                mask_bin = (mask > 0.5).astype(np.uint8) * 255
                # Resize mask_bin to the dimensions of the frame (or segmentation_mask)
                mask_bin_resized = cv2.resize(mask_bin, (segmentation_mask.shape[1], segmentation_mask.shape[0]), interpolation=cv2.INTER_NEAREST)
                segmentation_mask = cv2.bitwise_or(segmentation_mask, mask_bin_resized)

# --- Step 3: Mask Combination ---
# An intersection is performed between the motion mask and the person segmentation mask
    refined_mask = cv2.bitwise_and(fgmask, segmentation_mask)

    # We accumulate the refined mask on the heatmap
    heatmap_refined = cv2.add(heatmap_refined, refined_mask.astype(np.float32))

# Show the heat map refined
plt.figure(figsize=(8,6))
plt.imshow(heatmap_refined, cmap="hot")
plt.title("Heatmap refined with YOLO segementation")
plt.axis("off")
plt.show()

In [None]:
# Normalize the heatmap to range 0-255
heatmap_norm = cv2.normalize(heatmap_refined, None, 0, 255, cv2.NORM_MINMAX)
heatmap_norm = np.uint8(heatmap_norm)

# add a colormap (example, COLORMAP_VIRIDIS)
colored_heatmap = cv2.applyColorMap(heatmap_norm, cv2.COLORMAP_VIRIDIS)

# Visualization with matplotlib
plt.figure(figsize=(10,8))
plt.imshow(cv2.cvtColor(colored_heatmap, cv2.COLOR_BGR2RGB))
plt.title("Normalized Heatmap")
plt.axis("off")
plt.show()