# Track all objects using bounding boxes in video

In [None]:
from ultralytics import YOLO

# Load the COCO-pretrained YOLO11n detection model
model = YOLO("yolo11n.pt")

# Track objects through the video using the ByteTrack tracker configuration;
# `save` and `show` ask for the annotated output to be written out and displayed.
results = model.track(
    source="video_001.mp4",
    tracker='bytetrack.yaml',
    show=True,
    save=True,
)

# Track all objects using segmentation in video

In [None]:
from ultralytics import YOLO

# Load the official pretrained segmentation checkpoint
model = YOLO("yolo11x-seg.pt")
# model = YOLO("path/to/best.pt")  # alternatively, load a custom-trained model

# Track objects through the video and save the annotated result
results = model.track("video_001.mp4", save=True)

# Track all objects using segmentation one image at a time

In [None]:
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

# Read a video frame by frame, run YOLO segmentation tracking on each frame,
# draw the segmentation outline and class name of every tracked object, and
# write the annotated frames to a new video while previewing them in a window.
filename = 'video_001'
model = YOLO("yolo11x-seg.pt")  # segmentation model
cap = cv2.VideoCapture(f'{filename}.mp4')

# The frame size must be integral, but the frame rate is kept as a float:
# truncating e.g. 29.97 fps to 29 would make the written video play back at a
# different speed than the source.
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter(f'{filename}_x_instance-segmentation-object-tracking.mp4', cv2.VideoWriter_fourcc(*"H265"), fps, (w, h))

idx_frame = 0
try:
    while True:
        ret, im0 = cap.read()
        idx_frame += 1
        # idx_frame starts at 0 and is only incremented, so the `== -1` test can
        # never be true as written.
        # NOTE(review): looks like a placeholder for an early-stop frame index — confirm.
        if not ret or idx_frame == -1:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # The annotator draws directly onto im0, which is what gets written below.
        annotator = Annotator(im0, line_width=2)

        # persist=True keeps the tracker state between successive calls.
        results = model.track(im0, persist=True)
        result = results[0]

        # Only annotate when the tracker produced IDs and the model produced masks.
        if result.boxes.id is not None and result.masks is not None:
            masks = result.masks.xy
            track_cls = [result.names[i] for i in result.boxes.cls.int().cpu().tolist()]

            # Every object uses the same palette entry, so the colours are computed
            # once per frame. To colour/label per track instead, derive them from
            # result.boxes.id.
            color = colors(0, True)
            txt_color = annotator.get_txt_color(color)

            for mask, track_cl in zip(masks, track_cls):
                annotator.seg_bbox(mask=mask, mask_color=color, label=str(track_cl), txt_color=txt_color)

        out.write(im0)
        cv2.imshow(f'{filename}video_002_trimmed_instance-segmentation-object-tracking', im0)

        # Press "q" in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always finalize the output file and free the capture/window, even when
    # inference raises or the cell is interrupted.
    out.release()
    cap.release()
    cv2.destroyAllWindows()

# Track specific object using segmentation one image at a time

In [1]:
from queue import Queue

import cv2
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

class ObjectTracker:
    """Track objects of a single class with a YOLO segmentation model and isolate them from the frame."""

    def __init__(self, model_path="yolo11x-seg.pt", track_class="car"):
        """
        Initialize the ObjectTracker with the specified model and class to track.

        :param model_path: Path to the YOLO model.
        :param track_class: Name of the class to track (default is 'car').
        """
        self.model = YOLO(model_path)
        self.track_class = track_class

    def process_image(self, image):
        """
        Run tracking on one image and cut out every tracked object of the configured class.

        :param image: The input image.
        :return: A list with one dict per matching object, holding ``"object_id"``
            (the track ID), ``"masked_image"`` (the image with everything outside
            the object's mask blacked out) and ``"top_left"`` (always ``(0, 0)``,
            because the masked image keeps the original size). The list is empty
            when the model reports no track IDs or no masks.
        """
        # persist=True keeps the tracker state between successive calls.
        result = self.model.track(image, persist=True)[0]

        # Nothing to report unless both track IDs and masks are available.
        if result.boxes.id is None or result.masks is None:
            return []

        polygons = result.masks.xy
        track_ids = result.boxes.id.int().cpu().tolist()
        class_names = [result.names[i] for i in result.boxes.cls.int().cpu().tolist()]

        output_data = []
        for polygon, obj_id, class_name in zip(polygons, track_ids, class_names):
            if class_name != self.track_class:
                continue
            # A polygon with no points has no visible region to cut out; skip it
            # rather than emitting an all-black image.
            if len(polygon) == 0:
                continue
            output_data.append({
                "object_id": obj_id,
                "masked_image": self.apply_mask(image, polygon),
                "top_left": (0, 0)  # Always (0, 0) as we keep the original size
            })
        return output_data

    def apply_mask(self, image, mask):
        """
        Apply the mask to the image while keeping the original size,
        setting everything outside the mask to black.

        :param image: The original image.
        :param mask: The mask polygon as a sequence of (x, y) coordinates.
        :return: The masked image with the same size as the original. An empty
            polygon yields an all-black image.
        """
        polygon = np.asarray(mask, dtype=np.int32)

        # An empty polygon covers nothing, so the result is entirely black; this
        # also avoids handing an empty point array to cv2.fillPoly.
        if polygon.size == 0:
            return np.zeros_like(image)

        # Rasterize the polygon into a single-channel mask of the image's height/width.
        mask_image = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask_image, [polygon], 255)

        # Keep only the pixels under the mask.
        return cv2.bitwise_and(image, image, mask=mask_image)


In [None]:
import sys
import os
import cv2
# sys.path.append(os.path.abspath('../Live-Pose-Estimator'))
# from pose_estimator import pose_estimator

# Read a video frame by frame, track objects of TRACK_CLASS with ObjectTracker,
# and preview the masked image of the first matching object in each frame.
FILENAME = 'video_002'
TRACK_CLASS = 'car'

cap = cv2.VideoCapture(f'{FILENAME}.mp4')
# Frame geometry and rate are only consumed by the disabled pose-estimation setup below.
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

# Use the constant so the tracked class is configured in exactly one place.
tracker = ObjectTracker(track_class=TRACK_CLASS)

# --- Disabled: pose-estimation setup -------------------------------------
# Re-enabling this also needs the commented-out pose_estimator import at the
# top of this cell and `numpy` available as `np`.
#
# # Principal point (assuming the center of the image)
# cx, cy = w / 2, h / 2
# # Convert FOV from degrees to radians
# fov_x, fov_y = 70, 50
# fov_x_rad, fov_y_rad = np.deg2rad(fov_x), np.deg2rad(fov_y)
# # Focal lengths in pixels
# fx, fy = w / (2 * np.tan(fov_x_rad / 2)), h / (2 * np.tan(fov_y_rad / 2))
# # Intrinsic matrix
# K = np.array([[fx, 0, cx],
#                 [0, fy, cy],
#                 [0, 0, 1]])
#
# pose = pose_estimator(K, max_feature=10000, print_messages=True, size_output_img=[3000, 2400])
# --------------------------------------------------------------------------

idx_frame = 0
try:
    while True:
        ret, image = cap.read()
        idx_frame += 1
        # idx_frame starts at 0 and is only incremented, so the `== -1` test can
        # never be true as written.
        # NOTE(review): looks like a placeholder for an early-stop frame index — confirm.
        if not ret or idx_frame == -1:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Retrieve and process results if available
        tracked_objects = tracker.process_image(image)
        if tracked_objects:
            # Use only the first matching object's masked image
            first_car = tracked_objects[0]
            masked_image = first_car["masked_image"]

            # Disabled: feed the masked image to the pose estimator.
            # ret, R, t, match_img = pose.compute_pose(masked_image)
            # if ret:
            #     cv2.imshow('Matched image', match_img)

            cv2.imshow('Masked Car', masked_image)

        # Press "q" in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Free the capture and close the preview window even when processing
    # raises or the cell is interrupted.
    cap.release()
    cv2.destroyAllWindows()