In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.1-py3-none-any.whl.metadata (34 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.8-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.1-py3-none-any.whl (881 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 881.3/881.3 kB 15.1 MB/s eta 0:00:00
Downloading ultralytics_thop-2.0.8-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.1 ultralytics-thop-2.0.8


In [None]:
from ultralytics import YOLO
import cv2

# Load the YOLO11 nano model.
# NOTE: "yolo11n.pt" is the detection checkpoint, so frames are annotated with
# bounding boxes; switch to "yolo11n-seg.pt" if segmentation masks are wanted.
model = YOLO("yolo11n.pt")

# Define input and output video paths
input_video_path = "/content/drive/MyDrive/yolo/car.mp4"
output_video_path = "output_video.mp4"

# Open the video file using OpenCV and fail loudly if it cannot be read;
# otherwise the loop below would exit immediately and leave an empty output file.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

out = None
try:
    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps or fps <= 0:
        # Some containers report no frame rate; fall back to a sane default
        # instead of handing 0 to the writer.
        fps = 30.0

    # Define the video writer to save the output
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    if not out.isOpened():
        raise IOError(f"Could not open output video for writing: {output_video_path}")

    # Process the video frame by frame
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read error): stop processing.
            break

        # Run the YOLO model on the current frame
        results = model(frame)

        # Render the predictions for this frame onto a copy of the image
        annotated_frame = results[0].plot()

        # Write the annotated frame to the output video
        out.write(annotated_frame)
finally:
    # Release the capture and the writer even if inference raised, so the
    # output file is finalized and the input handle is not leaked.
    cap.release()
    if out is not None:
        out.release()

print(f"Video saved at {output_video_path}")


Streaming output truncated to the last 5000 lines.
Speed: 1.8ms preprocess, 8.9ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 8.8ms
Speed: 2.0ms preprocess, 8.8ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 9.5ms
Speed: 1.8ms preprocess, 9.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 8.9ms
Speed: 1.8ms preprocess, 8.9ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 8.4ms
Speed: 2.2ms preprocess, 8.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 8.4ms
Speed: 1.5ms preprocess, 8.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 8.3ms
Speed: 2.3ms preprocess, 8.3ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 banana, 8.9ms
Speed: 1.9ms preprocess, 8.9ms inference, 1.2ms postprocess per image at shape

In [None]:
from collections import defaultdict, Counter
import cv2
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import colors

# Not read anywhere in this cell; left defined in case other cells use it.
track_history = defaultdict(lambda: [])

model = YOLO("/content/yolo11n-seg.pt")  # segmentation model
cap = cv2.VideoCapture("/content/drive/MyDrive/yolo/race.mp4")
if not cap.isOpened():
    # Without this check the frame size below would be 0x0 and the writer unusable.
    raise IOError("Could not open input video: /content/drive/MyDrive/yolo/race.mp4")

# Get original video properties
original_fps = int(cap.get(cv2.CAP_PROP_FPS))
w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Output layout: the left quarter holds the count chart, the remaining
# three quarters hold the (horizontally squeezed) segmented frame.
out_w = w
out_h = h
bar_width = out_w // 4
seg_width = out_w - bar_width
desired_fps = 10  # playback rate of the written file (independent of original_fps)
out = cv2.VideoWriter("3instance-segmentation-object-tracking.avi", cv2.VideoWriter_fourcc(*"MJPG"), desired_fps, (out_w, out_h))

# Classes of interest, as COCO class ids (assumes the stock COCO-pretrained
# weights -- confirm with `model.names` if a custom checkpoint is used).
# COCO has no "van" or "tree" class: id 8 is "boat" and id 62 is "tv", so those
# entries would have mislabelled detections. "truck" (7) is the closest class to a van.
classes_of_interest = {'horse': 17, 'person': 0, 'car': 2, 'truck': 7, 'bus': 5}
# Reverse lookup (class id -> name) built once instead of scanning the dict per detection.
class_names_by_id = {class_id: name for name, class_id in classes_of_interest.items()}
unique_colors = {}  # To store unique colors for each track_id

try:
    while True:
        ret, im0 = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Prepare the segmentation output
        im0_seg = im0.copy()  # Make a copy for segmented output
        counts = Counter()

        # persist=True keeps tracker state between calls so track ids carry over frames
        results = model.track(im0, persist=True)
        detections = results[0]

        if detections.boxes.id is not None and detections.masks is not None:
            masks = detections.masks.xy
            class_ids = detections.boxes.cls.int().cpu().tolist()
            track_ids = detections.boxes.id.int().cpu().tolist()

            for mask, class_id, track_id in zip(masks, class_ids, track_ids):
                class_name = class_names_by_id.get(class_id)
                if class_name is None:
                    # Not one of the classes we report on.
                    continue

                # Increment the count for the detected class
                counts[class_name] += 1

                if mask.size == 0:
                    # A detection can come back with an empty polygon; there is
                    # nothing to draw and drawContours rejects empty contours.
                    continue

                # Assign a unique color for each track_id if not already assigned
                if track_id not in unique_colors:
                    unique_colors[track_id] = colors(int(track_id), True)
                color = unique_colors[track_id]

                # Rasterize the polygon into a binary mask
                mask_image = np.zeros((h, w), dtype=np.uint8)
                cv2.drawContours(mask_image, [mask.astype(np.int32)], -1, 255, thickness=cv2.FILLED)

                # Create a colored mask (black everywhere except the object)
                colored_mask = np.zeros_like(im0)
                colored_mask[mask_image == 255] = color

                # Additively blend the colored mask onto the frame; pixels outside
                # the mask are unchanged because the overlay is zero there.
                im0_seg = cv2.addWeighted(im0_seg, 1.0, colored_mask, 0.5, 0)

        # Create a bar graph for displaying counts
        counts_img = np.zeros((h, bar_width, 3), dtype=np.uint8)
        max_count = max(counts.values(), default=1)  # Avoid division by zero

        # Fixed class order so bars do not jump around between frames.
        present = [(name, counts[name]) for name in classes_of_interest if name in counts]
        # Give each class its own horizontal slot so bars sit side by side
        # instead of being painted on top of one another.
        slot_width = max((bar_width - 20) // max(len(present), 1), 1)

        for i, (class_name, count) in enumerate(present):
            bar_height = int((count / max_count) * (h - 60))  # Normalize bar height
            x_left = 10 + i * slot_width
            x_right = x_left + max(slot_width - 4, 1)  # small gap between bars
            # Color is keyed by class id so a class keeps the same color in every frame.
            bar_color = colors(classes_of_interest[class_name], True)
            cv2.rectangle(counts_img, (x_left, h - 30 - bar_height), (x_right, h - 30), bar_color, thickness=cv2.FILLED)

        # Draw the labels after all bars so no bar can cover them.
        y_offset = 30
        for class_name, count in present:
            cv2.putText(counts_img, f"{class_name}: {count}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
            y_offset += 40

        # Resize segmented video to fit 3/4 of the width
        im0_seg_resized = cv2.resize(im0_seg, (seg_width, h))

        # Combine counts image with the resized segmented video
        combined_img = np.hstack((counts_img, im0_seg_resized))

        out.write(combined_img)
finally:
    # Always finalize the output file and free the capture, even on error.
    # No GUI window is ever opened in this cell, so there is nothing to destroy.
    out.release()
    cap.release()


0: 384x640 2 persons, 4 horses, 12.4ms
Speed: 2.8ms preprocess, 12.4ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 horses, 10.2ms
Speed: 2.1ms preprocess, 10.2ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 horses, 11.3ms
Speed: 2.0ms preprocess, 11.3ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 horses, 9.8ms
Speed: 1.7ms preprocess, 9.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 horses, 10.0ms
Speed: 1.8ms preprocess, 10.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 horses, 9.8ms
Speed: 1.7ms preprocess, 9.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 horses, 10.3ms
Speed: 3.0ms preprocess, 10.3ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 horses, 10.7ms
Speed