In [1]:
# Mount Google Drive at /content/drive; the video files read by later cells
# live under /content/drive/MyDrive/yolo/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.62-py3-none-any.whl (825 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m825.2/825.2 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-c

In [6]:
from collections import defaultdict, Counter
import cv2
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import colors

# Instance segmentation + tracking demo.
# Every frame is segmented and tracked; masks of the classes of interest are
# tinted with a per-track colour, and a per-frame class-count bar chart is
# placed in the left quarter of the output video.

# NOTE: `track_history` is not used anywhere in this cell; it is kept only so
# that code elsewhere relying on the name keeps working.
track_history = defaultdict(lambda: [])

model = YOLO("yolov8n-seg.pt")  # segmentation model
cap = cv2.VideoCapture("/content/drive/MyDrive/yolo/race.mp4")
# Fail fast instead of silently producing an empty output video.
assert cap.isOpened(), "Error reading video file"

# Get original video properties
original_fps = int(cap.get(cv2.CAP_PROP_FPS))
w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Output layout: the frame keeps its size; the left quarter holds the bar chart
# and the remaining three quarters hold the (horizontally squeezed) video.
out_w = w
out_h = h
bar_width = out_w // 4
seg_width = out_w - bar_width
desired_fps = 10  # playback rate of the written file (independent of original_fps)
out = cv2.VideoWriter("3instance-segmentation-object-tracking.avi", cv2.VideoWriter_fourcc(*"MJPG"), desired_fps, (out_w, out_h))

# Define the classes of interest (display name -> class id reported by the model).
# NOTE(review): the display names are taken from this dict, not from the model.
# In the standard COCO-80 mapping ids 8 and 62 are presumably 'boat' and 'tv',
# not 'van' and 'tree' -- confirm against `model.names`.
classes_of_interest = {'horse': 17, 'person': 0, 'car': 2, 'van': 8, 'bus': 5, 'tree': 62}

# Reverse lookup built once (first name wins for a repeated id), replacing a
# linear scan over the dict for every detection.
class_name_by_id = {}
for _name, _class_id in classes_of_interest.items():
    class_name_by_id.setdefault(_class_id, _name)

unique_colors = {}  # To store unique colors for each track_id

try:
    while True:
        ret, im0 = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Prepare the segmentation output
        im0_seg = im0.copy()  # Make a copy for segmented output
        counts = Counter()  # per-frame detections per class name

        results = model.track(im0, persist=True)

        if results[0].boxes.id is not None and results[0].masks is not None:
            masks = results[0].masks.xy
            class_ids = results[0].boxes.cls.int().cpu().tolist()
            track_ids = results[0].boxes.id.int().cpu().tolist()

            for mask, class_id, track_id in zip(masks, class_ids, track_ids):
                class_name = class_name_by_id.get(class_id)
                if class_name is None:
                    continue  # not a class of interest

                # Increment the count for the detected class
                counts[class_name] += 1

                # Assign a unique color for each track_id if not already assigned
                if track_id not in unique_colors:
                    unique_colors[track_id] = colors(int(track_id), True)
                color = unique_colors[track_id]

                # A detection can come with an empty polygon; it is still counted,
                # but there is nothing to paint and an empty contour must not be
                # handed to drawContours.
                if len(mask) == 0:
                    continue

                # Paint the filled polygon in the track colour on a black canvas
                colored_mask = np.zeros_like(im0)
                cv2.drawContours(colored_mask, [mask.astype(np.int32)], -1, color, thickness=cv2.FILLED)

                # Blend the colored mask with the running output (pixels outside
                # the polygon add zero and stay unchanged)
                im0_seg = cv2.addWeighted(im0_seg, 1.0, colored_mask, 0.5, 0)

        # Create a bar graph for displaying counts
        counts_img = np.zeros((h, bar_width, 3), dtype=np.uint8)
        max_count = max(counts.values(), default=1)  # Avoid division by zero

        # Give every class its own horizontal slot so bars sit side by side
        # instead of being painted over one another.
        n_bars = max(len(counts), 1)
        slot_width = max((bar_width - 20) // n_bars, 1)
        for i, (class_name, count) in enumerate(counts.items()):
            bar_height = int((count / max_count) * (h - 60))  # Normalize bar height
            x_left = 10 + i * slot_width
            x_right = x_left + max(slot_width - 4, 1)  # small gap between bars
            cv2.rectangle(counts_img, (x_left, h - 30 - bar_height), (x_right, h - 30), colors(i, True), thickness=cv2.FILLED)

        # Labels go on last so no bar can cover them.
        for i, (class_name, count) in enumerate(counts.items()):
            cv2.putText(counts_img, f"{class_name}: {count}", (10, 30 + 40 * i), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)

        # Resize segmented video to fit 3/4 of the width
        im0_seg_resized = cv2.resize(im0_seg, (seg_width, h))

        # Combine counts image with the resized segmented video
        combined_img = np.hstack((counts_img, im0_seg_resized))

        out.write(combined_img)
        # cv2.imshow("instance-segmentation-object-tracking", combined_img)
        if cv2.waitKey(int(1000 / desired_fps)) & 0xFF == ord("q"):
            break
finally:
    # Always finalize the output file and free the capture, including when the
    # cell is interrupted or a frame raises.
    out.release()
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 1 person, 2 horses, 220.8ms
Speed: 3.7ms preprocess, 220.8ms inference, 53.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 horses, 180.9ms
Speed: 2.5ms preprocess, 180.9ms inference, 43.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 horses, 185.3ms
Speed: 3.8ms preprocess, 185.3ms inference, 41.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 3 horses, 181.4ms
Speed: 2.5ms preprocess, 181.4ms inference, 44.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 3 horses, 184.5ms
Speed: 2.5ms preprocess, 184.5ms inference, 40.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 3 horses, 205.3ms
Speed: 3.4ms preprocess, 205.3ms inference, 39.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 horses, 176.3ms
Speed: 2.5ms preprocess, 176.3ms inference, 42.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 h

KeyboardInterrupt: 

# July 9 demo

In [None]:
import cv2
import math
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator
from ultralytics.solutions import speed_estimation

# Setup for the speed / distance demo: models, video I/O, per-run accumulators
# and drawing constants shared by the functions and the main loop below.

# Initialize YOLO models: one is tracked for the distance overlay, the other
# feeds the speed estimator (each is tracked separately in the main loop).
object_detection_model = YOLO("yolov8s.pt")
speed_estimation_model = YOLO("yolov8n.pt")
names = speed_estimation_model.model.names

# Open video file
cap = cv2.VideoCapture("/content/drive/MyDrive/yolo/h1.mp4")
assert cap.isOpened(), "Error reading video file"

# Get video properties (each value is truncated to int)
# NOTE(review): presumably fps can come back as 0 for some containers, which
# would then be passed to the writer below -- confirm for the input used.
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

# Initialize video writer; the output is one third wider than the input to make
# room for the chart column stacked to the right of each frame.
out = cv2.VideoWriter("Distribution_speed_distance_visual_scatter_unique1hor_car.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w + w // 3, h))

frame_count = 0  # number of frames read so far
data = {}  # class name -> detections in the current frame
labels = []  # class names seen so far, in order of first appearance
class_counts_over_time = {}  # passed to create_scatter_plot after the loop
speed_over_time = {}  # track id -> one speed value per frame
distance_over_time = {}  # track id -> one distance value per frame

# Center point and pixel per meter for distance calculation
center_point = (0, h)  # bottom-left corner of the frame
pixel_per_meter = 10

# Line points for speed estimation
# NOTE(review): hard-coded coordinates; looks like they assume a 1280-pixel-wide
# frame -- confirm against the actual w/h of the input video.
line_pts = [(0, 360), (1280, 360)]

# Initialize speed-estimation object
speed_obj = speed_estimation.SpeedEstimator(names=names, reg_pts=line_pts, view_img=False)

# Colors for text and bounding box
txt_color, txt_background, bbox_clr = ((0, 0, 0), (255, 255, 255), (255, 0, 255))

def create_pie_chart(data):
    """Build a pie chart with one wedge per entry of ``data``.

    Args:
        data: Mapping whose keys are used as wedge labels and whose values are
            used as wedge sizes.

    Returns:
        The matplotlib figure. It has already been passed to ``plt.close``, so
        it is not shown inline.
    """
    figure, axes = plt.subplots(figsize=(4, 3))  # 4:3 canvas
    wedge_labels, wedge_sizes = data.keys(), data.values()
    axes.pie(wedge_sizes, labels=wedge_labels, autopct='%1.1f%%')
    axes.legend()
    axes.set_title("Total Percentage of Individual Class Perspective")
    plt.close(figure)
    return figure

def create_bar_plot(data):
    """Build a bar chart with one bar per entry of ``data``.

    Args:
        data: Mapping of class name -> count. Keys become the x categories and
            values the bar heights.

    Returns:
        The matplotlib figure. It has already been passed to ``plt.close``, so
        it is not shown inline.
    """
    fig, ax = plt.subplots(figsize=(4, 3))  # Aspect ratio of 4:3
    # 'tab10' resampled to one entry per bar. `plt.get_cmap` is used because
    # `matplotlib.cm.get_cmap` was deprecated and later removed.
    palette = plt.get_cmap('tab10', len(data))
    ax.bar(data.keys(), data.values(), color=[palette(i) for i in range(len(data))])
    # No legend: the bars carry no labels, so `ax.legend()` drew nothing and
    # only emitted a "no artists with labels" warning on every call.
    ax.set_title("Distribution of Each Class")
    ax.set_xlabel("Class")
    ax.set_ylabel("Count")
    plt.close(fig)
    return fig

def create_multiple_line_plot(speed_data, distance_data, frame_count):
    """Plot every tracked object's speed and distance series against frame index.

    Args:
        speed_data: Mapping of track id -> sequence of speed values.
        distance_data: Mapping of track id -> sequence of distance values.
        frame_count: Number of x positions; each series is plotted against
            ``range(frame_count)``.

    Returns:
        The matplotlib figure. It has already been passed to ``plt.close``, so
        it is not shown inline.
    """
    figure, axes = plt.subplots(figsize=(4, 3))  # 4:3 canvas
    frame_axis = range(frame_count)

    # Speed curves first, then distance curves, so legend order is unchanged.
    for prefix, series_by_track in (("Speed", speed_data), ("Distance", distance_data)):
        for track_id, series in series_by_track.items():
            axes.plot(frame_axis, series, label=f"{prefix} {track_id}")

    axes.legend()
    axes.set_title("Speed and Distance Identification of Each Class")
    axes.set_xlabel("Frame Count")
    axes.set_ylabel("Value")
    plt.close(figure)
    return figure

def create_scatter_plot(data):
    """Build a scatter plot with one point per entry of ``data``.

    Args:
        data: Mapping whose keys give the x values and whose values give the
            y values.

    Returns:
        The matplotlib figure. It has already been passed to ``plt.close``, so
        it is not shown inline.
    """
    figure, axes = plt.subplots(figsize=(4, 3))  # 4:3 canvas
    class_axis = list(data.keys())
    count_axis = list(data.values())
    axes.scatter(class_axis, count_axis)
    axes.set_title("Class Distribution Scatter Plot")
    axes.set_xlabel("Class")
    axes.set_ylabel("Count")
    plt.close(figure)
    return figure

def fig_to_img(fig):
    """Rasterize a matplotlib figure into an image OpenCV can use directly.

    Args:
        fig: A matplotlib figure whose canvas provides ``draw()`` and
            ``buffer_rgba()``.

    Returns:
        A C-contiguous ``uint8`` array of shape ``(height, width, 3)`` in BGR
        channel order, the order OpenCV uses for colour frames.
    """
    fig.canvas.draw()
    # `buffer_rgba` replaces `tostring_rgb`, which was deprecated and later
    # removed from Matplotlib. It already has shape (height, width, 4), so no
    # manual reshape from `get_width_height` is needed.
    rgba = np.asarray(fig.canvas.buffer_rgba())
    # Drop alpha and reverse RGB -> BGR; without this, chart colours come out
    # with red and blue swapped once pasted into an OpenCV frame.
    return np.ascontiguousarray(rgba[..., 2::-1])

def resize_and_place_image(base_image, overlay_image, position):
    """Scale ``overlay_image`` to a third of the frame size and paste it into ``base_image``.

    The target size is taken from the module-level ``w`` and ``h``.

    Args:
        base_image: Image array that receives the overlay; modified in place.
        overlay_image: Image to resize and paste.
        position: ``(x, y)`` of the overlay's top-left corner inside ``base_image``.

    Returns:
        ``base_image``, the same array that was passed in.
    """
    tile = cv2.resize(overlay_image, (w // 3, h // 3))
    left, top = position
    tile_rows, tile_cols = tile.shape[0], tile.shape[1]
    base_image[top:top + tile_rows, left:left + tile_cols] = tile
    return base_image

def draw_visualizations(frame, data, labels, speed_data, distance_data, frame_count):
    """Attach a chart column to the right-hand side of ``frame``.

    The column is ``w // 3`` wide and split into three stacked tiles: pie chart,
    bar plot and speed/distance line plot. A tile stays black when its input is
    empty.

    Args:
        frame: Video frame to extend.
        data: Per-class counts used for the pie chart and the bar plot.
        labels: Accepted but not used by this function.
        speed_data: Mapping of track id -> speed series for the line plot.
        distance_data: Mapping of track id -> distance series for the line plot.
        frame_count: Length of the x axis of the line plot.

    Returns:
        ``frame`` and the chart column stacked horizontally.
    """
    tile_height = h // 3
    panel = np.zeros((h, w // 3, 3), dtype=np.uint8)

    # Pie chart in the top tile, bar plot in the middle tile.
    if data:
        for row, build_chart in enumerate((create_pie_chart, create_bar_plot)):
            chart_img = fig_to_img(build_chart(data))
            panel = resize_and_place_image(panel, chart_img, (0, row * tile_height))

    # Speed/distance curves in the bottom tile.
    if speed_data or distance_data:
        trend_fig = create_multiple_line_plot(speed_data, distance_data, frame_count)
        trend_img = fig_to_img(trend_fig)
        panel = resize_and_place_image(panel, trend_img, (0, 2 * tile_height))

    return np.hstack((frame, panel))

def pad_lists_to_length(data_dict, length, default_value=0):
    """Right-pad every list in ``data_dict`` to at least ``length`` items, in place.

    Args:
        data_dict: Mapping whose values are lists; shorter lists are extended.
        length: Minimum length each list should have afterwards.
        default_value: Filler appended to lists that are too short.

    Returns:
        None. Lists already at or above ``length`` are left untouched.
    """
    for key in data_dict:
        shortfall = length - len(data_dict[key])
        if shortfall <= 0:
            continue
        data_dict[key] += [default_value] * shortfall

# Class names to keep, mapped to the class id each is expected to have.
# NOTE(review): a detection passes the filter below only when the model's own
# name for its class is a key here AND the id matches. 'van' and 'tree' can
# therefore match only if the model really uses those names for ids 8 and 62;
# presumably it does not for stock COCO weights -- confirm against
# `object_detection_model.names`.
desired_classes = {'horse':17, 'person': 0,'car': 2, 'van': 8,'bus': 5,'tree': 62}

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        frame_count += 1

        # Object detection for speed estimation
        speed_tracks = speed_estimation_model.track(frame, persist=True, show=False)
        frame = speed_obj.estimate_speed(frame, speed_tracks)

        # Object detection for distance estimation (run on the frame returned by
        # the speed estimator)
        annotator = Annotator(frame, line_width=2)
        results = object_detection_model.track(frame, persist=True)

        if results[0].boxes.id is not None:
            boxes = results[0].boxes.xyxy.cpu()
            track_ids = results[0].boxes.id.int().cpu().tolist()
            clss = results[0].boxes.cls.cpu().tolist()

            for box, track_id, cls in zip(boxes, track_ids, clss):
                cls_name = object_detection_model.names[int(cls)]
                # Filter desired classes and IDs
                if cls_name not in desired_classes or desired_classes[cls_name] != cls:
                    continue

                if cls_name not in labels:
                    labels.append(cls_name)

                # Per-frame count (reset after every frame) ...
                data[cls_name] = data.get(cls_name, 0) + 1
                # ... and the running total across all frames, which feeds the
                # scatter plot after the loop. Previously this dict was never
                # filled, so that plot was always empty.
                class_counts_over_time[cls_name] = class_counts_over_time.get(cls_name, 0) + 1

                annotator.box_label(box, label=str(track_id), color=bbox_clr)
                annotator.visioneye(box, center_point)

                x1, y1 = int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)  # Bounding box centroid

                # Straight-line pixel distance from the reference point, scaled
                # by pixel_per_meter
                distance = (math.sqrt((x1 - center_point[0]) ** 2 + (y1 - center_point[1]) ** 2)) / pixel_per_meter

                distance_text = f"Distance: {distance:.2f} m"
                text_size, _ = cv2.getTextSize(distance_text, cv2.FONT_HERSHEY_SIMPLEX, 1.2, 3)
                cv2.rectangle(frame, (x1, y1 - text_size[1] - 10), (x1 + text_size[0] + 10, y1), txt_background, -1)
                cv2.putText(frame, distance_text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 1.2, txt_color, 3)

                # A track first seen on frame N gets N-1 leading zeros so every
                # series has one value per frame.
                if track_id not in distance_over_time:
                    distance_over_time[track_id] = [0] * (frame_count - 1)
                distance_over_time[track_id].append(distance)

                # NOTE(review): falls back to 0 when `speed_obj` has no `speeds`
                # attribute; also, these track ids come from a different model
                # than the one feeding `speed_obj` -- confirm both the attribute
                # name and that the ids line up.
                speed = speed_obj.speeds.get(track_id, 0) if hasattr(speed_obj, 'speeds') else 0
                if track_id not in speed_over_time:
                    speed_over_time[track_id] = [0] * (frame_count - 1)
                speed_over_time[track_id].append(speed)

        # Pad lists to current frame count to ensure equal lengths (tracks that
        # were not detected in this frame get a 0)
        pad_lists_to_length(distance_over_time, frame_count)
        pad_lists_to_length(speed_over_time, frame_count)

        # Draw combined visualizations on the frame
        combined_frame = draw_visualizations(frame, data, labels, speed_over_time, distance_over_time, frame_count)

        # Write the frame with visualizations
        out.write(combined_frame)

        # Clear counts for next frame
        data = {}

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always finalize the output video and free the capture, including when the
    # cell is interrupted or a frame raises.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Render the per-class totals as a scatter plot in the top-left corner of a
# blank frame
final_frame = np.zeros((h, w, 3), dtype=np.uint8)
scatter_plot = create_scatter_plot(class_counts_over_time)
scatter_plot_img = fig_to_img(scatter_plot)
final_frame = resize_and_place_image(final_frame, scatter_plot_img, (0, 0))

# Save the final frame with the scatter plot
cv2.imwrite("final_frame_with_scatter_plot.png", final_frame)
