## 1. Define Data Structures and Helper Functions

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
# Registry of OpenCV legacy trackers, keyed by upper-case algorithm name.
# NOTE: each value is a tracker *instance* created once, when this cell runs,
# so every lookup of the same key hands back the same object.
tracker_dict = dict(
    BOOSTING=cv2.legacy.TrackerBoosting_create(),
    CSRT=cv2.legacy.TrackerCSRT_create(),
    KCF=cv2.legacy.TrackerKCF_create(),
    MEDIANFLOW=cv2.legacy.TrackerMedianFlow_create(),
    MIL=cv2.legacy.TrackerMIL_create(),
    MOSSE=cv2.legacy.TrackerMOSSE_create(),
    TLD=cv2.legacy.TrackerTLD_create(),
)

# Named resolutions mapped to (width, height) in pixels; each key is the
# frame height followed by "p".
resolution_dict = {
    f"{res_height}p": (res_width, res_height)
    for res_width, res_height in (
        (480, 360),
        (858, 480),
        (1280, 720),
        (1920, 1080),
    )
}

class VideoSpec:
    """
    Plain container describing one video to run tracking on.

    :param video_filename: Path of the input video, stored as ``video_filename``
    :param resolution: Resolution value, stored under the shorter name ``res``
    :param bbox: Bounding box value, stored as ``bbox``
    """

    def __init__(self, video_filename, resolution, bbox):
        # Values are stored as given; nothing is validated or converted here.
        self.video_filename, self.res, self.bbox = video_filename, resolution, bbox

def draw_bounding_box(frame, bbox, ok, color=(0, 255, 255), thickness=2):
    """
    Draws the tracked box on the frame, or a failure notice when tracking failed.

    :param frame: Image frame, modified in place
    :param bbox: Box as (x, y, width, height); only read when ``ok`` is truthy
    :param ok: Whether the tracker reported success for this frame
    :param color: Rectangle color in BGR format
    :param thickness: Rectangle line thickness
    """
    if not ok:
        # Nothing reliable to draw, so flag the failure in red text instead.
        cv2.putText(frame, "Tracking failure detected", (10, 80),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
        return

    left, top = bbox[0], bbox[1]
    box_width, box_height = bbox[2], bbox[3]

    # cv2.rectangle needs integer pixel coordinates for both corners.
    top_left = (int(left), int(top))
    bottom_right = (int(left + box_width), int(top + box_height))
    cv2.rectangle(frame, top_left, bottom_right, color, thickness)

def draw_banner_text(frame, text, banner_height_percent=0.08,
                     font_scale=1.5, font_thickness=2, text_alignment="center", text_color=(0, 255, 0)):
    """
    Draws a black banner across the top of the frame and overlays text on it.

    The frame is modified in place.

    :param frame: Image frame (rows x columns [x channels])
    :param text: Text to display in the banner
    :param banner_height_percent: Banner height as a fraction of the frame height
    :param font_scale: Font size
    :param font_thickness: Thickness of the text
    :param text_alignment: One of "left", "center" or "right"; selects the x
        position where the text starts (1/4, 1/2 or 3/4 of the frame width)
    :param text_color: Text color in BGR format
    :raises KeyError: If ``text_alignment`` is not one of the supported values
    """
    # Image arrays are indexed (rows, columns): shape[0] is the height and
    # shape[1] is the width.
    frame_height, frame_width = frame.shape[:2]

    # Determine banner height as a percentage of frame height
    banner_height = int(banner_height_percent * frame_height)

    # Draw black rectangle banner (thickness=-1 fills the rectangle)
    cv2.rectangle(frame, (0, 0), (frame_width, banner_height), (0, 0, 0), thickness=-1)

    # Horizontal start positions are fractions of the frame *width*.
    alignment_dict = {
        "left": frame_width // 4,
        "center": frame_width // 2,
        "right": frame_width * 3 // 4,
    }
    left_offset = alignment_dict[text_alignment]

    # putText anchors the text at its bottom-left corner; keep the baseline
    # 10 px above the bottom edge of the banner.
    location = (left_offset, banner_height - 10)
    cv2.putText(frame, text, location, cv2.FONT_HERSHEY_PLAIN, font_scale, text_color, font_thickness, cv2.LINE_AA)
def draw_text(frame, text, location=(20, 20), font_scale=1, color=(50, 170, 50), font_thickness=2):
    """
    Writes anti-aliased text onto the frame (in place) using the Hershey Simplex font.

    :param frame: Image frame to draw on
    :param text: String to render
    :param location: (x, y) position passed to ``cv2.putText`` as the text origin
    :param font_scale: Scale factor for the font
    :param color: Text color in BGR format
    :param font_thickness: Stroke thickness of the text
    """
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    line_type = cv2.LINE_AA
    cv2.putText(frame, text, location, font_face, font_scale, color, font_thickness, line_type)

def get_trackers(tracker_names, tracker_dict):
    """
    Resolves tracker names into tracker objects, preserving the given order.

    Each registry entry may be either a ready-made tracker object, which is
    returned as-is, or a zero-argument callable (factory), which is called once
    per requested name so that every request gets its own new tracker.

    :param tracker_names: List of tracker names
    :param tracker_dict: Dictionary mapping tracker names to tracker objects
        or to zero-argument factories that create them
    :return: List of tracker objects, one per entry in ``tracker_names``
    :raises KeyError: If a name is not present in ``tracker_dict``; the message
        lists the available names
    """
    tracker_objects = []
    for name in tracker_names:
        try:
            entry = tracker_dict[name]
        except KeyError:
            available = ", ".join(sorted(str(key) for key in tracker_dict))
            raise KeyError(
                f"Unknown tracker {name!r}; available trackers: {available}"
            ) from None
        # A callable entry is treated as a factory; anything else is assumed
        # to already be a tracker instance.
        tracker_objects.append(entry() if callable(entry) else entry)
    return tracker_objects

def initialize_trackers(tracker_objects, frame, bbox):
    """
    Calls ``init(frame, bbox)`` on every tracker, in list order.

    The value returned by each ``init`` call is ignored, and this function
    itself returns nothing.

    :param tracker_objects: Iterable of tracker objects exposing ``init``
    :param frame: Frame handed to each tracker as its starting image
    :param bbox: Bounding box handed to each tracker as the target region
    """
    for current_tracker in tracker_objects:
        current_tracker.init(frame, bbox)

def get_tracker_results(tracker_objects, frame, tracker_names):
    """
    Runs every tracker on its own copy of the frame and collects the annotated copies.

    :param tracker_objects: List of tracker objects
    :param frame: Current video frame; each tracker gets a separate ``frame.copy()``
    :param tracker_names: List of tracker names (accepted but not used here)
    :return: List with one annotated frame per tracker, in tracker order
    """
    # update_tracker returns (ok, annotated_frame); only the frame is kept.
    return [
        update_tracker(single_tracker, frame.copy())[1]
        for single_tracker in tracker_objects
    ]

def update_tracker(tracker, frame):
    """
    Updates a single tracker on the frame and annotates that frame in place.

    The frame receives the bounding box (or a failure notice), a banner with
    the tracker's class name, and the measured update rate.

    :param tracker: Tracker object exposing ``update(frame) -> (ok, bbox)``
    :param frame: Current video frame; drawn on in place
    :return: Tuple (ok, frame) where ok is True if tracking was successful and
        frame is the same, now annotated, frame object that was passed in
    """
    timer = cv2.getTickCount()
    ok, bbox = tracker.update(frame)

    # Calculate Frames per Second (FPS). Clamp the elapsed time to at least
    # one tick so a zero-length measurement cannot cause a division by zero.
    elapsed_ticks = max(cv2.getTickCount() - timer, 1)
    fps = cv2.getTickFrequency() / elapsed_ticks

    # Draw bbox
    draw_bounding_box(frame, bbox, ok)
    # Display tracker type on frame
    draw_banner_text(frame, f"Tracker: {tracker.__class__.__name__}")
    # Display FPS on frame
    draw_text(frame, f"FPS: {fps:.2f}")
    return ok, frame


In [None]:
def get_output_video_dims(tracker_names, resolution_specs):
    """
    Computes the output video dimensions based on the number of trackers and resolution specs.

    Frames are laid out in a grid of at most 4 columns; rows are added as
    needed, so the last row may be only partially filled.

    :param tracker_names: List of tracker names (only its length is used)
    :param resolution_specs: Tuple (width, height) of the base resolution
    :return: Tuple (output_width, output_height)
    :raises ValueError: If ``tracker_names`` is empty
    """
    width, height = resolution_specs
    n = len(tracker_names)
    if n == 0:
        # Without this guard the row computation below divides by zero.
        raise ValueError("tracker_names must contain at least one tracker")

    # Determine the grid size for arranging frames
    cols = min(4, n)  # Maximum 4 columns
    rows = (n + cols - 1) // cols  # Ceiling division: rows needed

    return width * cols, height * rows

def align_frames(frames_list):
    """
    Aligns multiple frames in a grid layout based on the number of frames.

    The grid has at most 4 columns. When the frames do not fill the last row,
    the remaining cells are filled with black frames shaped like the first
    frame. The input list is not modified.

    :param frames_list: List of frames to be arranged
    :return: Single combined frame; for a one-element list, that frame itself
    :raises ValueError: If ``frames_list`` is empty
    """
    n = len(frames_list)
    if n == 0:
        raise ValueError("frames_list must contain at least one frame")

    if n == 1:
        return frames_list[0]

    cols = min(4, n)  # Maximum 4 columns
    rows = (n + cols - 1) // cols  # Compute rows needed

    # Pad a *copy* so the caller's list is left untouched. The same black
    # filler array can be reused because hstack copies its inputs.
    filler_count = rows * cols - n
    grid_frames = list(frames_list) + [np.zeros_like(frames_list[0])] * filler_count

    # Stack frames row-wise, then stack the rows vertically
    row_frames = [np.hstack(grid_frames[r * cols:(r + 1) * cols]) for r in range(rows)]

    return np.vstack(row_frames)

## 2. Define the Main Controller for Tracking

In [None]:
def run_tracker(tracker_names, video_spec, video_output_file_name):
    """
    Runs object tracking on a video using specified tracking algorithms.

    Every frame is resized to the requested resolution, processed by each
    tracker on its own copy, and the annotated copies are merged into one grid
    frame that is written to the output video. The first frame is used only to
    initialize the trackers and is not written to the output.

    :param tracker_names: List of tracker names
    :param video_spec: VideoSpec object containing video file name, resolution, and bbox
    :param video_output_file_name: Name of the output video file; its parent
        directory is created if it does not exist
    :raises ValueError: If ``video_spec.res`` is not a key of ``resolution_dict``
    :raises OSError: If the input video cannot be opened or read, or the output
        video cannot be opened for writing
    """
    # Validate the resolution up front; an unknown key would otherwise yield
    # None and fail later with an unrelated-looking error.
    resolution_specs = resolution_dict.get(video_spec.res)
    if resolution_specs is None:
        raise ValueError(
            f"Unknown resolution {video_spec.res!r}; expected one of {list(resolution_dict)}"
        )

    # Create video capture object
    video_cap = cv2.VideoCapture(video_spec.video_filename)
    video_out = None

    # try/finally guarantees the capture and writer are released on every
    # exit path, including errors.
    try:
        # Confirm video file can be opened
        if not video_cap.isOpened():
            raise OSError(f"Could not open video file: {video_spec.video_filename}")

        # Keep the source frame rate as a float so fractional rates
        # (e.g. 29.97) are not truncated.
        fps_write = video_cap.get(cv2.CAP_PROP_FPS)

        # Make sure the output directory exists before opening the writer.
        output_dir = os.path.dirname(video_output_file_name)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Set up video writer for output
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video_dim = get_output_video_dims(tracker_names, resolution_specs)
        video_out = cv2.VideoWriter(video_output_file_name, fourcc, fps_write, output_video_dim)
        if not video_out.isOpened():
            raise OSError(f"Could not open output video for writing: {video_output_file_name}")

        # Read first frame
        ok, frame = video_cap.read()
        if not ok:
            raise OSError(f"Cannot read video file: {video_spec.video_filename}")

        # Resize the frame to match the specified resolution; the bounding
        # box is applied to this resized frame.
        frame = cv2.resize(frame, resolution_specs, interpolation=cv2.INTER_AREA)

        # Initialize tracker objects
        tracker_objects = get_trackers(tracker_names, tracker_dict)
        initialize_trackers(tracker_objects, frame, video_spec.bbox)

        # Process video frames
        while True:
            # Read a new frame from the video
            ok, frame = video_cap.read()
            if not ok:
                break  # Exit loop if no more frames are available

            # Resize the frame to match the desired resolution
            frame = cv2.resize(frame, resolution_specs, interpolation=cv2.INTER_AREA)

            # Get tracking results for all trackers
            tracked_frames = get_tracker_results(tracker_objects, frame, tracker_names)

            # Align and merge tracked frames into a final multi-view layout
            output_frame = align_frames(tracked_frames)

            # Write the processed frame to the output video
            video_out.write(output_frame)
    finally:
        # Release resources when finished
        video_cap.release()
        if video_out is not None:
            video_out.release()

## 3. Input Specification and Execution

In [None]:
# Path of the input video to run the trackers on
input_video = "/content/5991156-hd_1280_720_30fps.mp4"

# Output video path, built from a prefix so the run is easy to rename
video_output_prefix = "test_1x2"
video_output_file_name = f"tracking_analysis_output_videos/{video_output_prefix}.mp4"

# Tracking algorithms to compare; each name must be a key of tracker_dict
trackers = ["BOOSTING", "CSRT"]

# Video specification: the input file, a resolution_dict key, and the initial
# bounding box as (x, y, width, height). The box is applied to the frame after
# it has been resized to the chosen resolution.
video_obj = VideoSpec(input_video, "480p", (370, 225, 180, 80))

# Run the tracking process and write the combined output video
run_tracker(trackers, video_obj, video_output_file_name)

Display the tracking results as a 1x2 grid (one row, one panel per tracker):

In [None]:
# Play the rendered comparison video inline in the notebook.
# NOTE(review): the wildcard import brings every public moviepy.editor name
# into the notebook namespace; only VideoFileClip is used in this cell.
from moviepy.editor import *

# This path must match the output file written by the tracking cell above.
clip = VideoFileClip('tracking_analysis_output_videos/test_1x2.mp4')
clip.ipython_display(width=1000)