## Setting Up Your Python Environment

In [1]:
# %%capture
# # Install additional dependencies
# !pip install pandas pillow opencv-python

# # Install ONNX packages
# !pip install onnx onnxruntime onnx-simplifier

# # Install utility packages
# !pip install cjm_psl_utils cjm_pil_utils cjm_byte_track

In [2]:
# !pip install -U cjm_byte_track

## Importing the Required Dependencies

In [3]:
# Import Python Standard Library dependencies
from dataclasses import dataclass
import json
from pathlib import Path
import random
import time
from typing import List

# Import ByteTrack package
from cjm_byte_track.byte_tracker import BYTETracker
from cjm_byte_track.matching import match_detections_with_tracks

# Import utility functions
from cjm_psl_utils.core import download_file
from cjm_pil_utils.core import resize_img

# Import OpenCV
import cv2

# Class for displaying videos in Jupyter notebooks
from IPython.display import Video

# Import numpy
import numpy as np

# Import the pandas package
import pandas as pd

# Import PIL for image manipulation
from PIL import Image, ImageDraw, ImageFont

# Import ONNX dependencies
import onnx # Import the onnx module
import onnxruntime as ort # Import the ONNX Runtime

# Import tqdm for progress bar
from tqdm.auto import tqdm

## Setting Up the Project

### Set the Directory Paths

In [4]:
# Name of the project; it doubles as the name of the project folder
project_name = "pytorch-yolox-object-detector"

# Folder holding everything that belongs to the project
project_dir = Path(f"./{project_name}/")

# Make sure the project folder exists (no error when it is already there)
project_dir.mkdir(parents=True, exist_ok=True)

# Folder containing the checkpoint files to load
# checkpoint_dir = project_dir / "2023-08-17_16-14-43"
checkpoint_dir = project_dir / "pretrained-coco"

# Display both locations as a table with the column header hidden
pd.Series(
    {
        "Project Directory:": project_dir,
        "Checkpoint Directory:": checkpoint_dir,
    }
).to_frame().style.hide(axis='columns')

0,1
Project Directory:,pytorch-yolox-object-detector
Checkpoint Directory:,pytorch-yolox-object-detector\pretrained-coco


### Download a Font File

In [5]:
# File name of the Roboto font; later passed to draw_bboxes_pil as the label font
font_file = 'KFOlCnqEu92Fr1MmEU9vAw.ttf'

# Download the font file into the current working directory ("./")
# (the recorded output shows the download is skipped when the file already exists)
download_file(f"https://fonts.gstatic.com/s/roboto/v30/{font_file}", "./")

The file ./KFOlCnqEu92Fr1MmEU9vAw.ttf already exists and overwrite is set to False.


## Loading the Checkpoint Data

### Load the Colormap

In [6]:
# The colormap path: first file in the checkpoint folder whose name ends in 'colormap.json'
colormap_path = next(checkpoint_dir.glob('*colormap.json'), None)

# Fail with an explicit message instead of an opaque IndexError when the file is missing
if colormap_path is None:
    raise FileNotFoundError(f"No '*colormap.json' file found in {checkpoint_dir}")

# Load the JSON colormap data.
# The encoding is given explicitly so that non-ASCII labels are not decoded with the
# platform default code page (e.g. on Windows).
with open(colormap_path, 'r', encoding='utf-8') as file:
    colormap_json = json.load(file)

# Convert the JSON data to a dictionary mapping each label to its color
colormap_dict = {item['label']: item['color'] for item in colormap_json['items']}

# Extract the class names from the colormap (order follows the JSON file)
class_names = list(colormap_dict.keys())

# Make a copy of the colors scaled by 255 and truncated to integers
# (presumably the stored colors are floats in [0, 1] — TODO confirm against the colormap file)
int_colors = [tuple(int(c*255) for c in color) for color in colormap_dict.values()]

### Set the Preprocessing and Post-Processing Parameters

In [7]:
# Input width and height are cropped down to multiples of this value
max_stride = 32
# Selects entries 2 and 3 of a tensor shape, i.e. (height, width) of the
# (1, channels, height, width) array built for inference
input_dim_slice = slice(2, 4)

## Defining Utility Functions

### Define a Function to Prepare Images for Inference

In [8]:
def prepare_image_for_inference(frame:np.ndarray, target_sz:int, max_stride:int):

    """
    Turn a BGR video frame into a model-ready RGB image and report how to map
    results back onto the source frame.

    The frame is converted to RGB, resized with `resize_img` (divisor 1), and then
    center-cropped so that both sides are multiples of `max_stride`.

    Parameters:
    - frame (numpy.ndarray): The input image in BGR format.
    - target_sz (int): The target size forwarded to `resize_img`.
    - max_stride (int): Both sides of the cropped image are multiples of this value.

    Returns:
    tuple:
    - rgb_img (PIL.Image): The full-size frame converted to RGB.
    - input_dims (list of int): Size of the cropped image, in the same order as the
      resized image's `size` attribute (width, height for PIL images).
    - offsets (numpy.ndarray): Half of the size difference between the resized image
      and `input_dims`, i.e. the margin removed on each side by the crop.
    - min_img_scale (float): Shortest side of the source image divided by the
      shortest side of the resized image.
    - input_img (PIL.Image): The resized image cropped to `input_dims`.
    """

    # OpenCV delivers BGR; convert to RGB and wrap in a PIL image
    rgb_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Resize without any divisor constraint; the stride alignment is done by the crop below
    resized_img = resize_img(rgb_img, target_sz=target_sz, divisor=1)
    resized_size = resized_img.size

    # Round every side down to the nearest multiple of the max stride
    input_dims = [(side // max_stride) * max_stride for side in resized_size]

    # Margin to remove on each side so the crop stays centered
    offsets = (np.array(resized_size) - input_dims) / 2

    # Factor that maps coordinates in the resized image back to the source image
    min_img_scale = min(rgb_img.size) / min(resized_size)

    # Center-crop the resized image to the stride-aligned dimensions
    left, top = offsets
    right, bottom = resized_size - offsets
    input_img = resized_img.crop(box=[left, top, right, bottom])

    return rgb_img, input_dims, offsets, min_img_scale, input_img

### Define Functions to Process YOLOX Output

#### Define a function to generate the output grids

In [9]:
def generate_output_grids_np(height, width, strides=[8,16,32]):
    """
    Build the table of grid cells for every stride level.

    For each stride the image is divided into `height // stride` rows and
    `width // stride` columns. Every cell contributes one row
    `(column index, row index, stride)`; cells are listed row by row, and the
    stride levels follow each other in the order given by `strides`.

    Args:
        height (int): The height of the image.
        width (int): The width of the image.
        strides (list of int): The stride levels to generate cells for.

    Returns:
        np.ndarray: Array of shape (total cells, 3) holding the grid coordinates and strides.
    """

    per_stride = []

    for stride in strides:
        # Number of cells along each axis at this stride
        rows_n, cols_n = height // stride, width // stride

        # Row and column index of every cell, each of shape (rows_n, cols_n)
        row_idx, col_idx = np.indices((rows_n, cols_n))

        # One (column, row, stride) triple per cell, in row-major order
        cells = np.column_stack((
            col_idx.ravel(),
            row_idx.ravel(),
            np.full(rows_n * cols_n, stride),
        ))
        per_stride.append(cells)

    # Stack the per-stride tables on top of each other
    return np.concatenate(per_stride, axis=0)

#### Define a function to calculate bounding boxes and probabilities

In [10]:
def calculate_boxes_and_probs(model_output:np.ndarray, output_grids:np.ndarray) -> np.ndarray:
    """
    Decode raw model output into boxes, class labels and confidence scores.

    Parameters:
    model_output (numpy.ndarray): 3-D model output; along the last axis, entries 0-1 are
        the center offsets, 2-3 the log-sizes, 4 the objectness and 5+ the class scores.
    output_grids (numpy.ndarray): One row `(grid x, grid y, stride)` per proposal.

    Returns:
    numpy.ndarray: Array whose last axis holds `(x0, y0, w, h, label, probability)`,
        where (x0, y0) is the top-left corner of the box.
    """
    cell_xy = output_grids[..., :2]
    cell_stride = output_grids[..., 2:]

    # Box centers and sizes in input-image pixels
    centers = (model_output[..., :2] + cell_xy) * cell_stride
    sizes = np.exp(model_output[..., 2:4]) * cell_stride

    # Move from center-based to top-left-based coordinates
    top_left = centers - sizes / 2

    # Per-class probability = objectness * class score
    class_probs = model_output[..., 4:5] * model_output[..., 5:]

    # Best class and its probability for every proposal
    best_prob = class_probs.max(axis=-1)
    best_label = class_probs.argmax(axis=-1)

    fields = [
        top_left[:, :, 0],
        top_left[:, :, 1],
        sizes[:, :, 0],
        sizes[:, :, 1],
        best_label,
        best_prob,
    ]
    return np.stack(fields, axis=-1)

#### Define a function to extract object proposals from the raw model output

In [11]:
def process_outputs(outputs:np.ndarray, input_dims:tuple, bbox_conf_thresh:float):

    """
    Decode the raw model output and keep only the confident proposals.

    Parameters:
    - outputs (numpy.ndarray): The raw output from the model.
    - input_dims (tuple of int): Dimensions (height, width) of the model input; unpacked
                                 into `generate_output_grids_np`.
    - bbox_conf_thresh (float): Proposals whose probability is not strictly greater than
                                this threshold are discarded.

    Returns:
    - numpy.array: 2-D array of the surviving proposals `(x0, y0, w, h, label, probability)`,
                   ordered from highest to lowest probability.
    """

    # Decode boxes, labels and probabilities for every grid cell
    grids = generate_output_grids_np(*input_dims)
    decoded = calculate_boxes_and_probs(outputs, grids)

    # Keep the proposals above the confidence threshold (this also flattens the batch axis)
    confident = decoded[decoded[:, :, -1] > bbox_conf_thresh]

    # Sort ascending by probability, then reverse to get descending order
    ascending = confident[..., -1].argsort()
    return confident[ascending][::-1]

#### Define a function to calculate the intersection-over-union

In [12]:
def calc_iou(proposals:np.ndarray) -> np.ndarray:
    """
    Compute the pairwise Intersection over Union (IoU) of a set of boxes.

    The IoU of two boxes is the area of their overlap divided by the area covered
    by both boxes together.

    Parameters:
    proposals (2D np.array): One box per row in the form [x, y, width, height], where
        (x, y) is the top-left corner.

    Returns:
    iou (2D np.array): Square matrix whose element i,j is the IoU of boxes i and j.
    """

    # Edges of every box
    left = proposals[:, 0]
    top = proposals[:, 1]
    right = left + proposals[:, 2]
    bottom = top + proposals[:, 3]

    # Width and height of each pairwise overlap; negative extents mean "no overlap"
    overlap_w = np.maximum(np.minimum(right, right[:, None]) - np.maximum(left, left[:, None]), 0)
    overlap_h = np.maximum(np.minimum(bottom, bottom[:, None]) - np.maximum(top, top[:, None]), 0)
    intersections = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice
    areas = proposals[:, 2] * proposals[:, 3]
    unions = areas[:, None] + areas - intersections

    return intersections / unions

#### Define a function to filter bounding box proposals using Non-Maximum Suppression

In [13]:
def nms_sorted_boxes(iou:np.ndarray, iou_thresh:float=0.45) -> np.ndarray:
    """
    Run greedy non-maximum suppression (NMS) on boxes that are already sorted by score.

    Boxes are visited in index order. Every box that is still kept suppresses all
    later boxes whose IoU with it exceeds the threshold; a box that has itself been
    suppressed no longer suppresses anything.

    Parameters:
    iou (np.ndarray): Square IoU matrix where element i,j is the IoU of boxes i and j.
    iou_thresh (float): The IoU threshold for suppression. Boxes with IoU > iou_thresh are suppressed.

    Returns:
    keep (np.ndarray): The indices of the boxes to keep after applying NMS.
    """

    count = iou.shape[0]
    keep_flags = np.ones(count, dtype=bool)

    for idx in range(count):
        # A suppressed box must not suppress others
        if not keep_flags[idx]:
            continue
        # Drop every later box that overlaps this one too much
        keep_flags[idx + 1:] &= ~(iou[idx, idx + 1:] > iou_thresh)

    return np.arange(count)[keep_flags]

### Define a Function to Annotate Images with Bounding Boxes

In [14]:
def draw_bboxes_pil(image, boxes, labels, colors, font, width=2, font_size=18, probs=None):
    """
    Draw bounding boxes with filled label tags onto an image.

    The drawing happens directly on `image`; the same object is returned.

    Parameters:
    - image (PIL.Image): The image to draw on (modified in place).
    - boxes (list of tuples): Bounding boxes, each given as (x, y, w, h).
    - labels (list of str): One label per bounding box.
    - colors (list): One color per bounding box, used for the outline and the label tag.
      The mean of each color is compared against 127.5 to pick black or white text.
    - font (str): Path to the font file used for the labels.
    - width (int, optional): Line width of the box outline. Defaults to 2.
    - font_size (int, optional): Font size at a 1000-pixel image diagonal; scaled
      proportionally for other image sizes. Defaults to 18.
    - probs (list of float, optional): Probability per label; when given, each label
      gets a ": xx.xx%" suffix. Defaults to None.

    Returns:
    - annotated_image (PIL.Image): The input image with the annotations drawn on it.
    """

    # Image diagonal (in pixels) at which `font_size` is used unscaled
    REFERENCE_DIAGONAL = 1000

    # Grow or shrink the font with the image diagonal
    scaled_font_size = int(font_size * (np.hypot(*image.size) / REFERENCE_DIAGONAL))

    # Append the probability as a percentage when scores are available
    if probs is not None:
        labels = [f"{label}: {prob*100:.2f}%" for label, prob in zip(labels, probs)]

    canvas = ImageDraw.Draw(image)

    # Load the font once, outside the loop
    fnt = ImageFont.truetype(font, scaled_font_size)

    # Average channel value per color, used to choose a readable text color
    brightness_levels = [np.mean(np.array(color)) for color in colors]

    for (x, y, w, h), text, color, brightness in zip(boxes, labels, colors, brightness_levels):
        # Outline of the object
        canvas.rectangle([x, y, x+w, y+h], outline=color, width=width)

        # Size of the rendered label text
        text_w, text_h = canvas.textbbox(xy=(0,0), text=text, font=fnt)[2:]
        tag_top = y - text_h

        # Filled tag sitting directly above the box
        canvas.rectangle([x, tag_top, x+text_w, y], outline=color, fill=color)

        # Dark text on bright tags, bright text on dark tags
        if brightness > 127.5:
            text_fill = 'black'
        else:
            text_fill = 'white'
        canvas.multiline_text((x, tag_top), text, font=fnt, fill=text_fill)

    return image

## Tracking Objects in Videos

### Create an Inference Session

In [15]:
# Get a filename for the ONNX model: the first '*.onnx' file in the checkpoint folder
onnx_file_path = next(checkpoint_dir.glob('*.onnx'), None)

# Fail with an explicit message instead of an opaque IndexError when no model is present
if onnx_file_path is None:
    raise FileNotFoundError(f"No '*.onnx' file found in {checkpoint_dir}")

In [16]:
# Load the model and create an InferenceSession
# Execution providers handed to ONNX Runtime; only the CPU provider is enabled here
providers = [
    'CPUExecutionProvider',
    # "CUDAExecutionProvider",
]
# Session options are left at their defaults
sess_options = ort.SessionOptions()
# The session is reused for every frame in the processing loop
session = ort.InferenceSession(onnx_file_path, sess_options=sess_options, providers=providers)

### Select a Test Video

In [17]:
# Specify the directory where videos are or will be stored.
video_dir = "./videos/"

# Name of the test video to be used.
# test_video_name = "pexels-rodnae-productions-10373924.mp4"
test_video_name = "cars_on_highway.mp4"

# Construct the full path for the video using the directory and video name.
# This is plain string concatenation, so it relies on video_dir ending with a separator.
video_path = f"{video_dir}{test_video_name}"

# Define the URL for the test video hosted on huggingface.co.
test_video_url = f"https://huggingface.co/datasets/cj-mills/pexels-object-tracking-test-videos/resolve/main/{test_video_name}"

# Download the video file from the specified URL to the local video directory.
# NOTE(review): the third positional argument is presumably the overwrite flag (the recorded
# output reads "overwrite is set to False") — confirm against download_file's signature.
download_file(test_video_url, video_dir, False)

# Display the video inline using IPython's Video class.
Video(video_path)

The file ./videos/cars_on_highway.mp4 already exists and overwrite is set to False.


### Initialize a `VideoCapture` Object

In [18]:
# Open the video file located at 'video_path' using OpenCV
video_capture = cv2.VideoCapture(video_path)

# Fail early with a clear message; otherwise the properties below read as 0 and the
# processing loop silently handles no frames
if not video_capture.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

# Retrieve the frame width of the video
frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
# Retrieve the frame height of the video
frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Retrieve the frames per second (FPS) of the video
# NOTE(review): int() truncates fractional rates (e.g. 29.97 becomes 29); kept as-is
# because downstream cells receive this value as an integer.
frame_fps = int(video_capture.get(cv2.CAP_PROP_FPS))
# Retrieve the total number of frames in the video
frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))

# Create a pandas Series containing video metadata and display it as a table
pd.Series({
    "Frame Width:": frame_width,
    "Frame Height:": frame_height,
    "Frame FPS:": frame_fps,
    "Frames:": frames
}).to_frame().style.hide(axis='columns')

0,1
Frame Width:,1280
Frame Height:,720
Frame FPS:,50
Frames:,3000


### Initialize a `VideoWriter` Object

In [19]:
# Output file: same folder and base name as the input video, with a "-byte-track" suffix
video_out_path = f"{video_dir}{Path(video_path).stem}-byte-track.mp4"

# Create the writer for the annotated video.
# The output uses the 'mp4v' codec and the same frame rate and frame size as the source video.
video_writer = cv2.VideoWriter(
    video_out_path,
    cv2.VideoWriter_fourcc(*'mp4v'),
    frame_fps,
    (frame_width, frame_height),
)

### Define Inference Parameters

In [20]:
# Target size passed to prepare_image_for_inference for every frame
test_sz = 288
# test_sz = 384
# Proposals must score strictly above this probability to be kept
bbox_conf_thresh = 0.1
# Boxes overlapping a higher-scoring box with IoU above this value are suppressed
iou_thresh = 0.45

### Detect, Track, and Annotate Objects in Video Frames

In [21]:
# %%capture cap --no-stderr

In [22]:
%%capture cap --no-stderr

# Initialize a ByteTracker object
tracker = BYTETracker(track_thresh=0.25, track_buffer=30, match_thresh=0.8, frame_rate=frame_fps)

# Number of frames read so far; used in the per-frame log line
frame_counter = 0

# The try/finally guarantees that the capture and the writer are released (and the
# output file finalized) even when a frame fails to process.
try:
    with tqdm(total=frames, desc="Processing frames") as pbar:
        # Iterate through each frame in the video
        while video_capture.isOpened():
            ret, frame = video_capture.read()
            # Stop as soon as no further frame can be read
            if not ret:
                break
            frame_counter += 1

            # Prepare an input image for inference
            rgb_img, input_dims, offsets, min_img_scale, input_img = prepare_image_for_inference(frame, test_sz, max_stride)

            # Convert the input image to a float32 array of shape (1, channels, height, width), scaled by 1/255
            input_tensor_np = np.array(input_img, dtype=np.float32).transpose((2, 0, 1))[None]/255

            # Start performance counter
            start_time = time.perf_counter()

            # Run inference
            outputs = session.run(None, {"input": input_tensor_np})[0]

            # Process the model output
            proposals = process_outputs(outputs, input_tensor_np.shape[input_dim_slice], bbox_conf_thresh)

            # Apply non-max suppression to the proposals with the specified threshold
            proposal_indices = nms_sorted_boxes(calc_iou(proposals[:, :-2]), iou_thresh)
            proposals = proposals[proposal_indices]

            # Shift boxes by the crop offsets and scale them back to source-frame coordinates
            bbox_list = (proposals[:,:4]+[*offsets, 0, 0])*min_img_scale
            label_list = [class_names[int(idx)] for idx in proposals[:,4]]
            probs_list = proposals[:,5]

            # Start with every detection unmatched (-1 marks "no track")
            track_ids = [-1]*len(bbox_list)

            # Convert (x, y, w, h) to (x1, y1, x2, y2) by adding the corner to the size
            tlbr_boxes = bbox_list.copy()
            tlbr_boxes[:, 2:4] += tlbr_boxes[:, :2]

            # print(np.concatenate([tlbr_boxes, probs_list[:, np.newaxis]], axis=1))

            # Update tracker with detections
            tracks = tracker.update(
                output_results=np.concatenate([tlbr_boxes, probs_list[:, np.newaxis]], axis=1),
                img_info=rgb_img.size,
                img_size=rgb_img.size)
            track_ids = match_detections_with_tracks(tlbr_boxes=tlbr_boxes, track_ids=track_ids, tracks=tracks)
            print(f"\nFRAME-{frame_counter}: {track_ids}")
            # if (frame_counter >= 300):
            #     break

            # End performance counter
            end_time = time.perf_counter()
            # Calculate the combined FPS for object detection and tracking
            fps = 1 / (end_time - start_time)
            # Display the frame rate in the progress bar
            pbar.set_postfix(fps=fps)

            # Keep only the detections that were matched to a track
            tracked = [(bbox, label, prob, track_id)
                       for bbox, label, prob, track_id
                       in zip(bbox_list, label_list, probs_list, track_ids) if track_id != -1]
            if tracked:
                bbox_list, label_list, probs_list, track_ids = zip(*tracked)
            else:
                # zip(*[]) yields nothing to unpack, which used to raise ValueError on
                # frames without tracked objects; such frames are written unannotated.
                bbox_list, label_list, probs_list, track_ids = [], [], [], []

            # Annotate the current frame with bounding boxes and tracking IDs
            annotated_img = draw_bboxes_pil(
                image=rgb_img,
                boxes=bbox_list,
                labels=[f"{track_id}-{label}" for track_id, label in zip(track_ids, label_list)],
                probs=probs_list,
                colors=[int_colors[class_names.index(i)] for i in label_list],
                font=font_file,
            )
            # Back to BGR for OpenCV's writer
            annotated_frame = cv2.cvtColor(np.array(annotated_img), cv2.COLOR_RGB2BGR)

            video_writer.write(annotated_frame)
            pbar.update(1)
finally:
    video_capture.release()
    video_writer.release()

# with open('output.md', 'w') as f:
#     f.write(cap.stdout)

In [23]:
# Save the text captured from the processing cell (the per-frame track id lines)
Path('output.md').write_text(cap.stdout)
print("Done.")

Done.


In [24]:
from cjm_byte_track.matching import ious, linear_assignment

In [25]:
def iou_distance_test(stracks, detections):
    """
    Compute the IoU-based distance (1 - IoU) between track boxes and detection boxes.

    Each box is a 4-element array. Its first two entries are added onto its last two
    before the boxes are handed to `ious`, which turns an [x, y, w, h] box into
    [x1, y1, x2, y2].

    Parameters:
    stracks (list of np.ndarray): Track boxes.
    detections (list of np.ndarray): Detection boxes.

    Returns:
    The value of `1 - ious(...)`; the recorded output shows one row per track and
    one column per detection.
    """
    def _add_corner_to_size(box):
        # [x, y, w, h] + [0, 0, x, y] -> [x, y, x + w, y + h]
        return box + np.array([0, 0, *box[:2]])

    atlbrs = [_add_corner_to_size(box) for box in stracks]
    btlbrs = [_add_corner_to_size(box) for box in detections]
    return 1 - ious(atlbrs, btlbrs)

In [26]:
# One sample track box; iou_distance_test adds its first two entries onto its last two
stracks = [np.array([370.46357155, 877.45772362, 145.92466354, 119.21984673])]

In [27]:
# Two sample detection boxes in the same 4-element layout as `stracks`
detections = [np.array([368.32773685, 879.60356593, 143.66029739, 114.70830917]), np.array([240.97084284, 833.63639474, 148.40068817, 133.82930756])]

In [28]:
# Distance of the single track to each detection; the recorded result is about 0.08 for
# the first detection and about 0.95 for the second
iou_distance_test(stracks, detections)

array([[0.07981369, 0.95213506]])

In [29]:
# Threshold handed to linear_assignment in the experiments below
# (same value as the match_thresh given to BYTETracker in the processing cell)
match_thresh=0.8

In [30]:
# 2x2 distance matrix with small values on the diagonal.
# Recorded result: pairs (0, 0) and (1, 1) are returned and both trailing tuples are empty.
dists = np.array([[0.09774083, 1],
         [1, 0.18316322]])
linear_assignment(dists, match_thresh)

indices_to_matches
indices:
[[0 0]
 [1 1]]


(array([[0, 0],
        [1, 1]], dtype=int64),
 (),
 ())

In [31]:
# 2x2 distance matrix whose first row has no value below the threshold.
# Recorded result: only pair (1, 1) is returned; index 0 appears in both trailing tuples
# (presumably the unmatched rows and unmatched columns — TODO confirm).
dists = np.array([[1, 1],
         [1, 0.173594]])
linear_assignment(dists, match_thresh)

indices_to_matches
indices:
[[0 0]
 [1 1]]


(array([[1, 1]], dtype=int64), (0,), (0,))

In [32]:
# 2x2 distance matrix with small diagonal and large (but below 1) off-diagonal values.
# Recorded result: pairs (0, 0) and (1, 1) are returned and both trailing tuples are empty.
dists = np.array([[0.0631399, 0.986465],
                  [0.96626, 0.111179]])
linear_assignment(dists, match_thresh)

indices_to_matches
indices:
[[0 0]
 [1 1]]


(array([[0, 0],
        [1, 1]], dtype=int64),
 (),
 ())

In [33]:
class TestClass:
    """Minimal mutable object holding a single counter attribute, `item`."""
    def __init__(self):
        """Start the counter at 0."""
        self.item = 0

In [34]:
# Ten independent TestClass instances, each with item == 0
test_list = [TestClass() for i in range(10)]

In [35]:
def list_changer(test_list):
    """
    Return the elements of `test_list` from index 5 onward.

    The slice creates a new list, but its elements are the same objects as in the
    input list (no copies are made).
    """
    return test_list[5:]

In [36]:
# New list that holds the last five objects of test_list (the objects themselves are shared)
new_list = list_changer(test_list)

In [37]:
# Mutate the objects through the sliced list.
# Note: this cell is not idempotent — every re-run adds another 2 to each item.
for cls in new_list:
    cls.item += 2

In [38]:
# Items as seen through the sliced list (recorded output: all 2)
[cls.item for cls in new_list]

[2, 2, 2, 2, 2]

In [39]:
# Items as seen through the original list: the last five changed too, because the
# slice shares its objects with the original list
[cls.item for cls in test_list]

[0, 0, 0, 0, 0, 2, 2, 2, 2, 2]

In [40]:
# Threshold for the linear_assignment experiments below (repeats the value assigned earlier)
match_thresh=0.8

In [41]:
# 9x9 distance matrix, written as a list of lists with each inner list representing a row.
# Row 5 contains only 1s and column 8 has no value below match_thresh; in the recorded
# result, 5 and 8 are the entries of the two trailing tuples.
values = [
    [0.0673455, 1, 1, 1, 1, 1, 1, 1, 1],
    [1, 0.0600082, 1, 0.967432, 1, 1, 1, 1, 1],
    [1, 1, 0.025685, 1, 1, 1, 1, 1, 1],
    [1, 1, 1, 1, 1, 0.0627941, 1, 1, 1],
    [1, 1, 1, 0.74512, 0.0319988, 1, 0.836532, 1, 1],
    [1, 1, 1, 1, 1, 1, 1, 1, 1],
    [1, 1, 1, 0.976879, 0.824716, 1, 0.0832212, 1, 0.942253],
    [1, 0.972737, 1, 0.0347837, 0.733537, 1, 0.981221, 1, 1],
    [1, 1, 1, 1, 1, 1, 1, 0.0162111, 1]
]

# Convert the list of lists to a numpy array
dists = np.array(values)

linear_assignment(dists, match_thresh)

indices_to_matches
indices:
[[0 0]
 [1 1]
 [2 2]
 [3 5]
 [4 4]
 [5 8]
 [6 6]
 [7 3]
 [8 7]]


(array([[0, 0],
        [1, 1],
        [2, 2],
        [3, 5],
        [4, 4],
        [6, 6],
        [7, 3],
        [8, 7]], dtype=int64),
 (5,),
 (8,))

In [42]:
# 9x10 distance matrix (one more column than rows).
# Recorded result: all nine rows are paired and column 7 is the only entry of the last tuple.
dists = np.array([
  [0.06871448, 1., 1., 1., 1., 1., 1., 1., 1., 1.],
  [1., 0.06176221, 1., 1., 0.96813791, 1., 1., 1., 1., 1.],
  [1., 1., 0.02992284, 1., 1., 1., 1., 1., 1., 1.],
  [1., 1., 1., 1., 1., 0.055438, 1., 1., 1., 1.],
  [1., 1., 1., 0.03692986, 0.74497972, 1., 0.80186697, 1., 1., 1.],
  [1., 1., 1., 0.82536749, 0.97627397, 1., 0.0555338, 0.93578798, 1., 1.],
  [1., 1., 1., 1., 1., 1., 1., 1., 1., 0.03617145],
  [1., 0.97301484, 1., 0.72932389, 0.02848313, 1., 0.96989549, 1., 1., 1.],
  [1., 1., 1., 1., 1., 1., 1., 1., 0.01484608, 1.]
])

linear_assignment(dists, match_thresh)

indices_to_matches
indices:
[[0 0]
 [1 1]
 [2 2]
 [3 5]
 [4 3]
 [5 6]
 [6 9]
 [7 4]
 [8 8]]


(array([[0, 0],
        [1, 1],
        [2, 2],
        [3, 5],
        [4, 3],
        [5, 6],
        [6, 9],
        [7, 4],
        [8, 8]], dtype=int64),
 (),
 (7,))

In [43]:
# 12x7 distance matrix (more rows than columns).
# Recorded result: seven pairs are returned, rows 0, 1, 3, 10 and 11 are listed in the
# first trailing tuple, and the second trailing tuple is empty.
dists = np.array([
    [1, 1, 1, 1, 1, 1, 1],
    [1, 1, 0.941002, 1, 1, 1, 0.575342],
    [1, 1, 1, 0.0317649, 0.950865, 1, 1],
    [1, 1, 0.908129, 1, 1, 1, 0.677881],
    [1, 1, 0.031623, 1, 1, 1, 0.845811],
    [1, 0.0319127, 1, 1, 1, 1, 1],
    [0.034943, 1, 0.999031, 1, 1, 1, 1],
    [1, 1, 0.856714, 1, 1, 1, 0.0713416],
    [1, 1, 1, 1, 1, 0.00747945, 1],
    [1, 1, 1, 0.955944, 0.0108065, 1, 1],
    [1, 1, 1, 1, 1, 1, 1],
    [0.96013, 1, 1, 1, 1, 1, 1]
])

linear_assignment(dists, match_thresh)

indices_to_matches
indices:
[[2 3]
 [4 2]
 [5 1]
 [6 0]
 [7 6]
 [8 5]
 [9 4]]


(array([[2, 3],
        [4, 2],
        [5, 1],
        [6, 0],
        [7, 6],
        [8, 5],
        [9, 4]], dtype=int64),
 (0, 1, 3, 10, 11),
 ())

In [44]:
# 10x10 distance matrix in which every row and column has exactly one small value.
# Recorded result: ten pairs are returned and both trailing tuples are empty.
dists = np.array([
    [0.0583072, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    [1, 0.0432461, 1, 1, 0.968647, 1, 1, 1, 1, 1],
    [1, 1, 1, 0.0128587, 1, 1, 1, 1, 1, 1],
    [1, 1, 0.0354033, 1, 1, 1, 1, 1, 1, 1],
    [1, 1, 1, 1, 0.74248, 0.0344458, 0.811372, 1, 1, 1],
    [1, 1, 1, 1, 1, 1, 1, 1, 0.0406445, 1],
    [1, 1, 1, 1, 0.976859, 0.826129, 0.0430449, 1, 1, 0.954299],
    [1, 0.971974, 1, 1, 0.0414572, 0.732245, 0.974159, 1, 1, 1],
    [1, 1, 1, 1, 1, 1, 1, 0.0109232, 1, 1],
    [1, 1, 1, 1, 1, 1, 0.963612, 1, 1, 0.146502]
])

linear_assignment(dists, match_thresh)

indices_to_matches
indices:
[[0 0]
 [1 1]
 [2 3]
 [3 2]
 [4 5]
 [5 8]
 [6 6]
 [7 4]
 [8 7]
 [9 9]]


(array([[0, 0],
        [1, 1],
        [2, 3],
        [3, 2],
        [4, 5],
        [5, 8],
        [6, 6],
        [7, 4],
        [8, 7],
        [9, 9]], dtype=int64),
 (),
 ())

In [45]:
from cjm_byte_track.matching import box_iou_batch

In [46]:
# Ten track boxes, four numbers each; named tlbr, i.e. presumably [x1, y1, x2, y2] — TODO confirm
stracks_tlbr = [
    [802.431, 301.6, 995.954, 451.851],
    [521.554, 369.363, 711.1, 529.451],
    [191.288, 193.669, 267.159, 244],
    [565.679, 566.005, 815.292, 719.111],
    [389.009, 224.114, 508.233, 353.844],
    [597.513, 193.034, 737.378, 309.714],
    [326.345, 189.51, 447.15, 298.353],
    [420.268, 270.779, 585.164, 393.233],
    [88.4078, 136.008, 178.812, 253.103],
    [269.081, 208.368, 343.869, 241.893]
]

# Create a contiguous NumPy array from the track data
stracks_tlbr_np = np.ascontiguousarray(np.array(stracks_tlbr))


# Ten detection boxes in the same layout as the track boxes
detections_tlbr = [
    [805.686, 304.402, 996.72, 455.002],
    [524.17, 370.633, 715.037, 529.768],
    [567.432, 569.344, 815.9, 719.817],
    [191.558, 193.821, 267.597, 244.032],
    [421.328, 268.753, 587.79, 392.764],
    [388.84, 224.5, 511.07, 354.787],
    [327.209, 190.112, 449.947, 299.291],
    [88.2511, 136.163, 178.74, 253.941],
    [599.344, 192.703, 739.006, 308.079],
    [268.088, 204.027, 345.001, 241.598]
]

# Create a contiguous NumPy array from the detections data
detections_tlbr_np = np.ascontiguousarray(np.array(detections_tlbr))

# Pairwise distance (1 - IoU); the visible part of the recorded output agrees to about
# four decimals with the hand-entered 10x10 `dists` matrix used earlier
1-box_iou_batch(stracks_tlbr_np, detections_tlbr_np)

array([[0.05830749, 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 0.04324753, 1.        , 1.        , 0.9686473 ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 0.01287833, 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 0.03540396, 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 0.74247683,
        0.03444314, 0.8113688 , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 0.04064818, 1.        ],
       [1.        , 1.        , 1.        , 1.        , 0.97685778,
        0.82612748, 0.04305579, 1.        , 1.        , 0.9543001 ],
       [1.        , 0.97197521, 1.       

In [47]:
# Nine track boxes, four numbers each; named tlbr, i.e. presumably [x1, y1, x2, y2] — TODO confirm
stracks_tlbr = [[801.68070078, 301.69291735, 995.83788157, 451.876719],
                 [521.22518063, 369.33599353, 711.42142773, 529.38341022],
                 [191.16331458, 193.47710609, 266.96166396, 243.86285782],
                 [565.75703144, 565.5274415, 815.41626453, 719.13038492],
                 [388.49719703, 223.79027367, 509.01418388, 353.95997047],
                 [326.05074883, 189.85471725, 447.58558273, 298.98592949],
                 [597.48936176, 192.86307931, 737.04803944, 310.09313226],
                 [419.81409431, 269.17650223, 585.61533332, 393.53150368],
                 [87.96551704, 135.90763569, 178.39156151, 254.06175137]]

# Create a contiguous NumPy array from the track data
stracks_tlbr_np = np.ascontiguousarray(np.array(stracks_tlbr))


# Ten detection boxes in the same layout (one more detection than tracks)
detections_tlbr = [[804.66910124, 304.01881218, 996.75326109, 454.91154671],
        [523.68838668, 370.06069183, 715.21527648, 529.86287117],
        [567.03765035, 570.35922289, 816.41314626, 719.79817629],
        [191.39453053, 193.88383031, 267.48803258, 243.99884343],
        [421.60306096, 269.01772976, 587.72294164, 392.45414257],
        [388.93204629, 225.01641512, 510.41085184, 353.98362398],
        [327.04007626, 190.07407188, 449.82983112, 299.73217964],
        [599.40533876, 192.85640478, 738.88836145, 308.57549429],
        [ 87.77255535, 135.95619678, 178.5406065 , 254.53011036],
        [267.82703042, 201.37670994, 345.64222932, 241.60795689]]

# Create a contiguous NumPy array from the detections data
detections_tlbr_np = np.ascontiguousarray(np.array(detections_tlbr))

# Pairwise distance (1 - IoU) between every track (rows) and detection (columns)
1-box_iou_batch(stracks_tlbr_np, detections_tlbr_np)

array([[0.05394472, 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 0.03939261, 1.        , 1.        , 0.96888572,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 0.02049892, 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 0.0442709 , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 0.74190436,
        0.02437249, 0.80984901, 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 0.97639729,
        0.82357392, 0.03459486, 1.        , 1.        , 0.94949333],
       [1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 0.03905921, 1.        , 1.        ],
       [1.        , 0.97079682, 1.       