# TP 2: IOU-based Tracking (Bounding-Box Tracker)

Objective: Develop a Simple IoU-Based Tracker and extend it for Multiple Object Tracking
- Object representation: bounding box
- MOT (Multiple Object Tracker)
- Data: pre-generated detections loaded from text file

In [52]:
import pandas as pd
import numpy as np
from scipy.optimize import linear_sum_assignment
import cv2
import time

In [53]:
# Root folder of the ADL-Rundle-6 sequence; detections live under <root>/det/<detector>/det.txt.
ADL_Rundle_6_path = "ADL-Rundle-6"
det_path = f"{ADL_Rundle_6_path}/det"

# One detection file per detector.
public_dataset_path = f"{det_path}/public-dataset/det.txt"
Yolov5l_path = f"{det_path}/Yolov5l/det.txt"
Yolov5s_path = f"{det_path}/Yolov5s/det.txt"

# 1. Load detections (det) stored in a MOT-Challenge-formatted text file.
Each line represents one object
instance and contains 10 values:
- frame = frame number
- id = number that identifies the object as belonging to a trajectory, i.e. a unique track ID (set to
−1 in a detection file, as no ID is assigned yet).
- bb_left, bb_top, bb_width, bb_height: bounding box position in 2D image coordinates i.e. the
top-left corner as well as width and height
- conf: detection confidence score
- x,y,z: the world coordinates are ignored for the 2D challenge and can be filled with -1.


In [54]:
def load_detections(det_file):
    """
    Read a MOT-challenge style detection file into a DataFrame.

    The file is comma-separated, has no header row, and each row carries
    ten values which are labelled as follows:
    - frame: frame number
    - id: trajectory ID of the object (-1 in a detection file, since no ID has been assigned yet)
    - bb_left, bb_top, bb_width, bb_height: bounding box in 2D image coordinates (top-left corner, width and height)
    - conf: detection confidence score
    - x, y, z: world coordinates, ignored for the 2D challenge (may be filled with -1)

    :param det_file: path to the detection file
    :return: pandas dataframe with one row per detection and the ten columns above
    """
    column_names = [
        'frame', 'id',
        'bb_left', 'bb_top', 'bb_width', 'bb_height',
        'conf',
        'x', 'y', 'z',
    ]
    detections = pd.read_csv(det_file, header=None, sep=',')
    # Label the positional columns; pandas raises if the file does not have ten columns.
    detections.columns = column_names
    return detections

In [55]:
# Load the public-dataset detections and preview the first rows.
public_dataset_df = load_detections(public_dataset_path)
public_dataset_df.head()

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,conf,x,y,z
0,1,-1,1689,385,146.62,332.71,67.567,-1,-1,-1
1,1,-1,1303,503,61.514,139.59,29.439,-1,-1,-1
2,1,-1,1258,569,40.123,91.049,19.601,-1,-1,-1
3,1,-1,31,525,113.37,257.27,17.013,-1,-1,-1
4,1,-1,1800,483,94.66,214.81,11.949,-1,-1,-1


# 2. Create similarity matrix

## 2.1 Initialization
- Define the lists or arrays to store the current tracked bounding boxes and the new
detections for the current frame.

In [56]:
# Bounding-box columns in the order used by the IoU code: (left, top, width, height).
bbox_columns = ['bb_left', 'bb_top', 'bb_width', 'bb_height']

# Frame numbers in order of first appearance in the detection file.
frames = public_dataset_df['frame'].unique()

# One (n_boxes, 4) array per frame, in the same order as `frames`.
# A single groupby pass (sort=False keeps first-appearance order) replaces
# re-filtering the whole DataFrame once per frame.
current_tracked_bounding_boxes = [
    frame_rows[bbox_columns].values
    for _, frame_rows in public_dataset_df.groupby('frame', sort=False)
]

# Frame n is compared with frame n + 1, so the new detections start at the
# second frame: new_detections[i] holds the boxes of frames[i + 1].
new_detections = current_tracked_bounding_boxes[1:]

print("Number of frames:", len(frames))
print("Length of current_tracked_bounding_boxes:", len(current_tracked_bounding_boxes))
print("Length of new_detections:", len(new_detections))

Number of frames: 525
Length of current_tracked_bounding_boxes: 525
Length of new_detections: 524


## 2.2 Calculate IoU for all pairs
- Create a similarity matrix (a 2D array) where each entry (i,j) corresponds to the IoU
value between the ith tracked object and the jth new detection
- The dimensions of this matrix will be (N×M), where N is the number of tracked objects
and M is the number of new detections. Compute similarity score using the Jaccard
index (intersection-over-union) for each pair of bounding boxes

In [57]:
def calculate_iou(box_1, box_2):
    """
    Compute the Jaccard index (intersection-over-union) of two bounding boxes.

    Both boxes are indexable as (left, top, width, height).

    :param box_1: first bounding box
    :param box_2: second bounding box
    :return: IoU as a float; 0.0 when the boxes do not overlap or when
        the union area is zero (e.g. two degenerate zero-area boxes)
    """
    # Corners of the intersection rectangle.
    inter_left = max(box_1[0], box_2[0])
    inter_top = max(box_1[1], box_2[1])
    inter_right = min(box_1[0] + box_1[2], box_2[0] + box_2[2])
    inter_bottom = min(box_1[1] + box_1[3], box_2[1] + box_2[3])

    if inter_right < inter_left or inter_bottom < inter_top:
        return 0.0

    intersection_area = (inter_right - inter_left) * (inter_bottom - inter_top)
    box_1_area = box_1[2] * box_1[3]
    box_2_area = box_2[2] * box_2[3]
    union_area = box_1_area + box_2_area - intersection_area

    # Zero-area boxes that touch or coincide pass the overlap test above but
    # have an empty union; report no overlap instead of dividing by zero.
    if union_area <= 0:
        return 0.0

    return intersection_area / float(union_area)

In [58]:
def make_similarity_matrix(current_tracked_bounding_boxes, new_detections, frame):
    """
    Build the IoU similarity matrix for one frame index.

    :param current_tracked_bounding_boxes: per-frame sequences of tracked boxes
    :param new_detections: per-frame sequences of newly detected boxes
    :param frame: index used to select the entry from both sequences
    :return: (N, M) numpy array where entry (i, j) is the IoU between the
        i-th tracked box and the j-th new detection at that index
    """
    tracked_boxes = current_tracked_bounding_boxes[frame]
    detected_boxes = new_detections[frame]

    similarity_matrix = np.zeros((len(tracked_boxes), len(detected_boxes)))
    # Visit every (tracked, detected) pair exactly once.
    for row, col in np.ndindex(*similarity_matrix.shape):
        similarity_matrix[row, col] = calculate_iou(tracked_boxes[row], detected_boxes[col])
    return similarity_matrix

In [59]:
def make_similarity_matrices(current_tracked_bounding_boxes, new_detections):
    """
    Build one IoU similarity matrix per entry of ``new_detections``.

    :param current_tracked_bounding_boxes: per-frame sequences of tracked boxes
    :param new_detections: per-frame sequences of newly detected boxes
    :return: list of similarity matrices, one for each index of ``new_detections``
    """
    return [
        make_similarity_matrix(current_tracked_bounding_boxes, new_detections, frame_index)
        for frame_index in range(len(new_detections))
    ]

In [60]:
# Compute every per-frame similarity matrix, then print each one as a table.
similarity_matrices = make_similarity_matrices(current_tracked_bounding_boxes, new_detections)
for i, matrix in enumerate(similarity_matrices):
    print(f"Frame {i+1}")
    print(pd.DataFrame(matrix))
    print("---------------------------------------------------------------------------")

Frame 1
          0         1         2         3         4    5
0  1.000000  0.000000  0.361845  0.000000  0.000000  0.0
1  0.000000  0.744732  0.000000  0.021989  0.000000  0.0
2  0.000000  0.000000  0.000000  0.544377  0.000000  0.0
3  0.000000  0.000000  0.000000  0.000000  0.633519  0.0
4  0.124487  0.000000  0.358332  0.000000  0.000000  0.0
---------------------------------------------------------------------------
Frame 2
          0         1    2         3         4         5         6
0  0.383676  0.577545  0.0  0.230269  0.000000  0.000000  0.000000
1  0.000000  0.000000  1.0  0.000000  0.120208  0.000000  0.000000
2  0.568781  0.221538  0.0  0.574153  0.000000  0.000000  0.000000
3  0.000000  0.000000  0.0  0.000000  0.238375  0.000000  0.000000
4  0.000000  0.000000  0.0  0.000000  0.000000  0.217252  0.790802
5  0.000000  0.035670  0.0  0.000000  0.000000  0.000000  0.000000
---------------------------------------------------------------------------
Frame 3
          0  

# 3. Associate the detections to tracks
- Apply the Hungarian algorithm using an existing library (e.g. the function linear_sum_assignment
from the SciPy library for Python) to find the optimal assignment of detections to tracked objects

In [61]:
def optimal_assignment_detections(similarity_matrix):
    """
    Find the track-to-detection assignment that maximises the total similarity.

    :param similarity_matrix: (N, M) numpy array of similarity scores
    :return: (row_ind, col_ind) index arrays; row_ind[k] is assigned to col_ind[k]
    """
    # linear_sum_assignment minimises total cost, so negate the scores to maximise them.
    cost_matrix = -similarity_matrix
    assignment = linear_sum_assignment(cost_matrix)
    return assignment[0], assignment[1]

In [62]:
def make_assignments(similarity_matrices):
    """
    Solve the assignment problem for every similarity matrix.

    :param similarity_matrices: iterable of (N, M) similarity matrices
    :return: list of (row_ind, col_ind) tuples, one per input matrix
    """
    return [
        (rows, cols)
        for rows, cols in map(optimal_assignment_detections, similarity_matrices)
    ]

In [63]:
# Solve the assignment for every frame pair, then print the matched indices.
assignments = make_assignments(similarity_matrices)
for i, (row_indices, column_indices) in enumerate(assignments):
    print(f"Frame {i+1}")
    print("Row indices:", row_indices)
    print("Column indices:", column_indices)
    print("---------------------------------------------------------------------------")

Frame 1
Row indices: [0 1 2 3 4]
Column indices: [0 1 3 4 2]
---------------------------------------------------------------------------
Frame 2
Row indices: [0 1 2 3 4 5]
Column indices: [1 2 3 4 6 0]
---------------------------------------------------------------------------
Frame 3
Row indices: [0 1 2 3 5 6]
Column indices: [2 0 3 1 4 5]
---------------------------------------------------------------------------
Frame 4
Row indices: [0 1 2 3]
Column indices: [3 0 1 2]
---------------------------------------------------------------------------
Frame 5
Row indices: [0 1 2 3]
Column indices: [4 1 3 0]
---------------------------------------------------------------------------
Frame 6
Row indices: [0 1 2 3 4 5]
Column indices: [0 1 3 2 5 4]
---------------------------------------------------------------------------
Frame 7
Row indices: [0 1 2 3 4]
Column indices: [0 2 1 4 3]
---------------------------------------------------------------------------
Frame 8
Row indices: [0 1 2 3 4]
Colu

## 4. Implement track management
- Each object can be assigned to only one trajectory (ID)
- Create and update lists for matches, unmatched detections and unmatched tracks
    - Matched -> update existing tracks based on associations
    - Unmatched tracks -> remove tracks that exceed the maximum missed frames
    - Unmatched detections -> create new tracks

In [64]:
class Tracker:
    """
    IoU-based multiple object tracker.

    Tracks are kept in ``self.tracks``, a dict mapping a track ID to
    ``{'bbox': (left, top, width, height), 'missed_frames': int}``.
    Each call to :meth:`update` associates the detections of one frame
    with the existing tracks via the Hungarian algorithm on the IoU matrix.
    """

    def __init__(self, max_missed_frames=5, threshold=0.3):
        """
        :param max_missed_frames: a track is removed once it has gone
            unmatched for more than this many consecutive updates
        :param threshold: an assigned (track, detection) pair only counts
            as a match when its IoU is strictly greater than this value
        """
        self.tracks = {}
        self.next_track_id = 1
        self.max_missed_frames = max_missed_frames
        self.threshold = threshold

    def compute_iou(self, detections):
        """
        Build the IoU matrix between the current tracks and ``detections``.

        :param detections: sequence of (left, top, width, height) boxes
        :return: (n_tracks, n_detections) numpy array; rows follow the
            iteration order of ``self.tracks``
        """
        iou_matrix = np.zeros((len(self.tracks), len(detections)))
        for i, track in enumerate(self.tracks.values()):
            for j, det in enumerate(detections):
                iou_matrix[i, j] = self.iou(track['bbox'], det)
        return iou_matrix

    def iou(self, box1, box2):
        """
        Intersection-over-union of two (left, top, width, height) boxes.

        :return: IoU value, or 0.0 when the union area is not positive
        """
        x1, y1, w1, h1 = box1
        x2, y2, w2, h2 = box2
        xi1, yi1 = max(x1, x2), max(y1, y2)
        xi2, yi2 = min(x1 + w1, x2 + w2), min(y1 + h1, y2 + h2)
        # Clamp at zero so disjoint boxes give an empty intersection.
        intersection_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
        union_area = w1 * h1 + w2 * h2 - intersection_area
        return intersection_area / union_area if union_area > 0 else 0.0

    def update(self, detections):
        """
        Associate one frame of detections with the tracks and update them.

        - Matched tracks take the detection's box and reset ``missed_frames``.
        - Unmatched tracks get ``missed_frames`` incremented and are removed
          once it exceeds ``max_missed_frames``.
        - Unmatched detections start new tracks with fresh IDs.

        :param detections: sequence of (left, top, width, height) boxes
        :return: ``(matches, unmatched_tracks, unmatched_detections)`` where
            ``matches`` is a list of (track_id, detection_index) pairs,
            ``unmatched_tracks`` lists the IDs of tracks that found no
            detection (including any removed by this call) and
            ``unmatched_detections`` lists the detection indices that
            started new tracks
        """
        # Snapshot the IDs once: row i of the IoU matrix belongs to track_ids[i].
        track_ids = list(self.tracks)
        n_detections = len(detections)

        matches = []
        matched_track_ids = set()
        matched_detection_ids = set()

        # Nothing to associate when either side is empty.
        if track_ids and n_detections > 0:
            similarity_matrix = self.compute_iou(detections)
            # linear_sum_assignment minimises cost, so negate to maximise IoU.
            row_ind, col_ind = linear_sum_assignment(-similarity_matrix)

            for row, col in zip(row_ind, col_ind):
                if similarity_matrix[row, col] > self.threshold:
                    track_id = track_ids[row]
                    detection_id = int(col)
                    self.tracks[track_id]['bbox'] = detections[detection_id]
                    self.tracks[track_id]['missed_frames'] = 0
                    matches.append((track_id, detection_id))
                    matched_track_ids.add(track_id)
                    matched_detection_ids.add(detection_id)

        unmatched_tracks = [
            track_id for track_id in track_ids if track_id not in matched_track_ids
        ]
        # Ascending detection order makes new track IDs deterministic.
        unmatched_detections = [
            detection_id
            for detection_id in range(n_detections)
            if detection_id not in matched_detection_ids
        ]

        for track_id in unmatched_tracks:
            self.tracks[track_id]['missed_frames'] += 1
            if self.tracks[track_id]['missed_frames'] > self.max_missed_frames:
                del self.tracks[track_id]

        for detection_id in unmatched_detections:
            self.tracks[self.next_track_id] = {'bbox': detections[detection_id], 'missed_frames': 0}
            self.next_track_id += 1

        return matches, unmatched_tracks, unmatched_detections

## 5. Develop an interface for checking the tracking results, to see whether the tracker properly keeps track of objects:
- Display Video Frames
- Draw Bounding Boxes: Overlay bounding boxes for each tracked object on the frames
- Show Track IDs: Label the bounding boxes with track IDs for identification
- Save tracking video

In [65]:
# Build the image paths img1/000001.jpg, img1/000002.jpg, ... (one per entry of `frames`),
# then rebind `frames` to the list of images read from those paths.
image_paths = [f"{ADL_Rundle_6_path}/img1/{str(i).zfill(6)}.jpg" for i in range(1, len(frames)+1)]
frames = [cv2.imread(image_path) for image_path in image_paths]

In [66]:
def draw_tracks(frame, tracks):
    """
    Overlay every track's bounding box and ID on ``frame`` (modified in place).

    :param frame: image to draw on
    :param tracks: dict mapping track ID to a dict whose 'bbox' entry is
        (left, top, width, height)
    :return: the same ``frame`` object, with the overlays drawn
    """
    for identifier, state in tracks.items():
        left, top, box_width, box_height = state['bbox']
        top_left = (int(left), int(top))
        bottom_right = (int(left + box_width), int(top + box_height))
        # The ID label sits 10 pixels above the box's top-left corner.
        label_origin = (int(left), int(top - 10))
        cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)
        cv2.putText(frame, str(identifier), label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    return frame

In [67]:
def save_tracking_video(output_path, frames, fps=10):
    """
    Write a sequence of frames to a video file using the 'mp4v' codec.

    The video size is taken from the first frame.

    :param output_path: path of the video file to create
    :param frames: sequence of images, each with a (height, width, channels) shape
    :param fps: frame rate of the output video
    :return: None; when ``frames`` is empty nothing is written and a warning is issued
    :raises IOError: if the video writer cannot be opened for ``output_path``
    """
    if len(frames) == 0:
        # No frame means no known video size; skip instead of failing on frames[0].
        import warnings
        warnings.warn(f"save_tracking_video: no frames to write, skipping {output_path!r}")
        return

    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        # An unopened writer silently drops every frame; fail loudly instead.
        out.release()
        raise IOError(f"Could not open video writer for {output_path!r}")

    try:
        for frame in frames:
            out.write(frame)
    finally:
        # Always release so the file is finalised even if a write fails.
        out.release()

In [68]:
tracker = Tracker()

output_frames = []

try:
    for frame_idx, frame in enumerate(frames):
        if frame_idx == 0:
            # Seed the tracker: every detection of the first frame starts a track.
            for detection in current_tracked_bounding_boxes[frame_idx]:
                tracker.tracks[tracker.next_track_id] = {'bbox': detection, 'missed_frames': 0}
                tracker.next_track_id += 1
        elif frame_idx - 1 < len(new_detections):
            # new_detections starts at the second frame, so the detections of
            # image `frame_idx` are stored at index `frame_idx - 1`.
            tracker.update(new_detections[frame_idx - 1])
        else:
            # No detections are available for the remaining images.
            break

        if frame is None:
            # cv2.imread returns None when an image cannot be read; the tracker
            # state was still updated above, only the drawing is skipped.
            print(f"Skipping unreadable image at frame index {frame_idx}")
            continue

        frame_with_tracks = draw_tracks(frame.copy(), tracker.tracks)
        output_frames.append(frame_with_tracks)
        cv2.imshow('frame', frame_with_tracks)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window even if tracking is interrupted by an error.
    cv2.destroyAllWindows()

output_path = "output.mp4"
# Only save when at least one frame was rendered (e.g. not after an immediate quit).
if output_frames:
    save_tracking_video(output_path, output_frames)