In [1]:
import motmetrics as mm
from pathlib import Path
import os
import pandas as pd
import json
import numpy as np

In [2]:
# Name of the run folder to evaluate; it selects the matching sub-directories
# under data/predicted and data/config. Surrounding whitespace is dropped so a
# stray space in the typed/pasted name does not produce a non-existent path.
FOLDER_NAME = input("Enter your folder name (e.g., '20251229-223351')").strip()
if not FOLDER_NAME:
    # With an empty name os.path.join() would resolve to the parent
    # 'predicted' / 'config' directories, and the checks below could pass
    # against the wrong location.
    raise ValueError("Folder name must not be empty.")

# All paths are rooted at ./data relative to the current working directory.
DATA_DIR = os.path.join(Path.cwd(), 'data')
PREDICTED_DIR = os.path.join(DATA_DIR, 'predicted', FOLDER_NAME)
CONFIG_DIR = os.path.join(DATA_DIR, 'config', FOLDER_NAME)
GROUND_TRUTH_FILE = os.path.join(DATA_DIR, 'ground-truth', 'test_30s_output.txt')
EVALUATE_DIR = os.path.join(DATA_DIR, 'evaluate')

# Fail fast if any input is missing. os.path.isdir() is False for paths that
# do not exist, so no separate existence check is needed.
if not os.path.isdir(PREDICTED_DIR) or not os.listdir(PREDICTED_DIR):
    raise FileNotFoundError(f"The directory {PREDICTED_DIR} does not exist or is empty.")
if not os.path.isfile(GROUND_TRUTH_FILE):
    raise FileNotFoundError(f"The ground truth file {GROUND_TRUTH_FILE} does not exist.")
if not os.path.isdir(CONFIG_DIR) or not os.listdir(CONFIG_DIR):
    raise FileNotFoundError(f"The directory {CONFIG_DIR} does not exist or is empty.")

# Create the output directory only once the inputs are known to be valid.
os.makedirs(EVALUATE_DIR, exist_ok=True)
print("All necessary files and directories are present.")

All necessary files and directories are present.


In [3]:
# Module-level results table; store_results() appends one row to it per call.
df = pd.DataFrame()

In [4]:
def evaluate_tracker(gt_file, ts_file, name, distth=0.6):
    """Score one tracker output against the ground truth with motmetrics.

    Args:
        gt_file: Path to the ground-truth file, read with ``fmt='mot16'`` and
            ``min_confidence=1`` (per motmetrics, rows below that confidence
            are presumably dropped — confirm against the loader docs).
        ts_file: Path to the tracker output file, read with ``fmt='mot16'``.
        name: Label for this run; used as the row name of the summary.
        distth: Distance threshold forwarded to
            ``mm.utils.compare_to_groundtruth`` for the ``'iou'`` distance.
            Defaults to 0.6, the value previously hard-coded here.
            NOTE(review): motmetrics appears to treat this as a maximum
            IoU *distance* (1 - IoU), so 0.6 would accept pairs with
            IoU >= 0.4 — confirm against the motmetrics documentation.

    Returns:
        dict mapping each MOTChallenge metric name to its value for ``name``.
    """
    gt = mm.io.loadtxt(gt_file, fmt='mot16', min_confidence=1)
    ts = mm.io.loadtxt(ts_file, fmt='mot16')

    acc = mm.utils.compare_to_groundtruth(gt, ts, 'iou', distth=distth)

    mh = mm.metrics.create()
    summary = mh.compute(acc, metrics=mm.metrics.motchallenge_metrics, name=name)

    # The summary has a single row labelled `name`; flatten it to a plain dict.
    return summary.loc[name].to_dict()

def evaluate_config(config_file):
    """Read a run's JSON config and pull out the fields used for reporting.

    Args:
        config_file: Path to a JSON file containing the keys 'algorithm',
            'attempt', 'inference_time' and 'tracking_time'.

    Returns:
        Tuple ``(algorithm, attempt, inference_time, tracking_time)`` with the
        values exactly as stored in the file.

    Raises:
        KeyError: If one of the four keys is missing.
    """
    with open(config_file, 'r') as handle:
        config = json.load(handle)

    wanted_keys = ('algorithm', 'attempt', 'inference_time', 'tracking_time')
    return tuple(config[key] for key in wanted_keys)

def add_metadata(row, algorithm, attempt, inference_time, tracking_time, std_inference_time, std_tracking_time):
    """Attach run metadata to a metrics row.

    The row is modified in place and also returned, so the call can be used
    either as a statement or as an expression.

    Args:
        row: Mapping of metric name to value; updated in place.
        algorithm, attempt: Identification of the run.
        inference_time, tracking_time: Timing values to record.
        std_inference_time, std_tracking_time: Spread of the timing values.

    Returns:
        The same ``row`` object, now containing the six metadata keys.
    """
    row.update(
        algorithm=algorithm,
        attempt=attempt,
        inference_time=inference_time,
        tracking_time=tracking_time,
        std_inference_time=std_inference_time,
        std_tracking_time=std_tracking_time,
    )
    return row

def store_results(row):
    """Append one result row to the module-level ``df`` table.

    Args:
        row: Mapping of column name to value for a single evaluated run.

    Side effects:
        Rebinds the global ``df`` to a new DataFrame that ends with ``row``;
        the index is renumbered from 0.
    """
    global df
    # NOTE(review): concatenating per call copies the whole table each time;
    # collecting rows in a list and building the frame once would avoid that.
    new_record = pd.DataFrame([row])
    df = pd.concat([df, new_record], ignore_index=True)

In [5]:
# Put a 'TrackEval' directory under the current working directory on the import path.
# NOTE(review): no module from that directory is imported in the cells visible here —
# the HOTA/CLEAR functions in this cell are written directly with numpy/scipy — so
# confirm whether this path entry is still needed.
import sys
sys.path.append(os.path.join(Path.cwd(), 'TrackEval'))

# Assignment solver used for the per-frame matching in calculate_hota and calculate_clear.
from scipy.optimize import linear_sum_assignment

def load_mot_format(file_path):
    """Load a comma-separated MOT-format text file and group its rows by frame.

    Every non-blank line must start with ``frame, id, x, y, w, h``; any
    further columns on the line are ignored. Blank lines (for example a
    trailing empty line at the end of the file) are skipped.

    NOTE(review): no confidence/flag column is read here, whereas
    evaluate_tracker loads the ground truth with ``min_confidence=1`` —
    confirm the two evaluations are meant to see the same ground-truth rows.

    Args:
        file_path: Path to the text file.

    Returns:
        dict mapping frame id (int) to ``{'ids': [int, ...],
        'bboxes': [[x, y, w, h], ...]}`` with both lists in file order.

    Raises:
        ValueError: If a non-blank line has non-numeric frame/id/box fields.
        IndexError: If a non-blank line has fewer than two fields.
    """
    data = {}
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Nothing to parse; int('') would otherwise raise ValueError.
                continue

            parts = line.split(',')
            frame_id = int(parts[0])
            track_id = int(parts[1])
            bbox = [float(x) for x in parts[2:6]]  # x, y, w, h

            frame_entry = data.setdefault(frame_id, {'ids': [], 'bboxes': []})
            frame_entry['ids'].append(track_id)
            frame_entry['bboxes'].append(bbox)

    return data

def bbox_iou(bbox1, bbox2):
    """Intersection-over-union of two axis-aligned boxes given as [x, y, w, h].

    Args:
        bbox1, bbox2: Four-element sequences ``(x, y, w, h)`` where (x, y) is
            one corner and w/h extend in the positive direction.

    Returns:
        intersection area / union area, or 0 when the union area is not
        positive (e.g. both boxes have zero area).
    """
    left_a, top_a, width_a, height_a = bbox1
    left_b, top_b, width_b, height_b = bbox2

    # Far corners of each box.
    right_a, bottom_a = left_a + width_a, top_a + height_a
    right_b, bottom_b = left_b + width_b, top_b + height_b

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    intersection = overlap_w * overlap_h

    union = width_a * height_a + width_b * height_b - intersection
    if union > 0:
        return intersection / union
    return 0

def prepare_trackeval_data(gt_data, pred_data):
    """Convert per-frame ground truth and predictions into per-timestep arrays.

    Args:
        gt_data, pred_data: dicts mapping frame id to
            ``{'ids': [...], 'bboxes': [[x, y, w, h], ...]}``.

    Returns:
        dict with
        - 'gt_ids' / 'tracker_ids': one int array per timestep holding the
          0-based index of each object's id (ids are indexed in sorted order);
        - 'similarity_scores': one (num gt, num tracker) IoU matrix per
          timestep, all zeros/empty when either side is absent in that frame;
        - 'num_gt_ids', 'num_tracker_ids': number of distinct ids;
        - 'num_gt_dets', 'num_tracker_dets': total detections over all frames;
        - 'num_timesteps': number of frames present in either input.
    """
    # Timesteps are the sorted union of frames seen on either side.
    frames = sorted(gt_data.keys() | pred_data.keys())

    # Dense 0-based index for every distinct id, assigned in ascending id order.
    gt_unique = {obj_id for entry in gt_data.values() for obj_id in entry['ids']}
    tracker_unique = {obj_id for entry in pred_data.values() for obj_id in entry['ids']}
    gt_index = {obj_id: pos for pos, obj_id in enumerate(sorted(gt_unique))}
    tracker_index = {obj_id: pos for pos, obj_id in enumerate(sorted(tracker_unique))}

    gt_ids, tracker_ids, similarity_scores = [], [], []
    num_gt_dets = 0
    num_tracker_dets = 0

    for frame in frames:
        has_gt = frame in gt_data
        has_pred = frame in pred_data

        gt_frame_ids = [gt_index[obj_id] for obj_id in gt_data[frame]['ids']] if has_gt else []
        tracker_frame_ids = [tracker_index[obj_id] for obj_id in pred_data[frame]['ids']] if has_pred else []
        num_gt_dets += len(gt_frame_ids)
        num_tracker_dets += len(tracker_frame_ids)

        if has_gt and has_pred:
            # Pairwise IoU between every ground-truth and predicted box.
            gt_boxes = gt_data[frame]['bboxes']
            pred_boxes = pred_data[frame]['bboxes']
            sim = np.zeros((len(gt_boxes), len(pred_boxes)))
            for row, gt_box in enumerate(gt_boxes):
                for col, pred_box in enumerate(pred_boxes):
                    sim[row, col] = bbox_iou(gt_box, pred_box)
        else:
            # One side is missing, so the matrix has a zero-length axis.
            sim = np.zeros((len(gt_frame_ids), len(tracker_frame_ids)))

        gt_ids.append(np.array(gt_frame_ids, dtype=int))
        tracker_ids.append(np.array(tracker_frame_ids, dtype=int))
        similarity_scores.append(sim)

    return {
        'gt_ids': gt_ids,
        'tracker_ids': tracker_ids,
        'similarity_scores': similarity_scores,
        'num_gt_ids': len(gt_index),
        'num_tracker_ids': len(tracker_index),
        'num_gt_dets': num_gt_dets,
        'num_tracker_dets': num_tracker_dets,
        'num_timesteps': len(frames)
    }

def calculate_hota(data):
    """Compute HOTA and its component scores at a range of similarity thresholds.

    NOTE(review): the structure appears to mirror TrackEval's HOTA metric —
    confirm against that implementation before changing any formula.

    Args:
        data: dict providing 'gt_ids', 'tracker_ids' and 'similarity_scores'
            (one entry per timestep) plus the counts 'num_gt_ids',
            'num_tracker_ids', 'num_gt_dets' and 'num_tracker_dets'.
            The id arrays are used as indices into arrays of size
            num_gt_ids / num_tracker_ids.

    Returns:
        dict of numpy arrays, one value per threshold, with keys 'HOTA_TP',
        'HOTA_FN', 'HOTA_FP', 'HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'LocA'.
    """
    # Similarity thresholds (alpha): 0.05, 0.10, ..., 0.95.
    array_labels = np.arange(0.05, 0.99, 0.05)
    res = {
        'HOTA_TP': np.zeros(len(array_labels)),
        'HOTA_FN': np.zeros(len(array_labels)),
        'HOTA_FP': np.zeros(len(array_labels)),
        'HOTA': np.zeros(len(array_labels)),
        'DetA': np.zeros(len(array_labels)),
        'AssA': np.zeros(len(array_labels)),
        'DetRe': np.zeros(len(array_labels)),
        'DetPr': np.zeros(len(array_labels)),
        'LocA': np.zeros(len(array_labels))
    }

    # Degenerate input: with no detections on one side nothing can match, so
    # every detection on the other side is a miss / false positive, LocA is
    # set to 1 and the remaining scores stay at 0.
    if data['num_tracker_dets'] == 0 or data['num_gt_dets'] == 0:
        if data['num_tracker_dets'] == 0:
            res['HOTA_FN'] = data['num_gt_dets'] * np.ones(len(array_labels))
        if data['num_gt_dets'] == 0:
            res['HOTA_FP'] = data['num_tracker_dets'] * np.ones(len(array_labels))
        res['LocA'] = np.ones(len(array_labels))
        return res

    # Variables counting global association
    # potential_matches_count[i, j]: accumulated normalised similarity between
    # gt id i and tracker id j over all timesteps.
    # gt_id_count / tracker_id_count: number of timesteps each id appears in,
    # shaped (N, 1) and (1, M) so that their sum broadcasts to (N, M).
    potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
    gt_id_count = np.zeros((data['num_gt_ids'], 1))
    tracker_id_count = np.zeros((1, data['num_tracker_ids']))

    # Accumulate global track information
    for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
        similarity = data['similarity_scores'][t]
        # Normalise each entry by (its column sum + its row sum - itself);
        # entries whose denominator is not above machine epsilon stay 0.
        sim_iou_denom = similarity.sum(0)[np.newaxis, :] + similarity.sum(1)[:, np.newaxis] - similarity
        sim_iou = np.zeros_like(similarity)
        sim_iou_mask = sim_iou_denom > 0 + np.finfo('float').eps
        sim_iou[sim_iou_mask] = similarity[sim_iou_mask] / sim_iou_denom[sim_iou_mask]
        potential_matches_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += sim_iou

        gt_id_count[gt_ids_t] += 1
        tracker_id_count[0, tracker_ids_t] += 1

    # Per (gt id, tracker id) score: accumulated similarity relative to the
    # number of timesteps in which either id appears.
    global_alignment_score = potential_matches_count / (gt_id_count + tracker_id_count - potential_matches_count)
    # One (num_gt_ids, num_tracker_ids) match counter per threshold.
    matches_counts = [np.zeros_like(potential_matches_count) for _ in array_labels]

    # Calculate scores for each timestep
    for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
        # No ground truth in this timestep: every tracker detection is a false positive.
        if len(gt_ids_t) == 0:
            for a in range(len(array_labels)):
                res['HOTA_FP'][a] += len(tracker_ids_t)
            continue
        # No tracker output in this timestep: every ground-truth detection is missed.
        if len(tracker_ids_t) == 0:
            for a in range(len(array_labels)):
                res['HOTA_FN'][a] += len(gt_ids_t)
            continue

        similarity = data['similarity_scores'][t]
        # Weight the per-frame similarity by the global alignment score of the id pair.
        score_mat = global_alignment_score[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] * similarity

        # One-to-one assignment maximising the total score (negated because
        # linear_sum_assignment minimises cost).
        match_rows, match_cols = linear_sum_assignment(-score_mat)

        for a, alpha in enumerate(array_labels):
            # The assignment is shared by all thresholds; a pair only counts
            # as matched at this threshold if its raw similarity reaches alpha.
            actually_matched_mask = similarity[match_rows, match_cols] >= alpha - np.finfo('float').eps
            alpha_match_rows = match_rows[actually_matched_mask]
            alpha_match_cols = match_cols[actually_matched_mask]
            num_matches = len(alpha_match_rows)
            res['HOTA_TP'][a] += num_matches
            res['HOTA_FN'][a] += len(gt_ids_t) - num_matches
            res['HOTA_FP'][a] += len(tracker_ids_t) - num_matches
            if num_matches > 0:
                # LocA holds the running sum of matched similarities here; it
                # is divided by the TP count after the loop.
                res['LocA'][a] += sum(similarity[alpha_match_rows, alpha_match_cols])
                matches_counts[a][gt_ids_t[alpha_match_rows], tracker_ids_t[alpha_match_cols]] += 1

    # Calculate association scores
    for a in range(len(array_labels)):
        matches_count = matches_counts[a]
        # Per id pair: matches relative to the timesteps in which either id appears.
        ass_a = matches_count / np.maximum(1, gt_id_count + tracker_id_count - matches_count)
        # Average over true positives: each pair's score is weighted by its match count.
        res['AssA'][a] = np.sum(matches_count * ass_a) / np.maximum(1, res['HOTA_TP'][a])

    # Calculate final scores
    # The 1e-10 floor on numerator and denominator makes LocA equal 1 when
    # there are no true positives at a threshold.
    res['LocA'] = np.maximum(1e-10, res['LocA']) / np.maximum(1e-10, res['HOTA_TP'])
    # np.maximum(1, ...) guards the divisions against a zero denominator.
    res['DetRe'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FN'])
    res['DetPr'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FP'])
    res['DetA'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FN'] + res['HOTA_FP'])
    res['HOTA'] = np.sqrt(res['DetA'] * res['AssA'])

    return res

def calculate_clear(data, threshold=0.5):
    """Compute CLEAR-style counts and the MOTA / MOTP scores derived from them.

    NOTE(review): the structure appears to mirror TrackEval's CLEAR metric —
    confirm against that implementation before changing any formula.

    Args:
        data: dict providing 'gt_ids', 'tracker_ids' and 'similarity_scores'
            (one entry per timestep) plus 'num_gt_ids', 'num_gt_dets' and
            'num_tracker_dets'.
        threshold: Minimum similarity for a gt/tracker pair to be eligible as
            a match (compared with a machine-epsilon tolerance).

    Returns:
        dict with the counts 'CLR_TP', 'CLR_FN', 'CLR_FP', 'IDSW', the summed
        similarity of matches 'MOTP_sum', and the scores 'MOTA' and 'MOTP'.
    """
    res = {
        'CLR_TP': 0, 'CLR_FN': 0, 'CLR_FP': 0, 'IDSW': 0,
        'MOTA': 0, 'MOTP': 0, 'MOTP_sum': 0
    }

    # Degenerate input: nothing can match, so only FN or FP is filled in and
    # every other field keeps its initial 0.
    if data['num_tracker_dets'] == 0 or data['num_gt_dets'] == 0:
        if data['num_tracker_dets'] == 0:
            res['CLR_FN'] = data['num_gt_dets']
        if data['num_gt_dets'] == 0:
            res['CLR_FP'] = data['num_tracker_dets']
        return res

    num_gt_ids = data['num_gt_ids']
    # Per gt id, NaN meaning "none yet":
    # prev_tracker_id          - tracker id of its most recent match at any earlier timestep;
    # prev_timestep_tracker_id - tracker id matched in the most recently *processed* timestep.
    prev_tracker_id = np.nan * np.zeros(num_gt_ids)
    prev_timestep_tracker_id = np.nan * np.zeros(num_gt_ids)

    for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
        # Timesteps with an empty side only add FP / FN; they skip the state
        # update below, so prev_timestep_tracker_id is left unchanged by them.
        if len(gt_ids_t) == 0:
            res['CLR_FP'] += len(tracker_ids_t)
            continue
        if len(tracker_ids_t) == 0:
            res['CLR_FN'] += len(gt_ids_t)
            continue

        similarity = data['similarity_scores'][t]
        # True where a tracker id equals the id matched to that gt object in the
        # previously processed timestep (comparisons against NaN are False).
        score_mat = (tracker_ids_t[np.newaxis, :] == prev_timestep_tracker_id[gt_ids_t[:, np.newaxis]])
        # The 1000 bonus makes continuing an existing match outweigh any
        # similarity value; similarity then orders the remaining candidates.
        score_mat = 1000 * score_mat + similarity
        # Pairs below the threshold are zeroed so they are rejected after assignment.
        score_mat[similarity < threshold - np.finfo('float').eps] = 0

        # One-to-one assignment maximising the total score (negated because
        # linear_sum_assignment minimises cost); zero-score pairs are discarded.
        match_rows, match_cols = linear_sum_assignment(-score_mat)
        actually_matched_mask = score_mat[match_rows, match_cols] > 0 + np.finfo('float').eps
        match_rows = match_rows[actually_matched_mask]
        match_cols = match_cols[actually_matched_mask]

        matched_gt_ids = gt_ids_t[match_rows]
        matched_tracker_ids = tracker_ids_t[match_cols]

        # ID switch: the gt object has been matched before, and to a different tracker id.
        prev_matched_tracker_ids = prev_tracker_id[matched_gt_ids]
        is_idsw = (np.logical_not(np.isnan(prev_matched_tracker_ids))) & (
            np.not_equal(matched_tracker_ids, prev_matched_tracker_ids))
        res['IDSW'] += np.sum(is_idsw)

        # Remember the matches; the per-timestep record is cleared first so it
        # only contains gt ids matched in this timestep.
        prev_tracker_id[matched_gt_ids] = matched_tracker_ids
        prev_timestep_tracker_id[:] = np.nan
        prev_timestep_tracker_id[matched_gt_ids] = matched_tracker_ids

        num_matches = len(matched_gt_ids)
        res['CLR_TP'] += num_matches
        res['CLR_FN'] += len(gt_ids_t) - num_matches
        res['CLR_FP'] += len(tracker_ids_t) - num_matches
        if num_matches > 0:
            res['MOTP_sum'] += sum(similarity[match_rows, match_cols])

    # MOTP: mean similarity of matched pairs. MOTA: (TP - FP - IDSW) / (TP + FN).
    # np.maximum(1.0, ...) guards both divisions against a zero denominator.
    res['MOTP'] = res['MOTP_sum'] / np.maximum(1.0, res['CLR_TP'])
    res['MOTA'] = (res['CLR_TP'] - res['CLR_FP'] - res['IDSW']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])

    return res

def evaluate_with_trackeval(gt_file, ts_file):
    """Compute the HOTA-family and CLEAR metrics for one tracker output.

    Both files are read with load_mot_format, aligned with
    prepare_trackeval_data, and scored by calculate_hota / calculate_clear.

    NOTE(review): evaluate_tracker loads the ground truth with
    ``min_confidence=1`` and compares with ``distth=0.6``, while this path
    applies no confidence filter and calculate_clear defaults to
    ``threshold=0.5`` — confirm whether the two sets of numbers are meant to
    be directly comparable.

    Args:
        gt_file: Path to the ground-truth MOT-format file.
        ts_file: Path to the tracker-output MOT-format file.

    Returns:
        dict with the mean over all thresholds of 'HOTA', 'DetA', 'AssA',
        'DetRe', 'DetPr', 'LocA', followed by 'MOTA_trackeval',
        'MOTP_trackeval', 'IDSW_trackeval', 'CLR_TP', 'CLR_FN', 'CLR_FP'.
    """
    data = prepare_trackeval_data(load_mot_format(gt_file), load_mot_format(ts_file))

    hota_res = calculate_hota(data)
    clear_res = calculate_clear(data)

    # HOTA-family results are per-threshold arrays; report their mean.
    metrics = {
        key: np.mean(hota_res[key])
        for key in ('HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'LocA')
    }

    # CLEAR results are scalars; the suffix marks the three that are renamed.
    metrics['MOTA_trackeval'] = clear_res['MOTA']
    metrics['MOTP_trackeval'] = clear_res['MOTP']
    metrics['IDSW_trackeval'] = clear_res['IDSW']
    for count_key in ('CLR_TP', 'CLR_FN', 'CLR_FP'):
        metrics[count_key] = clear_res[count_key]

    return metrics

In [6]:
# Start from an empty table so that re-running this cell does not append a
# second copy of every row to the results.
df = pd.DataFrame()

# os.listdir() returns entries in arbitrary order; sort for a reproducible row order.
for config_file in sorted(os.listdir(CONFIG_DIR)):
    if not config_file.lower().endswith('.json'):
        # Anything that is not a JSON config (hidden files, sub-directories)
        # would make evaluate_config() fail.
        continue

    print(config_file)
    # Strip only the extension so names that themselves contain dots stay intact.
    file_name = os.path.splitext(config_file)[0]

    # Each config is paired with the prediction file of the same base name.
    predict_file = os.path.join(PREDICTED_DIR, f"{file_name}.txt")
    if not os.path.exists(predict_file):
        # Skip just this run; the remaining configs are still evaluated.
        print(f"Skipping {config_file}: prediction file {predict_file} not found")
        continue

    algorithm, attempt, inference_time, tracking_time = evaluate_config(os.path.join(CONFIG_DIR, config_file))
    # Summarise the recorded timings by their mean and standard deviation.
    avg_inference_time = np.mean(inference_time)
    avg_tracking_time = np.mean(tracking_time)
    std_inference_time = np.std(inference_time)
    std_tracking_time = np.std(tracking_time)

    # Calculate motmetrics
    row = evaluate_tracker(GROUND_TRUTH_FILE, predict_file, file_name)

    # Calculate TrackEval-style metrics (HOTA, CLEAR)
    print(f"Calculating TrackEval metrics for {file_name}...")
    trackeval_metrics = evaluate_with_trackeval(GROUND_TRUTH_FILE, predict_file)
    row.update(trackeval_metrics)

    # Add metadata
    row = add_metadata(row, algorithm, attempt, avg_inference_time, avg_tracking_time, std_inference_time, std_tracking_time)
    store_results(row)

BoostTrack_round1.json
Calculating TrackEval metrics for BoostTrack_round1...
BoostTrack_round10.json
Calculating TrackEval metrics for BoostTrack_round10...
BoostTrack_round2.json
Calculating TrackEval metrics for BoostTrack_round2...
BoostTrack_round3.json
Calculating TrackEval metrics for BoostTrack_round3...
BoostTrack_round4.json
Calculating TrackEval metrics for BoostTrack_round4...
BoostTrack_round5.json
Calculating TrackEval metrics for BoostTrack_round5...
BoostTrack_round6.json
Calculating TrackEval metrics for BoostTrack_round6...
BoostTrack_round7.json
Calculating TrackEval metrics for BoostTrack_round7...
BoostTrack_round8.json
Calculating TrackEval metrics for BoostTrack_round8...
BoostTrack_round9.json
Calculating TrackEval metrics for BoostTrack_round9...
BotSort_round1.json
Calculating TrackEval metrics for BotSort_round1...
BotSort_round10.json
Calculating TrackEval metrics for BotSort_round10...
BotSort_round2.json
Calculating TrackEval metrics for BotSort_round2...


In [7]:
# Show the accumulated results table: motmetrics columns, the TrackEval-style
# metrics and the run metadata, one row per stored result.
display(df)

Unnamed: 0,idf1,idp,idr,recall,precision,num_unique_objects,mostly_tracked,partially_tracked,mostly_lost,num_false_positives,...,IDSW_trackeval,CLR_TP,CLR_FN,CLR_FP,algorithm,attempt,inference_time,tracking_time,std_inference_time,std_tracking_time
0,0.417423,0.475683,0.371339,0.437869,0.561949,42.0,7.0,13.0,22.0,2832.0,...,50,2201,6096,4264,BoostTrack,1,0.009600,0.018278,0.004894,0.030064
1,0.417423,0.475683,0.371339,0.437869,0.561949,42.0,7.0,13.0,22.0,2832.0,...,50,2201,6096,4264,BoostTrack,10,0.008463,0.017206,0.002028,0.025553
2,0.417423,0.475683,0.371339,0.437869,0.561949,42.0,7.0,13.0,22.0,2832.0,...,50,2201,6096,4264,BoostTrack,2,0.007585,0.016110,0.002552,0.025120
3,0.417423,0.475683,0.371339,0.437869,0.561949,42.0,7.0,13.0,22.0,2832.0,...,50,2201,6096,4264,BoostTrack,3,0.007498,0.016171,0.002520,0.025136
4,0.417423,0.475683,0.371339,0.437869,0.561949,42.0,7.0,13.0,22.0,2832.0,...,50,2201,6096,4264,BoostTrack,4,0.008236,0.016090,0.002546,0.025487
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,0.484349,0.446974,0.527781,0.617091,0.523250,42.0,20.0,11.0,11.0,4665.0,...,95,2893,5404,6892,StrongSort,5,0.005977,0.057797,0.001546,0.031318
66,0.484349,0.446974,0.527781,0.617091,0.523250,42.0,20.0,11.0,11.0,4665.0,...,95,2893,5404,6892,StrongSort,6,0.006411,0.059437,0.001374,0.031027
67,0.484349,0.446974,0.527781,0.617091,0.523250,42.0,20.0,11.0,11.0,4665.0,...,95,2893,5404,6892,StrongSort,7,0.006534,0.057945,0.001363,0.028830
68,0.484349,0.446974,0.527781,0.617091,0.523250,42.0,20.0,11.0,11.0,4665.0,...,95,2893,5404,6892,StrongSort,8,0.006601,0.058933,0.001225,0.032087


In [8]:
import time

# Saving is opt-in: the table is only written when the user confirms.
want_to_save = input("Do you want to save the results to a CSV file? (yes/no): ").strip().lower()
if want_to_save in ('yes', 'y'):
    if df.empty:
        # An empty table would only produce a file without any data rows.
        print("No results to save.")
    else:
        # Re-create the output directory in case it was removed since setup.
        os.makedirs(EVALUATE_DIR, exist_ok=True)
        # The timestamp keeps repeated saves of the same folder from overwriting each other.
        output_file = os.path.join(EVALUATE_DIR, f"evaluation_results_{FOLDER_NAME}_{time.strftime('%Y%m%d_%H%M%S')}.csv")
        df.to_csv(output_file, index=False)
        print(f"Results saved to {output_file}")
else:
    print("Results not saved.")

Results saved to c:\Users\sschw\schwynn\Work\Teacher-Supervised\traffic\benchmark\data\evaluate\evaluation_results_20251230-164234_20251230_174840.csv
