# Player Tracking System Comparison

This notebook compares three player tracking systems:
- Eagle
- Darkmyter (Ultralytics YOLOv8 + the original ByteTrack)
- Ultralytics YOLO11 + BoT-SORT

**Important**: Run cells in order from top to bottom!

In [None]:
# Cell 1: Setup directories and utilities

from pathlib import Path
import os

# Root of the workspace; every other directory used by the notebook hangs off it.
BASE_DIR = Path("/content")
REPOS_DIR, VIDEOS_DIR, CLIPS_DIR, OUTPUT_DIR = (
    BASE_DIR / subdir
    for subdir in ("repositories", "videos", "clips", "output")
)

# Create the whole layout up front; exist_ok keeps re-running this cell harmless.
for workdir in (REPOS_DIR, VIDEOS_DIR, CLIPS_DIR, OUTPUT_DIR):
    workdir.mkdir(parents=True, exist_ok=True)

def print_status(msg, status="INFO"):
    """Print ``msg`` prefixed with ``[status]`` and wrapped in an ANSI color.

    Args:
        msg: Text to display.
        status: Label shown in brackets. INFO, SUCCESS, WARNING, ERROR and
            RESET have a color code; any other label is printed without a
            leading color code.
    """
    reset = "\033[0m"
    palette = {
        "INFO": "\033[94m",
        "SUCCESS": "\033[92m",
        "WARNING": "\033[93m",
        "ERROR": "\033[91m",
        "RESET": reset,
    }
    prefix = palette[status] if status in palette else ""
    # The reset code is always appended so the color never leaks into later output.
    print(f"{prefix}[{status}] {msg}{reset}")

# Confirm the setup ran and show the root under which all artifacts are written.
print_status("Directory structure created", "SUCCESS")
print(f"Working directory: {BASE_DIR}")

In [None]:
# Cell 2: Clone all repositories

import subprocess

import shutil

# Name -> clone URL. The name is also the checkout directory under REPOS_DIR.
REPOSITORIES = {
    "eagle": "https://github.com/nreHieW/Eagle.git",
    "darkmyter": "https://github.com/Darkmyter/Football-Players-Tracking.git",
}

print_status("Cloning repositories...", "INFO")

# Names of repositories whose clone did not succeed, for an honest final summary.
failed_repos = []

for name, url in REPOSITORIES.items():
    repo_path = REPOS_DIR / name

    if repo_path.exists():
        print_status(f"{name}: Already exists, skipping", "WARNING")
        continue

    try:
        print_status(f"{name}: Cloning...", "INFO")
        result = subprocess.run(
            ["git", "clone", url, str(repo_path)],
            capture_output=True,
            text=True,
            timeout=300
        )

        if result.returncode == 0:
            print_status(f"{name}: Cloned successfully", "SUCCESS")
            continue

        print_status(f"{name}: Clone failed - {result.stderr[:100]}", "ERROR")

    except Exception as e:
        # Includes subprocess.TimeoutExpired and a missing git executable.
        print_status(f"{name}: Clone failed - {str(e)}", "ERROR")

    # Only reached on failure. A clone interrupted by the timeout can leave a
    # partial checkout behind; remove it so the next run retries instead of
    # reporting "Already exists, skipping".
    failed_repos.append(name)
    shutil.rmtree(repo_path, ignore_errors=True)

if failed_repos:
    print_status(
        f"Repository cloning finished with failures: {', '.join(failed_repos)}",
        "ERROR",
    )
else:
    print_status("Repository cloning complete", "SUCCESS")

In [None]:
# Cell 3: Install dependencies

# Announce the install step; the commands below run quietly (-q) where flagged.
print_status("Installing dependencies...", "INFO")

# Deep-learning stack plus tracklab.
!pip install -q torch torchvision torchaudio tracklab
# Image / numeric / plotting libraries.
!pip install -q opencv-python numpy scipy pandas scikit-learn matplotlib
# Ultralytics YOLO and supervision.
!pip install -q ultralytics supervision
# gdown is used by the download cells; the rest are general utilities.
!pip install -q gdown Pillow tqdm requests
# ByteTrack-related packages. Several names repeat earlier lines (tqdm, numpy,
# torch, torchvision, opencv-python).
# NOTE(review): none of these installs is version-pinned - consider pinning.
!pip install -q \
    loguru cython cython_bbox lap onemetric scikit-image tabulate tqdm numpy torch torchvision opencv-python pyyaml yolox
# NOTE(review): loguru and onemetric are already requested by the line above.
!pip install -q loguru
# onemetric provides box_iou_batch, which the generated Darkmyter script imports.
!pip install onemetric #THIS CELL IS IMPORTANT
# NOTE(review): no use of psutil is visible in this part of the notebook.
!pip install psutil


print_status("Dependencies installed", "SUCCESS")

In [None]:
from ultralytics import YOLO

# Instantiate the yolo11m checkpoint and report where its weights were read
# from; "unknown path" is shown when the model exposes no ckpt_path attribute.
model = YOLO("yolo11m.pt")
weights_source = getattr(model, "ckpt_path", "unknown path")
print("Loaded weights from:", weights_source)

In [None]:
# Cell 4: Download videos from Google Drive

!pip install -q gdown

import gdown
from pathlib import Path

# Shared folder ID
FOLDER_ID = "1Cs4kTX6GYwfcpKyDZdqRKBezz49wT7_N"

# Always start from an empty selection so later cells never hit a missing or
# stale VIDEO_PATHS when the download fails or nothing gets selected.
VIDEO_PATHS = []
available_videos = []

print_status("Downloading videos from shared folder...", "INFO")

# Step 1: download. Only download errors are reported as download failures.
try:
    gdown.download_folder(
        id=FOLDER_ID,
        output=str(VIDEOS_DIR),
        quiet=False,
        use_cookies=False
    )
    download_ok = True
except Exception as e:
    download_ok = False
    print_status(f"Download failed: {str(e)}", "ERROR")
    print("\nNote: Make sure the folder is set to 'Anyone with the link can view'")

# Step 2: list downloaded videos (top level of VIDEOS_DIR only).
if download_ok:
    video_extensions = ['.mp4', '.avi', '.mov', '.mkv', '.MP4', '.AVI', '.MOV', '.MKV']
    # Compare suffixes case-insensitively so mixed-case names such as ".Mp4"
    # are found too; sort so the numbered list is stable between runs.
    wanted_suffixes = {ext.lower() for ext in video_extensions}
    available_videos = sorted(
        p for p in VIDEOS_DIR.iterdir()
        if p.is_file() and p.suffix.lower() in wanted_suffixes
    )

    if not available_videos:
        print_status("No video files found", "ERROR")

# Step 3: interactive selection.
if available_videos:
    print(f"\nDOWNLOADED {len(available_videos)} VIDEO(S)")
    print("="*50)

    for idx, video in enumerate(available_videos, 1):
        size_mb = video.stat().st_size / (1024 * 1024)
        print(f"{idx}. {video.name} ({size_mb:.1f} MB)")

    print("\n")
    print("VIDEO SELECTION")

    # Ask for number of videos
    print("\nHow many videos do you want to evaluate?")
    print(f"  - Enter a number between 1 and {len(available_videos)}")
    print(f"  - Enter 'all' or leave blank to process ALL {len(available_videos)} videos")

    try:
        num_selection = input("\nNumber of videos: ").strip().lower()

        if not num_selection or num_selection == 'all':
            VIDEO_PATHS = list(available_videos)
            print_status(f"Selected ALL {len(VIDEO_PATHS)} videos", "SUCCESS")
        elif num_selection.isdecimal():
            # isdecimal() (unlike isdigit()) only accepts strings int() can parse.
            num_videos = int(num_selection)
            if 1 <= num_videos <= len(available_videos):
                if num_videos == len(available_videos):
                    VIDEO_PATHS = list(available_videos)
                else:
                    print(f"\nSelect {num_videos} video(s) from the list above:")
                    print("  - Enter comma-separated numbers (e.g., '1,3,5')")
                    print(f"  - Or enter 'first' to select the first {num_videos} videos")

                    video_selection = input("\nYour selection: ").strip().lower()

                    if video_selection == 'first':
                        VIDEO_PATHS = available_videos[:num_videos]
                    else:
                        try:
                            indices = [int(x.strip()) for x in video_selection.split(',')]
                        except ValueError:
                            print_status("Invalid input, selecting first videos", "WARNING")
                            VIDEO_PATHS = available_videos[:num_videos]
                        else:
                            if len(indices) != num_videos:
                                print_status(f"Warning: Selected {len(indices)} videos instead of {num_videos}", "WARNING")
                            chosen = set()
                            for idx in indices[:num_videos]:
                                if not 1 <= idx <= len(available_videos):
                                    print_status(f"Ignoring out-of-range selection: {idx}", "WARNING")
                                elif idx in chosen:
                                    # A repeated index would process the same video twice.
                                    print_status(f"Ignoring duplicate selection: {idx}", "WARNING")
                                else:
                                    chosen.add(idx)
                                    VIDEO_PATHS.append(available_videos[idx - 1])

                print_status(f"Selected {len(VIDEO_PATHS)} video(s)", "SUCCESS")
                for video in VIDEO_PATHS:
                    print(f"  - {video.name}")
            else:
                print_status(f"Invalid number. Must be between 1 and {len(available_videos)}", "ERROR")
        else:
            print_status("Invalid input", "ERROR")
    except Exception as e:
        # e.g. no interactive stdin available; keep the notebook running but
        # make sure a half-built selection is not used downstream.
        VIDEO_PATHS = []
        print_status(f"Video selection failed: {str(e)}", "ERROR")

    if not VIDEO_PATHS:
        print_status("No videos selected", "ERROR")

In [None]:
# Cell 5: Prepare videos and clips

import cv2
import subprocess

# Length in seconds of each extracted clip.
CLIP_DURATION = 60

# Prepare both full videos and clips
FULL_VIDEOS = {}   # video name -> {"full": path of the original video}
VIDEO_CLIPS = {}   # video name -> {clip position: path of the extracted clip}

for VIDEO_PATH in VIDEO_PATHS:
    VIDEO_NAME = VIDEO_PATH.stem

    print(f"\nPREPARING: {VIDEO_NAME}")

    # Get video info for clip extraction; release the capture even on errors.
    cap = cv2.VideoCapture(str(VIDEO_PATH))
    try:
        opened = cap.isOpened()
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

    if not opened:
        print_status(f"Cannot open {VIDEO_PATH.name}, skipping", "ERROR")
        continue

    # Store full video path
    FULL_VIDEOS[VIDEO_NAME] = {"full": VIDEO_PATH}

    if fps <= 0 or total_frames <= 0:
        # Without a frame rate and frame count the duration is unknown
        # (dividing by an fps of 0 would raise), so no clips can be positioned.
        print_status(
            f"No usable FPS/frame count for {VIDEO_PATH.name}, skipping clip extraction",
            "WARNING",
        )
        VIDEO_CLIPS[VIDEO_NAME] = {}
        continue

    duration = total_frames / fps

    print(f"Duration: {duration:.1f}s | FPS: {fps:.1f} | Frames: {total_frames}")

    # Determine clip positions as (start seconds, length seconds, label).
    if duration < CLIP_DURATION * 3:
        if duration < CLIP_DURATION:
            CLIPS = [(0, duration, "full")]
            print_status(f"Video shorter than {CLIP_DURATION}s, will use full video", "INFO")
        else:
            CLIPS = [
                (0, CLIP_DURATION, "start"),
                (max(duration - CLIP_DURATION, 0), CLIP_DURATION, "end")
            ]
            print_status("Will extract start and end clips", "INFO")
    else:
        CLIPS = [
            (0, CLIP_DURATION, "start"),
            ((duration - CLIP_DURATION) / 2, CLIP_DURATION, "middle"),
            (duration - CLIP_DURATION, CLIP_DURATION, "end")
        ]
        print_status("Will extract start, middle, and end clips", "INFO")

    # Extract clips
    CLIP_PATHS = {}
    for start_time, clip_dur, position in CLIPS:
        clip_name = f"{VIDEO_NAME}_{position}.mp4"
        clip_path = CLIPS_DIR / clip_name

        # "-c copy" copies the streams instead of re-encoding them.
        cmd = [
            "ffmpeg", "-i", str(VIDEO_PATH),
            "-ss", str(start_time),
            "-t", str(clip_dur),
            "-c", "copy",
            str(clip_path),
            "-y",
            "-loglevel", "error"
        ]

        result = subprocess.run(cmd, capture_output=True)

        if result.returncode == 0 and clip_path.exists():
            CLIP_PATHS[position] = clip_path
            size_mb = clip_path.stat().st_size / (1024 * 1024)
            print_status(f"Clip '{position}' ready ({size_mb:.1f} MB)", "SUCCESS")
        else:
            print_status(f"Failed to extract '{position}' clip", "ERROR")
            # Surface ffmpeg's own explanation instead of discarding it.
            ffmpeg_error = result.stderr.decode(errors="replace").strip()
            if ffmpeg_error:
                print(f"  ffmpeg: {ffmpeg_error[:300]}")

    VIDEO_CLIPS[VIDEO_NAME] = CLIP_PATHS

print("\n")
print("PREPARATION COMPLETE")
# Count the videos that were actually prepared, not merely requested.
print(f"Prepared {len(FULL_VIDEOS)} video(s) with both full and clip options")

In [None]:
# Cell: Setup Darkmyter (Original ByteTrack + YOLO - Authentic Implementation)

print_status("Setting up Darkmyter tracking...", "INFO")

import os
import subprocess
from pathlib import Path

darkmyter_dir = REPOS_DIR / "darkmyter"

# Clone original ByteTrack repo (required for authentic Darkmyter)
bytetrack_dir = darkmyter_dir / "ByteTrack"
if not bytetrack_dir.exists():
    print_status("Cloning original ByteTrack repository...", "INFO")
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ifzhang/ByteTrack.git",
        str(bytetrack_dir)
    ], check=True)

# Install ByteTrack dependencies
!pip install -q cython lap cython_bbox

# Download football-specific weights
weights_dir = darkmyter_dir / "yolov8-weights"
weights_dir.mkdir(parents=True, exist_ok=True)

custom_weights = weights_dir / "yolov8l-football-players.pt"
gdrive_id = "12dWRBsegmyGE3feTdy9LBf1eZ-hTZ9Sx"

def download_darkmyter_weights():
    """Download the Darkmyter football weights from Google Drive.

    The file identified by ``gdrive_id`` is saved to ``custom_weights``.

    Returns:
        bool: True when a non-empty weights file exists after the download,
        False otherwise. Failures are reported via ``print_status`` rather
        than raised.
    """
    print_status("Downloading Darkmyter football weights...", "INFO")
    try:
        import gdown
        url = f"https://drive.google.com/uc?id={gdrive_id}"
        saved_to = gdown.download(url, str(custom_weights), quiet=False)
    except Exception as e:
        print_status(f"Failed to download weights: {e}", "ERROR")
        return False

    # NOTE(review): some gdown versions are believed to return None on failure
    # instead of raising - confirm. Either way, only report success once a
    # file is really on disk.
    if not saved_to or not custom_weights.exists() or custom_weights.stat().st_size == 0:
        print_status("Failed to download weights: no file was written", "ERROR")
        return False

    print_status("Darkmyter weights downloaded", "SUCCESS")
    return True

# Make sure usable weights are on disk: download when missing, and replace a
# file whose first byte is "<" (treated as an HTML page saved by mistake).
if not custom_weights.exists():
    download_darkmyter_weights()
else:
    try:
        with custom_weights.open("rb") as weights_file:
            leading_bytes = weights_file.read(16)
        looks_like_html = leading_bytes[:1] == b"<"
        if not looks_like_html:
            print_status("Darkmyter weights already present", "SUCCESS")
        else:
            print_status("Weights file is HTML, re-downloading...", "ERROR")
            custom_weights.unlink(missing_ok=True)
            download_darkmyter_weights()
    except Exception:
        # Any problem while checking: discard the file and fetch a fresh copy.
        custom_weights.unlink(missing_ok=True)
        download_darkmyter_weights()

# Create Darkmyter wrapper
darkmyter_wrapper = darkmyter_dir / "run_darkmyter.py"
darkmyter_wrapper.write_text('''
#!/usr/bin/env python
"""
Darkmyter: YOLOv8 + original ByteTrack (football, notebook-authentic).

This script is a CLI version of the Roboflow "track players with ByteTrack + YOLOv8"
notebook, adapted to output JSON instead of an annotated video.

It:
- Uses yolov8l-football-players.pt if present, else falls back to yolov8x.pt
- Uses original ifzhang/ByteTrack
- Uses football-specific BYTETrackerArgs
- Uses the same format_predictions + match_detections_with_tracks pattern
"""

import argparse
import json
import sys
from dataclasses import dataclass
from pathlib import Path

import cv2
import numpy as np

try:
    from ultralytics import YOLO
    import torch
except ImportError:
    print("Error: ultralytics or torch not installed", file=sys.stderr)
    sys.exit(1)

# Original ByteTrack imports
# The ByteTrack checkout next to this script is put first on sys.path so that
# its yolox package is the one that gets imported.
BYTETRACK_PATH = Path(__file__).resolve().parent / "ByteTrack"
sys.path.insert(0, str(BYTETRACK_PATH))

# Import once and show the real cause on failure: an ImportError here can come
# from a missing checkout or from a missing/incompatible dependency of it.
try:
    from yolox.tracker.byte_tracker import BYTETracker, STrack
except ImportError as e:
    import traceback
    print("Error importing ByteTrack from ./ByteTrack:", e, file=sys.stderr)
    traceback.print_exc()
    sys.exit(1)

try:
    from onemetric.cv.utils.iou import box_iou_batch
except ImportError:
    print("Error: onemetric not installed (needed for IoU). "
          "Install with: pip install onemetric", file=sys.stderr)
    sys.exit(1)


# BYTETrackerArgs: football-specific params (from notebook)
# An instance is handed to BYTETracker when the tracker is built in main(), and
# the class-level defaults are echoed under "tracker_params" in the output JSON.
# NOTE(review): the meaning of each field is defined by ByteTrack's
# BYTETracker, not here - consult the ByteTrack sources before tuning.
@dataclass(frozen=True)
class BYTETrackerArgs:
    track_thresh: float = 0.25
    track_buffer: int = 30
    match_thresh: float = 0.8
    aspect_ratio_thresh: float = 3.0
    min_box_area: float = 1.0
    mot20: bool = False


# Same mapping as in the notebook
# Class index -> label written as "class_name" in the JSON output; indices not
# listed here are reported as "unknown" by main().
IND_TO_CLS = {
    0: "ball",
    1: "goalkeeper",
    2: "player",
    3: "referee",
}


def format_predictions(predictions, with_conf: bool = True) -> np.ndarray:
    """
    Turn single-box YOLO results into a float array of boxes for ByteTrack.

    Each element of predictions must expose boxes.xyxy and boxes.conf for
    exactly one box. Coordinates are truncated to integers before being stored
    as floats, as in the notebook this script is derived from.

    Returns an array with one row per prediction: (x1, y1, x2, y2, conf) when
    with_conf is true, otherwise (x1, y1, x2, y2). With no predictions the
    result has zero rows but still the matching number of columns.
    """
    columns = 5 if with_conf else 4

    rows = []
    for pred in predictions:
        corners = pred.boxes.xyxy.int().tolist()[0]  # [x1, y1, x2, y2]
        score = float(pred.boxes.conf.item())
        rows.append(corners + [score] if with_conf else corners)

    if len(rows) == 0:
        # Keep the column count so downstream shape checks still work.
        return np.zeros((0, columns), dtype=float)

    return np.array(rows, dtype=float)


def match_detections_with_tracks(detections, tracks):
    """
    Copy track ids onto the detections they overlap most.

    For every track, the detection with the highest IoU against the track's
    tlbr box receives that track's track_id as tracker_id, provided the IoU is
    non-zero. Tracks are handled in order, so when two tracks pick the same
    detection the later one wins.

    The detections list is modified in place and returned. It is returned
    unchanged when either input is empty.
    """
    if not (detections and tracks):
        return detections

    det_boxes = format_predictions(detections, with_conf=False)
    trk_boxes = np.array([trk.tlbr for trk in tracks], dtype=float)

    overlaps = box_iou_batch(trk_boxes, det_boxes)  # [num_tracks, num_dets]
    best_det_per_track = np.argmax(overlaps, axis=1)

    for trk_pos, trk in enumerate(tracks):
        det_pos = best_det_per_track[trk_pos]
        if overlaps[trk_pos, det_pos] != 0:
            detections[det_pos].tracker_id = trk.track_id

    return detections


def main():
    """
    Detect and track objects in one video and write the result as JSON.

    Command line:
        --video   path of the input video (must exist)
        --output  path of the JSON file to write (parent folders are created)

    The football weights are used when present next to this script, otherwise
    yolov8x.pt. Every frame is run through YOLO, the boxes are fed to
    ByteTrack, and each detection that received a track id becomes one row in
    the "detections" list of the output. Exits with status 1 when the video is
    missing or cannot be opened.
    """
    parser = argparse.ArgumentParser(
        description="Darkmyter: YOLOv8 + original ByteTrack (football)"
    )
    parser.add_argument("--video", required=True, help="Path to input video")
    parser.add_argument("--output", required=True, help="Path to output JSON file")

    args = parser.parse_args()
    video_path = Path(args.video)
    output_path = Path(args.output)

    if not video_path.exists():
        print(f"Error: video not found: {video_path}", file=sys.stderr)
        sys.exit(1)

    # Load YOLO model (football weights if available)
    repo_root = Path(__file__).resolve().parent
    custom_weights = repo_root / "yolov8-weights" / "yolov8l-football-players.pt"

    if custom_weights.exists():
        print(f"[Darkmyter] Using football-specific weights: {custom_weights}", file=sys.stderr)
        model = YOLO(str(custom_weights))
        model_name = "yolov8l-football"
        football_specific = True
    else:
        print("[Darkmyter] Football weights not found, using yolov8x.pt", file=sys.stderr)
        model = YOLO("yolov8x.pt")
        model_name = "yolov8x"
        football_specific = False

    # IND_TO_CLS only describes the football checkpoint. With the fallback
    # weights the class indices follow that model's own label set, so the
    # names must come from the model or every class would be mislabelled.
    if football_specific:
        class_names = IND_TO_CLS
    else:
        model_names = model.names
        class_names = model_names if isinstance(model_names, dict) else dict(enumerate(model_names))

    # Reported for information only; it is not passed to the model call below.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"[Darkmyter] Device: {device}", file=sys.stderr)

    # Open video
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        print(f"Error: cannot open video: {video_path}", file=sys.stderr)
        sys.exit(1)

    detections_json = []
    total_tracks = set()
    frame_idx = 0

    # Release the capture even when detection or tracking raises.
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps is None or fps <= 0:
            fps = 30.0
        reported_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        total_frames = int(reported_frames) if reported_frames > 0 else -1

        print(f"[Darkmyter] FPS={fps:.1f}, total_frames={total_frames}", file=sys.stderr)

        # Initialize ByteTrack (with proper frame_rate)
        tracker = BYTETracker(BYTETrackerArgs(), frame_rate=int(round(fps)))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Notebook: detections = yolo_model(frame, verbose=0)[0]
            results = model(frame, verbose=False)[0]

            # Iterating the result yields one single-box result per detection.
            detections_with_tracker = []
            for detection in results:
                detection.tracker_id = ""  # will be filled in after tracking
                detections_with_tracker.append(detection)

            if detections_with_tracker:
                # get trackers with ByteTrack
                bt_input = format_predictions(detections_with_tracker, with_conf=True)

                tracks = tracker.update(
                    output_results=bt_input,
                    img_info=frame.shape,
                    img_size=frame.shape,
                )

                # set tracker_id in yolo detections
                detections_with_tracker = match_detections_with_tracks(
                    detections_with_tracker,
                    tracks,
                )

                # Convert to JSON rows; detections without a track are dropped.
                for det in detections_with_tracker:
                    if det.tracker_id == "":
                        continue

                    # Single box per det
                    bbox = det.boxes.xyxy.tolist()[0]
                    x1, y1, x2, y2 = map(float, bbox)
                    conf = float(det.boxes.conf.item())
                    cls_idx = int(det.boxes.cls.item()) if det.boxes.cls is not None else 0

                    detections_json.append(
                        {
                            "frame_id": int(frame_idx),
                            "track_id": int(det.tracker_id),
                            "bbox": [x1, y1, x2, y2],
                            "score": conf,
                            "class_id": cls_idx,
                            "class_name": class_names.get(cls_idx, "unknown"),
                        }
                    )
                    total_tracks.add(int(det.tracker_id))

            if frame_idx % 100 == 0:
                if total_frames > 0:
                    pct = 100.0 * frame_idx / total_frames
                    print(f"[Darkmyter] Processed {frame_idx}/{total_frames} frames ({pct:.1f}%)",
                          file=sys.stderr)
                else:
                    print(f"[Darkmyter] Processed {frame_idx} frames...", file=sys.stderr)

            frame_idx += 1
    finally:
        cap.release()

    stats = {
        "total_tracks": len(total_tracks),
        "frames_processed": frame_idx,
    }

    full_output = {
        "framework": "Darkmyter",
        "model": model_name,
        "tracker": "ByteTrack (ifzhang/ByteTrack)",
        "tracker_params": {
            "track_thresh": BYTETrackerArgs.track_thresh,
            "track_buffer": BYTETrackerArgs.track_buffer,
            "match_thresh": BYTETrackerArgs.match_thresh,
            "aspect_ratio_thresh": BYTETrackerArgs.aspect_ratio_thresh,
            "min_box_area": BYTETrackerArgs.min_box_area,
        },
        "features": {
            "football_specific": football_specific,
            "original_bytetrack": True,
        },
        "detections": detections_json,
        "statistics": stats,
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w") as f:
        json.dump(full_output, f, indent=2)

    print(
        f"[Darkmyter] Complete: {len(detections_json)} detections, "
        f"{stats['total_tracks']} tracks. Output saved to: {output_path}",
        file=sys.stderr,
    )


if __name__ == "__main__":
    main()
''')

# Mark the generated script as executable and report that it was written.
darkmyter_wrapper.chmod(0o755)
print_status("Darkmyter wrapper created (original ByteTrack)", "SUCCESS")

In [None]:
# Patch ByteTrack for NumPy 1.24+ compatibility
import os

import re

bytetrack_path = REPOS_DIR / "darkmyter" / "ByteTrack"

# np.float / np.int are the aliases this cell patches out for NumPy 1.24+.
# The trailing \b keeps real dtypes such as np.float32, np.int64, np.float_ or
# np.integer untouched, and the match no longer depends on which punctuation
# follows the alias.
deprecated_np_alias = re.compile(r"\bnp\.(float|int)\b")

if not bytetrack_path.exists():
    print_status(f"ByteTrack directory not found: {bytetrack_path}", "ERROR")

patched_count = 0

# Files that typically have np.float issues
for root, dirs, files in os.walk(bytetrack_path):
    for file in files:
        if not file.endswith('.py'):
            continue

        filepath = os.path.join(root, file)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()

            # Replace deprecated numpy aliases with the builtin of the same name
            new_content = deprecated_np_alias.sub(r"\1", content)

            if new_content != content:
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.write(new_content)
                patched_count += 1
                print(f"Patched: {filepath}")
        except (OSError, UnicodeError) as e:
            # Keep going, but say which file could not be patched instead of
            # hiding the failure.
            print_status(f"Could not patch {filepath}: {e}", "WARNING")

print(f"Patched {patched_count} file(s)")
print("ByteTrack patched for NumPy compatibility")

In [None]:
import textwrap
from pathlib import Path
import yaml
import ultralytics

# Reuse REPOS_DIR from the setup cell rather than re-hard-coding the path
# here. A second definition would silently override a changed BASE_DIR for
# this cell and every cell after it.
ultra_dir = REPOS_DIR / "ultra_trackers"
ultra_dir.mkdir(parents=True, exist_ok=True)


# Write the CLI runner that wraps Ultralytics YOLO tracking and dumps JSON.
runner_script = ultra_dir / "run_ultra_yolo_tracker.py"
runner_script.write_text(textwrap.dedent("""\
    #!/usr/bin/env python
    \"\"\"Run Ultralytics YOLO (v5 / v8 / v11 weights) with a chosen tracker and dump JSON tracks.

    Usage:
      python run_ultra_yolo_tracker.py \\
          --video input.mp4 \\
          --output output.json \\
          --weights yolo11m.pt \\
          --tracker botsort
    \"\"\"

    import argparse
    import json
    from pathlib import Path
    from ultralytics import YOLO
    import yaml

    def main():
        # Parse the command line, run Ultralytics tracking on one video and
        # write every tracked box to a JSON list at --output. Each entry has
        # frame_id, track_id, bbox, score and class_id.
        parser = argparse.ArgumentParser(description="YOLO + tracker to JSON")
        parser.add_argument("--video", required=True, help="Path to input video")
        parser.add_argument("--output", required=True, help="Path to output JSON")
        parser.add_argument(
            "--weights",
            default="yolo11m.pt",
            help="YOLO weights (e.g., yolov5s.pt, yolov8n.pt, yolo11m.pt, ...)",
        )
        # NOTE(review): "deepsort" is accepted here, but no deepsort.yaml is
        # known to ship with Ultralytics - confirm it can work or drop it.
        parser.add_argument(
            "--tracker",
            default="botsort",
            choices=["botsort", "bytetrack", "deepsort"],
            help="Which tracker to use",
        )
        parser.add_argument("--conf", type=float, default=0.3,
                    help="Confidence threshold (detector)")
        parser.add_argument("--iou", type=float, default=0.4,
                    help="IOU threshold for NMS (lower = keep more boxes)")
        parser.add_argument("--imgsz", type=int, default=1280,
                    help="Image size for inference")
        parser.add_argument("--max-det", type=int, default=300,
                    help="Maximum detections per image")

        args = parser.parse_args()

        video_path = Path(args.video)
        out_path = Path(args.output)

        if not video_path.exists():
            raise SystemExit(f"Video not found: {video_path}")

        # Load YOLO model
        model = YOLO(args.weights)

        # Use the first GPU when there is one and fall back to the CPU, so the
        # script also runs on CPU-only machines instead of failing on device 0.
        import torch
        device = 0 if torch.cuda.is_available() else "cpu"

        # Get the script's directory for saving custom configs
        ultra_root = Path(__file__).resolve().parent

        # Locate the tracker configs bundled with the installed ultralytics
        import ultralytics
        ultra_path = Path(ultralytics.__file__).parent
        tracker_base_path = ultra_path / "cfg" / "trackers"

        # Select the default config path
        default_cfg_path = tracker_base_path / f"{args.tracker}.yaml"

        # Path for our custom config
        custom_cfg_path = ultra_root / f"{args.tracker}_football.yaml"

        # Load and modify the config
        if default_cfg_path.exists():
            # Load the default config
            with open(default_cfg_path, 'r') as f:
                tracker_cfg = yaml.safe_load(f)

            # Modify with football-optimized values from tracklab
            if args.tracker == "botsort":
                tracker_cfg.update({
                    "track_high_thresh": 0.33824964456239337,
                    "new_track_thresh": 0.21144301345190655,
                    "track_buffer": 60,
                    "match_thresh": 0.22734550911325851,
                    "proximity_thresh": 0.5945380911899254,
                    "appearance_thresh": 0.4818211117541298,
                    # The Ultralytics tracker config names this setting
                    # gmc_method; the cmc_method spelling is not a key of that
                    # config.
                    "gmc_method": "sparseOptFlow",
                    # NOTE(review): frame_rate, lambda_ and conf_thres do not
                    # appear to be Ultralytics tracker-config keys and may
                    # have no effect - confirm.
                    "frame_rate": 30,
                    "lambda_": 0.9896143462366406,
                    "conf_thres": 0.3501265956918775,
                    "with_reid": True
                })

            # Save the modified config to a file
            with open(custom_cfg_path, 'w') as f:
                yaml.dump(tracker_cfg, f)

            # Use the custom config FILE PATH (not the dictionary!)
            tracker_cfg_path = str(custom_cfg_path)
        else:
            # Fallback: just use the default tracker name
            print(f"Warning: Could not find default config at {default_cfg_path}")
            print(f"Using default tracker: {args.tracker}.yaml")
            tracker_cfg_path = f"{args.tracker}.yaml"

        # Run tracking with the config FILE PATH
        results = model.track(
            source=str(video_path),
            tracker=tracker_cfg_path,  # Pass the FILE PATH, not dictionary!
            conf=args.conf,
            iou=args.iou,
            imgsz=args.imgsz,
            max_det=args.max_det,
            stream=True,
            device=device,
            save=False,
            verbose=False,
            persist=True,
            vid_stride=1,
        )

        print(f"Tracking with {args.tracker} on device: {device}")

        all_detections = []

        # enumerate keeps frame_id aligned with the result index even for
        # frames that contribute no rows.
        for frame_idx, r in enumerate(results):
            boxes = r.boxes
            if boxes is None or boxes.id is None:
                # Nothing detected, or nothing has a track id yet.
                continue

            ids = boxes.id.cpu().tolist()
            xyxy = boxes.xyxy.cpu().tolist()
            confs = boxes.conf.cpu().tolist()
            clses = boxes.cls.cpu().tolist()

            for tid, (x1, y1, x2, y2), score, c in zip(ids, xyxy, confs, clses):
                all_detections.append({
                    "frame_id": frame_idx,
                    "track_id": int(tid),
                    "bbox": [float(x1), float(y1), float(x2), float(y2)],
                    "score": float(score),
                    "class_id": int(c),
                })

        out_path.parent.mkdir(parents=True, exist_ok=True)
        with out_path.open("w") as f:
            json.dump(all_detections, f)

        print(f"Wrote {len(all_detections)} tracked detections to {out_path}")


    if __name__ == "__main__":
        main()
    """))

# Mark the generated runner as executable and report that it was written.
runner_script.chmod(0o755)
print_status("Created wrapper for Botsort and Bytetrack", "SUCCESS")


In [None]:

# Cell: Setup Eagle with Python 3.13

print_status("Setting up Eagle with Python 3.13...", "INFO")

eagle_dir = REPOS_DIR / "eagle"

# Install Python 3.13 (Eagle's required version)
print_status("Installing Python 3.13...", "INFO")
!apt-get update -qq
!apt-get install -y software-properties-common
!add-apt-repository -y ppa:deadsnakes/ppa
!apt-get update -qq
!apt-get install -y python3.13 python3.13-venv python3.13-dev python3.13-distutils

# Install pip for Python 3.13
!curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13

# Install uv if not already installed
print_status("Installing uv...", "INFO")
!curl -LsSf https://astral.sh/uv/install.sh | sh

# Add uv to PATH
import os
os.environ['PATH'] = f"/root/.local/bin:{os.environ['PATH']}"

# Create Eagle environment with Python 3.13
os.chdir(eagle_dir)
print_status("Creating Eagle environment with Python 3.13...", "INFO")
!uv venv --python python3.13
!uv sync

# Download model weights
print_status("Downloading Eagle model weights...", "INFO")
models_dir = eagle_dir / "eagle" / "models"
if models_dir.exists():
    os.chdir(models_dir)
    !bash get_weights.sh
    os.chdir(eagle_dir)
    print_status("Eagle weights downloaded", "SUCCESS")
else:
    print_status("Eagle models directory not found", "ERROR")

# Create Eagle wrapper that uses Python 3.13

eagle_wrapper = eagle_dir / "run_eagle.py"
eagle_wrapper.write_text('''
#!/usr/bin/env python3
"""
Clean Eagle wrapper that produces a single output file
Outputs Eagle's native raw format directly
"""

import argparse
import json
import subprocess
import sys
import os
from pathlib import Path
import time


def get_eagle_output(eagle_output_dir):
    """
    Find and return Eagle's raw output data

    The "raw_coordinates" subfolder is searched when it exists, otherwise
    eagle_output_dir itself. Files are tried in this order:
    raw_coordinates.json, processed_data.json, raw_data.json, then any other
    *.json file in name order, so the choice is the same on every run.

    Args:
        eagle_output_dir: Path to Eagle's output directory

    Returns:
        Parsed JSON content of the first file that yields data

    Raises:
        FileNotFoundError: when no JSON file yields data
    """
    coords_dir = eagle_output_dir / "raw_coordinates"
    if not coords_dir.exists():
        coords_dir = eagle_output_dir

    preferred_names = ("raw_coordinates.json", "processed_data.json", "raw_data.json")
    candidates = [coords_dir / name for name in preferred_names]
    # Fallback: any JSON file, sorted because glob order is not guaranteed.
    candidates.extend(sorted(coords_dir.glob("*.json")))

    for file_path in candidates:
        if not file_path.is_file():
            continue
        with open(file_path, 'r') as f:
            data = json.load(f)
        print(f"[Eagle] Using {file_path.name} as source", file=sys.stderr)
        if data is not None:
            return data

    print(f"[Eagle] No output files found in {coords_dir}", file=sys.stderr)
    raise FileNotFoundError(f"No Eagle JSON outputs found in {coords_dir}")


def main():
    parser = argparse.ArgumentParser(description='Eagle wrapper for unified output')
    parser.add_argument('--video', required=True, help='Path to input video')
    parser.add_argument('--output', required=True, help='Path to output JSON file')
    parser.add_argument('--fps', default=24, type=int, help='FPS to process (default: 24)')
    args = parser.parse_args()

    video_path = Path(args.video)
    output_path = Path(args.output)

    if not video_path.exists():
        print(f"Error: Video not found: {video_path}", file=sys.stderr)
        sys.exit(1)

    output_path.parent.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = "0"

    cmd = [
        "uv", "run", "--python", "python3.13",
        "main.py",
        "--video_path", str(video_path),
        "--fps", str(args.fps),
    ]

    print(f"[Eagle] Processing {video_path.name} at {args.fps} FPS...", file=sys.stderr)
    start = time.time()

    eagle_dir = Path(__file__).parent
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        cwd=eagle_dir,
        env=env,
    )

    elapsed = time.time() - start
    print(f"[Eagle] Processing took {elapsed:.1f}s", file=sys.stderr)

    if result.returncode != 0:
        print(f"[Eagle] Warning: Process returned {result.returncode}", file=sys.stderr)
        if result.stderr:
            print(f"[Eagle] Stderr: {result.stderr[:500]}", file=sys.stderr)

    video_stem = video_path.stem
    eagle_output_base = eagle_dir / "output"
    eagle_output_dir = eagle_output_base / video_stem

    if not eagle_output_dir.exists():
        for d in eagle_output_base.iterdir():
            if d.is_dir() and video_stem in d.name:
                eagle_output_dir = d
                break

    if not eagle_output_dir.exists():
        print(f"[Eagle] Error: Could not find output directory for {video_stem}", file=sys.stderr)
        with open(output_path, 'w') as f:
            json.dump([], f)
        sys.exit(1)

    try:
        print(f"[Eagle] Consolidating output from {eagle_output_dir}", file=sys.stderr)
        raw_data = get_eagle_output(eagle_output_dir)
    except Exception as e:
        print(f"[Eagle] Error while getting Eagle output: {e}", file=sys.stderr)
        with open(output_path, "w") as f:
            json.dump([], f)
        sys.exit(1)

    with open(output_path, 'w') as f:
        json.dump(raw_data, f, indent=2)

    if isinstance(raw_data, dict):
        print(f"[Eagle] Output: {len(raw_data)} frames (raw format)", file=sys.stderr)
    elif isinstance(raw_data, list):
        print(f"[Eagle] Output: {len(raw_data)} frames", file=sys.stderr)

    print(f"[Eagle] Saved to: {output_path}", file=sys.stderr)
    sys.exit(0)


if __name__ == "__main__":
    main()
''')

eagle_wrapper.chmod(0o755)
print_status("Eagle  wrapper created", "SUCCESS")

In [None]:
from google.colab import drive
from pathlib import Path

# Mount Google Drive and point the result paths at it. This rebinds the
# BASE_DIR / OUTPUT_DIR names that Cell 1 set under /content.
drive.mount("/content/drive")

# Base folder on Drive where all results will go
BASE_DIR = Path("/content/drive/MyDrive/tracklab_eval")  # change name if you like
OUTPUT_DIR = BASE_DIR.joinpath("results")
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

print(f"Results will be saved under: {OUTPUT_DIR}")


In [None]:
# Cell: Final System Evaluation

import time
import json
import subprocess
import os
import psutil
import threading
from pathlib import Path

# Launch settings per tracking system: the directory to run in ("path"), the
# entry script ("script"), and optionally an interpreter name ("python") or
# extra command-line arguments ("args").
SYSTEM_CONFIGS = {
    "eagle": dict(
        path=REPOS_DIR / "eagle",
        script="run_eagle.py",
        python="python3.13",
    ),
    "yolo11_botsort": dict(
        path=REPOS_DIR / "ultra_trackers",
        script="run_ultra_yolo_tracker.py",
        args=["--weights", "yolo11m.pt", "--tracker", "botsort"],
    ),
    "darkmyter": dict(
        path=REPOS_DIR / "darkmyter",
        script="run_darkmyter.py",
    ),
}

def get_gpu_memory():
    """Get current GPU memory usage in MB using nvidia-smi

    Only the first line of nvidia-smi's output is used, i.e. the first GPU
    it lists.

    Returns:
        Used memory as an int, or None when nvidia-smi is unavailable, times
        out, or prints something that is not an integer.
    """
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,nounits,noheader'],
            capture_output=True, text=True,
            # Bounded so a hung nvidia-smi cannot stall the sampling thread
            timeout=10,
        )
        return int(result.stdout.strip().split('\n')[0])
    except (OSError, ValueError, subprocess.SubprocessError):
        # OSError: binary missing / not executable; ValueError: empty or
        # non-numeric output; SubprocessError: includes TimeoutExpired.
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return None

def monitor_memory(stop_event, memory_stats):
    """Sample RAM and GPU usage until stop_event is set, keeping the maxima.

    Meant to run in a background thread. Writes two keys into memory_stats:
    'peak_ram_mb' (system-wide used RAM from psutil, in MB) and 'peak_gpu_mb'
    (value from get_gpu_memory()). Both start at 0 and only ever grow.
    """
    memory_stats['peak_ram_mb'] = 0
    memory_stats['peak_gpu_mb'] = 0
    bytes_per_mb = 1024 * 1024

    while not stop_event.is_set():
        used_ram = psutil.virtual_memory().used / bytes_per_mb
        if used_ram > memory_stats['peak_ram_mb']:
            memory_stats['peak_ram_mb'] = used_ram

        used_gpu = get_gpu_memory()
        # None (no reading) and 0 are both ignored
        if used_gpu and used_gpu > memory_stats['peak_gpu_mb']:
            memory_stats['peak_gpu_mb'] = used_gpu

        # Sleep up to half a second, waking immediately once stopped
        stop_event.wait(0.5)

# Ask the user whether to evaluate the short clips or the full videos.
# Anything other than "2" selects clip mode.
print("\n".join([
    "EVALUATION MODE SELECTION \n",
    "\nHow do you want to evaluate the videos?",
    "  1. Use clips (faster - 60s segments)",
    "  2. Use full videos (comprehensive but slower)",
]))

mode_choice = input("\nEnter your choice (1 or 2): ").strip()

if mode_choice == '2':
    USE_CLIPS = False
    print_status("Mode: FULL VIDEO EVALUATION", "INFO")
    ALL_VIDEOS = FULL_VIDEOS
    eval_type = "full"
else:
    USE_CLIPS = True
    print_status("Mode: CLIP-BASED EVALUATION", "INFO")
    ALL_VIDEOS = VIDEO_CLIPS
    eval_type = "clips"

# Maps a segment name to the clip number used in output paths and result keys
position_to_number = dict(start=1, middle=2, end=3, full=1)

def _count_detections(data):
    """Count detections in a loaded output file, whichever layout it uses."""
    if isinstance(data, list):
        return len(data)
    if not isinstance(data, dict):
        return 0

    total = 0
    for value in data.values():
        if isinstance(value, list):
            # e.g. {"framework": ..., "detections": [...]}
            total += len(value)
        elif isinstance(value, dict):
            # Eagle layout: frame -> {"Coordinates": {category: {track_id: {...}}}}
            coords = value.get("Coordinates")
            if isinstance(coords, dict):
                total += sum(len(objs) for objs in coords.values() if isinstance(objs, dict))
    return total


def _peak_memory_deltas(memory_stats, baseline_ram, baseline_gpu):
    """Return (RAM, GPU) peaks in MB above the pre-run baseline, floored at 0."""
    peak_ram_delta = max(0, memory_stats.get('peak_ram_mb', 0) - baseline_ram)
    peak_gpu_delta = max(0, memory_stats.get('peak_gpu_mb', 0) - baseline_gpu)
    return peak_ram_delta, peak_gpu_delta


def run_system_on_video(system_name, system_config, video_name, segment_name, video_path,
                        timeout=None):
    """Run a tracking system on a video or clip

    The system's script is started as a subprocess in the system's directory
    while a background thread samples RAM/GPU usage. The script is expected
    to write its detections to <output_dir>/<system_name>_output.json.

    Args:
        system_name: Key of the system; "eagle" is launched through `uv run`.
        system_config: Entry from SYSTEM_CONFIGS ("path", "script", optional
            "python" and "args").
        video_name: Name of the source video, used in the output path.
        segment_name: Segment label; mapped to a clip number in clip mode.
        video_path: Path of the video or clip to process.
        timeout: Optional limit in seconds for the subprocess. None (the
            default) waits indefinitely.

    Returns:
        dict with "success", "time", "peak_ram_mb", "peak_gpu_mb" and either
        "output" and "detections" (success) or "error" (failure). Failures
        are reported through the dict; no exception is raised for them.
    """
    if USE_CLIPS:
        segment_number = position_to_number.get(segment_name, 1)
        output_dir = OUTPUT_DIR / video_name / "clips" / str(segment_number) / system_name
        run_label = f"{video_name}/clip_{segment_number}"
    else:
        output_dir = OUTPUT_DIR / video_name / "full" / system_name
        run_label = f"{video_name} (full video)"

    output_dir.mkdir(parents=True, exist_ok=True)
    print_status(f"Running {system_name} on {run_label}...", "INFO")

    output_file = output_dir / f"{system_name}_output.json"
    system_path = system_config.get("path", REPOS_DIR)

    # Build command
    if system_name == "eagle":
        cmd = [
            "uv", "run", "--python", system_config.get("python", "python3.13"),
            system_config.get("script", "run_eagle.py"),
            "--video", str(video_path),
            "--output", str(output_file),
        ]
    else:
        cmd = [
            "python", system_config["script"],
            "--video", str(video_path),
            "--output", str(output_file),
        ] + [str(extra) for extra in system_config.get("args", [])]

    # Baseline is system-wide usage before the run; peaks are reported
    # relative to it.
    baseline_ram = psutil.virtual_memory().used / (1024 * 1024)
    baseline_gpu = get_gpu_memory() or 0

    # Start memory monitoring
    memory_stats = {}
    stop_event = threading.Event()
    monitor_thread = threading.Thread(target=monitor_memory, args=(stop_event, memory_stats))
    monitor_thread.start()

    start_time = time.time()
    elapsed = 0.0

    try:
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                cwd=str(system_path),
                timeout=timeout,
            )
        finally:
            # Always stop the sampler, including on KeyboardInterrupt, so an
            # interrupted cell does not leave a thread polling nvidia-smi.
            elapsed = time.time() - start_time
            stop_event.set()
            monitor_thread.join()

        peak_ram_delta, peak_gpu_delta = _peak_memory_deltas(
            memory_stats, baseline_ram, baseline_gpu
        )

        if result.returncode == 0 and output_file.exists():
            try:
                with open(output_file) as f:
                    data = json.load(f)
            except json.JSONDecodeError as e:
                print_status(f"{system_name}: Invalid JSON", "ERROR")
                return {
                    "success": False,
                    "time": elapsed,
                    "error": f"Invalid JSON: {e}",
                    "peak_ram_mb": peak_ram_delta,
                    "peak_gpu_mb": peak_gpu_delta,
                }

            num_detections = _count_detections(data)
            print_status(
                f"{system_name}: SUCCESS - {num_detections} detections in {elapsed:.1f}s "
                f"(RAM: +{peak_ram_delta:.0f}MB, GPU: +{peak_gpu_delta:.0f}MB)",
                "SUCCESS",
            )
            return {
                "success": True,
                "time": elapsed,
                "output": str(output_file),
                "detections": num_detections,
                "peak_ram_mb": peak_ram_delta,
                "peak_gpu_mb": peak_gpu_delta,
            }

        # Non-zero exit or missing output file: surface the tail of stderr
        error_msg = result.stderr[-500:] if result.stderr else "Unknown error"
        print_status(f"{system_name}: FAILED", "ERROR")
        print(f"Error: {error_msg}")
        return {
            "success": False,
            "time": elapsed,
            "error": error_msg,
            "peak_ram_mb": peak_ram_delta,
            "peak_gpu_mb": peak_gpu_delta,
        }

    except subprocess.TimeoutExpired:
        # Only reachable when a timeout was supplied
        peak_ram_delta, peak_gpu_delta = _peak_memory_deltas(
            memory_stats, baseline_ram, baseline_gpu
        )
        print_status(f"{system_name}: TIMEOUT", "ERROR")
        return {
            "success": False,
            "time": elapsed,
            "error": "Timeout",
            "peak_ram_mb": peak_ram_delta,
            "peak_gpu_mb": peak_gpu_delta,
        }

    except Exception as e:
        peak_ram_delta, peak_gpu_delta = _peak_memory_deltas(
            memory_stats, baseline_ram, baseline_gpu
        )
        print_status(f"{system_name}: EXCEPTION - {str(e)}", "ERROR")
        return {
            "success": False,
            "time": elapsed,
            "error": str(e),
            "peak_ram_mb": peak_ram_delta,
            "peak_gpu_mb": peak_gpu_delta,
        }

def save_progress(all_results, eval_type):
    """Write the results collected so far to OUTPUT_DIR as JSON.

    The file is named progress_<eval_type>.json and is overwritten on every
    call. Returns the path that was written.
    """
    target = OUTPUT_DIR / f"progress_{eval_type}.json"
    with target.open("w") as handle:
        json.dump(all_results, handle, indent=2)
    return target

def load_progress(eval_type):
    """Return the saved progress for eval_type, or {} when there is none.

    Reads OUTPUT_DIR/progress_<eval_type>.json. A missing file and a file
    that cannot be read or parsed both yield an empty dict.
    """
    source = OUTPUT_DIR / f"progress_{eval_type}.json"
    if not source.exists():
        return {}
    try:
        with source.open() as handle:
            return json.load(handle)
    except Exception:
        # Best effort: an unreadable progress file means starting over
        return {}

print(f"STARTING {eval_type.upper()} EVALUATION\n")

# Pick up any earlier run and let the user decide whether to keep it
all_results = load_progress(eval_type)
if all_results:
    print_status(f"Loaded existing progress with {len(all_results)} videos", "INFO")
    print("Videos already processed:")
    for video_name in all_results.keys():
        print("  - " + f"{video_name}")

    print("\n".join([
        "\nDo you want to:",
        "  1. Continue from where you left off",
        "  2. Start fresh (delete existing progress)",
    ]))
    continue_choice = input("\nEnter your choice (1 or 2): ").strip()

    # Only "2" discards the loaded results; the progress file itself is
    # left in place until the next save overwrites it.
    if continue_choice == '2':
        all_results = {}
        print_status("Starting fresh evaluation", "INFO")

# Main evaluation loop: for every video and every segment of it, run each
# configured system, record the result and checkpoint progress after each run.
for video_name, segments in ALL_VIDEOS.items():
    print(f"\nVIDEO: {video_name}")

    # Start from previously loaded results for this video, if any
    video_results = all_results.get(video_name, {})

    for segment_name, video_path in segments.items():
        if USE_CLIPS:
            # Segment names missing from position_to_number fall back to 1,
            # so they share the "clip_1" key.
            segment_number = position_to_number.get(segment_name, 1)
            segment_key = f"clip_{segment_number}"
            print(f"\nProcessing clip {segment_number} ({segment_name})...")
        else:
            segment_key = "full"
            print(f"\nProcessing full video...")

        # Reuse the stored dict for this segment so results accumulate in place
        segment_results = video_results.get(segment_key, {})
        video_results[segment_key] = segment_results

        for system_name, system_config in SYSTEM_CONFIGS.items():
            # Successful runs are skipped; recorded failures are run again.
            # "SKIP", "RETRY" and "SAVE" have no entry in print_status's color
            # table, so those lines are printed without a color prefix.
            if system_name in segment_results and segment_results[system_name].get("success"):
                print_status(f"{system_name}: Already completed successfully", "SKIP")
                continue
            elif system_name in segment_results:
                print_status(f"{system_name}: Retrying previous failure", "RETRY")

            result = run_system_on_video(
                system_name, system_config, video_name, segment_name, video_path
            )
            segment_results[system_name] = result

            # Checkpoint after every single run so an interrupted session
            # loses at most the run in flight
            all_results[video_name] = video_results
            progress_file = save_progress(all_results, eval_type)
            print_status(f"Progress saved to {progress_file.name}", "SAVE")

        # Counts cover every system recorded for this segment, including
        # results carried over from loaded progress
        successful = sum(1 for r in segment_results.values() if r.get("success", False))
        total = len(segment_results)

        if USE_CLIPS:
            print(f"\nClip summary: {successful}/{total} systems succeeded")
        else:
            print(f"\nVideo summary: {successful}/{total} systems succeeded")

    # Per-video summary, written once all segments of the video are done
    summary_file = OUTPUT_DIR / video_name / f"summary_{eval_type}.json"
    summary_file.parent.mkdir(parents=True, exist_ok=True)
    with open(summary_file, "w") as f:
        json.dump(video_results, f, indent=2)
    print_status(f"Video summary saved to {summary_file.name}", "SAVE")

# Persist the combined results of every video in one file
overall_summary = OUTPUT_DIR / f"overall_summary_{eval_type}.json"
with overall_summary.open("w") as f:
    json.dump(all_results, f, indent=2)

banner = "=" * 60
print(f"\n{banner}")
print(f"{eval_type.upper()} EVALUATION COMPLETE")
print(banner)

# One accumulator per configured system. The "avg_*" entries hold the raw
# samples of successful runs; averages are derived when printing.
system_stats = {
    config_name: {
        "success": 0,
        "total": 0,
        "avg_time": [],
        "avg_detections": [],
        "avg_ram": [],
        "avg_gpu": [],
    }
    for config_name in SYSTEM_CONFIGS
}

for video_results in all_results.values():
    for segment_results in video_results.values():
        for system_name, result in segment_results.items():
            stats = system_stats.get(system_name)
            if stats is None:
                # Result of a system that is no longer configured
                continue
            stats["total"] += 1
            if not result.get("success", False):
                continue
            stats["success"] += 1
            for result_key, stats_key in (
                ("time", "avg_time"),
                ("detections", "avg_detections"),
                ("peak_ram_mb", "avg_ram"),
                ("peak_gpu_mb", "avg_gpu"),
            ):
                if result_key in result:
                    stats[stats_key].append(result[result_key])

print("\nSystem Performance Summary:")
for system_name, stats in system_stats.items():
    if not stats["total"] > 0:
        continue

    success_rate = (stats["success"] / stats["total"]) * 100
    print(f"\n{system_name}:")
    print(f"  Success Rate: {stats['success']}/{stats['total']} ({success_rate:.1f}%)")

    time_samples = stats["avg_time"]
    if time_samples:
        avg_time = sum(time_samples) / len(time_samples)
        print(f"  Avg Time: {avg_time:.1f}s")

    detection_samples = stats["avg_detections"]
    if detection_samples:
        avg_det = sum(detection_samples) / len(detection_samples)
        print(f"  Avg Detections: {avg_det:.0f}")

    ram_samples = stats["avg_ram"]
    if ram_samples:
        avg_ram = sum(ram_samples) / len(ram_samples)
        max_ram = max(ram_samples)
        print(f"  Avg RAM: +{avg_ram:.0f}MB (peak: +{max_ram:.0f}MB)")

    gpu_samples = stats["avg_gpu"]
    if gpu_samples:
        avg_gpu = sum(gpu_samples) / len(gpu_samples)
        max_gpu = max(gpu_samples)
        print(f"  Avg GPU: +{avg_gpu:.0f}MB (peak: +{max_gpu:.0f}MB)")

progress_path = OUTPUT_DIR / f"progress_{eval_type}.json"
print(f"\nResults Directory: {OUTPUT_DIR}")
print(f"Overall Summary: {overall_summary}")
print(f"Progress File: {progress_path}")

# Compare the number of segments that have results with the number expected.
# Expected segments are summed per video, so videos may have different
# numbers of segments and an empty ALL_VIDEOS yields 0 instead of raising.
# Processed segments are counted only for videos in the current selection,
# so leftovers in a loaded progress file cannot skew the comparison.
total_expected = sum(len(segments) for segments in ALL_VIDEOS.values())
total_processed = sum(len(all_results.get(name, {})) for name in ALL_VIDEOS)

# NOTE(review): this checks that every segment has an entry, not that every
# system run inside it succeeded - see the success rates printed above.
if total_processed == total_expected:
    print("\n")
    print("ALL VIDEOS PROCESSED SUCCESSFULLY!")
    print("Progress file kept for reference.")
else:
    print("\n")
    print(f"PARTIAL COMPLETION: {total_processed}/{total_expected} segments processed")
    print("Run the script again to continue from where you left off.")

In [None]:
# Cell 9: Download results

from google.colab import files
import shutil

# Zip everything under OUTPUT_DIR into BASE_DIR/tracking_results.zip and
# hand the archive to the browser for download.
print_status("Creating archive...", "INFO")

archive_name = "tracking_results"
archive_path = BASE_DIR / archive_name

# make_archive takes the base name and appends ".zip" itself
shutil.make_archive(str(archive_path), 'zip', OUTPUT_DIR)

print_status("Downloading...", "SUCCESS")
files.download(str(archive_path) + ".zip")

print_status("Complete!", "SUCCESS")

In [None]:
#Cell 10: Download pre-run results from google drive.
import os

# Fetch and unpack the pre-run results archive only when /content/results is
# not already present.
# NOTE(review): presumably results.zip unpacks to /content/results - confirm,
# otherwise this guard never becomes false.
if not os.path.exists("/content/results"):
    !gdown 1-TR0bycui1zpL5TRZLzhdxcuJIv9qLUO -O results.zip
    !unzip results.zip -d /content
    # NOTE(review): this message is printed in the download branch, and
    # nothing is printed when the folder already exists - confirm intent.
    print("Using cached results")


In [None]:
from google.colab import files
import shutil
import os

# Zip the comparison output folder and download it. OUTPUT_DIR is a plain
# string in this cell.
OUTPUT_DIR = "/content/comparison_output"

if not os.path.exists(OUTPUT_DIR):
    print(f"Error: {OUTPUT_DIR} does not exist!")
elif not os.listdir(OUTPUT_DIR):
    print(f"Error: {OUTPUT_DIR} is empty!")
else:
    # Base name of the archive; make_archive appends ".zip"
    archive_path = "/content/comparison_output"
    shutil.make_archive(archive_path, 'zip', OUTPUT_DIR)

    zip_file = archive_path + ".zip"
    if not os.path.exists(zip_file):
        print("Error: Archive was not created!")
    else:
        print(f"Archive created: {zip_file} ({os.path.getsize(zip_file)} bytes)")
        files.download(zip_file)

In [None]:
# =============================================================================
# CELL A: SHARED PARSERS AND DATA STRUCTURES
# =============================================================================
# Run this cell FIRST - it defines the parsing functions used by both
# evaluation (Cell B) and visualization (Cell C)
# =============================================================================

"""
DATA NORMALIZATION DOCUMENTATION


This module normalizes outputs from three tracking systems into a unified format
for comparison. The normalization process is MINIMALLY INVASIVE, preserving
original data wherever possible.

UNIFIED DETECTION FORMAT:
    - frame_id: int (0-indexed frame number)
    - track_id: int (unique identifier for tracked object)
    - bbox: [x1, y1, x2, y2] (top-left and bottom-right corners in pixels)
    - score: float (confidence score, 0.0-1.0)
    - class_name: str ("player", "referee", "ball", "goalkeeper")

SYSTEM-SPECIFIC NORMALIZATION:

1. DARKMYTER (YOLOv8 + ByteTrack)
   Original: {"framework": "Darkmyter", "detections": [...]}
   Changes: None - already in target format

2. EAGLE (YOLOv8 + BoT-SORT + HRNet)
   Original: Hierarchical JSON with string keys
   Changes:
     - frame_id/track_id: string -> int
     - "BBox" -> "bbox", "Confidence" -> "score"
     - Ball detections: track_id = -1

3. YOLO v11 + BoT-SORT
   Original: List of detection dicts
   Changes: class_name added based on class_id

WHAT IS NOT CHANGED:
    - Bounding box pixel coordinates
    - Confidence/score values
    - Frame IDs (0-indexing preserved)
    - Track IDs (except Ball = -1 in Eagle)
"""

import json
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, field
from collections import defaultdict

# DATA STRUCTURES

@dataclass
class Detection:
    """
    One detected object in one frame, in the layout shared by all systems.

    bbox holds corner coordinates [x1, y1, x2, y2]. The system-specific
    record the detection was built from can be kept in 'raw'; it is left
    out of the repr.
    """
    frame_id: int
    track_id: int
    bbox: List[float]  # [x1, y1, x2, y2]
    score: float
    class_name: str = "player"
    class_id: Optional[int] = None
    raw: Optional[Dict] = field(default=None, repr=False)

    @property
    def center(self) -> Tuple[float, float]:
        """Midpoint of the box as (x, y)."""
        box = self.bbox
        mid_x = (box[0] + box[2]) / 2
        mid_y = (box[1] + box[3]) / 2
        return (mid_x, mid_y)

    @property
    def width(self) -> float:
        """Horizontal extent, x2 - x1."""
        box = self.bbox
        return box[2] - box[0]

    @property
    def height(self) -> float:
        """Vertical extent, y2 - y1."""
        box = self.bbox
        return box[3] - box[1]

    @property
    def area(self) -> float:
        """Box area, width times height."""
        box = self.bbox
        return (box[2] - box[0]) * (box[3] - box[1])

# PARSERS

def parse_darkmyter(data: Dict, preserve_raw: bool = True) -> List[Detection]:
    """
    Convert Darkmyter output into Detection objects.

    Accepts either a dict carrying the records under "detections" or a bare
    list of records; any other input yields an empty list. Records that are
    not dicts or whose "bbox" does not have exactly four values are skipped.
    Box coordinates are taken over as they are.
    """
    if isinstance(data, dict):
        entries = data.get("detections", [])
    elif isinstance(data, list):
        entries = data
    else:
        return []

    return [
        Detection(
            frame_id=int(entry.get("frame_id", 0)),
            track_id=int(entry.get("track_id", 0)),
            bbox=[float(coord) for coord in entry.get("bbox", [])],
            score=float(entry.get("score", 0.0)),
            class_name=entry.get("class_name", "player"),
            class_id=entry.get("class_id"),
            raw=entry if preserve_raw else None,
        )
        for entry in entries
        if isinstance(entry, dict) and len(entry.get("bbox", [])) == 4
    ]


def parse_eagle(data: Dict, preserve_raw: bool = True) -> List[Detection]:
    """
    Parse Eagle output.

    Eagle format:
    {
        "0": {  # frame_id as string
            "Coordinates": {
                "Player": {
                    "1": {"BBox": [x1, y1, x2, y2], "Confidence": 0.95},
                    "2": {"BBox": [...], "Confidence": ...}
                },
                "Ball": {...},
                "Referee": {...}
            }
        },
        "1": {...}
    }

    Changes:
        - frame_id/track_id: string -> int
        - "BBox" -> "bbox", "Confidence" -> "score"
        - Ball detections: track_id = -1

    Malformed parts are skipped rather than aborting the whole parse: input
    that is not a dict yields an empty list, and frames, categories or
    objects with an unexpected shape or a non-numeric id are ignored.

    Args:
        data: Parsed Eagle JSON.
        preserve_raw: Keep each object's original dict in Detection.raw.

    Returns:
        List of Detection objects.
    """
    detections = []
    class_mapping = {
        "Player": "player",
        "Ball": "ball",
        "Referee": "referee",
        "Goalkeeper": "goalkeeper"
    }

    # A failed Eagle run leaves a JSON list behind, which has no .items()
    if not isinstance(data, dict):
        return detections

    for frame_id_str, frame_data in data.items():
        try:
            frame_id = int(frame_id_str)
        except (ValueError, TypeError):
            continue

        if not isinstance(frame_data, dict):
            continue
        coords = frame_data.get("Coordinates", {})
        if not coords or not isinstance(coords, dict):
            continue

        for category, objects in coords.items():
            if not isinstance(objects, dict):
                continue
            class_name = class_mapping.get(category, category.lower())

            for track_id_str, obj_data in objects.items():
                if not isinstance(obj_data, dict):
                    continue
                bbox = obj_data.get("BBox", [])
                if len(bbox) != 4:
                    continue

                if category == "Ball":
                    track_id = -1
                else:
                    # Same tolerance as for frame ids: skip, do not crash
                    try:
                        track_id = int(track_id_str)
                    except (ValueError, TypeError):
                        continue

                detections.append(Detection(
                    frame_id=frame_id,
                    track_id=track_id,
                    bbox=[float(b) for b in bbox],
                    score=float(obj_data.get("Confidence", 0.0)),
                    class_name=class_name,
                    class_id=None,
                    raw=obj_data if preserve_raw else None
                ))

    return detections


def parse_yolo_botsort(data: List, preserve_raw: bool = True) -> List[Detection]:
    """
    Convert YOLO v11 + BoT-SORT output (a list of records) into Detections.

    class_name is derived from each record's "class_id"; ids outside the
    table below are labelled "player". Input that is not a list yields an
    empty list, and records that are not dicts or lack a four-value "bbox"
    are skipped.
    """
    id_to_name = {0: "player", 1: "goalkeeper", 2: "referee", 3: "ball"}
    parsed = []

    if not isinstance(data, list):
        return parsed

    for entry in data:
        usable = isinstance(entry, dict) and len(entry.get("bbox", [])) == 4
        if not usable:
            continue

        cls_id = entry.get("class_id", 0)
        parsed.append(Detection(
            frame_id=int(entry.get("frame_id", 0)),
            track_id=int(entry.get("track_id", 0)),
            bbox=[float(coord) for coord in entry.get("bbox", [])],
            score=float(entry.get("score", 0.0)),
            class_name=id_to_name.get(cls_id, "player"),
            class_id=cls_id,
            raw=entry if preserve_raw else None,
        ))

    return parsed


# =============================================================================
# FILE LOADING UTILITIES
# =============================================================================

def detect_format(filepath: str) -> str:
    """
    Guess which tracking system wrote a JSON file from the shape of its data.

    Returns "yolo_botsort" for a top-level list, "darkmyter" for a dict with
    a "detections" or "framework" key, "eagle" for a dict that has a numeric
    key and whose first value is a dict containing "Coordinates", and
    "unknown" otherwise - including when the file cannot be read or parsed.
    """
    try:
        with open(filepath, 'r') as handle:
            payload = json.load(handle)
    except Exception:
        return "unknown"

    if isinstance(payload, list):
        return "yolo_botsort"
    if not isinstance(payload, dict):
        return "unknown"

    if "detections" in payload or "framework" in payload:
        return "darkmyter"

    # Eagle keys its frames by number; confirm with the first frame's layout
    has_frame_keys = any(key.isdigit() for key in payload)
    if has_frame_keys:
        first_frame = next(iter(payload.values()), None)
        if isinstance(first_frame, dict) and "Coordinates" in first_frame:
            return "eagle"

    return "unknown"


def load_tracking_file(filepath: str, agent_name: str = "") -> List[Detection]:
    """
    Load tracking data from file, auto-detecting format.

    The format is taken from detect_format(); if that is inconclusive the
    agent name is used as a hint, and if there is still no match each parser
    is tried in turn and the first non-empty result wins.

    Args:
        filepath: Path to JSON file
        agent_name: Optional hint for format detection

    Returns:
        List of Detection objects (empty if no parser produced any)

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    fmt = detect_format(filepath)

    # Use agent name as hint if format unknown
    if fmt == "unknown" and agent_name:
        name_lower = agent_name.lower()
        if "darkmyter" in name_lower:
            fmt = "darkmyter"
        elif "eagle" in name_lower:
            fmt = "eagle"
        elif "yolo" in name_lower or "botsort" in name_lower:
            fmt = "yolo_botsort"

    with open(filepath, 'r') as f:
        data = json.load(f)

    parsers = {
        "darkmyter": parse_darkmyter,
        "eagle": parse_eagle,
        "yolo_botsort": parse_yolo_botsort
    }

    if fmt in parsers:
        return parsers[fmt](data)

    # Try each parser as fallback
    for parser in [parse_yolo_botsort, parse_darkmyter, parse_eagle]:
        try:
            dets = parser(data)
            if dets:
                return dets
        except Exception:
            # A parser rejecting the layout is expected here. Exception (not
            # a bare except) keeps KeyboardInterrupt/SystemExit propagating.
            continue
    return []


def organize_detections_by_frame(detections: List[Detection]) -> Dict[int, List[Detection]]:
    """Bucket detections by frame_id, keeping input order within each frame."""
    grouped = {}
    for det in detections:
        grouped.setdefault(det.frame_id, []).append(det)
    return grouped


def organize_detections_by_track(detections: List[Detection],
                                  class_filter: str = "player") -> Dict[int, List[Detection]]:
    """
    Bucket detections by track_id, each bucket ordered by frame_id.

    Only detections whose class_name equals class_filter are kept; an empty
    or None class_filter keeps every class.
    """
    grouped = {}
    for det in detections:
        if class_filter and det.class_name != class_filter:
            continue
        grouped.setdefault(det.track_id, []).append(det)

    # Order each track chronologically
    for members in grouped.values():
        members.sort(key=lambda d: d.frame_id)

    return grouped



print_status("Shared Parsers Loaded", "SUCCESS")


In [None]:

# EVALUATION AND METRICS

"""
Comparative evaluation of player tracking systems with multi-clip aggregation.

DIRECTORY STRUCTURE EXPECTED:
    results_dir/
        video1/
            full/
                eagle/eagle_output.json
                darkmyter/darkmyter_output.json
                yolo11_botsort/yolo11_botsort_output.json
        video2/
            full/
                ...
"""

import json
import csv
import os
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any
from collections import defaultdict
from datetime import datetime
import warnings

warnings.filterwarnings('ignore')

# CONFIGURATION

# Update this path to your results directory
RESULTS_DIR = Path("/content") / "comp-4009-tracking-results"
# Comparison artefacts are written here; the folder is created on the spot
OUTPUT_DIR = Path("/content") / "comparison_output"
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Names of the systems being compared
AGENTS = ["darkmyter", "eagle", "yolo11_botsort"]

# SECTION 3.1: QUANTITATIVE METRICS

class TrajectoryMetrics:
    """
    Section 3.1 - Quantitative Metrics:
        - Trajectory Smoothness (Jerk Score)
        - Speed Plausibility
        - Detection Completeness

    Only detections whose class_name is "player" enter the metrics. Time is
    derived from frame ids using the given fps; distances for the speed
    metrics are converted with pixels_per_meter.
    """

    def __init__(self, detections: List[Detection], fps: float = 30.0,
                 pixels_per_meter: float = 10.0):
        """
        Args:
            detections: Detections of one system for one video.
            fps: Frame rate used to turn frame differences into seconds.
            pixels_per_meter: Scale used to turn pixel distances into meters.
        """
        self.detections = detections
        self.fps = fps
        self.pixels_per_meter = pixels_per_meter
        self.dt = 1.0 / fps  # duration of one frame in seconds
        self.tracks = self._organize_by_track()

    def _organize_by_track(self) -> Dict[int, List[Detection]]:
        """Group player detections by track_id, each track sorted by frame."""
        tracks = defaultdict(list)
        for det in self.detections:
            if det.class_name == "player":
                tracks[det.track_id].append(det)
        for track_id in tracks:
            tracks[track_id].sort(key=lambda d: d.frame_id)
        return dict(tracks)

    # 3.1.1 Trajectory Smoothness

    def compute_jerk_score(self, track_id: int) -> Optional[float]:
        """
        Mean jerk magnitude (3rd derivative of position) for a track.

        Computed by finite differences over every window of four consecutive
        detections, using box centers in pixels (no meter conversion).
        Windows containing two detections in the same frame are skipped.

        Returns:
            The mean over all usable windows, or None when the track has
            fewer than four detections or no usable window.
        """
        track = self.tracks.get(track_id, [])
        if len(track) < 4:
            return None

        positions = np.array([det.center for det in track])
        frames = np.array([det.frame_id for det in track])
        jerks = []

        for i in range(len(track) - 3):
            p0, p1, p2, p3 = positions[i:i+4]
            f0, f1, f2, f3 = frames[i:i+4]

            dt01, dt12, dt23 = (f1-f0)*self.dt, (f2-f1)*self.dt, (f3-f2)*self.dt
            if dt01 == 0 or dt12 == 0 or dt23 == 0:
                continue

            # Velocities per step, then accelerations and jerk over the
            # averaged step lengths
            v1, v2, v3 = (p1-p0)/dt01, (p2-p1)/dt12, (p3-p2)/dt23
            a1 = (v2 - v1) / ((dt01 + dt12) / 2)
            a2 = (v3 - v2) / ((dt12 + dt23) / 2)
            jerk = (a2 - a1) / ((dt01 + dt12 + dt23) / 3)
            jerks.append(np.linalg.norm(jerk))

        return np.mean(jerks) if jerks else None

    def compute_all_jerk_scores(self) -> Dict[int, float]:
        """Jerk score per track_id, omitting tracks without a score."""
        scores = {}
        for track_id in self.tracks:
            score = self.compute_jerk_score(track_id)
            if score is not None:
                scores[track_id] = score
        return scores

    def get_jerk_summary(self) -> Dict:
        """Mean/std/min/max/count of the per-track jerk scores (None if none)."""
        scores = self.compute_all_jerk_scores()
        if not scores:
            return {"mean": None, "std": None, "min": None, "max": None, "count": 0}
        values = list(scores.values())
        return {
            "mean": float(np.mean(values)), "std": float(np.std(values)),
            "min": float(np.min(values)), "max": float(np.max(values)),
            "count": len(values)
        }

    # 3.1.2 Speed Plausibility

    def _speed_samples(self, track_id: int) -> List[Tuple[float, float]]:
        """Return (midpoint time in s, speed in m/s) per consecutive detection pair."""
        track = self.tracks.get(track_id, [])
        samples = []
        for prev, curr in zip(track, track[1:]):
            frame_diff = curr.frame_id - prev.frame_id
            if frame_diff <= 0:
                # Duplicate frame within a track: no elapsed time to divide by
                continue
            p1, p2 = np.array(prev.center), np.array(curr.center)
            dist_meters = np.linalg.norm(p2 - p1) / self.pixels_per_meter
            midpoint = (prev.frame_id + curr.frame_id) / 2 * self.dt
            samples.append((midpoint, dist_meters / (frame_diff * self.dt)))
        return samples

    def compute_speeds(self, track_id: int) -> List[float]:
        """Compute instantaneous speeds in m/s."""
        return [speed for _, speed in self._speed_samples(track_id)]

    def compute_accelerations(self, track_id: int) -> List[float]:
        """
        Absolute speed changes in m/s^2 between consecutive speed samples.

        Each change is divided by the time between the two samples' midpoints,
        so gaps in a track (missed frames) are accounted for. For a track
        detected in every frame this equals one frame interval.
        """
        samples = self._speed_samples(track_id)
        accels = []
        for (t1, s1), (t2, s2) in zip(samples, samples[1:]):
            elapsed = t2 - t1
            if elapsed <= 0:
                continue
            accels.append(abs(s2 - s1) / elapsed)
        return accels

    def get_speed_plausibility(self, max_speed_ms: float = 11.11,
                                max_accel_ms2: float = 5.0) -> Dict:
        """
        Share of speed/acceleration samples above the given limits.

        Args:
            max_speed_ms: Speed limit in m/s; samples above it are violations.
            max_accel_ms2: Acceleration limit in m/s^2.

        Returns:
            dict with violation rates, mean and max values and sample counts.
            Rates and statistics are None when there are no samples.
        """
        all_speeds, all_accels = [], []
        for track_id in self.tracks:
            all_speeds.extend(self.compute_speeds(track_id))
            all_accels.extend(self.compute_accelerations(track_id))

        if not all_speeds:
            return {
                "speed_violation_rate": None, "accel_violation_rate": None,
                "mean_speed_ms": None, "max_speed_ms": None,
                "mean_accel_ms2": None, "max_accel_ms2": None,
                "total_speed_samples": 0, "total_accel_samples": 0
            }

        return {
            "speed_violation_rate": sum(1 for s in all_speeds if s > max_speed_ms) / len(all_speeds),
            "accel_violation_rate": sum(1 for a in all_accels if a > max_accel_ms2) / len(all_accels) if all_accels else None,
            "mean_speed_ms": float(np.mean(all_speeds)),
            "max_speed_ms": float(np.max(all_speeds)),
            "mean_accel_ms2": float(np.mean(all_accels)) if all_accels else None,
            "max_accel_ms2": float(np.max(all_accels)) if all_accels else None,
            "total_speed_samples": len(all_speeds),
            "total_accel_samples": len(all_accels)
        }

    # 3.1.3 Detection Completeness

    def get_detection_completeness(self, expected_players: int = 22) -> Dict:
        """
        Per-frame player counts and track fragmentation.

        Args:
            expected_players: Number of players expected on the pitch. A frame
                counts as complete when it has between expected_players - 2
                and expected_players player detections (20..22 by default;
                the result key keeps its "20_22" name for any value).

        Returns:
            dict of statistics. "coverage_rate" and "total_frames" refer to
            frames with at least one player detection; "fragmentation_rate"
            is the share of tracks shorter than 10% of the frame span between
            the first and last such frame.
        """
        frame_counts = defaultdict(int)
        for det in self.detections:
            if det.class_name == "player":
                frame_counts[det.frame_id] += 1

        if not frame_counts:
            return {
                "frames_with_20_22_players": 0, "coverage_rate": 0.0,
                "mean_players_per_frame": 0.0, "std_players_per_frame": 0.0,
                "min_players_per_frame": 0, "max_players_per_frame": 0,
                "total_frames": 0, "total_tracks": 0,
                "mean_track_length": 0.0, "fragmentation_rate": 0.0
            }

        counts = list(frame_counts.values())
        min_expected = expected_players - 2
        frames_in_range = sum(1 for c in counts if min_expected <= c <= expected_players)
        track_lengths = [len(track) for track in self.tracks.values()]
        total_frames = max(frame_counts.keys()) - min(frame_counts.keys()) + 1
        short_tracks = sum(1 for l in track_lengths if l < total_frames * 0.1)

        return {
            "frames_with_20_22_players": frames_in_range,
            "coverage_rate": frames_in_range / len(frame_counts),
            "mean_players_per_frame": float(np.mean(counts)),
            "std_players_per_frame": float(np.std(counts)),
            "min_players_per_frame": int(np.min(counts)),
            "max_players_per_frame": int(np.max(counts)),
            "total_frames": len(frame_counts),
            "total_tracks": len(self.tracks),
            "mean_track_length": float(np.mean(track_lengths)) if track_lengths else 0.0,
            "fragmentation_rate": short_tracks / len(self.tracks) if self.tracks else 0.0
        }

# SECTION 3.2: INTER-SYSTEM AGREEMENT

class InterSystemAgreement:
    """
    Section 3.2 - Inter-System Agreement between two tracking systems.

    Only detections whose ``class_name`` is ``"player"`` are considered, and all
    metrics are computed on frames where both systems have at least one player.

    Metrics:
        - Position Distance (distance between matched detection centers)
        - Bounding Box Overlap (IoU)
        - Velocity comparison (per-system frame-to-frame displacement statistics)
        - Disagreement Zones (frames where the systems differ strongly)
    """

    def __init__(self, system1_detections: List["Detection"],
                 system2_detections: List["Detection"],
                 system1_name: str = "System1",
                 system2_name: str = "System2"):
        """Index both detection lists by frame and record the frames they share."""
        self.sys1_dets = system1_detections
        self.sys2_dets = system2_detections
        self.sys1_name = system1_name
        self.sys2_name = system2_name

        self.sys1_by_frame = self._organize_by_frame(system1_detections)
        self.sys2_by_frame = self._organize_by_frame(system2_detections)
        self.common_frames = sorted(
            set(self.sys1_by_frame.keys()) & set(self.sys2_by_frame.keys())
        )

    def _organize_by_frame(self, detections: List["Detection"]) -> Dict[int, List["Detection"]]:
        """Group player detections by ``frame_id``; other classes are dropped."""
        by_frame = defaultdict(list)
        for det in detections:
            if det.class_name == "player":
                by_frame[det.frame_id].append(det)
        return dict(by_frame)

    @staticmethod
    def compute_iou(bbox1: List[float], bbox2: List[float]) -> float:
        """Intersection-over-union of two boxes indexed as [x1, y1, x2, y2].

        Returns 0.0 when the boxes do not overlap.
        """
        x1, y1 = max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1])
        x2, y2 = min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])

        if x2 <= x1 or y2 <= y1:
            return 0.0

        intersection = (x2 - x1) * (y2 - y1)
        area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
        area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
        union = area1 + area2 - intersection
        # Defensive: a positive intersection implies a positive union for
        # well-formed boxes, but never divide by zero on malformed input.
        if union <= 0:
            return 0.0
        return intersection / union

    def match_detections_hungarian(self, frame_id: int,
                                    iou_threshold: float = 0.3) -> List[Tuple["Detection", "Detection", float]]:
        """Match the two systems' detections in one frame by optimal assignment.

        Solves the linear assignment problem that maximises the total IoU of the
        selected pairs (Hungarian-style), considering only pairs whose IoU is
        positive and at least ``iou_threshold``. The previous greedy
        first-come matching depended on detection order and could leave valid
        pairs unmatched.

        Returns:
            List of ``(system1_detection, system2_detection, iou)`` tuples,
            ordered by the system-1 detection's position in the frame. Empty when
            either system has no player detection in ``frame_id``.
        """
        # Local import: scipy is only needed by this method.
        from scipy.optimize import linear_sum_assignment

        sys1_dets = self.sys1_by_frame.get(frame_id, [])
        sys2_dets = self.sys2_by_frame.get(frame_id, [])

        if not sys1_dets or not sys2_dets:
            return []

        iou_matrix = np.array(
            [[self.compute_iou(det1.bbox, det2.bbox) for det2 in sys2_dets]
             for det1 in sys1_dets],
            dtype=float,
        )
        # Pairs below the threshold can never be accepted, so give them zero
        # gain; otherwise they could displace an acceptable pairing.
        gain = np.where(iou_matrix >= iou_threshold, iou_matrix, 0.0)
        rows, cols = linear_sum_assignment(gain, maximize=True)

        matches = []
        for row, col in zip(rows, cols):
            # Zero gain means "assigned only because something had to be".
            if gain[row, col] > 0:
                matches.append((sys1_dets[row], sys2_dets[col], float(iou_matrix[row, col])))
        return matches

    # 3.2.1 Position Distance

    def get_position_distance_metrics(self) -> Dict:
        """Statistics of the center-to-center distance (pixels) of matched pairs."""
        all_distances = []
        for frame_id in self.common_frames:
            for det1, det2, _ in self.match_detections_hungarian(frame_id):
                dist = np.linalg.norm(np.array(det1.center) - np.array(det2.center))
                all_distances.append(dist)

        if not all_distances:
            return {"mean_distance_px": None, "std_distance_px": None,
                    "max_distance_px": None, "median_distance_px": None, "total_matches": 0}

        return {
            "mean_distance_px": float(np.mean(all_distances)),
            "std_distance_px": float(np.std(all_distances)),
            "max_distance_px": float(np.max(all_distances)),
            "median_distance_px": float(np.median(all_distances)),
            "total_matches": len(all_distances)
        }

    # 3.2.2 IoU Metrics

    def get_iou_metrics(self, iou_threshold: float = 0.3) -> Dict:
        """IoU statistics of matched pairs plus per-system match rates.

        Match rates are matched pairs divided by each system's total player
        detections over the common frames.
        """
        all_ious = []
        total_sys1, total_sys2, total_matched = 0, 0, 0

        for frame_id in self.common_frames:
            total_sys1 += len(self.sys1_by_frame.get(frame_id, []))
            total_sys2 += len(self.sys2_by_frame.get(frame_id, []))

            matches = self.match_detections_hungarian(frame_id, iou_threshold)
            total_matched += len(matches)
            all_ious.extend(iou for _, _, iou in matches)

        if not all_ious:
            return {"mean_iou": None, "std_iou": None, "min_iou": None, "max_iou": None,
                    "match_rate_sys1": 0.0, "match_rate_sys2": 0.0, "total_matched": 0,
                    "total_sys1_detections": total_sys1, "total_sys2_detections": total_sys2}

        return {
            "mean_iou": float(np.mean(all_ious)), "std_iou": float(np.std(all_ious)),
            "min_iou": float(np.min(all_ious)), "max_iou": float(np.max(all_ious)),
            "match_rate_sys1": total_matched / total_sys1 if total_sys1 > 0 else 0.0,
            "match_rate_sys2": total_matched / total_sys2 if total_sys2 > 0 else 0.0,
            "total_matched": total_matched,
            "total_sys1_detections": total_sys1, "total_sys2_detections": total_sys2
        }

    # 3.2.3 Velocity Correlation

    def compute_velocities_for_frame_pairs(self) -> Tuple[List[float], List[float]]:
        """Per-system frame-to-frame displacements (pixels) of matched detections.

        Only strictly consecutive common frames are used. Within each system a
        displacement is recorded when the same ``track_id`` is matched in both
        frames. The two returned lists are independent samples; they are not
        aligned pair-by-pair.
        """
        sys1_velocities, sys2_velocities = [], []

        # Match every common frame once; each frame can take part in two
        # consecutive pairs and would otherwise be matched twice.
        matches_by_frame = {
            frame_id: self.match_detections_hungarian(frame_id)
            for frame_id in self.common_frames
        }

        for frame1, frame2 in zip(self.common_frames, self.common_frames[1:]):
            if frame2 - frame1 != 1:
                continue

            matches1 = matches_by_frame[frame1]
            matches2 = matches_by_frame[frame2]

            sys1_pos_f1 = {det1.track_id: det1.center for det1, _, _ in matches1}
            sys1_pos_f2 = {det1.track_id: det1.center for det1, _, _ in matches2}
            sys2_pos_f1 = {det2.track_id: det2.center for _, det2, _ in matches1}
            sys2_pos_f2 = {det2.track_id: det2.center for _, det2, _ in matches2}

            for tid in set(sys1_pos_f1.keys()) & set(sys1_pos_f2.keys()):
                sys1_velocities.append(np.linalg.norm(
                    np.array(sys1_pos_f2[tid]) - np.array(sys1_pos_f1[tid])))
            for tid in set(sys2_pos_f1.keys()) & set(sys2_pos_f2.keys()):
                sys2_velocities.append(np.linalg.norm(
                    np.array(sys2_pos_f2[tid]) - np.array(sys2_pos_f1[tid])))

        return sys1_velocities, sys2_velocities

    def get_velocity_correlation(self) -> Dict:
        """Compare the two systems' displacement distributions.

        Reports mean/std displacement per system and the absolute difference of
        the means. Despite the name, no correlation coefficient is computed.
        """
        sys1_vels, sys2_vels = self.compute_velocities_for_frame_pairs()

        if not sys1_vels or not sys2_vels:
            return {
                f"{self.sys1_name}_mean_velocity_px": None,
                f"{self.sys2_name}_mean_velocity_px": None,
                f"{self.sys1_name}_std_velocity_px": None,
                f"{self.sys2_name}_std_velocity_px": None,
                "velocity_difference_px": None,
                "velocity_samples_sys1": 0, "velocity_samples_sys2": 0
            }

        return {
            f"{self.sys1_name}_mean_velocity_px": float(np.mean(sys1_vels)),
            f"{self.sys2_name}_mean_velocity_px": float(np.mean(sys2_vels)),
            f"{self.sys1_name}_std_velocity_px": float(np.std(sys1_vels)),
            f"{self.sys2_name}_std_velocity_px": float(np.std(sys2_vels)),
            "velocity_difference_px": abs(float(np.mean(sys1_vels)) - float(np.mean(sys2_vels))),
            "velocity_samples_sys1": len(sys1_vels), "velocity_samples_sys2": len(sys2_vels)
        }

    # 3.2.4 Disagreement Zones

    def identify_disagreement_zones(self, detection_diff_threshold: int = 3,
                                     iou_threshold: float = 0.3) -> Dict:
        """Flag common frames where the two systems disagree.

        A frame is a disagreement frame when the player-count difference exceeds
        ``detection_diff_threshold`` or when fewer than half of the larger
        system's detections are matched. Only the first 10 flagged frames are
        included in ``disagreement_frames_sample``.
        """
        disagreement_frames, detection_count_diffs = [], []
        unmatched_count = 0

        for frame_id in self.common_frames:
            sys1_dets = self.sys1_by_frame.get(frame_id, [])
            sys2_dets = self.sys2_by_frame.get(frame_id, [])
            count_diff = abs(len(sys1_dets) - len(sys2_dets))
            detection_count_diffs.append(count_diff)

            matches = self.match_detections_hungarian(frame_id, iou_threshold)
            max_dets = max(len(sys1_dets), len(sys2_dets), 1)
            match_rate = len(matches) / max_dets
            # Each match accounts for one detection on each side.
            unmatched_count += (len(sys1_dets) + len(sys2_dets) - 2 * len(matches))

            if count_diff > detection_diff_threshold or match_rate < 0.5:
                disagreement_frames.append({
                    "frame_id": frame_id,
                    f"{self.sys1_name}_count": len(sys1_dets),
                    f"{self.sys2_name}_count": len(sys2_dets),
                    "matched": len(matches), "match_rate": match_rate
                })

        return {
            "total_disagreement_frames": len(disagreement_frames),
            "disagreement_rate": len(disagreement_frames) / len(self.common_frames) if self.common_frames else 0.0,
            "mean_detection_count_diff": float(np.mean(detection_count_diffs)) if detection_count_diffs else 0.0,
            "max_detection_count_diff": int(np.max(detection_count_diffs)) if detection_count_diffs else 0,
            "total_unmatched_detections": unmatched_count,
            "common_frames_analyzed": len(self.common_frames),
            "disagreement_frames_sample": disagreement_frames[:10]
        }

# MULTI-CLIP DISCOVERY AND PROCESSING

def discover_clips(results_dir: str, agents: List[str]) -> Dict[str, Dict[str, str]]:
    """Find all clips and their tracking outputs.

    Each sub-directory of ``results_dir`` is treated as one clip. For every
    agent the output file is looked up in this order:

    1. ``<clip>/full/<agent>/`` - the first ``*.json`` file (alphabetically)
       whose name contains "output" (case-insensitive);
    2. only if step 1 found nothing for *any* agent:
       ``<clip>/<agent>_output.json`` then ``<clip>/<agent>/<agent>_output.json``.

    Args:
        results_dir: Directory that holds one sub-directory per clip.
        agents: Agent names to look for.

    Returns:
        ``{clip_name: {agent_name: path_to_json}}`` for clips with at least one
        agent output, in alphabetical clip order. Empty when ``results_dir``
        does not exist or is not a directory.
    """
    root = Path(results_dir)
    if not root.is_dir():
        # Let the caller report "no clips" instead of failing inside iterdir().
        return {}

    clips = {}

    # iterdir() order is arbitrary; sort so clip order (and therefore report
    # order) is reproducible between runs.
    for item in sorted(root.iterdir()):
        if not item.is_dir():
            continue

        clip_agents = {}

        full_dir = item / "full"
        if full_dir.is_dir():
            for agent in agents:
                agent_dir = full_dir / agent
                if not agent_dir.is_dir():
                    continue
                # Sorted so the same file is picked every time when several match.
                for candidate in sorted(agent_dir.iterdir()):
                    if candidate.suffix == '.json' and 'output' in candidate.name.lower():
                        clip_agents[agent] = str(candidate)
                        break

        if not clip_agents:
            for agent in agents:
                for candidate in (item / f"{agent}_output.json",
                                  item / agent / f"{agent}_output.json"):
                    if candidate.exists():
                        clip_agents[agent] = str(candidate)
                        break

        if clip_agents:
            clips[item.name] = clip_agents

    return clips


def compute_clip_metrics(clip_name: str, agent_files: Dict[str, str],
                          fps: float = 30.0, pixels_per_meter: float = 10.0) -> Dict:
    """Compute the per-agent and pairwise metrics for one clip.

    Args:
        clip_name: Name used to label the result.
        agent_files: Mapping of agent name to its tracking output file.
        fps: Passed to ``TrajectoryMetrics``.
        pixels_per_meter: Passed to ``TrajectoryMetrics``.

    Returns:
        Dict with ``clip_name``, ``agents_loaded``, ``quantitative`` (per agent)
        and ``agreement`` (per agent pair), or a dict with an ``error`` entry
        when no agent produced any detections.
    """
    # Load every agent's file; agents whose file yields nothing are dropped.
    loaded = {}
    for agent, path in agent_files.items():
        detections = load_tracking_file(path, agent)
        if detections:
            loaded[agent] = detections

    if not loaded:
        return {"clip_name": clip_name, "error": "No valid detections loaded"}

    # Section 3.1: single-system trajectory metrics.
    quantitative = {}
    for agent, detections in loaded.items():
        trajectory = TrajectoryMetrics(detections, fps=fps, pixels_per_meter=pixels_per_meter)
        quantitative[agent] = {
            "jerk": trajectory.get_jerk_summary(),
            "speed": trajectory.get_speed_plausibility(),
            "completeness": trajectory.get_detection_completeness(),
            "detection_count": len(detections)
        }

    # Section 3.2: every unordered pair of loaded agents, in load order.
    agreement = {}
    names = list(loaded.keys())
    for position, first in enumerate(names):
        for second in names[position + 1:]:
            comparison = InterSystemAgreement(loaded[first], loaded[second], first, second)
            agreement[f"{first}_vs_{second}"] = {
                "position": comparison.get_position_distance_metrics(),
                "iou": comparison.get_iou_metrics(),
                "velocity": comparison.get_velocity_correlation(),
                "disagreement": comparison.identify_disagreement_zones()
            }

    return {
        "clip_name": clip_name,
        "agents_loaded": list(loaded.keys()),
        "quantitative": quantitative,
        "agreement": agreement
    }

# AGGREGATION FUNCTIONS

def aggregate_metric(values: List[float], weights: Optional[List[float]] = None) -> Dict:
    """Aggregate metric values with optional weights.

    ``None`` values are dropped. When ``weights`` has one entry per value, the
    weight belonging to a dropped value is dropped with it so the two stay
    aligned. If the lengths differ the weights cannot be aligned, so a warning
    is issued and the unweighted mean is used. A non-positive total weight also
    falls back to the unweighted mean.

    Returns:
        Dict with ``mean`` (weighted when possible), unweighted ``std``,
        ``min``, ``max`` and ``count``; all statistics are ``None`` and
        ``count`` is 0 when no value remains.
    """
    import warnings

    values = list(values)
    aligned_weights = None
    if weights:
        weights = list(weights)
        if len(weights) == len(values):
            kept = [(v, w) for v, w in zip(values, weights) if v is not None]
            values = [v for v, _ in kept]
            aligned_weights = [w for _, w in kept]
        else:
            warnings.warn(
                f"aggregate_metric: got {len(values)} values but {len(weights)} "
                "weights; falling back to an unweighted mean"
            )
    if aligned_weights is None:
        values = [v for v in values if v is not None]

    if not values:
        return {"mean": None, "std": None, "min": None, "max": None, "count": 0}

    # np.average raises ZeroDivisionError when the weights sum to zero.
    if aligned_weights is not None and float(np.sum(aligned_weights)) > 0:
        mean_value = np.average(values, weights=aligned_weights)
    else:
        mean_value = np.mean(values)

    return {
        "mean": float(mean_value), "std": float(np.std(values)),
        "min": float(np.min(values)), "max": float(np.max(values)), "count": len(values)
    }


def _record_sample(samples: Dict, sample_weights: Dict, key: str, value, weight) -> None:
    """Append ``value`` under ``key`` together with the weight of the clip it came from."""
    samples[key].append(value)
    sample_weights[key].append(weight)


def aggregate_quantitative_metrics(all_clip_results: List[Dict]) -> Dict:
    """Aggregate Section 3.1 metrics across all clips.

    Clips carrying an ``error`` entry are skipped. Weighted means use each
    clip's ``completeness.total_frames`` as the weight. A weight is stored next
    to every recorded sample, so metrics that are missing for some clips (for
    example jerk when no track is long enough) stay aligned with their weights.
    """
    agent_metrics = defaultdict(lambda: defaultdict(list))
    agent_metric_weights = defaultdict(lambda: defaultdict(list))
    clips_per_agent = defaultdict(int)

    for clip_result in all_clip_results:
        if "error" in clip_result:
            continue

        for agent_name, metrics in clip_result.get("quantitative", {}).items():
            comp = metrics.get("completeness", {})
            frame_count = comp.get("total_frames", 1)
            clips_per_agent[agent_name] += 1
            values = agent_metrics[agent_name]
            value_weights = agent_metric_weights[agent_name]

            jerk = metrics.get("jerk", {})
            if jerk.get("mean") is not None:
                _record_sample(values, value_weights, "jerk_mean", jerk["mean"], frame_count)
                values["jerk_count"].append(jerk.get("count", 0))

            speed = metrics.get("speed", {})
            if speed.get("mean_speed_ms") is not None:
                _record_sample(values, value_weights, "speed_mean", speed["mean_speed_ms"], frame_count)
                values["speed_max"].append(speed["max_speed_ms"])
                _record_sample(values, value_weights, "speed_violation_rate",
                               speed["speed_violation_rate"], frame_count)
            if speed.get("accel_violation_rate") is not None:
                _record_sample(values, value_weights, "accel_violation_rate",
                               speed["accel_violation_rate"], frame_count)

            for key in ["total_frames", "mean_players_per_frame", "coverage_rate",
                        "total_tracks", "mean_track_length", "fragmentation_rate"]:
                _record_sample(values, value_weights, key, comp.get(key, 0), frame_count)
            values["detection_count"].append(metrics.get("detection_count", 0))

    aggregated = {}
    for agent_name, metrics in agent_metrics.items():
        weights = agent_metric_weights[agent_name]
        aggregated[agent_name] = {
            "clips_analyzed": clips_per_agent[agent_name],
            "total_frames": sum(metrics["total_frames"]),
            "total_detections": sum(metrics["detection_count"]),
            "total_tracks": sum(metrics["total_tracks"]),
            "jerk": {"mean": aggregate_metric(metrics["jerk_mean"], weights["jerk_mean"]),
                     "tracks_analyzed": sum(metrics["jerk_count"])},
            "speed": {"mean_speed_ms": aggregate_metric(metrics["speed_mean"], weights["speed_mean"]),
                      "max_speed_ms": aggregate_metric(metrics["speed_max"]),
                      "violation_rate": aggregate_metric(metrics["speed_violation_rate"],
                                                         weights["speed_violation_rate"]),
                      "accel_violation_rate": aggregate_metric(metrics["accel_violation_rate"],
                                                               weights["accel_violation_rate"])},
            "completeness": {
                "mean_players_per_frame": aggregate_metric(metrics["mean_players_per_frame"],
                                                           weights["mean_players_per_frame"]),
                "coverage_rate_20_22": aggregate_metric(metrics["coverage_rate"], weights["coverage_rate"]),
                "mean_track_length": aggregate_metric(metrics["mean_track_length"],
                                                      weights["mean_track_length"]),
                "fragmentation_rate": aggregate_metric(metrics["fragmentation_rate"],
                                                       weights["fragmentation_rate"])
            }
        }
    return aggregated


def aggregate_agreement_metrics(all_clip_results: List[Dict]) -> Dict:
    """Aggregate Section 3.2 metrics across all clips.

    Clips carrying an ``error`` entry are skipped. Weighted means use each
    clip's ``disagreement.common_frames_analyzed`` as the weight, stored next to
    every recorded sample so that clips without matches (whose position/IoU
    metrics are ``None``) do not misalign values and weights.
    """
    pair_metrics = defaultdict(lambda: defaultdict(list))
    pair_metric_weights = defaultdict(lambda: defaultdict(list))
    clips_per_pair = defaultdict(int)

    for clip_result in all_clip_results:
        if "error" in clip_result:
            continue

        for pair_name, metrics in clip_result.get("agreement", {}).items():
            disagree = metrics.get("disagreement", {})
            common_frames = disagree.get("common_frames_analyzed", 1)
            clips_per_pair[pair_name] += 1
            values = pair_metrics[pair_name]
            value_weights = pair_metric_weights[pair_name]

            pos = metrics.get("position", {})
            if pos.get("mean_distance_px") is not None:
                _record_sample(values, value_weights, "position_mean", pos["mean_distance_px"], common_frames)
                values["position_max"].append(pos["max_distance_px"])
                values["total_matches"].append(pos["total_matches"])

            iou = metrics.get("iou", {})
            if iou.get("mean_iou") is not None:
                _record_sample(values, value_weights, "iou_mean", iou["mean_iou"], common_frames)
                values["iou_min"].append(iou["min_iou"])
                _record_sample(values, value_weights, "match_rate_sys1", iou["match_rate_sys1"], common_frames)
                _record_sample(values, value_weights, "match_rate_sys2", iou["match_rate_sys2"], common_frames)

            _record_sample(values, value_weights, "disagreement_rate",
                           disagree.get("disagreement_rate", 0), common_frames)
            _record_sample(values, value_weights, "mean_count_diff",
                           disagree.get("mean_detection_count_diff", 0), common_frames)
            values["common_frames"].append(disagree.get("common_frames_analyzed", 0))

    aggregated = {}
    for pair_name, metrics in pair_metrics.items():
        weights = pair_metric_weights[pair_name]
        aggregated[pair_name] = {
            "clips_analyzed": clips_per_pair[pair_name],
            "total_common_frames": sum(metrics["common_frames"]),
            "total_matches": sum(metrics["total_matches"]) if metrics["total_matches"] else 0,
            "position_distance": {"mean_px": aggregate_metric(metrics["position_mean"], weights["position_mean"]),
                                  "max_px": aggregate_metric(metrics["position_max"])},
            "iou": {"mean": aggregate_metric(metrics["iou_mean"], weights["iou_mean"]),
                    "min": aggregate_metric(metrics["iou_min"]),
                    "match_rate_sys1": aggregate_metric(metrics["match_rate_sys1"], weights["match_rate_sys1"]),
                    "match_rate_sys2": aggregate_metric(metrics["match_rate_sys2"], weights["match_rate_sys2"])},
            "disagreement": {"rate": aggregate_metric(metrics["disagreement_rate"], weights["disagreement_rate"]),
                             "mean_count_diff": aggregate_metric(metrics["mean_count_diff"],
                                                                 weights["mean_count_diff"])}
        }
    return aggregated

# REPORT GENERATION

def generate_comparison_table(aggregated_quant: Dict) -> str:
    """Generate comparison table for quantitative metrics.

    Args:
        aggregated_quant: Per-agent aggregates as produced by
            ``aggregate_quantitative_metrics``.

    Returns:
        A fixed-width text table with one column per agent, or
        ``"No data to display."`` when there are no agents. Missing or
        unformattable values are rendered as ``N/A``.
    """
    agents = list(aggregated_quant.keys())
    if not agents:
        return "No data to display."

    def as_percent(fraction):
        # Explicit None check: a rate of exactly 0.0 is a real value and must
        # be shown as 0.0, not as N/A.
        return None if fraction is None else fraction * 100

    lines = ["\n" + "=" * 120, "AGGREGATED QUANTITATIVE METRICS COMPARISON TABLE", "=" * 120]

    header = f"{'Metric':<50} | " + " | ".join(f"{a:>18}" for a in agents)
    lines.extend([header, "-" * len(header)])

    # (row label, value getter, format spec)
    metrics_to_show = [
        ("Total Clips", lambda m: m["clips_analyzed"], "{:d}"),
        ("Total Frames", lambda m: m["total_frames"], "{:,d}"),
        ("Total Detections", lambda m: m["total_detections"], "{:,d}"),
        ("Mean Jerk Score", lambda m: m["jerk"]["mean"]["mean"], "{:.2f}"),
        ("Mean Speed (m/s)", lambda m: m["speed"]["mean_speed_ms"]["mean"], "{:.2f}"),
        ("Speed Violation Rate (%)", lambda m: as_percent(m["speed"]["violation_rate"]["mean"]), "{:.1f}"),
        ("Mean Players/Frame", lambda m: m["completeness"]["mean_players_per_frame"]["mean"], "{:.1f}"),
        ("Coverage Rate 20-22 (%)", lambda m: as_percent(m["completeness"]["coverage_rate_20_22"]["mean"]), "{:.1f}"),
        ("Mean Track Length (frames)", lambda m: m["completeness"]["mean_track_length"]["mean"], "{:.1f}"),
        ("Fragmentation Rate (%)", lambda m: as_percent(m["completeness"]["fragmentation_rate"]["mean"]), "{:.1f}"),
    ]

    missing_cell = f"{'N/A':>18}"
    for label, getter, fmt in metrics_to_show:
        cells = []
        for agent in agents:
            try:
                val = getter(aggregated_quant[agent])
                cells.append(missing_cell if val is None else f"{fmt.format(val):>18}")
            except (KeyError, TypeError, ValueError):
                # ValueError: value type does not fit the format spec
                # (e.g. a float handed to "{:d}").
                cells.append(missing_cell)
        lines.append(f"{label:<50} | " + " | ".join(cells))

    lines.append("=" * 120)
    return "\n".join(lines)


def export_aggregated_csv(aggregated_quant: Dict, aggregated_agree: Dict, output_dir: str):
    """Write the aggregated metrics to two CSV files inside ``output_dir``.

    ``aggregated_quantitative_metrics.csv`` gets one row per agent and
    ``aggregated_agreement_metrics.csv`` one row per system pair. The directory
    is created if needed; a file is only written when it would have rows.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    def write_rows(filename, rows):
        # Nothing to export for this table: leave no file behind.
        if not rows:
            return
        destination = target_dir / filename
        with open(destination, 'w', newline='') as handle:
            writer = csv.DictWriter(handle, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows(rows)
        print(f"Exported: {destination}")

    # Quantitative metrics: flatten each agent's nested aggregates into one row.
    quant_rows = [
        {
            "agent": agent_name,
            "clips_analyzed": stats["clips_analyzed"],
            "total_frames": stats["total_frames"],
            "total_detections": stats["total_detections"],
            "total_tracks": stats["total_tracks"],
            "jerk_mean": stats["jerk"]["mean"]["mean"],
            "jerk_std": stats["jerk"]["mean"]["std"],
            "speed_mean_ms": stats["speed"]["mean_speed_ms"]["mean"],
            "speed_violation_rate": stats["speed"]["violation_rate"]["mean"],
            "accel_violation_rate": stats["speed"]["accel_violation_rate"]["mean"],
            "mean_players_per_frame": stats["completeness"]["mean_players_per_frame"]["mean"],
            "coverage_rate": stats["completeness"]["coverage_rate_20_22"]["mean"],
            "mean_track_length": stats["completeness"]["mean_track_length"]["mean"],
            "fragmentation_rate": stats["completeness"]["fragmentation_rate"]["mean"],
        }
        for agent_name, stats in aggregated_quant.items()
    ]
    write_rows("aggregated_quantitative_metrics.csv", quant_rows)

    # Agreement metrics: one row per compared system pair.
    agree_rows = [
        {
            "system_pair": pair_name,
            "clips_analyzed": stats["clips_analyzed"],
            "total_common_frames": stats["total_common_frames"],
            "total_matches": stats["total_matches"],
            "position_mean_px": stats["position_distance"]["mean_px"]["mean"],
            "position_std_px": stats["position_distance"]["mean_px"]["std"],
            "iou_mean": stats["iou"]["mean"]["mean"],
            "iou_std": stats["iou"]["mean"]["std"],
            "match_rate_sys1": stats["iou"]["match_rate_sys1"]["mean"],
            "match_rate_sys2": stats["iou"]["match_rate_sys2"]["mean"],
            "disagreement_rate": stats["disagreement"]["rate"]["mean"],
            "mean_count_diff": stats["disagreement"]["mean_count_diff"]["mean"],
        }
        for pair_name, stats in aggregated_agree.items()
    ]
    write_rows("aggregated_agreement_metrics.csv", agree_rows)

# MAIN FUNCTION

def run_evaluation(results_dir: str = None, output_dir: str = None,
                   agents: List[str] = None, fps: float = 30.0,
                   pixels_per_meter: float = 10.0) -> Dict:
    """
    Evaluate every discovered clip, aggregate the metrics and write the reports.

    Args:
        results_dir: Directory containing tracking results
            (falls back to the module-level ``RESULTS_DIR``)
        output_dir: Directory for output files
            (falls back to the module-level ``OUTPUT_DIR``)
        agents: List of agent names to compare
            (falls back to the module-level ``AGENTS``)
        fps: Video FPS for speed calculations
        pixels_per_meter: Pixel to meter conversion

    Returns:
        Dict with the run parameters, aggregated results and per-clip results,
        or ``{"error": "No clips found"}`` when nothing was discovered.
        The same dict is saved as ``evaluation_results.json`` in the output
        directory, next to the two aggregated CSV files.
    """
    results_dir = Path(results_dir or RESULTS_DIR)
    output_dir = Path(output_dir or OUTPUT_DIR)
    agents = agents or AGENTS

    banner = "=" * 90
    print(banner)
    print("MULTI-CLIP TRACKING SYSTEM EVALUATION")
    print(banner)
    print(f"Results Directory: {results_dir}")
    print(f"Output Directory: {output_dir}")
    print(f"Agents: {agents}\n")

    # Step 1: find the clips and which agents have output for each.
    clips = discover_clips(str(results_dir), agents)
    print(f"Discovered {len(clips)} clips:")
    for name, files in clips.items():
        print(f"  {name}: {list(files.keys())}")
    print()

    if not clips:
        print("ERROR: No clips found!")
        return {"error": "No clips found"}

    # Step 2: per-clip metrics; failed clips are kept (with their error entry).
    all_clip_results = []
    for name, files in clips.items():
        print(f"Processing {name}...")
        clip_result = compute_clip_metrics(name, files, fps, pixels_per_meter)
        all_clip_results.append(clip_result)

        if "error" in clip_result:
            continue
        for agent in clip_result.get("agents_loaded", []):
            agent_stats = clip_result["quantitative"].get(agent, {})
            print(f"  {agent}: {agent_stats.get('detection_count', 0)} detections")
    print()

    # Step 3: aggregate across clips and show the summary table.
    print("Aggregating results...")
    aggregated_quant = aggregate_quantitative_metrics(all_clip_results)
    aggregated_agree = aggregate_agreement_metrics(all_clip_results)
    print(generate_comparison_table(aggregated_quant))

    # Step 4: CSV exports.
    output_dir.mkdir(parents=True, exist_ok=True)
    export_aggregated_csv(aggregated_quant, aggregated_agree, str(output_dir))

    # Step 5: full JSON dump (default=str stringifies anything json cannot encode).
    successful_clips = sum(1 for r in all_clip_results if "error" not in r)
    full_results = {
        "generated_at": datetime.now().isoformat(),
        "parameters": {"fps": fps, "pixels_per_meter": pixels_per_meter, "agents": agents},
        "clips_analyzed": successful_clips,
        "aggregated_quantitative": aggregated_quant,
        "aggregated_agreement": aggregated_agree,
        "per_clip_results": all_clip_results
    }

    json_path = output_dir / "evaluation_results.json"
    with open(json_path, 'w') as handle:
        json.dump(full_results, handle, indent=2, default=str)
    print(f"\nSaved full results to: {json_path}")

    return full_results

# RUN EVALUATION

print_status("Evaluation Module Loaded","SUCCESS")

# Usage hint shown before the automatic run; one entry per printed line.
print("\n".join([
    "\nTo run evaluation:",
    "  results = run_evaluation()",
    "\nOr with custom paths:",
    "  results = run_evaluation(",
    "      results_dir='/content/comp-4009-tracking-results',",
    "      output_dir='/content/comparison_output'",
    "  )",
    "Running evaluation now...\n",
]))

# Run the evaluation immediately with the module-level defaults.
evaluation_results = run_evaluation()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
from datetime import datetime

# Report figures are written here. Note that this rebinds the notebook-wide
# OUTPUT_DIR name for every cell executed after this one.
OUTPUT_DIR = Path("/content/comparison_output/report")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# One colour and one display label per tracking system, keyed by the agent
# names stored in the CSV 'agent' column.
COLORS = {
    'darkmyter': '#FF69B4',
    'eagle': '#FFD700',
    'yolo11_botsort': '#4169E1'
}

SYSTEM_LABELS = {
    'darkmyter': 'Darkmyter\n(YOLOv8+ByteTrack)',
    'eagle': 'Eagle\n(YOLOv8+BoT-SORT)',
    'yolo11_botsort': 'YOLO11\n(BoT-SORT)'
}

plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette([COLORS['darkmyter'], COLORS['eagle'], COLORS['yolo11_botsort']])

print("Loading data...")
quant_df = pd.read_csv('/content/comparison_output/aggregated_quantitative_metrics.csv')
agree_df = pd.read_csv('/content/comparison_output/aggregated_agreement_metrics.csv')

# The full JSON is optional. Only "file missing/unreadable" and "not valid
# JSON" are tolerated; a bare `except:` would also hide KeyboardInterrupt and
# genuine programming errors.
# NOTE(review): run_evaluation() saves its JSON as 'evaluation_results.json';
# this cell looks for 'multi_clip_full_results.json' - confirm which name the
# code consuming `full_results` expects.
try:
    with open('/content/comparison_output/multi_clip_full_results.json', 'r') as f:
        full_results = json.load(f)
    has_full_results = True
except (OSError, ValueError) as exc:
    full_results = None
    has_full_results = False
    print(f"Full results JSON not loaded ({exc}); continuing with CSV data only.")

print(f"Systems: {list(quant_df['agent'])}")
print(f"Clips analyzed: {quant_df['clips_analyzed'].iloc[0]}")

def save_figure(fig, name):
    """Save ``fig`` as ``<name>.png`` in ``OUTPUT_DIR``, report the path, then close it."""
    destination = OUTPUT_DIR / f"{name}.png"
    export_options = {"dpi": 150, "bbox_inches": 'tight', "facecolor": 'white'}
    fig.savefig(destination, **export_options)
    print(f"Saved: {destination}")
    # Release the figure so repeated plotting does not accumulate open figures.
    plt.close(fig)

def add_value_labels(ax, bars, fmt='.1f', fontsize=9):
    """Annotate each bar with its height, centred 3 points above the bar top.

    Args:
        ax: Axes-like object providing ``annotate``.
        bars: Iterable of bar-like objects (``get_height``, ``get_x``, ``get_width``).
        fmt: Format spec applied to the height.
        fontsize: Font size of the label.
    """
    label_style = dict(xytext=(0, 3), textcoords="offset points",
                       ha='center', va='bottom', fontsize=fontsize)
    for bar in bars:
        top = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2
        ax.annotate(format(top, fmt), xy=(x_center, top), **label_style)

print("\nGenerating figures...")

# Figure 1: executive summary - a 2x3 grid with one bar per system per panel.
fig = plt.figure(figsize=(16, 12))
fig.suptitle('Player Tracking System Comparison - Executive Summary', fontsize=16, fontweight='bold', y=0.98)

x = np.arange(len(quant_df))
width = 0.6


def _summary_bar_panel(position, heights, ylabel, title):
    """Add one bar panel to the 2x3 summary grid and return ``(axes, bars)``.

    Holds everything the six panels share: system colours, y-label, bold
    title and system tick labels. Panel-specific tweaks are applied by the
    caller on the returned axes.
    """
    panel_ax = fig.add_subplot(2, 3, position)
    panel_bars = panel_ax.bar(x, heights, width, color=[COLORS[a] for a in quant_df['agent']])
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_title(title, fontweight='bold')
    panel_ax.set_xticks(x)
    panel_ax.set_xticklabels([SYSTEM_LABELS[a] for a in quant_df['agent']], fontsize=9)
    return panel_ax, panel_bars


# Panel 1: players per frame against the 20/22 reference lines.
ax1, bars = _summary_bar_panel(1, quant_df['mean_players_per_frame'],
                               'Players per Frame', 'Detection Completeness')
ax1.axhline(y=22, color='green', linestyle='--', alpha=0.7, label='Ideal (22)')
ax1.axhline(y=20, color='orange', linestyle='--', alpha=0.7, label='Minimum (20)')
ax1.legend(loc='lower right', fontsize=8)
ax1.set_ylim(0, 25)
add_value_labels(ax1, bars)

# Panel 2: jerk, scaled down by 1000 for readability.
ax2, bars = _summary_bar_panel(2, quant_df['jerk_mean'] / 1000,
                               'Mean Jerk Score (x1000)', 'Trajectory Smoothness (Lower=Better)')
add_value_labels(ax2, bars)

# Panel 3: speed violation rate as a percentage.
ax3, bars = _summary_bar_panel(3, quant_df['speed_violation_rate'] * 100,
                               'Violation Rate (%)', 'Speed Violations (>40 km/h)')
add_value_labels(ax3, bars)

# Panel 4: mean track length on a log axis, labelled as whole frames.
ax4, bars = _summary_bar_panel(4, quant_df['mean_track_length'],
                               'Frames', 'Mean Track Length (Higher=Better)')
ax4.set_yscale('log')
add_value_labels(ax4, bars, fmt='.0f')

# Panel 5: fragmentation rate; headroom above 100% keeps the labels visible.
ax5, bars = _summary_bar_panel(5, quant_df['fragmentation_rate'] * 100,
                               'Fragmentation Rate (%)', 'Track Fragmentation (Lower=Better)')
ax5.set_ylim(0, 105)
add_value_labels(ax5, bars)

# Panel 6: share of frames with 20-22 detected players.
ax6, bars = _summary_bar_panel(6, quant_df['coverage_rate'] * 100,
                               'Coverage Rate (%)', 'Frames with 20-22 Players')
ax6.set_ylim(0, 100)
add_value_labels(ax6, bars)

plt.tight_layout(rect=[0, 0, 1, 0.96])
save_figure(fig, '01_executive_summary')

# Figure 2: inter-system agreement - one bar per compared system pair.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
fig.suptitle('Inter-System Agreement Analysis', fontsize=14, fontweight='bold')


def _pair_label(pair_name):
    """Turn a 'sysA_vs_sysB' CSV key into a two-line tick label.

    Uses the first line of each system's SYSTEM_LABELS entry and falls back to
    the raw agent name for systems that have no entry.
    """
    short_names = [SYSTEM_LABELS.get(name, name).split('\n')[0]
                   for name in str(pair_name).split('_vs_', 1)]
    return '\nvs '.join(short_names)


# Labels and colours follow the rows actually present in the CSV. A fixed
# three-item list mislabels bars when the pair order differs and fails with a
# shape mismatch when the number of pairs is not three.
pair_labels = [_pair_label(pair) for pair in agree_df['system_pair']]
pair_palette = ['#FF69B4', '#FFD700', '#4169E1']
pair_colors = [pair_palette[i % len(pair_palette)] for i in range(len(pair_labels))]

ax = axes[0]
bars = ax.bar(pair_labels, agree_df['position_mean_px'], color=pair_colors, yerr=agree_df['position_std_px'], capsize=5)
ax.set_ylabel('Mean Position Distance (pixels)')
ax.set_title('Position Agreement (Lower=Better)', fontweight='bold')
add_value_labels(ax, bars, fmt='.2f')

ax = axes[1]
bars = ax.bar(pair_labels, agree_df['iou_mean'], color=pair_colors, yerr=agree_df['iou_std'], capsize=5)
ax.set_ylabel('Mean IoU')
ax.set_title('Bounding Box Overlap (Higher=Better)', fontweight='bold')
ax.set_ylim(0, 1)
add_value_labels(ax, bars, fmt='.3f')

ax = axes[2]
bars = ax.bar(pair_labels, agree_df['disagreement_rate'] * 100, color=pair_colors)
ax.set_ylabel('Disagreement Rate (%)')
ax.set_title('Detection Disagreement (Lower=Better)', fontweight='bold')
ax.set_ylim(0, 100)
add_value_labels(ax, bars)

plt.tight_layout(rect=[0, 0, 1, 0.93])
save_figure(fig, '02_inter_system_agreement')

# Figure 3: radar chart - a single polar axes with one spoke per category.
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={'projection': 'polar'})

# Spoke labels, in the order the normalised scores are assembled for plotting.
categories = [
    'Detection\nCompleteness',
    'Trajectory\nSmoothness',
    'Speed\nPlausibility',
    'Track\nContinuity',
    'ID\nStability',
    'Coverage\nRate',
]

def normalize_metric(values, higher_is_better=True):
    """Min-max scale ``values`` onto [0, 1].

    Args:
        values: Non-empty sequence of numbers.
        higher_is_better: When False the scale is flipped, so the smallest
            input maps to 1 and the largest to 0.

    Returns:
        A list the same length as ``values``. If every input is equal there is
        no spread to scale by, and each entry is 0.5.
    """
    lowest = min(values)
    highest = max(values)
    if highest == lowest:
        return [0.5 for _ in values]

    span = highest - lowest
    scaled = []
    for value in values:
        fraction = (value - lowest) / span
        scaled.append(fraction if higher_is_better else 1 - fraction)
    return scaled

# Scale every radar metric to [0, 1] across systems. Metrics where a lower raw
# value is better are flipped, so the outer edge of the chart is always "best".
completeness, smoothness, speed_plaus, continuity, stability, coverage = (
    normalize_metric(list(quant_df[column]), higher_is_better)
    for column, higher_is_better in (
        ('mean_players_per_frame', True),
        ('jerk_mean', False),
        ('speed_violation_rate', False),
        ('mean_track_length', True),
        ('fragmentation_rate', False),
        ('coverage_rate', True),
    )
)

# One six-element list per system, in the same order as `categories`.
radar_data = {
    agent: list(axis_values)
    for agent, axis_values in zip(
        quant_df['agent'],
        zip(completeness, smoothness, speed_plaus, continuity, stability, coverage),
    )
}

# One angle per category, plus a repeat of the first to close the polygon.
spoke_angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
angles = spoke_angles + spoke_angles[:1]

for agent, values in radar_data.items():
    closed_values = values + values[:1]
    system_color = COLORS[agent]
    ax.plot(angles, closed_values, 'o-', linewidth=2,
            label=SYSTEM_LABELS[agent].replace('\n', ' '), color=system_color)
    ax.fill(angles, closed_values, alpha=0.15, color=system_color)

ax.set_xticks(spoke_angles)
ax.set_xticklabels(categories, size=11)
ax.set_ylim(0, 1)
ax.set_title('Overall System Comparison (Outer=Better)', fontsize=14, fontweight='bold', pad=20)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))

plt.tight_layout()
save_figure(fig, '03_radar_comparison')

# Figure 4: total detections (millions) next to total unique tracks (thousands).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.suptitle('Detection Statistics', fontsize=14, fontweight='bold')

detection_bar_colors = [COLORS[agent_key] for agent_key in quant_df['agent']]
detection_tick_labels = [SYSTEM_LABELS[agent_key] for agent_key in quant_df['agent']]

ax = axes[0]
detections_millions = quant_df['total_detections'] / 1e6
bars = ax.bar(x, detections_millions, width, color=detection_bar_colors)
ax.set_title('Total Detections Across All Clips', fontweight='bold')
ax.set_ylabel('Total Detections (Millions)')
ax.set_xticks(x)
ax.set_xticklabels(detection_tick_labels, fontsize=9)
add_value_labels(ax, bars, fmt='.2f')

ax = axes[1]
tracks_thousands = quant_df['total_tracks'] / 1000
bars = ax.bar(x, tracks_thousands, width, color=detection_bar_colors)
ax.set_title('Track Count', fontweight='bold')
ax.set_ylabel('Total Unique Tracks (Thousands)')
ax.set_xticks(x)
ax.set_xticklabels(detection_tick_labels, fontsize=9)
# Bars are scaled to thousands, but each one is labelled with the raw track
# count using thousands separators.
for bar, track_total in zip(bars, quant_df['total_tracks']):
    bar_top_centre = (bar.get_x() + bar.get_width() / 2, bar.get_height())
    ax.annotate(f'{track_total:,}', xy=bar_top_centre,
                xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=9)

plt.tight_layout(rect=[0, 0, 1, 0.93])
save_figure(fig, '04_detection_statistics')

# Figure 5: symmetric system-by-system heatmap of mean bounding-box IoU.
fig, ax = plt.subplots(figsize=(8, 6))

systems = list(quant_df['agent'])
n_sys = len(systems)

# Mean IoU keyed by (system_a, system_b), built from whichever rows agree_df
# actually contains. A hard-coded lookup with `.values[0]` raises IndexError as
# soon as one pair is absent, e.g. when a system produced no results.
pair_iou = {
    tuple(pair_name.split('_vs_')): iou
    for pair_name, iou in zip(agree_df['system_pair'], agree_df['iou_mean'])
}

# A system trivially agrees with itself (diagonal = 1). Pairs without a
# measurement stay NaN so they render as blank cells instead of reading as
# perfect agreement.
iou_matrix = np.full((n_sys, n_sys), np.nan)
np.fill_diagonal(iou_matrix, 1.0)

for i, sys1 in enumerate(systems):
    for j, sys2 in enumerate(systems):
        if i == j:
            continue
        # Pairs are stored once; accept either ordering of the two names.
        pair_value = pair_iou.get((sys1, sys2), pair_iou.get((sys2, sys1)))
        if pair_value is not None:
            iou_matrix[i, j] = pair_value

# str.title() would turn 'YOLO11' into 'Yolo11', so special-case that system
# and title-case only the remaining names.
heatmap_tick_labels = ['YOLO11' if s == 'yolo11_botsort' else s.title() for s in systems]

sns.heatmap(iou_matrix, annot=True, fmt='.3f', cmap='RdYlGn',
            xticklabels=heatmap_tick_labels,
            yticklabels=heatmap_tick_labels,
            ax=ax, vmin=0.5, vmax=1.0, cbar_kws={'label': 'Mean IoU'})
ax.set_title('Inter-System Bounding Box Agreement (IoU)', fontsize=12, fontweight='bold')

plt.tight_layout()
save_figure(fig, '05_agreement_heatmap')

# Figure 6: mean speed per system and the share of frames with plausible speeds.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.suptitle('Speed Analysis', fontsize=14, fontweight='bold')

speed_bar_colors = [COLORS[agent_key] for agent_key in quant_df['agent']]
speed_tick_labels = [SYSTEM_LABELS[agent_key] for agent_key in quant_df['agent']]

# Left panel: speed_mean_ms multiplied by 3.6 and plotted on a km/h axis
# (presumably m/s -> km/h), with two dashed reference levels.
ax = axes[0]
bars = ax.bar(x, quant_df['speed_mean_ms'] * 3.6, width, color=speed_bar_colors)
for level_kmh, line_color, caption in ((20, 'orange', 'Typical jog'), (35, 'red', 'Elite sprint')):
    ax.axhline(y=level_kmh, color=line_color, linestyle='--', alpha=0.7, label=caption)
ax.set_title('Average Player Speed', fontweight='bold')
ax.set_ylabel('Mean Speed (km/h)')
ax.set_xticks(x)
ax.set_xticklabels(speed_tick_labels, fontsize=9)
ax.legend(loc='upper right', fontsize=8)
add_value_labels(ax, bars)

# Right panel: complement of the violation rate, as a percentage.
# The y-axis is fixed to 90-100, so bars below 90% fall outside the visible range.
ax = axes[1]
realistic = (1 - quant_df['speed_violation_rate']) * 100
bars = ax.bar(x, realistic, width, color=speed_bar_colors)
ax.set_title('Frames with Plausible Speeds (<40 km/h)', fontweight='bold')
ax.set_ylabel('Realistic Speed Frames (%)')
ax.set_xticks(x)
ax.set_xticklabels(speed_tick_labels, fontsize=9)
ax.set_ylim(90, 100)
add_value_labels(ax, bars)

plt.tight_layout(rect=[0, 0, 1, 0.93])
save_figure(fig, '06_speed_analysis')

# Figure 7: fragmentation (x) against players per frame (y), one bubble per
# system; the bubble area grows with log10 of the mean track length.
fig, ax = plt.subplots(figsize=(10, 8))

for _, row in quant_df.iterrows():
    agent = row['agent']
    track_length = row['mean_track_length']
    display_name = SYSTEM_LABELS[agent].replace('\n', ' ')
    ax.scatter(
        row['fragmentation_rate'] * 100,
        row['mean_players_per_frame'],
        s=np.log10(track_length + 1) * 200,
        c=COLORS[agent],
        alpha=0.7,
        edgecolors='black',
        linewidth=2,
        label=f"{display_name} (Track: {track_length:.0f}f)",
    )

# Dashed reference lines for the two player-count targets.
for player_count, line_color, caption in ((22, 'green', 'Ideal 22 players'),
                                          (20, 'orange', 'Minimum 20 players')):
    ax.axhline(y=player_count, color=line_color, linestyle='--', alpha=0.5, label=caption)

ax.set_title('Detection vs. Tracking Trade-off (Bubble size = Track Length)', fontsize=14, fontweight='bold')
ax.set_xlabel('Fragmentation Rate (%)', fontsize=12)
ax.set_ylabel('Mean Players per Frame', fontsize=12)
ax.legend(loc='lower left', fontsize=9)
# Fixed viewing window: systems outside 50-105% fragmentation or 15-25 players
# per frame are not visible.
ax.set_xlim(50, 105)
ax.set_ylim(15, 25)

plt.tight_layout()
save_figure(fig, '07_tradeoff_analysis')

print("\nGenerating text report...")

# Header block.
report_rule = "=" * 80
report_lines = [
    report_rule,
    "PLAYER TRACKING SYSTEM COMPARISON REPORT",
    report_rule,
    f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"Clips Analyzed: {quant_df['clips_analyzed'].iloc[0]}",
    f"Systems: {', '.join(quant_df['agent'])}",
    "",
]

# Per-category winners: the agent on the row holding the best raw value
# (maximum where higher is better, minimum where lower is better).
agent_column = quant_df['agent']
best_detection = agent_column.loc[quant_df['mean_players_per_frame'].idxmax()]
best_smoothness = agent_column.loc[quant_df['jerk_mean'].idxmin()]
best_speed = agent_column.loc[quant_df['speed_violation_rate'].idxmin()]
best_continuity = agent_column.loc[quant_df['mean_track_length'].idxmax()]
best_stability = agent_column.loc[quant_df['fragmentation_rate'].idxmin()]
best_coverage = agent_column.loc[quant_df['coverage_rate'].idxmax()]

report_lines.extend([
    "CATEGORY WINNERS:",
    f"  Detection Completeness: {best_detection.upper()}",
    f"  Trajectory Smoothness:  {best_smoothness.upper()}",
    f"  Speed Plausibility:     {best_speed.upper()}",
    f"  Track Continuity:       {best_continuity.upper()}",
    f"  ID Stability:           {best_stability.upper()}",
    f"  Coverage Rate:          {best_coverage.upper()}",
    "",
])

report_lines.extend([
    "METRIC DEFINITIONS:",
    "  Jerk Score: Third derivative of position. Lower = smoother motion.",
    "  Speed Violation: % of frames exceeding 40 km/h. Lower = more realistic.",
    "  Detection Completeness: Avg players/frame. Ideal = 20-22.",
    "  Coverage Rate: % frames with 20-22 detections.",
    "  Track Length: Avg duration of continuous tracks. Higher = better continuity.",
    "  Fragmentation: Ratio of broken tracks. Lower = more stable IDs.",
    "",
])

# One five-line section per system, followed by a blank separator line.
for _, row in quant_df.iterrows():
    agent = row['agent']
    report_lines.extend([
        f"--- {agent.upper()} ---",
        f"  Frames: {row['total_frames']:,} | Detections: {row['total_detections']:,} | Tracks: {row['total_tracks']:,}",
        f"  Jerk: {row['jerk_mean']:.0f} | Speed: {row['speed_mean_ms']*3.6:.1f} km/h | Violations: {row['speed_violation_rate']*100:.1f}%",
        f"  Players/Frame: {row['mean_players_per_frame']:.1f} | Coverage: {row['coverage_rate']*100:.1f}%",
        f"  Track Length: {row['mean_track_length']:.0f} frames | Fragmentation: {row['fragmentation_rate']*100:.1f}%",
        "",
    ])

# One line per system pair.
report_lines.append("INTER-SYSTEM AGREEMENT:")
report_lines.extend(
    f"  {pair_row['system_pair'].replace('_vs_', ' vs ')}: IoU={pair_row['iou_mean']:.3f}, Distance={pair_row['position_mean_px']:.2f}px, Disagreement={pair_row['disagreement_rate']*100:.1f}%"
    for _, pair_row in agree_df.iterrows()
)

def calc_score(row):
    """Return the weighted overall score for the system described by ``row``.

    The score combines the normalised radar metrics with these weights:
    completeness 0.20, smoothness 0.15, speed plausibility 0.15,
    continuity 0.20, stability 0.20, coverage 0.10.

    Args:
        row: A row of ``quant_df`` (anything supporting ``row['agent']``).

    Returns:
        The weighted sum as a float.

    Raises:
        ValueError: If ``row['agent']`` does not occur in ``quant_df['agent']``.
    """
    # The normalised metric lists are ordered by row *position* in quant_df, so
    # look up the position of this agent rather than its index label. The two
    # only coincide when quant_df has a default 0..n-1 index.
    idx = list(quant_df['agent']).index(row['agent'])
    return 0.20*completeness[idx] + 0.15*smoothness[idx] + 0.15*speed_plaus[idx] + 0.20*continuity[idx] + 0.20*stability[idx] + 0.10*coverage[idx]

# Overall score per system, ranked from best to worst.
scores = {}
for _, score_row in quant_df.iterrows():
    scores[score_row['agent']] = calc_score(score_row)
ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)

report_lines.extend(["", "OVERALL RANKING:"])
report_lines.extend(
    f"  {position}. {agent_name.upper()} (Score: {agent_score:.3f})"
    for position, (agent_name, agent_score) in enumerate(ranked, 1)
)

# Recommendations reuse the category winners and the top-ranked system.
report_lines.extend([
    "",
    "RECOMMENDATIONS:",
    f"  Tactical Analysis: {best_detection.upper()} (best detection)",
    f"  Player Tracking:   {best_continuity.upper()} (best continuity)",
    f"  General Use:       {ranked[0][0].upper()} (best overall)",
])

# Write the whole report as newline-joined text.
report_path = OUTPUT_DIR / "comparison_report.txt"
report_path.write_text('\n'.join(report_lines))
print(f"Saved: {report_path}")

# Compact per-system summary table: rounded headline metrics plus overall score.
summary_agents = list(quant_df['agent'])
summary_data = {
    'System': [SYSTEM_LABELS[agent_key].replace('\n', ' ') for agent_key in summary_agents],
    'Players/Frame': quant_df['mean_players_per_frame'].round(1),
    'Jerk': quant_df['jerk_mean'].round(0),
    'Speed (km/h)': quant_df['speed_mean_ms'].mul(3.6).round(1),
    'Violation %': quant_df['speed_violation_rate'].mul(100).round(1),
    'Track Length': quant_df['mean_track_length'].round(0),
    'Fragment %': quant_df['fragmentation_rate'].mul(100).round(1),
    'Coverage %': quant_df['coverage_rate'].mul(100).round(1),
    'Score': [scores[agent_key] for agent_key in summary_agents],
}
summary_df = pd.DataFrame(summary_data)

summary_path = OUTPUT_DIR / "summary_table.csv"
summary_df.to_csv(summary_path, index=False)
print(f"Saved: {summary_path}")

import shutil
from google.colab import files

# Zip the contents of OUTPUT_DIR into /content/tracking_report.zip.
shutil.make_archive(base_name="/content/tracking_report", format='zip', root_dir=OUTPUT_DIR)

# List what is in the output directory, in sorted order.
print(f"\nFiles in {OUTPUT_DIR}:")
for output_entry in sorted(OUTPUT_DIR.iterdir()):
    print(f"  {output_entry.name}")

# Hand the archive to the Colab download helper.
print("\nDownloading...")
files.download("/content/tracking_report.zip")
print("Done!")

In [None]:
# CELL C: VISUALIZATION (FIXED)
"""
Tracking Visualization Module

Creates overlay and side-by-side comparison videos showing tracking results
from multiple systems on the same video frames.

Visualization Options:
    1. Overlay: All systems drawn on the same video (different colors)
    2. Side-by-side: Three panels showing each system separately

NOTE: Videos are re-encoded with ffmpeg for browser/Colab compatibility.
"""

import cv2
import numpy as np
import json
import subprocess
from pathlib import Path
from collections import defaultdict
from IPython.display import display, HTML, Video
import ipywidgets as widgets

# CONFIGURATION

# Update these paths to match your setup.
# NOTE(review): VIDEOS_DIR and CLIPS_DIR are also assigned in Cell 1 (from
# BASE_DIR); assigning them again here rebinds the notebook-wide names.
TRACKING_RESULTS_DIR = Path("/content/comp-4009-tracking-results")
VIDEOS_DIR = Path("/content/videos")
CLIPS_DIR = Path("/content/clips")

# Output directory - use /content/output/visualization/ which persists in Colab
# The directory is created as soon as this cell runs.
VIZ_OUTPUT_DIR = Path("/content/output/visualization")
VIZ_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
print(f"Visualization output directory: {VIZ_OUTPUT_DIR}")

# System keys; each is also the sub-directory and JSON file-name stem used when
# locating a system's tracking output.
AGENTS = ["eagle", "darkmyter", "yolo11_botsort"]

# Colors for each system (BGR for OpenCV)
SYSTEM_COLORS = {
    "eagle": (0, 165, 255),        # Orange
    "yolo11_botsort": (0, 255, 0), # Green
    "darkmyter": (255, 0, 0),      # Blue
}

# Human-readable names used in legends and panel titles.
# NOTE(review): the report cell earlier in the notebook also indexes a
# SYSTEM_LABELS mapping; this assignment rebinds that name, so re-running the
# report cell after this one would pick up these labels - confirm intended.
SYSTEM_LABELS = {
    "eagle": "Eagle",
    "yolo11_botsort": "YOLO11+BoT-SORT",
    "darkmyter": "Darkmyter",
}

# HELPER FUNCTIONS

def reencode_video(input_path: Path, output_path: Path):
    """Re-encode a video to H.264 / yuv420p for browser and Colab playback.

    Args:
        input_path: Existing video file to convert. It is deleted after a
            successful conversion and moved to ``output_path`` otherwise.
        output_path: Destination file.

    Returns:
        True when ffmpeg produced a non-empty ``output_path``. False when
        ffmpeg is missing, exits with an error, or leaves no usable output; in
        those cases the original file (if it exists) is moved to
        ``output_path`` unchanged so the caller still gets a file.
    """
    import shutil

    def _keep_original(announce: bool) -> bool:
        # Fall back to the un-encoded file so a result still lands at output_path.
        if input_path.exists():
            shutil.move(str(input_path), str(output_path))
            if announce:
                print(f"[OK] Saved original file: {output_path}")
        return False

    try:
        subprocess.run([
            "ffmpeg", "-y", "-i", str(input_path),
            # libx264 with yuv420p rejects odd frame sizes; pad width/height up
            # to the next even number (a no-op when they are already even).
            "-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2",
            "-c:v", "libx264", "-preset", "fast", "-crf", "23",
            "-pix_fmt", "yuv420p",
            str(output_path)
        ], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print("[WARNING] ffmpeg failed - saving original file")
        # Surface the last line of ffmpeg's stderr so the failure can be diagnosed.
        stderr_lines = (e.stderr or "").strip().splitlines()
        if stderr_lines:
            print(f"  ffmpeg: {stderr_lines[-1]}")
        return _keep_original(announce=True)
    except FileNotFoundError:
        # Raised by subprocess when the ffmpeg executable cannot be found.
        print("[WARNING] ffmpeg not found - saving original file")
        return _keep_original(announce=True)

    # ffmpeg exited cleanly; only discard the source once the result is verified.
    if output_path.exists() and output_path.stat().st_size > 0:
        if input_path.exists() and input_path != output_path:
            input_path.unlink()
        print(f"[OK] Video saved: {output_path}")
        return True

    print("[WARNING] ffmpeg ran but output file is missing or empty")
    return _keep_original(announce=False)


def load_tracking_for_viz(video_name: str, system: str, class_filter: str = "player") -> list:
    """
    Load one system's tracking output for a video as plain dicts.

    Reads ``<TRACKING_RESULTS_DIR>/<video_name>/full/<system>/<system>_output.json``
    and parses it with the matching Cell A parser (parse_eagle,
    parse_darkmyter, parse_yolo_botsort).

    Args:
        video_name: Directory name of the video inside the results folder.
        system: One of "eagle", "darkmyter", "yolo11_botsort".
        class_filter: Keep only detections whose ``class_name`` equals this
            value; a falsy value keeps every detection.

    Returns:
        A list of dicts with keys frame_id, track_id, bbox, score and
        class_name. Empty when the file is missing (a warning is printed) or
        the system name is not recognised.
    """
    json_path = TRACKING_RESULTS_DIR / video_name / "full" / system / f"{system}_output.json"
    if not json_path.exists():
        print(f"  [WARNING] Not found: {json_path}")
        return []

    with open(json_path) as handle:
        payload = json.load(handle)

    # Lambdas defer the name lookup, so only the parser that is actually
    # requested has to be defined.
    parsers = {
        "eagle": lambda raw: parse_eagle(raw),
        "darkmyter": lambda raw: parse_darkmyter(raw),
        "yolo11_botsort": lambda raw: parse_yolo_botsort(raw),
    }
    parser = parsers.get(system)
    if parser is None:
        return []

    # Flatten the parser's detection objects into dicts for the drawing code.
    return [
        {
            "frame_id": det.frame_id,
            "track_id": det.track_id,
            "bbox": det.bbox,
            "score": det.score,
            "class_name": det.class_name,
        }
        for det in parser(payload)
        if not class_filter or det.class_name == class_filter
    ]


def organize_by_frame_viz(detections: list) -> dict:
    """Group detection dicts by their ``"frame_id"`` value.

    Returns a plain dict mapping each frame_id to the list of detections seen
    for it, with detections kept in their input order.
    """
    grouped = {}
    for detection in detections:
        grouped.setdefault(detection["frame_id"], []).append(detection)
    return grouped


def discover_videos_with_results() -> list:
    """List the videos that have tracking output for at least one system.

    A video qualifies when ``TRACKING_RESULTS_DIR/<video>/full/<agent>`` exists
    for any agent in ``AGENTS``.

    Returns:
        Sorted list of video directory names. Empty when the results directory
        itself does not exist, so callers can report "no results" instead of
        crashing on a missing folder.
    """
    if not TRACKING_RESULTS_DIR.is_dir():
        return []

    return sorted(
        item.name
        for item in TRACKING_RESULTS_DIR.iterdir()
        if item.is_dir()
        and any((item / "full" / agent).exists() for agent in AGENTS)
    )


def find_source_video(video_name: str) -> Path:
    """Locate the source video file for a tracking-result directory name.

    Search order:
      1. An exact ``<video_name><ext>`` match in VIDEOS_DIR, then CLIPS_DIR,
         then /content.
      2. Only if no exact match exists in any location: a fuzzy match, where
         the first 25 characters of one name occur in the other (video names
         can be truncated). Among fuzzy candidates, the file sharing the
         longest leading run of characters with ``video_name`` wins; ties go to
         the earlier location, then to the alphabetically first file.

    Exact matches are resolved across all locations first, so that a loosely
    matching file in an earlier directory (for example the full match video)
    cannot be picked over the exact clip stored in a later one.

    Args:
        video_name: Name of the video (directory name in the results folder).

    Returns:
        Path to the video file, or None when nothing matches.
    """
    search_locations = [
        loc for loc in (VIDEOS_DIR, CLIPS_DIR, Path("/content")) if loc.exists()
    ]
    extensions = ['.mp4', '.avi', '.mov', '.mkv', '.MP4', '.AVI', '.MOV', '.MKV']
    known_suffixes = {ext.lower() for ext in extensions}

    # Pass 1: exact file name in any location.
    for loc in search_locations:
        for ext in extensions:
            video_path = loc / f"{video_name}{ext}"
            if video_path.exists():
                return video_path

    def _shared_prefix_len(first: str, second: str) -> int:
        # Number of leading characters the two names have in common.
        count = 0
        for left_char, right_char in zip(first, second):
            if left_char != right_char:
                break
            count += 1
        return count

    # Pass 2: fuzzy match, choosing deterministically among the candidates.
    best_match = None
    best_overlap = -1
    for loc in search_locations:
        for candidate in sorted(loc.glob("*")):
            if not candidate.is_file() or candidate.suffix.lower() not in known_suffixes:
                continue
            stem = candidate.stem
            if video_name[:25] in stem or stem[:25] in video_name:
                overlap = _shared_prefix_len(video_name, stem)
                if overlap > best_overlap:
                    best_match, best_overlap = candidate, overlap

    return best_match


# DRAWING FUNCTIONS

def draw_detection(frame, det: dict, color: tuple, prefix: str):
    """Draw one detection on ``frame``: its box plus a filled ID tag above it.

    Args:
        frame: Image passed to the OpenCV drawing calls.
        det: Detection dict; ``det["bbox"]`` holds x1, y1, x2, y2 and
            ``det["track_id"]`` the ID shown in the tag.
        color: Colour for the box outline and the tag background.
        prefix: Text placed directly before the track ID in the tag.
    """
    left, top, right, bottom = (int(coord) for coord in det["bbox"])
    tag_text = f"{prefix}{det['track_id']}"
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.4
    text_thickness = 1

    # Box outline, 2 px thick.
    cv2.rectangle(frame, (left, top), (right, bottom), color, 2)

    # Filled strip sized to the text, sitting on the top edge of the box,
    # with the tag written over it in white.
    (text_w, text_h), _ = cv2.getTextSize(tag_text, font, font_scale, text_thickness)
    cv2.rectangle(frame, (left, top - text_h - 6), (left + text_w + 4, top), color, -1)
    cv2.putText(frame, tag_text, (left + 2, top - 4),
                font, font_scale, (255, 255, 255), text_thickness)


def draw_legend(frame, systems: list, start_y: int = 30):
    """Draw a colour legend down the left edge of ``frame``.

    Each system gets a filled swatch and its display label, one row per
    system, 25 px apart, with the first row's baseline at ``start_y``.
    Systems missing from SYSTEM_COLORS are drawn grey; systems missing from
    SYSTEM_LABELS are labelled with their raw key.
    """
    row_spacing = 25
    for row_index, system in enumerate(systems):
        swatch_color = SYSTEM_COLORS.get(system, (128, 128, 128))
        caption = SYSTEM_LABELS.get(system, system)
        baseline_y = start_y + row_index * row_spacing

        # Swatch first, then the caption to its right in white.
        cv2.rectangle(frame, (10, baseline_y - 15), (30, baseline_y), swatch_color, -1)
        cv2.putText(frame, caption, (35, baseline_y - 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)


def draw_frame_counter(frame, frame_idx: int, width: int):
    """Write ``Frame: <frame_idx>`` in white, 150 px left of ``width`` at y=30."""
    text_origin = (width - 150, 30)
    cv2.putText(
        frame,
        f"Frame: {frame_idx}",
        text_origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        (255, 255, 255),
        2,
    )


# VIDEO CREATION FUNCTIONS

def create_overlay_video(video_name: str, systems: list = None,
                         max_frames: int = 300, show_legend: bool = True,
                         output_filename: str = None) -> Path:
    """
    Create a video with all tracking systems overlaid on the same frames.

    Detections are looked up by the zero-based index of each frame read from
    the source video. The raw OpenCV output is written to a temporary file and
    then passed to reencode_video() for browser compatibility.

    Args:
        video_name: Name of the video (directory name in results)
        systems: List of systems to include (default: all)
        max_frames: Maximum number of frames to process
        show_legend: Whether to show color legend
        output_filename: Custom output filename

    Returns:
        Path to output video, or None if failed (source video not found, no
        tracking data, video could not be opened or written, or no output file
        exists after re-encoding)
    """
    systems = systems or AGENTS

    print(f"\n{'='*60}")
    print(f"Creating OVERLAY video: {video_name}")
    print(f"Systems: {', '.join(systems)}")
    print(f"{'='*60}\n")

    # Find source video
    video_path = find_source_video(video_name)
    if video_path is None:
        print(f"[ERROR] Could not find source video for: {video_name}")
        print(f"  Searched in: {VIDEOS_DIR}, {CLIPS_DIR}")
        return None

    print(f"Source video: {video_path}")

    # Load tracking data for each system
    all_tracks = {}
    for system in systems:
        print(f"Loading {system}...", end=" ")
        detections = load_tracking_for_viz(video_name, system)
        if detections:
            all_tracks[system] = organize_by_frame_viz(detections)
            print(f"{len(detections)} detections")
        else:
            print("No data")

    if not all_tracks:
        print("[ERROR] No tracking data loaded for any system")
        return None

    # Per-system drawing style, resolved once instead of on every frame.
    active_systems = [s for s in systems if s in all_tracks]
    draw_styles = {
        s: (SYSTEM_COLORS.get(s, (128, 128, 128)),
            SYSTEM_LABELS.get(s, s)[0])  # First letter as prefix
        for s in active_systems
    }

    cap = cv2.VideoCapture(str(video_path))
    out = None
    try:
        if not cap.isOpened():
            print(f"[ERROR] Could not open video: {video_path}")
            return None

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print(f"Video: {width}x{height} @ {fps:.1f} fps, {total_frames} frames")

        # Setup output paths - save to visualization/<video_name>/
        safe_name = "".join(c if c.isalnum() or c in "._- " else "_" for c in video_name)
        video_output_dir = VIZ_OUTPUT_DIR / safe_name
        video_output_dir.mkdir(parents=True, exist_ok=True)

        if output_filename is None:
            systems_str = "_".join(systems)
            output_filename = f"{safe_name}_{systems_str}.mp4"

        temp_output_path = video_output_dir / f"temp_{output_filename}"
        final_output_path = video_output_dir / output_filename

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(str(temp_output_path), fourcc, fps, (width, height))
        if not out.isOpened():
            print(f"[ERROR] Could not open video writer: {temp_output_path}")
            return None

        # If the container reports no usable frame count, rely on max_frames;
        # the read loop stops at the end of the stream anyway.
        frames_to_process = min(max_frames, total_frames) if total_frames > 0 else max_frames
        print(f"\nProcessing {frames_to_process} frames...")

        frame_idx = 0
        while frame_idx < frames_to_process:
            ret, frame = cap.read()
            if not ret:
                break

            # Draw detections from each system
            for system in active_systems:
                color, prefix = draw_styles[system]
                for det in all_tracks[system].get(frame_idx, []):
                    draw_detection(frame, det, color, prefix)

            if show_legend:
                draw_legend(frame, active_systems)

            draw_frame_counter(frame, frame_idx, width)

            out.write(frame)
            frame_idx += 1

            if frame_idx % 100 == 0:
                print(f"  Processed {frame_idx}/{frames_to_process}")
    finally:
        # Always release the capture and writer, even if drawing raised.
        cap.release()
        if out is not None:
            out.release()

    # Re-encode for browser compatibility
    print("Re-encoding for browser compatibility...")
    reencode_video(temp_output_path, final_output_path)

    # Verify file exists
    if not final_output_path.exists():
        print(f"\n[ERROR] File was not saved! Check permissions for: {final_output_path.parent}")
        return None

    file_size_mb = final_output_path.stat().st_size / (1024 * 1024)
    print(f"\n{'='*60}")
    print(f"SAVED TO: {final_output_path}")
    print(f"File size: {file_size_mb:.1f} MB")
    print(f"{'='*60}")

    return final_output_path


def create_side_by_side_video(video_name: str, max_frames: int = 300,
                               output_filename: str = None) -> Path:
    """
    Create a 3-panel side-by-side video showing each system separately.

    One panel is rendered for every entry in AGENTS, each at half the source
    width and height, with detections scaled to the panel size. A system with
    no tracking data still gets a panel, just without boxes.

    Args:
        video_name: Name of the video (directory name in results)
        max_frames: Maximum number of frames to process
        output_filename: Custom output filename

    Returns:
        Path to output video, or None if failed (source video not found, no
        tracking data for any system, video could not be opened or written, or
        no output file exists after re-encoding)
    """
    print(f"\n{'='*60}")
    print(f"Creating SIDE-BY-SIDE video: {video_name}")
    print(f"{'='*60}\n")

    # Find source video
    video_path = find_source_video(video_name)
    if video_path is None:
        print(f"[ERROR] Could not find source video")
        return None

    print(f"Source video: {video_path}")

    # Load all tracking data
    all_tracks = {}
    for system in AGENTS:
        detections = load_tracking_for_viz(video_name, system)
        if detections:
            all_tracks[system] = organize_by_frame_viz(detections)
            print(f"Loaded {system}: {len(detections)} detections")

    # Same guard as the overlay video: with no data there is nothing to compare.
    if not all_tracks:
        print("[ERROR] No tracking data loaded for any system")
        return None

    cap = cv2.VideoCapture(str(video_path))
    out = None
    try:
        if not cap.isOpened():
            print(f"[ERROR] Could not open video: {video_path}")
            return None

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Each panel is half the source size; panels are stacked horizontally.
        panel_width = width // 2
        panel_height = height // 2
        output_width = panel_width * len(AGENTS)
        output_height = panel_height

        # Factors mapping source-pixel boxes onto a panel (constant per video).
        scale_x = panel_width / width
        scale_y = panel_height / height

        # Setup output paths - save to visualization/<video_name>/
        safe_name = "".join(c if c.isalnum() or c in "._- " else "_" for c in video_name)
        video_output_dir = VIZ_OUTPUT_DIR / safe_name
        video_output_dir.mkdir(parents=True, exist_ok=True)

        if output_filename is None:
            output_filename = f"{safe_name}_sidebyside.mp4"

        temp_output_path = video_output_dir / f"temp_{output_filename}"
        final_output_path = video_output_dir / output_filename

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(str(temp_output_path), fourcc, fps, (output_width, output_height))
        if not out.isOpened():
            print(f"[ERROR] Could not open video writer: {temp_output_path}")
            return None

        # If the container reports no usable frame count, rely on max_frames;
        # the read loop stops at the end of the stream anyway.
        frames_to_process = min(max_frames, total_frames) if total_frames > 0 else max_frames
        print(f"\nProcessing {frames_to_process} frames...")
        print(f"Output size: {output_width}x{output_height}")

        frame_idx = 0
        while frame_idx < frames_to_process:
            ret, frame = cap.read()
            if not ret:
                break

            panels = []
            for system in AGENTS:
                # cv2.resize returns a new image, so each panel is independent.
                panel = cv2.resize(frame, (panel_width, panel_height))

                color = SYSTEM_COLORS.get(system, (128, 128, 128))
                label = SYSTEM_LABELS.get(system, system)

                # Draw system label at top
                cv2.putText(panel, label, (10, 25),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

                # Draw detections (with scaled coordinates)
                for det in all_tracks.get(system, {}).get(frame_idx, []):
                    bbox = det["bbox"]
                    x1, y1 = int(bbox[0] * scale_x), int(bbox[1] * scale_y)
                    x2, y2 = int(bbox[2] * scale_x), int(bbox[3] * scale_y)
                    cv2.rectangle(panel, (x1, y1), (x2, y2), color, 1)

                    # Small track ID label
                    cv2.putText(panel, str(det["track_id"]), (x1, y1 - 2),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)

                panels.append(panel)

            # Combine panels horizontally
            out.write(np.hstack(panels))
            frame_idx += 1

            if frame_idx % 100 == 0:
                print(f"  Processed {frame_idx}/{frames_to_process}")
    finally:
        # Always release the capture and writer, even if drawing raised.
        cap.release()
        if out is not None:
            out.release()

    # Re-encode for browser compatibility
    print("Re-encoding for browser compatibility...")
    reencode_video(temp_output_path, final_output_path)

    # Verify file exists
    if not final_output_path.exists():
        print(f"\n[ERROR] File was not saved! Check permissions for: {final_output_path.parent}")
        return None

    file_size_mb = final_output_path.stat().st_size / (1024 * 1024)
    print(f"\n{'='*60}")
    print(f"SAVED TO: {final_output_path}")
    print(f"File size: {file_size_mb:.1f} MB")
    print(f"{'='*60}")

    return final_output_path


# FRAME EXTRACTION (for report screenshots)

def extract_comparison_frame(video_name: str, frame_number: int,
                              systems: list = None, output_filename: str = None) -> Path:
    """
    Extract a single frame with all tracking overlays for use in reports.

    Args:
        video_name: Name of the video
        frame_number: Frame number to extract
        systems: Systems to overlay (default: all)
        output_filename: Output image filename

    Returns:
        Path to saved image, or None if the source video was not found, could
        not be opened, the frame could not be read, or the image could not be
        written
    """
    systems = systems or AGENTS

    video_path = find_source_video(video_name)
    if video_path is None:
        print(f"[ERROR] Could not find source video")
        return None

    # Load tracking data
    all_tracks = {}
    for system in systems:
        detections = load_tracking_for_viz(video_name, system)
        if detections:
            all_tracks[system] = organize_by_frame_viz(detections)

    # Open video and seek to frame; release the capture whatever happens.
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            print(f"[ERROR] Could not open video: {video_path}")
            return None
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
    finally:
        cap.release()

    if not ret:
        print(f"[ERROR] Could not read frame {frame_number}")
        return None

    # Draw detections
    active_systems = [s for s in systems if s in all_tracks]
    for system in active_systems:
        color = SYSTEM_COLORS.get(system, (128, 128, 128))
        prefix = SYSTEM_LABELS.get(system, system)[0]

        for det in all_tracks[system].get(frame_number, []):
            draw_detection(frame, det, color, prefix)

    # Draw legend
    draw_legend(frame, active_systems)
    draw_frame_counter(frame, frame_number, width)

    # Save frame to visualization/<video_name>/
    safe_name = "".join(c if c.isalnum() or c in "._- " else "_" for c in video_name)
    video_output_dir = VIZ_OUTPUT_DIR / safe_name
    video_output_dir.mkdir(parents=True, exist_ok=True)

    if output_filename is None:
        output_filename = f"{safe_name}_frame_{frame_number}.png"

    output_path = video_output_dir / output_filename
    # cv2.imwrite signals failure through its return value, so check it before
    # announcing that the file was saved.
    if not cv2.imwrite(str(output_path), frame):
        print(f"[ERROR] Could not write image: {output_path}")
        return None

    print(f"\n{'='*60}")
    print(f"SAVED TO: {output_path}")
    print(f"{'='*60}")

    return output_path


# INTERACTIVE UI

def run_visualization_ui():
    """Show an ipywidgets form for generating comparison videos.

    The form offers a video dropdown (from discover_videos_with_results()),
    one checkbox per system, a max-frames slider and an output-style choice.
    Pressing the button renders the chosen video inside the form's output area
    and embeds the result. Prints an error and returns without showing the
    form when no videos with tracking results are found.
    """
    videos = discover_videos_with_results()

    if not videos:
        print("[ERROR] No videos with tracking results found!")
        print(f"  Searched in: {TRACKING_RESULTS_DIR}")
        return

    print("=" * 60)
    print("TRACKING VISUALIZATION TOOL")
    print("=" * 60)
    print(f"\nFound {len(videos)} videos with tracking results.\n")

    # Create widgets
    video_dropdown = widgets.Dropdown(
        options=videos,
        value=videos[0],
        description='Video:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='90%')
    )

    eagle_cb = widgets.Checkbox(value=True, description='Eagle (Orange)')
    darkmyter_cb = widgets.Checkbox(value=True, description='Darkmyter (Blue)')
    yolo_cb = widgets.Checkbox(value=True, description='YOLO11+BoT-SORT (Green)')

    # System key -> its checkbox, in the order systems are passed on.
    system_checkboxes = [
        ("eagle", eagle_cb),
        ("darkmyter", darkmyter_cb),
        ("yolo11_botsort", yolo_cb),
    ]

    frame_slider = widgets.IntSlider(
        value=300, min=60, max=1800, step=60,
        description='Max Frames:',
        style={'description_width': 'initial'}
    )

    output_type = widgets.RadioButtons(
        options=['Overlay (all on one video)', 'Side-by-side (3 panels)'],
        value='Overlay (all on one video)',
        description='Style:'
    )

    generate_btn = widgets.Button(
        description='Generate Comparison Video',
        button_style='success',
        layout=widgets.Layout(width='250px')
    )

    output_area = widgets.Output()

    def on_generate(b):
        # Block repeat clicks while a video is rendering; always re-enable.
        b.disabled = True
        try:
            with output_area:
                output_area.clear_output()

                video_name = video_dropdown.value

                # Get selected systems
                systems = [key for key, checkbox in system_checkboxes if checkbox.value]

                if not systems:
                    print("[ERROR] Select at least one system!")
                    return

                # Create video
                if output_type.value.startswith('Side'):
                    # The side-by-side renderer always draws every system, so
                    # say so rather than silently ignoring a partial selection.
                    if len(systems) < len(system_checkboxes):
                        print("[INFO] Side-by-side always shows all systems; "
                              "the system selection only applies to Overlay.")
                    path = create_side_by_side_video(video_name, frame_slider.value)
                else:
                    path = create_overlay_video(video_name, systems, frame_slider.value)

                # Display result
                if path and path.exists():
                    print("\n" + "=" * 60)
                    print("VIDEO READY!")
                    print("=" * 60)
                    display(Video(str(path), embed=True, width=900))
        finally:
            b.disabled = False

    generate_btn.on_click(on_generate)

    # Layout widgets
    display(widgets.VBox([
        widgets.HTML("<h3>1. Select Video</h3>"),
        video_dropdown,
        widgets.HTML("<h3>2. Select Systems to Compare</h3>"),
        widgets.HBox([eagle_cb, darkmyter_cb, yolo_cb]),
        widgets.HTML("<h3>3. Options</h3>"),
        frame_slider,
        output_type,
        widgets.HTML("<br>"),
        generate_btn,
        output_area
    ]))


# QUICK FUNCTIONS

def quick_overlay(video_name: str, max_frames: int = 300) -> Path:
    """Shortcut: build the overlay video for ``video_name`` with every system in AGENTS."""
    return create_overlay_video(video_name, systems=AGENTS, max_frames=max_frames)


def quick_sidebyside(video_name: str, max_frames: int = 300) -> Path:
    """Shortcut: build the side-by-side comparison video for ``video_name``."""
    return create_side_by_side_video(video_name, max_frames=max_frames)


def quick_frame(video_name: str, frame_number: int) -> Path:
    """Shortcut: save one annotated comparison frame of ``video_name`` as an image."""
    return extract_comparison_frame(video_name, frame_number=frame_number)


# RUN

# Usage banner, printed one line at a time when the cell is run.
for banner_line in (
    "CELL C: Visualization Module Loaded",
    "\nQuick functions:",
    "  quick_overlay('VIDEO_NAME')     - All systems on one video",
    "  quick_sidebyside('VIDEO_NAME')  - Side-by-side panels",
    "  quick_frame('VIDEO_NAME', 100)  - Extract frame 100 as image",
    "\nExample:",
    "  quick_overlay('FULL MATCH  Belgium 1-2 Italy  VIP Tactical Camera 720-seg14')",
    "\nOr use the interactive UI below:\n",
):
    print(banner_line)

# Show the interactive form right away.
run_visualization_ui()