In [1]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install ultralytics
# from ultralytics import YOLO

# # Load model (use path after downloading)
# model = YOLO("/kaggle/input/yolo_v_11/pytorch/default/1/UltrlyticsYOLOv11.pt")  # replace with actual path


Collecting ultralytics
  Downloading ultralytics-8.3.160-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cusolver_cu12-11.6

# Step1  Detection

In [5]:
from ultralytics import YOLO
import cv2
import os

def detect_players(video_path, output_dir, model_path='/kaggle/input/yolo_v_11/pytorch/default/1/UltrlyticsYOLOv11.pt', conf_thres=0.4):
    os.makedirs(output_dir, exist_ok=True)
    
    model = YOLO(model_path)

    cap = cv2.VideoCapture(video_path)
    video_name = os.path.basename(video_path).split('.')[0]
    frame_idx = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        # Run YOLO detection
        results = model(frame, conf=conf_thres)
        
        for result in results:
            boxes = result.boxes.cpu().numpy()
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                conf = box.conf[0]
                cls = int(box.cls[0])
                
                label = f'{model.names[cls]} {conf:.2f}'
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                txt_name = os.path.join(output_dir, f"{video_name}_frame_{frame_idx:05d}.txt")
                with open(txt_name, "a") as f:
                    f.write(f"{frame_idx},{x1},{y1},{x2},{y2},{conf:.4f},{cls}\n")


        cv2.imwrite(os.path.join(output_dir, f"{video_name}_frame_{frame_idx:05d}.jpg"), frame)
        frame_idx += 1

    cap.release()
    print(f"Done: {video_name} → {output_dir}")


if __name__ == "__main__":
    detect_players("/kaggle/input/mc-mot/broadcast.mp4", "outputs/broadcast_detections")
    detect_players("/kaggle/input/mc-mot/tacticam.mp4", "outputs/tacticam_detections")


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.

0: 384x640 3 players, 72.9ms
Speed: 8.4ms preprocess, 72.9ms inference, 231.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 player, 47.8ms
Speed: 3.8ms preprocess, 47.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 players, 47.8ms
Speed: 3.7ms preprocess, 47.8ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 47.7ms
Speed: 2.2ms preprocess, 47.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 47.8ms
Speed: 2.2ms preprocess, 47.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 players, 1 referee, 41.3ms
Speed

In [6]:
import cv2
import os

def frames_to_video(frames_dir, output_video, fps=30):
    frame_files = sorted([f for f in os.listdir(frames_dir) if f.endswith(".jpg")])
    if not frame_files:
        print("No frames found.")
        return
    
    first_frame = cv2.imread(os.path.join(frames_dir, frame_files[0]))
    height, width, _ = first_frame.shape

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))

    for fname in frame_files:
        frame = cv2.imread(os.path.join(frames_dir, fname))
        out.write(frame)

    out.release()
    print(f"Video saved to {output_video}")

if __name__ == "__main__":
    frames_to_video("/kaggle/working/outputs/broadcast_detections", "broadcast_detected.mp4")
    frames_to_video("/kaggle/working/outputs/tacticam_detections", "tacticam_detected.mp4")


Video saved to broadcast_detected.mp4
Video saved to tacticam_detected.mp4


In [7]:
!pip install filterpy lap scikit-image
# pip install filterpy lap scikit-image


Collecting filterpy
  Downloading filterpy-1.4.5.zip (177 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.0/178.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting lap
  Downloading lap-0.5.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.2 kB)
Downloading lap-0.5.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hBuilding wheels for collected packages: filterpy
  Building wheel for filterpy (setup.py) ... [?25l[?25hdone
  Created wheel for filterpy: filename=filterpy-1.4.5-py3-none-any.whl size=110460 sha256=2822747981873538794efff186f7099228a07c842706d2c519e31b80af632f9b
  Stored in directory: /root/.cache/pip/wheels/12/dc/3c

In [8]:
!pip install boxmot

Collecting boxmot
  Downloading boxmot-13.0.13-py3-none-any.whl.metadata (13 kB)
Collecting bayesian-optimization>=2.0.4 (from boxmot)
  Downloading bayesian_optimization-3.0.0-py3-none-any.whl.metadata (10 kB)
Collecting ftfy<7.0.0,>=6.1.3 (from boxmot)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting lapx<1.0.0,>=0.5.5 (from boxmot)
  Downloading lapx-0.5.11.post1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)
Collecting loguru<1.0.0,>=0.7.2 (from boxmot)
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Collecting mplcursors>=0.6 (from boxmot)
  Downloading mplcursors-0.6-py3-none-any.whl.metadata (3.0 kB)
Collecting pyqt5>=5.15.11 (from boxmot)
  Downloading PyQt5-5.15.11-cp38-abi3-manylinux_2_17_x86_64.whl.metadata (2.1 kB)
Collecting scikit-learn<2.0.0,>=1.3.0 (from boxmot)
  Downloading scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB

# Step2

In [9]:
!pip install scikit-learn==1.2.2 --force-reinstall --no-cache-dir


Collecting scikit-learn==1.2.2
  Downloading scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting numpy>=1.17.3 (from scikit-learn==1.2.2)
  Downloading numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scipy>=1.3.2 (from scikit-learn==1.2.2)
  Downloading scipy-1.16.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m250.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.1.1 (from scikit-learn==1.2.2)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn==1.2.2)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014

In [10]:
import cv2
import numpy as np
import torch
print("cv2:", cv2.__version__)
print("numpy:", np.__version__)
print("torch:", torch.__version__)
from boxmot import BotSort


import inspect
print(inspect.getsource(BotSort.__init__))

cv2: 4.11.0
numpy: 1.26.4
torch: 2.6.0+cu124
    def __init__(
        self,
        reid_weights: Path,
        device: torch.device,
        half: bool,
        per_class: bool = False,
        track_high_thresh: float = 0.5,
        track_low_thresh: float = 0.1,
        new_track_thresh: float = 0.6,
        track_buffer: int = 30,
        match_thresh: float = 0.8,
        proximity_thresh: float = 0.5,
        appearance_thresh: float = 0.25,
        cmc_method: str = "ecc",
        frame_rate=30,
        fuse_first_associate: bool = False,
        with_reid: bool = True,
    ):
        super().__init__(per_class=per_class)
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]
        BaseTrack.clear_count()

        self.per_class = per_class
        self.track_high_thresh = track_high_thresh
        self.track_low_thresh = track_low_thresh
        self.new_track_thresh = new_track_thresh
        self.match_thresh = match_th

In [11]:
import yaml
import os

# Define BoT-SORT configuration for boxmot
botsort_config = {
    "track_high_thresh": {"default": 0.5},
    "track_low_thresh": {"default": 0.1},
    "new_track_thresh": {"default": 0.6},
    "track_buffer": {"default": 30},
    "match_thresh": {"default": 0.8},
    "proximity_thresh": {"default": 0.5},
    "appearance_thresh": {"default": 0.25},
    "with_reid": {"default": False},
    "frame_rate": {"default": 30.0}
}

config_path = "/kaggle/working/botsort.yaml"
with open(config_path, "w") as f:
    yaml.dump(botsort_config, f)

print(f"Configuration saved to {config_path}")

Configuration saved to /kaggle/working/botsort.yaml


In [12]:

import cv2
import os
import numpy as np
from boxmot import create_tracker
import torch

def run_botsort(video_path, detection_dir, output_img_dir, output_txt_dir, model_name):
    os.makedirs(output_img_dir, exist_ok=True)
    os.makedirs(output_txt_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    frame_idx = 0

    tracker = create_tracker(
        tracker_type="botsort",
        tracker_config="/kaggle/working/botsort.yaml",  
        device="cuda:0" if torch.cuda.is_available() else "cpu"
    )

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        txt_file = os.path.join(detection_dir, f"{model_name}_frame_{frame_idx:05d}.txt")
        detections = []
        if os.path.exists(txt_file):
            with open(txt_file, "r") as f:
                for line in f:
                    parts = list(map(float, line.strip().split(',')))
                    detections.append(parts[1:6])  # [x1, y1, x2, y2, conf]

        detections = np.array(detections) if detections else np.empty((0, 5))
        dummy_class_ids = np.zeros((detections.shape[0], 1))
        detections_input = np.hstack((detections, dummy_class_ids)) if detections.size > 0 else np.empty((0, 6))

        tracks = tracker.update(detections_input, frame)
        frame_save_path = os.path.join(output_img_dir, f"{frame_idx:05d}.jpg")
        txt_save_path = os.path.join(output_txt_dir, f"{frame_idx:05d}.txt")

        with open(txt_save_path, "w") as f:
            for track in tracks:
                x1, y1, x2, y2, track_id = map(int, track[:5])
                conf = track[4] if len(track) > 5 else 1.0
                class_id = 0  
                f.write(f"{x1},{y1},{x2},{y2},{conf:.4f},{class_id},{track_id}\n")

                
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                cv2.putText(frame, f"ID {track_id}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

        cv2.imwrite(frame_save_path, frame)
        frame_idx += 1

    cap.release()
    print(f" Finished tracking {model_name}")


run_botsort(
    video_path="/kaggle/working/broadcast_detected.mp4",
    detection_dir="/kaggle/working/outputs/broadcast_detections",
    output_img_dir="/kaggle/working/outputs/broadcast_tracks",
    output_txt_dir="/kaggle/working/outputs/broadcast_tracks/track_data",
    model_name="broadcast"
)

run_botsort(
    video_path="/kaggle/working/tacticam_detected.mp4",
    detection_dir="/kaggle/working/outputs/tacticam_detections",
    output_img_dir="/kaggle/working/outputs/tacticam_tracks",
    output_txt_dir="/kaggle/working/outputs/tacticam_tracks/track_data",
    model_name="tacticam"
)


✅ Finished tracking broadcast
✅ Finished tracking tacticam


In [13]:
frames_to_video("outputs/broadcast_tracks", "broadcast_tracked.mp4")
frames_to_video("outputs/tacticam_tracks", "tacticam_tracked.mp4")


Video saved to broadcast_tracked.mp4
Video saved to tacticam_tracked.mp4


In [14]:
!pip install ftfy regex tqdm transformers torchvision
!pip install git+https://github.com/openai/CLIP.git


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-fnyz9mya
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-fnyz9mya
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369490 sha256=4918a63bff61055dea330735373f7bb417d949f772fcc132d1d0f5813078be5f
  Stored in directory: /tmp/pip-ephem-wheel-cache-0_6g0qtc/wheels/3f/7c/a4/9b490845988bf7a4db33674d52f709f088f64392063872eb9a
Successfully built clip
Installing collected packages: clip
Successfully installed clip-1.0


# Step 3

# clip + hungarian

In [15]:
import clip
import torch
from PIL import Image
import os
import cv2
import numpy as np
from collections import defaultdict
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)


track_dir = "/kaggle/working/outputs/broadcast_tracks"
box_txt_dir = "/kaggle/working/outputs/broadcast_tracks/track_data"
embedding_save_path = "broadcast_clip_embeddings.npy"

embedding_db = defaultdict(list)

for frame_file in tqdm(sorted(os.listdir(track_dir))):
    if not frame_file.endswith(".jpg"):
        continue
    frame_id = int(frame_file.split(".")[0])
    frame_path = os.path.join(track_dir, frame_file)
    box_txt = os.path.join(box_txt_dir, f"{frame_id:05d}.txt")

    if not os.path.exists(box_txt):
        continue

    frame = cv2.imread(frame_path)
    with open(box_txt, "r") as f:
        for line in f:
            x1, y1, x2, y2, conf, class_id, track_id = map(float, line.strip().split(","))
            x1, y1, x2, y2, track_id = map(int, [x1, y1, x2, y2, track_id])
            crop = frame[y1:y2, x1:x2]

            if crop.shape[0] == 0 or crop.shape[1] == 0:
                continue  # skip invalid crops

            crop_pil = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
            image_input = preprocess(crop_pil).unsqueeze(0).to(device)

            with torch.no_grad():
                embedding = model.encode_image(image_input)
                embedding /= embedding.norm(dim=-1, keepdim=True)

            embedding_db[track_id].append(embedding.cpu().numpy())


player_features = {
    tid: np.mean(np.vstack(embeds), axis=0)
    for tid, embeds in embedding_db.items()
}

np.save(embedding_save_path, player_features)
print(f"Saved embeddings to {embedding_save_path}")




100%|███████████████████████████████████████| 338M/338M [00:04<00:00, 83.0MiB/s]
100%|██████████| 133/133 [00:26<00:00,  5.04it/s]

Saved embeddings to broadcast_clip_embeddings.npy





In [16]:
import clip
import torch
from PIL import Image
import os
import cv2
import numpy as np
from collections import defaultdict
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

track_dir = "/kaggle/working/outputs/tacticam_tracks"
box_txt_dir = "/kaggle/working/outputs/tacticam_tracks/track_data"
embedding_save_path = "tacticam_clip_embeddings.npy"

embedding_db = defaultdict(list)

for frame_file in tqdm(sorted(os.listdir(track_dir))):
    if not frame_file.endswith(".jpg"):
        continue
    frame_id = int(frame_file.split(".")[0])
    frame_path = os.path.join(track_dir, frame_file)
    box_txt = os.path.join(box_txt_dir, f"{frame_id:05d}.txt")

    if not os.path.exists(box_txt):
        continue

    frame = cv2.imread(frame_path)
    with open(box_txt, "r") as f:
        for line in f:
            x1, y1, x2, y2, conf, class_id, track_id = map(float, line.strip().split(","))
            x1, y1, x2, y2, track_id = map(int, [x1, y1, x2, y2, track_id])
            crop = frame[y1:y2, x1:x2]

            if crop.shape[0] == 0 or crop.shape[1] == 0:
                continue  

            crop_pil = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
            image_input = preprocess(crop_pil).unsqueeze(0).to(device)

            with torch.no_grad():
                embedding = model.encode_image(image_input)
                embedding /= embedding.norm(dim=-1, keepdim=True)

            embedding_db[track_id].append(embedding.cpu().numpy())


player_features = {
    tid: np.mean(np.vstack(embeds), axis=0)
    for tid, embeds in embedding_db.items()
}


np.save(embedding_save_path, player_features)
print(f"Saved embeddings to {embedding_save_path}")




100%|██████████| 202/202 [01:18<00:00,  2.57it/s]

Saved embeddings to tacticam_clip_embeddings.npy





In [17]:
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics.pairwise import cosine_similarity


broadcast_embeds = np.load("broadcast_clip_embeddings.npy", allow_pickle=True).item()
tacticam_embeds = np.load("tacticam_clip_embeddings.npy", allow_pickle=True).item()

broadcast_ids = list(broadcast_embeds.keys())
tacticam_ids = list(tacticam_embeds.keys())


broadcast_matrix = np.stack([broadcast_embeds[bid] for bid in broadcast_ids])
tacticam_matrix = np.stack([tacticam_embeds[tid] for tid in tacticam_ids])


sim_matrix = cosine_similarity(tacticam_matrix, broadcast_matrix) 
cost_matrix = -sim_matrix

row_ind, col_ind = linear_sum_assignment(cost_matrix)
mapping = {}
for t_idx, b_idx in zip(row_ind, col_ind):
    tacticam_id = tacticam_ids[t_idx]
    broadcast_id = broadcast_ids[b_idx]
    score = sim_matrix[t_idx, b_idx]
    mapping[tacticam_id] = (broadcast_id, float(score))

# Show results
print("\n Re-ID Mapping Tacticam → Broadcast:")
for t_id, (b_id, score) in mapping.items():
    print(f"  Tacticam ID {t_id} ↔ Broadcast ID {b_id} (similarity: {score:.4f})")

# Save mapping
np.save("tacticam_to_broadcast_id_map.npy", mapping)



🔁 Re-ID Mapping Tacticam → Broadcast:
  Tacticam ID 1 ↔ Broadcast ID 10 (similarity: 0.9207)
  Tacticam ID 2 ↔ Broadcast ID 20 (similarity: 0.9549)
  Tacticam ID 3 ↔ Broadcast ID 6 (similarity: 0.9464)
  Tacticam ID 4 ↔ Broadcast ID 2 (similarity: 0.8605)
  Tacticam ID 5 ↔ Broadcast ID 3 (similarity: 0.9161)
  Tacticam ID 7 ↔ Broadcast ID 8 (similarity: 0.9592)
  Tacticam ID 8 ↔ Broadcast ID 4 (similarity: 0.9481)
  Tacticam ID 9 ↔ Broadcast ID 12 (similarity: 0.9521)
  Tacticam ID 10 ↔ Broadcast ID 55 (similarity: 0.8990)
  Tacticam ID 11 ↔ Broadcast ID 46 (similarity: 0.9757)
  Tacticam ID 12 ↔ Broadcast ID 13 (similarity: 0.9153)
  Tacticam ID 13 ↔ Broadcast ID 25 (similarity: 0.9643)
  Tacticam ID 14 ↔ Broadcast ID 64 (similarity: 0.9326)
  Tacticam ID 15 ↔ Broadcast ID 61 (similarity: 0.9702)
  Tacticam ID 16 ↔ Broadcast ID 41 (similarity: 0.9505)
  Tacticam ID 17 ↔ Broadcast ID 5 (similarity: 0.9104)
  Tacticam ID 19 ↔ Broadcast ID 9 (similarity: 0.9704)
  Tacticam ID 20 ↔ Broad

In [18]:
!apt-get update
!apt-get install -y tesseract-ocr
!pip install pytesseract


Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,628 B]                
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]                                
Get:5 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [79.8 kB]                 
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease                                              
Get:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,801 kB]
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]                             
Get:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]                           
Get:10 http://security.ubuntu.com/ubuntu jammy-security/restricted amd64 Packages [4,587 kB]        
Get:11 https://

In [19]:
import os
import cv2
import numpy as np
import pytesseract
import re
from tqdm import tqdm
from ultralytics import YOLO
custom_config = r'--oem 3 --psm 6 outputbase digits'


yolo = YOLO('yolov8n.pt')

def run_ocr_on_crop(crop_img):
    text = pytesseract.image_to_string(crop_img, config=custom_config)
    numbers = re.findall(r'\b\d{1,3}\b', text)
    return [int(num) for num in numbers]


def crop_upper_body(image, bbox):
    x1, y1, x2, y2 = map(int, bbox)
    height = y2 - y1
    upper_y2 = y1 + height // 2
    return image[y1:upper_y2, x1:x2]


def preprocess_crop(crop):
    resized = cv2.resize(crop, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    yuv = cv2.cvtColor(resized, cv2.COLOR_BGR2YUV)
    yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)

def extract_numbers_with_ids(folder_path, label):
    ocr_dict = {}
    image_files = sorted(os.listdir(folder_path))

    for fname in tqdm(image_files, desc=f"Processing {label}"):
        fpath = os.path.join(folder_path, fname)
        frame = cv2.imread(fpath)
        if frame is None:
            continue

        results = yolo(frame, verbose=False)[0]
        boxes = results.boxes.xyxy.cpu().numpy()
        ids = results.boxes.id.cpu().numpy() if results.boxes.id is not None else None

        if ids is None:
            continue

        for box, track_id in zip(boxes, ids):
            crop = crop_upper_body(frame, box)
            crop = preprocess_crop(crop)
            numbers = run_ocr_on_crop(crop)
            if numbers:
                ocr_dict[int(track_id)] = numbers[0]

    return ocr_dict


broadcast_path = "/kaggle/working/outputs/broadcast_tracks"
tacticam_path = "/kaggle/working/outputs/tacticam_tracks"

broadcast_ocr = extract_numbers_with_ids(broadcast_path, "broadcast")
tacticam_ocr = extract_numbers_with_ids(tacticam_path, "tacticam")

np.save("broadcast_jersey_ocr_dict.npy", broadcast_ocr)
np.save("tacticam_jersey_ocr_dict.npy", tacticam_ocr)


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 85.7MB/s]
Processing broadcast: 100%|██████████| 133/133 [00:03<00:00, 40.94it/s]
Processing tacticam: 100%|██████████| 202/202 [00:04<00:00, 47.16it/s]


In [20]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.optimize import linear_sum_assignment

broadcast_embeds = np.load("broadcast_clip_embeddings.npy", allow_pickle=True).item()
tacticam_embeds = np.load("tacticam_clip_embeddings.npy", allow_pickle=True).item()

broadcast_ocr = np.load("broadcast_jersey_ocr_dict.npy", allow_pickle=True).item()
tacticam_ocr = np.load("tacticam_jersey_ocr_dict.npy", allow_pickle=True).item()

broadcast_ids = sorted(broadcast_embeds.keys(), key=int)
tacticam_ids = sorted(tacticam_embeds.keys(), key=int)

broadcast_matrix = np.stack([broadcast_embeds[bid] for bid in broadcast_ids])
tacticam_matrix = np.stack([tacticam_embeds[tid] for tid in tacticam_ids])

sim_matrix = cosine_similarity(tacticam_matrix, broadcast_matrix)

ocr_bonus = 0.10
ocr_penalty = 0.10

for i, t_id in enumerate(tacticam_ids):
    t_num = tacticam_ocr.get(int(t_id), None)
    for j, b_id in enumerate(broadcast_ids):
        b_num = broadcast_ocr.get(int(b_id), None)

        if t_num is None or b_num is None:
            continue

        if t_num == b_num:
            sim_matrix[i, j] += ocr_bonus
        else:
            sim_matrix[i, j] -= ocr_penalty

cost_matrix = -sim_matrix
row_ind, col_ind = linear_sum_assignment(cost_matrix)

mapping = {}
for t_idx, b_idx in zip(row_ind, col_ind):
    t_id = tacticam_ids[t_idx]
    b_id = broadcast_ids[b_idx]
    score = sim_matrix[t_idx, b_idx]
    mapping[t_id] = (b_id, float(score))


print("\nRefined Re-ID Mapping with OCR Constraint (Tacticam → Broadcast):")
for t_id, (b_id, score) in mapping.items():
    print(f"  Tacticam ID {t_id} ↔ Broadcast ID {b_id} (score: {score:.4f})")

np.save("refined_tacticam_to_broadcast_id_map.npy", mapping)



🔁 Refined Re-ID Mapping with OCR Constraint (Tacticam → Broadcast):
  Tacticam ID 1 ↔ Broadcast ID 10 (score: 0.9207)
  Tacticam ID 2 ↔ Broadcast ID 20 (score: 0.9549)
  Tacticam ID 3 ↔ Broadcast ID 6 (score: 0.9464)
  Tacticam ID 4 ↔ Broadcast ID 2 (score: 0.8605)
  Tacticam ID 5 ↔ Broadcast ID 3 (score: 0.9161)
  Tacticam ID 7 ↔ Broadcast ID 8 (score: 0.9592)
  Tacticam ID 8 ↔ Broadcast ID 4 (score: 0.9481)
  Tacticam ID 9 ↔ Broadcast ID 12 (score: 0.9521)
  Tacticam ID 10 ↔ Broadcast ID 55 (score: 0.8990)
  Tacticam ID 11 ↔ Broadcast ID 46 (score: 0.9757)
  Tacticam ID 12 ↔ Broadcast ID 13 (score: 0.9153)
  Tacticam ID 13 ↔ Broadcast ID 25 (score: 0.9643)
  Tacticam ID 14 ↔ Broadcast ID 64 (score: 0.9326)
  Tacticam ID 15 ↔ Broadcast ID 61 (score: 0.9702)
  Tacticam ID 16 ↔ Broadcast ID 41 (score: 0.9505)
  Tacticam ID 17 ↔ Broadcast ID 5 (score: 0.9104)
  Tacticam ID 19 ↔ Broadcast ID 9 (score: 0.9704)
  Tacticam ID 20 ↔ Broadcast ID 1 (score: 0.9386)
  Tacticam ID 21 ↔ Broadcast 