In [None]:
from google.colab import drive
import os
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install ultralytics opencv-python-headless torchvision scikit-learn



In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from sklearn.metrics.pairwise import cosine_similarity
import os


In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from sklearn.metrics.pairwise import cosine_similarity
import os

def process_video_with_tracking(model_path, video_path, output_path, conf_thresh=0.3):
    """Runs YOLOv11 with tracking on the input video, saves output, and returns track embeddings."""
    model = YOLO(model_path)
    cap = cv2.VideoCapture(video_path)
    width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps    = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    id_to_vec = {}  # mapping: local ID -> appearance vector
    id_to_last_bbox = {}  # for drawing labels

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        results = model.track(frame, persist=True, conf=conf_thresh, tracker='bytetrack.yaml', verbose=False)  # or 'botsort.yaml'
        boxes = results[0].boxes
        if boxes:
            for b in boxes:
                if b.id is not None and b.xyxy is not None and len(b.xyxy[0]) == 4:
                    player_id = int(b.id)
                    x1, y1, x2, y2 = int(b.xyxy[0][0]), int(b.xyxy[0][1]), int(b.xyxy[0][2]), int(b.xyxy[0][3])
                    crop = frame[y1:y2, x1:x2]
                    if crop.size == 0: continue
                    color_hist = cv2.calcHist([crop], [0, 1, 2], None, [8, 8, 8], [0,256,0,256,0,256]).flatten()
                    color_hist = color_hist / (np.linalg.norm(color_hist) + 1e-8)

                    if player_id not in id_to_vec:
                        id_to_vec[player_id] = color_hist
                    else:
                        id_to_vec[player_id] = 0.8*id_to_vec[player_id] + 0.2*color_hist
                    id_to_last_bbox[player_id] = (x1, y1, x2, y2)

                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
                    cv2.putText(frame, f"ID {player_id}", (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2)
        out.write(frame)

    cap.release()
    out.release()
    return id_to_vec

In [None]:
MODEL_PATH = '/content/drive/MyDrive/Computer Vision/best.pt'
VIDEO1_PATH = '/content/drive/MyDrive/Computer Vision/broadcast.mp4'
VIDEO2_PATH = '/content/drive/MyDrive/Computer Vision/tacticam.mp4'
OUTPUT1_PATH = '/content/drive/MyDrive/Computer Vision/annotated_broadcast.mp4'
OUTPUT2_PATH = '/content/drive/MyDrive/Computer Vision/annotated_tacticam.mp4'


In [None]:
# Collect feature vectors
ids1, feats1 = list(vecs_1.keys()), np.array(list(vecs_1.values()))
ids2, feats2 = list(vecs_2.keys()), np.array(list(vecs_2.values()))

# Compute similarity and assign consistent IDs
sim_matrix = cosine_similarity(feats2, feats1)
row_ind = np.argmax(sim_matrix, axis=1)

# Mapping: tacticam local id to broadcast local id
id_map = {ids2[i]: ids1[row_ind[i]] for i in range(len(ids2))}
print("ID Map tacticam->broadcast:", id_map)


ID Map tacticam->broadcast: {1: 20, 2: 18, 3: 20, 4: 167, 5: 19, 6: 13, 7: 19, 8: 23, 9: 16, 10: 129, 11: 129, 12: 23, 13: 15, 14: 167, 15: 102, 16: 20, 17: 22, 18: 129, 19: 18, 20: 18, 21: 20, 22: 156, 23: 24, 24: 156, 31: 45, 34: 45, 41: 166, 45: 28, 46: 24, 54: 14, 59: 129, 61: 28, 63: 167, 64: 129, 72: 14, 79: 166, 85: 156, 87: 156, 88: 30, 93: 156, 98: 156, 102: 156, 107: 20, 109: 30, 116: 129, 119: 46, 121: 14}


In [30]:
def relabel_video_with_consistent_ids(model_path, input_video, output_video, id_map=None, is_tacticam=False, conf_thresh=0.3):
    model = YOLO(model_path)
    cap = cv2.VideoCapture(input_video)
    width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps    = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = model.track(frame, persist=True, conf=conf_thresh, tracker='bytetrack.yaml', verbose=False)
        boxes = results[0].boxes

        if boxes:
            for b in boxes:
                # Check if tracking ID exists
                if b.id is not None:
                    player_id = int(b.id)
                    # If tacticam, remap ID
                    if is_tacticam and player_id in id_map:
                        consistent_id = id_map[player_id]
                    else:
                        consistent_id = player_id
                    # Extract coordinates from the tensor and convert to integers
                    x1, y1, x2, y2 = int(b.xyxy[0][0]), int(b.xyxy[0][1]), int(b.xyxy[0][2]), int(b.xyxy[0][3])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
                    cv2.putText(frame, f"ID {consistent_id}", (x1, max(20,y1-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,0,0), 2)
        out.write(frame)
    cap.release()
    out.release()

FINAL_TACTICAM = '/content/drive/MyDrive/Computer Vision/final_annotated_tacticam.mp4'
FINAL_BROADCAST = '/content/drive/MyDrive/Computer vision/final_annotated_broadcast.mp4'

relabel_video_with_consistent_ids(MODEL_PATH, VIDEO1_PATH, FINAL_BROADCAST, None, is_tacticam=False)
relabel_video_with_consistent_ids(MODEL_PATH, VIDEO2_PATH, FINAL_TACTICAM, id_map, is_tacticam=True)