## code to read frames from tacticam file and generate the bounding box for set of frames

In [17]:
from ultralytics import YOLO
import cv2
import os

#defin the model and video
model = YOLO('model/best.pt')

video_path = "dataset/tacticam.mp4"
cap = cv2.VideoCapture(video_path)

In [18]:
if not cap.isOpened():
    print("no capture found")
else:
    print("capture found")

capture found


In [19]:
# creating output dir
output_dir = "saved_output"
os.makedirs(output_dir, exist_ok=True)


In [20]:
#set threshold to process
max_frames = 5
frame_count = 0

In [21]:
## read the frames and process the boxes
while frame_count<max_frames:
    ret, frame = cap.read()

    #checking if we have frame
    if not ret:
        print("End of frames reacherd")
        break

    #run inference on model 
    output = model.predict(source=frame, conf=0.50, save = False, verbose=False)
    output = output[0]

    #get the annotated frame with labels
    annotated_frame = output.plot()
    
    #save the frame
    save_frame_dir = os.path.join(output_dir, f"frame_{frame_count}")
    os.makedirs(save_frame_dir, exist_ok=True)
    save_frame_obj_dir = os.path.join(save_frame_dir, f"frame_{frame_count}.jpg")
    cv2.imwrite(save_frame_obj_dir, annotated_frame)
    print(f"saved frame to : {save_frame_obj_dir}")

    #save each player crop
    boxes = output.boxes
    if boxes is not None:
        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cls_id = int(box.cls[0].item())
            class_name = model.names[cls_id]

            if class_name.lower() == "player":  
                crop = frame[y1:y2, x1:x2]
                player_path = os.path.join(save_frame_dir, f"player_{i+1}.jpg")
                cv2.imwrite(player_path, crop)
                print(f"Saved player crop: {player_path}")

    frame_count += 1

cap.release()


saved frame to : saved_output/frame_0/frame_0.jpg
Saved player crop: saved_output/frame_0/player_1.jpg
Saved player crop: saved_output/frame_0/player_2.jpg
Saved player crop: saved_output/frame_0/player_3.jpg
Saved player crop: saved_output/frame_0/player_4.jpg
Saved player crop: saved_output/frame_0/player_5.jpg
Saved player crop: saved_output/frame_0/player_6.jpg
Saved player crop: saved_output/frame_0/player_7.jpg
Saved player crop: saved_output/frame_0/player_8.jpg
Saved player crop: saved_output/frame_0/player_9.jpg
Saved player crop: saved_output/frame_0/player_10.jpg
Saved player crop: saved_output/frame_0/player_11.jpg
Saved player crop: saved_output/frame_0/player_12.jpg
Saved player crop: saved_output/frame_0/player_13.jpg
Saved player crop: saved_output/frame_0/player_14.jpg
Saved player crop: saved_output/frame_0/player_15.jpg
Saved player crop: saved_output/frame_0/player_16.jpg
Saved player crop: saved_output/frame_0/player_17.jpg
Saved player crop: saved_output/frame_0/p

In [29]:
import os
import cv2
from ultralytics import YOLO

# Load the model
model = YOLO("model/best.pt")

# Open the video
video_path = "dataset/tacticam.mp4"
output_dir = "saved_frames_tacticam"
# video_path = "dataset/broadcast.mp4"
# output_dir = "saved_frames_broadcast"
cap = cv2.VideoCapture(video_path)

os.makedirs(output_dir, exist_ok=True)

frame_count = 0
max_frames = 5
font = cv2.FONT_HERSHEY_SIMPLEX

while frame_count < max_frames:
    ret, frame = cap.read()
    if not ret:
        print("End of frames reached.")
        break

    # Run YOLO inference
    output = model.predict(source=frame, conf=0.75, save=False, verbose=False)[0]

    # Prepare to manually draw on frame
    annotated_frame = frame.copy()

    boxes = output.boxes
    player_id = 0  # Start from 0 for each frame

    # Prepare directories
    save_frame_dir = os.path.join(output_dir, f"frame_{frame_count}")
    os.makedirs(save_frame_dir, exist_ok=True)

    if boxes is not None:
        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cls_id = int(box.cls[0].item())
            class_name = model.names[cls_id]

            if class_name.lower() == "player":
                # Draw bounding box and custom player ID
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                conf_score = float(box.conf[0])
                label_text = f"ID: {player_id}, Conf: {conf_score:.2f}"
                cv2.putText(annotated_frame, label_text, (x1, y1 - 10),
                            font, 0.6, (0, 255, 0), 2)

                # Save player crop
                crop = frame[y1:y2, x1:x2]
                player_path = os.path.join(save_frame_dir, f"player_{player_id}.jpg")
                cv2.imwrite(player_path, crop)
                print(f"Saved player crop: {player_path}")

                player_id += 1

    # Save annotated frame with custom IDs
    save_frame_path = os.path.join(save_frame_dir, f"frame_{frame_count}.jpg")
    cv2.imwrite(save_frame_path, annotated_frame)
    print(f"Saved frame with player IDs: {save_frame_path}")

    frame_count += 1

cap.release()

Saved player crop: saved_frames_tacticam/frame_0/player_0.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_1.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_2.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_3.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_4.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_5.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_6.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_7.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_8.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_9.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_10.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_11.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_12.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_13.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_14.jpg
Saved player crop: saved_frames_tacticam/frame_0/player_15.jpg
Sa

In [None]:
import os
import cv2
import torch
import numpy as np
import torch.nn.functional as F
from ultralytics import YOLO
from torchreid.utils import FeatureExtractor

# Load YOLO model
model = YOLO("model/best.pt")

# Load ReID model
extractor = FeatureExtractor(
    model_name='osnet_x1_0',
    model_path='osnet_x1_0_market1501.pth',
    device='cpu'  # Change to 'cuda' if using GPU
)

# Config
video_path = "dataset/tacticam.mp4"
output_dir = "saved_frames_tacticam_1"
cap = cv2.VideoCapture(video_path)

os.makedirs(output_dir, exist_ok=True)

# Variables
frame_count = 0
max_frames = 5
font = cv2.FONT_HERSHEY_SIMPLEX
threshold = 0.8  # similarity threshold

# Store known player embeddings and their unique IDs
embed_array = []  # list of (embedding tensor, player_id)
global_player_id = 0

while frame_count < max_frames:
    ret, frame = cap.read()
    if not ret:
        print("End of frames reached.")
        break

    output = model.predict(source=frame, conf=0.75, save=False, verbose=False)[0]
    annotated_frame = frame.copy()
    boxes = output.boxes

    # Frame directory
    save_frame_dir = os.path.join(output_dir, f"frame_{frame_count}")
    os.makedirs(save_frame_dir, exist_ok=True)

    if boxes is not None:
        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cls_id = int(box.cls[0].item())
            class_name = model.names[cls_id]

            if class_name.lower() == "player":
                # Crop player
                crop = frame[y1:y2, x1:x2]
                crop_rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
                emb = extractor(crop_rgb)
                emb = F.normalize(emb, p=2, dim=1)  # normalize embedding

                matched_id = None
                max_sim = 0

                for saved_emb, pid in embed_array:
                    sim = torch.mm(emb, saved_emb.t()).item()
                    if sim > max_sim and sim > threshold:
                        max_sim = sim
                        matched_id = pid

                if matched_id is not None:
                    # Update embedding (simple average for now)
                    new_emb = (emb + [e for e, id in embed_array if id == matched_id][0]) / 2
                    new_emb = F.normalize(new_emb, p=2, dim=1)
                    for idx, (e, id) in enumerate(embed_array):
                        if id == matched_id:
                            embed_array[idx] = (new_emb, matched_id)
                            break
                    player_id = matched_id
                else:
                    # New player
                    player_id = global_player_id
                    embed_array.append((emb, player_id))
                    global_player_id += 1

                # Annotate
                conf_score = float(box.conf[0])
                label_text = f"ID: {player_id}, Conf: {conf_score:.2f}"
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(annotated_frame, label_text, (x1, y1 - 10),
                            font, 0.6, (0, 255, 0), 2)

                # Save player crop
                player_path = os.path.join(save_frame_dir, f"player_{player_id}.jpg")
                cv2.imwrite(player_path, crop)
                print(f"Saved player {player_id} crop: {player_path}")

    # Save annotated frame
    save_frame_path = os.path.join(save_frame_dir, f"frame_{frame_count}.jpg")
    cv2.imwrite(save_frame_path, annotated_frame)
    print(f"Saved annotated frame: {save_frame_path}")

    frame_count += 1

cap.release()

Successfully loaded imagenet pretrained weights from "/Users/vivek/.cache/torch/checkpoints/osnet_x1_0_imagenet.pth"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']
Model: osnet_x1_0
- params: 2,193,616
- flops: 978,878,352
🧍 Saved player 0 crop: saved_frames_tacticam_1/frame_0/player_0.jpg
🧍 Saved player 1 crop: saved_frames_tacticam_1/frame_0/player_1.jpg
🧍 Saved player 2 crop: saved_frames_tacticam_1/frame_0/player_2.jpg
🧍 Saved player 3 crop: saved_frames_tacticam_1/frame_0/player_3.jpg
🧍 Saved player 0 crop: saved_frames_tacticam_1/frame_0/player_0.jpg
🧍 Saved player 4 crop: saved_frames_tacticam_1/frame_0/player_4.jpg
🧍 Saved player 1 crop: saved_frames_tacticam_1/frame_0/player_1.jpg
🧍 Saved player 1 crop: saved_frames_tacticam_1/frame_0/player_1.jpg
🧍 Saved player 1 crop: saved_frames_tacticam_1/frame_0/player_1.jpg
🧍 Saved player 5 crop: saved_frames_tacticam_1/frame_0/player_5.jpg
🧍 Saved player 6 crop: saved

## incorparting spatial distance in corrosponsig bouding box at i frame and i-1 frame for similiary search

In [None]:
import os
import cv2
import torch
import numpy as np
from ultralytics import YOLO
from torchreid.utils import FeatureExtractor
import torch.nn.functional as F

# Load the model
model = YOLO("model/best.pt")

# Load the ReID extractor
extractor = FeatureExtractor(
    model_name='osnet_x1_0',
    model_path='osnet_x1_0_market1501.pth',
    device='cpu'
)

# Video settings
video_path = "dataset/tacticam.mp4"
output_dir = "saved_frames_tacticam_2"
cap = cv2.VideoCapture(video_path)
os.makedirs(output_dir, exist_ok=True)

frame_count = 0
max_frames = 5
font = cv2.FONT_HERSHEY_SIMPLEX

# Tracking database
embed_array = []  # stores (embedding, player_id, (cx, cy))
global_player_id = 0

while frame_count < max_frames:
    ret, frame = cap.read()
    if not ret:
        print("End of frames reached.")
        break

    # Run YOLO inference
    output = model.predict(source=frame, conf=0.75, save=False, verbose=False)[0]

    # Prepare to manually draw on frame
    annotated_frame = frame.copy()

    boxes = output.boxes

    # Prepare directories
    save_frame_dir = os.path.join(output_dir, f"frame_{frame_count}")
    os.makedirs(save_frame_dir, exist_ok=True)

    if boxes is not None:
        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cls_id = int(box.cls[0].item())
            class_name = model.names[cls_id]

            if class_name.lower() == "player":
                crop = frame[y1:y2, x1:x2]
                crop_rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)

                # Extract embedding
                emb = extractor(crop_rgb)
                emb = F.normalize(emb, p=2, dim=1)

                # Compute center of bounding box
                cx, cy = (x1 + x2) / 2, (y1 + y2) / 2

                matched_id = None
                best_score = 0

                for saved_emb, pid, (prev_cx, prev_cy) in embed_array:
                    sim = torch.mm(emb, saved_emb.t()).item()
                    spatial_dist = np.sqrt((cx - prev_cx) ** 2 + (cy - prev_cy) ** 2)
                    box_diag = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
                    norm_dist = spatial_dist / (box_diag + 1e-6)
                    final_score = (sim * 6 + (1 - norm_dist)) / 7

                    if final_score > best_score and final_score > 0.7:
                        best_score = final_score
                        matched_id = pid

                if matched_id is not None:
                    for idx, (e, pid, pos) in enumerate(embed_array):
                        if pid == matched_id:
                            new_emb = F.normalize(e * 0.6 + emb * 0.4, p=2, dim=1)
                            embed_array[idx] = (new_emb, matched_id, (cx, cy))
                            break
                    player_id = matched_id
                else:
                    player_id = global_player_id
                    embed_array.append((emb, player_id, (cx, cy)))
                    global_player_id += 1

                # Draw box and ID
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                conf_score = float(box.conf[0])
                label_text = f"ID: {player_id}, Conf: {conf_score:.2f}"
                cv2.putText(annotated_frame, label_text, (x1, y1 - 10), font, 0.6, (0, 255, 0), 2)

                # Save crop
                player_path = os.path.join(save_frame_dir, f"player_{player_id}.jpg")
                cv2.imwrite(player_path, crop)
                print(f"Saved player crop: {player_path}")

    # Save annotated frame
    save_frame_path = os.path.join(save_frame_dir, f"frame_{frame_count}.jpg")
    cv2.imwrite(save_frame_path, annotated_frame)
    print(f"Saved frame with player IDs: {save_frame_path}")

    frame_count += 1

cap.release()