In [4]:
from ultralytics import YOLO
import cv2
import os

# Load the YOLOv8 model
model = YOLO("models/yolo_players.pt")

# Load the video
video_path = "videos/15sec_input_720p.mp4"
cap = cv2.VideoCapture(video_path)

# Create results folder if not exists
os.makedirs("results", exist_ok=True)

# Video writer to save output
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter("results/output_detected.mp4", fourcc, 30, (1280, 720))

# Loop through video frames
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLOv8 detection
    results = model(frame)

    # Draw results
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cls_id = int(box.cls[0])
            conf = float(box.conf[0])

            if cls_id == 0:  # Class 0 = player
                label = f'Player {conf:.2f}'
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.6, (255, 255, 255), 2)

    out.write(frame)
    cv2.imshow("Detected Players", frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
out.release()
cv2.destroyAllWindows()



0: 384x640 1 ball, 16 players, 2 referees, 656.5ms
Speed: 16.0ms preprocess, 656.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 players, 2 referees, 634.2ms
Speed: 3.6ms preprocess, 634.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 617.6ms
Speed: 2.4ms preprocess, 617.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 617.4ms
Speed: 3.5ms preprocess, 617.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 611.3ms
Speed: 2.0ms preprocess, 611.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 612.9ms
Speed: 3.2ms preprocess, 612.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 players, 2 referees, 627.7ms
Speed: 3.9ms preprocess, 627.7ms inference, 1.3ms postprocess p

In [19]:
from ultralytics import YOLO
import cv2
import os
import numpy as np
from sort.sort import Sort  # Import SORT tracker
from sklearn.metrics.pairwise import cosine_similarity
import cv2

def get_histogram(frame, x1, y1, x2, y2):
    crop = frame[int(y1):int(y2), int(x1):int(x2)]
    if crop.size == 0:
        return None

    crop = cv2.resize(crop, (64, 128))
    hist = cv2.calcHist([crop], [0, 1, 2], None, [8, 8, 8],
                        [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist
known_players = {}     # player_id: color histogram
id_map = {}            # SORT ID → Consistent ID
next_id = 1            # Starts at Player 1

for obj in tracked_objects:
    x1, y1, x2, y2, sort_id = map(int, obj)

    hist = get_histogram(frame, x1, y1, x2, y2)
    assigned_id = None
    best_score = 0.90  # Threshold to accept match

    if hist is not None:
        # Compare with all saved players
        for pid, saved_hist in known_players.items():
            score = cosine_similarity([hist], [saved_hist])[0][0]
            if score > best_score:
                assigned_id = pid
                best_score = score

    # No match found → new player
    if assigned_id is None:
        assigned_id = next_id
        known_players[assigned_id] = hist
        next_id += 1

    id_map[sort_id] = assigned_id

    # Draw final box with consistent player ID
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
    cv2.putText(frame, f'Player {assigned_id}', (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


# Load YOLOv8 model
model = YOLO("models/yolo_players.pt")

# Load video
video_path = "videos/15sec_input_720p.mp4"
cap = cv2.VideoCapture(video_path)

# Create results folder
os.makedirs("results", exist_ok=True)

# Output video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter("results/output_tracked.mp4", fourcc, 30, (1280, 720))

# Initialize SORT tracker
tracker = Sort()

# Process video frame-by-frame
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLO detection
    results = model(frame)
    detections = []

    # Extract detections from YOLO results
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            conf = box.conf[0].cpu().numpy()
            cls_id = int(box.cls[0].cpu().numpy())

            if cls_id == 0:  # Only track players
                detections.append([x1, y1, x2, y2, conf])

    # Convert to numpy array
    dets = np.array(detections)

 # ...existing code...

    # Convert to numpy array with correct shape
    if len(detections) > 0:
        dets = np.array(detections)
    else:
        dets = np.empty((0, 5))

    # Update tracker
    tracked_objects = tracker.update(dets)

# ...existing code...

    # Draw tracked objects
    for obj in tracked_objects:
        x1, y1, x2, y2, track_id = map(int, obj)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
        cv2.putText(frame, f'Player {track_id}', (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    out.write(frame)
    cv2.imshow("Tracked Players", frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
out.release()
out = cv2.VideoWriter("results/output_reid.mp4", fourcc, 30, (1280, 720))
cap.release()
cv2.destroyAllWindows()



0: 384x640 1 ball, 16 players, 2 referees, 572.0ms
Speed: 2.7ms preprocess, 572.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 players, 2 referees, 597.9ms
Speed: 1.9ms preprocess, 597.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 573.4ms
Speed: 1.8ms preprocess, 573.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 573.5ms
Speed: 1.7ms preprocess, 573.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 575.8ms
Speed: 1.6ms preprocess, 575.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 585.9ms
Speed: 1.6ms preprocess, 585.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 players, 2 referees, 603.1ms
Speed: 2.1ms preprocess, 603.1ms inference, 0.9ms postprocess pe

In [26]:
from ultralytics import YOLO
import cv2
import os
import numpy as np
from sort.sort import Sort
from sklearn.metrics.pairwise import cosine_similarity

# Function to extract color histogram (jersey color)
def get_histogram(frame, x1, y1, x2, y2):
    crop = frame[int(y1):int(y2), int(x1):int(x2)]
    if crop.size == 0:
        return None
    crop = cv2.resize(crop, (64, 128))
    hist = cv2.calcHist([crop], [0, 1, 2], None, [8, 8, 8],
                        [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

# Re-ID storage
known_players = {}     # player_id: histogram
id_map = {}            # sort_id → consistent ID
next_id = 1            # Start assigning from Player 1

# Load model
model = YOLO("models/yolo_players.pt")

# Load video
cap = cv2.VideoCapture("videos/15sec_input_720p.mp4")

# Create result folder
os.makedirs("results", exist_ok=True)

# Output video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter("results/output_reid.mp4", fourcc, 30, (1280, 720))

# Initialize SORT tracker
tracker = Sort()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLO detection
    results = model(frame)
    detections = []

    # Extract bounding boxes of players
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
        conf = box.conf[0].cpu().numpy()
        cls_id = int(box.cls[0].cpu().numpy())

        print("Class ID:", cls_id)  # Optional: for checking

        if cls_id in [2, 3]:  # ✅ Detect only players
            detections.append([x1, y1, x2, y2, conf])



    # Convert to numpy array
    dets = np.array(detections) if len(detections) > 0 else np.empty((0, 5))

    # Update tracker
    tracked_objects = tracker.update(dets)

    # Loop over tracked players and apply Re-ID
    for obj in tracked_objects:
        x1, y1, x2, y2, sort_id = map(int, obj)

        hist = get_histogram(frame, x1, y1, x2, y2)
        assigned_id = None
        best_score = 0.90

        if hist is not None:
            for pid, saved_hist in known_players.items():
                score = cosine_similarity([hist], [saved_hist])[0][0]
                if score > best_score:
                    assigned_id = pid
                    best_score = score

        if assigned_id is None:
            assigned_id = next_id
            known_players[assigned_id] = hist
            next_id += 1

        id_map[sort_id] = assigned_id

        # Draw the box and ID on frame
        cv2.putText(frame, f"Class {cls_id}", (x1, y2 + 15),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
        cv2.putText(frame, f'Player {assigned_id}', (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    out.write(frame)
    cv2.imshow("Tracked and Re-ID Players", frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
out.release()
cv2.destroyAllWindows()


0: 384x640 1 ball, 16 players, 2 referees, 598.2ms
Speed: 2.8ms preprocess, 598.2ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)
Class ID: 3

0: 384x640 18 players, 2 referees, 599.9ms
Speed: 2.0ms preprocess, 599.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Class ID: 2

0: 384x640 1 ball, 16 players, 2 referees, 590.5ms
Speed: 1.7ms preprocess, 590.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Class ID: 0

0: 384x640 1 ball, 14 players, 2 referees, 589.1ms
Speed: 1.9ms preprocess, 589.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)
Class ID: 3

0: 384x640 1 ball, 14 players, 2 referees, 612.1ms
Speed: 1.7ms preprocess, 612.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)
Class ID: 0

0: 384x640 1 ball, 16 players, 2 referees, 592.2ms
Speed: 2.2ms preprocess, 592.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Class ID: 0

0: 384x640 15 players, 2 referees, 

KeyboardInterrupt: 

In [None]:
#best code which has worked yet
from ultralytics import YOLO
import cv2
import os
import numpy as np
from sort.sort import Sort
from sklearn.metrics.pairwise import cosine_similarity

# Function to extract color histogram (jersey color)
def get_histogram(frame, x1, y1, x2, y2):
    crop = frame[int(y1):int(y2), int(x1):int(x2)]
    if crop.size == 0:
        return None
    crop = cv2.resize(crop, (64, 128))
    hist = cv2.calcHist([crop], [0, 1, 2], None, [8, 8, 8],
                        [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

# Re-ID and team storage
known_players = {}      # player_id: histogram
player_team = {}        # player_id: 'A' or 'B'
team_refs = []          # Reference histograms for 2 teams
id_map = {}             # sort_id → consistent ID
next_id = 1             # Start from Player 1

# Load YOLO model (trained on soccer data)
model = YOLO("models/yolo_players.pt")

# Load video
cap = cv2.VideoCapture("videos/15sec_input_720p.mp4")

# Create result folder
os.makedirs("results", exist_ok=True)

# Output writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter("results/output_reid_teams_ball.mp4", fourcc, 30, (1280, 720))

# Initialize tracker
tracker = Sort()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)
    detections = []
    ball_coords = None

    # Extract boxes
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            conf = box.conf[0].cpu().numpy()
            cls_id = int(box.cls[0].cpu().numpy())

            if cls_id in [2, 3]:  # player
                detections.append([x1, y1, x2, y2, conf])
            elif cls_id == 0:  # ball
                ball_coords = (int((x1 + x2) / 2), int((y1 + y2) / 2))

    dets = np.array(detections) if len(detections) > 0 else np.empty((0, 5))
    tracked_objects = tracker.update(dets)

    for obj in tracked_objects:
        x1, y1, x2, y2, sort_id = map(int, obj)

        hist = get_histogram(frame, x1, y1, x2, y2)
        assigned_id = None
        best_score = 0.90

        if hist is not None:
            for pid, saved_hist in known_players.items():
                score = cosine_similarity([hist], [saved_hist])[0][0]
                if score > best_score:
                    assigned_id = pid
                    best_score = score

        if assigned_id is None:
            assigned_id = next_id
            known_players[assigned_id] = hist

            # Assign team based on color similarity
            if len(team_refs) < 2:
                team_refs.append(hist)
                player_team[assigned_id] = 'A' if len(team_refs) == 1 else 'B'
            else:
                score_A = cosine_similarity([hist], [team_refs[0]])[0][0]
                score_B = cosine_similarity([hist], [team_refs[1]])[0][0]
                player_team[assigned_id] = 'A' if score_A > score_B else 'B'

            next_id += 1

        id_map[sort_id] = assigned_id

        # Get team color
        team = player_team.get(assigned_id, 'A')
        color = (255, 0, 0) if team == 'A' else (0, 0, 255)  # BGR: Blue or Red
        label = f'Player {assigned_id} (Team {team})'

        # Draw box
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

        # Shadow text
        cv2.putText(frame, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 4)
        cv2.putText(frame, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

    # ⚽ Draw ball if detected
    if ball_coords is not None:
        bx, by = ball_coords
        cv2.circle(frame, (bx, by), 8, (255, 255, 255), -1)
        cv2.putText(frame, "Ball", (bx + 10, by - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    out.write(frame)
    cv2.imshow("Player Re-ID with Teams & Ball", frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
out.release()
cv2.destroyAllWindows()



0: 384x640 1 ball, 16 players, 2 referees, 618.6ms
Speed: 4.0ms preprocess, 618.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 players, 2 referees, 593.2ms
Speed: 1.8ms preprocess, 593.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 603.4ms
Speed: 1.7ms preprocess, 603.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 596.2ms
Speed: 3.0ms preprocess, 596.2ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 599.3ms
Speed: 1.7ms preprocess, 599.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 599.0ms
Speed: 1.6ms preprocess, 599.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 players, 2 referees, 629.3ms
Speed: 1.6ms preprocess, 629.3ms inference, 1.0ms postprocess pe

KeyboardInterrupt: 

In [4]:
# CLIP + Action Recognition + Team Detection + Ball Tracking
import os
import cv2
import numpy as np
import torch
import open_clip  # Make sure you have installed open_clip_torch
from PIL import Image
from ultralytics import YOLO
from sort.sort import Sort
from torchvision import transforms
from torchvision.models.video import r3d_18

# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load OpenCLIP model (use open_clip, not clip)
clip_model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
clip_model = clip_model.to(device)
tokenizer = open_clip.get_tokenizer('ViT-B-32')
team_prompts = ["a football player wearing a red jersey", "a football player wearing a blue jersey"]
text_tokens = tokenizer(team_prompts).to(device)

# Load Action Recognition Model
action_model = r3d_18(pretrained=True).eval().to(device)
action_transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize([0.43216, 0.394666, 0.37645], [0.22803, 0.22145, 0.216989])
])

# Buffer for player action clips
frame_buffer = {}

# Function: extract top 40% of the bounding box (jersey region)
def get_jersey_crop(frame, x1, y1, x2, y2):
    h = int(y2 - y1)
    return frame[int(y1):int(y1 + h * 0.4), int(x1):int(x2)]

# Function: use CLIP to classify jersey color
def predict_team_clip(crop_bgr):
    if crop_bgr is None or crop_bgr.size == 0:
        return "A"
    image = Image.fromarray(cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB))
    image_input = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        image_features = clip_model.encode_image(image_input)
        text_features = clip_model.encode_text(text_tokens)
        logits_per_image = (image_features @ text_features.T).float()
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()[0]
    return "A" if probs[0] > probs[1] else "B"

# Function: predict action using last 16 cropped frames
def predict_action(crops):
    if len(crops) < 16:
        return ""
    frames = [Image.fromarray(cv2.cvtColor(f, cv2.COLOR_BGR2RGB)) for f in crops[-16:] if f is not None and f.size > 0]
    if len(frames) < 16:
        return ""
    tensor = torch.stack([action_transform(f) for f in frames])
    tensor = tensor.permute(1, 0, 2, 3).unsqueeze(0).to(device)
    with torch.no_grad():
        out = action_model(tensor)
        _, pred = torch.max(out, 1)
    return f"Action {pred.item()}"

# Load YOLO model
model = YOLO("models/yolo_players.pt")
cap = cv2.VideoCapture("videos/15sec_input_720p.mp4")
os.makedirs("results", exist_ok=True)
out = cv2.VideoWriter("results/final_clip_output.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 30, (1280, 720))
tracker = Sort()

# Tracking setup
frame_index = 0
player_id_map, player_team = {}, {}
next_id = 1

while True:
    ret, frame = cap.read()
    if not ret:
        break

    width = frame.shape[1]
    results = model(frame)
    detections, ball_coords = [], None

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
            conf = float(box.conf[0])
            cls_id = int(box.cls[0])

            if conf < 0.3:
                continue
            if cls_id in [2, 3]:
                detections.append([x1, y1, x2, y2, conf])
            elif cls_id == 0:
                ball_coords = (int((x1 + x2) / 2), int((y1 + y2) / 2))

    dets = np.array(detections) if detections else np.empty((0, 5))
    tracked = tracker.update(dets)

    for obj in tracked:
        x1, y1, x2, y2, sid = map(int, obj)
        crop = get_jersey_crop(frame, x1, y1, x2, y2)

        if sid not in player_id_map:
            player_id_map[sid] = next_id
            next_id += 1

            if crop.size > 0:
                team = predict_team_clip(crop)
                player_team[player_id_map[sid]] = team
            else:
                player_team[player_id_map[sid]] = "A"  # fallback

        pid = player_id_map[sid]
        team = player_team[pid]
        color = (0, 0, 255) if team == "A" else (255, 0, 0)
        label = f"Player {pid} (Team {team})"

        # Action recognition
        if pid not in frame_buffer:
            frame_buffer[pid] = []
        full_crop = frame[y1:y2, x1:x2]
        frame_buffer[pid].append(full_crop)
        if len(frame_buffer[pid]) > 16:
            frame_buffer[pid] = frame_buffer[pid][-16:]

        action = predict_action(frame_buffer[pid])

        # Draw everything
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 3)
        cv2.putText(frame, label, (x1, y1 - 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        if action:
            cv2.putText(frame, action, (x1, y2 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

    if ball_coords:
        cv2.circle(frame, ball_coords, 8, (255, 255, 255), -1)
        cv2.putText(frame, "Ball", (ball_coords[0] + 10, ball_coords[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    out.write(frame)
    cv2.imshow("CLIP + Action Recognition", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

    frame_index += 1

cap.release()
out.release()
cv2.destroyAllWindows()

  torch.utils._pytree._register_pytree_node(


Downloading (…)ip_model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development



0: 384x640 1 ball, 16 players, 2 referees, 598.2ms
Speed: 2.6ms preprocess, 598.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 players, 2 referees, 575.1ms
Speed: 2.2ms preprocess, 575.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 571.5ms
Speed: 2.9ms preprocess, 571.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 567.7ms
Speed: 3.3ms preprocess, 567.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 564.8ms
Speed: 2.1ms preprocess, 564.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 567.9ms
Speed: 1.7ms preprocess, 567.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 players, 2 referees, 570.2ms
Speed: 2.1ms preprocess, 570.2ms inference, 1.0ms postprocess pe