<a href="https://colab.research.google.com/github/arzhrd/Basketball-Player-Detail-Using-Computer-Vision/blob/main/Basketball_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Configure API Keys and check for GPU
import os
from google.colab import userdata
from pathlib import Path
import torch

# Load API keys from Colab secrets and export them as environment variables
# so that libraries used later in the notebook can read them.
# This is best-effort: a failure is reported (including the underlying
# reason) but does not stop the cell, so the remaining setup still runs.
try:
    os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")
    os.environ["ROBOFLOW_API_KEY"] = userdata.get("ROBOFLOW_API_KEY")
except Exception as e:
    print("Could not load API keys. Please set them up in Colab Secrets (🔑).")
    print("Required secrets: 'HF_TOKEN' and 'ROBOFLOW_API_KEY'")
    # Surface the cause (missing secret, notebook access not granted, ...)
    # instead of discarding it, so the user knows which key to fix.
    print(f"Reason: {type(e).__name__}: {e}")
else:
    # Only reached when both secrets were read and exported.
    print("API keys loaded successfully.")

# Check for GPU
# Shell escape: runs `nvidia-smi` so its output (or the error when no NVIDIA
# driver is present) is shown in the cell output. Nothing is captured in Python.
!nvidia-smi

# Set home directory
# HOME is the working directory at the moment this cell runs; the SAM2
# checkout, the "source" directory and the "fonts" directory used further down
# are all addressed relative to it.
HOME = Path.cwd()
print("HOME:", HOME)

# Set ONNX provider to use GPU
# NOTE(review): presumably read by the `inference` package when it creates its
# ONNX Runtime sessions — confirm against the inference documentation.
os.environ["ONNXRUNTIME_EXECUTION_PROVIDERS"] = "[CUDAExecutionProvider]"

# 2. Install SAM2 (Segment Anything Model 2) for tracking
# The repository is cloned into the current directory (HOME), installed in
# editable mode, its extension is built in place, and the checkpoint download
# script is run from inside `checkpoints/`.
# NOTE: on a re-run `git clone` reports that the destination already exists;
# the following steps still run against the existing checkout.
!git clone https://github.com/Gy920/segment-anything-2-real-time.git
%cd {HOME}/segment-anything-2-real-time
!pip install -e . -q
!python setup.py build_ext --inplace
!(cd checkpoints && bash download_ckpts.sh)
# Return to HOME so that later relative paths are not resolved inside the
# SAM2 checkout.
%cd {HOME}

# 3. Install all other required Python packages
!pip install -q gdown inference-gpu supervision transformers num2words
# `sports` is installed straight from the `feat/basketball` branch of the
# roboflow/sports repository rather than from a released package.
!pip install -q git+https://github.com/roboflow/sports.git@feat/basketball
# flash-attn is installed with build isolation disabled.
# NOTE(review): presumably so its build can see the already-installed torch — confirm.
!pip install -q flash-attn --no-build-isolation

print("\n✅ All installations are complete.")

In [None]:
# 1. Download sample videos and fonts
SOURCE_VIDEO_DIRECTORY = HOME / "source"
!gdown -q https://drive.google.com/drive/folders/1eDJYqQ77Fytz15tKGdJCMeYSgmoQ-2-H -O {SOURCE_VIDEO_DIRECTORY} --folder
!gdown -q https://drive.google.com/drive/folders/1RBjpI5Xleb58lujeusxH0W5zYMMA4ytO -O {HOME / "fonts"} --folder
print("Sample videos and fonts downloaded.")

# 2. Define the source video path you want to process
# You can change the filename to process a different clip from the `source` directory
SOURCE_VIDEO_PATH = SOURCE_VIDEO_DIRECTORY / "boston-celtics-new-york-knicks-game-1-q1-04.28-04.20.mp4"

# 3. Define Team Rosters and Colors
# TEAM_ROSTERS maps team name -> {jersey number (as a string) -> player surname}.
TEAM_ROSTERS = {
    "New York Knicks": {
        "55": "Hukporti",
        "1": "Payne",
        "0": "Wright",
        "11": "Brunson",
        "3": "Hart",
        "32": "Towns",
        "44": "Shamet",
        "25": "Bridges",
        "2": "McBride",
        "23": "Robinson",
        "8": "Anunoby",
        "4": "Dadiet",
        "5": "Achiuwa",
        "13": "Kolek",
    },
    "Boston Celtics": {
        "42": "Horford",
        "55": "Scheierman",
        "9": "White",
        "20": "Davison",
        "7": "Brown",
        "0": "Tatum",
        "27": "Walsh",
        "4": "Holiday",
        "8": "Porzingis",
        "40": "Kornet",
        "88": "Queta",
        "11": "Pritchard",
        "30": "Hauser",
        "12": "Craig",
        "26": "Tillman",
    },
}

# TEAM_COLORS maps team name -> hex color string; keys match TEAM_ROSTERS.
TEAM_COLORS = {
    "New York Knicks": "#006BB6",
    "Boston Celtics": "#007A33",
}

In [None]:
import supervision as sv
from inference import get_model
from sam2.build_sam import build_sam2_camera_predictor
import torch

# 1. Load Player and Number Detection Model (RF-DETR)
# The model is resolved from its "<project>/<version>" id by `get_model`
# (imported from `inference`).
# NOTE(review): presumably authenticated via the ROBOFLOW_API_KEY environment
# variable exported during setup — confirm.
PLAYER_DETECTION_MODEL_ID = "basketball-player-detection-3-ycjdo/4"
PLAYER_DETECTION_MODEL = get_model(model_id=PLAYER_DETECTION_MODEL_ID)

# 2. Load Player Tracking Model (SAM2.1)
# The checkpoint path below is relative, so the working directory is switched
# to the SAM2 checkout while the predictor is built and restored to HOME
# afterwards.
%cd {HOME}/segment-anything-2-real-time
SAM2_CHECKPOINT = "checkpoints/sam2.1_hiera_large.pt"
SAM2_CONFIG = "configs/sam2.1/sam2.1_hiera_l.yaml"
sam_predictor = build_sam2_camera_predictor(SAM2_CONFIG, SAM2_CHECKPOINT)
%cd {HOME}

# 3. Load Jersey Number Recognition Model (SmolVLM2)
# Loaded through the same `get_model` entry point as the detector.
NUMBER_RECOGNITION_MODEL_ID = "basketball-jersey-numbers-ocr/3"
NUMBER_RECOGNITION_MODEL = get_model(model_id=NUMBER_RECOGNITION_MODEL_ID)
# Text prompt kept next to the model; it is not used within this cell.
NUMBER_RECOGNITION_MODEL_PROMPT = "Read the number."

print("\n✅ All models loaded successfully.")

In [None]:
import numpy as np
import supervision as sv
from tqdm import tqdm
from sports.common.team import TeamClassifier

# Class IDs for different player-related detections
# Used to filter detector output with `np.isin` on `detections.class_id`.
# NOTE(review): the id -> label mapping is defined by the detection model's
# class list, which is not visible here — confirm before editing.
PLAYER_CLASS_IDS = [3, 4, 5, 6, 7]

def shrink_boxes(xyxy: np.ndarray, scale: float) -> np.ndarray:
    """Resize boxes about their own centers to focus on the jersey.

    Args:
        xyxy: 2-D array whose first four columns are ``x1, y1, x2, y2``.
        scale: Factor applied to each box's width and height; the center of
            every box is left where it was.

    Returns:
        Array with one ``[x1, y1, x2, y2]`` row per input box.
    """
    left, top = xyxy[:, 0], xyxy[:, 1]
    right, bottom = xyxy[:, 2], xyxy[:, 3]

    center_x = (left + right) / 2
    center_y = (top + bottom) / 2

    # Half of the scaled extent, measured outward from the center.
    half_width = (right - left) * scale / 2
    half_height = (bottom - top) * scale / 2

    return np.stack(
        [
            center_x - half_width,
            center_y - half_height,
            center_x + half_width,
            center_y + half_height,
        ],
        axis=1,
    )

# 1. Collect player crops from all videos to build a training set
# Every 30th frame of each .mp4 in the source directory is run through the
# detector; player-class boxes are shrunk to 40% and cropped out of the frame.
crops = []
video_paths = sv.list_files_with_extensions(SOURCE_VIDEO_DIRECTORY, extensions=["mp4"])
for video_path in video_paths:
    sampled_frames = sv.get_video_frames_generator(
        source_path=str(video_path),
        stride=30,
    )
    for frame in tqdm(sampled_frames, desc=f"Processing {video_path.name}"):
        inference_result = PLAYER_DETECTION_MODEL.infer(
            frame,
            confidence=0.4,
            iou_threshold=0.9,
            class_agnostic_nms=True,
        )[0]
        detections = sv.Detections.from_inference(inference_result)
        # Keep only the player-related classes.
        is_player = np.isin(detections.class_id, PLAYER_CLASS_IDS)
        detections = detections[is_player]
        jersey_boxes = shrink_boxes(xyxy=detections.xyxy, scale=0.4)
        crops.extend(sv.crop_image(frame, jersey_box) for jersey_box in jersey_boxes)

# 2. Train the team classifier and predict teams for the collected crops
# The classifier is fitted on the crops and then asked to label those same
# crops, giving one cluster id per crop.
team_classifier = TeamClassifier(device="cuda")
team_classifier.fit(crops)
teams = team_classifier.predict(crops)

# 3. Display the results of clustering for manual verification
# Split the crops by predicted cluster id in a single pass.
team_0, team_1 = [], []
for crop, team in zip(crops, teams):
    if team == 0:
        team_0.append(crop)
    elif team == 1:
        team_1.append(crop)

# Show up to 20 samples per cluster in a 2 x 10 grid.
for heading, cluster_crops in (
    ("--- CLUSTER 0 ---", team_0),
    ("\n--- CLUSTER 1 ---", team_1),
):
    print(heading)
    sv.plot_images_grid(images=cluster_crops[:20], grid_size=(2, 10), size=(10, 2))

In [None]:
# 4. MANUALLY ASSIGN TEAM NAMES BASED ON THE GRIDS ABOVE
# Compare the crops shown for Cluster 0 and Cluster 1 with the two teams and
# keep whichever mapping matches: leave the active dictionary as is, or
# comment it out and uncomment the swapped one below.

TEAM_NAMES = {0: "New York Knicks", 1: "Boston Celtics"}

# TEAM_NAMES = {
#     0: "Boston Celtics",
#     1: "New York Knicks",
# }

# Echo the chosen mapping so a wrong assignment is easy to spot.
print("Team names assigned:")
for cluster_id in (0, 1):
    print(f"Cluster {cluster_id} -> {TEAM_NAMES[cluster_id]}")