In [None]:
%reset -f
import os
import sys
import importlib
import src 
import logging
from pathlib import Path
import polars as pl

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Setup project path
proj = Path.cwd()
if (proj / "src").exists():
    root = proj
elif (proj.parent / "src").exists():
    root = proj.parent
else:
    root = next(p for p in [proj, *proj.parents] if (p / "src").exists())

# Set up python path
os.chdir(root)
if str(root) not in sys.path:
    sys.path.insert(0, str(root))
logger.info(f"Project root configured: {root}")

# Verify critical paths exist
for path in ["src", "data", "data/raw", "data/processed", "notebooks"]:
    if not (root / path).exists():
        raise RuntimeError(f"Missing required path: {root / path}")

2025-11-16 16:12:34,418 - INFO - Project root configured: e:\OneDrive\Documents\Courses\Artificial Intelligence\Project\UF_CAP4261_F25_TEAM9


In [15]:
from __future__ import annotations
import src.features.formations
importlib.reload(src.features.formations)
from src.features.formations import train_formation_knn, load_knn_model, knn_neighbors_by_index, hdbscan_formations, get_cluster_examples

### 1) Train a KNN model on offense formations from the first frame

In [None]:
# Train the offense KNN model; the call returns the model together with
# per-row metadata and the feature matrix that the lookup cell reuses.
model_off, meta_off, X_off = train_formation_knn(
    # input data and which side of the ball to use
    players_parquet="data/processed/players_test.parquet",
    side="Offense",
    # number of neighbors passed to the trainer
    n_neighbors=10,
    # path handed to the trainer for saving the model
    save_path="src/models/knn_offense_formations.pkl",
)

### 2) Look at the first offense formation and its nearest neighbors (`k=5`; the first row returned is the query play itself at distance 0)

In [11]:
# Query the neighbors of the row at index 0 of the offense feature matrix.
neighbors_df = knn_neighbors_by_index(
    model_off,
    meta_off,
    X_off,
    idx=0,
    k=5,
)
neighbors_df

game_id,play_id,player_side,distance
i64,i64,str,f64
2024120805,74,"""Offense""",0.0
2025010515,524,"""Offense""",2.398884
2025010515,1329,"""Offense""",3.806156
2025010515,578,"""Offense""",4.205153
2024121502,3381,"""Offense""",4.581024


In [24]:
# Cluster offense formations with HDBSCAN.
# NOTE(review): this rebinds `model_off`, which the KNN cells above use for the
# KNN model. Re-running the neighbor lookup after this cell would pass it the
# HDBSCAN model instead; consider a distinct name once downstream uses are checked.
model_off, formations_off = hdbscan_formations(
    # data selection
    players_parquet="data/processed/players_test.parquet",
    side="Offense",                       # "Offense" or "Defense"
    frame_policy="first",                 # use first frame per play-side
    max_players=11,
    # clustering knobs (tweak as desired)
    min_cluster_size=20,
    min_samples=5,
    # path handed to the function for saving the model
    save_model_path="src/models/hdbscan_offense_formations.joblib",
)

# Preview the first few rows of the per-play cluster assignments.
formations_off.head()



game_id,play_id,player_side,frame_id,n_players,cluster,cluster_prob
i64,i64,str,i64,i64,i64,f64
2024120805,74,"""Offense""",1,5,0,1.0
2024120805,143,"""Offense""",1,3,-1,0.0
2024120805,312,"""Offense""",1,6,1,0.97094
2024120805,388,"""Offense""",1,6,1,0.686721
2024120805,453,"""Offense""",1,6,1,0.997419


In [22]:
# List the distinct cluster labels assigned to the offense formations.
formations_off.get_column("cluster").unique()

cluster
i64
-1
0
1
