In [20]:
%reset -f
import os
import sys
import importlib
import logging
from pathlib import Path
import polars as pl

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Setup project path
proj = Path.cwd()
if (proj / "src").exists():
    root = proj
elif (proj.parent / "src").exists():
    root = proj.parent
else:
    root = next(p for p in [proj, *proj.parents] if (p / "src").exists())

# Set up python path
os.chdir(root)
if str(root) not in sys.path:
    sys.path.insert(0, str(root))
logger.info(f"Project root configured: {root}")

# Verify critical paths exist
for path in ["src", "data", "data/raw", "data/processed", "notebooks"]:
    if not (root / path).exists():
        raise RuntimeError(f"Missing required path: {root / path}")

2025-11-24 23:28:36,161 - INFO - Project root configured: e:\OneDrive\Documents\Courses\Artificial Intelligence\Project\UF_CAP4261_F25_TEAM9


In [21]:
import torch
import importlib
import traceback
import inspect

# TRAINING utilities & config come from train_player_model
# Import as a module to make debugging easier (avoid ImportError hiding root-cause)
try:
    import src.models.train_player_model as tpm
    importlib.reload(tpm)

    # Bind commonly used symbols from the module:
    hyperparam_search = tpm.hyperparam_search
    train_final_model = tpm.train_final_model
    predict_on_test = tpm.predict_on_test
    FEATURE_COLS = tpm.FEATURE_COLS
    TARGET_COLS = tpm.TARGET_COLS
    RANDOM_SEED = tpm.RANDOM_SEED
    set_seed = tpm.set_seed

    print("Loaded train_player_model OK:", [n for n in dir(tpm) if not n.startswith('_')])
except Exception:
    traceback.print_exc()
    raise

# Data utilities come from player_dataset
try:
    import src.models.player_dataset as pd_mod
    importlib.reload(pd_mod)
    load_processed_data = pd_mod.load_processed_data
    join_teamframe = pd_mod.join_teamframe
    build_player_id_map = pd_mod.build_player_id_map
    print("Loaded player_dataset OK:", [n for n in dir(pd_mod) if not n.startswith('_')])
except Exception:
    traceback.print_exc()
    raise

from src.models.physics_transformer import PhysicsTransformer, TransformerConfig

set_seed(RANDOM_SEED)


Loaded train_player_model OK: ['BASE_FEATURE_COLS', 'BATCH_SIZE', 'DEVICE', 'DataLoader', 'Dict', 'FEATURE_COLS', 'FINAL_EPOCHS', 'List', 'MAX_SEQ_LEN', 'PhysicsTransformer', 'PlayerSequenceDataset', 'RANDOM_SEED', 'SEARCH_EPOCHS', 'TARGET_COLS', 'TEAMFRAME_COLS', 'TransformerConfig', 'Tuple', 'VAL_FRACTION', 'asdict', 'build_dataloaders', 'build_player_id_map', 'eval_one_epoch', 'hyperparam_search', 'join_teamframe', 'load_processed_data', 'main', 'np', 'os', 'predict_on_test', 'random', 'random_split', 'sample_config', 'set_seed', 'torch', 'train_final_model', 'train_one_epoch']
Loaded player_dataset OK: ['Dataset', 'Dict', 'List', 'Optional', 'PlayerSequenceDataset', 'TEAMFRAME_COLS', 'Tuple', 'build_player_id_map', 'join_teamframe', 'load_processed_data', 'np', 'pl', 'torch']


In [22]:
players_train, players_test, teamframe_train, teamframe_test = load_processed_data()

players_train_joined = join_teamframe(players_train, teamframe_train)
players_test_joined  = join_teamframe(players_test, teamframe_test)

player_id_map = build_player_id_map(players_train_joined)

print("players_train_joined:", players_train_joined.shape)
print("players_test_joined:", players_test_joined.shape)
print("num unique players:", len(player_id_map))


players_train_joined: (4880579, 58)
players_test_joined: (49753, 58)
num unique players: 1384


In [24]:
players_train_joined.select([
    *[pl.col(c).is_null().sum().alias(f"{c}_nulls") for c in FEATURE_COLS]
]).to_pandas().T.head(40)


Unnamed: 0,0
x_norm_nulls,0
y_norm_nulls,0
dir_norm_nulls,0
o_norm_nulls,0
s_nulls,0
a_nulls,0
vx_nulls,0
vy_nulls,0
ax_nulls,0
ay_nulls,0


In [25]:
best_cfg = hyperparam_search(
    players_joined=players_train_joined,
    player_id_map=player_id_map,
    num_trials=1,          # try 3â€“4 to start
)

best_cfg


Starting tiny hyperparam search over 1 trials.


: 

In [None]:
model = train_final_model(
    players_joined=players_train_joined,
    player_id_map=player_id_map,
    cfg=best_cfg,
)



In [None]:

predict_on_test(
    model=model,
    players_test_joined=players_test_joined,
    player_id_map=player_id_map,
    out_path="models/player_level_predictions.csv",
)