In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# --- Hyperparameters ---
# Data dimensions from Big Data Bowl (from dataclean.ipynb processing)
T_HIST = 25         # Number of historical frames (max in dataset)
T_PRED = 25         # Number of frames to predict (max in dataset)
N_AGENTS = 9        # Actual number of agents per frame in data
D_AGENT = 33        # Agent features: player_height, player_weight, s, a, dir, o, x_rel, y_rel + one-hot encoded position/side/role
D_GLOBAL = 18       # Global features: down, yards_to_go + one-hot encoded dropback_type, team_coverage_type

# Model architecture hyperparameters
D_MODEL = 128       # Transformer Embedding Dimension
D_LATENT = 32       # Latent variable Z dimension
N_HEADS = 8         # Transformer Heads
N_LAYERS = 3        # Transformer Encoder Layers
KL_BETA = 0.01      # KL Loss Weight (needs tuning/annealing)

In [2]:
class HeliocentricityTransformer(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        
        # Unpack kwargs for clarity
        self.T_HIST, self.T_PRED, self.N_AGENTS = kwargs['T_HIST'], kwargs['T_PRED'], kwargs['N_AGENTS']
        self.D_AGENT, self.D_GLOBAL, self.D_MODEL = kwargs['D_AGENT'], kwargs['D_GLOBAL'], kwargs['D_MODEL']
        self.D_LATENT, self.N_HEADS, self.N_LAYERS = kwargs['D_LATENT'], kwargs['N_HEADS'], kwargs['N_LAYERS']
        self.KL_BETA = kwargs['KL_BETA']
        
        # --- 1. Initial Embedding Layers ---
        self.agent_embed = nn.Linear(self.D_AGENT, self.D_MODEL)
        self.global_embed = nn.Linear(self.D_GLOBAL, self.D_MODEL)
        
        # --- 2. Transformer Encoder (Core STT) ---
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.D_MODEL, 
            nhead=self.N_HEADS, 
            dim_feedforward=self.D_MODEL * 4, 
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=self.N_LAYERS)
        
        # --- 3. CVAE Heads (Prediction Heads from Context C) ---
        # CAVE requires a context vector (C) for prior/recognition networks
        
        # CVAE: Prior Network (p(z|C)) -> outputs mu_prior, log_var_prior
        self.mlp_prior = nn.Sequential(
            nn.Linear(self.D_MODEL, self.D_MODEL),
            nn.ReLU(),
            nn.Linear(self.D_MODEL, 2 * self.D_LATENT)
        )

        # CVAE: Recognition Network (q(z|C, Y_truth)) -> outputs mu_rec, log_var_rec
        # Input is C + flattened Y_truth (context + ground truth trajectory)
        self.mlp_recognition = nn.Sequential(
            nn.Linear(self.D_MODEL + self.T_PRED * self.N_AGENTS * 2, self.D_MODEL),
            nn.ReLU(),
            nn.Linear(self.D_MODEL, 2 * self.D_LATENT)
        )

        # --- 4. Decoder Head (Trajectory Generator) ---
        # Input is C + Z. Output is the flattened trajectory (x, y coordinates)
        self.mlp_decoder = nn.Sequential(
            nn.Linear(self.D_MODEL + self.D_LATENT, self.D_MODEL * 2),
            nn.ReLU(),
            # Output shape: (Batch, T_PRED * N_AGENTS * 2)
            nn.Linear(self.D_MODEL * 2, self.T_PRED * self.N_AGENTS * 2)
        )
        
    def reparameterize(self, mu, log_var):
        # Sampling Z = mu + sigma * epsilon
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, X_hist_agents, X_global, Y_truth=None):
        B = X_hist_agents.size(0)
        
        # 1. Agent Embedding (Per-Frame)
        # (B, T_hist, N_agents, D_agent) -> (B, T_hist, N_agents, D_MODEL)
        agent_emb = self.agent_embed(X_hist_agents)
        
        # 2. Global CLS Token Embedding
        # (B, D_global) -> (B, D_MODEL)
        global_emb = self.global_embed(X_global)
        # Expand for T_hist: (B, 1, 1, D_MODEL). Expand(T_hist) not needed as we use flatten below
        
        # 3. Prepare Sequence for Transformer
        
        # Create CLS Token for each time step in the historical sequence
        # Shape: (B, T_hist, 1, D_MODEL)
        cls_tokens = global_emb.unsqueeze(1).unsqueeze(1).expand(-1, self.T_HIST, -1, -1)
        
        # Concatenate CLS token to the front of each frame's set of agents
        # Shape: (B, T_hist, N_agents + 1, D_MODEL)
        input_sequence = torch.cat([cls_tokens, agent_emb], dim=2)
        
        # Flatten time and agent dimensions for Transformer input
        # Shape: (B, T_hist * (N_agents + 1), D_MODEL)
        flat_input = input_sequence.view(B, -1, self.D_MODEL)
        
        # Add Positional/Temporal Encoding here (Omitted)
        
        # 4. Transformer Encoding
        # Encoded_Output: (B, T_hist * (N_agents + 1), D_MODEL)
        encoded_output = self.transformer_encoder(flat_input)
        
        # 5. Extract Context Vector C from the first CLS token
        # The first token is CLS at t=0. C should capture the full context.
        # Context C: (B, D_MODEL)
        C = encoded_output[:, 0, :]
        
        # --- CVAE Latent Space ---
        # Prior Network: p(z|C)
        mu_prior, log_var_prior = self.mlp_prior(C).chunk(2, dim=-1)

        # Recognition Network: q(z|C, Y_truth) is only used during training
        if Y_truth is not None:
            # Flatten Y_truth: (B, T_pred * N_agents * 2)
            Y_flat = Y_truth.view(B, -1)
            rec_input = torch.cat([C, Y_flat], dim=-1)
            mu_rec, log_var_rec = self.mlp_recognition(rec_input).chunk(2, dim=-1)
            Z = self.reparameterize(mu_rec, log_var_rec)
        else:
            # Inference: Sample Z from the Prior distribution p(z|C)
            # This is key for generating diverse, expected trajectories (E)
            Z = self.reparameterize(mu_prior, log_var_prior)
            mu_rec, log_var_rec = mu_prior, log_var_prior # Use prior stats for loss calc placeholder

        # --- Decoder ---
        # Input: [C, Z]
        decoder_input = torch.cat([C, Z], dim=-1)
        
        # Output: (B, T_pred * N_agents * 2)
        Y_pred_flat = self.mlp_decoder(decoder_input)
        
        # Reshape to (B, T_pred, N_agents, 2)
        Y_pred = Y_pred_flat.view(B, self.T_PRED, self.N_AGENTS, 2)
        
        return Y_pred, mu_rec, log_var_rec, mu_prior, log_var_prior

In [3]:
def vae_loss(Y_pred, Y_truth, mu_rec, log_var_rec, mu_prior, log_var_prior, KL_BETA):
    # 1. Reconstruction Loss (L_recon): RMSE on the predicted x, y coordinates
    # We use MSE here for simplicity in PyTorch, but RMSE is the metric.
    L_recon = F.mse_loss(Y_pred, Y_truth, reduction='sum') / Y_pred.size(0) # Mean over batch

    # 2. KL Divergence Loss (L_KL): KL(q(z|C, Y) || p(z|C))
    # Closed-form KL for Gaussian: 0.5 * sum(1 + log(sigma_prior^2) - log(sigma_rec^2) - (mu_rec - mu_prior)^2 / sigma_prior^2 - exp(log(sigma_rec^2)) / sigma_prior^2)
    # Using torch.exp(log_var) = sigma^2
    kl_loss = 0.5 * torch.sum(
        log_var_prior - log_var_rec - 1 
        + (torch.exp(log_var_rec) + (mu_rec - mu_prior).pow(2)) / torch.exp(log_var_prior)
    ) / Y_pred.size(0)

    # Total Loss (Weighted sum)
    total_loss = L_recon + KL_BETA * kl_loss
    return total_loss, L_recon.item(), kl_loss.item()

# --- Heliocentricity Inference Function (E Generator) ---
@torch.no_grad()
def generate_expected_trajectories(model, X_hist_agents, X_global, K=10):
    """
    Generates K diverse, plausible trajectories for the defense (E) 
    by sampling the latent space Z from the prior distribution.
    """
    model.eval()
    B = X_hist_agents.size(0)
    
    # Repeat inputs K times to batch the K samples
    X_hist_agents_K = X_hist_agents.repeat_interleave(K, dim=0)
    X_global_K = X_global.repeat_interleave(K, dim=0)

    # Since Y_truth=None, Z is sampled from the prior p(z|C)
    Y_pred_K, _, _, _, _ = model(X_hist_agents_K, X_global_K, Y_truth=None)
    
    # Reshape: (B * K, T_pred, N_agents, 2) -> (B, K, T_pred, N_agents, 2)
    return Y_pred_K.view(B, K, model.T_PRED, model.N_AGENTS, 2)

# Note: The final step of calculating Heliocentricity (H) based on 
# min separation distance (A vs E) is a NumPy/Pandas operation after this PyTorch step.

In [4]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
import numpy as np

# --- 1. Setup: Instantiate Model and Optimizer ---

# Define a dictionary for easy configuration
model_config = {
    'T_HIST': 25, 'T_PRED': 25, 'N_AGENTS': 9, 'D_AGENT': 33, 
    'D_GLOBAL': 18, 'D_MODEL': 128, 'D_LATENT': 32, 'N_HEADS': 8, 
    'N_LAYERS': 3, 'KL_BETA': 0.01 
}

# Instantiate the model and move to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HeliocentricityTransformer(**model_config).to(device)
optimizer = Adam(model.parameters(), lr=1e-4)
NUM_EPOCHS = 10
BATCH_SIZE = 32

# --- 2. Load Big Data Bowl Data from Disk ---
from pathlib import Path
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence

# Load processed data
data_path = Path('dataset/processed/processed_data.pt')
print(f"Loading data from {data_path}...")
loaded_data = torch.load(data_path)

# Extract data
historical_agent_features = loaded_data['historical_agent_features']
ground_truth_trajectories = loaded_data['ground_truth_trajectories']
global_context_features = loaded_data['global_context_features']

print(f"Loaded {len(historical_agent_features)} plays")
print(f"Global context shape: {global_context_features.shape}")

# Custom Dataset with Padding (both time and agent dimensions)
class FootballDataset(Dataset):
    def __init__(self, hist_features, gt_trajectories, global_features, max_hist_len=None, max_pred_len=None, max_n_agents=None):
        self.hist_features = hist_features
        self.gt_trajectories = gt_trajectories
        self.global_features = global_features
        
        # Determine max lengths if not provided
        self.max_hist_len = max_hist_len or max(x.shape[0] for x in hist_features)
        self.max_pred_len = max_pred_len or max(y.shape[0] for y in gt_trajectories)
        self.max_n_agents = max_n_agents or max(x.shape[1] for x in hist_features)
        
    def __len__(self):
        return len(self.hist_features)
    
    def __getitem__(self, idx):
        hist = self.hist_features[idx]  # (T_hist_actual, N_agents_actual, D_agent)
        gt = self.gt_trajectories[idx]  # (T_pred_actual, N_agents_actual, 2)
        global_feat = self.global_features[idx]  # (D_global,)
        
        # Get actual lengths
        hist_len = hist.shape[0]
        pred_len = gt.shape[0]
        n_agents = hist.shape[1]
        
        # Pad historical features to max_hist_len and max_n_agents
        # First pad time dimension
        if hist_len < self.max_hist_len:
            pad_hist_time = torch.zeros(self.max_hist_len - hist_len, hist.shape[1], hist.shape[2], dtype=hist.dtype)
            hist = torch.cat([hist, pad_hist_time], dim=0)
        else:
            hist = hist[:self.max_hist_len]
            hist_len = self.max_hist_len
        
        # Then pad agent dimension
        if n_agents < self.max_n_agents:
            pad_hist_agents = torch.zeros(hist.shape[0], self.max_n_agents - n_agents, hist.shape[2], dtype=hist.dtype)
            hist_padded = torch.cat([hist, pad_hist_agents], dim=1)
        else:
            hist_padded = hist[:, :self.max_n_agents, :]
            n_agents = self.max_n_agents
        
        # Pad ground truth to max_pred_len and max_n_agents
        # First pad time dimension
        if pred_len < self.max_pred_len:
            pad_gt_time = torch.zeros(self.max_pred_len - pred_len, gt.shape[1], 2, dtype=gt.dtype)
            gt = torch.cat([gt, pad_gt_time], dim=0)
        else:
            gt = gt[:self.max_pred_len]
            pred_len = self.max_pred_len
        
        # Then pad agent dimension
        if gt.shape[1] < self.max_n_agents:
            pad_gt_agents = torch.zeros(gt.shape[0], self.max_n_agents - gt.shape[1], 2, dtype=gt.dtype)
            gt_padded = torch.cat([gt, pad_gt_agents], dim=1)
        else:
            gt_padded = gt[:, :self.max_n_agents, :]
        
        return hist_padded, global_feat, gt_padded, hist_len, pred_len

# Create dataset with padding
# Use model config values as max lengths
dataset = FootballDataset(
    historical_agent_features, 
    ground_truth_trajectories, 
    global_context_features,
    max_hist_len=model_config['T_HIST'],
    max_pred_len=model_config['T_PRED'],
    max_n_agents=model_config['N_AGENTS']
)

# Create DataLoader
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

print(f"Created DataLoader with {len(dataset)} samples, batch size {BATCH_SIZE}")
print(f"Max hist length: {dataset.max_hist_len}, Max pred length: {dataset.max_pred_len}, Max agents: {dataset.max_n_agents}")




Loading data from dataset/processed/processed_data.pt...
Loaded 14108 plays
Global context shape: torch.Size([14108, 18])
Created DataLoader with 14108 samples, batch size 32
Max hist length: 25, Max pred length: 25, Max agents: 9


In [5]:
# --- 3. The Training Loop with Masking ---

def create_mask(lengths, max_len, device):
    """Create attention mask: True for valid positions, False for padding"""
    batch_size = len(lengths)
    mask = torch.arange(max_len, device=device).expand(batch_size, max_len) < lengths.unsqueeze(1)
    return mask

def train_model(model, train_loader, optimizer, model_config, device, num_epochs=20):
    """
    Train the Heliocentricity Transformer model.
    
    Args:
        model: The HeliocentricityTransformer model
        train_loader: DataLoader for training data
        optimizer: Optimizer for training
        model_config: Dictionary with model configuration
        device: Device to train on (cuda/cpu)
        num_epochs: Number of epochs to train
        
    Returns:
        Dictionary with training history
    """
    print(f"Starting training on {device}...")
    
    history = {
        'total_loss': [],
        'recon_loss': [],
        'kl_loss': []
    }
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        total_recon_loss = 0
        total_kl_loss = 0
        
        for batch_idx, (X_agents, X_global, Y_truth, hist_lens, pred_lens) in enumerate(train_loader):
            
            # Move to device
            X_agents = X_agents.to(device)
            X_global = X_global.to(device)
            Y_truth = Y_truth.to(device)
            hist_lens = hist_lens.to(device)
            pred_lens = pred_lens.to(device)
            
            # Zero gradients
            optimizer.zero_grad()
            
            # 1. Forward Pass
            # The forward pass uses the recognition network q(z|C, Y_truth) since Y_truth is provided.
            Y_pred, mu_rec, log_var_rec, mu_prior, log_var_prior = model(X_agents, X_global, Y_truth=Y_truth)
            
            # 2. Create mask for ground truth prediction loss
            # Only compute loss on valid (non-padded) timesteps
            pred_mask = create_mask(pred_lens, model_config['T_PRED'], device)
            # Expand mask to match Y_pred shape: (B, T_pred, N_agents, 2)
            pred_mask_expanded = pred_mask.unsqueeze(-1).unsqueeze(-1).expand_as(Y_pred)
            
            # Apply mask to predictions and ground truth
            Y_pred_masked = Y_pred * pred_mask_expanded
            Y_truth_masked = Y_truth * pred_mask_expanded
            
            # 3. Compute VAE Loss with masked outputs
            # Reconstruction loss only on valid timesteps
            L_recon = F.mse_loss(Y_pred_masked, Y_truth_masked, reduction='sum') / pred_lens.sum()
            
            # KL loss (not masked, as it's based on latent distribution)
            kl_loss = 0.5 * torch.sum(
                log_var_prior - log_var_rec - 1 
                + (torch.exp(log_var_rec) + (mu_rec - mu_prior).pow(2)) / torch.exp(log_var_prior)
            ) / X_agents.size(0)
            
            loss = L_recon + model_config['KL_BETA'] * kl_loss
            
            # 4. Backward Pass and Optimization
            loss.backward()
            # Optional: Gradient clipping to stabilize Transformer training
            # nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0) 
            optimizer.step()
            
            # Accumulate metrics
            total_loss += loss.item()
            total_recon_loss += L_recon.item()
            total_kl_loss += kl_loss.item()
            
            # Print update every 100 batches
            if (batch_idx + 1) % 100 == 0:
                print(f"  Batch {batch_idx+1}/{len(train_loader)} | Total Loss: {total_loss / (batch_idx+1):.4f} | Recon: {total_recon_loss / (batch_idx+1):.4f} | KL: {total_kl_loss / (batch_idx+1):.4f}")
    
        # --- End of Epoch ---
        avg_epoch_loss = total_loss / len(train_loader)
        avg_recon = total_recon_loss / len(train_loader)
        avg_kl = total_kl_loss / len(train_loader)
        
        history['total_loss'].append(avg_epoch_loss)
        history['recon_loss'].append(avg_recon)
        history['kl_loss'].append(avg_kl)
        
        print(f"\n--- Epoch {epoch+1}/{num_epochs} Complete ---")
        print(f"Average Total Loss: {avg_epoch_loss:.4f} | Recon: {avg_recon:.4f} | KL: {avg_kl:.4f}")
        
        # Optional: Implement a validation loop here and save the best model weights
        # torch.save(model.state_dict(), f"best_heliocentricity_model.pt")
    
    return history

# Run training
# training_history = train_model(model, train_loader, optimizer, model_config, device, NUM_EPOCHS)

In [6]:
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import mean_squared_error

# Load the saved model weights
# model.load_state_dict(torch.load("best_heliocentricity_model.pt"))

@torch.no_grad()
def evaluate_model(model, data_loader, device):
    model.eval()
    total_rmse = []
    total_recon_loss = 0
    total_kl_loss = 0
    
    # Store data for final Heliocentricity calculation outside the loop
    results_for_H_calc = []

    for batch_idx, (X_agents, X_global, Y_truth, hist_lens, pred_lens) in enumerate(train_loader):
            
        # Move to device
        X_agents = X_agents.to(device)
        X_global = X_global.to(device)
        Y_truth = Y_truth.to(device)
        hist_lens = hist_lens.to(device)
        pred_lens = pred_lens.to(device)

        # 1. Deterministic Prediction (for Reconstruction Loss)
        # Uses the recognition network q(z|C, Y_truth) which yields the best reconstruction
        Y_pred, mu_rec, log_var_rec, mu_prior, log_var_prior = model(X_agents, X_global, Y_truth=Y_truth)
        
        # Calculate Loss components
        loss, L_recon, L_KL = vae_loss(
            Y_pred, Y_truth, 
            mu_rec, log_var_rec, mu_prior, log_var_prior, 
            model.KL_BETA
        )
        total_recon_loss += L_recon
        total_kl_loss += L_KL

        # 2. Calculate Root Mean Squared Error (RMSE) on the deterministic prediction
        # Detach and convert to numpy for standard metric calculation
        Y_pred_np = Y_pred.cpu().numpy()
        Y_truth_np = Y_truth.cpu().numpy()
        
        # Calculate RMSE for each sample and average (Flattening all T_pred * N_agents * 2 dimensions)
        sample_rmse = np.sqrt(mean_squared_error(Y_truth_np.reshape(-1, 1), Y_pred_np.reshape(-1, 1)))
        total_rmse.append(sample_rmse)

        # 3. Generate K Stochastic Predictions for Heliocentricity (E)
        # This uses the prior network p(z|C) for diverse sampling
        K = 10 # Number of samples per play
        Y_pred_K = generate_expected_trajectories(model, X_agents, X_global, K=K).cpu().numpy()
        
        # Store results (You would need to include the actual Star Receiver ID/Index here)
        for i in range(Y_truth_np.shape[0]):
            results_for_H_calc.append({
                'Y_truth': Y_truth_np[i],
                'Y_pred_K': Y_pred_K[i],
                # Assume you have a way to link back to the play ID and the Star Receiver Index
                'star_idx': 4 # Pseudo-Index for the star receiver
            })

        # Print update every 100 batches
        if (batch_idx + 1) % 100 == 0:
            print(f"Batch {batch_idx+1}/{len(train_loader)}")

    avg_rmse = np.mean(total_rmse)
    avg_recon = total_recon_loss / len(data_loader)
    avg_kl = total_kl_loss / len(data_loader)
    
    print(f"\n--- Validation Results ---")
    print(f"Avg Trajectory RMSE: {avg_rmse:.4f} meters")
    print(f"Avg Reconstruction Loss: {avg_recon:.4f}")
    print(f"Avg KL Divergence: {avg_kl:.4f}")
    
    return results_for_H_calc

In [7]:
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch.optim import Adam
import numpy as np

# --- 1. Setup: Instantiate Model and Optimizer ---

# Define a dictionary for easy configuration
model_config = {
    'T_HIST': 25, 'T_PRED': 25, 'N_AGENTS': 9, 'D_AGENT': 33, 
    'D_GLOBAL': 18, 'D_MODEL': 128, 'D_LATENT': 32, 'N_HEADS': 8, 
    'N_LAYERS': 3, 'KL_BETA': 0.01 
}

# Instantiate the model and move to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HeliocentricityTransformer(**model_config).to(device)
optimizer = Adam(model.parameters(), lr=1e-4)
NUM_EPOCHS = 10
BATCH_SIZE = 64

# --- 2. Load Big Data Bowl Data from Disk ---
from pathlib import Path
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence

# Load processed data
data_path = Path('dataset/processed/processed_data.pt')
print(f"Loading data from {data_path}...")
loaded_data = torch.load(data_path)

# Extract data
historical_agent_features = loaded_data['historical_agent_features']
ground_truth_trajectories = loaded_data['ground_truth_trajectories']
global_context_features = loaded_data['global_context_features']

print(f"Loaded {len(historical_agent_features)} plays")
print(f"Global context shape: {global_context_features.shape}")

# Custom Dataset with Padding (both time and agent dimensions)
class FootballDataset(Dataset):
    def __init__(self, hist_features, gt_trajectories, global_features, max_hist_len=None, max_pred_len=None, max_n_agents=None):
        self.hist_features = hist_features
        self.gt_trajectories = gt_trajectories
        self.global_features = global_features
        
        # Determine max lengths if not provided
        self.max_hist_len = max_hist_len or max(x.shape[0] for x in hist_features)
        self.max_pred_len = max_pred_len or max(y.shape[0] for y in gt_trajectories)
        self.max_n_agents = max_n_agents or max(x.shape[1] for x in hist_features)
        
    def __len__(self):
        return len(self.hist_features)
    
    def __getitem__(self, idx):
        hist = self.hist_features[idx]  # (T_hist_actual, N_agents_actual, D_agent)
        gt = self.gt_trajectories[idx]  # (T_pred_actual, N_agents_actual, 2)
        global_feat = self.global_features[idx]  # (D_global,)
        
        # Get actual lengths
        hist_len = hist.shape[0]
        pred_len = gt.shape[0]
        n_agents = hist.shape[1]
        
        # Pad historical features to max_hist_len and max_n_agents
        # First pad time dimension
        if hist_len < self.max_hist_len:
            pad_hist_time = torch.zeros(self.max_hist_len - hist_len, hist.shape[1], hist.shape[2], dtype=hist.dtype)
            hist = torch.cat([hist, pad_hist_time], dim=0)
        else:
            hist = hist[:self.max_hist_len]
            hist_len = self.max_hist_len
        
        # Then pad agent dimension
        if n_agents < self.max_n_agents:
            pad_hist_agents = torch.zeros(hist.shape[0], self.max_n_agents - n_agents, hist.shape[2], dtype=hist.dtype)
            hist_padded = torch.cat([hist, pad_hist_agents], dim=1)
        else:
            hist_padded = hist[:, :self.max_n_agents, :]
            n_agents = self.max_n_agents
        
        # Pad ground truth to max_pred_len and max_n_agents
        # First pad time dimension
        if pred_len < self.max_pred_len:
            pad_gt_time = torch.zeros(self.max_pred_len - pred_len, gt.shape[1], 2, dtype=gt.dtype)
            gt = torch.cat([gt, pad_gt_time], dim=0)
        else:
            gt = gt[:self.max_pred_len]
            pred_len = self.max_pred_len
        
        # Then pad agent dimension
        if gt.shape[1] < self.max_n_agents:
            pad_gt_agents = torch.zeros(gt.shape[0], self.max_n_agents - gt.shape[1], 2, dtype=gt.dtype)
            gt_padded = torch.cat([gt, pad_gt_agents], dim=1)
        else:
            gt_padded = gt[:, :self.max_n_agents, :]
        
        return hist_padded, global_feat, gt_padded, hist_len, pred_len

# Create dataset with padding
# Use model config values as max lengths
dataset = FootballDataset(
    historical_agent_features, 
    ground_truth_trajectories, 
    global_context_features,
    max_hist_len=model_config['T_HIST'],
    max_pred_len=model_config['T_PRED'],
    max_n_agents=model_config['N_AGENTS']
)

# Define the desired lengths for the train and test sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Perform the split
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size]
)

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

print(f'train_size: {train_size}\ntest_size: {test_size}')

Loading data from dataset/processed/processed_data.pt...
Loaded 14108 plays
Global context shape: torch.Size([14108, 18])
train_size: 11286
test_size: 2822


In [8]:
PRETRAINED_WGTS = Path('dataset/pretrained/best_heliocentricity_model.pt')

if PRETRAINED_WGTS is None:
    print('Training model from scratch:')
    train_model(model,
                train_loader,
                optimizer,
                model_config,
                device,
                num_epochs=NUM_EPOCHS)
    torch.save(model.state_dict(), 'dataset/pretrained/best_heliocentricity_model.pt')
else:
    print('Loading model from pretrained weights:')
    state_dict = torch.load(PRETRAINED_WGTS, map_location=device)
    model.load_state_dict(state_dict)

Loading model from pretrained weights:


In [9]:
pred = evaluate_model(model, test_loader, device)

Batch 100/177

--- Validation Results ---
Avg Trajectory RMSE: 2.8821 meters
Avg Reconstruction Loss: 14763.0471
Avg KL Divergence: 608.6204


In [10]:
def min_separation_distance(receiver_coords, defense_coords):
    # receiver_coords: (T_pred, 2)
    # defense_coords: (T_pred, N_defenders, 2)
    
    # Calculate distance from receiver to every defender at every frame
    # (T_pred, N_defenders)
    dist_to_defenders = np.linalg.norm(
        receiver_coords[:, np.newaxis, :] - defense_coords, axis=2
    )
    
    # Find the minimum separation at each frame (T_pred,)
    min_dist = np.min(dist_to_defenders, axis=1)
    return min_dist # Actual Attention (A) or Expected Coverage (E)

In [11]:
def compute_heliocentricity(play_data):
    # Data is extracted from the evaluation loop's results_for_H_calc
    Y_truth = play_data['Y_truth']       # (T_pred, N_agents, 2)
    Y_pred_K = play_data['Y_pred_K']     # (K, T_pred, N_agents, 2)
    star_idx = play_data['star_idx']     # Index of the star receiver
    
    # --- 1. Identify Offensive/Defensive Players ---
    # Assuming player 0-10 are Offense, 11-21 are Defense (Adjust based on BDB index)
    def_indices = np.arange(11, 22) 
    
    # --- 2. Calculate Actual Attention (A) ---
    actual_R_coords = Y_truth[:, star_idx, :]
    actual_D_coords = Y_truth[:, def_indices, :]
    A = min_separation_distance(actual_R_coords, actual_D_coords) # (T_pred,)
    
    # --- 3. Calculate Expected Coverage (E) ---
    E_K = []
    for k in range(Y_pred_K.shape[0]):
        # The star receiver's true position is used, but compared to predicted defense
        predicted_D_coords = Y_pred_K[k, :, def_indices, :] 
        
        # E_k is the min separation based on the k-th predicted defensive trajectory
        E_k = min_separation_distance(actual_R_coords, predicted_D_coords)
        E_K.append(E_k)
        
    # E_mean: The average expected minimum separation across all K samples (T_pred,)
    E_mean = np.mean(np.stack(E_K, axis=0), axis=0) 
    
    # --- 4. Calculate Heliocentricity (H) ---
    # H = (E - A) averaged over the prediction window (T_pred)
    H_frame_diff = E_mean - A
    H_score = np.mean(H_frame_diff)
    
    return H_score, H_frame_diff # Return both the scalar and the time-series