In [1]:
# === CELL 1: Imports ===
import pandas as pd
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

PyTorch: 2.5.1+cu121
CUDA: True
GPU: NVIDIA GeForce RTX 4060 Laptop GPU


In [2]:
# === CELL 2: Load data ===
df = pd.read_parquet('data/sequences_continuous_balanced.parquet')
print(f"Sequences: {len(df):,}")
print(f"Goals: {df['goal'].sum()} ({df['goal'].mean()*100:.1f}%)")

# Load vocabulary
with open('data/vocab_continuous.json', 'r') as f:
    type_vocab = json.load(f)

with open('data/id_to_type_continuous.json', 'r') as f:
    id_to_type = json.load(f)
    id_to_type = {int(k): v for k, v in id_to_type.items()}

print(f"Vocab size: {len(type_vocab)}")
print(f"\nFirst sequence:")
print(df['events'].iloc[0][:5])

Sequences: 19,019
Goals: 951 (5.0%)
Vocab size: 7

First sequence:
[{'end_x': 99.0, 'end_y': 74.4, 'type': 'START'}
 {'end_x': 92.9, 'end_y': 73.9, 'type': 'Carry'}
 {'end_x': 90.4, 'end_y': 55.2, 'type': 'Pass'}
 {'end_x': 90.4, 'end_y': 55.4, 'type': 'Carry'}
 {'end_x': 102.8, 'end_y': 54.8, 'type': 'Pass'}]


In [3]:
# === Normalizacja w Dataset ===
class ContinuousEventDataset(Dataset):
    def __init__(self, df, type_vocab, max_length=14):
        self.sequences = df['events'].tolist()
        self.type_vocab = type_vocab
        self.max_length = max_length
        self.pad_id = type_vocab['<pad>']
        # Normalizacja constants
        self.x_max = 120.0
        self.y_max = 80.0
    
    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        events = self.sequences[idx]
        
        # Parse + normalize positions
        types = [self.type_vocab[e['type']] for e in events]
        positions = [
            [e['end_x'] / self.x_max if e['end_x'] is not None else 0.0,
             e['end_y'] / self.y_max if e['end_y'] is not None else 0.0] 
            for e in events
        ]
        has_position = [e['end_x'] is not None for e in events]
        
        # Causal shift
        input_types = types[:-1]
        input_positions = positions[:-1]
        input_mask = has_position[:-1]
        
        target_types = types[1:]
        target_positions = positions[1:]
        target_mask = has_position[1:]
        
        # Pad
        seq_len = len(input_types)
        pad_len = self.max_length - 1 - seq_len
        
        input_types += [self.pad_id] * pad_len
        input_positions += [[0.0, 0.0]] * pad_len
        input_mask += [False] * pad_len
        
        target_types += [-100] * pad_len
        target_positions += [[0.0, 0.0]] * pad_len
        target_mask += [False] * pad_len
        
        return {
            'input_types': torch.tensor(input_types[:self.max_length-1], dtype=torch.long),
            'input_positions': torch.tensor(input_positions[:self.max_length-1], dtype=torch.float),
            'input_mask': torch.tensor(input_mask[:self.max_length-1], dtype=torch.bool),
            'target_types': torch.tensor(target_types[:self.max_length-1], dtype=torch.long),
            'target_positions': torch.tensor(target_positions[:self.max_length-1], dtype=torch.float),
            'target_mask': torch.tensor(target_mask[:self.max_length-1], dtype=torch.bool)
        }

In [4]:
# === CELL 4: Train/Val Split ===
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['goal'],
    random_state=42
)

print(f"Total:    {len(df):,}")
print(f"Train:    {len(train_df):,} ({len(train_df)/len(df)*100:.1f}%)")
print(f"Val:      {len(val_df):,} ({len(val_df)/len(df)*100:.1f}%)")
print(f"\nTrain goals: {train_df['goal'].sum()} ({train_df['goal'].mean()*100:.1f}%)")
print(f"Val goals:   {val_df['goal'].sum()} ({val_df['goal'].mean()*100:.1f}%)")

Total:    19,019
Train:    15,215 (80.0%)
Val:      3,804 (20.0%)

Train goals: 761 (5.0%)
Val goals:   190 (5.0%)


In [5]:
# === CELL 5: Create DataLoaders ===
train_dataset = ContinuousEventDataset(train_df, type_vocab)
val_dataset = ContinuousEventDataset(val_df, type_vocab)

batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches:   {len(val_loader)}")

# Test batch
batch = next(iter(train_loader))
print(f"\nBatch shapes:")
for k, v in batch.items():
    print(f"  {k}: {v.shape}")

Train batches: 951
Val batches:   238

Batch shapes:
  input_types: torch.Size([16, 13])
  input_positions: torch.Size([16, 13, 2])
  input_mask: torch.Size([16, 13])
  target_types: torch.Size([16, 13])
  target_positions: torch.Size([16, 13, 2])
  target_mask: torch.Size([16, 13])


In [6]:
# === CELL 6: Model ===
import math

class ContinuousEventModel(nn.Module):
    def __init__(self, n_types, d_model=256, n_heads=8, n_layers=6):
        super().__init__()
        self.d_model = d_model
        
        # Type embedding
        self.type_embedding = nn.Embedding(n_types, d_model)
        
        # Position encoding (learned MLP)
        self.position_encoder = nn.Sequential(
            nn.Linear(2, d_model // 2),
            nn.ReLU(),
            nn.Linear(d_model // 2, d_model)
        )
        
        # Transformer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_model * 4,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)
        
        # Output heads
        self.type_head = nn.Linear(d_model, n_types)
        self.position_head = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Linear(d_model // 2, 4)  # [x_mean, x_std, y_mean, y_std]
        )
    
    def forward(self, input_types, input_positions, input_mask):
        # Embeddings
        type_emb = self.type_embedding(input_types)  # [B, L, D]
        pos_emb = self.position_encoder(input_positions)  # [B, L, D]
        
        # Mask positions (zero out Shot/GOAL)
        pos_emb = pos_emb * input_mask.unsqueeze(-1).float()
        
        # Combine
        hidden = type_emb + pos_emb  # [B, L, D]
        
        # Causal mask
        seq_len = hidden.size(1)
        causal_mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool().to(hidden.device)
        
        # Transformer
        hidden = self.transformer(hidden, mask=causal_mask)  # [B, L, D]
        
        # Predictions
        type_logits = self.type_head(hidden)  # [B, L, n_types]
        pos_pred = self.position_head(hidden)  # [B, L, 4]
        
        x_mean, x_std, y_mean, y_std = pos_pred.chunk(4, dim=-1)
        x_std = torch.exp(x_std)  # positive
        y_std = torch.exp(y_std)
        
        return type_logits, x_mean.squeeze(-1), x_std.squeeze(-1), y_mean.squeeze(-1), y_std.squeeze(-1)

# Create model
model = ContinuousEventModel(
    n_types=len(type_vocab),
    d_model=512,
    n_heads=8,
    n_layers=12
).cuda()
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

Parameters: 38,100,491


In [7]:
# Test forward pass
batch = next(iter(train_loader))
input_types = batch['input_types'].cuda()
input_positions = batch['input_positions'].cuda()
input_mask = batch['input_mask'].cuda()

with torch.no_grad():
    type_logits, x_mean, x_std, y_mean, y_std = model(input_types, input_positions, input_mask)

print("Output shapes:")
print(f"  type_logits: {type_logits.shape}")
print(f"  x_mean: {x_mean.shape}")
print(f"  x_std: {x_std.shape}")
print(f"  y_mean: {y_mean.shape}")
print(f"  y_std: {y_std.shape}")

print(f"\nSample predictions:")
print(f"  x_std range: [{x_std.min():.2f}, {x_std.max():.2f}]")
print(f"  y_std range: [{y_std.min():.2f}, {y_std.max():.2f}]")

Output shapes:
  type_logits: torch.Size([16, 13, 7])
  x_mean: torch.Size([16, 13])
  x_std: torch.Size([16, 13])
  y_mean: torch.Size([16, 13])
  y_std: torch.Size([16, 13])

Sample predictions:
  x_std range: [0.78, 1.36]
  y_std range: [0.95, 1.53]


In [8]:
# === CELL 7: Loss ===
def gaussian_nll_loss(mean, std, target, mask):
    """Gaussian negative log-likelihood"""
    loss = 0.5 * torch.log(2 * math.pi * std**2) + (target - mean)**2 / (2 * std**2)
    loss = loss * mask.float()  # apply mask
    return loss.sum() / mask.sum().clamp(min=1)

def compute_loss(model, batch):
    input_types = batch['input_types'].cuda()
    input_positions = batch['input_positions'].cuda()
    input_mask = batch['input_mask'].cuda()
    target_types = batch['target_types'].cuda()
    target_positions = batch['target_positions'].cuda()
    target_mask = batch['target_mask'].cuda()
    
    # Forward
    type_logits, x_mean, x_std, y_mean, y_std = model(input_types, input_positions, input_mask)
    
    # Type loss (cross entropy)
    type_loss = nn.functional.cross_entropy(
        type_logits.reshape(-1, type_logits.size(-1)),
        target_types.reshape(-1),
        ignore_index=-100
    )
    
    # Position losses (Gaussian NLL, masked)
    x_loss = gaussian_nll_loss(x_mean, x_std, target_positions[..., 0], target_mask)
    y_loss = gaussian_nll_loss(y_mean, y_std, target_positions[..., 1], target_mask)
    
    # Combined (equal weights for now)
    total_loss = type_loss + x_loss + y_loss
    
    return total_loss, type_loss, x_loss, y_loss

# Test
batch = next(iter(train_loader))
total_loss, type_loss, x_loss, y_loss = compute_loss(model, batch)
print(f"Losses:")
print(f"  type: {type_loss.item():.4f}")
print(f"  x:    {x_loss.item():.4f}")
print(f"  y:    {y_loss.item():.4f}")
print(f"  total: {total_loss.item():.4f}")

Losses:
  type: 1.9671
  x:    1.2997
  y:    1.2777
  total: 4.5445


In [9]:
# === CELL 8: Training ===
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=1e-4)
epochs = 15

print(f"Starting training...")
print(f"Epochs: {epochs}\n")

for epoch in range(epochs):
    model.train()
    total_loss = 0
    total_type_loss = 0
    total_x_loss = 0
    total_y_loss = 0
    
    for batch_idx, batch in enumerate(train_loader):
        loss, type_loss, x_loss, y_loss = compute_loss(model, batch)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        total_type_loss += type_loss.item()
        total_x_loss += x_loss.item()
        total_y_loss += y_loss.item()
        
        if (batch_idx + 1) % 100 == 0:
            avg_loss = total_loss / (batch_idx + 1)
            print(f"Epoch {epoch+1}/{epochs} | Batch {batch_idx+1}/{len(train_loader)} | Loss: {avg_loss:.4f}")
    
    # Epoch summary
    n = len(train_loader)
    print(f"âœ… Epoch {epoch+1} | Loss: {total_loss/n:.4f} (type: {total_type_loss/n:.4f}, x: {total_x_loss/n:.4f}, y: {total_y_loss/n:.4f})\n")

print("ðŸŽ‰ Training finished!")

Starting training...
Epochs: 15

Epoch 1/15 | Batch 100/951 | Loss: 1.3628
Epoch 1/15 | Batch 200/951 | Loss: 1.2014
Epoch 1/15 | Batch 300/951 | Loss: 1.0625
Epoch 1/15 | Batch 400/951 | Loss: 0.8580
Epoch 1/15 | Batch 500/951 | Loss: 0.6844
Epoch 1/15 | Batch 600/951 | Loss: 0.5559
Epoch 1/15 | Batch 700/951 | Loss: 0.4534
Epoch 1/15 | Batch 800/951 | Loss: 0.3658
Epoch 1/15 | Batch 900/951 | Loss: 0.2940
âœ… Epoch 1 | Loss: 0.2596 (type: 0.8325, x: -0.4102, y: -0.1627)

Epoch 2/15 | Batch 100/951 | Loss: -0.2665
Epoch 2/15 | Batch 200/951 | Loss: -0.2861
Epoch 2/15 | Batch 300/951 | Loss: -0.2893
Epoch 2/15 | Batch 400/951 | Loss: -0.2981
Epoch 2/15 | Batch 500/951 | Loss: -0.3179
Epoch 2/15 | Batch 600/951 | Loss: -0.3246
Epoch 2/15 | Batch 700/951 | Loss: -0.3322
Epoch 2/15 | Batch 800/951 | Loss: -0.3374
Epoch 2/15 | Batch 900/951 | Loss: -0.3361
âœ… Epoch 2 | Loss: -0.3363 (type: 0.7489, x: -0.6578, y: -0.4274)

Epoch 3/15 | Batch 100/951 | Loss: -0.3985
Epoch 3/15 | Batch 200/9

In [10]:
# === CELL 9: Validation ===
model.eval()
val_loss = 0
val_type_loss = 0
val_x_loss = 0
val_y_loss = 0

with torch.no_grad():
    for batch in val_loader:
        loss, type_loss, x_loss, y_loss = compute_loss(model, batch)
        val_loss += loss.item()
        val_type_loss += type_loss.item()
        val_x_loss += x_loss.item()
        val_y_loss += y_loss.item()

n = len(val_loader)
print(f"Validation Results:")
print(f"  Total: {val_loss/n:.4f}")
print(f"  Type:  {val_type_loss/n:.4f}")
print(f"  X:     {val_x_loss/n:.4f}")
print(f"  Y:     {val_y_loss/n:.4f}")

Validation Results:
  Total: -0.6098
  Type:  0.6743
  X:     -0.7523
  Y:     -0.5318


In [11]:
torch.save(model.state_dict(), 'artifacts/continuous_model.pt')

In [12]:
# === CELL 10: Monte Carlo xT ===
def calculate_xT_continuous(model, start_events, type_vocab, id_to_type,
                            n_rollouts=1000, n_steps=5, device='cuda'):
    """
    Monte Carlo xT dla continuous model.
    start_events: lista dict [{'type': 'START', 'end_x': 85.0, 'end_y': 15.0}, ...]
    """
    model.eval()
    goal_count = 0
    goal_id = type_vocab['GOAL']
    pad_id = type_vocab['<pad>']
    
    for _ in range(n_rollouts):
        # Skopiuj start sequence
        current_types = [type_vocab[e['type']] for e in start_events]
        current_positions = [[e['end_x']/120.0, e['end_y']/80.0] for e in start_events]
        current_masks = [e['end_x'] is not None for e in start_events]
        
        for step in range(n_steps):
            # Przygotuj input (ostatnie 13)
            inp_types = current_types[-13:] if len(current_types) > 13 else current_types
            inp_pos = current_positions[-13:] if len(current_positions) > 13 else current_positions
            inp_mask = current_masks[-13:] if len(current_masks) > 13 else current_masks
            
            # Pad
            pad_len = 13 - len(inp_types)
            inp_types = inp_types + [pad_id] * pad_len
            inp_pos = inp_pos + [[0.0, 0.0]] * pad_len
            inp_mask = inp_mask + [False] * pad_len
            
            # Tensors
            inp_types_t = torch.tensor([inp_types], dtype=torch.long).to(device)
            inp_pos_t = torch.tensor([inp_pos], dtype=torch.float).to(device)
            inp_mask_t = torch.tensor([inp_mask], dtype=torch.bool).to(device)
            
            # Forward
            with torch.no_grad():
                type_logits, x_mean, x_std, y_mean, y_std = model(inp_types_t, inp_pos_t, inp_mask_t)
                
                # Ostatnia pozycja w sekwencji
                idx = len(current_types) - 1
                if idx >= 13:
                    idx = 12
                
                # Sample type
                probs = torch.softmax(type_logits[0, idx], dim=0)
                next_type = torch.multinomial(probs, 1).item()
                
                # Sample position (Gaussian)
                if next_type not in [type_vocab['Shot'], goal_id, type_vocab['NO_GOAL']]:
                    x_sample = torch.normal(x_mean[0, idx], x_std[0, idx]).item()
                    y_sample = torch.normal(y_mean[0, idx], y_std[0, idx]).item()
                    # Clip to [0, 1]
                    x_sample = max(0.0, min(1.0, x_sample))
                    y_sample = max(0.0, min(1.0, y_sample))
                    has_pos = True
                else:
                    x_sample, y_sample = 0.0, 0.0
                    has_pos = False
            
            # Append
            current_types.append(next_type)
            current_positions.append([x_sample, y_sample])
            current_masks.append(has_pos)
            
            # Check goal
            if next_type == goal_id:
                goal_count += 1
                break
    
    return goal_count / n_rollouts



In [13]:
# Test
start_low = [
    {'type': 'START', 'end_x': 40.0, 'end_y': 10.0},
    {'type': 'Pass', 'end_x': 45.0, 'end_y': 15.0}
]
start_high = [
    {'type': 'START', 'end_x': 100.0, 'end_y': 35.0},
    {'type': 'Pass', 'end_x': 105.0, 'end_y': 40.0}
]

xT_low = calculate_xT_continuous(model, start_low, type_vocab, id_to_type, n_rollouts=500)
xT_high = calculate_xT_continuous(model, start_high, type_vocab, id_to_type, n_rollouts=500)

print(f"xT Low zone:  {xT_low*100:.2f}%")
print(f"xT High zone: {xT_high*100:.2f}%")
print(f"Sanity: {xT_low < xT_high}")

xT Low zone:  0.80%
xT High zone: 33.20%
Sanity: True


In [14]:
import numpy as np
# SprawdÅº dÅ‚ugoÅ›ci sekwencji w val_df
val_lengths = val_df['sequence_length'].values
print(f"Min: {val_lengths.min()}")
print(f"Max: {val_lengths.max()}")
print(f"Mean: {val_lengths.mean():.1f}")
print(f"Median: {np.median(val_lengths):.0f}")
print(f"\nDystrybucja:")
print(f"  < 5:  {(val_lengths < 5).sum()}")
print(f"  5-10: {((val_lengths >= 5) & (val_lengths < 10)).sum()}")
print(f"  >=10: {(val_lengths >= 10).sum()}")

Min: 3
Max: 14
Mean: 7.6
Median: 7

Dystrybucja:
  < 5:  1261
  5-10: 1245
  >=10: 1298


In [16]:
from sklearn.metrics import roc_auc_score, precision_recall_curve, average_precision_score

def calculate_xT_batched(model, start_events_batch, type_vocab, id_to_type,
                         n_rollouts=100, n_steps=5, device='cuda'):
    model.eval()
    batch_size = len(start_events_batch)
    goal_id = type_vocab['GOAL']
    pad_id = type_vocab['<pad>']
    
    all_types = []
    all_positions = []
    all_masks = []
    
    for start_events in start_events_batch:
        for _ in range(n_rollouts):
            types = [type_vocab[e['type']] for e in start_events]
            positions = [
                [e['end_x']/120.0 if e['end_x'] is not None else 0.0,
                 e['end_y']/80.0 if e['end_y'] is not None else 0.0] 
                for e in start_events
            ]
            masks = [e['end_x'] is not None for e in start_events]
            all_types.append(types)
            all_positions.append(positions)
            all_masks.append(masks)
    
    goal_counts = torch.zeros(batch_size, device=device)
    
    for step in range(n_steps):
        padded_types = []
        padded_positions = []
        padded_masks = []
        
        for types, positions, masks in zip(all_types, all_positions, all_masks):
            inp = types[-13:] if len(types) > 13 else types
            pos = positions[-13:] if len(positions) > 13 else positions
            msk = masks[-13:] if len(masks) > 13 else masks
            
            pad_len = 13 - len(inp)
            inp = inp + [pad_id] * pad_len
            pos = pos + [[0.0, 0.0]] * pad_len
            msk = msk + [False] * pad_len
            
            padded_types.append(inp)
            padded_positions.append(pos)
            padded_masks.append(msk)
        
        types_t = torch.tensor(padded_types, dtype=torch.long, device=device)
        pos_t = torch.tensor(padded_positions, dtype=torch.float, device=device)
        mask_t = torch.tensor(padded_masks, dtype=torch.bool, device=device)
        
        with torch.no_grad():
            type_logits, x_mean, x_std, y_mean, y_std = model(types_t, pos_t, mask_t)
            
            seq_lens = torch.tensor([min(len(t)-1, 12) for t in all_types], device=device)
            batch_idx = torch.arange(len(all_types), device=device)
            
            probs = torch.softmax(type_logits[batch_idx, seq_lens], dim=-1)
            next_types = torch.multinomial(probs, 1).squeeze(-1)
            
            x_samples = torch.normal(x_mean[batch_idx, seq_lens], x_std[batch_idx, seq_lens]).clamp(0, 1)
            y_samples = torch.normal(y_mean[batch_idx, seq_lens], y_std[batch_idx, seq_lens]).clamp(0, 1)
        
        for i, next_type in enumerate(next_types):
            all_types[i].append(next_type.item())
            if next_type not in [type_vocab['Shot'], goal_id, type_vocab['NO_GOAL']]:
                all_positions[i].append([x_samples[i].item(), y_samples[i].item()])
                all_masks[i].append(True)
            else:
                all_positions[i].append([0.0, 0.0])
                all_masks[i].append(False)
        
        goals_mask = (next_types == goal_id).view(batch_size, n_rollouts)
        goal_counts += goals_mask.sum(dim=1).float()
    
    return (goal_counts / n_rollouts).cpu().numpy()


def evaluate_xT_on_dataset(model, df, type_vocab, id_to_type, n_rollouts=100, batch_size=32):
    predictions = []
    labels = df['goal'].values
    
    for batch_start in range(0, len(df), batch_size):
        batch_end = min(batch_start + batch_size, len(df))
        
        # Przygotuj batch startÃ³w
        start_events_batch = [df['events'].iloc[i][:2] for i in range(batch_start, batch_end)]
        
        # Batched Monte Carlo
        xT_batch = calculate_xT_batched(model, start_events_batch, type_vocab, id_to_type, n_rollouts)
        predictions.extend(xT_batch)
        
        print(f"Processed {batch_end}/{len(df)}")
    
    auc = roc_auc_score(labels, predictions)
    return auc, predictions, labels

# Test
auc, preds, labs = evaluate_xT_on_dataset(model, val_df.iloc[:100], type_vocab, id_to_type, 
                                          n_rollouts=100, batch_size=32)
print(f"ROC-AUC: {auc:.4f}")

Processed 32/100
Processed 64/100
Processed 96/100
Processed 100/100
ROC-AUC: 0.3830


In [17]:
auc, preds, labs = evaluate_xT_on_dataset(model, val_df, type_vocab, id_to_type, 
                                          n_rollouts=500, batch_size=32)
print(f"ROC-AUC: {auc:.4f}")

Processed 32/3804
Processed 64/3804


KeyboardInterrupt: 

In [None]:
# SprawdÅº None w pierwszych 100 sekwencjach
for i in range(300):
    events = val_df['events'].iloc[i][:2]
    for j, e in enumerate(events):
        if e['end_x'] is None:
            print(f"Seq {i}, Event {j}: {e}")