In [1]:
# pip install peft

In [2]:
# === CELL 1: Imports ===
import pandas as pd
import json
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from peft import LoraConfig, get_peft_model, TaskType  # ‚¨ÖÔ∏è DODAJ TO


# Report the library version and GPU availability for this kernel.
print(f"PyTorch: {torch.__version__}", f"CUDA available: {torch.cuda.is_available()}", sep="\n")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

PyTorch: 2.5.1+cu121
CUDA available: True
GPU: NVIDIA GeForce RTX 4060 Laptop GPU


In [3]:
# Event sequences (one row per sequence, with a binary `goal` column)
df = pd.read_parquet('data/sequences_balanced_1.parquet')
print(f"Sequences: {len(df):,}")
print(f"Goals: {df['goal'].sum()} ({df['goal'].mean()*100:.1f}%)")

# Token -> id mapping
with open('data/vocab_1.json', 'r') as f:
    vocab = json.load(f)

# Id -> token mapping; JSON object keys are strings, so convert them back to int
with open('data/id_to_token_1.json', 'r') as f:
    id_to_token = {int(k): v for k, v in json.load(f).items()}

print(f"Vocab size: {len(vocab)}")
print(f"\nSample sequence: {df['full_sequence'].iloc[0]}")

Sequences: 18,988
Goals: 951 (5.0%)
Vocab size: 1272

Sample sequence: ['START_LOC_20_5' 'Pass_LOC_5_25' 'Carry_LOC_5_35' 'Pass_LOC_45_65'
 'Pass_LOC_35_55' 'Carry_LOC_35_55' 'Carry_LOC_50_60' 'Pass_LOC_95_45'
 'Carry_LOC_90_45' 'Pass_LOC_105_35' 'Carry_LOC_115_35' 'SHOT' 'GOAL']


In [4]:
class CausalLMDataset(Dataset):
    """Next-token-prediction dataset over tokenised event sequences.

    Each item is a dict with two ``torch.long`` tensors of length
    ``max_length - 1``:

    * ``input_ids`` - the sequence without its last token, right-padded
      with the ``<pad>`` id;
    * ``labels`` - the sequence without its first token, right-padded
      with ``-100``.

    Sequences longer than the window are cut off at ``max_length - 1``.
    """

    def __init__(self, df, vocab, max_length=14):
        self.sequences = df['full_sequence'].tolist()
        self.vocab = vocab
        self.max_length = max_length
        self.pad_id = vocab['<pad>']

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        window = self.max_length - 1
        encoded = [self.vocab[tok] for tok in self.sequences[idx]]

        # Shift by one position and cut both sides to the window.
        inputs = encoded[:-1][:window]
        targets = encoded[1:][:window]

        # Fill whatever is left of the window (nothing if it is already full).
        missing = window - len(inputs)
        if missing > 0:
            inputs = inputs + [self.pad_id] * missing
            targets = targets + [-100] * missing

        return {
            'input_ids': torch.tensor(inputs, dtype=torch.long),
            'labels': torch.tensor(targets, dtype=torch.long)
        }

# Create a dataset over the full, unsplit frame and print its size.
dataset = CausalLMDataset(df, vocab)
print(f"Dataset size: {len(dataset)}")

Dataset size: 18988


In [5]:
# === CELL 4: Stratified Train/Val Split ===
from sklearn.model_selection import train_test_split

# 80/20 split, stratified on `goal` so both parts keep the same goal rate.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['goal'])

print(f"=== DATASET SPLIT ===")
print(
    f"Total:    {len(df):,}",
    f"Train:    {len(train_df):,} ({len(train_df)/len(df)*100:.1f}%)",
    f"Val:      {len(val_df):,} ({len(val_df)/len(df)*100:.1f}%)",
    sep="\n",
)

print(f"\n=== GOAL DISTRIBUTION ===")
print(
    f"Train goals: {train_df['goal'].sum()} ({train_df['goal'].mean()*100:.1f}%)",
    f"Val goals:   {val_df['goal'].sum()} ({val_df['goal'].mean()*100:.1f}%)",
    sep="\n",
)

=== DATASET SPLIT ===
Total:    18,988
Train:    15,190 (80.0%)
Val:      3,798 (20.0%)

=== GOAL DISTRIBUTION ===
Train goals: 761 (5.0%)
Val goals:   190 (5.0%)


In [6]:
# Build one dataset per split from the already-split DataFrames.
train_dataset, val_dataset = (CausalLMDataset(part, vocab) for part in (train_df, val_df))

print(f"\n=== DATASETS ===")
print(f"Train dataset: {len(train_dataset):,}", f"Val dataset:   {len(val_dataset):,}", sep="\n")

# DataLoaders: only the training loader shuffles.
batch_size = 16

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

print(f"\nTrain batches: {len(train_loader)}")
print(f"Val batches:   {len(val_loader)}")

# Smoke test: pull one batch and show the tensor shapes.
batch = next(iter(train_loader))
print(f"\nBatch shapes:")
print(f"  input_ids: {batch['input_ids'].shape}", f"  labels:    {batch['labels'].shape}", sep="\n")


=== DATASETS ===
Train dataset: 15,190
Val dataset:   3,798

Train batches: 950
Val batches:   238

Batch shapes:
  input_ids: torch.Size([16, 13])
  labels:    torch.Size([16, 13])


In [7]:
from transformers import GPT2LMHeadModel, GPT2Config

# === MODEL CONFIG ===
config = GPT2Config(
    vocab_size=len(vocab),        # one entry per token in our vocabulary
    n_positions=14,                # max sequence length
    n_ctx=14,                      # context window
    n_embd=768,                    # embedding dimension (GPT-2 small)
    n_layer=12,                    # transformer layers
    n_head=12                      # attention heads
)

# === CREATE MODEL ===
# Built from the config object only; no pretrained checkpoint is loaded in this cell.
model = GPT2LMHeadModel(config)
model = model.cuda()  # move to GPU

print(f"Model created!")
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"Trainable: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

Model created!
Parameters: 86,043,648
Trainable: 86,043,648


In [8]:
from torch.optim import AdamW

# === OPTIMIZER ===
# AdamW over all model parameters with a single, constant learning rate.
learning_rate = 5e-4
optimizer = AdamW(model.parameters(), lr=learning_rate)

print(f"Optimizer: AdamW")
print(f"Learning rate: {learning_rate}")

Optimizer: AdamW
Learning rate: 0.0005


In [None]:
# === TRAINING CONFIG ===
epochs = 3
device = 'cuda'

print(f"Starting training...")
print(f"Epochs: {epochs}")
print(f"Device: {device}\n")

# === TRAINING ===
# Switch the model to training mode before the loop.
model.train()

for epoch in range(epochs):
    # Sum of per-batch losses for the current epoch.
    total_loss = 0
    
    for batch_idx, batch in enumerate(train_loader):
        # Move the batch to the training device
        input_ids = batch['input_ids'].to(device)
        labels = batch['labels'].to(device)
        
        # Forward pass; passing `labels` makes the model return the loss as well
        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        
        # Backward pass: clear old gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        # Print the running mean loss every 50 batches
        if (batch_idx + 1) % 50 == 0:
            avg_loss = total_loss / (batch_idx + 1)
            print(f"Epoch {epoch+1}/{epochs} | Batch {batch_idx+1}/{len(train_loader)} | Loss: {avg_loss:.4f}")
    
    # Epoch summary: mean loss over all batches of the epoch
    avg_loss = total_loss / len(train_loader)
    print(f"‚úÖ Epoch {epoch+1} done | Avg Loss: {avg_loss:.4f}\n")

print("üéâ Training finished!")

Starting training...
Epochs: 3
Device: cuda



`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Epoch 1/3 | Batch 50/950 | Loss: 6.4649
Epoch 1/3 | Batch 100/950 | Loss: 6.2264
Epoch 1/3 | Batch 150/950 | Loss: 6.1112
Epoch 1/3 | Batch 200/950 | Loss: 6.0555
Epoch 1/3 | Batch 250/950 | Loss: 6.0279
Epoch 1/3 | Batch 300/950 | Loss: 5.9901
Epoch 1/3 | Batch 350/950 | Loss: 5.9583
Epoch 1/3 | Batch 400/950 | Loss: 5.9298
Epoch 1/3 | Batch 450/950 | Loss: 5.9012
Epoch 1/3 | Batch 500/950 | Loss: 5.8715
Epoch 1/3 | Batch 550/950 | Loss: 5.8462
Epoch 1/3 | Batch 600/950 | Loss: 5.8279
Epoch 1/3 | Batch 650/950 | Loss: 5.8033
Epoch 1/3 | Batch 700/950 | Loss: 5.7844
Epoch 1/3 | Batch 750/950 | Loss: 5.7662
Epoch 1/3 | Batch 800/950 | Loss: 5.7521
Epoch 1/3 | Batch 850/950 | Loss: 5.7368
Epoch 1/3 | Batch 900/950 | Loss: 5.7260
Epoch 1/3 | Batch 950/950 | Loss: 5.7132
‚úÖ Epoch 1 done | Avg Loss: 5.7132

Epoch 2/3 | Batch 50/950 | Loss: 5.4817
Epoch 2/3 | Batch 100/950 | Loss: 5.4540


In [None]:
# === TEST PREDICTION ===
model.eval()

# Take one example from the validation set.
sample = val_dataset[0]
input_ids = sample['input_ids'].unsqueeze(0).to(device)  # [1, seq_len]

# The dataset right-pads inputs with the <pad> id, so the last real token is
# generally NOT at index -1. Predict from the last non-pad position instead.
pad_id = vocab['<pad>']
real_len = int((input_ids[0] != pad_id).sum().item())
last_pos = max(real_len - 1, 0)

with torch.no_grad():
    outputs = model(input_ids)
    logits = outputs.logits  # [1, seq_len, vocab_size]

    # Next-token distribution after the last real token
    last_logits = logits[0, last_pos, :]  # [vocab_size]
    probs = torch.softmax(last_logits, dim=0)

    # Top-5 most probable next tokens
    top5_probs, top5_ids = torch.topk(probs, 5)

    print("=== PREDICTION ===")
    print(f"Input: {[id_to_token[token_id] for token_id in input_ids[0].tolist() if token_id != pad_id][:5]}...")
    print(f"\nTop 5 next tokens:")
    # `token_id` (not `id`) so the builtin `id` is not overwritten in the kernel namespace.
    for prob, token_id in zip(top5_probs, top5_ids):
        token = id_to_token[token_id.item()]
        print(f"  {token:30s} {prob.item()*100:.2f}%")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# === SEQUENCE LENGTH ANALYSIS ON THE VALIDATION SET ===

print("=== VALIDATION SET - SEQUENCE LENGTH ANALYSIS ===\n")

# Summary statistics of the `sequence_length` column
print("Basic stats:")
print(val_df['sequence_length'].describe())

print("\n" + "="*50, "DISTRIBUTION BY LENGTH:", "="*50, sep="\n")

# Number of sequences per length, ordered by length
length_dist = val_df['sequence_length'].value_counts().sort_index()

for length, count in length_dist.items():
    pct = count / len(val_df) * 100
    # Text bar: one block character per 2 percentage points
    bar = "‚ñà" * int(pct / 2)
    print(f"Length {length:2d}: {count:4d} ({pct:5.2f}%) {bar}")

In [None]:
from sklearn.metrics import roc_auc_score, precision_recall_curve, average_precision_score
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F_torch  # ‚Üê DODAJ TO!
from pyspark.sql import functions as F  # ‚Üê to zostaje dla PySpark

# === FUNKCJA 1: Find Closest Token ===
def find_closest_token(target_token, vocab, token_type='START_LOC'):
    """Find the vocab token of a given type that lies closest to ``target_token``.

    Tokens are split on ``'_'``; fields 2 and 3 are read as integer x and y
    (e.g. ``'START_LOC_20_5'`` -> ``(20, 5)``). Distance is Euclidean.

    Args:
        target_token: Token whose coordinates are the reference point. It must
            carry two integer coordinates, otherwise ``IndexError`` or
            ``ValueError`` propagates from the parsing.
        vocab: Mapping whose keys are the candidate tokens.
        token_type: Only keys starting with this prefix are considered.

    Returns:
        ``(closest_token, distance)``. Returns ``(None, float('inf'))`` when no
        usable candidate exists. On ties the first candidate in ``vocab``
        iteration order wins.
    """
    target_parts = target_token.split('_')
    target_x = int(target_parts[2])
    target_y = int(target_parts[3])

    closest = None
    min_dist = float('inf')

    for token in vocab.keys():
        if not token.startswith(token_type):
            continue

        parts = token.split('_')
        try:
            x = int(parts[2])
            y = int(parts[3])
        except (IndexError, ValueError):
            # The prefix matches but the token has no x/y pair (e.g. 'SHOT'):
            # it cannot be compared by distance, so skip it instead of crashing.
            continue

        dist = ((x - target_x)**2 + (y - target_y)**2)**0.5
        if dist < min_dist:
            min_dist = dist
            closest = token

    return closest, min_dist


# === FUNKCJA 2: Monte Carlo FAST (batched) ===
def calculate_xT_montecarlo_safe(model, start_tokens, vocab, id_to_token, 
                                   n_rollouts=200, n_steps=5, device='cuda'):
    """
    Monte Carlo xT estimate with constrained decoding, batched over rollouts.

    All rollouts start from the same prefix, so they are advanced together as
    one batch: one forward pass per step instead of one per rollout per step.

    Decoding constraints:
      * after 'SHOT' only 'GOAL' / 'NO_GOAL' may be sampled;
      * otherwise only tokens starting with 'Pass' or 'Carry', or the token
        'SHOT', may be sampled;
      * a rollout stops as soon as it samples 'GOAL' or 'NO_GOAL', or when it
        has no allowed continuation.

    Args:
        model: Callable returning an object with ``.logits`` of shape
            [batch, seq_len, vocab]; ``model.eval()`` is called on it.
        start_tokens: Sequence of token strings forming the shared prefix.
        vocab: Token -> id mapping (ids are assumed to be non-negative).
        id_to_token: Kept for interface compatibility; this implementation
            compares token ids directly and does not read it.
        n_rollouts: Number of sampled continuations (must be > 0).
        n_steps: Maximum number of tokens sampled per rollout.
        device: Device on which the input ids are created.

    Returns:
        Fraction of rollouts that sampled 'GOAL' within ``n_steps`` (float).

    Raises:
        ValueError: If ``n_rollouts`` is not positive or ``start_tokens`` is empty.
        KeyError: If a start token is missing from ``vocab``.
    """
    if n_rollouts <= 0:
        raise ValueError("n_rollouts must be a positive integer")

    start_tokens = list(start_tokens)
    if not start_tokens:
        raise ValueError("start_tokens must contain at least one token")

    start_ids = [vocab[t] for t in start_tokens]

    model.eval()

    # A prefix that already ends with an outcome has nothing left to sample.
    if start_tokens[-1] in ('GOAL', 'NO_GOAL'):
        return 0.0

    # -1 never equals a real id, so a special token absent from vocab never matches.
    shot_id = vocab.get('SHOT', -1)
    goal_id = vocab.get('GOAL', -1)
    no_goal_id = vocab.get('NO_GOAL', -1)

    # Allowed-id sets do not change between steps, so build them once.
    outcome_ids = [i for i in (goal_id, no_goal_id) if i >= 0]
    action_ids = [i for tok, i in vocab.items()
                  if tok.startswith(('Pass', 'Carry')) or tok == 'SHOT']

    goal_count = 0

    with torch.no_grad():
        # [n_rollouts, prefix_len]; only still-running rollouts are kept in `seqs`.
        seqs = torch.tensor(start_ids, dtype=torch.long, device=device)
        seqs = seqs.unsqueeze(0).repeat(n_rollouts, 1)

        action_mask = None
        outcome_mask = None

        for _ in range(n_steps):
            if seqs.size(0) == 0:
                break

            # Next-token logits for every running rollout: [active, vocab]
            logits = model(seqs).logits[:, -1, :]

            if action_mask is None:
                # Masks are built lazily because the vocab width comes from the logits.
                action_mask = torch.zeros(logits.size(-1), dtype=torch.bool, device=logits.device)
                outcome_mask = torch.zeros_like(action_mask)
                if action_ids:
                    action_mask[action_ids] = True
                if outcome_ids:
                    outcome_mask[outcome_ids] = True

            # === CONSTRAINED DECODING ===
            after_shot = (seqs[:, -1] == shot_id).unsqueeze(1)             # [active, 1]
            allowed = torch.where(after_shot, outcome_mask, action_mask)   # [active, vocab]

            # Rollouts without any allowed token end here without scoring.
            has_valid = allowed.any(dim=1)
            if not bool(has_valid.all()):
                seqs = seqs[has_valid]
                logits = logits[has_valid]
                allowed = allowed[has_valid]
                if seqs.size(0) == 0:
                    break

            # Disallowed tokens get probability exactly 0.
            probs = torch.softmax(logits.masked_fill(~allowed, float('-inf')), dim=-1)
            next_ids = torch.multinomial(probs, 1)                         # [active, 1]
            sampled = next_ids.squeeze(1)

            # Check outcomes
            scored = sampled == goal_id
            finished = scored | (sampled == no_goal_id)
            goal_count += int(scored.sum().item())

            # Append the sampled token and drop the rollouts that just finished.
            seqs = torch.cat([seqs, next_ids], dim=1)[~finished]

    return goal_count / n_rollouts


# === FUNKCJA 3: Evaluate na val set ===
def evaluate_xT_on_val_set(model, val_df, vocab, id_to_token, 
                            n_rollouts=200, n_steps=5, device='cuda'):
    """
    Compute a Monte Carlo xT value for every sequence of the validation frame.

    For each row the leading tokens of ``full_sequence`` are used as context
    (2 tokens when the sequence has exactly 3 tokens, otherwise 3) and passed
    to ``calculate_xT_montecarlo_safe``.

    A sequence whose rollout raises is reported and skipped. The returned
    arrays can therefore be shorter than ``val_df``, and their positions then
    no longer correspond to row positions in ``val_df``.

    Returns:
        ``(predictions, true_labels)`` as two NumPy arrays of equal length.
    """
    model.eval()

    predictions, true_labels = [], []
    sequences = val_df['full_sequence']
    goals = val_df['goal']

    print(f"Evaluating on {len(val_df)} validation sequences...")
    print(f"Rollouts per sequence: {n_rollouts}")

    for idx in range(len(val_df)):
        sequence = sequences.iloc[idx]
        true_label = goals.iloc[idx]

        # Context size: 2 tokens for length-3 sequences, 3 tokens otherwise.
        n_context = 2 if len(sequence) == 3 else 3
        start_tokens = list(sequence[:n_context])

        try:
            xT = calculate_xT_montecarlo_safe(
                model, start_tokens, vocab, id_to_token,
                n_rollouts=n_rollouts, n_steps=n_steps, device=device
            )
        except Exception as e:
            print(f"‚ö†Ô∏è  Error on sequence {idx}: {e}")
            continue

        predictions.append(xT)
        true_labels.append(true_label)

        # Progress line every 100 processed rows.
        if (idx + 1) % 100 == 0:
            avg_xt = np.mean(predictions)
            print(f"Processed {idx+1}/{len(val_df)} | Avg xT: {avg_xt*100:.2f}%")

    return np.array(predictions), np.array(true_labels)

In [None]:
# Run the Monte Carlo evaluation over the validation frame.
# Note: `labels` is rebound here to the array of true outcomes.
predictions, labels = evaluate_xT_on_val_set(
    model, val_df, vocab, id_to_token,
    n_rollouts=100,
    n_steps=5
)

# Metrics
roc_auc = roc_auc_score(labels, predictions)
print(f"\nüéØ ROC-AUC: {roc_auc:.4f}")

In [None]:
import mlflow
import mlflow.pytorch
from sklearn.metrics import roc_auc_score, average_precision_score, brier_score_loss
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, precision_recall_curve
import pandas as pd
import json
import numpy as np

# === SETUP MLFLOW ===
mlflow.set_tracking_uri("file:./mlruns")  # local folder
mlflow.set_experiment("xT_LLM_experiments")
import os

# Create the artifacts folder (no error if it already exists)
os.makedirs('artifacts', exist_ok=True)
print("‚úÖ Artifacts folder created")
print("‚úÖ MLflow configured")

# === START RUN ===
with mlflow.start_run(run_name="xT_verb"):
    
    print("üìù Logging parameters...")
    
    # === LOG PARAMETERS ===
    # Hyperparameters are read from the variables the earlier cells actually
    # used (config, epochs, learning_rate, batch_size), so the logged values
    # cannot drift from the real training setup.
    
    # Model config
    mlflow.log_param("model_architecture", "GPT-2 Small")
    mlflow.log_param("vocab_size", len(vocab))
    mlflow.log_param("max_seq_length", config.n_positions)
    mlflow.log_param("model_parameters", sum(p.numel() for p in model.parameters()))
    
    # Training config
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("optimizer", "AdamW")
    mlflow.log_param("loss_function", "CrossEntropyLoss")
    
    # Data config
    mlflow.log_param("train_sequences", len(train_df))
    mlflow.log_param("val_sequences", len(val_df))
    mlflow.log_param("train_goal_rate", f"{train_df['goal'].mean()*100:.2f}%")
    mlflow.log_param("val_goal_rate", f"{val_df['goal'].mean()*100:.2f}%")
    mlflow.log_param("leagues", "SerieA_2015_2016")
    
    # Free-text notes about the approach used in this run
    mlflow.log_param("uwagi_podejscie", "obr√≥cone warto≈õci x,y; brak kƒÖta strza≈Çu")

    
    print("üìä Logging metrics...")
    
    # === CALCULATE & LOG METRICS ===
    roc_auc = roc_auc_score(labels, predictions)
    avg_precision = average_precision_score(labels, predictions)
    brier = brier_score_loss(labels, predictions)
    
    mlflow.log_metric("roc_auc", roc_auc)
    mlflow.log_metric("avg_precision", avg_precision)
    mlflow.log_metric("brier_score", brier)
    mlflow.log_metric("mean_predicted_xT", predictions.mean())
    mlflow.log_metric("std_predicted_xT", predictions.std())
    mlflow.log_metric("min_predicted_xT", predictions.min())
    mlflow.log_metric("max_predicted_xT", predictions.max())
    mlflow.log_metric("median_predicted_xT", float(np.median(predictions)))
    
    # Class-specific metrics: mean prediction within each true-outcome group
    mlflow.log_metric("mean_xT_for_goals", predictions[labels==1].mean())
    mlflow.log_metric("mean_xT_for_no_goals", predictions[labels==0].mean())
    
    print("üìà Creating plots...")
    
    # === CREATE & LOG PLOTS ===
    
    # 1. ROC Curve
    fig, ax = plt.subplots(figsize=(8, 6))
    fpr, tpr, _ = roc_curve(labels, predictions)
    ax.plot(fpr, tpr, linewidth=2, label=f'Model (AUC={roc_auc:.3f})')
    ax.plot([0, 1], [0, 1], 'k--', linewidth=1, label='Random')
    ax.set_xlabel('False Positive Rate', fontsize=12)
    ax.set_ylabel('True Positive Rate', fontsize=12)
    ax.set_title('ROC Curve', fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('artifacts/roc_curve.png', dpi=150)
    mlflow.log_artifact('artifacts/roc_curve.png')
    plt.close()
    
    # 2. Precision-Recall Curve
    fig, ax = plt.subplots(figsize=(8, 6))
    precision, recall, _ = precision_recall_curve(labels, predictions)
    baseline = labels.mean()
    ax.plot(recall, precision, linewidth=2, label=f'Model (AP={avg_precision:.3f})')
    ax.axhline(y=baseline, color='k', linestyle='--', linewidth=1, 
               label=f'Baseline ({baseline*100:.1f}%)')
    ax.set_xlabel('Recall', fontsize=12)
    ax.set_ylabel('Precision', fontsize=12)
    ax.set_title('Precision-Recall Curve', fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('artifacts/pr_curve.png', dpi=150)
    mlflow.log_artifact('artifacts/pr_curve.png')
    plt.close()
    
    # 3. Prediction Distribution
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.hist(predictions[labels==0], bins=40, alpha=0.6, label='No Goal', 
            color='blue', density=True, edgecolor='black', linewidth=0.5)
    ax.hist(predictions[labels==1], bins=40, alpha=0.6, label='Goal', 
            color='red', density=True, edgecolor='black', linewidth=0.5)
    ax.set_xlabel('Predicted xT', fontsize=12)
    ax.set_ylabel('Density', fontsize=12)
    ax.set_title('Prediction Distribution by Outcome', fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()
    plt.savefig('artifacts/prediction_dist.png', dpi=150)
    mlflow.log_artifact('artifacts/prediction_dist.png')
    plt.close()
    
    # 4. Calibration Plot
    # Predictions are grouped into n_bins equal-width bins on [0, max prediction];
    # for every non-empty bin the observed goal rate is plotted at the bin centre.
    fig, ax = plt.subplots(figsize=(8, 6))
    n_bins = 10
    bin_edges = np.linspace(0, predictions.max(), n_bins + 1)
    bin_centers = []
    actual_rates = []
    
    for i in range(n_bins):
        lower, upper = bin_edges[i], bin_edges[i+1]
        if i == n_bins - 1:
            # The last bin is closed on the right; otherwise predictions equal
            # to the maximum would fall into no bin at all.
            mask = (predictions >= lower) & (predictions <= upper)
        else:
            mask = (predictions >= lower) & (predictions < upper)
        if mask.sum() > 0:
            bin_centers.append((lower + upper) / 2)
            actual_rates.append(labels[mask].mean())
    
    ax.plot(bin_centers, actual_rates, 'o-', linewidth=2, markersize=8, label='Model')
    # Guard against an empty list so the reference line never raises.
    diagonal_end = max(bin_centers) if bin_centers else 0.0
    ax.plot([0, diagonal_end], [0, diagonal_end], 'k--', linewidth=1, label='Perfect calibration')
    ax.set_xlabel('Predicted xT (binned)', fontsize=12)
    ax.set_ylabel('Actual Goal Rate', fontsize=12)
    ax.set_title('Calibration Plot', fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('artifacts/calibration.png', dpi=150)
    mlflow.log_artifact('artifacts/calibration.png')
    plt.close()
    
    print("üíæ Saving artifacts...")
    
    # === SAVE DATA ARTIFACTS ===
    
    # Predictions & labels
    results_df = pd.DataFrame({
        'prediction': predictions,
        'label': labels,
        'sequence_index': range(len(predictions))
    })
    results_df.to_csv('artifacts/predictions.csv', index=False)
    mlflow.log_artifact('artifacts/predictions.csv')
    
    # Vocabulary
    with open('artifacts/vocab.json', 'w') as f:
        json.dump(vocab, f, indent=2)
    mlflow.log_artifact('artifacts/vocab.json')
    
    # Summary stats
    summary = {
        'model': 'GPT-2 Small baseline',
        'metrics': {
            'roc_auc': float(roc_auc),
            'avg_precision': float(avg_precision),
            'brier_score': float(brier)
        },
        'predictions': {
            'mean': float(predictions.mean()),
            'std': float(predictions.std()),
            'min': float(predictions.min()),
            'max': float(predictions.max())
        },
        'separation': {
            'mean_xT_goals': float(predictions[labels==1].mean()),
            'mean_xT_no_goals': float(predictions[labels==0].mean()),
            'difference': float(predictions[labels==1].mean() - predictions[labels==0].mean())
        }
    }
    
    with open('artifacts/summary.json', 'w') as f:
        json.dump(summary, f, indent=2)
    mlflow.log_artifact('artifacts/summary.json')
    
    print("‚úÖ Run logged successfully!")
    print(f"\n{'='*50}")
    print(f"RUN SUMMARY")
    print(f"{'='*50}")
    print(f"ROC-AUC: {roc_auc:.4f}")
    print(f"Avg Precision: {avg_precision:.4f}")
    print(f"Brier Score: {brier:.4f}")
    print(f"Mean xT (goals): {predictions[labels==1].mean()*100:.2f}%")
    print(f"Mean xT (no goals): {predictions[labels==0].mean()*100:.2f}%")
    print(f"{'='*50}")

In [None]:
import numpy as np

# === FUNKCJA DO INSPEKCJI OBSERWACJI ===
def inspect_validation_sample(val_df, idx, predictions=None, labels=None):
    """
    Print a detailed report for one row of the validation set.

    The report shows the sequence length and true outcome, the full token
    sequence, the context tokens used by the Monte Carlo evaluation, the model
    prediction (optional) and a zone label for each context token that carries
    a location.

    Args:
        val_df: DataFrame with ``full_sequence`` and ``goal`` columns.
        idx: Positional row index (used with ``.iloc``).
        predictions: Optional array of predicted xT values, indexed by ``idx``.
        labels: Optional array of true labels; the prediction section is only
            printed when both ``predictions`` and ``labels`` are given.

    Returns:
        None. Everything is printed.
    """
    print(f"\n{'='*70}")
    print(f"VALIDATION SAMPLE #{idx}")
    print(f"{'='*70}")
    
    # Basic info
    sequence = val_df.iloc[idx]['full_sequence']
    true_label = val_df.iloc[idx]['goal']
    seq_length = len(sequence)
    
    # Same context rule as the evaluation: 2 tokens for length-3 sequences,
    # otherwise the first 3. Computed up front so the sequence listing marks
    # exactly these tokens as context.
    context_len = 2 if seq_length == 3 else 3
    context_tokens = list(sequence[:context_len])
    
    print(f"\nüìä Basic Info:")
    print(f"  Sequence length: {seq_length}")
    print(f"  True outcome: {'GOAL ‚öΩ' if true_label == 1 else 'NO_GOAL ‚ùå'}")
    
    # Full sequence
    print(f"\nüìù Full Sequence:")
    for i, token in enumerate(sequence):
        marker = "  "
        if i < context_len:  # context used in evaluation
            marker = "‚Üí "  # context
        elif i == len(sequence) - 1:  # outcome
            marker = "üéØ"
        print(f"  {marker} {i}: {token}")
    
    # Context used by the Monte Carlo rollouts
    if seq_length == 3:
        print(f"\nüîç Context used (length=3, only 2 tokens):")
    else:
        print(f"\nüîç Context used (first 3 tokens):")
    
    for i, token in enumerate(context_tokens):
        print(f"  ‚Üí {token}")
    
    # Prediction (if provided)
    if predictions is not None and labels is not None:
        pred_xT = predictions[idx]
        print(f"\nü§ñ Model Prediction:")
        print(f"  xT (5 steps): {pred_xT*100:.2f}%")
        print(f"  True label: {true_label} ({'GOAL' if true_label == 1 else 'NO_GOAL'})")
        
        # Classification against a fixed 0.10 threshold
        if true_label == 1:  # actual GOAL
            if pred_xT > 0.10:  # high xT
                result = "‚úÖ TRUE POSITIVE (high xT, actual GOAL)"
            else:
                result = "‚ùå FALSE NEGATIVE (low xT, but actual GOAL)"
        else:  # actual NO_GOAL
            if pred_xT > 0.10:
                result = "‚ùå FALSE POSITIVE (high xT, but NO_GOAL)"
            else:
                result = "‚úÖ TRUE NEGATIVE (low xT, no goal)"
        
        print(f"  Classification: {result}")
        
        # Threat bucket: > 0.15 high, > 0.08 medium, otherwise low
        if pred_xT > 0.15:
            threat = "üî¥ HIGH THREAT"
        elif pred_xT > 0.08:
            threat = "üü° MEDIUM THREAT"
        else:
            threat = "üü¢ LOW THREAT"
        print(f"  Threat level: {threat}")
    
    # Decode locations of the context tokens (where present)
    print(f"\nüó∫Ô∏è  Location Analysis:")
    for i, token in enumerate(context_tokens):
        if '_LOC_' in token:
            parts = token.split('_')
            event_type = parts[0]
            x = int(parts[2])
            y = int(parts[3])
            
            # Zone by x coordinate.
            # NOTE(review): the 60 / 102 cut-offs are not equal thirds of a
            # 120-long pitch - confirm the labels match the intended zones.
            if x < 60:
                zone = "Defensive third"
            elif x < 102:
                zone = "Middle third"
            else:
                zone = "‚ö†Ô∏è ATTACKING THIRD (penalty area!)"
            
            print(f"  {i+1}. {event_type} at ({x}, {y}) - {zone}")
    
    print(f"\n{'='*70}\n")


# === INSPECT SELECTED SAMPLES ===
# Positional row indices into val_df.
indices_to_inspect = [197, 3, 246]

print("="*70)
print("DETAILED VALIDATION SAMPLE INSPECTION")
print("="*70)

for idx in indices_to_inspect:
    # Pass predictions/labels only if an earlier evaluation cell defined them in this kernel
    if 'predictions' in globals() and 'labels' in globals():
        inspect_validation_sample(val_df, idx, predictions, labels)
    else:
        inspect_validation_sample(val_df, idx)

# === COMPARISON OF THE 3 SAMPLES ===
print("\n" + "="*70)
print("COMPARISON SUMMARY")
print("="*70)

# One record per inspected sample, collected into a small table below.
comparison_data = []
for idx in indices_to_inspect:
    seq = val_df.iloc[idx]['full_sequence']
    true_label = val_df.iloc[idx]['goal']
    
    # pred_xT stays None when no predictions array exists in the kernel
    if 'predictions' in globals():
        pred = predictions[idx]
    else:
        pred = None
    
    comparison_data.append({
        'idx': idx,
        'length': len(seq),
        'first_token': seq[0],
        'last_token': seq[-1],
        'true_label': true_label,
        'pred_xT': pred
    })

import pandas as pd
comp_df = pd.DataFrame(comparison_data)
print(comp_df.to_string(index=False))

# === POSITION VISUALIZATION (optional) ===
print("\n" + "="*70)
print("POSITION VISUALIZATION (ASCII)")
print("="*70)

def plot_positions_ascii(sequences, idx):
    """Simple ASCII rendering of the context positions on the pitch.

    The 120x80 pitch is scaled down 5x to a 24-column by 16-row character
    grid. Each context token containing ``_LOC_`` is drawn as its 1-based
    position in the context. The two integers after ``_LOC_`` are read as x
    and y. Row 0 (y = 0) is printed first; coordinates outside the pitch are
    clamped to the border cells. A legend line follows for every drawn token.

    Args:
        sequences: DataFrame with a ``full_sequence`` column.
        idx: Positional row index (used with ``.iloc``).

    Returns:
        None. The header, grid and legend are printed.
    """
    print(f"\nSample #{idx}:")
    
    # Pitch 120x80, scaled down 5x
    n_cols, n_rows, scale = 24, 16, 5
    field = [[' ' for _ in range(n_cols)] for _ in range(n_rows)]
    
    seq = sequences.iloc[idx]['full_sequence']
    context = seq[:3] if len(seq) > 3 else seq[:2]
    
    positions = []
    for order, token in enumerate(context, start=1):
        if '_LOC_' not in token:
            continue
        parts = token.split('_')
        try:
            x, y = int(parts[2]), int(parts[3])
        except (IndexError, ValueError):
            # Token carries no usable x/y pair: nothing to draw for it.
            continue
        col = min(max(x // scale, 0), n_cols - 1)
        row = min(max(y // scale, 0), n_rows - 1)
        field[row][col] = str(order)
        positions.append((order, parts[0], x, y))
    
    border = '+' + '-' * n_cols + '+'
    print(border)
    for row_cells in field:
        print('|' + ''.join(row_cells) + '|')
    print(border)
    
    for order, event_type, x, y in positions:
        print(f"  {order}: {event_type} at ({x}, {y})")

In [None]:
# Two-token prefix to look for in the training sequences:
target_pattern = ['START_LOC_110_40', 'SHOT_ANG_35']

train_count = 0
train_goals = 0

for seq in train_df['full_sequence']:
    # Skip sequences that are too short or do not start with the pattern.
    if len(seq) < 2 or list(seq[:2]) != target_pattern:
        continue
    train_count += 1
    # A goal is counted only for a 3-token sequence whose third token is GOAL.
    if len(seq) == 3 and seq[2] == 'GOAL':
        train_goals += 1

print(f"=== PATTERN ANALYSIS IN TRAINING ===")
print(f"Pattern occurrences: {train_count}", f"Pattern ‚Üí GOAL: {train_goals}", sep="\n")
if train_count > 0:
    print(f"Conversion rate: {train_goals / train_count * 100:.1f}%")