In [10]:
!pip install nfl_data_py
!pip install pandas numpy torch tabulate matplotlib tqdm scikit-learn



In [11]:
import pandas as pd
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import numpy as np
import nfl_data_py as nfl
from tabulate import tabulate
import matplotlib.pyplot as plt
from datetime import datetime
import os
from tqdm.notebook import tqdm
import math
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
#running on Tesla T4

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU type: {torch.cuda.get_device_name(0)}")

Using device: cuda
GPU type: Tesla T4


In [12]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        ce_loss = torch.nn.functional.mse_loss(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss
        return focal_loss.mean()

def create_curriculum_batches(dataset, start_seq_len=50, end_seq_len=None, epochs_per_stage=5):
    """Creates curriculum learning stages"""
    if end_seq_len is None:
        end_seq_len = max(len(seq) for seq in dataset.qb_seqs)

    num_stages = math.ceil((end_seq_len - start_seq_len) / 50)
    stages = []

    for i in range(num_stages):
        curr_len = min(start_seq_len + i * 50, end_seq_len)
        stages.extend([curr_len] * epochs_per_stage)

    return stages

class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

In [13]:
years = [2022, 2023, 2024]
print("Loading play by play data...")
play_by_play = nfl.import_pbp_data(years, downcast=True)

#filter for passing plays, change this?
pass_plays = play_by_play[play_by_play['pass_attempt'] == 1].copy()
pass_plays = pass_plays.sort_values(['game_id', 'play_id'])

#fill missing values for new features
numeric_columns = ['defenders_in_box', 'number_of_pass_rushers', 'temp',
                  'wind', 'shotgun', 'no_huddle']
pass_plays[numeric_columns] = pass_plays[numeric_columns].fillna(0)

print(f"Total plays: {len(pass_plays)}")

Loading play by play data...
2022 done.
2023 done.
2024 done.
Downcasting floats.
Total plays: 54998


In [14]:
def create_sequence_features(play_by_play, qb_name, game_id):
    """Creates a sequence of play-by-play data with enhanced context"""
    qb_plays = play_by_play[
        (play_by_play['passer_player_name'] == qb_name) &
        (play_by_play['pass_attempt'] == 1)
    ]

    if isinstance(game_id, float) and np.isinf(game_id):
        previous_plays = qb_plays
    else:
        previous_plays = qb_plays[qb_plays['game_id'].astype(str) < str(game_id)]

    sequence = []
    for _, play in previous_plays.iterrows():
        play_stats = [
            # Core play stats
            play['yards_gained'] if not np.isnan(play['yards_gained']) else 0,
            play['pass_touchdown'] if not np.isnan(play['pass_touchdown']) else 0,
            play['complete_pass'] if not np.isnan(play['complete_pass']) else 0,
            play['air_yards'] if not np.isnan(play['air_yards']) else 0,
            play['yards_after_catch'] if not np.isnan(play['yards_after_catch']) else 0,
            play['qb_hit'] if not np.isnan(play['qb_hit']) else 0,
            play['sack'] if not np.isnan(play['sack']) else 0,

            # Game situation
            play['score_differential'] if not np.isnan(play['score_differential']) else 0,
            play['qtr'] if not np.isnan(play['qtr']) else 0,
            play['down'] if not np.isnan(play['down']) else 0,
            play['ydstogo'] if not np.isnan(play['ydstogo']) else 0,
            play['yardline_100'] if not np.isnan(play['yardline_100']) else 0,

            # Defensive pressure
            play['defenders_in_box'] if not np.isnan(play['defenders_in_box']) else 0,
            play['number_of_pass_rushers'] if not np.isnan(play['number_of_pass_rushers']) else 0,

            # Weather conditions
            play['temp'] if not np.isnan(play['temp']) else 70,
            play['wind'] if not np.isnan(play['wind']) else 0,

            # Binary indicators
            play['shotgun'] if not np.isnan(play['shotgun']) else 0,
            play['no_huddle'] if not np.isnan(play['no_huddle']) else 0,
        ]
        sequence.append(play_stats)

    sequence = np.array(sequence)

    # Apply linear weighting to emphasize recent plays
    if len(sequence) > 0:
        weights = np.linspace(1.0, 1.5, len(sequence))
        sequence = sequence * weights[:, np.newaxis]

    return sequence

def create_defense_sequence(play_by_play, def_team, game_id):
    """Creates a sequence of play-by-play data for a defense with enhanced context"""
    def_plays = play_by_play[
        (play_by_play['defteam'] == def_team) &
        (play_by_play['pass_attempt'] == 1)
    ]

    if isinstance(game_id, float) and np.isinf(game_id):
        previous_plays = def_plays
    else:
        previous_plays = def_plays[def_plays['game_id'].astype(str) < str(game_id)]

    sequence = []
    for _, play in previous_plays.iterrows():
        play_stats = [
            # Same features as QB sequence
            play['yards_gained'] if not np.isnan(play['yards_gained']) else 0,
            play['pass_touchdown'] if not np.isnan(play['pass_touchdown']) else 0,
            play['complete_pass'] if not np.isnan(play['complete_pass']) else 0,
            play['air_yards'] if not np.isnan(play['air_yards']) else 0,
            play['yards_after_catch'] if not np.isnan(play['yards_after_catch']) else 0,
            play['qb_hit'] if not np.isnan(play['qb_hit']) else 0,
            play['sack'] if not np.isnan(play['sack']) else 0,

            play['score_differential'] if not np.isnan(play['score_differential']) else 0,
            play['qtr'] if not np.isnan(play['qtr']) else 0,
            play['down'] if not np.isnan(play['down']) else 0,
            play['ydstogo'] if not np.isnan(play['ydstogo']) else 0,
            play['yardline_100'] if not np.isnan(play['yardline_100']) else 0,

            play['defenders_in_box'] if not np.isnan(play['defenders_in_box']) else 0,
            play['number_of_pass_rushers'] if not np.isnan(play['number_of_pass_rushers']) else 0,

            play['temp'] if not np.isnan(play['temp']) else 70,
            play['wind'] if not np.isnan(play['wind']) else 0,

            play['shotgun'] if not np.isnan(play['shotgun']) else 0,
            play['no_huddle'] if not np.isnan(play['no_huddle']) else 0,
        ]
        sequence.append(play_stats)

    sequence = np.array(sequence)

    if len(sequence) > 0:
        weights = np.linspace(1.0, 1.5, len(sequence))
        sequence = sequence * weights[:, np.newaxis]

    return sequence

In [15]:
class NFLDataset(Dataset):
    def __init__(self, qb_sequences, def_sequences, y, qb_names, def_teams, indices, max_seq_len=2000):
        self.qb_seqs = [torch.FloatTensor(qb_sequences[i]) for i in indices]
        self.def_seqs = [torch.FloatTensor(def_sequences[i]) for i in indices]
        self.y = torch.FloatTensor(y[indices])
        self.qb_idx = torch.LongTensor([qb_to_idx[qb] for qb in qb_names[indices]])
        self.team_idx = torch.LongTensor([team_to_idx[team] for team in def_teams[indices]])
        self.max_seq_len = max_seq_len

    def __len__(self):
        return len(self.y)  # Return the number of samples

    def __getitem__(self, idx):
        qb_seq = self.qb_seqs[idx]
        def_seq = self.def_seqs[idx]

        # Truncate sequences if they're too long
        if len(qb_seq) > self.max_seq_len:
            qb_seq = qb_seq[-self.max_seq_len:]
        if len(def_seq) > self.max_seq_len:
            def_seq = def_seq[-self.max_seq_len:]

        return (
            qb_seq,
            def_seq,
            self.qb_idx[idx],
            self.team_idx[idx],
            self.y[idx]
        )

def pad_sequences(sequences, max_len=None):
    """Pad sequences to the same length"""
    if max_len is None:
        max_len = max(len(seq) for seq in sequences)

    padded_seqs = []
    for seq in sequences:
        if len(seq) == 0:
            padded_seq = np.zeros((max_len, seq.shape[1] if len(seq.shape) > 1 else 1))
        else:
            pad_length = max_len - len(seq)
            if pad_length > 0:
                padding = np.zeros((pad_length, seq.shape[1]))
                padded_seq = np.vstack([seq, padding])
            else:
                padded_seq = seq[:max_len]
        padded_seqs.append(padded_seq)

    return np.array(padded_seqs)

def collate_fn(batch):
    """Custom collate function to handle variable-length sequences"""
    qb_seqs, def_seqs, qb_idx, team_idx, y = zip(*batch)

    # Find the maximum length in this batch for both QB and DEF sequences
    max_len = max(
        max(len(seq) for seq in qb_seqs),
        max(len(seq) for seq in def_seqs)
    )

    # Pad sequences to the same length
    qb_seqs_padded = pad_sequences([seq.numpy() for seq in qb_seqs], max_len=max_len)
    def_seqs_padded = pad_sequences([seq.numpy() for seq in def_seqs], max_len=max_len)

    return (
        torch.FloatTensor(qb_seqs_padded),
        torch.FloatTensor(def_seqs_padded),
        torch.stack(qb_idx),
        torch.stack(team_idx),
        torch.stack(y)
    )

In [16]:
class QBPerformancePredictor(nn.Module):
    def __init__(self, num_qbs, num_teams, max_seq_len=2000):
        super().__init__()

        self.qb_feature_dim = 18
        self.def_feature_dim = 18
        self.hidden_dim = 128
        self.max_seq_len = max_seq_len

        # Feature embedding
        self.feature_embedding = nn.Linear(self.qb_feature_dim, 64)

        # Transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=64,
            nhead=4,
            dim_feedforward=256,
            dropout=0.1,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=3)

        # Identity embeddings with positional encoding
        self.qb_embedding = nn.Embedding(num_qbs, 32)
        self.team_embedding = nn.Embedding(num_teams, 32)
        self.positional_encoding = nn.Parameter(torch.randn(1, max_seq_len, 64))

        # Attention pooling
        self.attention_weights = nn.Linear(64, 1)

        # Main output layers
        self.fc1 = nn.Linear(192, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 5)

        # Auxiliary output layers
        self.aux_fc1 = nn.Linear(192, 64)
        self.aux_fc2 = nn.Linear(64, 3)  # Predicting completion %, TD rate, INT rate

        self.layer_norm1 = nn.LayerNorm(128)
        self.layer_norm2 = nn.LayerNorm(64)

        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()

    def attention_pool(self, x):
        # Compute attention weights
        weights = torch.softmax(self.attention_weights(x), dim=1)
        # Apply weights to sequence
        return torch.sum(weights * x, dim=1)

    def forward(self, qb_seq, def_seq, qb_idx, team_idx):
        # Truncate sequences if they're too long
        if qb_seq.size(1) > self.max_seq_len:
            qb_seq = qb_seq[:, -self.max_seq_len:, :]
        if def_seq.size(1) > self.max_seq_len:
            def_seq = def_seq[:, -self.max_seq_len:, :]

        # Project features
        qb_embedded = self.feature_embedding(qb_seq)
        def_embedded = self.feature_embedding(def_seq)

        # Add positional encoding
        seq_len = qb_embedded.size(1)
        qb_embedded = qb_embedded + self.positional_encoding[:, :seq_len, :]
        def_embedded = def_embedded + self.positional_encoding[:, :seq_len, :]

        # Transform sequences
        qb_encoded = self.transformer_encoder(qb_embedded)
        def_encoded = self.transformer_encoder(def_embedded)

        # Pool sequences with attention
        qb_pooled = self.attention_pool(qb_encoded)
        def_pooled = self.attention_pool(def_encoded)

        # Get identity embeddings
        qb_emb = self.qb_embedding(qb_idx)
        team_emb = self.team_embedding(team_idx)

        # Combine features
        combined = torch.cat([qb_pooled, def_pooled, qb_emb, team_emb], dim=1)

        # Main prediction path
        x1 = self.fc1(combined)
        x1 = self.layer_norm1(x1)
        x1 = self.relu(x1)
        x1 = self.dropout(x1)

        x2 = self.fc2(x1)
        x2 = self.layer_norm2(x2)
        x2 = self.relu(x2)
        x2 = self.dropout(x2)

        main_out = self.fc3(x2)

        # Auxiliary prediction path
        aux_x = self.aux_fc1(combined)
        aux_x = self.relu(aux_x)
        aux_out = self.aux_fc2(aux_x)

        return main_out, aux_out

In [17]:
def train_epoch(model, train_loader, optimizer, criterion, aux_criterion, device):
    model.train()
    total_loss = 0
    total_main_loss = 0
    total_aux_loss = 0

    with tqdm(train_loader, desc='Training') as pbar:
        for batch in pbar:
            qb_seq, def_seq, qb_idx, team_idx, y = [b.to(device) for b in batch]

            optimizer.zero_grad()

            # Get main and auxiliary predictions
            main_pred, aux_pred = model(qb_seq, def_seq, qb_idx, team_idx)

            # Calculate losses
            main_loss = criterion(main_pred, y)

            # Create auxiliary targets (completion %, TD rate, INT rate)
            aux_targets = torch.stack([
                y[:, 3],  # completion percentage
                y[:, 1] / torch.clamp(y[:, 0], min=1),  # TD rate
                y[:, 2] / torch.clamp(y[:, 0], min=1)   # INT rate
            ], dim=1).to(device)

            aux_loss = aux_criterion(aux_pred, aux_targets)

            # Combined loss
            loss = main_loss + 0.3 * aux_loss

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()

            total_loss += loss.item()
            total_main_loss += main_loss.item()
            total_aux_loss += aux_loss.item()

            pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'main_loss': f'{main_loss.item():.4f}',
                'aux_loss': f'{aux_loss.item():.4f}'
            })

    return total_loss / len(train_loader), total_main_loss / len(train_loader), total_aux_loss / len(train_loader)

def validate(model, val_loader, criterion, aux_criterion, device):
    model.eval()
    total_loss = 0
    total_main_loss = 0
    total_aux_loss = 0

    with torch.no_grad():
        for batch in val_loader:
            qb_seq, def_seq, qb_idx, team_idx, y = [b.to(device) for b in batch]

            main_pred, aux_pred = model(qb_seq, def_seq, qb_idx, team_idx)

            main_loss = criterion(main_pred, y)

            aux_targets = torch.stack([
                y[:, 3],
                y[:, 1] / torch.clamp(y[:, 0], min=1),
                y[:, 2] / torch.clamp(y[:, 0], min=1)
            ], dim=1).to(device)

            aux_loss = aux_criterion(aux_pred, aux_targets)
            loss = main_loss + 0.3 * aux_loss

            total_loss += loss.item()
            total_main_loss += main_loss.item()
            total_aux_loss += aux_loss.item()

    return total_loss / len(val_loader), total_main_loss / len(val_loader), total_aux_loss / len(val_loader)

In [18]:
# Create sequences and prepare data
print("\nCreating sequences...")
qb_sequences = []
def_sequences = []
y_data = []

# Initialize lists to store metrics for plotting
train_losses = []
train_main_losses = []
train_aux_losses = []
val_losses = []
val_main_losses = []
val_aux_losses = []
learning_rates = []

# Group plays by game for target creation
game_stats = pass_plays.groupby(['game_id', 'passer_player_name', 'defteam']).agg({
    'yards_gained': 'sum',
    'pass_touchdown': 'sum',
    'interception': 'sum',
    'complete_pass': 'sum',
    'pass_attempt': 'sum',
    'sack': 'sum'
}).reset_index()

game_stats['completion_percentage'] = (game_stats['complete_pass'] / game_stats['pass_attempt'] * 100).round(1)

for _, game in game_stats.iterrows():
    # Create sequences
    qb_seq = create_sequence_features(pass_plays, game['passer_player_name'], game['game_id'])
    def_seq = create_defense_sequence(pass_plays, game['defteam'], game['game_id'])

    # Skip if no historical data
    if len(qb_seq) == 0 or len(def_seq) == 0:
        continue

    # Create target variables
    target = [
        game['yards_gained'],
        game['pass_touchdown'],
        game['interception'],
        game['completion_percentage'],
        game['sack']
    ]

    qb_sequences.append(qb_seq)
    def_sequences.append(def_seq)
    y_data.append(target)

# Create QB and team indices
print("\nCreating indices...")
qb_to_idx = {qb: idx for idx, qb in enumerate(game_stats['passer_player_name'].unique())}
team_to_idx = {team: idx for idx, team in enumerate(game_stats['defteam'].unique())}

# Scale target variables
scaler = StandardScaler()
y = np.array(y_data)
y_scaled = scaler.fit_transform(y)

# Split into train and test sets
print("\nSplitting data...")
train_size = int(0.8 * len(y_scaled))
indices = np.arange(len(y_scaled))
np.random.shuffle(indices)
train_idx = indices[:train_size]
test_idx = indices[train_size:]

# experiment with this
max_seq_len = 2000 

# Create data loaders with max_seq_len
train_dataset = NFLDataset(
    qb_sequences, def_sequences, y_scaled,
    game_stats['passer_player_name'].values,
    game_stats['defteam'].values, train_idx,
    max_seq_len=max_seq_len  # Pass max_seq_len to the dataset
)

test_dataset = NFLDataset(
    qb_sequences, def_sequences, y_scaled,
    game_stats['passer_player_name'].values,
    game_stats['defteam'].values, test_idx,
    max_seq_len=max_seq_len  # Pass max_seq_len to the dataset
)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=16, collate_fn=collate_fn)

# Initialize model with max_seq_len
model = QBPerformancePredictor(
    num_qbs=len(qb_to_idx),
    num_teams=len(team_to_idx),
    max_seq_len=max_seq_len  # Pass max_seq_len to the model
).to(device)

# Initialize training components
criterion = FocalLoss()
aux_criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

# Training loop
num_epochs = 50
early_stopping = EarlyStopping(patience=10, min_delta=0.001)
best_val_loss = float('inf')

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss, train_main_loss, train_aux_loss = train_epoch(
        model, train_loader, optimizer, criterion, aux_criterion, device
    )

    val_loss, val_main_loss, val_aux_loss = validate(
        model, test_loader, criterion, aux_criterion, device
    )

    # Store metrics for plotting
    train_losses.append(train_loss)
    train_main_losses.append(train_main_loss)
    train_aux_losses.append(train_aux_loss)
    val_losses.append(val_loss)
    val_main_losses.append(val_main_loss)
    val_aux_losses.append(val_aux_loss)
    learning_rates.append(optimizer.param_groups[0]['lr'])

    print(f"Training Loss: {train_loss:.4f} (Main: {train_main_loss:.4f}, Aux: {train_aux_loss:.4f})")
    print(f"Validation Loss: {val_loss:.4f} (Main: {val_main_loss:.4f}, Aux: {val_aux_loss:.4f})\n")

    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'val_loss': val_loss,
            'scaler': scaler,
            'qb_to_idx': qb_to_idx,
            'team_to_idx': team_to_idx,
        }, 'best_model.pth')

    scheduler.step()

    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered!")
        break


Creating sequences...

Creating indices...

Splitting data...
Epoch 1/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.9941 (Main: 0.7194, Aux: 0.9157)
Validation Loss: 0.9274 (Main: 0.6596, Aux: 0.8924)

Epoch 2/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.9255 (Main: 0.6582, Aux: 0.8909)
Validation Loss: 0.9275 (Main: 0.6529, Aux: 0.9155)

Epoch 3/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.8989 (Main: 0.6397, Aux: 0.8643)
Validation Loss: 0.9263 (Main: 0.6572, Aux: 0.8971)

Epoch 4/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.8830 (Main: 0.6260, Aux: 0.8569)
Validation Loss: 0.9425 (Main: 0.6700, Aux: 0.9080)

Epoch 5/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.8543 (Main: 0.6056, Aux: 0.8292)
Validation Loss: 0.9078 (Main: 0.6391, Aux: 0.8958)

Epoch 6/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.8525 (Main: 0.6055, Aux: 0.8231)
Validation Loss: 0.9220 (Main: 0.6495, Aux: 0.9083)

Epoch 7/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.8110 (Main: 0.5734, Aux: 0.7920)
Validation Loss: 0.9748 (Main: 0.6955, Aux: 0.9308)

Epoch 8/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.7882 (Main: 0.5546, Aux: 0.7786)
Validation Loss: 0.9292 (Main: 0.6562, Aux: 0.9098)

Epoch 9/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.7670 (Main: 0.5374, Aux: 0.7654)
Validation Loss: 0.9490 (Main: 0.6737, Aux: 0.9178)

Epoch 10/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.7586 (Main: 0.5314, Aux: 0.7572)
Validation Loss: 0.9453 (Main: 0.6707, Aux: 0.9153)

Epoch 11/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.8102 (Main: 0.5725, Aux: 0.7923)
Validation Loss: 0.9586 (Main: 0.6773, Aux: 0.9378)

Epoch 12/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.7926 (Main: 0.5602, Aux: 0.7747)
Validation Loss: 0.9461 (Main: 0.6686, Aux: 0.9248)

Epoch 13/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.7791 (Main: 0.5525, Aux: 0.7551)
Validation Loss: 0.9722 (Main: 0.6860, Aux: 0.9540)

Epoch 14/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.7592 (Main: 0.5381, Aux: 0.7370)
Validation Loss: 0.9586 (Main: 0.6743, Aux: 0.9474)

Epoch 15/50


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Training Loss: 0.7212 (Main: 0.5070, Aux: 0.7139)
Validation Loss: 0.9628 (Main: 0.6765, Aux: 0.9544)

Early stopping triggered!


In [19]:
def plot_training_history(train_losses, train_main_losses, train_aux_losses,
                         val_losses, val_main_losses, val_aux_losses,
                         learning_rates, save_path):
    """
    Plot detailed training history with multiple metrics and learning rate.
    """
    epochs = range(1, len(train_losses) + 1)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), height_ratios=[2, 1])
    fig.suptitle('QB Performance Predictor Training History', fontsize=16, y=0.95)

    # Plot losses on the first subplot
    ax1.plot(epochs, train_losses, 'b-', label='Train Total Loss', linewidth=2)
    ax1.plot(epochs, train_main_losses, 'b--', label='Train Main Loss', alpha=0.7)
    ax1.plot(epochs, train_aux_losses, 'b:', label='Train Aux Loss', alpha=0.7)
    ax1.plot(epochs, val_losses, 'r-', label='Val Total Loss', linewidth=2)
    ax1.plot(epochs, val_main_losses, 'r--', label='Val Main Loss', alpha=0.7)
    ax1.plot(epochs, val_aux_losses, 'r:', label='Val Aux Loss', alpha=0.7)

    # Add grid and legend to loss plot
    ax1.grid(True, linestyle='--', alpha=0.7)
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend(loc='upper right')

    # Plot learning rate on the second subplot
    ax2.plot(epochs, learning_rates, 'g-', label='Learning Rate', linewidth=2)
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Learning Rate')
    ax2.grid(True, linestyle='--', alpha=0.7)
    ax2.legend(loc='upper right')

    # Add min/max annotations
    min_val_loss = min(val_losses)
    min_val_epoch = val_losses.index(min_val_loss) + 1
    ax1.annotate(f'Min Val Loss: {min_val_loss:.4f}',
                xy=(min_val_epoch, min_val_loss),
                xytext=(10, 10),
                textcoords='offset points',
                ha='right',
                va='bottom',
                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

    # Add early stopping marker if applicable
    if len(val_losses) < num_epochs:
        ax1.axvline(x=len(val_losses), color='r', linestyle='--', alpha=0.5)
        ax1.text(len(val_losses), ax1.get_ylim()[1], 'Early Stop',
                rotation=90, va='top')

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()

# Save training history data
history = {
    'train_losses': train_losses,
    'train_main_losses': train_main_losses,
    'train_aux_losses': train_aux_losses,
    'val_losses': val_losses,
    'val_main_losses': val_main_losses,
    'val_aux_losses': val_aux_losses,
    'learning_rates': learning_rates
}

# Save training history to file
with open('training_history.json', 'w') as f:
    json.dump(history, f)

# Create and save the plot
plot_training_history(
    train_losses, train_main_losses, train_aux_losses,
    val_losses, val_main_losses, val_aux_losses,
    learning_rates,
    save_path='training_history.png'
)

print("Training history has been saved to 'training_history.json'")
print("Training plot has been saved to 'training_history.png'")

Training history has been saved to 'training_history.json'
Training plot has been saved to 'training_history.png'


In [20]:
def predict_qb_performance(qb_name, def_team, max_seq_len=2000):
    """Make predictions for a QB against a specific defense"""
    if qb_name not in qb_to_idx:
        raise ValueError(f"Quarterback {qb_name} not found in data.")
    if def_team not in team_to_idx:
        raise ValueError(f"Defense team {def_team} not found in data.")

    qb_seq = create_sequence_features(pass_plays, qb_name, float('inf'))
    def_seq = create_defense_sequence(pass_plays, def_team, float('inf'))

    if len(qb_seq) == 0:
        raise ValueError(f"No historical data found for QB: {qb_name}")
    if len(def_seq) == 0:
        raise ValueError(f"No historical data found for defense: {def_team}")

    # Truncate and pad sequences to exactly max_seq_len
    qb_seq_padded = pad_sequences([qb_seq], max_len=max_seq_len)[0]  # Get first sequence
    def_seq_padded = pad_sequences([def_seq], max_len=max_seq_len)[0]  # Get first sequence

    qb_seq_tensor = torch.FloatTensor(qb_seq_padded).unsqueeze(0).to(device)
    def_seq_tensor = torch.FloatTensor(def_seq_padded).unsqueeze(0).to(device)
    qb_idx = torch.LongTensor([qb_to_idx[qb_name]]).to(device)
    team_idx = torch.LongTensor([team_to_idx[def_team]]).to(device)

    model.eval()
    with torch.no_grad():
        main_pred, _ = model(qb_seq_tensor, def_seq_tensor, qb_idx, team_idx)

    prediction = scaler.inverse_transform(main_pred.cpu().numpy())

    return {
        'yards_gained': round(float(prediction[0, 0]), 1),
        'pass_touchdown': round(float(prediction[0, 1]), 1),
        'interception': round(float(prediction[0, 2]), 1),
        'completion_percentage': round(float(prediction[0, 3]), 1),
        'sack': round(float(prediction[0, 4]), 1)
    }


try:
    prediction = predict_qb_performance("P.Mahomes", "BUF")
    print("\nPredicted QB Performance:")
    for stat, value in prediction.items():
        print(f"{stat}: {value}")
except ValueError as e:
    print(e)
try:
    prediction = predict_qb_performance("P.Mahomes", "CIN")
    print("\nPredicted QB Performance:")
    for stat, value in prediction.items():
        print(f"{stat}: {value}")
except ValueError as e:
    print(e)
try:
    prediction = predict_qb_performance("J.Allen", "BAL")
    print("\nPredicted QB Performance:")
    for stat, value in prediction.items():
        print(f"{stat}: {value}")
except ValueError as e:
    print(e)
try:
    prediction = predict_qb_performance("W.Levis", "BUF")
    print("\nPredicted QB Performance:")
    for stat, value in prediction.items():
        print(f"{stat}: {value}")
except ValueError as e:
    print(e)
try:
    prediction = predict_qb_performance("W.Levis", "KC")
    print("\nPredicted QB Performance:")
    for stat, value in prediction.items():
        print(f"{stat}: {value}")
except ValueError as e:
    print(e)


Predicted QB Performance:
yards_gained: 184.0
pass_touchdown: 1.4
interception: 0.6
completion_percentage: 68.9
sack: 2.1

Predicted QB Performance:
yards_gained: 204.8
pass_touchdown: 1.5
interception: 0.4
completion_percentage: 63.6
sack: 2.5

Predicted QB Performance:
yards_gained: 255.3
pass_touchdown: 2.2
interception: 0.4
completion_percentage: 65.7
sack: 2.0

Predicted QB Performance:
yards_gained: 174.9
pass_touchdown: 1.0
interception: 0.5
completion_percentage: 57.3
sack: 2.6

Predicted QB Performance:
yards_gained: 230.7
pass_touchdown: 1.7
interception: 0.6
completion_percentage: 63.2
sack: 2.0
