In [1]:
import sys
sys.path.append('../')

from Datasets.BaseballDataset import BaseballDataset

import torch
import torch.nn as nn
import torch.optim as optim
import math
import torch.nn.functional as F
from torch.utils.data import DataLoader
import json
import pandas as pd
import os
import matplotlib.pyplot as plt

In [2]:
data_config_path = "../data/config.json"
valid_path = "../data/statcast_2023-2024_cleaned.csv"
sequence_length = 200
valid_data = pd.read_csv(valid_path)

In [3]:
valid_data.columns

Index(['launch_speed', 'game_date', 'release_speed', 'release_pos_x',
       'release_pos_z', 'batter', 'pitcher', 'pfx_x', 'pfx_z', 'plate_x',
       'plate_z', 'hc_x', 'hc_y', 'vy0', 'vz0', 'ax', 'ay', 'az', 'sz_top',
       'sz_bot', 'launch_angle', 'release_spin_rate', 'release_extension',
       'game_pk', 'release_pos_y', 'at_bat_number', 'batter_name',
       'pitcher_name', 'events_B', 'events_S', 'events_double',
       'events_field_out', 'events_hit_by_pitch', 'events_home_run',
       'events_single', 'events_strikeout', 'events_triple', 'events_walk',
       'pitch_type_CH', 'pitch_type_CS', 'pitch_type_CU', 'pitch_type_EP',
       'pitch_type_FA', 'pitch_type_FC', 'pitch_type_FF', 'pitch_type_FO',
       'pitch_type_FS', 'pitch_type_KC', 'pitch_type_KN', 'pitch_type_PO',
       'pitch_type_SC', 'pitch_type_SI', 'pitch_type_SL', 'pitch_type_ST',
       'pitch_type_SV', 'stand_L', 'stand_R', 'p_throws_L', 'p_throws_R',
       'hit_location_0.0', 'hit_location_1.0', 'hit_loc

In [5]:

valid_dataset = BaseballDataset(valid_data,data_config_path,sequence_length)

In [6]:
valid_dataset.label_columns

{'events', 'hc_x', 'hc_y', 'hit_location', 'launch_angle', 'launch_speed'}

In [7]:
seq, cont_target, cat_targets = valid_dataset[2]

In [18]:
seq[-1]

tensor([-3.7555e-17,  7.2403e-01, -1.6664e-01,  3.9323e-01, -8.3054e-01,
        -5.8089e-02,  1.1461e+00,  3.0425e-01, -3.3902e-16,  1.9012e-16,
        -7.1635e-01, -1.5105e-01, -9.2301e-01,  9.6360e-01, -5.2170e-02,
         1.9702e+00,  2.7812e+00, -2.3654e-17, -1.5236e+00, -9.9139e-02,
         5.6065e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00,
         0.0000e+00,  1.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  1.0000e+00,  0.0

In [9]:
cont_target

tensor([-0.6591, -0.4316, -0.4295, -0.6863])

In [11]:
cat_targets

[tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]

In [12]:
valid_dataset.continuous_label_indices

tensor([17,  8,  9,  0])

In [16]:
valid_dataset.categorical_label_indices

[tensor([21, 22, 23, 24, 25, 26, 27, 28, 29, 30]),
 tensor([52, 53, 54, 55, 56, 57, 58, 59, 60, 61])]

In [17]:
valid_dataset.mask_indices

tensor([73, 74])

In [13]:
valid_dataset.mask

tensor([-3.7555e-17,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00, -3.3902e-16,  1.9012e-16,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00, -2.3654e-17,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0

In [19]:
val_loader = DataLoader(valid_dataset, batch_size=2, shuffle=False, num_workers=0)

In [20]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``
        """
        x = x + self.pe[:x.size(1)].transpose(0, 1)
        return self.dropout(x)

class TransformerModel(nn.Module):
    def __init__(self, input_dim, num_heads, num_encoder_layers, hidden_dim, output_dim, sequence_length, dropout=0.1):
        super(TransformerModel, self).__init__()
        
        self.input_dim = input_dim
        self.sequence_length = sequence_length
        
        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.positional_encoding = PositionalEncoding(hidden_dim, dropout)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads, dim_feedforward=hidden_dim, dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        
        self.fc_layers = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        x = self.embedding(x)
        x = self.positional_encoding(x)
        x = self.transformer_encoder(x)
        x = x[:, -1, :]  # Use the output of the last pitch in the sequence
        x = self.fc_layers(x)
        return x


def load_model(model_path, model_class, config_path):
    with open(config_path, 'r') as file:
        config = json.load(file)

    state_dict = torch.load(model_path)
    state_dict_vals = list(state_dict.values())
    

    model = TransformerModel(
        input_dim=state_dict_vals[0].shape[1],
        num_heads=config['num_heads'],
        num_encoder_layers=config['num_encoder_layers'],
        hidden_dim=config['hidden_dim'],
        output_dim=state_dict_vals[-1].shape[0],
        sequence_length=config['sequence_length'],
        dropout=config.get('dropout', 0.1)  # Optional: provide a default value for dropout if not in config
    )
    model = nn.DataParallel(model)
    model.load_state_dict(state_dict)
    model.eval()  # Set the model to evaluation mode
    return model

In [27]:
model_path = "small_200_output/transformer_model.pth"
model = load_model(model_path,TransformerModel, "train_config.json")

In [28]:
for ex in val_loader:
    sequence, cont_target, cat_targets = ex
    print(sequence.shape,cont_target.shape)
    break

torch.Size([2, 200, 67]) torch.Size([2, 4])


In [31]:
output = model(sequence).cpu()
output

tensor([[  1.2394,   2.4635,   1.6865,   1.7297,  -1.6761,  -3.3174,  -1.7219,
          -0.9224,   0.8666,   0.2257,   0.3020,  -0.7569,  -1.5855, -21.6485,
          -4.7600,  -0.4523,   1.5333, -11.0467,  -1.3817,   0.1913, -10.9161,
          -4.9816, -11.2746, -21.9650],
        [ -0.5015,  -0.4433,  -0.5427,  -0.4247,  -8.0998,  -5.5313,  -8.0937,
          -7.7401,  -7.5339,  -7.6522,  -6.6194,  -6.7669,  -6.2269, -22.8538,
           8.0671,  -7.6580,  -5.2773,  -9.2394,  -8.5116,  -6.4327,  -6.7901,
         -11.6350,  -7.1483, -23.0533]], grad_fn=<ToCopyBackward0>)

In [32]:
print(output[:, :cont_target.size(1)])
mse_loss = F.mse_loss(output[:, :cont_target.size(1)], cont_target)

# Categorical target loss (Cross-Entropy) for each categorical feature
categorical_losses = []
start_idx = cont_target.size(1)
for cat_target in cat_targets:
    end_idx = start_idx + cat_target.size(1)
    print(output[:, start_idx:end_idx])
    categorical_losses.append(F.cross_entropy(output[:, start_idx:end_idx], cat_target))
    start_idx = end_idx

tensor([[ 1.2394,  2.4635,  1.6865,  1.7297],
        [-0.5015, -0.4433, -0.5427, -0.4247]], grad_fn=<SliceBackward0>)
tensor([[ -1.6761,  -3.3174,  -1.7219,  -0.9224,   0.8666,   0.2257,   0.3020,
          -0.7569,  -1.5855, -21.6485],
        [ -8.0998,  -5.5313,  -8.0937,  -7.7401,  -7.5339,  -7.6522,  -6.6194,
          -6.7669,  -6.2269, -22.8538]], grad_fn=<SliceBackward0>)
tensor([[ -4.7600,  -0.4523,   1.5333, -11.0467,  -1.3817,   0.1913, -10.9161,
          -4.9816, -11.2746, -21.9650],
        [  8.0671,  -7.6580,  -5.2773,  -9.2394,  -8.5116,  -6.4327,  -6.7901,
         -11.6350,  -7.1483, -23.0533]], grad_fn=<SliceBackward0>)


In [31]:
cont_output = output[:, :cont_target.size(1)].cpu().squeeze(0).detach().numpy()
cont_output

array([[ 1.239352  ,  2.4634557 ,  1.6864654 ,  1.7297451 ],
       [-0.50145173, -0.44333595, -0.54268885, -0.42470556]],
      dtype=float32)

In [12]:
valid_dataset.continuous_label_names

['hc_y', 'launch_speed', 'hc_x', 'launch_angle']

In [32]:

cat_output = nn.functional.softmax(output[:, cont_target.size(1):]).cpu().squeeze(0).detach().numpy()
cat_output

  cat_output = nn.functional.softmax(output[:, cont_target.size(1):]).cpu().squeeze(0).detach().numpy()


array([[1.4168231e-02, 2.7448731e-03, 1.3534164e-02, 3.0105993e-02,
        1.8014494e-01, 9.4896130e-02, 1.0242866e-01, 3.5523973e-02,
        1.5512227e-02, 3.0019546e-11, 6.4862391e-04, 4.8172981e-02,
        3.5088620e-01, 1.2070911e-06, 1.9019229e-02, 9.1690525e-02,
        1.3754304e-06, 5.1969773e-04, 9.6108840e-07, 2.1876142e-11],
       [9.5242619e-08, 1.2425043e-06, 9.5818777e-08, 1.3646492e-07,
        1.6772191e-07, 1.4901202e-07, 4.1854912e-07, 3.6114309e-07,
        6.1972986e-07, 3.7257974e-14, 9.9999368e-01, 1.4814243e-07,
        1.6017583e-06, 3.0471927e-08, 6.3090845e-08, 5.0445033e-07,
        3.5286942e-07, 2.7764366e-09, 2.4663615e-07, 3.0521565e-14]],
      dtype=float32)

In [15]:
valid_dataset.categorical_label_names

['events_S',
 'events_double',
 'events_field_out',
 'events_hit_by_pitch',
 'events_home_run',
 'events_single',
 'events_strikeout',
 'events_triple',
 'events_walk',
 'events_mask',
 'hit_location_1.0',
 'hit_location_2.0',
 'hit_location_3.0',
 'hit_location_4.0',
 'hit_location_5.0',
 'hit_location_6.0',
 'hit_location_7.0',
 'hit_location_8.0',
 'hit_location_9.0',
 'hit_location_mask']

In [33]:
print(cont_output.shape, cat_output.shape)

(2, 4) (2, 20)


In [37]:
import numpy as np

predictions = np.concatenate((cont_output, cat_output),axis=1)
predictions.shape

(2, 24)

In [38]:
pd.DataFrame(predictions, columns=valid_dataset.continuous_label_names + valid_dataset.categorical_label_names)

Unnamed: 0,hc_y,launch_speed,hc_x,launch_angle,events_S,events_double,events_field_out,events_hit_by_pitch,events_home_run,events_single,...,hit_location_1.0,hit_location_2.0,hit_location_3.0,hit_location_4.0,hit_location_5.0,hit_location_6.0,hit_location_7.0,hit_location_8.0,hit_location_9.0,hit_location_mask
0,1.239352,2.463456,1.686465,1.729745,0.01416823,0.002745,0.01353416,0.03010599,0.1801449,0.09489613,...,0.000649,0.04817298,0.350886,1.207091e-06,0.01901923,0.09169053,1.37543e-06,0.0005196977,9.610884e-07,2.187614e-11
1,-0.501452,-0.443336,-0.542689,-0.424706,9.524262e-08,1e-06,9.581878e-08,1.364649e-07,1.677219e-07,1.49012e-07,...,0.999994,1.481424e-07,2e-06,3.047193e-08,6.309084e-08,5.044503e-07,3.528694e-07,2.776437e-09,2.466361e-07,3.052157e-14
