In [1]:
import sys
sys.path.append('../')

In [3]:
from Datasets.BaseballDataset import BaseballDataset

In [4]:
import pandas as pd

data_config_path = "../data/config.json"
valid_path = "../data/statcast_2023-2024_cleaned.csv"
valid_data = pd.read_csv(valid_path)


In [5]:
config = "../data/config.json"
sequence_length = 20 


valid_dataset = BaseballDataset(valid_data,config,sequence_length)


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import math
import torch.nn.functional as F


class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``
        """
        x = x + self.pe[:x.size(1)].transpose(0, 1)
        return self.dropout(x)

class TransformerModel(nn.Module):
    def __init__(self, input_dim, num_heads, num_encoder_layers, hidden_dim, output_dim, sequence_length, dropout=0.1):
        super(TransformerModel, self).__init__()
        
        self.input_dim = input_dim
        self.sequence_length = sequence_length
        
        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.positional_encoding = PositionalEncoding(hidden_dim, dropout)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads, dim_feedforward=hidden_dim, dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        
        self.fc_layers = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        x = self.embedding(x)
        x = self.positional_encoding(x)
        x = self.transformer_encoder(x)
        x = x[:, -1, :]  # Use the output of the last pitch in the sequence
        x = self.fc_layers(x)
        return x

class CustomLoss(nn.Module):
    def __init__(self, weight_param):
        super(CustomLoss, self).__init__()
        self.weight_param = weight_param

    def forward(self, output, target_continuous, target_categorical):
        # Continuous target loss (MSE)
        mse_loss = F.mse_loss(output[:, :target_continuous.size(1)], target_continuous)
        
        # Categorical target loss (Cross-Entropy) for each categorical feature
        cross_entropy_loss = 0
        start_idx = target_continuous.size(1)
        for cat_target in target_categorical:
            end_idx = start_idx + cat_target.size(1)
            cross_entropy_loss += F.cross_entropy(output[:, start_idx:end_idx], cat_target.argmax(dim=1))
            start_idx = end_idx
        
        # # Sum of all categorical losses
        # cross_entropy_loss = torch.sum(categorical_losses)

        # Weighted sum of the losses
        loss = (self.weight_param * mse_loss) + ((1 - self.weight_param) * cross_entropy_loss)
        return loss

# # Hyperparameters
# input_dim = 65 # Number of features in a single pitch
# num_heads = 4
# num_encoder_layers = 4
# hidden_dim = 40 # Increased hidden dimension for better representation
# output_dim = 6  # Number of label dimensions
# sequence_length = 20
# dropout = 0.1
# batch_size = 32

# # Initialize the model, loss function, and optimizer
# model = TransformerModel(input_dim, num_heads, num_encoder_layers, hidden_dim, output_dim, sequence_length, dropout)
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# # Dummy data for testing
# dummy_input = torch.randn(batch_size, sequence_length, input_dim)
# dummy_target_continuous = torch.randn(batch_size, 3)  # Assuming 3 continuous targets
# dummy_target_categorical = torch.randint(0, 2, (batch_size, 3))  # Assuming 3 categorical targets

# # Forward pass
# output = model(dummy_input)

# # Define the custom loss function
# weight_param = 0.5  # Adjust this weight parameter as needed
# criterion = CustomLoss(weight_param)

# # Compute the loss
# loss = criterion(output, dummy_target_continuous, dummy_target_categorical)
# print(f'Loss: {loss.item()}')


In [14]:
from torch.utils.data import DataLoader

batch_size = 16
train_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

In [10]:
valid_dataset[0][0].shape

torch.Size([20, 75])

In [11]:
valid_dataset[0][2]

[tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]),
 tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])]

In [15]:

input_dim = 75  # Number of features in a single pitch
num_heads = 1
num_encoder_layers = 1
hidden_dim = 30
output_dim = 24 # Number of label dimensions
sequence_length = 20
dropout = 0.1

# Initialize the model, loss function, and optimizer
model = TransformerModel(input_dim, num_heads, num_encoder_layers, hidden_dim, output_dim, sequence_length, dropout)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = CustomLoss(weight_param=0.5)

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for sequences, cont_targets, cat_targets in train_loader:
        sequences, cont_targets = sequences.to(device), cont_targets.to(device)
        cat_targets = [t.to(device) for t in cat_targets]
        
        optimizer.zero_grad()
        outputs = model(sequences)
        loss = criterion(outputs, cont_targets, cat_targets)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    epoch_loss = running_loss / len(train_loader)
    return epoch_loss

def evaluate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for sequences, cont_targets, cat_targets in val_loader:
            sequences, cont_targets = sequences.to(device), cont_targets.to(device)
            cat_targets = [t.to(device) for t in cat_targets]
            
            outputs = model(sequences)
            loss = criterion(outputs, cont_targets, cat_targets)
            
            running_loss += loss.item()
    
    epoch_loss = running_loss / len(val_loader)
    return epoch_loss

num_epochs = 2

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss = evaluate(model, val_loader, criterion, device)
    
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')



cuda
Epoch 1/2, Train Loss: 0.6349, Val Loss: 0.5596
Epoch 2/2, Train Loss: 0.5965, Val Loss: 0.5538


In [13]:
a = torch.stack([torch.tensor([1,2,3]),torch.tensor([4,5,6]),torch.tensor([7,8,9])])
a.shape

torch.Size([3, 3])

In [14]:
torch.index_select(a,0,torch.tensor([1,2]))

tensor([[4, 5, 6],
        [7, 8, 9]])