In [11]:
import gc
import os
import numpy as np
import pandas as pd

from tqdm import tqdm
from bayes_opt import BayesianOptimization

# Run a full garbage-collection pass; as the last expression in the cell, its
# return value (the number of unreachable objects found) is what gets displayed.
gc.collect()

0

In [12]:

# Relative path of the sample CSV that is handed to load_data() below.
DATA_PATH = '../data/testing/ncaam_sample_data.csv'
def load_data(data_path):
    """Read the game CSV, order it chronologically and add two-point columns.

    Parameters
    ----------
    data_path : str or file-like
        Anything ``pandas.read_csv`` accepts. The file must contain ``date``,
        ``team_name`` and, for both the ``team_`` and ``opp_`` prefixes, the
        columns ``fgm``, ``fgm3``, ``fga`` and ``fga3``.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``date`` then ``team_name`` with a fresh 0..n-1 index,
        ``date`` converted with ``pd.to_datetime``, and four appended columns
        (``team_fgm2``, ``team_fga2``, ``opp_fgm2``, ``opp_fga2``), each being
        the overall field-goal count minus the three-point count.
    """
    games = (
        pd.read_csv(data_path)
        .assign(date=lambda frame: pd.to_datetime(frame['date']))
        .sort_values(by=['date', 'team_name'])
        .reset_index(drop=True)
    )

    # Two-pointers = all field goals minus three-pointers (made and attempted).
    # Loop order keeps the new columns in the order team_fgm2, team_fga2,
    # opp_fgm2, opp_fga2.
    for side in ('team', 'opp'):
        for stat in ('fgm', 'fga'):
            games[f'{side}_{stat}2'] = games[f'{side}_{stat}'] - games[f'{side}_{stat}3']

    return games


# Load the sample games once; later cells add columns to this frame in place.
sample = load_data(DATA_PATH)

In [13]:
# Per-row stat margins: for every box-score stat, store team value minus
# opponent value in a new "<stat>_diff" column (added in the order listed).
for stat in ('fgm2', 'fgm3', 'ftm', 'or', 'dr', 'ast', 'to', 'stl', 'blk', 'pf'):
    sample[f'{stat}_diff'] = sample[f'team_{stat}'] - sample[f'opp_{stat}']

In [14]:
import torch
import torch.nn as nn

class FlexibleKAN(nn.Module):
    """Kolmogorov-Arnold-style network assembled from tiny univariate MLPs.

    The model holds ``num_functions`` banks. Each bank owns one scalar-to-scalar
    MLP per input feature; the bank's value is the sum of those MLPs applied to
    their respective features. A linear layer then maps the ``num_functions``
    bank values to ``output_dim`` outputs.

    Because every bank covers every input column, all features contribute to
    the output regardless of how ``num_functions`` compares with ``input_dim``.

    Parameters
    ----------
    input_dim : int
        Number of columns expected in the input (must be >= 1).
    output_dim : int
        Number of values produced per input row.
    num_functions : int
        Number of banks, i.e. the width fed into ``final_layer`` (must be >= 1).

    Raises
    ------
    ValueError
        If ``input_dim`` or ``num_functions`` is smaller than 1.
    """

    def __init__(self, input_dim, output_dim, num_functions):
        super(FlexibleKAN, self).__init__()
        if input_dim < 1 or num_functions < 1:
            raise ValueError(
                "input_dim and num_functions must both be >= 1, got "
                f"input_dim={input_dim}, num_functions={num_functions}"
            )
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_functions = num_functions

        # univariate_functions[q][p] is the univariate function that bank q
        # applies to input feature p.
        self.univariate_functions = nn.ModuleList([
            nn.ModuleList([self._make_univariate() for _ in range(input_dim)])
            for _ in range(num_functions)
        ])

        # Final layer to combine the bank outputs
        self.final_layer = nn.Linear(num_functions, output_dim)

    @staticmethod
    def _make_univariate(hidden=10):
        """Build one scalar-to-scalar MLP (1 -> hidden -> 1 with ReLU)."""
        return nn.Sequential(
            nn.Linear(1, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        )

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, output_dim)``.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D or its second dimension is not ``input_dim``.
        """
        if x.dim() != 2 or x.size(1) != self.input_dim:
            raise ValueError(
                f"expected input of shape (batch, {self.input_dim}), "
                f"got {tuple(x.shape)}"
            )

        # One (batch, 1) slice per input feature, shared by every bank.
        columns = x.split(1, dim=1)

        # For each bank: apply its per-feature functions and add them up,
        # giving one (batch, 1) tensor per bank.
        bank_sums = [
            torch.stack([fn(col) for fn, col in zip(bank, columns)], dim=0).sum(dim=0)
            for bank in self.univariate_functions
        ]

        # (batch, num_functions) -> (batch, output_dim)
        return self.final_layer(torch.cat(bank_sums, dim=1))

def prediction_KAN(team_embed, opponent_embed, is_home, num_functions, num_stats, kan=None):
    """Predict stat differentials from two team embeddings and a home flag.

    The three inputs are concatenated along dim 1 and passed through a network.

    Parameters
    ----------
    team_embed, opponent_embed, is_home : torch.Tensor
        2-D tensors with the same number of rows; they are joined column-wise.
    num_functions : int
        ``num_functions`` argument for the ``FlexibleKAN`` built when ``kan``
        is not supplied.
    num_stats : int
        Output width for the ``FlexibleKAN`` built when ``kan`` is not supplied.
    kan : torch.nn.Module, optional
        Existing network to apply. Pass the same module on every call so its
        weights persist and can be trained. When omitted, a new, randomly
        initialised ``FlexibleKAN`` is created for this call only, so its
        weights are discarded when the function returns.

    Returns
    -------
    torch.Tensor
        Result of applying the network to the concatenated input.
    """
    # Combine inputs
    x = torch.cat([team_embed, opponent_embed, is_home], dim=1)

    # Reuse the caller's network when given; otherwise build a throwaway one
    # sized from the concatenated input.
    if kan is None:
        kan = FlexibleKAN(x.size(1), num_stats, num_functions)

    return kan(x)

def update_KAN(actual_diff, predicted_diff, num_functions, embed_dim, kan=None):
    """Turn actual and predicted stat differentials into an embedding-sized vector.

    The two inputs are concatenated along dim 1 and passed through a network.

    Parameters
    ----------
    actual_diff, predicted_diff : torch.Tensor
        2-D tensors with the same number of rows; they are joined column-wise.
    num_functions : int
        ``num_functions`` argument for the ``FlexibleKAN`` built when ``kan``
        is not supplied.
    embed_dim : int
        Output width for the ``FlexibleKAN`` built when ``kan`` is not supplied.
    kan : torch.nn.Module, optional
        Existing network to apply. Pass the same module on every call so its
        weights persist and can be trained. When omitted, a new, randomly
        initialised ``FlexibleKAN`` is created for this call only, so its
        weights are discarded when the function returns.

    Returns
    -------
    torch.Tensor
        Result of applying the network to the concatenated input.
    """
    # Combine inputs
    x = torch.cat([actual_diff, predicted_diff], dim=1)

    # Reuse the caller's network when given; otherwise build a throwaway one
    # sized from the concatenated input.
    if kan is None:
        kan = FlexibleKAN(x.size(1), embed_dim, num_functions)

    return kan(x)

In [15]:
# Example usage: call both helpers once on single-row inputs.
# No RNG seed is set in this cell, so the random tensors differ between runs.
team_embed_dim = 4
num_stats = 10  # same count as the *_diff columns added to `sample` earlier
team_embed = torch.randn(1, team_embed_dim)
opponent_embed = torch.randn(1, team_embed_dim)
is_home = torch.tensor([[1.0]])
num_pred_functions = 6
print(team_embed.shape, opponent_embed.shape, is_home.shape)
predicted_diff = prediction_KAN(team_embed, opponent_embed, is_home, num_pred_functions, num_stats)

# For update KAN
# NOTE(review): predicted_diff is reassigned to random values below, so the
# prediction computed above never reaches update_KAN — confirm this is intended.
actual_diff = torch.randn(1, num_stats)
predicted_diff = torch.randn(1, num_stats)
num_update_functions = 4

update_vector = update_KAN(actual_diff, predicted_diff, num_update_functions, team_embed_dim)

torch.Size([1, 4]) torch.Size([1, 4]) torch.Size([1, 1])


In [18]:
# Display every column label of `sample`, including the derived *_2 and *_diff columns.
list(sample)

['season',
 'team_score',
 'opp_score',
 'is_home',
 'numot',
 'team_fgm',
 'team_fga',
 'team_fgm3',
 'team_fga3',
 'team_ftm',
 'team_fta',
 'team_or',
 'team_dr',
 'team_ast',
 'team_to',
 'team_stl',
 'team_blk',
 'team_pf',
 'opp_fgm',
 'opp_fga',
 'opp_fgm3',
 'opp_fga3',
 'opp_ftm',
 'opp_fta',
 'opp_or',
 'opp_dr',
 'opp_ast',
 'opp_to',
 'opp_stl',
 'opp_blk',
 'opp_pf',
 'team_name',
 'opp_name',
 'date',
 'team_fgm2',
 'team_fga2',
 'opp_fgm2',
 'opp_fga2',
 'fgm2_diff',
 'fgm3_diff',
 'ftm_diff',
 'or_diff',
 'dr_diff',
 'ast_diff',
 'to_diff',
 'stl_diff',
 'blk_diff',
 'pf_diff']

In [23]:
# Scalar settings (NOTE(review): the code that consumes them is outside this view).
end_season = 2019
learning_rate = 0.01

# Give every distinct team_name its own all-zero vector of length team_embed_dim.
teams = sample['team_name'].unique()
team_embeddings = dict((name, torch.zeros(team_embed_dim)) for name in teams)

