In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [38]:
data = pd.read_csv('sv_clean.csv')
data = data.drop(columns='Unnamed: 0.1')

In [39]:
np.random.seed(245)
grouped = data.groupby('GID')
group_keys = list(grouped.groups.keys())
np.random.shuffle(group_keys)
shuffled_data = pd.concat([grouped.get_group(key) for key in group_keys]).reset_index(drop=True)
n1 = int(0.8 * len(data))

In [40]:
stats = ['SA', 'CT', 'PS%', 'SV_x', 'OPS%', 'O1G', 'O2G', 'OSH', 'OSOG']

f_means = {}
f_sd = {}
for stat in stats:
    f_means[stat] = shuffled_data[:n1][stat].mean()
    f_sd[stat] = shuffled_data[:n1][stat].std()

def standardize_data(df):
    for col in df.columns:
        if col in f_means:
            df[col] = (df[col] - f_means[col]) / f_sd[col]
    return df

def build_dataset(df):
    X = torch.tensor(df[stats[0]].values).view(-1, 1).to(torch.float32)
    for i in range(1, len(stats)):
        stat_tensor = torch.tensor(df[stats[i]].values).view(-1, 1).to(torch.float32)
        X = torch.cat((X, stat_tensor), dim=1)
    
    Y = torch.tensor(df['SV'].values).to(torch.float32)

    return X, Y

shuffled_data = standardize_data(shuffled_data)
Xtr, Ytr = build_dataset(shuffled_data[:n1])
Xval, Yval = build_dataset(shuffled_data[n1:])

In [41]:
class SimpleModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=9, out_features=1)
    
    def forward(self, x):
        out = self.linear(x)
        return out

In [42]:
model = SimpleModel()
criterion = nn.MSELoss()
lr = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [43]:
num_epochs = 10
for epoch in range(num_epochs):

    # forward pass
    pred = model(Xtr)
    loss = criterion(pred.view(-1), Ytr)

    # backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(loss)

tensor(154.3382, grad_fn=<MseLossBackward0>)


In [44]:
# find val loss
pred_val = model(Xval)
loss_val = criterion(pred_val.view(-1), Yval)
print(loss_val)

tensor(104.7442, grad_fn=<MseLossBackward0>)


In [13]:
shuffled_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 18 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  30 non-null     object 
 1   SA          30 non-null     float64
 2   SV          30 non-null     float64
 3   GID         30 non-null     float64
 4   TID         30 non-null     float64
 5   CT          30 non-null     float64
 6   OTID        30 non-null     float64
 7   1G          30 non-null     float64
 8   2G          30 non-null     float64
 9   SH          30 non-null     float64
 10  SOG         30 non-null     float64
 11  PS%         30 non-null     float64
 12  SV_x        30 non-null     float64
 13  OPS%        30 non-null     float64
 14  O1G         30 non-null     float64
 15  O2G         30 non-null     float64
 16  OSH         30 non-null     float64
 17  OSOG        30 non-null     float64
dtypes: float64(17), object(1)
memory usage: 4.3+ KB


In [None]:
opp_dict = {
    1:'T. Schreiber',
    2:'M. Sowers',
    3:'M. Holman',
    4:'J. Teat',
    5:'J. Heningburg',
    6:'S. Handley',
    7:'TJ Malone',
    8:'R. Pannell'
}

team_stats = ['SA', 'SV', 'CT', 'PS%']
opp_stats = ['OPS%', 'O1G', 'O2G', 'OSH', 'OSOG']

In [None]:
# build prediction data
cur_data = pd.read_csv('sv_cur.csv')
cur_data = cur_data.drop(columns='Unnamed: 0.1')
cur_data = standardize_data(cur_data)

def build_input(df, player, opp):
    X = torch.tensor(df[team_stats[0]][df['Unnamed: 0'] == player].values).view(-1, 1).to(torch.float32)
    for i in range(1, len(team_stats)):
        stat_tensor = torch.tensor(df[team_stats[i]][df['Unnamed: 0'] == player].values).view(-1, 1).to(torch.float32)
        X = torch.cat((X, stat_tensor), dim=1)
    
    for i in range(len(opp_stats)):
        stat_tensor = torch.tensor(df[opp_stats[i]][df['Unnamed: 0'] == opp_dict[opp]].values).view(-1, 1).to(torch.float32)
        X = torch.cat((X, stat_tensor), dim=1)
    
    return X

In [None]:
player = 'S. Handley'
opp = 2

cur_x = build_input(cur_data, player, opp)
print(model(cur_x))