In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [18]:
data = pd.read_csv('clean_data.csv')
data = data.drop(columns='Unnamed: 0.1')

In [19]:
np.random.seed(523)
grouped = data.groupby('GID')
group_keys = list(grouped.groups.keys())
np.random.shuffle(group_keys)
shuffled_data = pd.concat([grouped.get_group(key) for key in group_keys]).reset_index(drop=True)
n1 = int(0.8 * len(data))

In [20]:
stats = ['1G', '2G', 'A', 'SH', 'SOG', 'GB', 'TCH', 'PAS', 'SA', 'SV', 'PS%', 'OPS%', 'OCT', 'OSA', 'OSV']

f_means = {}
f_sd = {}
for stat in stats:
    f_means[stat] = shuffled_data[:n1][stat].mean()
    f_sd[stat] = shuffled_data[:n1][stat].std()

def standardize_data(df):
    for col in df.columns:
        if col in f_means:
            df[col] = (df[col] - f_means[col]) / f_sd[col]
    return df

def build_dataset(df):
    X = torch.tensor(df[stats[0]].values).view(-1, 1).to(torch.float32)
    for i in range(1, len(stats)):
        stat_tensor = torch.tensor(df[stats[i]].values).view(-1, 1).to(torch.float32)
        X = torch.cat((X, stat_tensor), dim=1)
    
    Y = torch.tensor(df['P'].values).to(torch.float32)

    return X, Y

shuffled_data = standardize_data(shuffled_data)
Xtr, Ytr = build_dataset(shuffled_data[:n1])
Xval, Yval = build_dataset(shuffled_data[n1:])

In [21]:
class SimpleModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=15, out_features=1)
    
    def forward(self, x):
        out = self.linear(x)
        return out

In [22]:
model = SimpleModel()
criterion = nn.MSELoss()
lr = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [23]:
num_epochs = 10000
for epoch in range(num_epochs):

    # forward pass
    pred = model(Xtr)
    loss = criterion(pred.view(-1), Ytr)

    # backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 1000 == 0:
        print(loss)

tensor(5.6818, grad_fn=<MseLossBackward0>)
tensor(1.7385, grad_fn=<MseLossBackward0>)
tensor(1.7360, grad_fn=<MseLossBackward0>)
tensor(1.7357, grad_fn=<MseLossBackward0>)
tensor(1.7356, grad_fn=<MseLossBackward0>)
tensor(1.7355, grad_fn=<MseLossBackward0>)
tensor(1.7354, grad_fn=<MseLossBackward0>)
tensor(1.7354, grad_fn=<MseLossBackward0>)
tensor(1.7353, grad_fn=<MseLossBackward0>)
tensor(1.7352, grad_fn=<MseLossBackward0>)


In [24]:
# find val loss
pred_val = model(Xval)
loss_val = criterion(pred_val.view(-1), Yval)
print(loss_val)

tensor(2.6994, grad_fn=<MseLossBackward0>)


In [27]:
opp_dict = {
    1:'T. Schreiber',
    2:'M. Sowers',
    3:'M. Holman',
    4:'J. Teat',
    5:'J. Heningburg',
    6:'S. Handley',
    7:'TJ Malone',
    8:'R. Pannell'
}

team_stats = ['1G', '2G', 'A', 'SH', 'SOG', 'GB', 'TCH', 'PAS', 'SA', 'SV', 'PS%']
opp_stats = ['PS%', 'CT', 'SA', 'SV']

In [26]:
# build prediction data
cur_data = pd.read_csv('cur_data.csv')
cur_data = cur_data.drop(columns='Unnamed: 0.1')
cur_data = standardize_data(cur_data)

def build_input(df, player, opp):
    X = torch.tensor(df[team_stats[0]][df['Unnamed: 0'] == player].values).view(-1, 1).to(torch.float32)
    for i in range(1, len(team_stats)):
        stat_tensor = torch.tensor(df[team_stats[i]][df['Unnamed: 0'] == player].values).view(-1, 1).to(torch.float32)
        X = torch.cat((X, stat_tensor), dim=1)
    
    for i in range(len(opp_stats)):
        stat_tensor = torch.tensor(df[opp_stats[i]][df['Unnamed: 0'] == opp_dict[opp]].values).view(-1, 1).to(torch.float32)
        X = torch.cat((X, stat_tensor), dim=1)
    
    return X

In [38]:
player = 'T. Schreiber'
opp = 8

cur_x = build_input(cur_data, player, opp)
print(model(cur_x))

tensor([[1.7775]], grad_fn=<AddmmBackward0>)
