In [6]:
import torch
import numpy as np
import pandas as pd
import requests
import json
from torch import nn

## Definining Model Architecture
Training on LSTM to try and capture some hidden state of a team

In [162]:
class LSTM_Model(nn.Module):
    def __init__(self, input_size, hidden_size):
        """
        In the constructor we instantiate two nn.Linear modules and assign them as
        member variables.
        """
        super().__init__()
        self.lstm = torch.nn.LSTM(input_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(0.35)
        self.linear1 = nn.Linear(hidden_size, 128)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(128, 2)

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        lstm_out, _ = self.lstm(x)
#         print(lstm_out)
        dropout = self.dropout(lstm_out)
        linear1 = self.linear1(dropout)
        relud = self.relu(linear1)
        pred = self.linear2(relud)
        
        return pred

In [149]:
raw_df = pd.read_csv('2019pbpfeatures.csv')


### Testing for Single Team
Picking the Warriors ('GSW') for now

In [188]:
df = raw_df[(raw_df['HomeTeam'] == 'GSW') | (raw_df['AwayTeam'] == 'GSW')].reset_index()
df.drop(['index'],1,inplace=True)


In [189]:
##train tests split
sizes = df.groupby(['Date']).size()
mask = np.random.rand(len(sizes)) < 0.8
final_mask = np.array([])
for i in range(len(sizes)):
    final_mask = np.concatenate((final_mask, np.full(sizes[i], mask[i])))
final_mask = final_mask.astype(bool)
df_train = df[final_mask].reset_index().drop(['index'],1)
df_test = df[~final_mask].reset_index().drop(['index'],1)
# df.groupby(['Date']).values()

In [190]:
homeLabels = df_train['HomeFinalScore']
awayLabels = df_train['AwayFinalScore']
df_train.drop(['AwayTeam', 'HomeTeam','Date','HomeFinalScore','AwayFinalScore'],1,inplace=True)

In [191]:
df_train

Unnamed: 0,AwayScore,HomeScore,hour_sin,hour_cos,playoff,regular,AwayPlayOneHot,HomePlayOneHot,AwayAssist,HomeAssist,...,3PAr_y,TS%_y,eFG%_y,TOV%_y,ORB%_y,FT/FGA_y,eFG%.1_y,TOV%.1_y,DRB%_y,FT/FGA.1_y
0,-6.666667,-6.666667,-0.730836,0.682553,0,1,1,0,0,0,...,0.463249,0.262896,0.766833,1.284756,1.102111,-0.834536,0.151582,-0.364332,0.203380,0.518127
1,-6.533333,-6.666667,-0.730836,0.682553,0,1,1,0,1,0,...,0.463249,0.262896,0.766833,1.284756,1.102111,-0.834536,0.151582,-0.364332,0.203380,0.518127
2,-6.533333,-6.666667,-0.730836,0.682553,0,1,0,1,0,0,...,0.463249,0.262896,0.766833,1.284756,1.102111,-0.834536,0.151582,-0.364332,0.203380,0.518127
3,-6.533333,-6.666667,-0.730836,0.682553,0,1,1,0,0,0,...,0.463249,0.262896,0.766833,1.284756,1.102111,-0.834536,0.151582,-0.364332,0.203380,0.518127
4,-6.533333,-6.666667,-0.730836,0.682553,0,1,1,0,0,0,...,0.463249,0.262896,0.766833,1.284756,1.102111,-0.834536,0.151582,-0.364332,0.203380,0.518127
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23614,2.066667,0.466667,-0.269797,0.962917,0,1,0,1,0,0,...,-0.700937,-1.708826,-2.252571,0.446872,-0.627947,0.615215,1.391794,0.763363,-0.650815,-0.401912
23615,2.066667,0.466667,-0.269797,0.962917,0,1,0,1,0,0,...,-0.700937,-1.708826,-2.252571,0.446872,-0.627947,0.615215,1.391794,0.763363,-0.650815,-0.401912
23616,2.066667,0.466667,-0.269797,0.962917,0,1,1,0,0,0,...,-0.700937,-1.708826,-2.252571,0.446872,-0.627947,0.615215,1.391794,0.763363,-0.650815,-0.401912
23617,2.066667,0.466667,-0.269797,0.962917,0,1,1,0,0,0,...,-0.700937,-1.708826,-2.252571,0.446872,-0.627947,0.615215,1.391794,0.763363,-0.650815,-0.401912


In [195]:
model = LSTM_Model(146, 1024)
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

In [196]:
model.train()
for i in range(len(df_train)):
    # Forward pass: Compute predicted y by passing x to the model
    x = torch.FloatTensor(df_train.iloc[i]).unsqueeze(0).unsqueeze(0)
    y = torch.FloatTensor((homeLabels.iloc[i], awayLabels.iloc[i])).unsqueeze(0).unsqueeze(0)
    y_pred = model(x)

#     # Compute and print loss
    loss = criterion(y_pred, y)
    if i % 100 == 99:
        print(i, loss.item())

#     # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

99 6.403069496154785
199 5.703798294067383
299 5.503468990325928
399 5.0156707763671875
499 5.336812496185303
599 0.3547244966030121
699 0.42503803968429565
799 0.5547959208488464
899 0.7979294061660767
999 1.1006875038146973
1099 0.2948269546031952
1199 0.3012235462665558
1299 0.7029852867126465
1399 1.0424762964248657
1499 7.646365165710449
1599 3.8297810554504395
1699 2.6207163333892822
1799 1.915623426437378
1899 1.6072160005569458
1999 2.709934711456299
2099 2.2960569858551025
2199 2.0484437942504883
2299 2.117335319519043
2399 2.1351048946380615
2499 0.580202043056488
2599 0.5343806147575378
2699 0.7160704135894775
2799 0.9535329341888428
2899 1.1965781450271606
2999 1.6921334266662598
3099 0.5541466474533081
3199 0.13116875290870667
3299 0.01961066760122776
3399 0.061608925461769104
3499 0.4140268862247467
3599 0.4173767864704132
3699 0.4828709363937378
3799 0.6892611980438232
3899 1.7705140113830566
3999 0.6210579872131348
4099 0.3395281136035919
4199 0.19186925888061523
4299 0

In [185]:
homeLabels

0        1.533333
1        1.533333
2        1.533333
3        1.533333
4        1.533333
           ...   
24550    0.466667
24551    0.466667
24552    0.466667
24553    0.466667
24554    0.466667
Name: HomeFinalScore, Length: 24555, dtype: float64

In [167]:
testHomeLabels = df_test['HomeFinalScore']
testAwayLabels = df_test['AwayFinalScore']
testHomeWins = (df_test['AwayFinalScore'] > df_test['HomeFinalScore']).astype(int)

df_test.drop(['AwayTeam', 'HomeTeam','Date','HomeFinalScore','AwayFinalScore'],1,inplace=True)

In [197]:
start_test = df_test.loc[df['GameTimeLeft'] == 1]
testHomeLabels = start_test['HomeFinalScore']
testAwayLabels = start_test['AwayFinalScore']
testHomeWins = (start_test['AwayFinalScore'] > start_test['HomeFinalScore']).astype(int)
start_test.drop(['AwayTeam', 'HomeTeam','Date','HomeFinalScore','AwayFinalScore'],1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [198]:
len(testHomeWins)

15

In [208]:
model.eval()
preds = []
for i in range(len(start_test)):
    x = torch.FloatTensor(start_test.iloc[i]).unsqueeze(0).unsqueeze(0)
    y = torch.FloatTensor((testHomeLabels.iloc[i], testAwayLabels.iloc[i]))
    
    pred = model(x)
    
    if (pred[0,0,0].item() > pred[0,0,1].item()):
        preds.append(True)
    else:
        preds.append(False)
preds

    

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False]

In [209]:
testHomeWins

0       1
481     0
947     0
1469    0
1958    0
2422    1
2874    1
3347    0
3833    1
4419    0
4837    1
5298    1
5749    1
6243    1
6708    0
dtype: int32

In [210]:
preds

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False]