In [765]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [850]:
data = pd.read_csv('clean_data.csv')
data = data.drop(columns=['Unnamed: 0'])

odds_data = pd.read_csv('clean_odds.csv')
odds_data = odds_data.drop(columns=['Unnamed: 0'])

In [851]:
odds_games = set(odds_data['GAME_ID'])
data = data[~data['GAME_ID'].isin(odds_games)]

In [852]:
np.random.seed(324)
grouped = data.groupby('GAME_ID')
group_keys = list(grouped.groups.keys())
np.random.shuffle(group_keys)
shuffled_data = pd.concat([grouped.get_group(key) for key in group_keys]).reset_index(drop=True)
n1 = int(0.8 * len(data))
n2 = int(0.9 * len(data))

In [853]:
stats = ['OFF_RATING',
       'OREB_PCT', 'TM_TOV_PCT', 'TS_PCT', 'USG_PCT', 'PACE_PER40',
       'POSS', 'PIE', 'DREB', 'AST', 'STL','PTS', 'T1_OR', 'T1_AP', 'T1_UP', 'T1_PACE', 'T1_PIE',
       'T1_TO', 'T1_PTS', 'T2_OR', 'T2_AP', 'T2_UP', 'T2_PACE', 'T2_PIE',
       'T2_TO', 'T2_PTS', 'T3_OR', 'T3_AP', 'T3_UP', 'T3_PACE', 'T3_PIE',
       'T3_TO', 'T3_PTS', 'T4_OR', 'T4_AP', 'T4_UP', 'T4_PACE', 'T4_PIE',
       'T4_TO', 'T4_PTS', 'O1_MIN', 'O1_DR', 'O2_MIN', 'O2_DR', 'O3_MIN',
       'O3_DR', 'O4_MIN', 'O4_DR', 'O5_MIN', 'O5_DR']

f_means = {}
f_sd = {}
for stat in stats:
    f_means[stat] = shuffled_data[:n1][stat].mean()
    f_sd[stat] = shuffled_data[:n1][stat].std()

def standardize_data(df):
    for col in df.columns:
        if col in f_means:
            df[col] = (df[col] - f_means[col]) / f_sd[col]
    return df

def build_dataset(df):
    pos_dic = {
        'G':0,
        'F':1,
        'C':2
    }
    is_home = torch.tensor(df['HOME?'].values)
    start_pos = torch.tensor([pos_dic.get(elem, elem) for elem in df['START_POSITION'].values])
    hot_start_pos = F.one_hot(start_pos, num_classes=3)
    ref1 = F.one_hot(torch.tensor(df['REF_1'].values.astype(int)), num_classes=142)
    ref2 = F.one_hot(torch.tensor(df['REF_2'].values.astype(int)), num_classes=142)
    ref3 = F.one_hot(torch.tensor(df['REF_3'].values.astype(int)), num_classes=142)
    refs = ref1 + ref2 + ref3
    X = torch.cat((is_home.view(-1, 1), hot_start_pos, refs), dim=1)

    for stat in stats:
        stat_tensor = torch.tensor(df[stat].values)
        X = torch.cat((X, stat_tensor.view(-1, 1)), dim=1)
    X = X.to(torch.float32)

    Y = torch.tensor(df['PTS_y'].values).to(torch.float32)

    return X, Y

shuffled_data = standardize_data(shuffled_data)
Xtr, Ytr = build_dataset(shuffled_data[:n1])
Xval, Yval = build_dataset(shuffled_data[n1:n2])
Xte, Yte = build_dataset(shuffled_data[n2:])

In [870]:
batch_size = 64

In [871]:
tm_features = 7
tm_out_size = 15

class TeammateModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=tm_features, out_features=tm_out_size)
    
    def forward(self, x):
        out = self.linear(x)
        return out

In [872]:
t_out_size = 30

class TeamModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.tm_model = TeammateModel()
        self.linear = nn.Linear(in_features=4*tm_out_size, out_features=t_out_size)
    
    def forward(self, x):
        x = self.tm_model(x.view(-1, 4, tm_features))
        out = self.linear(x.view(-1, 4*tm_out_size))
        return out

In [873]:
op_features = 2
op_out_size = 3

class OpponentModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=op_features, out_features=op_out_size)
    
    def forward(self, x):
        out = self.linear(x)
        return out

In [874]:
opt_out_size = 10

class OppTeamModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.op_model = OpponentModel()
        self.linear = nn.Linear(in_features=5*op_out_size, out_features=opt_out_size)
    
    def forward(self, x):
        x = self.op_model(x.view(-1, 5, op_features))
        out = self.linear(x.view(-1, 5*op_out_size))
        return out

In [875]:
class RefModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=142, out_features=5)
    
    def forward(self, x):
        out = self.linear(x)
        return out

In [876]:
other_features = 16
total_features = t_out_size + opt_out_size + other_features + 5

class FinalModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.t_model = TeamModel()
        self.opt_model = OppTeamModel()
        self.r_model = RefModel()
        self.linear = nn.Linear(in_features=total_features, out_features=1)
    
    def forward(self, x):
        x_split = torch.split(x, split_size_or_sections=[other_features, 142, 4 * tm_features, 5 * op_features], dim=1)
        x1 = torch.squeeze(x_split[0])
        x2 = self.r_model(x_split[1])
        x3 = self.t_model(x_split[2])
        x4 = self.opt_model(x_split[3])
        out = self.linear(torch.cat((x1, x2, x3, x4), dim=1))
        return out

In [877]:
model = FinalModel()
criterion = nn.MSELoss()
lr = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [880]:
num_epochs = 100000
for epoch in range(num_epochs):

    # contruct minibatch
    ix = torch.randint(0, Xtr.shape[0], (batch_size,))

    # forward pass
    pred = model(Xtr[ix])
    loss = criterion(pred.view(batch_size), Ytr[ix])

    # backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 10000 == 0:
        print(loss)


tensor(34.8756, grad_fn=<MseLossBackward0>)
tensor(38.5031, grad_fn=<MseLossBackward0>)
tensor(39.7223, grad_fn=<MseLossBackward0>)
tensor(36.1781, grad_fn=<MseLossBackward0>)
tensor(41.8502, grad_fn=<MseLossBackward0>)
tensor(34.5351, grad_fn=<MseLossBackward0>)
tensor(37.4756, grad_fn=<MseLossBackward0>)
tensor(39.3665, grad_fn=<MseLossBackward0>)
tensor(39.3957, grad_fn=<MseLossBackward0>)
tensor(36.5428, grad_fn=<MseLossBackward0>)
tensor(40.9169, grad_fn=<MseLossBackward0>)
tensor(40.6872, grad_fn=<MseLossBackward0>)
tensor(47.9575, grad_fn=<MseLossBackward0>)
tensor(40.0550, grad_fn=<MseLossBackward0>)
tensor(42.5007, grad_fn=<MseLossBackward0>)
tensor(37.5758, grad_fn=<MseLossBackward0>)
tensor(40.7543, grad_fn=<MseLossBackward0>)
tensor(32.1152, grad_fn=<MseLossBackward0>)
tensor(54.6673, grad_fn=<MseLossBackward0>)
tensor(37.6508, grad_fn=<MseLossBackward0>)
tensor(31.1826, grad_fn=<MseLossBackward0>)
tensor(38.7211, grad_fn=<MseLossBackward0>)
tensor(24.9184, grad_fn=<MseLoss

In [881]:
# find validation loss
pred_val = model(Xval)
loss_val = criterion(pred_val.view(Yval.shape), Yval)
print(loss_val)

tensor(40.3648, grad_fn=<MseLossBackward0>)


In [867]:
def build_odds(df):
    pos_dic = {
        'G':0,
        'F':1,
        'C':2
    }
    is_home = torch.tensor(df['HOME?'].values)
    start_pos = torch.tensor([pos_dic.get(elem, elem) for elem in df['START_POSITION'].values])
    hot_start_pos = F.one_hot(start_pos, num_classes=3)
    ref1 = F.one_hot(torch.tensor(df['REF_1'].values.astype(int)), num_classes=142)
    ref2 = F.one_hot(torch.tensor(df['REF_2'].values.astype(int)), num_classes=142)
    ref3 = F.one_hot(torch.tensor(df['REF_3'].values.astype(int)), num_classes=142)
    refs = ref1 + ref2 + ref3
    X = torch.cat((is_home.view(-1, 1), hot_start_pos, refs), dim=1)

    for stat in stats:
        stat_tensor = torch.tensor(df[stat].values)
        X = torch.cat((X, stat_tensor.view(-1, 1)), dim=1)
    X = X.to(torch.float32)

    pts = torch.tensor(df['PTS_y'].values).to(torch.float32)
    ol = torch.tensor(df['O_LINE'].values).to(torch.float32)
    oo = torch.tensor(df['O_ODDS'].values).to(torch.float32)
    ul = torch.tensor(df['U_LINE'].values).to(torch.float32)
    uo = torch.tensor(df['U_ODDS'].values).to(torch.float32)
    Y = torch.stack((pts, ol, oo, ul, uo), dim=1)

    return X, Y

In [868]:
# determine profits
odds_data = pd.read_csv('clean_odds.csv')
odds_data = odds_data.drop(columns=['Unnamed: 0'])
odds_data = standardize_data(odds_data)

In [869]:
Xod, Yod = build_odds(odds_data)

KeyError: 'OFF_RATING'

In [838]:
profit = 0
bets = 0
pred_pts = model(Xod).squeeze()

for i in range(pred_pts.shape[0]):
    if pred_pts[i] > Yod[i][1] + 0:
        bets += 1
        profit -= 1
        if Yod[i][0] > Yod[i][1]:
            profit += Yod[i][2]
    elif pred_pts[i] < Yod[i][3] - 0:
        bets += 1
        profit -= 1
        if Yod[i][0] < Yod[i][3]:
            profit += Yod[i][4]
    if i % 10 == 0:
        print(profit)
print(bets)

0
-1
tensor(0.6600)
tensor(-0.7400)
tensor(-0.1300)
tensor(-1.4200)
tensor(-3.5100)
tensor(-2.0500)
tensor(-1.5400)
tensor(1.8200)
tensor(5.4500)
tensor(5.9600)
tensor(4.7800)
tensor(2.7800)
tensor(2.5600)
tensor(4.9900)
tensor(7.5700)
tensor(9.9100)
tensor(12.4100)
tensor(12.7000)
tensor(12.5600)
tensor(8.5600)
tensor(8.2100)
tensor(8.4800)
tensor(9.1500)
tensor(9.5300)
tensor(10.3900)
tensor(11.7500)
116


In [834]:
'''
ROI w/o refs:
- 5.4% on 175
- 6.1% on 105
- 15.9% on 59
- 17.5% on 24
- 46.9% on 9

ROI w/ refs:
- 4.9% on 268
- 6.8% on 171
- 10.1% on 116
- 10.1% on 57
- 26.5% on 34
- 62.4% on 14
'''

'''
TD:
- GAN
    - Get probability of over and under
- normalization layers
- PACE? Other stats?
- Combine the files and shit
- Tune hyperparamters
- Results for other stats
- Parlays
'''

'\nROI w/o refs:\n- 5.4% on 175\n- 6.1% on 105\n- 15.9% on 59\n- 17.5% on 24\n- 46.9% on 9\n\nROI w/ refs:\n- 4.9% on 268\n- 6.8% on 171\n- 10.1% on 116\n- 10.1% on 57\n- 26.5% on 34\n- 62.4% on 14\n'