In [51]:
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np

In [52]:
# Locations of the outcome CSV and the feature CSV read by the next cell.
# NOTE(review): these are absolute, machine-specific paths - adjust before running elsewhere.
outcomePath, featurePath = (
    '/Users/justinholmes/Desktop/Sports_Model/Data/outcomeData.csv',
    '/Users/justinholmes/Desktop/Sports_Model/Data/featureData.csv',
)

In [53]:
# Read both CSV files into DataFrames (outcome file first, then feature file).
outcomeData, featureData = (
    pd.read_csv(csvPath) for csvPath in (outcomePath, featurePath)
)

### Format all data

In [54]:
# Discard the columns that are not wanted in the working frame.
unwantedColumns = ["Start Time", "Box Score", "Notes"]
outcomeData = outcomeData.drop(columns=unwantedColumns)

In [55]:
def _overtimeRounds(label):
    """Translate one raw "OT?" cell into its number of overtime rounds.

    A missing value becomes 0, a bare "OT" becomes 1, and "<k>OT" becomes k
    for any k (not only 2-4). A value that is already numeric is returned as
    an int, so re-running this cell on the converted column is harmless.

    Raises:
        ValueError: if the cell is neither missing, numeric, nor "<k>OT".
    """
    if pd.isna(label):
        return 0
    text = str(label).strip()
    if text.endswith("OT"):
        count = text[:-2]
        # An empty prefix is the plain "OT" label, i.e. a single overtime round.
        return int(count) if count else 1
    return int(float(text))


# change OT to integer value for number of OT rounds
# The final cast gives the column a real integer dtype instead of leaving it as
# an object column holding a mix of Python ints.
outcomeData["OT?"] = outcomeData["OT?"].map(_overtimeRounds).astype(int)

In [56]:
def redefineTeamNames(dataframe):
    """Return a new frame with old team names rewritten to their replacement names.

    Every old name is passed to `DataFrame.replace` with `regex=True`, so it is
    also rewritten when it occurs inside a longer string cell. The replacements
    are applied one after another in the order listed below, and the input
    frame itself is not modified.
    """
    # (old name, replacement name) pairs, applied top to bottom.
    renames = (
        ('Seattle SuperSonics', 'Oklahoma City Thunder'),
        ('New Orleans/Oklahoma City Hornets', 'New Orleans Pelicans'),
        ('New Orleans Hornets', 'New Orleans Pelicans'),
        ('Charlotte Bobcats', 'Charlotte Hornets'),
        ('New Jersey Nets', 'Brooklyn Nets'),
    )

    renamed = dataframe
    for oldName, newName in renames:
        renamed = renamed.replace(oldName, newName, regex=True)

    return renamed

# Rewrite the old team names throughout the outcome frame.
outcomeData = outcomeData.pipe(redefineTeamNames)

In [57]:
# Visitor points minus home points (negative when the home side scored more), as float.
outcomeData["Final Home Spread"] = outcomeData["VisitorPTS"].sub(outcomeData["HomePTS"]).astype(float)

In [58]:
# Put the feature columns to the right of the outcome columns. Rows are paired by
# index label; no key column is used.
# NOTE(review): assumes both CSVs list the same games in the same order - confirm.
outcomeData = pd.concat((outcomeData, featureData), axis="columns", sort=False)

In [59]:
def homeWin(dataframe):
    """Report whether the 'Winner' entry of one record equals its 'Home' entry.

    `dataframe` is a single record (for example one row handed over by
    `DataFrame.apply(..., axis=1)`) that can be indexed with 'Winner' and
    'Home'. Returns True when the two entries compare equal, otherwise False.
    """
    return bool(dataframe['Winner'] == dataframe['Home'])

# Flag home wins with one vectorized comparison of the two columns instead of a
# Python-level function call per row. The boolean result per row is the same, and
# the assignment also works when the frame has no rows.
outcomeData['HomeWin'] = outcomeData['Winner'].eq(outcomeData['Home'])

In [60]:
# Preview the first five rows of the merged frame, including the new HomeWin column.
outcomeData.head(n=5)

Unnamed: 0,Year,Month,Visitor,VisitorPTS,Home,HomePTS,OT?,Attend,Total Points,Winner,...,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,HomeWin
0,2005,november,Houston Rockets,79,Detroit Pistons,87,0,22076,166,Detroit Pistons,...,-0.015,-0.014,-0.2,4.9,0.005,0.002,0.6,-1.5,-0.038,True
1,2005,november,Sacramento Kings,98,Dallas Mavericks,107,0,20041,205,Dallas Mavericks,...,0.004,-0.004,0.5,-0.2,0.047,-0.021,1.0,1.2,0.025,True
2,2005,november,Denver Nuggets,78,Los Angeles Lakers,89,0,18997,167,Los Angeles Lakers,...,0.0,0.002,-0.4,1.1,-0.016,0.011,-4.1,-0.6,-0.029,True
3,2005,november,Indiana Pacers,109,Cleveland Cavaliers,104,2,19730,213,Indiana Pacers,...,-0.016,-0.006,-1.1,6.1,-0.039,0.013,0.5,0.2,-0.013,False
4,2005,november,Milwaukee Bucks,92,Orlando Magic,93,0,15138,185,Orlando Magic,...,0.002,0.003,1.6,0.9,0.004,-0.015,0.8,-0.7,0.022,True


In [61]:
# Encode team names as integer ids. The encoder is fitted on the names from BOTH
# columns so that transforming 'Visitor' cannot fail on a team that never appears
# as 'Home'. When both columns contain the same set of teams, the ids are the same
# as when fitting on 'Home' alone.
encoding = LabelEncoder()
encoding.fit(pd.concat([outcomeData['Home'], outcomeData['Visitor']]).values)
outcomeData['Home'] = encoding.transform(outcomeData['Home'].values)
outcomeData['Visitor'] = encoding.transform(outcomeData['Visitor'].values)

In [62]:
# Preview the first five rows again; Home and Visitor now hold the encoded ids.
outcomeData.head(n=5)

Unnamed: 0,Year,Month,Visitor,VisitorPTS,Home,HomePTS,OT?,Attend,Total Points,Winner,...,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,HomeWin
0,2005,november,10,79,8,87,0,22076,166,Detroit Pistons,...,-0.015,-0.014,-0.2,4.9,0.005,0.002,0.6,-1.5,-0.038,True
1,2005,november,25,98,6,107,0,20041,205,Dallas Mavericks,...,0.004,-0.004,0.5,-0.2,0.047,-0.021,1.0,1.2,0.025,True
2,2005,november,7,78,13,89,0,18997,167,Los Angeles Lakers,...,0.0,0.002,-0.4,1.1,-0.016,0.011,-4.1,-0.6,-0.029,True
3,2005,november,11,109,5,104,2,19730,213,Indiana Pacers,...,-0.016,-0.006,-1.1,6.1,-0.039,0.013,0.5,0.2,-0.013,False
4,2005,november,16,92,21,93,0,15138,185,Orlando Magic,...,0.002,0.003,1.6,0.9,0.004,-0.015,0.8,-0.7,0.022,True


In [63]:
# Drop these columns before the train/test split.
# NOTE(review): 'OT?' and 'Unnamed: 0' are still in the frame afterwards and so end
# up in the model inputs - confirm that is intended.
droppedColumns = ['Month', 'Attend', 'Winner', 'VisitorPTS', 'HomePTS',
                  'Total Points', "Final Home Spread"]
outcomeData = outcomeData.drop(columns=droppedColumns)


In [64]:
# Preview the first five rows of the reduced frame.
outcomeData.head(n=5)

Unnamed: 0,Year,Visitor,Home,OT?,FG_PG,FGA_PG,FG%_PG,3P_PG,3PA_PG,3P%_PG,...,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,HomeWin
0,2005,10,8,0,0.1,0.0,0.001,-2.3,-5.7,-0.019,...,-0.015,-0.014,-0.2,4.9,0.005,0.002,0.6,-1.5,-0.038,True
1,2005,25,6,0,-1.8,-3.5,-0.002,-0.8,-1.5,-0.01,...,0.004,-0.004,0.5,-0.2,0.047,-0.021,1.0,1.2,0.025,True
2,2005,7,13,0,-1.7,0.1,-0.022,4.0,10.6,0.015,...,0.0,0.002,-0.4,1.1,-0.016,0.011,-4.1,-0.6,-0.029,True
3,2005,11,5,2,4.0,6.3,0.015,-2.9,-8.2,-0.012,...,-0.016,-0.006,-1.1,6.1,-0.039,0.013,0.5,0.2,-0.013,False
4,2005,16,21,0,0.9,1.3,0.004,-0.1,-0.1,-0.002,...,0.002,0.003,1.6,0.9,0.004,-0.015,0.8,-0.7,0.022,True


In [65]:
# Rows with Year before 2020 form the training set; rows with Year equal to 2020
# form the test set.
isTrainYear = outcomeData['Year'] < 2020
isTestYear = outcomeData['Year'] == 2020
trainData = outcomeData.loc[isTrainYear]
testData = outcomeData.loc[isTestYear]

In [66]:
# Split each subset into a feature matrix (every column except HomeWin) and the
# HomeWin label vector.
# The feature matrices are cast to float32 explicitly: if any column has object
# dtype, a plain `.values` yields an object-dtype array rather than a numeric one.
X_train = trainData.drop(columns=["HomeWin"]).to_numpy(dtype=np.float32)
y_train = trainData["HomeWin"].to_numpy()

X_test = testData.drop(columns=["HomeWin"]).to_numpy(dtype=np.float32)
y_test = testData["HomeWin"].to_numpy()

In [86]:
# Binary classifier: two 128-unit ReLU layers, each followed by dropout with rate
# 0.5, and a single sigmoid output unit.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
# Dropout is referenced through `tf.keras`: this notebook only imports
# `tensorflow as tf`, so a bare `keras.` name raises NameError on a fresh kernel.
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.sigmoid))

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=20)

Train on 19463 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x11e7262d0>

In [87]:
# Evaluate the fitted model on the test arrays; the result holds the loss followed
# by the compiled accuracy metric.
testScores = model.evaluate(X_test, y_test)
val_loss, val_acc = testScores
print(val_loss, val_acc)

1.296757954033715 0.6622039
