In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from nba_helper import read_url_to_csv, read_data
from preprocess import *
from nba_helper import elo_url
from pymongo import MongoClient
# Connect to the MongoDB server on localhost and select the 'NBA' database.
mongo_host, mongo_port = 'localhost', 27017
client = MongoClient(mongo_host, mongo_port)
db = client['NBA']

# Show which collections the database currently holds.
db.list_collection_names()

['historical_raptor', 'player2k', 'game_log', 'shot_chart']

In [45]:
# Load the play-by-play CSV export from disk.
historical_pbp_path = '../Data/historical_pbp.csv'
historical_pbp = pd.read_csv(historical_pbp_path)

In [47]:
def feature_engineer(df):
    """Derive per-event model features from play-by-play rows.

    The input frame is copied first, so the caller's data is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Play-by-play rows. The columns read here are ``HOMEDESCRIPTION``,
        ``VISITORDESCRIPTION``, ``SCOREMARGIN``, ``PERIOD``,
        ``PCTIMESTRING`` and ``GAME_ID``. ``SCOREMARGIN`` is forward-filled,
        so the rows should be in event order and belong to a single game
        (the notebook applies this function per ``GAME_ID`` group).

    Returns
    -------
    pandas.DataFrame
        A frame with the columns ``GAME_ID``, ``home_poss``, ``diff``,
        ``time_remaining`` and ``OT_ind``.

    Notes
    -----
    ``home_poss``, ``str_to_time`` and ``find_seconds_left`` are not defined
    in this notebook; presumably they come from ``from preprocess import *``
    -- TODO confirm.
    """
    pbp = df.copy()

    # Helper indicator columns. They are not returned, but ``home_poss`` is
    # applied to whole rows and may read them, so they are kept.
    pbp[['home_true', 'visitor_true']] = pbp[['HOMEDESCRIPTION','VISITORDESCRIPTION']].notnull().astype(int)

    # ``na=False`` yields a real boolean column directly; the previous
    # ``.fillna(False)`` on the object-dtype result relied on pandas'
    # deprecated silent downcasting.
    # NOTE(review): both flags look only at HOMEDESCRIPTION -- confirm that
    # visitor-side blocks/steals are intentionally ignored.
    pbp['block'] = pbp['HOMEDESCRIPTION'].str.contains("BLOCK", na=False)
    pbp['steal'] = pbp['HOMEDESCRIPTION'].str.contains("STEAL", na=False)

    pbp['home_poss'] = pbp.apply(home_poss, axis = 1)

    # Carry the last known margin forward, treat rows before the first
    # recorded margin as 0, and map the literal 'TIE' to 0.
    pbp['diff'] = pbp['SCOREMARGIN'].ffill().fillna(0).replace({'TIE':0}).astype(int)

    # Number of periods beyond the fourth (0 for periods 1-4).
    pbp['OT_ind'] = (pbp['PERIOD']-4).clip(lower=0)

    # Sum of the clock value parsed from PCTIMESTRING and the value that
    # find_seconds_left derives from PERIOD.
    pbp['seconds'] = pbp['PCTIMESTRING'].apply(str_to_time)
    pbp['seconds_left_in_game_from_quarter'] = pbp['PERIOD'].apply(find_seconds_left)
    pbp['time_remaining'] = pbp['seconds'] + pbp['seconds_left_in_game_from_quarter']

    game = pbp[['GAME_ID', 'home_poss', 'diff', 'time_remaining', 'OT_ind']]

    return game

In [81]:
# Build the features one game at a time, then discard the group index so the
# result has a plain 0..n-1 row index.
per_game_features = historical_pbp.groupby('GAME_ID').apply(feature_engineer)
historical_pbp_modelled = per_game_features.reset_index(drop=True)

In [82]:
# Load the Elo table and reduce it to the columns used for the join on
# (date, team1) further down.
elo_raw = read_url_to_csv(elo_url)

# Take an explicit copy of the filtered rows. Assigning new columns to a
# boolean-mask slice triggers SettingWithCopyWarning and may not behave as
# intended.
# NOTE(review): this compares 'date' against a string literal -- assumes the
# column holds ISO-formatted date strings; confirm.
elo = elo_raw.loc[elo_raw['date'] > '2012-01-01'].copy()

# Absolute gap between the two pre-game ratings.
elo['elo_difference'] = (elo['elo1_pre'] - elo['elo2_pre']).abs()

elo = elo[['date', 'team1', 'elo1_pre', 'elo2_pre', 'elo_difference']].copy()

# Rewrite three team codes in team1; presumably this aligns them with the
# abbreviations used in the game log's MATCHUP column -- TODO confirm.
elo['team1'] = elo['team1'].replace({'BRK':'BKN',
                                     'PHO':'PHX',
                                     'CHO':'CHA',})

In [83]:
# Pull every document from the game_log collection into a DataFrame.
all_games = pd.DataFrame.from_records(db.game_log.find())

# Split MATCHUP on the literal "vs." separator.
#  * The dot is escaped: a multi-character pattern is interpreted as a regex,
#    so the previous 'vs.' matched "vs" followed by ANY character.
#  * n=1 plus reindex guarantees exactly two result columns, even when no row
#    contains the separator (the two-column assignment used to fail then).
#  * Surrounding whitespace is consumed by the pattern, so 'Away' no longer
#    carries a leading space.
# Rows without the separator keep the whole MATCHUP string in 'Home' and get a
# missing 'Away', as before.
matchup_parts = (
    all_games['MATCHUP']
    .str.split(r'\s*vs\.\s*', n=1, expand=True)
    .reindex(columns=[0, 1])
)
all_games['Home'] = matchup_parts[0].str.strip()
all_games['Away'] = matchup_parts[1]

all_games['GAME_ID'] = all_games['GAME_ID'].astype(int)

# Encode the result as 1/0 with an explicit mapping instead of replace(),
# which relied on deprecated silent downcasting. Values other than 'W'/'L'
# become NaN.
# NOTE(review): WL is presumably the result of the team listed first in
# MATCHUP, so this is only a *home* result on "vs." rows -- confirm that other
# rows are dropped by the later join on 'Home'.
all_games['home_team_win'] = all_games['WL'].map({'W': 1, 'L': 0})

In [85]:
# Attach the Elo columns to each game-log row by matching the game date and
# the home-team code against the Elo table's date and team1.
elo_join_keys = {'left_on': ['GAME_DATE', 'Home'], 'right_on': ['date', 'team1']}
elo_w_game_ids = pd.merge(all_games, elo, **elo_join_keys)

In [86]:
# Broadcast the game-level columns (ratings, outcome, matchup label) onto every
# play-by-play feature row of the same game.
game_level_columns = ['GAME_ID', 'elo1_pre', 'elo2_pre', 'home_team_win', 'MATCHUP']
df = pd.merge(historical_pbp_modelled, elo_w_game_ids[game_level_columns], on=['GAME_ID'])




In [87]:
# from pymongo import MongoClient, ASCENDING, DESCENDING

# db.game_log.insert_many(all_games.to_dict('records'))

# db.game_log.create_index([("GAME_ID", ASCENDING)], unique=True)

In [88]:
# Display the merged play-by-play features with the Elo and outcome columns.
df

Unnamed: 0,GAME_ID,home_poss,diff,time_remaining,OT_ind,elo1_pre,elo2_pre,home_team_win,MATCHUP
0,21500001,0,0,2880,0,1561.843550,1472.058800,0.0,ATL vs. DET
1,21500001,1,0,2880,0,1561.843550,1472.058800,0.0,ATL vs. DET
2,21500001,1,0,2861,0,1561.843550,1472.058800,0.0,ATL vs. DET
3,21500001,1,0,2859,0,1561.843550,1472.058800,0.0,ATL vs. DET
4,21500001,1,0,2857,0,1561.843550,1472.058800,0.0,ATL vs. DET
...,...,...,...,...,...,...,...,...,...
3553191,22100581,0,17,34,0,1694.385602,1490.689779,1.0,PHX vs. LAC
3553192,22100581,0,17,31,0,1694.385602,1490.689779,1.0,PHX vs. LAC
3553193,22100581,0,17,23,0,1694.385602,1490.689779,1.0,PHX vs. LAC
3553194,22100581,1,17,18,0,1694.385602,1490.689779,1.0,PHX vs. LAC


In [93]:
# Feature sets for the two models: game-state columns only, and game-state
# columns plus both pre-game Elo ratings. The target is kept as a one-column
# frame.
state_features = ['home_poss', 'diff', 'time_remaining', 'OT_ind']
elo_features = ['elo1_pre', 'elo2_pre']

modelling_data_wO_elo = df[state_features]
modelling_data_w_elo = df[state_features + elo_features]
modelling_target = df[['home_team_win']]

In [94]:
def _build_win_probability_model(n_features):
    """Build and compile the dense binary classifier used by both models.

    The network is two 12-unit ReLU layers followed by a single sigmoid unit,
    compiled with RMSprop (learning rate 1e-3), binary cross-entropy loss and
    the accuracy and AUC metrics.

    Parameters
    ----------
    n_features : int
        Number of input columns the model receives.

    Returns
    -------
    tf.keras.Model
        The compiled, untrained model.
    """
    net = tf.keras.models.Sequential([
        tf.keras.layers.Dense(12, activation='relu', input_shape=[n_features,]),
        tf.keras.layers.Dense(12, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    net.compile(optimizer=tf.keras.optimizers.RMSprop(1e-3), loss='binary_crossentropy', metrics=['accuracy', 'AUC'])
    return net


# Input widths are taken from the feature frames rather than hard-coded, so
# the models stay in sync when a feature column is added or removed.
model_wO_elo = _build_win_probability_model(modelling_data_wO_elo.shape[1])
model_wO_elo.summary()

model = _build_win_probability_model(modelling_data_w_elo.shape[1])
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 12)                60        
                                                                 
 dense_13 (Dense)            (None, 12)                156       
                                                                 
 dense_14 (Dense)            (None, 1)                 13        
                                                                 
Total params: 229
Trainable params: 229
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 12)                84        
                                                                 
 dense_16 (Dense)            (None

In [96]:
# Both models are trained with the same settings and the same target array.
fit_options = dict(verbose=1, shuffle=True, epochs=15, batch_size=512)
targets = modelling_target.values

# Model without the Elo ratings.
model_wO_elo.fit(modelling_data_wO_elo.values, targets, **fit_options)

# Model with the Elo ratings; its History object is the cell's output.
model.fit(modelling_data_w_elo.values, targets, **fit_options)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x19e5a87c0>

In [97]:
# Write both trained models to HDF5 files under ../Models.
for trained_model, destination in (
    (model, '../Models/TF_model_w_elo.h5'),
    (model_wO_elo, '../Models/TF_model_wO_elo.h5'),
):
    trained_model.save(destination)

In [98]:
# Score every row with both models and store the results as new columns of df.
#
# predict_on_batch() runs its entire input as ONE batch; for a frame with
# millions of rows that means a single huge forward pass. Model.predict()
# iterates over the rows in fixed-size batches instead.
# The (n, 1) output is flattened so each column receives a 1-D array.
PREDICT_BATCH_SIZE = 8192

df['preds_w_elo'] = model.predict(
    df[modelling_data_w_elo.columns].values,
    batch_size=PREDICT_BATCH_SIZE,
    verbose=0,
).ravel()

df['preds_wO_elo'] = model_wO_elo.predict(
    df[modelling_data_wO_elo.columns].values,
    batch_size=PREDICT_BATCH_SIZE,
    verbose=0,
).ravel()

In [99]:
# Display df again, now including the preds_w_elo and preds_wO_elo columns.
df

Unnamed: 0,GAME_ID,home_poss,diff,time_remaining,OT_ind,...,elo2_pre,home_team_win,MATCHUP,preds_w_elo,preds_wO_elo
0,21500001,0,0,2880,0,...,1472.058800,0.0,ATL vs. DET,0.690017,0.562178
1,21500001,1,0,2880,0,...,1472.058800,0.0,ATL vs. DET,0.688084,0.562178
2,21500001,1,0,2861,0,...,1472.058800,0.0,ATL vs. DET,0.687846,0.562178
3,21500001,1,0,2859,0,...,1472.058800,0.0,ATL vs. DET,0.687821,0.562178
4,21500001,1,0,2857,0,...,1472.058800,0.0,ATL vs. DET,0.687796,0.562178
...,...,...,...,...,...,...,...,...,...,...,...
3553191,22100581,0,17,34,0,...,1490.689779,1.0,PHX vs. LAC,0.999997,1.000000
3553192,22100581,0,17,31,0,...,1490.689779,1.0,PHX vs. LAC,0.999997,1.000000
3553193,22100581,0,17,23,0,...,1490.689779,1.0,PHX vs. LAC,0.999998,1.000000
3553194,22100581,1,17,18,0,...,1490.689779,1.0,PHX vs. LAC,0.999999,1.000000


In [100]:
# Display the raw play-by-play frame as loaded from the CSV.
historical_pbp

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,...,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG
0,21500003,0,12,0,1,...,,,,,0
1,21500003,1,10,0,1,...,1.610613e+09,New Orleans,Pelicans,NOP,1
2,21500003,2,2,1,1,...,,,,,1
3,21500003,3,4,0,1,...,,,,,1
4,21500003,4,1,1,1,...,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...
3553191,22100581,666,2,1,4,...,,,,,1
3553192,22100581,667,4,0,4,...,,,,,0
3553193,22100581,669,2,1,4,...,,,,,1
3553194,22100581,670,4,0,4,...,,,,,1


In [101]:
# List the collections currently present in the database.
db.list_collection_names()

['historical_raptor', 'player2k', 'shot_chart', 'game_log']

In [102]:
from pymongo import MongoClient, ASCENDING, DESCENDING

# Insert the raw play-by-play rows in fixed-size chunks. Converting the whole
# frame with to_dict('records') builds one Python dict per row for every row
# at once; chunking keeps only INSERT_CHUNK_ROWS dicts alive at a time.
# An empty frame now inserts nothing instead of making insert_many raise.
# NOTE(review): re-running this cell appends the same documents again; the
# GAME_ID index below is not unique, so nothing prevents duplicates.
INSERT_CHUNK_ROWS = 50000
for chunk_start in range(0, len(historical_pbp), INSERT_CHUNK_ROWS):
    pbp_chunk = historical_pbp.iloc[chunk_start:chunk_start + INSERT_CHUNK_ROWS]
    db.historical_pbp.insert_many(pbp_chunk.to_dict('records'))

# Index on GAME_ID for per-game lookups; the index name is the cell's output.
db.historical_pbp.create_index([("GAME_ID", ASCENDING)])

'GAME_ID_1'

In [103]:
# Insert the modelled rows (features, ratings, outcome, predictions) in
# fixed-size chunks. Converting the whole frame with to_dict('records') builds
# one Python dict per row for every row at once; chunking bounds that.
# An empty frame now inserts nothing instead of making insert_many raise.
# NOTE(review): re-running this cell appends the same documents again; the
# GAME_ID index below is not unique, so nothing prevents duplicates.
MODELLED_INSERT_CHUNK_ROWS = 50000
for modelled_start in range(0, len(df), MODELLED_INSERT_CHUNK_ROWS):
    modelled_chunk = df.iloc[modelled_start:modelled_start + MODELLED_INSERT_CHUNK_ROWS]
    db.historical_pbp_modelled.insert_many(modelled_chunk.to_dict('records'))

# Index on GAME_ID for per-game lookups; the index name is the cell's output.
db.historical_pbp_modelled.create_index([("GAME_ID", ASCENDING)])


'GAME_ID_1'

In [104]:
# Read one game's documents back from MongoDB to check the insert.
single_game_query = {'GAME_ID': 22100581}
single_game_docs = db.historical_pbp.find(single_game_query)
test = pd.DataFrame.from_records(single_game_docs)

In [105]:
# Display the documents read back for the single queried game.
test

Unnamed: 0,_id,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,...,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG
0,61e9efd6f0ee3d9953e21db9,22100581,2,12,0,...,,,,,0
1,61e9efd6f0ee3d9953e21dba,22100581,4,10,0,...,1.610613e+09,Phoenix,Suns,PHX,1
2,61e9efd6f0ee3d9953e21dbb,22100581,7,2,101,...,,,,,1
3,61e9efd6f0ee3d9953e21dbc,22100581,8,4,0,...,,,,,1
4,61e9efd6f0ee3d9953e21dbd,22100581,9,2,1,...,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...
471,61e9efd6f0ee3d9953e21f90,22100581,666,2,1,...,,,,,1
472,61e9efd6f0ee3d9953e21f91,22100581,667,4,0,...,,,,,0
473,61e9efd6f0ee3d9953e21f92,22100581,669,2,1,...,,,,,1
474,61e9efd6f0ee3d9953e21f93,22100581,670,4,0,...,,,,,1


In [1]:
import pandas as pd
import numpy as np
from nba_api.stats.endpoints import playbyplayv2, leaguegamelog, leaguegamefinder

# Fetch the league game log, take the first returned frame, and cast GAME_ID
# to int.
league_log_frames = leaguegamelog.LeagueGameLog().get_data_frames()
lgl = league_log_frames[0].astype({'GAME_ID': int})