In [None]:
import json
import os
import pickle
import time

import numpy as np
from py_ball import playbyplay
from sklearn import linear_model

from nba_help import constants

In [None]:
# How many game numbers (1..N) get_game_ids enumerates for each season code.
TOTAL_GAMES_PER_YEAR = 1230

In [None]:
def get_seconds_left(period, time_string):
    """Convert a period number and its "M:SS" clock into seconds left in regulation.

    Periods 1-4 are treated as 12 minutes long and anything past the fourth
    as 5 minutes long. The result counts down to 0 at the end of period 4
    and keeps going below zero for later periods.

    Args:
        period: Period number (1-4 for regular periods, 5+ afterwards).
        time_string: Period clock formatted as "minutes:seconds".

    Returns:
        Seconds remaining until the end of period 4; negative once
        ``period`` is greater than 4.
    """
    past_fourth = period > 4
    minutes_per_period = 5 if past_fourth else 12

    minutes_text, seconds_text = time_string.split(':')

    if not past_fourth:
        # Full periods still to be played, plus what is left on the clock.
        remaining_periods = (4 - period) * minutes_per_period * 60
        return remaining_periods + (int(minutes_text) * 60) + (int(seconds_text))

    # Zero for the first extra period, one period length lower for each one after.
    offset = (5 - period) * minutes_per_period * 60
    elapsed_in_period = (minutes_per_period * 60) - ((int(minutes_text) * 60) + (int(seconds_text)))
    return offset - elapsed_in_period

In [None]:
def get_game_ids(years):
    """Enumerate every candidate game ID for the given season codes.

    Each ID is '002', then the season code, then the game number
    zero-padded to five digits, for game numbers 1 through
    TOTAL_GAMES_PER_YEAR.

    Args:
        years: Iterable of season codes (e.g. '15') substituted into the ID.

    Returns:
        List of game ID strings, grouped by season in the order given.
    """
    return [
        ('002%s' + str(game_number).zfill(5)) % season
        for season in years
        for game_number in range(1, TOTAL_GAMES_PER_YEAR + 1)
    ]

In [None]:
def populate_train_test(game_ids, directory='/home/avyayv/data/nba/playbyplay/py_ball/'):
    """Download play-by-play data for each game and save it as ``<game_id>.json``.

    Args:
        game_ids: Iterable of game ID strings to request through py_ball.
        directory: Folder the JSON files are written into. Defaults to the
            same folder ``get_train_test_from_dir`` reads from.

    Returns:
        A ``(train_x, train_y)`` tuple of two empty lists. Nothing is
        appended to them here; the pair is returned only so existing
        callers that unpack the result keep working.
    """
    train_x = []
    train_y = []
    for game_id in game_ids:
        print(game_id)

        pbp = playbyplay.PlayByPlay(headers=constants.headers, game_id=game_id)
        # os.path.join works whether or not `directory` ends with a separator.
        with open(os.path.join(directory, game_id + '.json'), 'w') as fp:
            json.dump(pbp.data, fp)

        # Short pause between requests (presumably to throttle API calls).
        # Needs the module-level `import time`.
        time.sleep(0.5)
    return train_x, train_y

In [None]:
# Candidate game IDs for the season codes '15', '16' and '17'.
train_gids = get_game_ids(['15','16', '17'])

In [None]:
# Fetch and save the play-by-play JSON for every ID in train_gids
# (one network request per game).
populate_train_test(train_gids)

In [None]:
def get_train_test_from_dir(directory='/home/avyayv/data/nba/playbyplay/py_ball/'):
    """Turn every saved play-by-play file in ``directory`` into training samples.

    Each game is walked event by event. A sample is recorded for every
    time value (as returned by ``get_seconds_left``) that is a multiple of
    10: values skipped between two events take the state that held before
    the later event, and a value that coincides with an event takes the
    state after that event. Each time value is recorded at most once per
    game.

    Args:
        directory: Folder holding one play-by-play JSON file per game.

    Returns:
        ``(train_x, train_y)``. ``train_x`` holds one dict per game mapping
        a sampled time value to ``[current_margin, home_wins,
        home_has_ball]``. ``train_y`` holds one list per game containing
        ``int(home_wins)`` once per recorded sample.

    Files that raise ``UnicodeDecodeError`` while being processed are
    skipped; any other error propagates.
    """
    train_x = []
    train_y = []
    for file_name in os.listdir(directory):
        try:
            # Context manager so the handle is closed even if parsing fails.
            with open(os.path.join(directory, file_name)) as fp:
                pbp = json.loads(fp.read())
            events = pbp['PlayByPlay']

            # NOTE(review): assumes the second event is the opening jump ball
            # and that the final event carries a numeric SCOREMARGIN - confirm.
            jump_event = events[1]
            home_has_ball = jump_event['HOMEDESCRIPTION'] is not None
            current_margin = 0
            home_wins = int(events[-1]['SCOREMARGIN']) > 0
            last_second = 2880

            game_x = {}
            game_y = []

            for event in events[2:]:
                # get_seconds_left takes (period, clock string) only.
                seconds_left_in_game = get_seconds_left(event['PERIOD'], event['PCTIMESTRING'])

                # Back-fill the sample points passed since the previous event
                # using the state that held before this event is applied.
                # The keys of game_x double as the "already recorded" set.
                for sec in range(seconds_left_in_game + 1, last_second):
                    if sec % 10 == 0 and sec not in game_x:
                        game_x[sec] = [current_margin, home_wins, home_has_ball]
                        game_y.append(int(home_wins))

                last_second = seconds_left_in_game

                home_desc = event['HOMEDESCRIPTION'] is not None
                visitor_desc = event['VISITORDESCRIPTION'] is not None

                # Possession heuristic: the only side with a description gets
                # the ball; when both sides have one, home gets it only if its
                # description mentions a steal or a block.
                if home_desc and not visitor_desc:
                    home_has_ball = True

                if visitor_desc and not home_desc:
                    home_has_ball = False

                if home_desc and visitor_desc:
                    if ('STEAL' in event['HOMEDESCRIPTION']) or ('BLOCK' in event['HOMEDESCRIPTION']):
                        home_has_ball = True
                    else:
                        home_has_ball = False

                score_margin = event['SCOREMARGIN']
                if score_margin is not None:
                    # 'TIE' must reset the margin to zero; otherwise the
                    # previous lead would be carried forward.
                    current_margin = 0 if score_margin == 'TIE' else int(score_margin)

                if seconds_left_in_game % 10 == 0 and seconds_left_in_game not in game_x:
                    game_x[seconds_left_in_game] = [current_margin, home_wins, home_has_ball]
                    game_y.append(int(home_wins))

            train_x.append(game_x)
            train_y.append(game_y)

        except UnicodeDecodeError:
            continue

    return train_x, train_y

In [None]:
# Build the per-game samples from the files in the loader's default directory.
train_x, train_y = get_train_test_from_dir()

In [None]:
def train(train_x, train_y, **model_kwargs):
    """Fit one logistic-regression model per sampled time value.

    Samples from all games are grouped by their time key. For each key a
    model is fit on ``[margin, home_has_ball]`` (entries 0 and 2 of each
    sample) against ``home_wins`` (entry 1 of each sample).

    Args:
        train_x: List of per-game dicts mapping a time key to
            ``[margin, home_wins, home_has_ball]``.
        train_y: Not read; the labels come from entry 1 of each
            ``train_x`` sample. Kept so the signature stays compatible.
        **model_kwargs: Extra keyword arguments forwarded to
            ``LogisticRegression`` (for example ``solver='lbfgs'``). They
            override the default ``max_iter=10000`` when it is given too.

    Returns:
        Dict mapping each time key to its fitted model, in the order the
        keys were first seen.
    """
    # Merge so that a caller-supplied max_iter replaces the default instead
    # of raising a duplicate-keyword error.
    model_params = {'max_iter': 10000, **model_kwargs}

    time_to_train_x = {}
    time_to_train_y = {}
    for game in train_x:
        for key, sample in game.items():
            time_to_train_x.setdefault(key, []).append([sample[0], sample[2]])
            time_to_train_y.setdefault(key, []).append(sample[1])

    time_to_model = {}
    for key, features in time_to_train_x.items():
        model = linear_model.LogisticRegression(**model_params)
        model.fit(X=np.array(features), y=np.array(time_to_train_y[key]))
        time_to_model[key] = model
    return time_to_model

In [None]:
# Fit the per-time-value models from the loaded samples.
# NOTE(review): train() must accept a `solver` keyword for this call to succeed.
time_to_model = train(train_x, train_y, solver='lbfgs')

In [None]:
# Serialize the time-keyed model dict to model.pickle in the working directory.
with open('model.pickle', 'wb') as model_file:
    pickle.dump(
        time_to_model,
        model_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )