In [726]:
from IPython.display import HTML

# Inject a script plus a submit button that toggles the visibility of every
# `div.input` element (the code cells) in the rendered page. `code_toggle` is
# also registered as the document-ready handler, so the code starts out hidden.
# NOTE(review): relies on a global jQuery `$` being present in the page —
# presumably only some front ends provide it; confirm for the one in use.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [730]:
# Change directory so that code in "src" directory is easily importable and the
# relative model / data paths used further down resolve.
import os

# The checkout location is machine-specific: set the NBA_PROJECT_ROOT environment
# variable to point at your own copy. Without it the original author's path is used.
PROJECT_ROOT = os.environ.get('NBA_PROJECT_ROOT',
                              '/Users/binhu/projects/nba-in-game-prediction-models')
os.chdir(PROJECT_ROOT)
os.getcwd()

'/Users/binhu/projects/nba-in-game-prediction-models'

In [376]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from IPython.display import display, Markdown, Latex, HTML
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

from joblib import load
# Fitted artifacts, resolved relative to the current working directory.
# NOTE(review): joblib files are pickle-based as far as I know — only load
# artifacts that come from a trusted source.
model_logistic = load('model_logistic.joblib')
transform_pipeline_player = load('transform_pipeline.joblib')

# Keep DataFrame previews compact.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 20

In [643]:
# Team abbreviation -> numeric team id. Key order is the order shown in the
# team dropdowns, which are built from this dict's keys.
team_dict = {
    'ATL': 1, 'BOS': 2, 'BRK': 17, 'CHI': 4, 'CHO': 5312,
    'CLE': 5, 'DAL': 6, 'DEN': 7, 'DET': 8, 'GSW': 9,
    'HOU': 10, 'IND': 11, 'LAC': 12, 'LAL': 13, 'MEM': 29,
    'MIA': 14, 'MIL': 15, 'MIN': 16, 'NOP': 3, 'NYK': 18,
    'OKC': 25, 'ORL': 19, 'PHI': 20, 'PHO': 21, 'POR': 22,
    'SAC': 23, 'SAS': 24, 'TOR': 28, 'UTA': 26, 'WAS': 27,
}

# Reverse lookup: numeric team id -> abbreviation.
team_id_dict = dict(zip(team_dict.values(), team_dict.keys()))

# Per-game box-score columns; every one also has a "_l10" companion column.
_BOX_SCORE_STATS = [
    'minutes', 'points', 'fg_attempt', 'fg_made',
    'ft_attempt', 'ft_made', 'point_3_attempt', 'point_3_made',
    'offensive_rebounds', 'defensive_rebounds', 'assists', 'blocks',
    'turnovers',
]
_BOX_SCORE_STATS_L10 = [f'{stat}_l10' for stat in _BOX_SCORE_STATS]

# Team-level columns, in the order they appear in the final feature frame.
_TEAM_CONTEXT = [
    'opp_id', 'team_elo',
    'team_o_rebounds_l5', 'team_d_rebounds_l5',
    'team_o_rebounds_conceded_l5', 'team_d_rebounds_conceded_l5',
    'opp_elo',
    'opp_o_rebounds_l5', 'opp_d_rebounds_l5',
    'opp_o_rebounds_conceded_l5', 'opp_d_rebounds_conceded_l5',
]

# Columns copied from a player's row before merging with the schedule.
player_feature_names = (
    ['team_id', 'player_id']
    + _BOX_SCORE_STATS
    + ['position_id', 'draft_year', 'game_started']
    + _BOX_SCORE_STATS_L10
)

# Full column order selected right before the transform pipeline is applied.
feature_names = (
    ['season', 'player_id']
    + _BOX_SCORE_STATS
    + ['game_time', 'game_started', 'game_code', 'team_id',
       'at_home', 'position_id', 'draft_year']
    + _BOX_SCORE_STATS_L10
    + _TEAM_CONTEXT
)

In [577]:
def get_current_team_features(data, game_time):
    """Return one row of rolling rebound features per team as of ``game_time``.

    Rows with ``data.game_time >= game_time`` are considered. Each such row
    contributes two observations: the ``team_*`` columns keyed by ``team_id`` and
    the ``opp_*`` columns keyed by ``opp_id`` (renamed to the ``team_*`` names).
    For every team the observation with the earliest ``game_time`` is kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``team_id``, ``opp_id``, ``game_time`` and the four
        ``team_*_l5`` / ``opp_*_l5`` rebound columns selected below.
    game_time : same type as ``data.game_time``
        Lower bound (inclusive) for the rows taken into account.

    Returns
    -------
    pandas.DataFrame
        Columns ``team_id`` plus the four ``team_*_l5`` features, one row per
        team, ordered by ``team_id`` with a fresh 0..n-1 index. Teams without
        any row on or after ``game_time`` are absent.
    """
    feature_columns = ['team_o_rebounds_l5', 'team_d_rebounds_l5',
                       'team_o_rebounds_conceded_l5', 'team_d_rebounds_conceded_l5']
    opp_to_team = {'opp_id': 'team_id'}
    opp_to_team.update({col.replace('team_', 'opp_', 1): col for col in feature_columns})

    upcoming = data.loc[data.game_time >= game_time]

    as_team = upcoming[['team_id', 'game_time'] + feature_columns]
    as_opp = upcoming[['opp_id', 'game_time',
                       'opp_o_rebounds_l5', 'opp_d_rebounds_l5',
                       'opp_o_rebounds_conceded_l5', 'opp_d_rebounds_conceded_l5']]
    as_opp = as_opp.rename(columns=opp_to_team)

    # stacked holds every (team, game) observation available from game_time on
    stacked = pd.concat([as_team, as_opp], axis=0)

    # Earliest observation per team. A stable sort followed by drop_duplicates
    # replaces groupby(...).apply(...), which depended on the grouping column
    # being handed to the applied function.
    team_features = (
        stacked
        .dropna(subset=['team_id'])            # groupby ignored missing keys as well
        .sort_values(by='game_time', kind='mergesort')
        .drop_duplicates(subset='team_id', keep='first')
        .sort_values(by='team_id', kind='mergesort')
        .drop(columns='game_time')
        .reset_index(drop=True)
    )

    return team_features

In [717]:
def add_team_player_features(features, one_team, game_code):
    """Combine the team-level rows with every player's stats from one game.

    The rows of ``one_team`` whose ``game_code`` equals ``game_code`` supply the
    ``player_feature_names`` columns. They are left-merged onto ``features`` by
    ``team_id``, so each row of ``features`` is repeated once per matching
    player. The result is restricted to ``feature_names`` (in that order) and
    ``position_id`` / ``draft_year`` are cast to float64.
    """
    played_this_game = one_team.game_code == game_code
    roster_rows = one_team.loc[played_this_game, player_feature_names]

    # One row per (scheduled game, player): the model scores each such row, which
    # is what later allows sampling game by game and summing up to season totals.
    combined = features.merge(roster_rows, how='left', on='team_id')

    ordered = combined[feature_names]
    float_columns = {'position_id': 'float64', 'draft_year': 'float64'}
    return ordered.astype(float_columns, errors='raise')

In [510]:
def player_box_plot(props):
    """Draw one box per player summarising that player's per-game predictions.

    Parameters
    ----------
    props : pandas.DataFrame
        Long-format frame with a ``player_id`` column and one value column, one
        row per (player, game). The frame is left untouched: the running game
        counter is added to a copy instead of to the caller's object.

    Players are ordered left to right by descending mean value. Game columns in
    which any player has no value are dropped before plotting.
    """
    # Number each player's rows 0, 1, 2, ... so they can become pivot columns.
    numbered = props.assign(game_num=props.groupby('player_id').cumcount())
    y = numbered.pivot(columns='game_num', index='player_id')

    # Keep only the game number level of the (value column, game number) labels.
    y.columns = y.columns.get_level_values(1)

    # The mean is computed before incomplete game columns are removed and is
    # only used for ordering the players.
    y['mean'] = y.mean(axis=1)
    y = y.sort_values(by='mean', ascending=False)
    y = y.loc[:, ~y.isnull().any(axis=0)]
    y = y.drop(columns='mean')

    plt.figure(figsize=(12,8))
    plt.boxplot(y.T, labels=y.index)
    plt.xlabel('Players')
    plt.ylabel('Points per Game')
    plt.show()

In [722]:
def update_player_plot(team_name, prop_name, game_num, n_runs):
    """Box-plot each player's expected points over the team's remaining games.

    Parameters
    ----------
    team_name : str
        Key of ``team_dict``.
    prop_name : str
        Only ``'points'`` is handled; any other value prints a notice and returns.
    game_num : int
        1-based number of the team's game to start from. Values outside the
        games present in the data print a notice and return.
    n_runs : number
        Cast to ``int`` and echoed; not used otherwise by this implementation.

    Returns
    -------
    None
    """
    n_runs = int(n_runs)

    print(team_name, prop_name, game_num, n_runs)

    if prop_name != 'points':
        print(prop_name, "is not implemented yet!")
        return

    # The widget counts games from 1, positional indexing starts at 0.
    game_idx = game_num - 1

    data = pd.read_csv('./notebooks/player_2018.csv')
    data = data.sort_values(by=['season','team_id','game_time'])

    team_id = team_dict[team_name]
    one_team = data.loc[data.team_id == team_id].reset_index(drop=True)

    game_codes = one_team.game_code.unique()
    # Guard the lookup: an index past the end raised IndexError, and a negative
    # one (game_num == 0) silently selected a game counted from the end.
    if not 0 <= game_idx < len(game_codes):
        print("Game #", game_num, "is out of range:", team_name, "has",
              len(game_codes), "games in the data.")
        return
    game_code = game_codes[game_idx]

    # create season schedule with elo ratings
    schedule = get_season_schedule(one_team, game_idx)

    # team strength extraction
    # need a snapshot of all individual team stats up-to-date, the last row of team parts
    game_time = schedule.game_time.iloc[0]
    team_features = get_current_team_features(data, game_time)

    # merge team_features into elo_ratings
    features = add_team_features(schedule, team_features)

    # add players features into one game
    features = add_team_player_features(features, one_team, game_code)

    features_player = transform_pipeline_player.transform(features)

    # Expected value = sum over classes of P(class) * class value.
    # Assumes the classifier's 46 probability columns stand for 0..45 points, in
    # that order — TODO confirm against the fitted model.
    point_values = np.arange(46)
    pred = model_logistic.predict_proba(features_player)
    pred = np.dot(pred, point_values)

    pred = pd.DataFrame({'player_id':features.player_id, 'pred':pred})

    player_box_plot(pred)

    return

# (1) NBA Player Seasonal Props Simulation 

In [723]:
# Controls for the per-team props simulation; the values are handed to
# update_player_plot when the interact button is pressed.

# Team abbreviation (keys of team_dict).
team_picker = widgets.Dropdown(options=list(team_dict.keys()), description='Team')

# Statistic to simulate.
prop_picker = widgets.Dropdown(options=['points', 'assists'], description='Props')

# 1-based game number to start from.
game_picker = widgets.BoundedIntText(value=1, min=1, max=82, step=1,
                                     description='Game #:')

# Number of runs on a log10 scale: exponents 2 to 5 in steps of 1.
n_runs_picker = widgets.FloatLogSlider(value=1000, base=10, min=2, max=5, step=1,
                                       description='# runs')

_ = interact_manual(
    update_player_plot,
    team_name=team_picker,
    prop_name=prop_picker,
    game_num=game_picker,
    n_runs=n_runs_picker,
)

interactive(children=(Dropdown(description='Team', options=('ATL', 'BOS', 'BRK', 'CHI', 'CHO', 'CLE', 'DAL', '…

# (2) NBA Player Trade Simulation

In [724]:
# Controls for the trade simulation.

# Destination team (keys of team_dict).
team_picker = widgets.Dropdown(
    options=team_dict.keys(),
    description='Team'
)
# Numeric id of the player to move.
player_picker = widgets.BoundedIntText(
    value = 395388, # James Harden
    min = 0,
    max = 900000,
    step = 1,
    description='player id'
)
# Statistic to simulate.
prop_picker = widgets.Dropdown(
    options=['points','assists'],
    description='Props'
)
# 1-based game number to start from.
game_picker = widgets.BoundedIntText(
    value=1,
    min=1,
    max=82,
    step=1,
    description='Game #',
)
# Number of runs on a log10 scale: exponents 2 to 5 in steps of 1.
n_runs_picker = widgets.FloatLogSlider(
    value=1000,
    base=10,
    min=2,
    max=5,
    step=1,
    description='# runs'
)


def _run_player_trade_simulation(team_name, player_id, prop_name, game_num, n_runs):
    """Forward the widget values to ``update_player_trade_plot``.

    ``update_player_trade_plot`` is defined in a cell further down the
    notebook. Resolving the name only when the button is pressed lets this cell
    execute during a top-to-bottom run, where naming the function directly in
    the ``interact_manual`` call raises NameError.
    """
    return update_player_trade_plot(team_name, player_id, prop_name, game_num, n_runs)


# plot one team first, add other comparison later
_ = interact_manual(_run_player_trade_simulation, team_name=team_picker, player_id=player_picker, prop_name=prop_picker, 
                    game_num=game_picker, n_runs=n_runs_picker)

interactive(children=(Dropdown(description='Team', options=('ATL', 'BOS', 'BRK', 'CHI', 'CHO', 'CLE', 'DAL', '…

In [553]:
def get_season_schedule(data, game_num):
    """Return the games from position ``game_num`` onward, one row per game.

    ``data`` is collapsed to one row per ``game_code`` (first non-null value of
    each schedule column), ordered by ``season`` then ``game_time`` and
    re-indexed from 0. The rows from position ``game_num`` to the end are
    returned, keeping their index labels.
    """
    schedule_columns = ['season', 'game_code', 'game_time', 'team_id',
                        'opp_id', 'at_home', 'team_elo', 'opp_elo']

    one_row_per_game = data[schedule_columns].groupby(['game_code']).first()
    ordered = (
        one_row_per_game
        .reset_index()
        .sort_values(by=['season', 'game_time'], ignore_index=True)
    )

    # remaining season schedule
    remaining = ordered[game_num:]
    return remaining

In [596]:
def add_team_features(schedule, team_features):
    """Attach the rebound features of both the team and its opponent.

    ``team_features`` is left-merged onto ``schedule`` twice: once on
    ``team_id`` as is, and once on ``opp_id`` after its five columns have been
    relabelled, by position, with the ``opp_*`` names. ``team_features`` itself
    is not modified.
    """
    opponent_labels = ['opp_id', 'opp_o_rebounds_l5', 'opp_d_rebounds_l5',
                       'opp_o_rebounds_conceded_l5', 'opp_d_rebounds_conceded_l5']

    own_side = team_features.copy()
    with_own = schedule.merge(own_side, how='left', on='team_id')

    # Same numbers, relabelled so they join on the opponent's id.
    opponent_side = own_side
    opponent_side.columns = opponent_labels

    with_both = with_own.merge(opponent_side, how='left', on=['opp_id'])
    return with_both

In [702]:
def add_one_player_features(features, one_team, team_id, player_id, game_code):
    """Attach one player's stats from one game to the team-level feature rows.

    Parameters
    ----------
    features : pandas.DataFrame
        Team-level rows (schedule plus team/opponent features) with a
        ``team_id`` column.
    one_team : pandas.DataFrame
        Player rows to pick the player's stats from.
    team_id : int
        Team id written onto the player's row before merging; this is what lets
        the player be attached to a team other than the one in ``one_team``.
    player_id : int
        Player whose row is used.
    game_code
        Game whose row is used.

    Returns
    -------
    pandas.DataFrame
        ``features`` left-merged with the player's row on ``team_id``, restricted
        to ``feature_names`` in that order, with ``position_id`` and
        ``draft_year`` cast to float64. If no row of ``one_team`` matches, the
        left merge leaves the player columns as NaN.
    """
    is_target_row = (one_team.game_code == game_code) & (one_team.player_id == player_id)

    # assign() builds a new frame; setting the column on the .loc selection
    # directly is a chained assignment and triggers SettingWithCopyWarning.
    player_features = one_team.loc[is_target_row, player_feature_names].assign(team_id=team_id)

    # For each player,
    # predict probability for each game of the season
    # sample from each game once and sum up to get one seasonal result
    # after n such sample turns, we can get the empirical distribution of NBA player stats
    features = features.merge(player_features, on='team_id', how='left')

    # re-order features sequence
    features = features[feature_names]

    features = features.astype({'position_id':'float64', 'draft_year':'float64'}, errors='raise')

    return features

In [710]:
def player_team_comparison_plot(pred_0, pred_1, team_0_id, team_1_id, title=None):
    """Plot two per-game prediction series, one line per team, on shared axes.

    Parameters
    ----------
    pred_0, pred_1 : array-like with a ``mean()`` method
        Per-game predicted values for the first and the second team.
    team_0_id, team_1_id : int
        Keys of ``team_id_dict``; the abbreviations label the lines together
        with each series' mean rounded to one decimal.
    title : str, optional
        Figure title. Defaults to the original fixed text, so existing calls
        render exactly as before; pass a title when plotting another player.

    Returns
    -------
    None
    """
    if title is None:
        title = 'James Harden 2018 season simulation'

    # Resolve both names first so an unknown id fails before a figure is opened.
    team_0_name = team_id_dict[team_0_id]
    team_1_name = team_id_dict[team_1_id]

    plt.figure(figsize=(12,8))
    for pred, team_name in ((pred_0, team_0_name), (pred_1, team_1_name)):
        plt.plot(pred, linestyle='-', marker='o',
                 label=team_name+", avg="+str(round(pred.mean(),1)))
    plt.xlabel('games')
    plt.ylabel('point')
    plt.title(title)
    plt.legend()
    plt.show()

    return

In [711]:
def update_player_trade_plot(team_name, player_id, prop_name, game_num, n_runs):
    """Compare a player's expected points on his own team and on another team.

    Parameters
    ----------
    team_name : str
        Key of ``team_dict``; the team the player is hypothetically moved to.
    player_id : int
        Player to simulate. His current team is looked up from the data.
    prop_name : str
        Only ``'points'`` is handled; any other value prints a notice and returns.
    game_num : int
        1-based number of the game (in the original team's schedule) to start from.
    n_runs : number
        Cast to ``int``; not used otherwise by this implementation.

    Returns
    -------
    None
    """
    n_runs = int(n_runs)

    if prop_name != 'points':
        print(prop_name, "is not implemented yet!")
        return

    # The widget counts games from 1, positional indexing starts at 0.
    game_idx = game_num - 1

    # load data
    data = pd.read_csv('./notebooks/player_2018.csv')
    data = data.sort_values(by=['season', 'team_id', 'game_time'])

    # player's original team id, looked up for the requested player
    # (this used to be hard-wired to id 395388 regardless of `player_id`)
    player_team_ids = data.loc[data.player_id == player_id, 'team_id']
    if player_team_ids.empty:
        print("player_id", player_id, "was not found in the data.")
        return
    team_0_id = player_team_ids.values[0]
    team_0 = data.loc[data.team_id == team_0_id].reset_index(drop=True)

    team_1_id = team_dict[team_name]
    team_1 = data.loc[data.team_id == team_1_id].reset_index(drop=True)

    game_codes = team_0.game_code.unique()
    # Guard the lookup: an index past the end raised IndexError, and a negative
    # one (game_num == 0) silently selected a game counted from the end.
    if not 0 <= game_idx < len(game_codes):
        print("Game #", game_num, "is out of range: the player's team has",
              len(game_codes), "games in the data.")
        return
    game_code = game_codes[game_idx]

    # create season schedule, get elo ratings
    schedule_0 = get_season_schedule(team_0, game_idx)
    schedule_1 = get_season_schedule(team_1, game_idx)

    # get team features
    game_time = schedule_0.game_time.iloc[0]
    team_features = get_current_team_features(data, game_time)

    # merge team_features into elo_ratings
    features_0 = add_team_features(schedule_0, team_features)
    features_1 = add_team_features(schedule_1, team_features)

    # get player features; both calls read the player's row from his original
    # team, only the team id attached to it differs
    features_0 = add_one_player_features(features_0, team_0, team_0_id, player_id, game_code)
    features_1 = add_one_player_features(features_1, team_0, team_1_id, player_id, game_code)

    # 2 feature sets: one for the player's current team, one for the team to be traded to
    features_0_player = transform_pipeline_player.transform(features_0)
    features_1_player = transform_pipeline_player.transform(features_1)

    # Expected value = sum over classes of P(class) * class value.
    # Assumes the classifier's 46 probability columns stand for 0..45 points, in
    # that order — TODO confirm against the fitted model.
    point_values = np.arange(46)

    pred_0 = model_logistic.predict_proba(features_0_player)
    pred_0 = np.dot(pred_0, point_values)

    pred_1 = model_logistic.predict_proba(features_1_player)
    pred_1 = np.dot(pred_1, point_values)

    # generate graph
    player_team_comparison_plot(pred_0, pred_1, team_0_id, team_1_id)

    return