In [1]:
import datetime as dt
import os
from pathlib import Path
import sys

import basketball_reference_web_scraper
from basketball_reference_web_scraper import client
import git
import pandas as pd

# Put the parent directory on the import path so the project-local `src`
# package can be imported below.
# NOTE(review): assumes the notebook is run from a directory one level below
# the repository root -- confirm.
sys.path.append('../')

import src.util as ut

# Widen the pandas display limits so wide/long box-score frames are not truncated.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)


# Scraped files are written under <git root>/data.
GIT_ROOT_DIR = ut.get_git_root(os.getcwd())
DATA_DIR = os.path.join(GIT_ROOT_DIR, 'data')

# Multipliers that translate raw NBA box-score stats into DraftKings points.
# They are defined here so they are easy to swap out if the scoring rules change.
POINTS_MULTIPLE = 1
THREE_POINT_MULTIPLE = 0.5
REBOUND_MULTIPLE = 1.25
ASSIST_MULTIPLE = 1.5
STEAL_MULTIPLE = 2
BLOCK_MULTIPLE = 2
TURNOVER_MULTIPLE = -0.5
DOUBLE_DOUBLE_MULTIPLE = 1.5
TRIPLE_DOUBLE_MULTIPLE = 3

# THIS IS WHAT YOU NEED TO CHANGE: the date of the games to scrape.
GAME_YEAR = 2020
GAME_MONTH = 8
GAME_DAY = 7



In [2]:
def get_player_box_scores_day(game_year, game_month, game_day):
    """
    Get all player box scores for a given day. Scraped from Basketball Reference

    The box scores are enriched with total points/rebounds, double-double and
    triple-double flags and DraftKings fantasy points, sorted by DraftKings
    points (highest first) and written to
    <DATA_DIR>/raw/nba_box_score_stats/nba_box_score_stats_<YYYYMMDD>.csv.

    Keyword Args:
      - game_year: integer, year of game
      - game_month: integer, month of game
      - game_day: integer, day of game

    Returns:
      - pandas DataFrame with one row per player box score, or None (after
        printing a notice) when no box scores were returned for that date.

    We are using a package: https://jaebradley.github.io/basketball_reference_web_scraper/api/
    """
    # Use the function arguments -- NOT the notebook-level GAME_* constants --
    # so the function works for any date it is called with.
    player_box_scores = client.player_box_scores(day=game_day, month=game_month, year=game_year)

    month_str = str(game_month).rjust(2, '0')
    day_str = str(game_day).rjust(2, '0')

    if len(player_box_scores) == 0:
        print('There were no games played on date: {}-{}-{}'.format(game_year, month_str, day_str))
        return None

    player_box_scores_df = pd.DataFrame(player_box_scores)
    player_box_scores_df['date_played'] = dt.date(game_year, game_month, game_day)
    player_box_scores_df['total_rebounds'] = player_box_scores_df['offensive_rebounds'] + player_box_scores_df['defensive_rebounds']
    # made_field_goals includes three pointers, so count the two pointers
    # separately before adding threes and free throws.
    player_box_scores_df['total_points_scored'] = (player_box_scores_df['made_field_goals'] - player_box_scores_df['made_three_point_field_goals']) * 2 \
        + player_box_scores_df['made_three_point_field_goals'] * 3 \
        + player_box_scores_df['made_free_throws']
    # Number of stat categories in which the player reached double digits.
    player_box_scores_df['total_double_digit_stats'] = (player_box_scores_df['total_points_scored'] >= 10).astype(int) \
        + (player_box_scores_df['total_rebounds'] >= 10).astype(int) \
        + (player_box_scores_df['assists'] >= 10).astype(int) \
        + (player_box_scores_df['steals'] >= 10).astype(int) \
        + (player_box_scores_df['blocks'] >= 10).astype(int)
    # The two flags are mutually exclusive: exactly two categories is a
    # double-double, three or more is a triple-double.
    # NOTE(review): this means a triple-double does not also earn the
    # double-double bonus -- confirm against the current DraftKings rules.
    player_box_scores_df['double_double_flag'] = (player_box_scores_df['total_double_digit_stats'] == 2).astype(int)
    player_box_scores_df['triple_double_flag'] = (player_box_scores_df['total_double_digit_stats'] >= 3).astype(int)

    player_box_scores_df['draftkings_points'] = \
        POINTS_MULTIPLE * player_box_scores_df['total_points_scored'] \
        + THREE_POINT_MULTIPLE * player_box_scores_df['made_three_point_field_goals'] \
        + REBOUND_MULTIPLE * player_box_scores_df['total_rebounds'] \
        + ASSIST_MULTIPLE * player_box_scores_df['assists'] \
        + STEAL_MULTIPLE * player_box_scores_df['steals'] \
        + BLOCK_MULTIPLE * player_box_scores_df['blocks'] \
        + TURNOVER_MULTIPLE * player_box_scores_df['turnovers'] \
        + DOUBLE_DOUBLE_MULTIPLE * player_box_scores_df['double_double_flag'] \
        + TRIPLE_DOUBLE_MULTIPLE * player_box_scores_df['triple_double_flag']

    output_columns = [
        'slug',
        'name',
        'team',
        'location',
        'opponent',
        'date_played',
        'outcome',
        'seconds_played',
        'made_field_goals',
        'attempted_field_goals',
        'made_three_point_field_goals',
        'attempted_three_point_field_goals',
        'made_free_throws',
        'attempted_free_throws',
        'total_points_scored',
        'total_rebounds',
        'assists',
        'steals',
        'blocks',
        'turnovers',
        'double_double_flag',
        'triple_double_flag',
        'draftkings_points',
    ]

    # Select and sort without inplace=True: sorting a column slice in place can
    # raise SettingWithCopyWarning. The original row index is kept.
    player_box_scores_df = player_box_scores_df[output_columns].sort_values(
        'draftkings_points', ascending=False)

    filename = os.path.join(
        DATA_DIR, 'raw', 'nba_box_score_stats',
        'nba_box_score_stats_{}{}{}.csv'.format(game_year, month_str, day_str))
    # Create the target directory if needed so a fresh checkout does not fail on write.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    player_box_scores_df.to_csv(filename, index=False, encoding='utf-8')

    return player_box_scores_df
    

In [3]:
player_box_scores_df = get_player_box_scores_day(game_year=GAME_YEAR, game_month=GAME_MONTH, game_day=GAME_DAY)

# get_player_box_scores_day returns None (after printing a notice) when no
# games were played on the requested date, so only preview a real frame.
if player_box_scores_df is not None:
    print(player_box_scores_df.head())

         slug           name                     team       location  \
13  smithis01      Ish Smith  Team.WASHINGTON_WIZARDS  Location.AWAY   
15  harrito02  Tobias Harris  Team.PHILADELPHIA_76ERS  Location.HOME   
7   browntr01     Troy Brown  Team.WASHINGTON_WIZARDS  Location.AWAY   
3   allenja01  Jarrett Allen       Team.BROOKLYN_NETS  Location.HOME   
17  leverca01   Caris LeVert       Team.BROOKLYN_NETS  Location.HOME   

                     opponent date_played       outcome  seconds_played  \
13  Team.NEW_ORLEANS_PELICANS  2020-08-07  Outcome.LOSS            1909   
15         Team.ORLANDO_MAGIC  2020-08-07   Outcome.WIN            2383   
7   Team.NEW_ORLEANS_PELICANS  2020-08-07  Outcome.LOSS            2310   
3       Team.SACRAMENTO_KINGS  2020-08-07   Outcome.WIN            2188   
17      Team.SACRAMENTO_KINGS  2020-08-07   Outcome.WIN            2166   

    made_field_goals  attempted_field_goals  made_three_point_field_goals  \
13                 7                   