In [1]:
# https://github.com/jaebradley/basketball_reference_web_scraper
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType

In [2]:
# Load player box scores for games on 2017-01-01 (day=1, month=1, year=2017)
# NOTES
# - MISSING: (left blank in the original notes -- TODO list the absent fields)
# - 'game_score' is NOT POINTS SCORED by player
# - Verified data correct per
#   https://www.basketball-reference.com/boxscores/201701010LAL.html
player_box = client.player_box_scores(day=1, month=1, year=2017)

In [3]:
# Show the first player record to inspect which fields the client returns.
player_box[0]

{'slug': 'lowryky01',
 'name': 'Kyle Lowry',
 'team': <Team.TORONTO_RAPTORS: 'TORONTO RAPTORS'>,
 'location': <Location.AWAY: 'AWAY'>,
 'opponent': <Team.LOS_ANGELES_LAKERS: 'LOS ANGELES LAKERS'>,
 'outcome': <Outcome.WIN: 'WIN'>,
 'seconds_played': 2342,
 'made_field_goals': 12,
 'attempted_field_goals': 16,
 'made_three_point_field_goals': 6,
 'attempted_three_point_field_goals': 7,
 'made_free_throws': 11,
 'attempted_free_throws': 11,
 'offensive_rebounds': 1,
 'defensive_rebounds': 8,
 'assists': 7,
 'steals': 0,
 'blocks': 0,
 'turnovers': 4,
 'personal_fouls': 3,
 'game_score': 37.4}

In [4]:
# Load team box scores for games on 2017-01-01 (day=1, month=1, year=2017)
# NOTES
# - MISSING from each team record: total 'game_score', 'location', 'opponent'
# - Verified data correct per
#   https://www.basketball-reference.com/boxscores/201701010LAL.html
team_box = client.team_box_scores(day=1, month=1, year=2017)

In [5]:
# Show one team record to inspect the available fields.
# NOTE(review): in the recorded output index 4 is the Toronto Raptors; the
# position depends on the order the client returns -- do not rely on it.
team_box[4]

{'team': <Team.TORONTO_RAPTORS: 'TORONTO RAPTORS'>,
 'minutes_played': 240,
 'made_field_goals': 42,
 'attempted_field_goals': 75,
 'made_three_point_field_goals': 10,
 'attempted_three_point_field_goals': 17,
 'made_free_throws': 29,
 'attempted_free_throws': 32,
 'offensive_rebounds': 5,
 'defensive_rebounds': 35,
 'assists': 18,
 'steals': 5,
 'blocks': 9,
 'turnovers': 16,
 'personal_fouls': 25}

In [6]:
# Load season schedule (teams, location, start time, and score for all games)
# for the season ending in 2017
# NOTES
# - 'start_time' is timezone-aware UTC (= Greenwich Mean Time), so it reads
#   5 hours ahead of US Eastern Standard Time
# --- 'start_time': datetime.datetime(2017, 1, 1, 23, 0, tzinfo=<UTC>)
#     means 6:00pm EST  (year, month, day, hour, minute)
# --- NOTE(review): the gap would be 4 hours while US daylight saving time
#     is in effect -- confirm before converting with a fixed offset
# - Verified data otherwise correct per
#   https://www.basketball-reference.com/boxscores/201701010LAL.html
#   https://www.basketball-reference.com/leagues/NBA_2017_games-january.html
season_schedule = client.season_schedule(season_end_year=2017)

In [7]:
# Show five consecutive schedule entries; in the recorded output this slice
# begins with games whose start_time is 2017-01-01 23:00 UTC.
season_schedule[506:511]

[{'start_time': datetime.datetime(2017, 1, 1, 23, 0, tzinfo=<UTC>),
  'away_team': <Team.SAN_ANTONIO_SPURS: 'SAN ANTONIO SPURS'>,
  'home_team': <Team.ATLANTA_HAWKS: 'ATLANTA HAWKS'>,
  'away_team_score': 112,
  'home_team_score': 114},
 {'start_time': datetime.datetime(2017, 1, 1, 23, 0, tzinfo=<UTC>),
  'away_team': <Team.ORLANDO_MAGIC: 'ORLANDO MAGIC'>,
  'home_team': <Team.INDIANA_PACERS: 'INDIANA PACERS'>,
  'away_team_score': 104,
  'home_team_score': 117},
 {'start_time': datetime.datetime(2017, 1, 1, 23, 0, tzinfo=<UTC>),
  'away_team': <Team.DETROIT_PISTONS: 'DETROIT PISTONS'>,
  'home_team': <Team.MIAMI_HEAT: 'MIAMI HEAT'>,
  'away_team_score': 107,
  'home_team_score': 98},
 {'start_time': datetime.datetime(2017, 1, 2, 0, 0, tzinfo=<UTC>),
  'away_team': <Team.PORTLAND_TRAIL_BLAZERS: 'PORTLAND TRAIL BLAZERS'>,
  'home_team': <Team.MINNESOTA_TIMBERWOLVES: 'MINNESOTA TIMBERWOLVES'>,
  'away_team_score': 95,
  'home_team_score': 89},
 {'start_time': datetime.datetime(2017, 1, 2

In [10]:
# Load player season totals for the season ending in 2017
season_player_totals = client.players_season_totals(season_end_year=2017)

In [23]:
# Show one player's season totals; in the recorded output index 304 is
# Kyle Lowry (slug 'lowryky01').
# NOTE(review): a positional index is fragile -- the row order depends on the
# scraped data; consider selecting by 'slug' instead.
season_player_totals[304]

{'slug': 'lowryky01',
 'name': 'Kyle Lowry',
 'positions': [<Position.POINT_GUARD: 'POINT GUARD'>],
 'age': 30,
 'team': <Team.TORONTO_RAPTORS: 'TORONTO RAPTORS'>,
 'games_played': 60,
 'games_started': 60,
 'minutes_played': 2244,
 'made_field_goals': 426,
 'attempted_field_goals': 918,
 'made_three_point_field_goals': 193,
 'attempted_three_point_field_goals': 468,
 'made_free_throws': 299,
 'attempted_free_throws': 365,
 'offensive_rebounds': 48,
 'defensive_rebounds': 238,
 'assists': 417,
 'steals': 88,
 'blocks': 19,
 'turnovers': 173,
 'personal_fouls': 170}

In [36]:
# Load advanced player statistics for the season ending in 2017
season_player_adv = client.players_advanced_season_totals(season_end_year=2017)

In [37]:
# Show one player's advanced statistics; in the recorded output index 304 is
# Kyle Lowry (slug 'lowryky01').
# NOTE(review): a positional index is fragile -- the row order depends on the
# scraped data; consider selecting by 'slug' instead.
season_player_adv[304]

{'slug': 'lowryky01',
 'name': 'Kyle Lowry',
 'positions': [<Position.POINT_GUARD: 'POINT GUARD'>],
 'age': 30,
 'team': <Team.TORONTO_RAPTORS: 'TORONTO RAPTORS'>,
 'games_played': 60,
 'minutes_played': 2244,
 'player_efficiency_rating': 22.9,
 'true_shooting_percentage': 0.623,
 'three_point_attempt_rate': 0.51,
 'free_throw_attempt_rate': 0.398,
 'offensive_rebound_percentage': 2.4,
 'defensive_rebound_percentage': 12.0,
 'total_rebound_percentage': 7.2,
 'assist_percentage': 29.9,
 'steal_percentage': 2.0,
 'block_percentage': 0.7,
 'turnover_percentage': 13.8,
 'usage_percentage': 24.9,
 'offensive_win_shares': 7.8,
 'defensive_win_shares': 2.3,
 'win_shares': 10.1,
 'win_shares_per_48_minutes': 0.216,
 'offensive_box_plus_minus': 7.1,
 'defensive_box_plus_minus': -0.5,
 'box_plus_minus': 6.6,
 'value_over_replacement_player': 4.9}

In [5]:
# Back-of-envelope size estimate, result in KB.
# NOTE(review): the factors are not explained in the notebook -- presumably
# ~70 KB per daily file x 6 months x 30 days x 20 seasons; TODO confirm.
70*6*30*20

252000

In [4]:
# Write the 2017-01-01 player box scores to a CSV file in the working directory.
client.player_box_scores(
    output_file_path="./1_1_2017_player_box_scores.csv",
    output_type=OutputType.CSV,
    year=2017,
    month=1,
    day=1,
)


In [7]:
# Write the 2017-01-01 team box scores to a CSV file in the working directory.
client.team_box_scores(
    output_file_path="./1_1_2017_team_box_scores.csv",
    output_type=OutputType.CSV,
    year=2017,
    month=1,
    day=1,
)


In [8]:
# Write the player season totals for the season ending in 2017 to a CSV file
# in the working directory.
client.players_season_totals(
    output_file_path="./2017_players_season_totals.csv",
    output_type=OutputType.CSV,
    season_end_year=2017,
)


In [12]:
# Scrapes raw data for player box scores
#
# For every season in `year_range` this walks each real calendar day from
# October through June and asks the client to write that day's player box
# scores to
#   ./data_raw/player_box_scores/<start>_<end>/<YYYY>_<MM>_<DD>_player_box_scores.csv
# Days whose file already exists are skipped, so the cell can be re-run to
# resume an interrupted scrape. A file with fewer than two CSV rows is treated
# as "no games played" (presumably header-only -- TODO confirm) and deleted.

import calendar
import csv
import os
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType

# Calendar months visited for each season, in chronological order (Oct -> Jun).
month_range = [10, 11, 12, 1, 2, 3, 4, 5, 6]
# Season start years to scrape (1999 through 2019 inclusive).
year_range = range(1999,2020)

for season_start_year in year_range:
    season_str = str(season_start_year) + "_" + str(season_start_year + 1)
    season_dir = "./data_raw/player_box_scores/" + season_str
    # makedirs rather than mkdir: also creates the missing parent directories
    # on a fresh checkout, and exist_ok makes re-runs a no-op.
    os.makedirs(season_dir, exist_ok=True)

    for month in month_range:
        # October-December fall in the season's start year; January-June in
        # the following calendar year.
        year = season_start_year if month >= 10 else season_start_year + 1
        month_str = "{:02d}".format(month)
        # Only request dates that exist (no Nov 31 / Feb 30); monthrange also
        # accounts for leap years.
        days_in_month = calendar.monthrange(year, month)[1]

        for day in range(1, days_in_month + 1):
            day_str = "{:02d}".format(day)
            date_str = str(year) + "_" + month_str + "_" + day_str
            output_file_path = season_dir + "/" + date_str \
                + "_" + "player_box_scores.csv"

            if os.path.exists(output_file_path):
                print(date_str + ": Game data already exists")
                continue

            client.player_box_scores(day=day, month=month, year=year,
                                     output_type=OutputType.CSV,
                                     output_file_path=output_file_path)

            # Count the rows that were written to decide whether any games
            # were played that day.
            with open(output_file_path, newline="") as csv_file:
                line_count = sum(1 for _ in csv.reader(csv_file, delimiter=','))

            if line_count < 2:
                # Nothing beyond (at most) one row: drop the file so the
                # directory only holds days with data.
                os.remove(output_file_path)
                print(date_str + ": No games played")
            else:
                print(date_str + ": Game data saved")





2016_10_01: No games played

2016_10_02: No games played

2016_10_03: No games played

2016_10_04: No games played

2016_10_05: No games played

2016_10_06: No games played

2016_10_07: No games played

2016_10_08: No games played

2016_10_09: No games played

2016_10_10: No games played

2016_10_11: No games played

2016_10_12: No games played

2016_10_13: No games played

2016_10_14: No games played

2016_10_15: No games played

2016_10_16: No games played

2016_10_17: No games played

2016_10_18: No games played

2016_10_19: No games played

2016_10_20: No games played

2016_10_21: No games played

2016_10_22: No games played

2016_10_23: No games played

2016_10_24: No games played
2016_10_25: Game data already exists

2016_10_26: Game data saved

2016_10_27: Game data saved

2016_10_28: Game data saved

2016_10_29: Game data saved

2016_10_30: Game data saved

2016_10_31: Game data saved
2016_11_01: Game data already exists

2016_11_02: Game data saved

2016_11_03: Game data save