Skip to content

Commit

Permalink
Release v4.4 (#93)
Browse files Browse the repository at this point in the history
* Adds play-by-play stats scraping for pages like https://www.basketball-reference.com/boxscores/pbp/201810160BOS.html (#86)
* Rewrites internal writing interface (#89)
* Updates `README` to reflect new `play_by_play` client method and adds `Contributors` section (#94)
  • Loading branch information
jaebradley committed Aug 13, 2019
2 parents 947eb40 + f41e796 commit b3627f7
Show file tree
Hide file tree
Showing 24 changed files with 1,062 additions and 301 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,26 @@ client.players_advanced_season_totals(season_end_year=2018)
# The players_advanced_season_totals method also supports all output behavior previously described
```

### Get play-by-play data for a game

The `play_by_play` method takes the home team and the date of the game because Basketball Reference's play-by-play
URLs are built from the date of the game and the home team.

Example: `https://www.basketball-reference.com/boxscores/pbp/201810160BOS.html`

```python
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import Team

# Get play-by-play data for 2018-10-16 game played at the Boston Celtics
play_by_play = client.play_by_play(
home_team=Team.BOSTON_CELTICS,
year=2018,
month=10,
day=16,
)
```

## Development

There are currently two supported major versions - `V3` and `V4`.
Expand All @@ -142,3 +162,7 @@ when making changes.

`master` will reflect the latest major version branch.

## Contributors

Thanks to [@DaiJunyan](https://github.com/DaiJunyan) for their contributions!

73 changes: 55 additions & 18 deletions basketball_reference_web_scraper/client.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import requests

from basketball_reference_web_scraper import http_client

from basketball_reference_web_scraper.errors import InvalidSeason, InvalidDate
from basketball_reference_web_scraper.output import box_scores_to_csv, schedule_to_csv, players_season_totals_to_csv, players_advanced_season_totals_to_csv, team_box_scores_to_csv
from basketball_reference_web_scraper.output import output
from basketball_reference_web_scraper.json_encoders import BasketballReferenceJSONEncoder
from basketball_reference_web_scraper.writers import CSVWriter, RowFormatter, \
BOX_SCORE_COLUMN_NAMES, SCHEDULE_COLUMN_NAMES, PLAYER_SEASON_TOTALS_COLUMN_NAMES, \
PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES, TEAM_BOX_SCORES_COLUMN_NAMES, PLAY_BY_PLAY_COLUMN_NAMES


def player_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
def player_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None,
json_options=None):
try:
values = http_client.player_box_scores(day=day, month=month, year=year)
except requests.exceptions.HTTPError as http_error:
Expand All @@ -21,13 +22,16 @@ def player_box_scores(day, month, year, output_type=None, output_file_path=None,
output_type=output_type,
output_file_path=output_file_path,
output_write_option=output_write_option,
csv_writer=box_scores_to_csv,
encoder=BasketballReferenceJSONEncoder,
csv_writer=CSVWriter(
column_names=BOX_SCORE_COLUMN_NAMES,
row_formatter=RowFormatter(data_field_names=BOX_SCORE_COLUMN_NAMES)
),
json_options=json_options,
)


def season_schedule(season_end_year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
def season_schedule(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
json_options=None):
try:
values = http_client.season_schedule(season_end_year)
except requests.exceptions.HTTPError as http_error:
Expand All @@ -41,13 +45,16 @@ def season_schedule(season_end_year, output_type=None, output_file_path=None, ou
output_type=output_type,
output_file_path=output_file_path,
output_write_option=output_write_option,
csv_writer=schedule_to_csv,
encoder=BasketballReferenceJSONEncoder,
csv_writer=CSVWriter(
column_names=SCHEDULE_COLUMN_NAMES,
row_formatter=RowFormatter(data_field_names=SCHEDULE_COLUMN_NAMES)
),
json_options=json_options,
)


def players_season_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
def players_season_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
json_options=None):
try:
values = http_client.players_season_totals(season_end_year)
except requests.exceptions.HTTPError as http_error:
Expand All @@ -60,13 +67,16 @@ def players_season_totals(season_end_year, output_type=None, output_file_path=No
output_type=output_type,
output_file_path=output_file_path,
output_write_option=output_write_option,
csv_writer=players_season_totals_to_csv,
encoder=BasketballReferenceJSONEncoder,
csv_writer=CSVWriter(
column_names=PLAYER_SEASON_TOTALS_COLUMN_NAMES,
row_formatter=RowFormatter(data_field_names=PLAYER_SEASON_TOTALS_COLUMN_NAMES)
),
json_options=json_options,
)


def players_advanced_season_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
def players_advanced_season_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
json_options=None):
try:
values = http_client.players_advanced_season_totals(season_end_year)
except requests.exceptions.HTTPError as http_error:
Expand All @@ -79,13 +89,16 @@ def players_advanced_season_totals(season_end_year, output_type=None, output_fil
output_type=output_type,
output_file_path=output_file_path,
output_write_option=output_write_option,
csv_writer=players_advanced_season_totals_to_csv,
encoder=BasketballReferenceJSONEncoder,
csv_writer=CSVWriter(
column_names=PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES,
row_formatter=RowFormatter(data_field_names=PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES)
),
json_options=json_options,
)


def team_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
def team_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None,
json_options=None):
try:
values = http_client.team_box_scores(day=day, month=month, year=year)
except requests.exceptions.HTTPError as http_error:
Expand All @@ -98,7 +111,31 @@ def team_box_scores(day, month, year, output_type=None, output_file_path=None, o
output_type=output_type,
output_file_path=output_file_path,
output_write_option=output_write_option,
csv_writer=team_box_scores_to_csv,
encoder=BasketballReferenceJSONEncoder,
csv_writer=CSVWriter(
column_names=TEAM_BOX_SCORES_COLUMN_NAMES,
row_formatter=RowFormatter(data_field_names=TEAM_BOX_SCORES_COLUMN_NAMES)
),
json_options=json_options,
)


def play_by_play(home_team, day, month, year, output_type=None, output_file_path=None, output_write_option=None,
                 json_options=None):
    """Fetch play-by-play data for one game and pass it to ``output``.

    The data is requested through ``http_client.play_by_play`` using the home
    team and the day, month and year of the game.

    :param home_team: home team of the game, forwarded to the HTTP client
    :param day: day of the month of the game
    :param month: month of the game
    :param year: year of the game
    :param output_type: forwarded unchanged to ``output``
    :param output_file_path: forwarded unchanged to ``output``
    :param output_write_option: forwarded unchanged to ``output``
    :param json_options: forwarded unchanged to ``output``
    :return: whatever ``output`` returns for the fetched values
    :raises InvalidDate: when the HTTP client reports a "not found" response
    :raises requests.exceptions.HTTPError: for any other HTTP error status
    """
    try:
        plays = http_client.play_by_play(home_team=home_team, day=day, month=month, year=year)
    except requests.exceptions.HTTPError as http_error:
        # Only a "not found" response is translated; anything else is
        # propagated to the caller untouched.
        if http_error.response.status_code != requests.codes.not_found:
            raise http_error
        raise InvalidDate(day=day, month=month, year=year)

    # The same column names drive both the CSV header and the row formatting.
    play_by_play_writer = CSVWriter(
        column_names=PLAY_BY_PLAY_COLUMN_NAMES,
        row_formatter=RowFormatter(data_field_names=PLAY_BY_PLAY_COLUMN_NAMES),
    )
    return output(
        values=plays,
        output_type=output_type,
        output_file_path=output_file_path,
        output_write_option=output_write_option,
        csv_writer=play_by_play_writer,
        json_options=json_options,
    )
8 changes: 8 additions & 0 deletions basketball_reference_web_scraper/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ class Position(Enum):
GUARD = "GUARD"


# Enumerates the kinds of game period: QUARTER and OVERTIME.
# Each member's value is the same string as its name.
class PeriodType(Enum):
QUARTER = "QUARTER"
OVERTIME = "OVERTIME"


TEAM_ABBREVIATIONS_TO_TEAM = {
'ATL': Team.ATLANTA_HAWKS,
'BOS': Team.BOSTON_CELTICS,
Expand Down Expand Up @@ -116,6 +121,9 @@ class Position(Enum):
'VAN': Team.VANCOUVER_GRIZZLIES,
}

# Reverse lookup from Team to abbreviation, built by inverting TEAM_ABBREVIATIONS_TO_TEAM.
TEAM_TO_TEAM_ABBREVIATION = {
    team: abbreviation
    for abbreviation, team in TEAM_ABBREVIATIONS_TO_TEAM.items()
}
# Pin the Charlotte Hornets to "CHO" regardless of what the inversion produced.
# NOTE(review): presumably more than one abbreviation maps to this team — confirm.
TEAM_TO_TEAM_ABBREVIATION[Team.CHARLOTTE_HORNETS] = "CHO"

TEAM_NAME_TO_TEAM = {
"ATLANTA HAWKS": Team.ATLANTA_HAWKS,
"BOSTON CELTICS": Team.BOSTON_CELTICS,
Expand Down
22 changes: 19 additions & 3 deletions basketball_reference_web_scraper/http_client.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import requests

from basketball_reference_web_scraper.data import TEAM_TO_TEAM_ABBREVIATION
from basketball_reference_web_scraper.errors import InvalidDate
from basketball_reference_web_scraper.parsers.box_scores.players import parse_player_box_scores
from basketball_reference_web_scraper.parsers.box_scores.games import parse_game_url_paths
from basketball_reference_web_scraper.parsers.play_by_play import parse_play_by_plays
from basketball_reference_web_scraper.parsers.box_scores.players import parse_player_box_scores
from basketball_reference_web_scraper.parsers.box_scores.teams import parse_team_totals
from basketball_reference_web_scraper.parsers.schedule import parse_schedule, parse_schedule_for_month_url_paths
from basketball_reference_web_scraper.parsers.players_season_totals import parse_players_season_totals
from basketball_reference_web_scraper.parsers.players_advanced_season_totals import parse_players_advanced_season_totals
from basketball_reference_web_scraper.parsers.players_season_totals import parse_players_season_totals
from basketball_reference_web_scraper.parsers.schedule import parse_schedule, parse_schedule_for_month_url_paths

BASE_URL = 'https://www.basketball-reference.com'

Expand Down Expand Up @@ -108,3 +110,17 @@ def team_box_scores(day, month, year):
for game_url_path in game_url_paths
for box_score in team_box_score(game_url_path=game_url_path)
]


def play_by_play(home_team, day, month, year):
    """Download and parse the play-by-play page for a single game.

    The page URL has the form
    ``{BASE_URL}/boxscores/pbp/{year}{month}{day}0{team_abbr}.html`` where
    ``month`` and ``day`` are left-padded with a zero to two characters and
    ``team_abbr`` is looked up in ``TEAM_TO_TEAM_ABBREVIATION``.

    :param home_team: home team of the game; must be a key of
        ``TEAM_TO_TEAM_ABBREVIATION``
    :param day: day of the month of the game
    :param month: month of the game
    :param year: year of the game
    :return: the result of ``parse_play_by_plays`` applied to the page content
    :raises KeyError: if ``home_team`` has no known abbreviation
    :raises requests.exceptions.HTTPError: if the response has an error status
    """
    # The hard-coded `0` in the URL assumes we always take the first match of
    # the given date and team.
    url = "{BASE_URL}/boxscores/pbp/{year}{month}{day}0{team_abbr}.html".format(
        BASE_URL=BASE_URL,
        year=year,
        # zfill pads single-digit values ("5" -> "05") and leaves two-digit
        # values untouched, replacing the previous hand-written lambda.
        month=str(month).zfill(2),
        day=str(day).zfill(2),
        team_abbr=TEAM_TO_TEAM_ABBREVIATION[home_team],
    )
    response = requests.get(url=url)
    response.raise_for_status()
    return parse_play_by_plays(response.content, home_team)
2 changes: 1 addition & 1 deletion basketball_reference_web_scraper/json_encoders.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime
from json import JSONEncoder
from enum import Enum
from json import JSONEncoder


class BasketballReferenceJSONEncoder(JSONEncoder):
Expand Down
Loading

0 comments on commit b3627f7

Please sign in to comment.