Skip to content

Commit

Permalink
Implement Team Box Scores method (#71)
Browse files Browse the repository at this point in the history
### Discussion

As discussed in #70, there could be a need for box score data, aggregated by team.

This PR introduces a `team_box_scores` API method.

This API method returns a list of dictionaries that look something like


```python
[
    {
      "team": Team.BOSTON_CELTICS,
      "minutes_played": 265,
      "attempted_field_goals": 70,
      # And more fields
    },
    {
      # Another Team
    }
]
```

The `team_box_scores` method is called in the same way that `player_box_scores` is invoked - with `day`, `month`, and `year` parameters (as well as the optional `output_type`, `output_file_path`, `output_write_option`, and `json_options` parameters).

### Implementation

* Makes requests to get HTML for `boxscores` page for a given date (like https://www.basketball-reference.com/boxscores/?month=01&day=01&year=2017)
* Parses game urls from page (like https://www.basketball-reference.com/boxscores/201701010ATL.html)
* Makes requests to get HTML for each of these game pages
* Parses HTML for these pages to get "Team Totals" for each team

![image](https://user-images.githubusercontent.com/8136030/50730310-22ea5c00-10ff-11e9-8226-e293f7e821eb.png)
  • Loading branch information
jaebradley committed Jan 6, 2019
1 parent bc3a4d1 commit 4f366af
Show file tree
Hide file tree
Showing 17 changed files with 3,279 additions and 26 deletions.
16 changes: 14 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,13 @@ from basketball_reference_web_scraper.data import Team

## API

This client has three methods
This client has four methods
* Getting player box scores by a date (`client.player_box_scores`)
* Getting team box scores by a date (`client.team_box_scores`)
* Getting the schedule for a season (`client.season_schedule`)
* Getting players totals for a season (`client.players_season_totals`)

You can see all three methods used in [this `repl`](https://repl.it/@jaebradley/v300api-examples).
You can see all four methods used in [this `repl`](https://repl.it/@jaebradley/v300api-examples).

### Data output

Expand Down Expand Up @@ -87,6 +88,17 @@ client.player_box_scores(day=1, month=1, year=2017, output_type=OutputType.JSON,
client.player_box_scores(day=1, month=1, year=2017, output_type=OutputType.CSV, output_file_path="./1_1_2017_box_scores.csv")
```

### Get team box scores by date

```python
from basketball_reference_web_scraper import client

# Get all team totals for January 1st, 2018
client.team_box_scores(day=1, month=1, year=2018)

# The team_box_scores method also supports all output behavior previously described
```

### Get season schedule

```python
Expand Down
21 changes: 20 additions & 1 deletion basketball_reference_web_scraper/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from basketball_reference_web_scraper import http_client

from basketball_reference_web_scraper.errors import InvalidSeason, InvalidDate
from basketball_reference_web_scraper.output import box_scores_to_csv, schedule_to_csv, players_season_totals_to_csv
from basketball_reference_web_scraper.output import box_scores_to_csv, schedule_to_csv, players_season_totals_to_csv, \
team_box_scores_to_csv
from basketball_reference_web_scraper.output import output
from basketball_reference_web_scraper.json_encoders import BasketballReferenceJSONEncoder

Expand Down Expand Up @@ -65,3 +66,21 @@ def players_season_totals(season_end_year, output_type=None, output_file_path=No
json_options=json_options,
)


def team_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
    """Fetch the team box scores for one date and pass them to ``output``.

    :param day: day of the month to look up
    :param month: month to look up
    :param year: year to look up
    :param output_type: forwarded unchanged to ``output``
    :param output_file_path: forwarded unchanged to ``output``
    :param output_write_option: forwarded unchanged to ``output``
    :param json_options: forwarded unchanged to ``output``
    :return: whatever ``output`` returns for the fetched values
    :raises InvalidDate: when the HTTP layer fails with a 404 Not Found
    :raises requests.exceptions.HTTPError: for any other HTTP error status
    """
    try:
        values = http_client.team_box_scores(day=day, month=month, year=year)
    except requests.exceptions.HTTPError as http_error:
        if http_error.response.status_code == requests.codes.not_found:
            # Chain explicitly so the underlying HTTP failure is kept as the cause.
            raise InvalidDate(day=day, month=month, year=year) from http_error
        # Any other status is not a date problem; propagate it untouched.
        raise
    return output(
        values=values,
        output_type=output_type,
        output_file_path=output_file_path,
        output_write_option=output_write_option,
        csv_writer=team_box_scores_to_csv,
        encoder=BasketballReferenceJSONEncoder,
        json_options=json_options,
    )
41 changes: 41 additions & 0 deletions basketball_reference_web_scraper/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,47 @@ class Position(Enum):
'VAN': Team.VANCOUVER_GRIZZLIES,
}

# Lookup from a team's full name, written in upper case, to its Team member.
# Callers are expected to upper-case the name before looking it up.
TEAM_NAME_TO_TEAM = {
"ATLANTA HAWKS": Team.ATLANTA_HAWKS,
"BOSTON CELTICS": Team.BOSTON_CELTICS,
"BROOKLYN NETS": Team.BROOKLYN_NETS,
"CHARLOTTE HORNETS": Team.CHARLOTTE_HORNETS,
"CHICAGO BULLS": Team.CHICAGO_BULLS,
"CLEVELAND CAVALIERS": Team.CLEVELAND_CAVALIERS,
"DALLAS MAVERICKS": Team.DALLAS_MAVERICKS,
"DENVER NUGGETS": Team.DENVER_NUGGETS,
"DETROIT PISTONS": Team.DETROIT_PISTONS,
"GOLDEN STATE WARRIORS": Team.GOLDEN_STATE_WARRIORS,
"HOUSTON ROCKETS": Team.HOUSTON_ROCKETS,
"INDIANA PACERS": Team.INDIANA_PACERS,
"LOS ANGELES CLIPPERS": Team.LOS_ANGELES_CLIPPERS,
"LOS ANGELES LAKERS": Team.LOS_ANGELES_LAKERS,
"MEMPHIS GRIZZLIES": Team.MEMPHIS_GRIZZLIES,
"MIAMI HEAT": Team.MIAMI_HEAT,
"MILWAUKEE BUCKS": Team.MILWAUKEE_BUCKS,
"MINNESOTA TIMBERWOLVES": Team.MINNESOTA_TIMBERWOLVES,
"NEW ORLEANS PELICANS": Team.NEW_ORLEANS_PELICANS,
"NEW YORK KNICKS": Team.NEW_YORK_KNICKS,
"OKLAHOMA CITY THUNDER": Team.OKLAHOMA_CITY_THUNDER,
"ORLANDO MAGIC": Team.ORLANDO_MAGIC,
"PHILADELPHIA 76ERS": Team.PHILADELPHIA_76ERS,
"PHOENIX SUNS": Team.PHOENIX_SUNS,
"PORTLAND TRAIL BLAZERS": Team.PORTLAND_TRAIL_BLAZERS,
"SACRAMENTO KINGS": Team.SACRAMENTO_KINGS,
"SAN ANTONIO SPURS": Team.SAN_ANTONIO_SPURS,
"TORONTO RAPTORS": Team.TORONTO_RAPTORS,
"UTAH JAZZ": Team.UTAH_JAZZ,
"WASHINGTON WIZARDS": Team.WASHINGTON_WIZARDS,

# DEPRECATED TEAMS
"CHARLOTTE BOBCATS": Team.CHARLOTTE_BOBCATS,
"NEW JERSEY NETS": Team.NEW_JERSEY_NETS,
"NEW ORLEANS HORNETS": Team.NEW_ORLEANS_HORNETS,
"NEW ORLEANS/OKLAHOMA CITY HORNETS": Team.NEW_ORLEANS_OKLAHOMA_CITY_HORNETS,
"SEATTLE SUPERSONICS": Team.SEATTLE_SUPERSONICS,
"VANCOUVER GRIZZLIES": Team.VANCOUVER_GRIZZLIES,
}

POSITION_ABBREVIATIONS_TO_POSITION = {
"PG": Position.POINT_GUARD,
"SG": Position.SHOOTING_GUARD,
Expand Down
30 changes: 29 additions & 1 deletion basketball_reference_web_scraper/http_client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import requests

from basketball_reference_web_scraper.errors import InvalidDate
from basketball_reference_web_scraper.parsers.box_scores import parse_player_box_scores
from basketball_reference_web_scraper.parsers.box_scores.players import parse_player_box_scores
from basketball_reference_web_scraper.parsers.box_scores.games import parse_game_url_paths
from basketball_reference_web_scraper.parsers.box_scores.teams import parse_team_totals
from basketball_reference_web_scraper.parsers.schedule import parse_schedule, parse_schedule_for_month_url_paths
from basketball_reference_web_scraper.parsers.players_season_totals import parse_players_season_totals

Expand Down Expand Up @@ -66,3 +68,29 @@ def players_season_totals(season_end_year):
response.raise_for_status()

return parse_players_season_totals(response.content)


def team_box_score(game_url_path):
    """Download a single game's box score page and parse its team totals.

    :param game_url_path: path of the game page, with or without a leading
        slash (e.g. ``/boxscores/201701010ATL.html``)
    :return: the result of ``parse_team_totals`` for the response content
    :raises requests.exceptions.HTTPError: when the response has an error status
    """
    # Paths taken from anchor hrefs begin with "/"; strip it so the join with
    # BASE_URL does not produce a double slash ("...com//boxscores/...").
    url = "{BASE_URL}/{game_url_path}".format(
        BASE_URL=BASE_URL,
        game_url_path=game_url_path.lstrip("/"),
    )

    response = requests.get(url=url)
    response.raise_for_status()

    return parse_team_totals(response.content)


def team_box_scores(day, month, year):
    """Gather the team box scores of every game listed for a date.

    Requests the daily box scores listing, extracts the game page paths from
    it, then fetches each game page in turn and concatenates the results.

    :param day: day of the month, sent as the ``day`` query parameter
    :param month: month, sent as the ``month`` query parameter
    :param year: year, sent as the ``year`` query parameter
    :return: flat list holding every item ``team_box_score`` yields per game
    :raises requests.exceptions.HTTPError: when a response has an error status
    """
    query = {"day": day, "month": month, "year": year}
    listing = requests.get(url="{BASE_URL}/boxscores/".format(BASE_URL=BASE_URL), params=query)
    listing.raise_for_status()

    paths = parse_game_url_paths(listing.content)

    collected = []
    for path in paths:
        collected.extend(team_box_score(game_url_path=path))
    return collected
45 changes: 44 additions & 1 deletion basketball_reference_web_scraper/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,24 @@
"personal_fouls",
]

# Column names, in output order, for the team box score CSV; handed to
# csv.DictWriter as its fieldnames by team_box_scores_to_csv.
team_box_score_fieldname = [
"team",
"minutes_played",
"made_field_goals",
"attempted_field_goals",
"made_three_point_field_goals",
"attempted_three_point_field_goals",
"made_free_throws",
"attempted_free_throws",
"offensive_rebounds",
"defensive_rebounds",
"assists",
"steals",
"blocks",
"turnovers",
"personal_fouls",
]

default_json_options = {
"sort_keys": True,
"indent": 4,
Expand Down Expand Up @@ -167,4 +185,29 @@ def players_season_totals_to_csv(rows, output_file_path, write_option):
"turnovers": row["turnovers"],
"personal_fouls": row["personal_fouls"],
} for row in rows
)
)


def team_box_scores_to_csv(rows, output_file_path, write_option):
    """Write team box score rows to a CSV file with a header line.

    :param rows: iterable of mappings keyed by the names in
        ``team_box_score_fieldname``; the ``team`` entry must expose ``.value``
    :param output_file_path: path of the file to open
    :param write_option: object whose ``.value`` is used as the ``open`` mode
    """
    def to_record(row):
        # The team is written as its .value; every other column is copied as-is.
        record = {"team": row["team"].value}
        for column in team_box_score_fieldname:
            if column != "team":
                record[column] = row[column]
        return record

    with open(output_file_path, write_option.value, newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=team_box_score_fieldname)
        writer.writeheader()
        writer.writerows(to_record(row) for row in rows)
Empty file.
7 changes: 7 additions & 0 deletions basketball_reference_web_scraper/parsers/box_scores/games.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from lxml import html


def parse_game_url_paths(page):
    """Collect the ``href`` of each game link found in a box scores page.

    :param page: HTML of the page, passed to ``html.fromstring``
    :return: list of ``href`` values of the anchors inside ``td`` elements
        whose class contains ``gamelink``
    """
    document = html.fromstring(page)
    anchors = document.xpath('//td[contains(@class, "gamelink")]/a')
    return [anchor.attrib['href'] for anchor in anchors]
43 changes: 43 additions & 0 deletions basketball_reference_web_scraper/parsers/box_scores/teams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from lxml import html

from basketball_reference_web_scraper.data import TEAM_NAME_TO_TEAM


def parse_team_total(footer, team):
    """Build one team's totals dictionary from a stats table footer.

    :param footer: element whose ``tr/td`` cells hold the totals
    :param team: value stored under the ``team`` key unchanged
    :return: dict with ``team`` plus the integer totals read from the cells
    """
    # Cell position of each statistic. Positions 3, 6, 9 and 12 are skipped
    # on purpose (presumably percentage / total-rebound columns - confirm
    # against the page layout).
    stat_positions = (
        ("minutes_played", 0),
        ("made_field_goals", 1),
        ("attempted_field_goals", 2),
        ("made_three_point_field_goals", 4),
        ("attempted_three_point_field_goals", 5),
        ("made_free_throws", 7),
        ("attempted_free_throws", 8),
        ("offensive_rebounds", 10),
        ("defensive_rebounds", 11),
        ("assists", 13),
        ("steals", 14),
        ("blocks", 15),
        ("turnovers", 16),
        ("personal_fouls", 17),
    )
    tds = footer.xpath('tr/td')
    totals = {"team": team}
    for stat, position in stat_positions:
        totals[stat] = int(tds[position].text_content())
    return totals


def parse_team_totals(page):
    """Parse each team's totals from a single game's box score page.

    Team names are read from the scorebox anchors and paired, by position,
    with the ``tfoot`` of every stats table whose id contains ``basic``.

    :param page: HTML of the game page, passed to ``html.fromstring``
    :return: list with one ``parse_team_total`` result per matching footer
    :raises KeyError: when a scorebox name is missing from TEAM_NAME_TO_TEAM
    :raises IndexError: when there are more footers than scorebox team names
    """
    tree = html.fromstring(page)
    teams = [
        TEAM_NAME_TO_TEAM[anchor.text_content().upper()]
        for anchor in tree.xpath('//div[@class="scorebox"]//a[@itemprop="name"]')
    ]
    footers = [
        footer
        for table in tree.xpath('//table[contains(@class, "stats_table")]')
        # .get() so a stats table without an id is skipped rather than raising.
        if "basic" in table.attrib.get("id", "")
        for footer in table.xpath("tfoot")
    ]
    # Pair by position directly instead of searching the list for each footer.
    return [
        parse_team_total(footer=footer, team=teams[position])
        for position, footer in enumerate(footers)
    ]
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="basketball_reference_web_scraper",
version="4.0.0",
version="4.1.0",
author="Jae Bradley",
author_email="jae.b.bradley@gmail.com",
license="MIT",
Expand Down
1,448 changes: 1,448 additions & 0 deletions tests/01_01_2017_box_scores.html

Large diffs are not rendered by default.

1,506 changes: 1,506 additions & 0 deletions tests/201701010ATL.html

Large diffs are not rendered by default.

32 changes: 32 additions & 0 deletions tests/test_integration_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,35 @@ def test_2018_player_season_totals(self):
def test_2019_player_season_totals(self):
player_season_totals = client.players_season_totals(season_end_year=2019)
self.assertIsNotNone(player_season_totals)

def test_2018_01_01_team_box_scores(self):
    """Team box scores for 2018-01-01 come back as a non-None value."""
    result = client.team_box_scores(year=2018, month=1, day=1)
    self.assertIsNotNone(result)

def test_2001_01_01_team_box_scores(self):
    """Team box scores for 2001-01-01 come back as a non-None value."""
    result = client.team_box_scores(year=2001, month=1, day=1)
    self.assertIsNotNone(result)

def test_2004_01_02_team_box_scores(self):
    """Team box scores for 2004-01-02 come back as a non-None value."""
    result = client.team_box_scores(year=2004, month=1, day=2)
    self.assertIsNotNone(result)

def test_2018_01_01_team_box_scores_json_box_scores_to_file(self):
    """Request 2018-01-01 team box scores as JSON with a file path.

    There is no assertion; the test passes when the call does not raise.
    """
    arguments = dict(
        day=1,
        month=1,
        year=2018,
        output_type=OutputType.JSON,
        output_file_path="./2018_01_01_team_box_scores.json",
        output_write_option=OutputWriteOption.WRITE,
    )
    client.team_box_scores(**arguments)

def test_2018_01_01_team_box_scores_json_box_scores_to_memory(self):
    """JSON output without a file path yields a non-None value for 2018-01-01."""
    arguments = dict(day=1, month=1, year=2018, output_type=OutputType.JSON)
    result = client.team_box_scores(**arguments)

    self.assertIsNotNone(result)
20 changes: 20 additions & 0 deletions tests/test_integration_parse_games.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from unittest import TestCase
import os

from basketball_reference_web_scraper.parsers.box_scores.games import parse_game_url_paths

january_01_2017_html = os.path.join(os.path.dirname(__file__), './01_01_2017_box_scores.html')


class TestParseGameUrls(TestCase):
    """Checks game URL path parsing against the saved 2017-01-01 page."""

    def setUp(self):
        # Use a context manager so the fixture file is closed right after
        # reading instead of being left for the garbage collector.
        with open(january_01_2017_html) as fixture:
            self.january_01_2017_box_scores = fixture.read()

    def test_parse_urls(self):
        urls = parse_game_url_paths(self.january_01_2017_box_scores)
        # One comparison covers both the count and the order of the paths.
        self.assertEqual(
            urls,
            [
                '/boxscores/201701010ATL.html',
                '/boxscores/201701010IND.html',
                '/boxscores/201701010LAL.html',
                '/boxscores/201701010MIA.html',
                '/boxscores/201701010MIN.html',
            ],
        )
10 changes: 5 additions & 5 deletions tests/test_integration_parse_player_box_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from unittest import TestCase

from basketball_reference_web_scraper.data import Team, Outcome
from basketball_reference_web_scraper.parsers import box_scores
from basketball_reference_web_scraper.parsers.box_scores import players

november_03_2003_daily_leaders_html = os.path.join(os.path.dirname(__file__), './11_03_2003_daily_leaders.html')
november_01_2006_daily_leaders_html = os.path.join(os.path.dirname(__file__), './11_01_2006_daily_leaders.html')
Expand All @@ -18,12 +18,12 @@ def setUp(self):
self.january_01_2017_daily_leaders = open(january_01_2017_daily_leaders_html).read()

def test_box_scores_for_12_18_2015(self):
parsed_box_score = box_scores.parse_player_box_scores(self.december_18_2015_daily_leaders)
parsed_box_score = players.parse_player_box_scores(self.december_18_2015_daily_leaders)
self.assertEqual(len(parsed_box_score), 250)

# Test for minutes played greater than or equal to 60 minutes
def test_box_scores_for_01_01_2017(self):
parsed_box_score = box_scores.parse_player_box_scores(self.january_01_2017_daily_leaders)
parsed_box_score = players.parse_player_box_scores(self.january_01_2017_daily_leaders)
self.assertEqual(len(parsed_box_score), 170)

first_box_score = parsed_box_score[0]
Expand All @@ -49,7 +49,7 @@ def test_box_scores_for_01_01_2017(self):
self.assertEqual(first_box_score["game_score"], 31.3)

def test_parses_new_orleans_hornets_for_box_scores_for_11_03_2003(self):
parsed_box_score = box_scores.parse_player_box_scores(self.november_03_2003_daily_leaders)
parsed_box_score = players.parse_player_box_scores(self.november_03_2003_daily_leaders)

self.assertEqual(len(parsed_box_score), 145)

Expand All @@ -59,7 +59,7 @@ def test_parses_new_orleans_hornets_for_box_scores_for_11_03_2003(self):
self.assertEqual(pj_brown["team"], Team.NEW_ORLEANS_HORNETS)

def test_parses_new_orleans_oklahoma_city_hornets_for_box_scores_for_11_01_2006(self):
parsed_box_score = box_scores.parse_player_box_scores(self.november_01_2006_daily_leaders)
parsed_box_score = players.parse_player_box_scores(self.november_01_2006_daily_leaders)
self.assertEqual(len(parsed_box_score), 272)

chris_paul = parsed_box_score[10]
Expand Down
Loading

0 comments on commit 4f366af

Please sign in to comment.