-
Notifications
You must be signed in to change notification settings - Fork 97
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement Team Box Scores method (#71)
### Discussion As discussed in #70, there could be a need for box score data, aggregated by team. This PR introduces a `team_box_scores` API method. This API method returns a list of dictionaries that look something like ```python [ { "team": Team.BOSTON_CELTICS, "minutes_played": 265, "attempted_field_goals": 70, # And more fields }, { # Another Team } ] ``` The `team_box_scores` method is called in the same way `player_box_scores` is invoked - with `day`, `month`, and `year` parameters (as well as optional `output_type`, `output_file_path`, `output_write_option`, `json_options` parameters). ### Implementation * Makes requests to get HTML for `boxscores` page for a given date (like https://www.basketball-reference.com/boxscores/?month=01&day=01&year=2017) * Parses game URLs from page (like https://www.basketball-reference.com/boxscores/201701010ATL.html) * Makes requests to get HTML for each of these game pages * Parses HTML for these pages to get "Team Totals" for each team ![image](https://user-images.githubusercontent.com/8136030/50730310-22ea5c00-10ff-11e9-8226-e293f7e821eb.png)
- Loading branch information
1 parent
bc3a4d1
commit 4f366af
Showing
17 changed files
with
3,279 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from lxml import html | ||
|
||
|
||
def parse_game_url_paths(page):
    """Return the link targets of the games listed on a box scores page.

    ``page`` is parsed as HTML. Every anchor that is a direct child of a
    ``td`` whose class contains ``gamelink`` is selected, and the ``href``
    attribute of each anchor is returned as a list, in the order the XPath
    query yields them.

    Raises ``KeyError`` if a selected anchor has no ``href`` attribute.
    """
    document = html.fromstring(page)
    anchors = document.xpath('//td[contains(@class, "gamelink")]/a')
    return [anchor.attrib['href'] for anchor in anchors]
File renamed without changes.
43 changes: 43 additions & 0 deletions
43
basketball_reference_web_scraper/parsers/box_scores/teams.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from lxml import html | ||
|
||
from basketball_reference_web_scraper.data import TEAM_NAME_TO_TEAM | ||
|
||
|
||
def parse_team_total(footer, team):
    """Build one team's totals dictionary from a box score table footer.

    ``footer`` must answer ``xpath('tr/td')`` with a sequence of cells, each
    exposing ``text_content()``. ``team`` is stored unchanged under the
    ``"team"`` key; every other value is the integer parsed from the cell at
    a fixed position.

    Raises ``IndexError`` when the footer has fewer cells than expected and
    ``ValueError`` when a cell's text is not an integer.
    """
    columns = footer.xpath('tr/td')

    # (output key, cell position) pairs, in output order. Positions 3, 6, 9
    # and 12 are deliberately skipped -- presumably the percentage and total
    # rebound columns; confirm against the page layout.
    stat_positions = (
        ("minutes_played", 0),
        ("made_field_goals", 1),
        ("attempted_field_goals", 2),
        ("made_three_point_field_goals", 4),
        ("attempted_three_point_field_goals", 5),
        ("made_free_throws", 7),
        ("attempted_free_throws", 8),
        ("offensive_rebounds", 10),
        ("defensive_rebounds", 11),
        ("assists", 13),
        ("steals", 14),
        ("blocks", 15),
        ("turnovers", 16),
        ("personal_fouls", 17),
    )

    totals = {"team": team}
    for key, position in stat_positions:
        totals[key] = int(columns[position].text_content())
    return totals
|
||
|
||
def parse_team_totals(page):
    """Parse the totals for each team from a single game's box score page.

    Team names are read from the ``a[itemprop="name"]`` anchors inside the
    ``scorebox`` div and translated through ``TEAM_NAME_TO_TEAM`` (keyed by
    the upper-cased name). Totals come from the ``tfoot`` of every
    ``stats_table`` whose ``id`` contains ``"basic"``. The n-th footer is
    paired with the n-th team.

    Returns a list with one ``parse_team_total`` dictionary per footer.

    Raises ``KeyError`` if a team name is missing from ``TEAM_NAME_TO_TEAM``
    and ``IndexError`` if there are more footers than teams.
    """
    tree = html.fromstring(page)
    teams = [
        TEAM_NAME_TO_TEAM[anchor.text_content().upper()]
        for anchor in tree.xpath('//div[@class="scorebox"]//a[@itemprop="name"]')
    ]
    footers = [
        footer
        for table in tree.xpath('//table[contains(@class, "stats_table")]')
        # A table without an id cannot be a "basic" table; skip it instead of
        # failing with KeyError on the attribute lookup.
        if "basic" in table.attrib.get("id", "")
        for footer in table.xpath("tfoot")
    ]
    # Pair by position directly rather than searching the list for each
    # footer, which re-scanned the list and depended on element equality.
    return [
        parse_team_total(footer=footer, team=teams[position])
        for position, footer in enumerate(footers)
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from unittest import TestCase | ||
import os | ||
|
||
from basketball_reference_web_scraper.parsers.box_scores.games import parse_game_url_paths | ||
|
||
# Path to the saved box scores page fixture, resolved relative to this file.
january_01_2017_html = os.path.join(
    os.path.dirname(__file__),
    './01_01_2017_box_scores.html',
)
|
||
|
||
class TestParseGameUrls(TestCase):
    """Checks ``parse_game_url_paths`` against the saved box scores page."""

    def setUp(self):
        # Read inside a context manager so the fixture's file handle is
        # closed as soon as the contents are loaded.
        with open(january_01_2017_html) as fixture:
            self.january_01_2017_box_scores = fixture.read()

    def test_parse_urls(self):
        """The fixture page yields exactly these five game paths, in order."""
        urls = parse_game_url_paths(self.january_01_2017_box_scores)
        self.assertEqual(
            urls,
            [
                '/boxscores/201701010ATL.html',
                '/boxscores/201701010IND.html',
                '/boxscores/201701010LAL.html',
                '/boxscores/201701010MIA.html',
                '/boxscores/201701010MIN.html',
            ],
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.