Skip to content

Commit

Permalink
Add player slug to player box score and player season totals (#75)
Browse files Browse the repository at this point in the history
As #74 pointed out, player box scores and player season totals do not have a unique identifier.

This change uses the `data-append-csv` attribute on each player's name cell as that identifier; its value is the URI slug of the player's Basketball Reference page.

![image](https://user-images.githubusercontent.com/8136030/53716772-d4112800-3e0a-11e9-9369-6c8e44d6cf97.png)
  • Loading branch information
jaebradley committed Mar 4, 2019
1 parent a0bc06c commit e178261
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 1 deletion.
4 changes: 4 additions & 0 deletions basketball_reference_web_scraper/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from basketball_reference_web_scraper.data import OutputType, OutputWriteOption

box_score_fieldname = [
"slug",
"name",
"team",
"location",
Expand Down Expand Up @@ -35,6 +36,7 @@
]

player_season_totals_fieldname = [
"slug",
"name",
"positions",
"age",
Expand Down Expand Up @@ -119,6 +121,7 @@ def box_scores_to_csv(rows, output_file_path, write_option):
writer.writeheader()
writer.writerows(
{
"slug": row["slug"],
"name": row["name"],
"team": row["team"].value,
"location": row["location"].value,
Expand Down Expand Up @@ -164,6 +167,7 @@ def players_season_totals_to_csv(rows, output_file_path, write_option):
writer.writeheader()
writer.writerows(
{
"slug": row["slug"],
"name": row["name"],
"positions": "-".join(map(lambda position: position.value, row["positions"])),
"age": row["age"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def parse_seconds_played(formatted_playing_time):

def parse_player_box_score(row):
return {
"slug": str(row[1].get("data-append-csv")),
"name": str(row[1].text_content()),
"team": TEAM_ABBREVIATIONS_TO_TEAM[row[2].text_content()],
"location": parse_location(row[3].text_content()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

def parse_player_season_totals(row):
return {
"slug": str(row[1].get("data-append-csv")),
"name": str(row[1].text_content()),
"positions": parse_positions(row[2].text_content()),
"age": int(row[3].text_content()),
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="basketball_reference_web_scraper",
version="4.1.0",
version="4.2.0",
author="Jae Bradley",
author_email="jae.b.bradley@gmail.com",
license="MIT",
Expand Down
3 changes: 3 additions & 0 deletions tests/test_integration_parse_player_box_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_box_scores_for_01_01_2017(self):

first_box_score = parsed_box_score[0]

self.assertEqual(first_box_score["slug"], "millspa01")
self.assertEqual(first_box_score["name"], "Paul Millsap")
self.assertEqual(first_box_score["team"], Team.ATLANTA_HAWKS)
self.assertEqual(first_box_score["opponent"], Team.NEW_YORK_KNICKS)
Expand Down Expand Up @@ -55,6 +56,7 @@ def test_parses_new_orleans_hornets_for_box_scores_for_11_03_2003(self):

pj_brown = parsed_box_score[51]

self.assertEqual(pj_brown["slug"], "brownpj01")
self.assertEqual(pj_brown["name"], "P.J. Brown")
self.assertEqual(pj_brown["team"], Team.NEW_ORLEANS_HORNETS)

Expand All @@ -64,5 +66,6 @@ def test_parses_new_orleans_oklahoma_city_hornets_for_box_scores_for_11_01_2006(

chris_paul = parsed_box_score[10]

self.assertEqual(chris_paul["slug"], "paulch01")
self.assertEqual(chris_paul["name"], "Chris Paul")
self.assertEqual(chris_paul["team"], Team.NEW_ORLEANS_OKLAHOMA_CITY_HORNETS)
5 changes: 5 additions & 0 deletions tests/test_integration_parse_player_season_totals.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def test_2001_players_season_totals(self):

mahmoud_abdul_rauf = parsed_season_totals[0]

self.assertEqual(mahmoud_abdul_rauf["slug"], "abdulma02")
self.assertEqual(mahmoud_abdul_rauf["name"], "Mahmoud Abdul-Rauf")
self.assertEqual(mahmoud_abdul_rauf["positions"], [Position.POINT_GUARD])
self.assertEqual(mahmoud_abdul_rauf["team"], Team.VANCOUVER_GRIZZLIES)
Expand All @@ -47,6 +48,7 @@ def test_2018_players_season_totals(self):

alex_abrines = parsed_season_totals[0]

self.assertEqual(alex_abrines["slug"], "abrinal01")
self.assertEqual(alex_abrines["name"], "Alex Abrines")
self.assertEqual(alex_abrines["positions"], [Position.SHOOTING_GUARD])
self.assertEqual(alex_abrines["team"], Team.OKLAHOMA_CITY_THUNDER)
Expand All @@ -72,6 +74,7 @@ def test_2018_omer_asik_season_totals(self):

pelicans_omer_asik = parsed_season_totals[22]

self.assertEqual(pelicans_omer_asik["slug"], "asikom01")
self.assertEqual(pelicans_omer_asik["name"], "Omer Asik")
self.assertEqual(pelicans_omer_asik["positions"], [Position.CENTER])
self.assertEqual(pelicans_omer_asik["team"], Team.NEW_ORLEANS_PELICANS)
Expand All @@ -94,6 +97,7 @@ def test_2018_omer_asik_season_totals(self):

bulls_omer_asik = parsed_season_totals[23]

# Check the slug on the Bulls row itself. The Pelicans row is asserted
# separately, so re-checking it here would leave this row's slug unverified.
self.assertEqual(bulls_omer_asik["slug"], "asikom01")
self.assertEqual(bulls_omer_asik["name"], "Omer Asik")
self.assertEqual(bulls_omer_asik["positions"], [Position.CENTER])
self.assertEqual(bulls_omer_asik["team"], Team.CHICAGO_BULLS)
Expand All @@ -119,6 +123,7 @@ def test_2019_jimmy_butler_season_totals(self):

philly_jimmy_butler = parsed_season_totals[72]

self.assertEqual(philly_jimmy_butler["slug"], "butleji01")
self.assertEqual(philly_jimmy_butler["name"], "Jimmy Butler")
self.assertEqual(philly_jimmy_butler["positions"], [Position.SHOOTING_GUARD])
self.assertEqual(philly_jimmy_butler["team"], Team.PHILADELPHIA_76ERS)
Expand Down

0 comments on commit e178261

Please sign in to comment.