Skip to content

Commit

Permalink
Improve schedule parser (#61) (#66)
Browse files Browse the repository at this point in the history
Add logic so that the schedule scraper can get all the games for
the current season without crashing; games that have not been played
yet are returned with their scores left unset in the game hashes

Adds `None` as the default value if the away team score or home team score is not available
  • Loading branch information
jaebradley committed Nov 28, 2018
1 parent 45af162 commit 183d831
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
4 changes: 2 additions & 2 deletions basketball_reference_web_scraper/parsers/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def parse_game(row):
return {
"start_time": start_time,
"away_team": TEAM_NAME_TO_TEAM[row[2].text_content().upper()],
"away_team_score": int(row[3].text_content()),
"home_team": TEAM_NAME_TO_TEAM[row[4].text_content().upper()],
"home_team_score": int(row[5].text_content()),
"away_team_score": int(row[3].text_content()) if row[3].text_content() else None,
"home_team_score": int(row[5].text_content()) if row[5].text_content() else None,
}


Expand Down
17 changes: 17 additions & 0 deletions tests/test_integration_parse_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@

# Paths to the saved schedule pages, resolved relative to this module's directory.
_FIXTURE_DIR = os.path.dirname(__file__)
october_2001_schedule_html = os.path.join(_FIXTURE_DIR, './NBA_2001_games-october.html')
october_2018_schedule_html = os.path.join(_FIXTURE_DIR, './NBA_2018_games-october.html')
april_2019_schedule_html = os.path.join(_FIXTURE_DIR, './NBA_2019_games-april.html')


class TestSchedule(TestCase):
def setUp(self):
    """Read the saved schedule pages into attributes used by the tests.

    Each file is opened in a ``with`` block so its handle is closed as
    soon as the contents have been read, instead of being left open
    until the garbage collector reclaims it.
    """
    with open(october_2001_schedule_html) as html_file:
        self.october_2001_html = html_file.read()
    with open(october_2018_schedule_html) as html_file:
        self.october_2018_html = html_file.read()
    with open(april_2019_schedule_html) as html_file:
        self.april_2019_html = html_file.read()

def test_parse_october_2001_schedule_for_month_url_paths_(self):
urls = schedule.parse_schedule_for_month_url_paths(self.october_2001_html)
Expand Down Expand Up @@ -48,3 +50,18 @@ def test_parse_october_2001_schedule(self):
def test_parse_october_2018_schedule(self):
    """The October 2018 fixture page should parse into 104 entries."""
    games = schedule.parse_schedule(self.october_2018_html)
    game_count = len(games)
    self.assertEqual(game_count, 104)

def test_parse_future_game(self):
    """The first game on the April 2019 fixture page parses with ``None`` scores.

    Checks the number of parsed entries, then the start time, teams and
    scores of the first entry.
    """
    parsed_schedule = schedule.parse_schedule(self.april_2019_html)

    # Validate the result before indexing into it: a ``None`` result should
    # be reported as an assertion failure, not as a TypeError from the
    # subscript below.
    self.assertIsNotNone(parsed_schedule)
    self.assertEqual(len(parsed_schedule), 79)

    first_game = parsed_schedule[0]
    # The expected tip-off is written in US/Eastern time and converted to
    # UTC for the comparison.
    expected_first_game_start_time = (
        pytz.timezone("US/Eastern")
        .localize(datetime(year=2019, month=4, day=1, hour=19, minute=30))
        .astimezone(pytz.utc)
    )

    self.assertEqual(first_game["start_time"], expected_first_game_start_time)
    self.assertEqual(first_game["away_team"], Team.MIAMI_HEAT)
    self.assertEqual(first_game["home_team"], Team.BOSTON_CELTICS)
    self.assertIsNone(first_game["away_team_score"])
    self.assertIsNone(first_game["home_team_score"])

0 comments on commit 183d831

Please sign in to comment.