add rate limiting to url requests #1

Merged (1 commit) on Nov 17, 2022
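This pull request replaces direct pq(url) page fetches with a _rate_limit_pq helper imported from sportsipy.utils, so that outgoing page requests are throttled. The helper's implementation is not part of this diff; the sketch below is a minimal, hypothetical version assuming a simple fixed delay between requests (the interval value and function body are illustrative only, not the library's actual code).

# Hypothetical sketch of sportsipy.utils._rate_limit_pq -- not shown in this
# diff; the delay value is an assumption for illustration.
import time

from pyquery import PyQuery as pq

_MIN_REQUEST_INTERVAL = 3.0  # assumed minimum seconds between requests
_last_request_time = 0.0


def _rate_limit_pq(url):
    """Fetch a URL with PyQuery, sleeping first if the last request was too recent."""
    global _last_request_time
    elapsed = time.time() - _last_request_time
    if elapsed < _MIN_REQUEST_INTERVAL:
        time.sleep(_MIN_REQUEST_INTERVAL - elapsed)
    _last_request_time = time.time()
    return pq(url=url)

Callers then swap pq(SOME_URL) for utils._rate_limit_pq(SOME_URL) (or import _rate_limit_pq directly, as sportsipy/fb/roster.py does), as the file-by-file diffs below show.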
5 changes: 3 additions & 2 deletions sportsipy/fb/roster.py
@@ -7,7 +7,8 @@
 from pyquery import PyQuery as pq
 from sportsipy.utils import (_get_stats_table,
                              _parse_field,
-                             _remove_html_comment_tags)
+                             _remove_html_comment_tags,
+                             _rate_limit_pq)
 from urllib.error import HTTPError


@@ -1642,7 +1643,7 @@ def _pull_stats(self, doc):
         """
         if not doc:
             try:
-                doc = pq(SQUAD_URL % self._squad_id)
+                doc = _rate_limit_pq(SQUAD_URL % self._squad_id)
                 doc = pq(_remove_html_comment_tags(doc))
             except HTTPError:
                 return None
2 changes: 1 addition & 1 deletion sportsipy/fb/schedule.py
@@ -613,7 +613,7 @@ def _pull_schedule(self, team_id, doc):
         if not doc:
             squad_id = _lookup_team(team_id)
             try:
-                doc = pq(SQUAD_URL % squad_id)
+                doc = utils._rate_limit_pq(SQUAD_URL % squad_id)
             except HTTPError:
                 return
         schedule = utils._get_stats_table(doc, 'table#matchlogs_all')
8 changes: 5 additions & 3 deletions sportsipy/mlb/boxscore.py
@@ -468,8 +468,10 @@ def _retrieve_html_page(self, uri):
         """
         url = BOXSCORE_URL % uri
         try:
-            url_data = pq(url)
-        except HTTPError:
+            url_data = utils._rate_limit_pq(url)
+        except HTTPError as e:
+            print('HTTP Error')
+            print(e)
             return None
         return pq(utils._remove_html_comment_tags(url_data))

@@ -1732,7 +1734,7 @@ def _get_requested_page(self, url):
         A PyQuery object containing the HTML contents of the requested
         page.
         """
-        return pq(url)
+        return utils._rate_limit_pq(url)

     def _get_boxscore_uri(self, url):
         """
4 changes: 2 additions & 2 deletions sportsipy/mlb/roster.py
@@ -247,7 +247,7 @@ def _retrieve_html_page(self):
         """
         url = self._build_url()
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except HTTPError:
             return None
         return pq(utils._remove_html_comment_tags(url_data))
@@ -1489,7 +1489,7 @@ def _pull_team_page(self, url):
         Returns a PyQuery object of the team's HTML page.
         """
         try:
-            return pq(url)
+            return utils._rate_limit_pq(url)
         except HTTPError:
             return None
2 changes: 1 addition & 1 deletion sportsipy/mlb/schedule.py
@@ -492,7 +492,7 @@ def _pull_schedule(self, abbreviation, year):
                 utils._url_exists(SCHEDULE_URL % (abbreviation,
                                                   str(int(year) - 1))):
             year = str(int(year) - 1)
-        doc = pq(SCHEDULE_URL % (abbreviation, year))
+        doc = utils._rate_limit_pq(SCHEDULE_URL % (abbreviation, year))
         schedule = utils._get_stats_table(doc, 'table#team_schedule')
         if not schedule:
             utils._no_data_found()
4 changes: 2 additions & 2 deletions sportsipy/nba/boxscore.py
@@ -314,7 +314,7 @@ def _retrieve_html_page(self, uri):
         """
         url = BOXSCORE_URL % uri
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except HTTPError:
             return None
         return pq(utils._remove_html_comment_tags(url_data))
@@ -1644,7 +1644,7 @@ def _get_requested_page(self, url):
         A PyQuery object containing the HTML contents of the requested
         page.
         """
-        return pq(url)
+        return utils._rate_limit_pq(url)

     def _get_boxscore_uri(self, url):
         """
2 changes: 1 addition & 1 deletion sportsipy/nba/nba_utils.py
@@ -72,7 +72,7 @@ def _retrieve_all_teams(year, season_file=None):
     # instead.
     if year == 2021:
         try:
-            doc = pq(SEASON_PAGE_URL % year)
+            doc = utils._rate_limit_pq(SEASON_PAGE_URL % year)
         except HTTPError:
             year = str(int(year) - 1)
     # If stats for the requested season do not exist yet (as is the case
6 changes: 3 additions & 3 deletions sportsipy/nba/roster.py
@@ -235,7 +235,7 @@ def _retrieve_html_page(self):
         """
         url = self._build_url()
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except (HTTPError, ParserError):
             return None
         return pq(utils._remove_html_comment_tags(url_data))
@@ -1422,7 +1422,7 @@ def _pull_team_page(self, url):
         Returns a PyQuery object of the team's HTML page.
         """
         try:
-            return pq(url)
+            return utils._rate_limit_pq(url)
         except HTTPError:
             return None

@@ -1535,7 +1535,7 @@ def _find_players_with_coach(self, year):
         # be pulled instead.
         if year == 2021:
             try:
-                doc = pq(self._create_url(year))
+                doc = utils._rate_limit_pq(self._create_url(year))
             except HTTPError:
                 year = str(int(year) - 1)
         # If stats for the requested season do not exist yet (as is the
4 changes: 2 additions & 2 deletions sportsipy/nba/schedule.py
@@ -435,7 +435,7 @@ def _pull_schedule(self, abbreviation, year):
         # be pulled instead.
         if year == 2021:
             try:
-                doc = pq(SCHEDULE_URL % (abbreviation.lower(), year))
+                doc = utils._rate_limit_pq(SCHEDULE_URL % (abbreviation.lower(), year))
             except HTTPError:
                 year = str(int(year) - 1)
         # If stats for the requested season do not exist yet (as is the
@@ -447,7 +447,7 @@ def _pull_schedule(self, abbreviation, year):
                 utils._url_exists(SCHEDULE_URL % (abbreviation.lower(),
                                                   str(int(year) - 1))):
             year = str(int(year) - 1)
-        doc = pq(SCHEDULE_URL % (abbreviation, year))
+        doc = utils._rate_limit_pq(SCHEDULE_URL % (abbreviation, year))
         schedule = utils._get_stats_table(doc, 'table#games')
         if not schedule:
             utils._no_data_found()
4 changes: 2 additions & 2 deletions sportsipy/ncaab/boxscore.py
@@ -259,7 +259,7 @@ def _retrieve_html_page(self, uri):
         """
         url = BOXSCORE_URL % uri
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except HTTPError:
             return None
         return pq(utils._remove_html_comment_tags(url_data))
@@ -1702,7 +1702,7 @@ def _get_requested_page(self, url):
         A PyQuery object containing the HTML contents of the requested
         page.
         """
-        return pq(url)
+        return utils._rate_limit_pq(url)

     def _get_boxscore_uri(self, url):
         """
4 changes: 2 additions & 2 deletions sportsipy/ncaab/conferences.py
@@ -55,7 +55,7 @@ def _pull_conference_page(self, conference_abbreviation, year):
             A string of the requested year to pull conference information from.
         """
         try:
-            return pq(CONFERENCE_URL % (conference_abbreviation, year))
+            return utils._rate_limit_pq(CONFERENCE_URL % (conference_abbreviation, year))
         except HTTPError:
             return None

@@ -184,7 +184,7 @@ def _pull_conference_page(self, year):
         Returns a PyQuery object of the conference HTML page.
         """
         try:
-            return pq(CONFERENCES_URL % year)
+            return utils._rate_limit_pq(CONFERENCES_URL % year)
         except HTTPError:
             return None
2 changes: 1 addition & 1 deletion sportsipy/ncaab/rankings.py
@@ -54,7 +54,7 @@ def _pull_rankings_page(self, year):
         Returns a PyQuery object of the rankings HTML page.
         """
         try:
-            return pq(RANKINGS_URL % year)
+            return utils._rate_limit_pq(RANKINGS_URL % year)
         except HTTPError:
             return None
4 changes: 2 additions & 2 deletions sportsipy/ncaab/roster.py
@@ -145,7 +145,7 @@ def _retrieve_html_page(self):
         """
         url = PLAYER_URL % self._player_id
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except (HTTPError, ParserError):
             return None
         return pq(utils._remove_html_comment_tags(url_data))
@@ -695,7 +695,7 @@ def _pull_team_page(self, url):
         Returns a PyQuery object of the team's HTML page.
         """
         try:
-            return pq(url)
+            return utils._rate_limit_pq(url)
         except HTTPError:
             return None
2 changes: 1 addition & 1 deletion sportsipy/ncaab/schedule.py
@@ -521,7 +521,7 @@ def _pull_schedule(self, abbreviation, year):
                 utils._url_exists(SCHEDULE_URL % (abbreviation.lower(),
                                                   str(int(year) - 1))):
             year = str(int(year) - 1)
-        doc = pq(SCHEDULE_URL % (abbreviation.lower(), year))
+        doc = utils._rate_limit_pq(SCHEDULE_URL % (abbreviation.lower(), year))
         schedule = utils._get_stats_table(doc, 'table#schedule')
         if not schedule:
             utils._no_data_found()
4 changes: 2 additions & 2 deletions sportsipy/ncaaf/boxscore.py
@@ -381,7 +381,7 @@ def _retrieve_html_page(self, uri):
         """
         url = BOXSCORE_URL % uri
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except HTTPError:
             return None
         return pq(utils._remove_html_comment_tags(url_data))
@@ -1287,7 +1287,7 @@ def _get_requested_page(self, url):
         A PyQuery object containing the HTML contents of the requested
         page.
         """
-        return pq(url)
+        return utils._rate_limit_pq(url)

     def _get_boxscore_uri(self, url):
         """
4 changes: 2 additions & 2 deletions sportsipy/ncaaf/conferences.py
@@ -63,7 +63,7 @@ def _pull_conference_page(self, conference_abbreviation, year):
             A string of the requested year to pull conference information from.
         """
         try:
-            return pq(CONFERENCE_URL % (conference_abbreviation, year))
+            return utils._rate_limit_pq(CONFERENCE_URL % (conference_abbreviation, year))
         except (HTTPError, ParserError):
             return None

@@ -201,7 +201,7 @@ def _pull_conference_page(self, year):
         Returns a PyQuery object of the conference HTML page.
         """
         try:
-            return pq(CONFERENCES_URL % year)
+            return utils._rate_limit_pq(CONFERENCES_URL % year)
         except HTTPError:
             return None
4 changes: 2 additions & 2 deletions sportsipy/ncaaf/rankings.py
@@ -54,7 +54,7 @@ def _pull_rankings_page(self, year):
         Returns a PyQuery object of the rankings HTML page.
         """
         try:
-            return pq(RANKINGS_URL % year)
+            return utils._rate_limit_pq(RANKINGS_URL % year)
         except HTTPError:
             return None

@@ -269,7 +269,7 @@ def _pull_rankings_page(self, year):
         Returns a PyQuery object of the rankings HTML page.
         """
         try:
-            return pq(RANKINGS_URL % year)
+            return utils._rate_limit_pq(RANKINGS_URL % year)
         except HTTPError:
             return None
2 changes: 1 addition & 1 deletion sportsipy/ncaaf/roster.py
@@ -170,7 +170,7 @@ def _retrieve_html_page(self):
         """
         url = self._build_url()
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except HTTPError:
             return None
         return pq(utils._remove_html_comment_tags(url_data))
2 changes: 1 addition & 1 deletion sportsipy/ncaaf/schedule.py
@@ -468,7 +468,7 @@ def _pull_schedule(self, abbreviation, year):
                 utils._url_exists(SCHEDULE_URL % (abbreviation.lower(),
                                                   str(int(year) - 1))):
             year = str(int(year) - 1)
-        doc = pq(SCHEDULE_URL % (abbreviation.lower(), year))
+        doc = utils._rate_limit_pq(SCHEDULE_URL % (abbreviation.lower(), year))
         schedule = utils._get_stats_table(doc, 'table#schedule')
         if not schedule:
             utils._no_data_found()
4 changes: 2 additions & 2 deletions sportsipy/nfl/boxscore.py
@@ -330,7 +330,7 @@ def _retrieve_html_page(self, uri):
         """
         url = BOXSCORE_URL % uri
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except HTTPError:
             return None
         # For NFL, a 404 page doesn't actually raise a 404 error, so it needs
@@ -1558,7 +1558,7 @@ def _get_requested_page(self, url):
         A PyQuery object containing the HTML contents of the requested
         page.
         """
-        return pq(url)
+        return utils._rate_limit_pq(url)

     def _get_boxscore_uri(self, url):
         """
2 changes: 1 addition & 1 deletion sportsipy/nfl/roster.py
@@ -275,7 +275,7 @@ def _retrieve_html_page(self):
         """
         url = self._build_url()
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except (HTTPError, ParserError):
             return None
         # For NFL, a 404 page doesn't actually raise a 404 error, so it needs
2 changes: 1 addition & 1 deletion sportsipy/nfl/schedule.py
@@ -703,7 +703,7 @@ def _pull_schedule(self, abbreviation, year):
                 utils._url_exists(SCHEDULE_URL % (abbreviation.lower(),
                                                   str(int(year) - 1))):
             year = str(int(year) - 1)
-        doc = pq(SCHEDULE_URL % (abbreviation.lower(), year))
+        doc = utils._rate_limit_pq(SCHEDULE_URL % (abbreviation.lower(), year))
         schedule = utils._get_stats_table(doc, 'table#gamelog%s' % year)
         if not schedule:
             utils._no_data_found()
4 changes: 2 additions & 2 deletions sportsipy/nhl/boxscore.py
@@ -310,7 +310,7 @@ def _retrieve_html_page(self, uri):
         """
         url = BOXSCORE_URL % uri
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except HTTPError:
             return None
         return pq(utils._remove_html_comment_tags(url_data))
@@ -1212,7 +1212,7 @@ def _get_requested_page(self, url):
         A PyQuery object containing the HTML contents of the requested
         page.
         """
-        return pq(url)
+        return utils._rate_limit_pq(url)

     def _get_boxscore_uri(self, url):
         """
2 changes: 1 addition & 1 deletion sportsipy/nhl/roster.py
@@ -209,7 +209,7 @@ def _retrieve_html_page(self):
         """
         url = self._build_url()
         try:
-            url_data = pq(url)
+            url_data = utils._rate_limit_pq(url)
         except HTTPError:
             return None
         return pq(utils._remove_html_comment_tags(url_data))
2 changes: 1 addition & 1 deletion sportsipy/nhl/schedule.py
@@ -605,7 +605,7 @@ def _pull_schedule(self, abbreviation, year):
                 utils._url_exists(SCHEDULE_URL % (abbreviation,
                                                   str(int(year) - 1))):
             year = str(int(year) - 1)
-        doc = pq(SCHEDULE_URL % (abbreviation, year))
+        doc = utils._rate_limit_pq(SCHEDULE_URL % (abbreviation, year))
         schedule = utils._get_stats_table(doc, 'table#tm_gamelog_rs')
         if not schedule:
             utils._no_data_found()