Skip to content

Commit

Permalink
tasks: handle invalid URLs
Browse files
  • Loading branch information
brutasse committed Apr 26, 2017
1 parent e3cedad commit 242711e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
9 changes: 7 additions & 2 deletions feedhq/feeds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,14 @@ def update_feed(self, url, etag=None, last_modified=None, subscribers=1,
backoff_factor=1, previous_error=None, link=None,
title=None, hub=None):
url = URLObject(url)
try:
domain = url.netloc.without_auth().without_port()
except TypeError as e:
logger.info("invalid URL", url=url, exc_info=e)
self.mute_feed(url, UniqueFeed.PARSE_ERROR)
return
# Check if this domain has rate-limiting rules
- ratelimit_key = 'ratelimit:{0}'.format(
-     url.netloc.without_auth().without_port())
+ ratelimit_key = 'ratelimit:{0}'.format(domain)
retry_at = cache.get(ratelimit_key)
if retry_at:
retry_in = (epoch_to_utc(retry_at) - timezone.now()).seconds
Expand Down
5 changes: 3 additions & 2 deletions feedhq/feeds/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from django.utils import timezone
from rache import job_details, job_key
from requests.exceptions import (ConnectionError, ConnectTimeout,
- MissingSchema, ReadTimeout, TooManyRedirects)
+ InvalidSchema, MissingSchema, ReadTimeout,
+ TooManyRedirects)

from .. import __version__
from ..utils import get_redis_connection
Expand Down Expand Up @@ -64,7 +65,7 @@ def resolve_url(url):
try:
response = requests.head(url, headers={'User-Agent': LINK_CHECKER},
allow_redirects=True, timeout=3)
- except (ConnectionError, ConnectTimeout, MissingSchema,
+ except (ConnectionError, ConnectTimeout, InvalidSchema, MissingSchema,
ReadTimeout, TooManyRedirects):
pass
else:
Expand Down
7 changes: 7 additions & 0 deletions tests/test_fetching.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ def test_socket_timeout(self, get):
data = job_details(f.url, connection=get_redis_connection())
self.assertEqual(data['error'], f.TIMEOUT)

@patch("requests.get")
def test_invalid_url(self, get):
FeedFactory.create(url='feed/')
f = UniqueFeed.objects.get()
self.assertTrue(f.muted)
self.assertEqual(f.error, f.PARSE_ERROR)

@patch('requests.head')
@patch('requests.get')
def test_ctype(self, get, head):
Expand Down

0 comments on commit 242711e

Please sign in to comment.