Skip to content

Commit

Permalink
Merge pull request #293 from datosgobar/292-modificar-criterio-de-url-validas
Browse files Browse the repository at this point in the history

Modificar criterio de url validas
  • Loading branch information
AWolfsdorf committed Oct 17, 2019
2 parents 7ac9d34 + b0ecf5b commit fc3c27d
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 6 deletions.
3 changes: 2 additions & 1 deletion pydatajson/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Timeout applied to outgoing HTTP requests.
# NOTE(review): presumably expressed in seconds -- confirm against callers.
REQUESTS_TIMEOUT = 30
# Time zone name used as the library default.
DEFAULT_TIMEZONE = "America/Buenos_Aires"

# Allow-list of HTTP status codes.
# NOTE(review): the URL check in helpers.py appears to have moved to the
# deny-list below; confirm whether this name is still referenced anywhere.
VALID_STATUS_CODES = [200, 203, 302]
# Anchored patterns matched against the status code rendered as a string:
# any all-digit code starting with 4 or 5 marks the URL as not working.
INVALID_STATUS_CODES_REGEX = ["^4[0-9]+$", "^5[0-9]+$"]
# Status codes exempt from the deny-list above, so a 429 response is still
# reported as a working URL.
EXCEPTION_STATUS_CODES = [429]

# NOTE(review): presumably the number of worker threads used by the broken
# URL validator -- confirm against its usage.
CANT_THREADS_BROKEN_URL_VALIDATOR = 10
15 changes: 12 additions & 3 deletions pydatajson/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@

import requests
from openpyxl import load_workbook
from requests import RequestException
from requests import RequestException, Timeout
from six.moves.urllib_parse import urlparse

from six import string_types, iteritems
from unidecode import unidecode

from pydatajson.constants import VALID_STATUS_CODES
from pydatajson.constants import \
INVALID_STATUS_CODES_REGEX, \
EXCEPTION_STATUS_CODES
from pydatajson.download import download_to_file

logger = logging.getLogger('pydatajson.helpers')
Expand Down Expand Up @@ -571,6 +573,13 @@ def fields_to_uppercase(fields):
def is_working_url(url, timeout=1):
    """Check whether a URL answers a HEAD request with an acceptable status.

    A URL is considered broken when its status code matches one of the
    patterns in ``INVALID_STATUS_CODES_REGEX``, unless the code is listed in
    ``EXCEPTION_STATUS_CODES``, in which case it is always accepted.

    Args:
        url (str): URL to probe.
        timeout (int or float): Value forwarded to ``requests.head`` as its
            ``timeout`` argument. Defaults to 1, the previously hard-coded
            value.

    Returns:
        tuple: ``(is_working, status_code)``. ``status_code`` is the code of
        the response, 408 when the request timed out, or None when the
        request failed for any other reason.
    """
    # Only the network call can raise; keep the guarded region minimal.
    try:
        response = requests.head(url, timeout=timeout)
    except Timeout:
        # No response was received, so report the timeout using the
        # "Request Timeout" status code.
        return False, 408
    except Exception:
        # Deliberate best effort: any other failure (connection error,
        # malformed URL, ...) means "not working" with no status available.
        return False, None

    status_code = response.status_code
    if status_code in EXCEPTION_STATUS_CODES:
        # Exempt codes bypass the deny-list entirely.
        return True, status_code

    code_text = str(status_code)
    is_invalid = any(re.match(pattern, code_text)
                     for pattern in INVALID_STATUS_CODES_REGEX)
    return not is_invalid, status_code
12 changes: 10 additions & 2 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,23 @@ def test_validate_invalid_url(self, req_mock):
req_mock.head('http://test.com/', status_code=400)
self.assertEqual((False, 400), is_working_url('http://test.com/'))

@requests_mock.Mocker()
def test_validate_too_many_requests_response(self, req_mock):
    """A 429 response must still be reported as a working URL."""
    target = 'http://test.com/'
    req_mock.head(target, status_code=429)
    outcome = is_working_url(target)
    self.assertEqual((True, 429), outcome)

@requests_mock.Mocker()
def test_validate_url_with_exception(self, req_mock):
    """A connection failure yields ``(False, None)``: broken, no status."""
    target = 'http://test.com/'
    req_mock.head(target, exc=ConnectionError)
    outcome = is_working_url(target)
    self.assertEqual((False, None), outcome)

@requests_mock.Mocker()
def test_validate_url_with_timeout(self, req_mock):
    """A timed-out HEAD request is reported as broken with status 408."""
    # The ``test_`` prefix is required for the test runner to collect this
    # method; without it the check never ran.
    req_mock.head('http://test.com/', exc=Timeout)
    self.assertEqual((False, 408), is_working_url('http://test.com/'))

def test_validate_malformed_values(self):

Expand Down

0 comments on commit fc3c27d

Please sign in to comment.