
Commit

codestyle fix
FScaccheri committed Sep 23, 2019
1 parent a84b54a commit 706ca73
Showing 5 changed files with 47 additions and 50 deletions.
28 changes: 16 additions & 12 deletions pydatajson/custom_exceptions.py
@@ -14,7 +14,6 @@
except ImportError:
from urllib.parse import urlparse

- from requests.exceptions import InvalidURL

class BaseValidationError(object):
"""Estructura para errores de validación personalizados."""
@@ -107,9 +106,10 @@ class BrokenLandingPageError(BaseValidationError):
def __init__(self, dataset_idx, dataset_title, broken_url, status_code):

validator = "brokenLink"
message = f"Dataset ({dataset_title}) con 'landingPage' ({broken_url}) " \
f"inválida ({status_code})"
validator_value = f"Chequea que la 'landingPage' devuelva un status code válido"
message = "Dataset ({}) con 'landingPage' ({}) inválida ({})".format(
dataset_title, broken_url, status_code)
validator_value = "Chequea que la 'landingPage' devuelva un status " \
"code válido"
path = ['dataset', dataset_idx, 'landingPage']

super(BrokenLandingPageError, self).__init__(
@@ -118,11 +118,13 @@ def __init__(self, dataset_idx, dataset_title, broken_url, status_code):

class BrokenAccessUrlError(BaseValidationError):

- def __init__(self, dataset_idx, distribution_idx, distribution_title, broken_url, status_code):
+ def __init__(self, dataset_idx, distribution_idx,
+ distribution_title, broken_url, status_code):
validator = "brokenLink"
message = f"Distribution ({distribution_title}) con 'accessUrl' ({broken_url}) " \
f"inválida ({status_code})"
validator_value = f"Chequea que el 'accessUrl' devuelva un status code válido"
message = "Distribution ({}) con 'accessUrl' ({}) inválida ({})".\
format(distribution_title, broken_url, status_code)
validator_value = f"Chequea que el 'accessUrl' devuelva un status " \
f"code válido"
path = ['dataset', dataset_idx, 'distribution', distribution_idx,
'accessUrl']

@@ -132,11 +134,13 @@ def __init__(self, dataset_idx, distribution_idx, distribution_title, broken_url

class BrokenDownloadUrlError(BaseValidationError):

- def __init__(self, dataset_idx, distribution_idx, distribution_title, broken_url, status_code):
+ def __init__(self, dataset_idx, distribution_idx, distribution_title,
+ broken_url, status_code):
validator = "brokenLink"
message = f"Distribution ({distribution_title}) con 'downloadUrl' ({broken_url}) " \
f"inválida ({status_code})"
validator_value = f"Chequea que el 'downloadUrl' devuelva un status code válido"
message = "Distribution ({}) con 'downloadUrl' ({}) inválida ({})".\
format(distribution_title, broken_url, status_code)
validator_value = f"Chequea que el 'downloadUrl' devuelva un status " \
f"code válido"
path = ['dataset', dataset_idx, 'distribution', distribution_idx,
'downloadUrl']

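The changes in this file follow one pattern: long f-string error messages are replaced with `str.format()` calls split across lines. A minimal sketch of the equivalence, with hypothetical example values (the real values come from the catalog being validated); the f-string line itself needs Python 3.6+, while the `.format()` form also runs on Python 2.7, which the `urlparse` fallback at the top of the module suggests is still supported:

```python
# Hypothetical example values, for illustration only.
distribution_title = "Convocatorias abiertas durante el año 2015"
broken_url = "http://example.com/broken.csv"
status_code = 404

# Old style (removed here): f-string, Python 3.6+ only.
old_message = f"Distribution ({distribution_title}) con 'accessUrl' ({broken_url}) " \
              f"inválida ({status_code})"

# New style (added here): str.format(), also valid on Python 2.7 and easier
# to keep under the line-length limit.
new_message = "Distribution ({}) con 'accessUrl' ({}) inválida ({})".\
    format(distribution_title, broken_url, status_code)

# Both forms produce the identical message string.
assert old_message == new_message
```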
25 changes: 17 additions & 8 deletions pydatajson/validation.py
@@ -9,10 +9,10 @@
from __future__ import unicode_literals, print_function
from __future__ import with_statement, absolute_import

+ import logging
+ import mimetypes
import os
import platform
- import mimetypes
- import logging
from collections import Counter

import requests
@@ -241,19 +241,28 @@ def _validate_distributions_urls(self, catalog):
access_url = distribution.get('accessUrl')
download_url = distribution.get('downloadUrl')

- access_url_is_valid, status_code = self._validate_url(access_url)
- download_url_is_valid, status_code = self._validate_url(download_url)
+ access_url_is_valid, access_url_status_code = \
+ self._validate_url(access_url)
+ download_url_is_valid, download_url_status_code = \
+ self._validate_url(download_url)
if not access_url_is_valid:
- yield ce.BrokenAccessUrlError(dataset_idx, distribution_idx,
- distribution_title, access_url, status_code)
+ yield ce.BrokenAccessUrlError(dataset_idx,
+ distribution_idx,
+ distribution_title,
+ access_url,
+ access_url_status_code)
if not download_url_is_valid:
- yield ce.BrokenDownloadUrlError(dataset_idx, distribution_idx,
- distribution_title, download_url, status_code)
+ yield ce.BrokenDownloadUrlError(dataset_idx,
+ distribution_idx,
+ distribution_title,
+ download_url,
+ download_url_status_code)

def _validate_url(self, url):
response = requests.head(url)
return response.status_code in VALID_STATUS_CODES, response.status_code


def is_valid_catalog(catalog, validator=None):
"""Valida que un archivo `data.json` cumpla con el schema definido.
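Besides the line-splitting, this hunk renames the shared `status_code` variable into `access_url_status_code` and `download_url_status_code`, so each broken-link error reports the status of its own URL rather than whichever request ran last. A condensed, standalone sketch of the flow, using a stand-in for the module's `VALID_STATUS_CODES` constant (its real contents are defined elsewhere in pydatajson); the HEAD requests would hit the network if actually run:

```python
import requests

# Stand-in for pydatajson's VALID_STATUS_CODES; the real values may differ.
VALID_STATUS_CODES = range(200, 400)


def validate_url(url):
    """Mirror of _validate_url: HEAD the URL, return (is_valid, status_code)."""
    response = requests.head(url)
    return response.status_code in VALID_STATUS_CODES, response.status_code


access_ok, access_status = validate_url("http://example.com/accessUrl")
download_ok, download_status = validate_url("http://example.com/downloadUrl")

# With separate status variables, each message carries the code of its own URL.
if not access_ok:
    print("accessUrl inválida ({})".format(access_status))
if not download_ok:
    print("downloadUrl inválida ({})".format(download_status))
```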
15 changes: 1 addition & 14 deletions tests/support/factories/distribution_errors.py
@@ -40,20 +40,7 @@ def distribution_error():
"issued",
"identifier"
]
- },
- # Agrego este dict
- {
- "error_code": 2,
- "message": "Dataset (Sistema de contrataciones electrónicas) con 'landingPage' (http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra) inválida (301)",
- "validator": "brokenLink",
- "validator_value": "Chequea que la 'landingPage' devuelva un status code válido",
- "path": [
- "dataset",
- 0,
- "landingPage"
- ],
- "instance": None
- } #
+ }
],
"title": "Sistema de contrataciones electrónicas"
}
18 changes: 8 additions & 10 deletions tests/test_core.py
@@ -145,13 +145,15 @@ def tearDown(cls):
(u'dataset_temporal', u'2015-01-01/2015-12-31'),
(u'notas', u'No tiene distribuciones con datos.')])]

+ LANDING_PAGE = 'http://datos.gob.ar/dataset/' \
+ 'sistema-de-contrataciones-electronicas-argentina-compra'

@requests_mock.mock()
def test_catalog_report_harvest_good(self, m):
"""catalog_report() marcará para cosecha los datasets con metadata
válida si harvest='valid'."""

- m.get('http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra',
- text='data')
+ m.get(self.LANDING_PAGE, text='data')

catalog = os.path.join(self.SAMPLES_DIR, "full_data.json")

@@ -173,8 +175,7 @@ def test_catalog_report_harvest_valid(self, m):
"""catalog_report() marcará para cosecha los datasets con metadata
válida si harvest='valid'."""

- m.get('http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra',
- text='data')
+ m.get(self.LANDING_PAGE, text='data')
catalog = os.path.join(self.SAMPLES_DIR, "full_data.json")

actual = self.dj.catalog_report(
@@ -195,8 +196,7 @@ def test_catalog_report_harvest_none(self, m):
"""catalog_report() no marcará ningún dataset para cosecha si
harvest='none'."""

- m.get('http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra',
- text='data')
+ m.get(self.LANDING_PAGE, text='data')

catalog = os.path.join(self.SAMPLES_DIR, "full_data.json")

@@ -217,8 +217,7 @@ def test_catalog_report_harvest_all(self, m):
"""catalog_report() marcará todo dataset para cosecha si
harvest='all'."""

- m.get('http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra',
- text='data')
+ m.get(self.LANDING_PAGE, text='data')

catalog = os.path.join(self.SAMPLES_DIR, "full_data.json")

@@ -240,8 +239,7 @@ def test_catalog_report_harvest_report(self, m):
`report` si harvest='report'."""
catalog = os.path.join(self.SAMPLES_DIR, "full_data.json")

- m.get('http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra',
- text='data')
+ m.get(self.LANDING_PAGE, text='data')

# Compruebo que no se harvestee nada si el reporte no incluye el
# dataset del catálogo
11 changes: 5 additions & 6 deletions tests/test_validation.py
@@ -32,6 +32,8 @@ class TestDataJsonTestCase(object):
SAMPLES_DIR = os.path.join("tests", "samples")
RESULTS_DIR = RESULTS_DIR
TEMP_DIR = os.path.join("tests", "temp")
+ LANDING_PAGE = 'http://datos.gob.ar/dataset/' \
+ 'sistema-de-contrataciones-electronicas-argentina-compra'

@classmethod
def get_sample(cls, sample_filename):
@@ -79,8 +81,7 @@ def run_case(self, case_filename, expected_dict=None):
# Tests de inputs válidos
@requests_mock.mock()
def test_validity(self, m):
- m.get('http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra',
- text='data')
+ m.get(self.LANDING_PAGE, text='data')
for filename, value_or_none in iteritems(TEST_FILE_RESPONSES):
yield self.run_case, filename, value_or_none

@@ -318,8 +319,7 @@ def test_correctness_of_accrualPeriodicity_regex(self, m):
"""Prueba que la regex de validación de
dataset["accrualPeriodicity"] sea correcta."""

- m.get('http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra',
- text='data')
+ m.get(self.LANDING_PAGE, text='data')

datajson_path = "tests/samples/full_data.json"
datajson = json.load(open(datajson_path))
@@ -347,8 +347,7 @@ def test_correctness_of_accrualPeriodicity_regex(self, m):

@requests_mock.mock()
def test_valid_catalog_list_format(self, m):
- m.get('http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra',
- text='data')
+ m.get(self.LANDING_PAGE, text='data')
report_list = self.dj.validate_catalog(fmt='list')
assert_true(len(report_list['catalog']) == 1)
assert_true(report_list['catalog'][0]['catalog_status'] == 'OK')
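All of the test changes above replace the repeated long landing-page URL with a `LANDING_PAGE` class constant passed to `requests_mock`. A minimal standalone sketch of that mocking pattern, using the context-manager form of `requests_mock` (the tests use the equivalent `@requests_mock.mock()` decorator); the asserted values are whatever the stub registers, not real server output:

```python
import requests
import requests_mock

LANDING_PAGE = 'http://datos.gob.ar/dataset/' \
               'sistema-de-contrataciones-electronicas-argentina-compra'

with requests_mock.Mocker() as m:
    # Any GET to LANDING_PAGE made through `requests` is intercepted and
    # answered with this canned body, so the tests never hit the network.
    m.get(LANDING_PAGE, text='data')

    response = requests.get(LANDING_PAGE)
    assert response.status_code == 200
    assert response.text == 'data'
```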
