Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parametrizo verificación de SSL y timeouts dentro de DataJson #259

Merged
merged 2 commits into from
May 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions pydatajson/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Default value for the `timeout` argument forwarded to `requests.get` when a
# catalog is fetched over HTTP(S) (seconds, per the `requests` convention).
REQUESTS_TIMEOUT = 30
28 changes: 19 additions & 9 deletions pydatajson/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pydatajson.response_formatters import format_response
from pydatajson.validation import Validator, \
DEFAULT_CATALOG_SCHEMA_FILENAME, ABSOLUTE_SCHEMA_DIR
from . import documentation
from . import documentation, constants
from . import helpers
from . import indicators
from . import readers
Expand Down Expand Up @@ -53,7 +53,8 @@ class DataJson(dict):

def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
default_values=None, catalog_format=None,
validator_class=Validator):
validator_class=Validator, verify_ssl=False,
requests_timeout=constants.REQUESTS_TIMEOUT):
"""Lee un catálogo y crea un objeto con funciones para manipularlo.

Salvo que se indique lo contrario, se utiliza como default el schema
Expand All @@ -78,13 +79,17 @@ def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
"distribution_issued": "2017-06-22"
}
"""
self.verify_ssl = verify_ssl
self.requests_timeout = requests_timeout
# se construye el objeto DataJson con la interfaz de un diccionario
if catalog:

# lee representaciones de un catálogo hacia un diccionario
catalog = readers.read_catalog(catalog,
default_values=default_values,
catalog_format=catalog_format)
catalog_format=catalog_format,
verify=self.verify_ssl,
timeout=self.requests_timeout)

# copia todos los atributos del diccionario hacia el objeto
for key, value in iteritems(catalog):
Expand Down Expand Up @@ -242,7 +247,7 @@ def is_valid_catalog(self, catalog=None):
Returns:
bool: True si el data.json cumple con el schema, sino False.
"""
catalog = readers.read_catalog(catalog) if catalog else self
catalog = self._read_catalog(catalog) if catalog else self
return self.validator.is_valid(catalog)

@staticmethod
Expand Down Expand Up @@ -333,7 +338,7 @@ def validate_catalog(self, catalog=None, only_errors=False, fmt="dict",
"message", "validator", "validator_value", "error_code".

"""
catalog = readers.read_catalog(catalog) if catalog else self
catalog = self._read_catalog(catalog) if catalog else self

validation = self.validator.validate_catalog(catalog, only_errors)
if export_path:
Expand Down Expand Up @@ -562,7 +567,7 @@ def catalog_report(self, catalog, harvest='none', report=None,
"""

url = catalog if isinstance(catalog, string_types) else None
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

validation = self.validate_catalog(catalog)
catalog_validation = validation["error"]["catalog"]
Expand Down Expand Up @@ -799,7 +804,7 @@ def generate_harvestable_catalogs(self, catalogs, harvest='all',
if isinstance(catalogs, string_types + (dict,)):
catalogs = [catalogs]

harvestable_catalogs = [readers.read_catalog(c) for c in catalogs]
harvestable_catalogs = [self._read_catalog(c) for c in catalogs]
catalogs_urls = [catalog if isinstance(catalog, string_types)
else None for catalog in catalogs]

Expand Down Expand Up @@ -871,7 +876,7 @@ def generate_datasets_summary(self, catalog, export_path=None):
list: Contiene tantos dicts como datasets estén presentes en
`catalogs`, con los datos antes mencionados.
"""
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

# Trato de leer todos los datasets bien formados de la lista
# catalog["dataset"], si existe.
Expand Down Expand Up @@ -1012,7 +1017,7 @@ def _count_fields_recursive(self, dataset, fields):
return key_count

def dataset_is_updated(self, catalog, dataset):
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

for catalog_dataset in catalog.get('dataset', []):
if catalog_dataset.get('title') == dataset:
Expand Down Expand Up @@ -1093,6 +1098,11 @@ def make_catalogs_backup(self, catalogs=None,
# TODO: implementar función
pass

def _read_catalog(self, catalog):
    """Read `catalog` applying this instance's request settings.

    Delegates to `readers.read_catalog`, forwarding the SSL-verification
    flag and the timeout stored on the instance, and returns its result.
    """
    request_options = {
        "verify": self.verify_ssl,
        "timeout": self.requests_timeout,
    }
    return readers.read_catalog(catalog, **request_options)


def main():
"""Permite ejecutar el módulo por línea de comandos.
Expand Down
23 changes: 15 additions & 8 deletions pydatajson/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

import pydatajson
from . import custom_exceptions as ce
from . import helpers
from . import helpers, constants
from .ckan_reader import read_ckan_catalog

import urllib3
Expand All @@ -52,7 +52,8 @@ def read_catalog_obj(catalog):
return pydatajson.DataJson(catalog)


def read_catalog(catalog, default_values=None, catalog_format=None):
def read_catalog(catalog, default_values=None, catalog_format=None,
verify=False, timeout=constants.REQUESTS_TIMEOUT):
"""Toma una representación cualquiera de un catálogo, y devuelve su
representación interna (un diccionario de Python con su metadata.)

Expand Down Expand Up @@ -86,13 +87,17 @@ def read_catalog(catalog, default_values=None, catalog_format=None):
catalog_format = catalog_format or suffix
if catalog_format == "xlsx":
try:
catalog_dict = read_xlsx_catalog(catalog)
catalog_dict = read_xlsx_catalog(catalog,
verify=verify,
timeout=timeout)
except openpyxl_exceptions + \
(ValueError, AssertionError, IOError, BadZipfile) as e:
raise ce.NonParseableCatalog(catalog, str(e))
elif catalog_format == "json":
try:
catalog_dict = read_json(catalog)
catalog_dict = read_json(catalog,
verify=verify,
timeout=timeout)
except(ValueError, TypeError, IOError) as e:
raise ce.NonParseableCatalog(catalog, str(e))
elif catalog_format == "ckan":
Expand Down Expand Up @@ -182,7 +187,8 @@ def _set_default_value(dict_obj, keys, value):
variable[keys[-1]] = value


def read_json(json_path_or_url):
def read_json(json_path_or_url, verify=False,
timeout=constants.REQUESTS_TIMEOUT):
"""Toma el path a un JSON y devuelve el diccionario que representa.

Se asume que el parámetro es una URL si comienza con 'http' o 'https', o
Expand All @@ -200,7 +206,7 @@ def read_json(json_path_or_url):

parsed_url = urlparse(json_path_or_url)
if parsed_url.scheme in ["http", "https"]:
res = requests.get(json_path_or_url, verify=False)
res = requests.get(json_path_or_url, verify=verify, timeout=timeout)
json_dict = json.loads(res.content, encoding='utf-8')

else:
Expand All @@ -218,7 +224,8 @@ def read_json(json_path_or_url):
return json_dict


def read_xlsx_catalog(xlsx_path_or_url, logger=None):
def read_xlsx_catalog(xlsx_path_or_url, logger=None, verify=False,
timeout=constants.REQUESTS_TIMEOUT):
"""Toma el path a un catálogo en formato XLSX y devuelve el diccionario
que representa.

Expand All @@ -238,7 +245,7 @@ def read_xlsx_catalog(xlsx_path_or_url, logger=None):

parsed_url = urlparse(xlsx_path_or_url)
if parsed_url.scheme in ["http", "https"]:
res = requests.get(xlsx_path_or_url, verify=False)
res = requests.get(xlsx_path_or_url, verify=verify, timeout=timeout)
tmpfilename = ".tmpfile.xlsx"
with io.open(tmpfilename, 'wb') as tmpfile:
tmpfile.write(res.content)
Expand Down