Merge pull request #259 from datosgobar/253-config-verify-ssl
Parametrize SSL verification and timeouts within DataJson
lucaslavandeira committed May 28, 2019
2 parents 66bda5e + f7ab1b3 commit 6378198
Showing 3 changed files with 35 additions and 17 deletions.
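As a usage note, here is a minimal sketch of how the new constructor parameters could be passed. The catalog URL is a placeholder and the values shown are illustrative assumptions; per this diff, the defaults remain verify_ssl=False and requests_timeout=constants.REQUESTS_TIMEOUT (30 seconds).

from pydatajson import DataJson

# Hypothetical catalog URL, for illustration only.
dj = DataJson(
    "https://example.org/data.json",
    verify_ssl=True,       # default stays False, as in this diff
    requests_timeout=10,   # overrides constants.REQUESTS_TIMEOUT (30 seconds)
)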
1 change: 1 addition & 0 deletions pydatajson/constants.py
@@ -0,0 +1 @@
REQUESTS_TIMEOUT = 30
28 changes: 19 additions & 9 deletions pydatajson/core.py
@@ -25,7 +25,7 @@
from pydatajson.response_formatters import format_response
from pydatajson.validation import Validator, \
DEFAULT_CATALOG_SCHEMA_FILENAME, ABSOLUTE_SCHEMA_DIR
from . import documentation
from . import documentation, constants
from . import helpers
from . import indicators
from . import readers
@@ -53,7 +53,8 @@ class DataJson(dict):

def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
default_values=None, catalog_format=None,
validator_class=Validator):
validator_class=Validator, verify_ssl=False,
requests_timeout=constants.REQUESTS_TIMEOUT):
"""Reads a catalog and creates an object with functions to manipulate it.
Unless otherwise indicated, the schema used by default is the
@@ -78,13 +79,17 @@ def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
"distribution_issued": "2017-06-22"
}
"""
self.verify_ssl = verify_ssl
self.requests_timeout = requests_timeout
# build the DataJson object with a dictionary interface
if catalog:

# read any catalog representation into a dictionary
catalog = readers.read_catalog(catalog,
default_values=default_values,
catalog_format=catalog_format)
catalog_format=catalog_format,
verify=self.verify_ssl,
timeout=self.requests_timeout)

# copy every attribute from the dictionary into the object
for key, value in iteritems(catalog):
@@ -242,7 +247,7 @@ def is_valid_catalog(self, catalog=None):
Returns:
bool: True if the data.json complies with the schema, otherwise False.
"""
catalog = readers.read_catalog(catalog) if catalog else self
catalog = self._read_catalog(catalog) if catalog else self
return self.validator.is_valid(catalog)

@staticmethod
@@ -333,7 +338,7 @@ def validate_catalog(self, catalog=None, only_errors=False, fmt="dict",
"message", "validator", "validator_value", "error_code".
"""
catalog = readers.read_catalog(catalog) if catalog else self
catalog = self._read_catalog(catalog) if catalog else self

validation = self.validator.validate_catalog(catalog, only_errors)
if export_path:
@@ -562,7 +567,7 @@ def catalog_report(self, catalog, harvest='none', report=None,
"""

url = catalog if isinstance(catalog, string_types) else None
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

validation = self.validate_catalog(catalog)
catalog_validation = validation["error"]["catalog"]
@@ -799,7 +804,7 @@ def generate_harvestable_catalogs(self, catalogs, harvest='all',
if isinstance(catalogs, string_types + (dict,)):
catalogs = [catalogs]

harvestable_catalogs = [readers.read_catalog(c) for c in catalogs]
harvestable_catalogs = [self._read_catalog(c) for c in catalogs]
catalogs_urls = [catalog if isinstance(catalog, string_types)
else None for catalog in catalogs]

@@ -871,7 +876,7 @@ def generate_datasets_summary(self, catalog, export_path=None):
list: Contains as many dicts as there are datasets present in
`catalogs`, with the data mentioned above.
"""
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

# Try to read every well-formed dataset from the list
# catalog["dataset"], if it exists.
@@ -1012,7 +1017,7 @@ def _count_fields_recursive(self, dataset, fields):
return key_count

def dataset_is_updated(self, catalog, dataset):
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

for catalog_dataset in catalog.get('dataset', []):
if catalog_dataset.get('title') == dataset:
@@ -1093,6 +1098,11 @@ def make_catalogs_backup(self, catalogs=None,
# TODO: implement function
pass

def _read_catalog(self, catalog):
return readers.read_catalog(catalog,
verify=self.verify_ssl,
timeout=self.requests_timeout)


def main():
"""Allows running the module from the command line.
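The new _read_catalog() helper threads the instance's verify_ssl and requests_timeout into every internal readers.read_catalog() call, so methods that accept a catalog URL pick up those settings automatically. A brief sketch under that reading, with a placeholder URL:

from pydatajson import DataJson

dj = DataJson(verify_ssl=True, requests_timeout=5)

# Both calls fetch the remote catalog through DataJson._read_catalog(),
# so the request uses the instance's SSL verification and timeout settings.
dj.is_valid_catalog("https://example.org/data.json")
report = dj.validate_catalog("https://example.org/data.json")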
23 changes: 15 additions & 8 deletions pydatajson/readers.py
@@ -27,7 +27,7 @@

import pydatajson
from . import custom_exceptions as ce
from . import helpers
from . import helpers, constants
from .ckan_reader import read_ckan_catalog

import urllib3
Expand All @@ -52,7 +52,8 @@ def read_catalog_obj(catalog):
return pydatajson.DataJson(catalog)


def read_catalog(catalog, default_values=None, catalog_format=None):
def read_catalog(catalog, default_values=None, catalog_format=None,
verify=False, timeout=constants.REQUESTS_TIMEOUT):
"""Takes any representation of a catalog and returns its internal
representation (a Python dictionary with its metadata).
@@ -86,13 +87,17 @@ def read_catalog(catalog, default_values=None, catalog_format=None):
catalog_format = catalog_format or suffix
if catalog_format == "xlsx":
try:
catalog_dict = read_xlsx_catalog(catalog)
catalog_dict = read_xlsx_catalog(catalog,
verify=verify,
timeout=timeout)
except openpyxl_exceptions + \
(ValueError, AssertionError, IOError, BadZipfile) as e:
raise ce.NonParseableCatalog(catalog, str(e))
elif catalog_format == "json":
try:
catalog_dict = read_json(catalog)
catalog_dict = read_json(catalog,
verify=verify,
timeout=timeout)
except(ValueError, TypeError, IOError) as e:
raise ce.NonParseableCatalog(catalog, str(e))
elif catalog_format == "ckan":
@@ -182,7 +187,8 @@ def _set_default_value(dict_obj, keys, value):
variable[keys[-1]] = value


def read_json(json_path_or_url):
def read_json(json_path_or_url, verify=False,
timeout=constants.REQUESTS_TIMEOUT):
"""Takes the path to a JSON file and returns the dictionary it represents.
The parameter is assumed to be a URL if it starts with 'http' or 'https', or
@@ -200,7 +206,7 @@ def read_json(json_path_or_url):

parsed_url = urlparse(json_path_or_url)
if parsed_url.scheme in ["http", "https"]:
res = requests.get(json_path_or_url, verify=False)
res = requests.get(json_path_or_url, verify=verify, timeout=timeout)
json_dict = json.loads(res.content, encoding='utf-8')

else:
Expand All @@ -218,7 +224,8 @@ def read_json(json_path_or_url):
return json_dict


def read_xlsx_catalog(xlsx_path_or_url, logger=None):
def read_xlsx_catalog(xlsx_path_or_url, logger=None, verify=False,
timeout=constants.REQUESTS_TIMEOUT):
"""Takes the path to a catalog in XLSX format and returns the dictionary
it represents.
@@ -238,7 +245,7 @@ def read_xlsx_catalog(xlsx_path_or_url, logger=None):

parsed_url = urlparse(xlsx_path_or_url)
if parsed_url.scheme in ["http", "https"]:
res = requests.get(xlsx_path_or_url, verify=False)
res = requests.get(xlsx_path_or_url, verify=verify, timeout=timeout)
tmpfilename = ".tmpfile.xlsx"
with io.open(tmpfilename, 'wb') as tmpfile:
tmpfile.write(res.content)
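The readers can also be called directly with the new keyword arguments; verify=False and timeout=constants.REQUESTS_TIMEOUT remain the defaults. A minimal sketch with a placeholder URL and illustrative values:

from pydatajson import readers

catalog_dict = readers.read_catalog(
    "https://example.org/catalog.xlsx",
    catalog_format="xlsx",
    verify=True,   # passed through to requests.get()
    timeout=15,    # seconds; default is constants.REQUESTS_TIMEOUT (30)
)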
