Skip to content

Commit

Permalink
Agrego parámetro de verificación de SSL certs
Browse files Browse the repository at this point in the history
  • Loading branch information
lucaslavandeira committed May 28, 2019
1 parent 66bda5e commit 2b8abf3
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 15 deletions.
22 changes: 14 additions & 8 deletions pydatajson/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class DataJson(dict):

def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
default_values=None, catalog_format=None,
validator_class=Validator):
validator_class=Validator, verify_ssl=False):
"""Lee un catálogo y crea un objeto con funciones para manipularlo.
Salvo que se indique lo contrario, se utiliza como default el schema
Expand All @@ -78,13 +78,16 @@ def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
"distribution_issued": "2017-06-22"
}
"""
self.verify_ssl = verify_ssl

# se construye el objeto DataJson con la interfaz de un diccionario
if catalog:

# lee representaciones de un catálogo hacia un diccionario
catalog = readers.read_catalog(catalog,
default_values=default_values,
catalog_format=catalog_format)
catalog_format=catalog_format,
verify=self.verify_ssl)

# copia todos los atributos del diccionario hacia el objeto
for key, value in iteritems(catalog):
Expand Down Expand Up @@ -242,7 +245,7 @@ def is_valid_catalog(self, catalog=None):
Returns:
bool: True si el data.json cumple con el schema, sino False.
"""
catalog = readers.read_catalog(catalog) if catalog else self
catalog = self._read_catalog(catalog) if catalog else self
return self.validator.is_valid(catalog)

@staticmethod
Expand Down Expand Up @@ -333,7 +336,7 @@ def validate_catalog(self, catalog=None, only_errors=False, fmt="dict",
"message", "validator", "validator_value", "error_code".
"""
catalog = readers.read_catalog(catalog) if catalog else self
catalog = self._read_catalog(catalog) if catalog else self

validation = self.validator.validate_catalog(catalog, only_errors)
if export_path:
Expand Down Expand Up @@ -562,7 +565,7 @@ def catalog_report(self, catalog, harvest='none', report=None,
"""

url = catalog if isinstance(catalog, string_types) else None
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

validation = self.validate_catalog(catalog)
catalog_validation = validation["error"]["catalog"]
Expand Down Expand Up @@ -799,7 +802,7 @@ def generate_harvestable_catalogs(self, catalogs, harvest='all',
if isinstance(catalogs, string_types + (dict,)):
catalogs = [catalogs]

harvestable_catalogs = [readers.read_catalog(c) for c in catalogs]
harvestable_catalogs = [self._read_catalog(c) for c in catalogs]
catalogs_urls = [catalog if isinstance(catalog, string_types)
else None for catalog in catalogs]

Expand Down Expand Up @@ -871,7 +874,7 @@ def generate_datasets_summary(self, catalog, export_path=None):
list: Contiene tantos dicts como datasets estén presentes en
`catalogs`, con los datos antes mencionados.
"""
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

# Trato de leer todos los datasets bien formados de la lista
# catalog["dataset"], si existe.
Expand Down Expand Up @@ -1012,7 +1015,7 @@ def _count_fields_recursive(self, dataset, fields):
return key_count

def dataset_is_updated(self, catalog, dataset):
catalog = readers.read_catalog(catalog)
catalog = self._read_catalog(catalog)

for catalog_dataset in catalog.get('dataset', []):
if catalog_dataset.get('title') == dataset:
Expand Down Expand Up @@ -1093,6 +1096,9 @@ def make_catalogs_backup(self, catalogs=None,
# TODO: implementar función
pass

def _read_catalog(self, catalog):
    """Read `catalog` via `readers.read_catalog` using this instance's SSL setting.

    The value stored in `self.verify_ssl` is forwarded as the `verify`
    keyword argument, and the result of `readers.read_catalog` is
    returned unchanged.
    """
    verify = self.verify_ssl
    return readers.read_catalog(catalog, verify=verify)


def main():
"""Permite ejecutar el módulo por línea de comandos.
Expand Down
15 changes: 8 additions & 7 deletions pydatajson/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def read_catalog_obj(catalog):
return pydatajson.DataJson(catalog)


def read_catalog(catalog, default_values=None, catalog_format=None):
def read_catalog(catalog, default_values=None, catalog_format=None,
verify=False):
"""Toma una representación cualquiera de un catálogo, y devuelve su
representación interna (un diccionario de Python con su metadata.)
Expand Down Expand Up @@ -86,13 +87,13 @@ def read_catalog(catalog, default_values=None, catalog_format=None):
catalog_format = catalog_format or suffix
if catalog_format == "xlsx":
try:
catalog_dict = read_xlsx_catalog(catalog)
catalog_dict = read_xlsx_catalog(catalog, verify=verify)
except openpyxl_exceptions + \
(ValueError, AssertionError, IOError, BadZipfile) as e:
raise ce.NonParseableCatalog(catalog, str(e))
elif catalog_format == "json":
try:
catalog_dict = read_json(catalog)
catalog_dict = read_json(catalog, verify=verify)
except(ValueError, TypeError, IOError) as e:
raise ce.NonParseableCatalog(catalog, str(e))
elif catalog_format == "ckan":
Expand Down Expand Up @@ -182,7 +183,7 @@ def _set_default_value(dict_obj, keys, value):
variable[keys[-1]] = value


def read_json(json_path_or_url):
def read_json(json_path_or_url, verify=False):
"""Toma el path a un JSON y devuelve el diccionario que representa.
Se asume que el parámetro es una URL si comienza con 'http' o 'https', o
Expand All @@ -200,7 +201,7 @@ def read_json(json_path_or_url):

parsed_url = urlparse(json_path_or_url)
if parsed_url.scheme in ["http", "https"]:
res = requests.get(json_path_or_url, verify=False)
res = requests.get(json_path_or_url, verify=verify)
json_dict = json.loads(res.content, encoding='utf-8')

else:
Expand All @@ -218,7 +219,7 @@ def read_json(json_path_or_url):
return json_dict


def read_xlsx_catalog(xlsx_path_or_url, logger=None):
def read_xlsx_catalog(xlsx_path_or_url, logger=None, verify=False):
"""Toma el path a un catálogo en formato XLSX y devuelve el diccionario
que representa.
Expand All @@ -238,7 +239,7 @@ def read_xlsx_catalog(xlsx_path_or_url, logger=None):

parsed_url = urlparse(xlsx_path_or_url)
if parsed_url.scheme in ["http", "https"]:
res = requests.get(xlsx_path_or_url, verify=False)
res = requests.get(xlsx_path_or_url, verify=verify)
tmpfilename = ".tmpfile.xlsx"
with io.open(tmpfilename, 'wb') as tmpfile:
tmpfile.write(res.content)
Expand Down

0 comments on commit 2b8abf3

Please sign in to comment.