Agrego parámetro de verificación de SSL certs

datosgobar · May 28, 2019 · 2b8abf3 · 2b8abf3
1 parent 66bda5e
commit 2b8abf3
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 15 deletions.
diff --git a/pydatajson/core.py b/pydatajson/core.py
@@ -53,7 +53,7 @@ class DataJson(dict):
 
     def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
                  default_values=None, catalog_format=None,
-                 validator_class=Validator):
+                 validator_class=Validator, verify_ssl=False):
         """Lee un catálogo y crea un objeto con funciones para manipularlo.
 
         Salvo que se indique lo contrario, se utiliza como default el schema
@@ -78,13 +78,16 @@ def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
                         "distribution_issued": "2017-06-22"
                     }
         """
+        self.verify_ssl = verify_ssl
+
         # se construye el objeto DataJson con la interfaz de un dicconario
         if catalog:
 
             # lee representaciones de un catálogo hacia un diccionario
             catalog = readers.read_catalog(catalog,
                                            default_values=default_values,
-                                           catalog_format=catalog_format)
+                                           catalog_format=catalog_format,
+                                           verify=self.verify_ssl)
 
             # copia todos los atributos del diccionario hacia el objeto
             for key, value in iteritems(catalog):
@@ -242,7 +245,7 @@ def is_valid_catalog(self, catalog=None):
         Returns:
             bool: True si el data.json cumple con el schema, sino False.
         """
-        catalog = readers.read_catalog(catalog) if catalog else self
+        catalog = self._read_catalog(catalog) if catalog else self
         return self.validator.is_valid(catalog)
 
     @staticmethod
@@ -333,7 +336,7 @@ def validate_catalog(self, catalog=None, only_errors=False, fmt="dict",
             "message", "validator", "validator_value", "error_code".
 
         """
-        catalog = readers.read_catalog(catalog) if catalog else self
+        catalog = self._read_catalog(catalog) if catalog else self
 
         validation = self.validator.validate_catalog(catalog, only_errors)
         if export_path:
@@ -562,7 +565,7 @@ def catalog_report(self, catalog, harvest='none', report=None,
         """
 
         url = catalog if isinstance(catalog, string_types) else None
-        catalog = readers.read_catalog(catalog)
+        catalog = self._read_catalog(catalog)
 
         validation = self.validate_catalog(catalog)
         catalog_validation = validation["error"]["catalog"]
@@ -799,7 +802,7 @@ def generate_harvestable_catalogs(self, catalogs, harvest='all',
         if isinstance(catalogs, string_types + (dict,)):
             catalogs = [catalogs]
 
-        harvestable_catalogs = [readers.read_catalog(c) for c in catalogs]
+        harvestable_catalogs = [self._read_catalog(c) for c in catalogs]
         catalogs_urls = [catalog if isinstance(catalog, string_types)
                          else None for catalog in catalogs]
 
@@ -871,7 +874,7 @@ def generate_datasets_summary(self, catalog, export_path=None):
             list: Contiene tantos dicts como datasets estén presentes en
             `catalogs`, con los datos antes mencionados.
         """
-        catalog = readers.read_catalog(catalog)
+        catalog = self._read_catalog(catalog)
 
         # Trato de leer todos los datasets bien formados de la lista
         # catalog["dataset"], si existe.
@@ -1012,7 +1015,7 @@ def _count_fields_recursive(self, dataset, fields):
         return key_count
 
     def dataset_is_updated(self, catalog, dataset):
-        catalog = readers.read_catalog(catalog)
+        catalog = self._read_catalog(catalog)
 
         for catalog_dataset in catalog.get('dataset', []):
             if catalog_dataset.get('title') == dataset:
@@ -1093,6 +1096,9 @@ def make_catalogs_backup(self, catalogs=None,
         # TODO: implementar función
         pass
 
+    def _read_catalog(self, catalog):
+        return readers.read_catalog(catalog, verify=self.verify_ssl)
+
 
 def main():
     """Permite ejecutar el módulo por línea de comandos.

diff --git a/pydatajson/readers.py b/pydatajson/readers.py
@@ -52,7 +52,8 @@ def read_catalog_obj(catalog):
         return pydatajson.DataJson(catalog)
 
 
-def read_catalog(catalog, default_values=None, catalog_format=None):
+def read_catalog(catalog, default_values=None, catalog_format=None,
+                 verify=False):
     """Toma una representación cualquiera de un catálogo, y devuelve su
     representación interna (un diccionario de Python con su metadata.)
 
@@ -86,13 +87,13 @@ def read_catalog(catalog, default_values=None, catalog_format=None):
             catalog_format = catalog_format or suffix
         if catalog_format == "xlsx":
             try:
-                catalog_dict = read_xlsx_catalog(catalog)
+                catalog_dict = read_xlsx_catalog(catalog, verify=verify)
             except openpyxl_exceptions + \
                     (ValueError, AssertionError, IOError, BadZipfile) as e:
                 raise ce.NonParseableCatalog(catalog, str(e))
         elif catalog_format == "json":
             try:
-                catalog_dict = read_json(catalog)
+                catalog_dict = read_json(catalog, verify=verify)
             except(ValueError, TypeError, IOError) as e:
                 raise ce.NonParseableCatalog(catalog, str(e))
         elif catalog_format == "ckan":
@@ -182,7 +183,7 @@ def _set_default_value(dict_obj, keys, value):
             variable[keys[-1]] = value
 
 
-def read_json(json_path_or_url):
+def read_json(json_path_or_url, verify=False):
     """Toma el path a un JSON y devuelve el diccionario que representa.
 
     Se asume que el parámetro es una URL si comienza con 'http' o 'https', o
@@ -200,7 +201,7 @@ def read_json(json_path_or_url):
 
     parsed_url = urlparse(json_path_or_url)
     if parsed_url.scheme in ["http", "https"]:
-        res = requests.get(json_path_or_url, verify=False)
+        res = requests.get(json_path_or_url, verify=verify)
         json_dict = json.loads(res.content, encoding='utf-8')
 
     else:
@@ -218,7 +219,7 @@ def read_json(json_path_or_url):
     return json_dict
 
 
-def read_xlsx_catalog(xlsx_path_or_url, logger=None):
+def read_xlsx_catalog(xlsx_path_or_url, logger=None, verify=False):
     """Toma el path a un catálogo en formato XLSX y devuelve el diccionario
     que representa.
 
@@ -238,7 +239,7 @@ def read_xlsx_catalog(xlsx_path_or_url, logger=None):
 
     parsed_url = urlparse(xlsx_path_or_url)
     if parsed_url.scheme in ["http", "https"]:
-        res = requests.get(xlsx_path_or_url, verify=False)
+        res = requests.get(xlsx_path_or_url, verify=verify)
         tmpfilename = ".tmpfile.xlsx"
         with io.open(tmpfilename, 'wb') as tmpfile:
             tmpfile.write(res.content)