
Commit

Refactor error message handling. Add a general-purpose 'pydatajson' logger.
federicotdn committed Jun 11, 2018
1 parent 5947bf3 commit 84a75ef
Showing 11 changed files with 88 additions and 65 deletions.
8 changes: 8 additions & 0 deletions pydatajson/__init__.py
@@ -9,7 +9,15 @@
from .core import DataJson
from .helpers import parse_repeating_time_interval
from . import helpers
+import logging

__author__ = """Datos Argentina"""
__email__ = 'datos@modernizacion.gob.ar'
__version__ = '0.4.15'

"""
Logger base para librería pydatajson
https://docs.python.org/2/howto/logging.html#configuring-logging-for-a-library
"""
logger = logging.getLogger('pydatajson')
logger.addHandler(logging.NullHandler())
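The NullHandler added here keeps the library silent by default, which is what the logging HOWTO linked above recommends for libraries. An application that wants to see pydatajson's messages attaches a handler and level of its own; a minimal sketch of that consumer-side setup (the format string and INFO level are illustrative choices, not part of this commit):

    import logging

    import pydatajson

    # Application code: configure the root logger once at startup.
    logging.basicConfig(format='%(asctime)s [%(levelname)s]: %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S')
    # Allow INFO-level records from the library through; they propagate
    # from the 'pydatajson' logger to the root handler configured above.
    logging.getLogger('pydatajson').setLevel(logging.INFO)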
24 changes: 13 additions & 11 deletions pydatajson/backup.py
@@ -8,14 +8,16 @@
from __future__ import with_statement
import os
import traceback
-from pprint import pprint
+import logging

import pydatajson
from .helpers import ensure_dir_exists
from .download import download_to_file

CATALOGS_DIR = ""

+logger = logging.getLogger('pydatajson')


def make_catalogs_backup(catalogs, local_catalogs_dir="",
include_metadata=True, include_data=True,
@@ -55,9 +57,8 @@ def make_catalogs_backup(catalogs, local_catalogs_dir="",
include_metadata=include_metadata,
include_metadata_xlsx=include_metadata_xlsx,
include_data=include_data)
-except Exception as e:
-    print("ERROR en {}".format(catalog))
-    traceback.print_exc()
+except Exception:
+    logger.exception("ERROR en {}".format(catalog))

elif isinstance(catalogs, dict):
for catalog_id, catalog in catalogs.iteritems():
@@ -68,9 +69,9 @@ def make_catalogs_backup(catalogs, local_catalogs_dir="",
include_metadata=include_metadata,
include_metadata_xlsx=include_metadata_xlsx,
include_data=include_data)
-except Exception as e:
-    print("ERROR en {} ({})".format(catalog, catalog_id))
-    traceback.print_exc()
+except Exception:
+    logger.exception(
+        "ERROR en {} ({})".format(catalog, catalog_id))


def make_catalog_backup(catalog, catalog_id=None, local_catalogs_dir="",
@@ -100,8 +101,9 @@ def make_catalog_backup(catalog, catalog_id=None, local_catalogs_dir="",
catalog_identifier = catalog_id if catalog_id else catalog["identifier"]

if include_metadata:
print("Descargando catálogo {}".format(catalog_identifier.ljust(30)),
end="\r")
logger.info(
"Descargando catálogo {}".format(
catalog_identifier.ljust(30)))

# catálogo en json
catalog_path = get_catalog_path(catalog_identifier, local_catalogs_dir)
@@ -120,8 +122,8 @@ def make_catalog_backup(catalog, catalog_id=None, local_catalogs_dir="",
distributions_num = len(distributions)

for index, distribution in enumerate(distributions):
print("Descargando distribución {} de {} ({})".format(
index + 1, distributions_num, catalog_identifier), end="\r")
logger.info("Descargando distribución {} de {} ({})".format(
index + 1, distributions_num, catalog_identifier))

dataset_id = distribution["dataset_identifier"]
distribution_id = distribution["identifier"]
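Note that logger.exception(...) is designed to be called from inside an except block: it logs the message at ERROR level and appends the active traceback automatically, which is why the separate traceback.print_exc() calls disappear. A minimal sketch of the pattern (the catalog name is made up):

    import logging

    logger = logging.getLogger('pydatajson')
    logging.basicConfig()  # sketch only: give the record somewhere to go

    try:
        raise ValueError("fallo de descarga")
    except Exception:
        # One call emits "ERROR en ..." at ERROR level plus the traceback.
        logger.exception("ERROR en {}".format("catalogo-ejemplo"))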
47 changes: 23 additions & 24 deletions pydatajson/ckan_reader.py
@@ -17,6 +17,8 @@
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

+logger = logging.getLogger('pydatajson')


ABSOLUTE_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(ABSOLUTE_PROJECT_DIR, "schemas",
@@ -29,9 +31,6 @@
RAW_SUPER_THEMES = json.load(super_themes)
SUPER_THEMES = {row["label"]: row["id"] for row in RAW_SUPER_THEMES}

-logging.basicConfig(format='%(asctime)s [%(levelname)s]: %(message)s',
-                    datefmt='%m/%d/%Y %I:%M:%S')


def read_ckan_catalog(portal_url):
"""Convierte los metadatos de un portal disponibilizados por la Action API
@@ -61,7 +60,7 @@ def read_ckan_catalog(portal_url):
for index, pkg in enumerate(packages_list):
# progreso (necesario cuando son muchos)
msg = "Leyendo dataset {} de {}".format(index + 1, num_packages)
-print(msg, end="\r")
+logger.info(msg)

# agrega un nuevo dataset a la lista
packages.append(portal.call_action(
@@ -81,7 +80,7 @@ def read_ckan_catalog(portal_url):
catalog["themeTaxonomy"] = map_groups_to_themes(groups)

except:
-logging.error(
+logger.exception(
'Error al procesar el portal %s', portal_url, exc_info=True)

return catalog
@@ -101,7 +100,7 @@ def map_status_to_catalog(status):
try:
catalog[catalog_key] = status[status_key]
except:
logging.info("""
logger.exception("""
La clave '%s' no está en el endpoint de status. No se puede completar
catalog['%s'].""", status_key, catalog_key)

@@ -116,11 +115,11 @@ def map_status_to_catalog(status):
try:
catalog['publisher'][publisher_key] = status[status_key]
except:
logging.info("""
logger.exception("""
La clave '%s' no está en el endpoint de status. No se puede completar
catalog['publisher'['%s'].""", status_key, publisher_key)
else:
logging.info("""
logger.info("""
No hay ninguna información sobre catalog['publisher'] en el endpoint
de 'status'.""")

@@ -158,7 +157,7 @@ def map_package_to_dataset(package, portal_url):
try:
dataset[dataset_key] = package[package_key]
except:
logging.info("""
logger.exception("""
La clave '%s' no está en el endpoint 'package_show' para el package '%s'. No
se puede completar dataset['%s'].""",
package_key, package['name'], dataset_key)
@@ -174,7 +173,7 @@ def map_package_to_dataset(package, portal_url):
try:
dataset['publisher'][publisher_key] = package[package_key]
except:
logging.info("""
logger.exception("""
La clave '%s' no está en el endpoint 'package_show' para el package '%s'. No
se puede completar dataset['publisher']['%s'].""",
package_key, package['name'], publisher_key)
@@ -190,7 +189,7 @@ def map_package_to_dataset(package, portal_url):
try:
dataset['contactPoint'][contact_key] = package[package_key]
except:
logging.info("""
logger.exception("""
La clave '%s' no está en el endpoint 'package_show' para el package '%s'. No
se puede completar dataset['contactPoint']['%s'].""",
package_key, package['name'], contact_key)
@@ -219,15 +218,15 @@ def add_temporal(dataset, package):
]

if len(temporal) > 1:
logging.info("""
logger.info("""
Se encontro mas de un valor de cobertura temporal en 'extras' para el
'package' '%s'. No se puede completar dataset['temporal'].\n %s""",
package['name'], temporal)
elif len(temporal) == 1:
try:
dataset["temporal"] = temporal[0]
except KeyError:
logging.warn("""
logger.exception("""
Se encontró '%s' como cobertura temporal, pero no es mapeable a un
'temporal' conocido. La clave no se pudo completar.""", temporal[0])

@@ -239,7 +238,7 @@ def add_temporal(dataset, package):
extra["key"] != "Cobertura temporal"]

if almost_temporal:
logging.warn("""
logger.warn("""
Se encontraron claves con nombres similares pero no idénticos a
"Cobertura temporal" en 'extras' para el 'package' '%s'. Por favor, considere
corregirlas:
@@ -254,20 +253,20 @@ def add_superTheme(dataset, package):
]

if len(super_theme) == 0:
logging.info("""
logger.info("""
No se encontraron valores de temática global en 'extras' para el
'package' '%s'. No se puede completar dataset['superTheme'].""",
package['name'])
elif len(super_theme) > 1:
logging.info("""
logger.info("""
Se encontro mas de un valor de temática global en 'extras' para el
'package' '%s'. No se puede completar dataset['superTheme'].\n %s""",
package['name'], super_theme)
else:
try:
dataset["superTheme"] = [SUPER_THEMES[super_theme[0]]]
except KeyError:
logging.warn("""
logger.exception("""
Se encontró '%s' como temática global, pero no es mapeable a un
'superTheme' conocido. La clave no se pudo completar.""", super_theme[0])

@@ -279,7 +278,7 @@ def add_superTheme(dataset, package):
extra["key"] != "Temática global"]

if almost_super_theme:
logging.warn("""
logger.warn("""
Se encontraron claves con nombres similares pero no idénticos a "Temática
global" en 'extras' para el 'package' '%s'. Por favor, considere corregirlas:
\n%s""", package['name'], almost_accrual)
@@ -294,20 +293,20 @@ def add_accrualPeriodicity(dataset, package):
]

if len(accrual) == 0:
logging.info("""
logger.info("""
No se encontraron valores de frecuencia de actualización en 'extras' para el
'package' '%s'. No se puede completar dataset['accrualPeriodicity'].""",
package['name'])
elif len(accrual) > 1:
logging.info("""
logger.info("""
Se encontro mas de un valor de frecuencia de actualización en 'extras' para el
'package' '%s'. No se puede completar dataset['accrualPeriodicity'].\n %s""",
package['name'], accrual)
else:
try:
dataset["accrualPeriodicity"] = FREQUENCIES[accrual[0]]
except KeyError:
logging.warn("""
logger.exception("""
Se encontró '%s' como frecuencia de actualización, pero no es mapeable a una
'accrualPeriodicity' conocida. La clave no se pudo completar.""", accrual[0])

@@ -319,7 +318,7 @@ def add_accrualPeriodicity(dataset, package):
extra["key"] != "Frecuencia de actualización"]

if almost_accrual:
logging.warn("""
logger.warn("""
Se encontraron claves con nombres similares pero no idénticos a "Frecuencia de
actualización" en 'extras' para el 'package' '%s'. Por favor, considere
corregirlas:\n%s""", package['name'], almost_accrual)
@@ -351,7 +350,7 @@ def map_resource_to_distribution(resource, portal_url):
try:
distribution[distribution_key] = resource[resource_key]
except:
logging.info("""
logger.exception("""
La clave '%s' no está en la metadata del 'resource' '%s'. No
se puede completar distribution['%s'].""",
resource_key, resource['name'], distribution_key)
@@ -382,7 +381,7 @@ def map_group_to_theme(group):
try:
theme[theme_key] = group[group_key]
except:
logging.info("""
logger.exception("""
La clave '%s' no está en la metadata del 'group' '%s'. No
se puede completar theme['%s'].""",
group_key, theme['name'], theme_key)
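Dropping the module-level logging.basicConfig(...) call is the structural fix in this file: basicConfig configures the process-wide root logger, so a library that runs it at import time silently overrides whatever the host application set up. The surviving logger.warn(...) calls also rely on an alias that Python 3 deprecates; logger.warning(...) is the documented spelling. A rough sketch of the intended split of responsibilities (names illustrative):

    import logging

    # Application side: decide handlers, format and level once, at startup.
    logging.basicConfig(format='%(asctime)s [%(levelname)s]: %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S', level=logging.INFO)

    # Library side: only fetch the named logger and emit records.
    logger = logging.getLogger('pydatajson')
    logger.warning('prefer warning() over the deprecated warn() alias')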
7 changes: 5 additions & 2 deletions pydatajson/ckan_utils.py
@@ -4,11 +4,14 @@

import json
import re
+import logging
from datetime import time
from dateutil import parser, tz
from .helpers import title_to_name
from . import custom_exceptions as ce

+logger = logging.getLogger('pydatajson')


def append_attribute_to_extra(package, dataset, attribute, serialize=False):
value = dataset.get(attribute)
@@ -83,8 +86,8 @@ def map_dataset_to_package(catalog, dataset, owner_org, catalog_id=None,
try:
label = _get_theme_label(catalog, theme)
package['tags'].append({'name': label})
-except Exception as e:
-    print(e)
+except Exception:
+    logger.exception('Theme no presente en catálogo.')
continue
else:
package['groups'] = package.get('groups', []) + [
18 changes: 11 additions & 7 deletions pydatajson/core.py
@@ -17,6 +17,7 @@
import re
import sys
import warnings
+import logging
from collections import OrderedDict
from datetime import datetime

@@ -36,6 +37,9 @@
from . import transformation
from . import backup

+logger = logging.getLogger('pydatajson')


ABSOLUTE_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
CENTRAL_CATALOG = "http://datos.gob.ar/data.json"
DATA_FORMATS = [
@@ -182,11 +186,11 @@ def remove_dataset(self, identifier):
for index, dataset in enumerate(self["dataset"]):
if dataset["identifier"] == identifier:
self["dataset"].pop(index)
print("Dataset {} en posicion {} fue eliminado.".format(
logger.info("Dataset {} en posicion {} fue eliminado.".format(
identifier, index))
return

print("No se encontro el dataset {}.".format(identifier))
logger.warning("No se encontro el dataset {}.".format(identifier))

def remove_distribution(self, identifier, dataset_identifier=None):
for dataset in self["dataset"]:
@@ -195,11 +199,11 @@ def remove_distribution(self, identifier, dataset_identifier=None):
(not dataset_identifier or
dataset["identifier"] == dataset_identifier)):
dataset["distribution"].pop(index)
print("Distribution {} del dataset {} en posicion {} fue eliminada.".format(
logger.info("Distribution {} del dataset {} en posicion {} fue eliminada.".format(
identifier, dataset["identifier"], index))
return

print("No se encontro la distribucion {}.".format(identifier))
logger.warning("No se encontro la distribucion {}.".format(identifier))

def is_valid_catalog(self, catalog=None):
catalog = catalog or self
@@ -1157,13 +1161,13 @@ def main():
full_res = dj_instance.validate_catalog(datajson_file)
pretty_full_res = json.dumps(
full_res, indent=4, separators=(",", ": "))
-print(bool_res)
-print(pretty_full_res)
+logger.info(bool_res)
+logger.info(pretty_full_res)
except IndexError as errmsg:
format_str = """
{}: pydatajson.py fue ejecutado como script sin proveer un argumento
"""
-print(format_str.format(errmsg))
+logger.error(format_str.format(errmsg))


if __name__ == '__main__':
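One behavioural consequence in main(): since the package installs only a NullHandler, the validation results that print used to write to stdout will no longer appear when core.py runs as a script unless logging is configured first. A sketch of how the __main__ guard could keep the CLI output visible (not part of this commit):

    import logging

    if __name__ == '__main__':
        # Give the 'pydatajson' records somewhere to go before main() runs.
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        main()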
8 changes: 4 additions & 4 deletions pydatajson/federation.py
@@ -11,7 +11,7 @@
from .ckan_utils import map_dataset_to_package, map_theme_to_group
from .search import get_datasets

-logger = logging.getLogger(__name__)
+logger = logging.getLogger('pydatajson.federation')


def push_dataset_to_ckan(catalog, owner_org, dataset_origin_identifier,
@@ -86,9 +86,9 @@ def remove_harvested_ds_from_ckan(catalog, portal_url, apikey,
for harvested_id in harvested_ids:
try:
remove_dataset_from_ckan(harvested_id, portal_url, apikey)
print("{} eliminado de {}".format(harvested_id, catalog_id))
except:
print("{} de {} no existe.".format(harvested_id, catalog_id))
logger.info("{} eliminado de {}".format(harvested_id, catalog_id))
except Exception:
logger.exception("{} de {} no existe.".format(harvested_id, catalog_id))


def remove_datasets_from_ckan(portal_url, apikey, filter_in=None,
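Hardcoding 'pydatajson.federation' rather than __name__ pins this module into the library's logger hierarchy: the dotted name makes it a child of 'pydatajson', so its records propagate upward and a single handler attached to the parent covers every submodule. A quick sketch of that propagation:

    import logging

    parent = logging.getLogger('pydatajson')
    parent.addHandler(logging.StreamHandler())  # one handler, on the parent
    parent.setLevel(logging.INFO)

    # The child logger has no handler of its own; the record still reaches
    # the parent's StreamHandler through propagation.
    logging.getLogger('pydatajson.federation').info('visible via parent')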
