Skip to content

Commit

Permalink
Merge pull request #258 from datosgobar/255-refactor-validations
Browse files Browse the repository at this point in the history
Refactor de validaciones
  • Loading branch information
lucaslavandeira committed May 27, 2019
2 parents fcdbe9f + 71a4aa1 commit fb96e26
Show file tree
Hide file tree
Showing 8 changed files with 355 additions and 357 deletions.
32 changes: 18 additions & 14 deletions pydatajson/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@
from six import string_types, iteritems
from six.moves.urllib_parse import urljoin

from pydatajson.response_formatters import format_response
from pydatajson.validation import Validator, \
DEFAULT_CATALOG_SCHEMA_FILENAME, ABSOLUTE_SCHEMA_DIR
from . import documentation
from . import helpers
from . import indicators
from . import readers
from . import search
from . import time_series
from . import validation
from . import writers
from . import federation
from . import transformation
Expand All @@ -37,7 +39,6 @@

logger = logging.getLogger('pydatajson')


ABSOLUTE_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
CENTRAL_CATALOG = "http://datos.gob.ar/data.json"
MIN_DATASET_TITLE = 10
Expand All @@ -51,7 +52,8 @@ class DataJson(dict):
CATALOG_FIELDS_PATH = os.path.join(ABSOLUTE_PROJECT_DIR, "fields")

def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
default_values=None, catalog_format=None):
default_values=None, catalog_format=None,
validator_class=Validator):
"""Lee un catálogo y crea un objeto con funciones para manipularlo.
Salvo que se indique lo contrario, se utiliza como default el schema
Expand Down Expand Up @@ -95,17 +97,15 @@ def __init__(self, catalog=None, schema_filename=None, schema_dir=None,

else:
self.has_catalog = False
schema_filename = schema_filename or DEFAULT_CATALOG_SCHEMA_FILENAME
schema_dir = schema_dir or ABSOLUTE_SCHEMA_DIR

self.validator = validation.create_validator(
schema_filename, schema_dir)
self.validator = validator_class(schema_filename, schema_dir)

# asigno docstrings de los métodos modularizados
fn_doc = indicators.generate_catalogs_indicators.__doc__
self.generate_catalogs_indicators.__func__.__doc__ = fn_doc

fn_doc = validation.is_valid_catalog.__doc__
self.is_valid_catalog.__func__.__doc__ = fn_doc

# metodos para buscar entidades cuando DataJson tiene catalogo cargado
get_themes = search.get_themes
themes = property(get_themes)
Expand Down Expand Up @@ -242,8 +242,8 @@ def is_valid_catalog(self, catalog=None):
Returns:
bool: True si el data.json cumple con el schema, sino False.
"""
catalog = catalog or self
return validation.is_valid_catalog(catalog, validator=self.validator)
catalog = readers.read_catalog(catalog) if catalog else self
return self.validator.is_valid(catalog)

@staticmethod
def _update_validation_response(error, response):
Expand Down Expand Up @@ -333,9 +333,13 @@ def validate_catalog(self, catalog=None, only_errors=False, fmt="dict",
"message", "validator", "validator_value", "error_code".
"""
catalog = catalog or self
return validation.validate_catalog(
catalog, only_errors, fmt, export_path, validator=self.validator)
catalog = readers.read_catalog(catalog) if catalog else self

validation = self.validator.validate_catalog(catalog, only_errors)
if export_path:
fmt = 'table'

return format_response(validation, export_path, fmt)

@staticmethod
def _stringify_list(str_or_list):
Expand Down Expand Up @@ -634,7 +638,7 @@ def generate_datasets_report(
catalog_ids.append(catalog.get("identifier", ""))
if isinstance(catalog_ids, string_types + (dict,)):
catalog_ids = [catalog_ids] * len(catalogs)
if not catalog_orgs or\
if not catalog_orgs or \
isinstance(catalog_orgs, string_types + (dict,)):
catalog_orgs = [catalog_orgs] * len(catalogs)
if not catalog_homepages or isinstance(catalog_homepages,
Expand Down
4 changes: 4 additions & 0 deletions pydatajson/custom_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,7 @@ def __init__(self, distribution_id, distribution_downloadURL, status_code):
msg = "Distribution ({}) con URL descarga ({}) inválida ({})"
super(DownloadURLBrokenError, self).__init__(msg.format(
distribution_id, distribution_downloadURL, status_code))


class FormatNameError(ValueError):
    """Raised when a requested response format name is not recognized."""
22 changes: 22 additions & 0 deletions pydatajson/response_formatters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

from pydatajson import custom_exceptions

from pydatajson.response_formatters.dict_formatter import DictFormatter
from pydatajson.response_formatters.list_formatter import ListFormatter
from pydatajson.response_formatters.tables_formatter import TablesFormatter


def format_response(validation, export_path, response_format):
    """Format a validation result with the formatter named by response_format.

    Args:
        validation: Validation result handed to the selected formatter.
        export_path: Destination path; only the 'table' formatter receives it.
        response_format: One of 'table', 'dict' or 'list'.

    Returns:
        Whatever the selected formatter's ``format()`` method returns.

    Raises:
        custom_exceptions.FormatNameError: If ``response_format`` is not one
            of the known format names.
    """
    # Factories instead of instances: only the requested formatter is built.
    formatter_factories = {
        'table': lambda: TablesFormatter(validation, export_path),
        'dict': lambda: DictFormatter(validation),
        'list': lambda: ListFormatter(validation),
    }
    # The try block covers the lookup only, so a KeyError raised while a
    # formatter is running is not misreported as an unknown format name.
    try:
        build_formatter = formatter_factories[response_format]
    except KeyError:
        msg = "No se reconoce el formato {}".format(response_format)
        raise custom_exceptions.FormatNameError(msg)
    return build_formatter().format()
12 changes: 12 additions & 0 deletions pydatajson/response_formatters/dict_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

from pydatajson.response_formatters.validation_response_formatter import \
ValidationResponseFormatter


class DictFormatter(ValidationResponseFormatter):
    """Formatter that hands the validation response back unchanged."""

    def format(self):
        """Return the stored response object itself (no copy is made)."""
        unchanged = self.response
        return unchanged
58 changes: 58 additions & 0 deletions pydatajson/response_formatters/list_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

from pydatajson.response_formatters.validation_response_formatter import \
ValidationResponseFormatter


class ListFormatter(ValidationResponseFormatter):
    """Flatten a validation response into lists of row dicts.

    Every reported error becomes one row. A catalog or dataset without
    errors still yields a single row whose error fields are ``None``.
    """

    def format(self):
        """Build the catalog and dataset rows from ``self.response``.

        Returns:
            dict: ``{"catalog": [...], "dataset": [...]}``, each value being
                a list of row dicts ready to be dumped into a table.
        """
        errors_report = self.response["error"]
        return {
            "catalog": self._catalog_rows(errors_report["catalog"]),
            "dataset": self._dataset_rows(errors_report["dataset"]),
        }

    @staticmethod
    def _extend(common_fields, extra_fields):
        """Return a copy of ``common_fields`` updated with ``extra_fields``."""
        row = dict(common_fields)
        row.update(extra_fields)
        return row

    def _catalog_rows(self, catalog):
        """Return one row per catalog error, or one placeholder row."""
        common_fields = {
            "catalog_title": catalog["title"],
            "catalog_status": catalog["status"],
        }
        if not catalog["errors"]:
            return [self._extend(common_fields, {
                "catalog_error_message": None,
                "catalog_error_location": None,
            })]

        return [
            self._extend(common_fields, {
                "catalog_error_message": error["message"],
                # NOTE(review): path items are presumably jsonschema-style
                # path components, which can be ints for list positions;
                # rendering each one as text keeps join() from raising
                # TypeError. All-string paths produce the same output as
                # a plain join.
                "catalog_error_location": ", ".join(
                    "{}".format(step) for step in error["path"]),
            })
            for error in catalog["errors"]
        ]

    def _dataset_rows(self, datasets):
        """Return one row per dataset error, or one placeholder per dataset."""
        rows = []
        for dataset in datasets:
            common_fields = {
                "dataset_title": dataset["title"],
                "dataset_identifier": dataset["identifier"],
                "dataset_list_index": dataset["list_index"],
                "dataset_status": dataset["status"],
            }
            if not dataset["errors"]:
                rows.append(self._extend(common_fields, {
                    "dataset_error_message": None,
                    "dataset_error_location": None,
                }))
                continue

            rows.extend(
                self._extend(common_fields, {
                    "dataset_error_message": error["message"],
                    # Only the last path component is reported per dataset.
                    "dataset_error_location": error["path"][-1],
                })
                for error in dataset["errors"]
            )
        return rows
53 changes: 53 additions & 0 deletions pydatajson/response_formatters/tables_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

from openpyxl.styles import Alignment, Font

from pydatajson import writers
from pydatajson.response_formatters.list_formatter import ListFormatter
from pydatajson.response_formatters.validation_response_formatter import\
ValidationResponseFormatter


class TablesFormatter(ValidationResponseFormatter):
    """Formatter that writes the validation rows out as styled tables."""

    def __init__(self, response, export_path):
        """Store the validation response and the path handed to the writer."""
        super(TablesFormatter, self).__init__(response)
        self.export_path = export_path

    def format(self):
        """Flatten the response with ListFormatter and write the tables.

        The rows are passed to ``writers.write_tables`` together with the
        column widths and cell styles defined here. Nothing is returned.
        """
        tables = ListFormatter(self.response).format()

        # (table name, ((column name, width), ...)) in declaration order.
        width_specs = (
            ("catalog", (
                ("catalog_status", 20),
                ("catalog_error_location", 40),
                ("catalog_error_message", 40),
                ("catalog_title", 20),
            )),
            ("dataset", (
                ("dataset_error_location", 20),
                ("dataset_identifier", 40),
                ("dataset_status", 20),
                ("dataset_title", 40),
                ("dataset_list_index", 20),
                ("dataset_error_message", 40),
            )),
        )
        widths_by_table = {
            table_name: {
                column: {"width": width} for column, width in columns
            }
            for table_name, columns in width_specs
        }

        # Both tables get the same look: vertically centered cells and a
        # bold first row. Each table receives its own style objects.
        styles_by_table = {
            table_name: [
                {"alignment": Alignment(vertical="center")},
                {"row": 1, "font": Font(bold=True)},
            ]
            for table_name in ("catalog", "dataset")
        }

        # Writes the tables into a single Excel file or several CSVs.
        writers.write_tables(
            tables=tables,
            path=self.export_path,
            column_styles=widths_by_table,
            cell_styles=styles_by_table,
        )
14 changes: 14 additions & 0 deletions pydatajson/response_formatters/validation_response_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
import abc


# The base class is created through ABCMeta so that @abc.abstractmethod is
# actually enforced; this call form works on both Python 2 and Python 3.
class ValidationResponseFormatter(
        abc.ABCMeta(str("_AbstractFormatterBase"), (object,), {})):
    """Abstract base for objects that format a validation response.

    Subclasses must implement ``format()``. The base class itself cannot be
    instantiated.
    """

    def __init__(self, response):
        """Keep the validation response that ``format()`` will work on."""
        self.response = response

    @abc.abstractmethod
    def format(self):
        """Return the formatted response; must be overridden."""
        raise NotImplementedError

0 comments on commit fb96e26

Please sign in to comment.