
Commit

Add time-series search methods to a catalog (`get_time_series()`) and a boolean `only_time_series` parameter to filter datasets and distributions in their search methods (`get_datasets(only_time_series=True)` returns only those datasets that contain at least one time series).
abenassi committed Oct 31, 2017
1 parent a2f1bdd commit 4f60886
Showing 10 changed files with 99 additions and 15 deletions.
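As a quick orientation, here is a minimal usage sketch of the new module-level helpers described in the commit message; the catalog path is hypothetical, and the calls mirror the new test added in `tests/test_search.py`.

```python
import pydatajson.search

catalog = "samples/time_series_data.json"  # hypothetical catalog path

# Only datasets that contain at least one time series.
datasets = pydatajson.search.get_datasets(catalog, only_time_series=True)

# Only distributions that carry a time index among their fields.
distributions = pydatajson.search.get_distributions(catalog, only_time_series=True)

# Every field in the catalog that qualifies as a time series.
series = pydatajson.search.get_time_series(catalog)
```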
5 changes: 5 additions & 0 deletions HISTORY.md
@@ -1,6 +1,11 @@
History
=======

0.3.19 (2017-10-31)
-------------------

* Add time-series search methods for a catalog (`get_time_series()`) and a boolean `only_time_series` parameter to filter datasets and distributions in the search methods (`get_datasets(only_time_series=True)` returns only datasets that contain at least one time series).

0.3.18 (2017-10-19)
-------------------

2 changes: 1 addition & 1 deletion pydatajson/__init__.py
@@ -10,4 +10,4 @@

__author__ = """Datos Argentina"""
__email__ = 'datos@modernizacion.gob.ar'
__version__ = '0.3.18'
__version__ = '0.3.19'
2 changes: 2 additions & 0 deletions pydatajson/core.py
@@ -94,6 +94,8 @@ def __init__(self, catalog=None, schema_filename=None, schema_dir=None,
distributions = property(get_distributions)
get_fields = search.get_fields
fields = property(get_fields)
get_time_series = search.get_time_series
time_series = property(get_time_series)
get_dataset = search.get_dataset
get_distribution = search.get_distribution
get_field = search.get_field
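Since `core.py` now also binds `get_time_series` as a method and exposes it through a `time_series` property, the same lookup works at the object level. A short sketch, assuming the catalog class here is `pydatajson.DataJson` and a hypothetical catalog path:

```python
from pydatajson import DataJson

catalog = DataJson("samples/time_series_data.json")  # hypothetical path

# Method form, equivalent to search.get_time_series(catalog).
series = catalog.get_time_series()

# Property form added in this commit; it returns the same list.
assert catalog.time_series == series
```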
27 changes: 19 additions & 8 deletions pydatajson/search.py
@@ -14,6 +14,7 @@
from functools import partial

from time_series import distribution_has_time_index, dataset_has_time_series
from time_series import field_is_time_series
from readers import read_catalog
import custom_exceptions as ce

@@ -101,21 +102,26 @@ def get_distributions(catalog, filter_in=None, filter_out=None,
return filtered_distributions


def get_fields(catalog, filter_in=None, filter_out=None, meta_field=None):
def get_fields(catalog, filter_in=None, filter_out=None, meta_field=None,
only_time_series=False):
filter_in = filter_in or {}
filter_out = filter_out or {}
catalog = read_catalog(catalog)

fields = []
for distribution in get_distributions(catalog, filter_in, filter_out):
for distribution in get_distributions(catalog, filter_in, filter_out,
only_time_series=only_time_series):
if "field" in distribution and isinstance(distribution["field"], list):
for field in distribution["field"]:
                # add the dataset id
field["dataset_identifier"] = distribution[
"dataset_identifier"]
                # add the distribution id
field["distribution_identifier"] = distribution["identifier"]
fields.append(field)
if not only_time_series or field_is_time_series(field,
distribution):
                    # add the dataset id
field["dataset_identifier"] = distribution[
"dataset_identifier"]
                    # add the distribution id
field["distribution_identifier"] = distribution[
"identifier"]
fields.append(field)

filtered_fields = filter(
lambda x: _filter_dictionary(
@@ -130,6 +136,11 @@ def get_fields(catalog, filter_in=None, filter_out=None, meta_field=None):
return filtered_fields


def get_time_series(catalog, **kwargs):
kwargs["only_time_series"] = True
return get_fields(catalog, **kwargs)


def get_dataset(catalog, identifier=None, title=None):
msg = "Se requiere un 'identifier' o 'title' para buscar el dataset."
assert identifier or title, msg
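The wrapper added to `search.py` only forces `only_time_series=True` and then delegates to `get_fields()`, so the two calls below are interchangeable, and any other `get_fields()` keyword (`filter_in`, `filter_out`, `meta_field`) is forwarded unchanged. A sketch with a hypothetical catalog path:

```python
from pydatajson import search

catalog = "samples/time_series_data.json"  # hypothetical path

via_wrapper = search.get_time_series(catalog)
via_get_fields = search.get_fields(catalog, only_time_series=True)

# get_time_series(catalog, **kwargs) delegates to
# get_fields(catalog, only_time_series=True, **kwargs)
assert via_wrapper == via_get_fields
```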
16 changes: 16 additions & 0 deletions pydatajson/time_series.py
@@ -13,6 +13,22 @@
import os


def field_is_time_series(field, distribution=None):
field_may_be_ts = (
not field.get("specialType") and
not field.get("specialTypeDetail") and
(
field.get("type", "").lower() == "number" or
field.get("type", "").lower() == "integer"
) and
field.get("id")
)
    distribution_may_have_ts = (
        not distribution or distribution_has_time_index(distribution)
    )
    return field_may_be_ts and distribution_may_have_ts


def distribution_has_time_index(distribution):
for field in distribution.get('field', []):
if field.get('specialType') == 'time_index':
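The new `field_is_time_series()` helper encodes the rule used above: a field counts as a time series when it has an `id`, a numeric `type` (`number` or `integer`) and no `specialType`, and the distribution, when given, contains a `time_index` field. A sketch with made-up sample dicts:

```python
from pydatajson.time_series import field_is_time_series

distribution = {
    "field": [
        {"title": "indice_tiempo", "specialType": "time_index", "type": "date"},
        {"id": "1.2_OGP_D_1993_T_17", "title": "oferta_global_pib",
         "type": "number"},
    ]
}
time_index, pib = distribution["field"]

assert field_is_time_series(pib, distribution)              # numeric, has id, time index present
assert not field_is_time_series(time_index, distribution)   # specialType set, no id
assert not field_is_time_series({"id": "x", "type": "string"})  # non-numeric type
```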
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@

setup(
name='pydatajson',
version='0.3.18',
version='0.3.19',
description="Paquete en python con herramientas para generar y validar metadatos de catálogos de datos en formato data.json.",
long_description=readme + '\n\n' + history,
author="Datos Argentina",
3 changes: 1 addition & 2 deletions tests/results/distributions_only_time_series.json
@@ -61,15 +61,14 @@
"scrapingIdentifierCell": "F46",
"units": "Millones de pesos a precios de 1993",
"type": "number",
"id": "1.2_DGCP_D_1993_T_27",
"scrapingDataStartCell": "F47"
},
{
"description": "Inversion bruta interna fija, en millones de pesos de 1993 y valores anuales.",
"title": "demanda_global_consumo_publico",
"scrapingIdentifierCell": "G46",
"units": "Millones de pesos a precios de 1993",
"type": "number",
"type": "string",
"id": "1.2_DGCP_D_1993_T_30",
"scrapingDataStartCell": "G47"
}
46 changes: 46 additions & 0 deletions tests/results/time_series.json
@@ -0,0 +1,46 @@
[
{
"distribution_identifier": "1.2",
"description": "PIB a precios de comprador, en millones de pesos de 1993 y valores anuales.",
"title": "oferta_global_pib",
"dataset_identifier": "1",
"scrapingIdentifierCell": "B46",
"units": "Millones de pesos a precios de 1993",
"type": "number",
"id": "1.2_OGP_D_1993_T_17",
"scrapingDataStartCell": "B47"
},
{
"distribution_identifier": "1.2",
"description": "Importación a precios de comprador, en millones de pesos de 1993 y valores anuales.",
"title": "oferta_global_importacion",
"dataset_identifier": "1",
"scrapingIdentifierCell": "C46",
"units": "Millones de pesos a precios de 1993",
"type": "number",
"id": "1.2_OGI_D_1993_T_25",
"scrapingDataStartCell": "C47"
},
{
"distribution_identifier": "1.2",
"description": "Oferta global total a precios de comprador, en millones de pesos de 1993 y valores anuales.",
"title": "demanda_global_exportacion",
"dataset_identifier": "1",
"scrapingIdentifierCell": "D46",
"units": "Millones de pesos a precios de 1993",
"type": "number",
"id": "1.2_DGE_D_1993_T_26",
"scrapingDataStartCell": "D47"
},
{
"distribution_identifier": "1.2",
"description": "Consumo privado, en millones de pesos de 1993 y valores anuales.",
"title": "demanda_global_ibif",
"dataset_identifier": "1",
"scrapingIdentifierCell": "E46",
"units": "Millones de pesos a precios de 1993",
"type": "number",
"id": "1.2_DGI_D_1993_T_19",
"scrapingDataStartCell": "E47"
}
]
5 changes: 2 additions & 3 deletions tests/samples/time_series_data.json
@@ -607,13 +607,12 @@
"type": "number",
"scrapingIdentifierCell": "F46",
"scrapingDataStartCell": "F47",
"units": "Millones de pesos a precios de 1993",
"id": "1.2_DGCP_D_1993_T_27"
"units": "Millones de pesos a precios de 1993"
},
{
"description": "Inversion bruta interna fija, en millones de pesos de 1993 y valores anuales.",
"title": "demanda_global_consumo_publico",
"type": "number",
"type": "string",
"scrapingIdentifierCell": "G46",
"scrapingDataStartCell": "G47",
"units": "Millones de pesos a precios de 1993",
6 changes: 6 additions & 0 deletions tests/test_search.py
@@ -77,6 +77,12 @@ def test_fields(self, expected_result):
pprint(fields)
self.assertEqual(expected_result, fields)

@load_expected_result()
def test_time_series(self, expected_result):
time_series = pydatajson.search.get_time_series(self.catalog_ts)
pprint(time_series)
self.assertEqual(expected_result, time_series)

@load_expected_result()
def test_datasets_filter_in(self, expected_result):
datasets = pydatajson.search.get_datasets(
