Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

273 frecuencias formatos uppercase #278

Merged
merged 8 commits into from
Sep 16, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 19 additions & 0 deletions pydatajson/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,22 @@ def filter_by_likely_publisher(central_datasets, catalog_datasets):
def title_in_dataset_list(dataset, dataset_list):
    """Tell whether a dataset's (title, landingPage) pair is in a collection.

    Args:
        dataset (dict): mapping read with ``.get('title')`` and
            ``.get('landingPage')``; a missing key contributes ``None``.
        dataset_list: container of ``(title, landingPage)`` tuples that
            supports the ``in`` operator.

    Returns:
        bool: True when the pair built from ``dataset`` is contained in
        ``dataset_list``.
    """
    identity = (dataset.get('title'), dataset.get('landingPage'))
    return identity in dataset_list


def fields_to_uppercase(fields):
    """Merge the counts of keys that exist in both lower and upper case.

    For every key whose all-lowercase and all-uppercase spellings are both
    present in ``fields``, the two counts are added together and stored
    under the uppercase key; the lowercase key is dropped. Keys present in
    only one casing (or in mixed case) are left untouched.

    Args:
        fields (dict): mapping of string keys to counts that support ``+``.

    Returns:
        dict: a shallow copy of ``fields`` with the lower/upper duplicates
        merged. The input mapping is not modified.
    """
    uppercase_fields = fields.copy()

    for key in fields:
        lower_key = key.lower()
        upper_key = key.upper()

        # A key without cased characters (e.g. '123' or '') is its own
        # lowercase and uppercase form; "merging" it with itself would
        # double its count, so there is nothing to do for it.
        if lower_key == upper_key:
            continue

        # Check that the key exists in both lowercase and uppercase, in the
        # original dict as well as in the copy being returned. The second
        # check prevents merging the same pair twice (once when visiting
        # the lowercase key and again when visiting the uppercase one).
        casings = (lower_key, upper_key)
        if all(k in fields for k in casings) and \
                all(k in uppercase_fields for k in casings):
            uppercase_fields.pop(lower_key)
            uppercase_fields[upper_key] = \
                fields[lower_key] + fields[upper_key]

    return uppercase_fields
14 changes: 8 additions & 6 deletions pydatajson/indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,20 @@
from __future__ import print_function, absolute_import
from __future__ import unicode_literals, with_statement

import logging
import json
import logging
import os
from datetime import datetime
from collections import Counter
from datetime import datetime

from six import string_types

from pydatajson.helpers import fields_to_uppercase
from . import helpers
from . import readers
from .indicator_generators import FederationIndicatorsGenerator
from .reporting import generate_datasets_summary
from .search import get_datasets, get_distributions
from .indicator_generators import FederationIndicatorsGenerator

CENTRAL_CATALOG = "http://datos.gob.ar/data.json"
ABSOLUTE_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -133,6 +134,7 @@ def _generate_indicators(catalog, validator=None, only_numeric=False):
if not only_numeric:
if 'dataset' in catalog:
format_count = count_fields(get_distributions(catalog), 'format')
format_count = fields_to_uppercase(format_count)
type_count = count_fields(get_distributions(catalog), 'type')
license_count = count_fields(get_datasets(catalog), 'license')
else:
Expand Down Expand Up @@ -389,10 +391,10 @@ def _generate_date_indicators(catalog, tolerance=0.2, only_numeric=False):
if not periodicity:
continue
# Si la periodicity es eventual, se considera como actualizado
if periodicity == 'eventual':
if periodicity in ('eventual', 'EVENTUAL'):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Esto podría ser un método privado, solo por legibilidad: `if eventual_periodicity(periodicity): ...`

actualizados += 1
prev_periodicity = periodicity_amount.get(periodicity, 0)
periodicity_amount[periodicity] = prev_periodicity + 1
prev_periodicity = periodicity_amount.get('EVENTUAL', 0)
periodicity_amount['EVENTUAL'] = prev_periodicity + 1
continue

# dataset sin fecha de última actualización es desactualizado
Expand Down
21 changes: 19 additions & 2 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@

"""Tests del modulo pydatajson."""

from __future__ import unicode_literals
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import with_statement

import os.path
import unittest

import nose
import openpyxl as pyxl

from pydatajson.helpers import fields_to_uppercase
from .context import pydatajson


Expand Down Expand Up @@ -164,7 +167,6 @@ def test_parse_repeating_time_interval_to_str(self):

def test_add_dicts(self):
# Testea la función auxiliar para sumar campos de dicts recursivamente
from pydatajson.helpers import add_dicts

one_dict = {
"distribuciones_formatos_cant": {
Expand Down Expand Up @@ -221,6 +223,21 @@ def test_title_to_name(self):
"exportacion-tomates-ano-2017"
)

def test_fields_to_uppercase_returns_unique_uppercase_keys(self):
    # Keys that differ only by case must collapse into the uppercase key,
    # with their counts added together.
    mixed_case_counts = {'csv': 10, 'CSV': 20, 'json': 30, 'JSON': 40}
    merged_counts = {'CSV': 30, 'JSON': 70}

    result = fields_to_uppercase(mixed_case_counts)

    self.assertEqual(result, merged_counts)


if __name__ == '__main__':
nose.run(defaultTest=__name__)
2 changes: 1 addition & 1 deletion tests/test_indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_date_indicators(self):
'datasets_frecuencia_cant': {
'R/P1W': 1,
'R/P1M': 1,
'eventual': 1
'EVENTUAL': 1
},
}

Expand Down