Skip to content

Commit

Permalink
Merge 83ec01c into b17d9ca
Browse files Browse the repository at this point in the history
  • Loading branch information
FScaccheri committed Sep 13, 2019
2 parents b17d9ca + 83ec01c commit 236c66c
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 9 deletions.
17 changes: 17 additions & 0 deletions pydatajson/helpers.py
Expand Up @@ -544,3 +544,20 @@ def filter_by_likely_publisher(central_datasets, catalog_datasets):
def title_in_dataset_list(dataset, dataset_list):
    """Tell whether a dataset's (title, landingPage) pair is present
    in the given collection of such pairs.

    Args:
        dataset (dict): dataset metadata; 'title' and 'landingPage'
            keys are read with .get(), so either may be absent (None).
        dataset_list: collection of (title, landingPage) tuples.

    Returns:
        bool: True if the pair built from `dataset` is in `dataset_list`.
    """
    identity = (dataset.get('title'), dataset.get('landingPage'))
    return identity in dataset_list


def fields_to_uppercase(fields):
    """Merge case variants of field names under a single uppercase key.

    Args:
        fields (dict): mapping of field name to occurrence count
            (e.g. the result of `count_fields`).

    Returns:
        dict: new dict where every key is uppercased and the counts of
        keys differing only in case are summed. `fields` is not mutated.
    """
    # Accumulate into a fresh dict keyed by the canonical (uppercase)
    # form. This fixes two defects of the pop-and-reinsert version:
    # 1. keys with no case (e.g. '123'), where lower() == upper(),
    #    had their count read twice and therefore doubled;
    # 2. mixed-case variants ('Csv') were never merged at all.
    uppercase_fields = {}
    for key, count in fields.items():
        upper_key = key.upper()
        uppercase_fields[upper_key] = uppercase_fields.get(upper_key, 0) + count
    return uppercase_fields
18 changes: 12 additions & 6 deletions pydatajson/indicators.py
Expand Up @@ -10,19 +10,20 @@
from __future__ import print_function, absolute_import
from __future__ import unicode_literals, with_statement

import logging
import json
import logging
import os
from datetime import datetime
from collections import Counter
from datetime import datetime

from six import string_types

from pydatajson.helpers import fields_to_uppercase
from . import helpers
from . import readers
from .indicator_generators import FederationIndicatorsGenerator
from .reporting import generate_datasets_summary
from .search import get_datasets, get_distributions
from .indicator_generators import FederationIndicatorsGenerator

CENTRAL_CATALOG = "http://datos.gob.ar/data.json"
ABSOLUTE_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -133,6 +134,7 @@ def _generate_indicators(catalog, validator=None, only_numeric=False):
if not only_numeric:
if 'dataset' in catalog:
format_count = count_fields(get_distributions(catalog), 'format')
format_count = fields_to_uppercase(format_count)
type_count = count_fields(get_distributions(catalog), 'type')
license_count = count_fields(get_datasets(catalog), 'license')
else:
Expand Down Expand Up @@ -389,10 +391,10 @@ def _generate_date_indicators(catalog, tolerance=0.2, only_numeric=False):
if not periodicity:
continue
# Si la periodicity es eventual, se considera como actualizado
if periodicity == 'eventual':
if _eventual_periodicity(periodicity):
actualizados += 1
prev_periodicity = periodicity_amount.get(periodicity, 0)
periodicity_amount[periodicity] = prev_periodicity + 1
prev_periodicity = periodicity_amount.get('EVENTUAL', 0)
periodicity_amount['EVENTUAL'] = prev_periodicity + 1
continue

# dataset sin fecha de última actualización es desactualizado
Expand Down Expand Up @@ -566,3 +568,7 @@ def count_fields(targets, field):
"""Cuenta la cantidad de values en el key
especificado de una lista de diccionarios"""
return Counter([target.get(field) or 'None' for target in targets])


def _eventual_periodicity(periodicity):
return periodicity in ('eventual', 'EVENTUAL')
46 changes: 44 additions & 2 deletions tests/test_helpers.py
Expand Up @@ -3,14 +3,17 @@

"""Tests del modulo pydatajson."""

from __future__ import unicode_literals
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import with_statement

import os.path
import unittest

import nose
import openpyxl as pyxl

from pydatajson.helpers import fields_to_uppercase
from .context import pydatajson


Expand Down Expand Up @@ -164,7 +167,6 @@ def test_parse_repeating_time_interval_to_str(self):

def test_add_dicts(self):
# Testea la función auxiliar para sumar campos de dicts recursivamente
from pydatajson.helpers import add_dicts

one_dict = {
"distribuciones_formatos_cant": {
Expand Down Expand Up @@ -221,6 +223,46 @@ def test_title_to_name(self):
"exportacion-tomates-ano-2017"
)

def test_fields_to_uppercase_returns_unique_uppercase_keys(self):
    """Counts of lower/upper case variants of the same field must be
    merged under a single uppercase key."""
    fields = {
        'csv': 10,
        'CSV': 20,
        'json': 30,
        'JSON': 40,
    }
    result = fields_to_uppercase(fields)
    self.assertEqual(result, {'CSV': 30, 'JSON': 70})

def test_fields_to_uppercase_keeps_uppercase_fields_intact(self):
    """Keys that are already uppercase keep their counts unchanged."""
    fields = {
        'CSV': 30,
        'JSON': 70,
    }
    self.assertEqual(fields_to_uppercase(fields), dict(fields))

def test_fields_to_uppercase_modifies_all_lowercase_fields(self):
    """All-lowercase keys are converted to uppercase, counts intact."""
    fields = {
        'csv': 10,
        'json': 30,
    }
    expected = {key.upper(): count for key, count in fields.items()}
    self.assertEqual(fields_to_uppercase(fields), expected)

# Allow running this test module directly; nose discovers and runs the
# tests defined above.
if __name__ == '__main__':
    nose.run(defaultTest=__name__)
9 changes: 8 additions & 1 deletion tests/test_indicators.py
Expand Up @@ -9,6 +9,8 @@
import vcr
from nose.tools import assert_true, assert_false, assert_equal

from pydatajson.indicators import _eventual_periodicity

try:
import mock
except ImportError:
Expand Down Expand Up @@ -87,7 +89,7 @@ def test_date_indicators(self):
'datasets_frecuencia_cant': {
'R/P1W': 1,
'R/P1M': 1,
'eventual': 1
'EVENTUAL': 1
},
}

Expand Down Expand Up @@ -624,3 +626,8 @@ def test_federation_indicators_by_id(self):

for k, v in expected.items():
assert_equal(indicators[k], v)

def test_eventual_periodicity(self):
    """Only the exact lower/upper case spellings of 'eventual' count
    as an eventual periodicity."""
    for eventual_value in ('eventual', 'EVENTUAL'):
        assert_true(_eventual_periodicity(eventual_value))
    assert_false(_eventual_periodicity('not eventual'))

0 comments on commit 236c66c

Please sign in to comment.