Skip to content

Commit

Permalink
Merge 9b8f948 into c99effd
Browse files Browse the repository at this point in the history
  • Loading branch information
lrromero committed Aug 1, 2018
2 parents c99effd + 9b8f948 commit cfc5998
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 4 deletions.
4 changes: 2 additions & 2 deletions pydatajson/helpers.py
Expand Up @@ -233,9 +233,9 @@ def sheet_to_table(worksheet):
return table


def string_to_list(string, sep=",", filter_empty=False):
    """Split `string` on `sep` and return its stripped elements as a list.

    Args:
        string (str): Text holding elements separated by `sep`.
        sep (str): Separator between elements. Defaults to ",".
        filter_empty (bool): When True, drop every element that is empty
            once its surrounding whitespace has been removed (such as
            those produced by leading, trailing or repeated separators).
            Defaults to False, which keeps every element.

    Returns:
        list: The stripped elements, in their original order.
    """
    stripped = [value.strip() for value in string.split(sep)]
    if filter_empty:
        # Emptiness is tested AFTER stripping: a whitespace-only piece
        # (e.g. the tail of "a, b, ") is truthy before strip() but would
        # still end up as "" in the result.
        return [value for value in stripped if value]
    return stripped


def add_dicts(one_dict, other_dict):
Expand Down
4 changes: 2 additions & 2 deletions pydatajson/readers.py
Expand Up @@ -456,14 +456,14 @@ def read_local_xlsx_catalog(xlsx_path, logger=None):
# Transformo campos de texto separado por comas en listas
if "catalog_language" in catalog:
catalog["catalog_language"] = helpers.string_to_list(
catalog["catalog_language"])
catalog["catalog_language"], filter_empty=True)

for dataset in catalog["catalog_dataset"]:
array_fields = ["dataset_superTheme", "dataset_theme", "dataset_tags",
"dataset_keyword", "dataset_language"]
for field in array_fields:
if field in dataset:
dataset[field] = helpers.string_to_list(dataset[field])
dataset[field] = helpers.string_to_list(dataset[field], filter_empty=True)

# Elimino los prefijos de los campos a nivel catálogo
_remove_prefixes_and_unprefixed_fields(catalog, 'catalog_')
Expand Down
Binary file added tests/samples/lists_extra_commas.xlsx
Binary file not shown.
12 changes: 12 additions & 0 deletions tests/test_readers_and_writers.py
Expand Up @@ -249,6 +249,18 @@ def test_read_write_both_formats_yields_the_same(self):

self.assertEqual(catalog_json, catalog)
self.assertEqual(catalog_xlsx, catalog)

def test_read_xlsx_lists_with_extra_commas(self):
    """List fields read from the XLSX sample hold no empty values.

    The sample workbook contains lists written with extra commas; after
    reading it, every checked list must be non-empty and made only of
    non-empty elements.
    """
    sample_path = self.get_sample("lists_extra_commas.xlsx")
    catalog = pydatajson.readers.read_catalog(sample_path)

    # Catalog-level list: present and free of empty elements
    languages = catalog['language']
    self.assertTrue(languages)
    self.assertTrue(all(languages))

    list_fields = ('theme', 'superTheme', 'keyword')
    for dataset in catalog['dataset']:
        for field in list_fields:
            values = dataset[field]
            # The list itself is not empty
            self.assertTrue(values)
            # None of its elements is empty
            self.assertTrue(all(values))


if __name__ == '__main__':
Expand Down

0 comments on commit cfc5998

Please sign in to comment.