diff --git a/dataflows/processors/concatenate.py b/dataflows/processors/concatenate.py index c6cd8cf..31d4c40 100644 --- a/dataflows/processors/concatenate.py +++ b/dataflows/processors/concatenate.py @@ -6,10 +6,10 @@ def concatenator(resources, all_target_fields, field_mapping): for resource_ in resources: for row in resource_: - processed = dict((k, '') for k in all_target_fields) + processed = dict((k, None) for k in all_target_fields) values = [(field_mapping[k], v) for (k, v) in row.items() - if k in field_mapping] + if k in field_mapping and v is not None] if len(values) == 0: message = 'Got an empty row after concatenation' +\ '(resource=%s, source=%r)' % (resource_.res.name, row) diff --git a/tests/test_lib.py b/tests/test_lib.py index 22e51c4..1212b5d 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -308,6 +308,31 @@ def test_concatenate(): ] +def test_concatenate_multifield(): + from dataflows import concatenate + + f = Flow( + [ + {'a': 1, 'b': 2, 'c': None}, + {'a': 2, 'b': None, 'c': 3}, + {'a': 3, 'c': 4}, + {'a': 3, 'b': 6, 'c': 4}, + ], + concatenate({ + 'f1': ['a'], + 'f2': ['b', 'c'], + }) + ) + results, _, _ = f.results() + assert results[0] == [ + {'f1': 1, 'f2': 2}, + {'f1': 2, 'f2': 3}, + {'f1': 3, 'f2': 4}, + {'f1': 3, 'f2': 4}, + ] + + + def test_filter_rows(): from dataflows import filter_rows