Skip to content

Commit

Permalink
Fix concatenate not taking the best value when multi-mapping (#123)
Browse files: browse the repository at this point in the history.
  • Loading branch information
akariv committed Feb 5, 2020
1 parent ad374b2 commit 48388c1
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dataflows/processors/concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
def concatenator(resources, all_target_fields, field_mapping):
for resource_ in resources:
for row in resource_:
processed = dict((k, '') for k in all_target_fields)
processed = dict((k, None) for k in all_target_fields)
values = [(field_mapping[k], v) for (k, v)
in row.items()
if k in field_mapping]
if k in field_mapping and v is not None]
if len(values) == 0:
message = 'Got an empty row after concatenation' +\
'(resource=%s, source=%r)' % (resource_.res.name, row)
Expand Down
25 changes: 25 additions & 0 deletions tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,31 @@ def test_concatenate():
]


def test_concatenate_multifield():
    """Check ``concatenate`` when two source fields map to one target field.

    Source fields 'b' and 'c' are both mapped to target 'f2'. The expected
    output takes the non-None value when only one of them is set (or when
    one is missing from the row altogether), and the value of 'c' when both
    are set.
    """
    from dataflows import concatenate

    # Rows cover: 'c' is None, 'b' is None, 'b' absent, and both present.
    source_rows = [
        {'a': 1, 'b': 2, 'c': None},
        {'a': 2, 'b': None, 'c': 3},
        {'a': 3, 'c': 4},
        {'a': 3, 'b': 6, 'c': 4},
    ]
    # Target field -> list of source fields feeding it.
    mapping = {
        'f1': ['a'],
        'f2': ['b', 'c'],
    }
    expected_rows = [
        {'f1': 1, 'f2': 2},
        {'f1': 2, 'f2': 3},
        {'f1': 3, 'f2': 4},
        {'f1': 3, 'f2': 4},
    ]

    flow = Flow(source_rows, concatenate(mapping))
    results, _, _ = flow.results()

    # Only the first (and only) resource is inspected.
    assert results[0] == expected_rows



def test_filter_rows():
from dataflows import filter_rows

Expand Down

0 comments on commit 48388c1

Please sign in to comment.