Skip to content

Commit

Permalink
Merge pull request #837 from vintasoftware/fix-836-escape-colon
Browse the repository at this point in the history
Closes #836: escape colon at CompoundPredicate
  • Loading branch information
fgregg committed Jul 20, 2020
2 parents 5092f25 + 9862fef commit 6c85d53
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 3 deletions.
11 changes: 8 additions & 3 deletions dedupe/predicates.py
Expand Up @@ -329,9 +329,14 @@ def __name__(self):
def __call__(self, record, **kwargs):
    """Build the compound block keys for ``record``.

    Each predicate obtained by iterating ``self`` is called with
    ``record`` and ``kwargs``; the cartesian product of their results is
    taken, and every combination is joined into one string with ``:``.

    Args:
        record: The record handed unchanged to every predicate.
        **kwargs: Extra keyword arguments forwarded to every predicate.

    Returns:
        A list with one joined key per combination, in the order produced
        by ``itertools.product``. Colons inside an individual part are
        backslash-escaped before joining.
    """
    predicate_keys = [predicate(record, **kwargs) for predicate in self]
    return [
        u':'.join(
            # must escape : to avoid confusion with : join separator
            # NOTE(review): backslashes themselves are not escaped, so the
            # parts ('p\\', ':q') and ('p:\\', 'q') still produce the same
            # key; confirm whether that matters for callers.
            b.replace(u':', u'\\:') for b in block_key
        )
        for block_key in itertools.product(*predicate_keys)
    ]


def wholeFieldPredicate(field: Any) -> Tuple[str]:
Expand Down
42 changes: 42 additions & 0 deletions tests/test_predicates.py
Expand Up @@ -80,5 +80,47 @@ def test_round_to_1(self):
assert predicates.roundTo1(-22315) == (u'-20000',)


class TestCompoundPredicate(unittest.TestCase):
    '''
    Regression tests for issue #836: block keys produced by a
    CompoundPredicate must stay distinct when their parts contain colons.
    '''

    def _compound_block_keys(self, col_1, col_2):
        '''
        Apply a CompoundPredicate made of one commonSetElementPredicate per
        column to a two-column record and return the resulting block keys.
        '''
        simple_predicates = [
            predicates.SimplePredicate(
                predicates.commonSetElementPredicate, column)
            for column in ('col_1', 'col_2')
        ]
        record = {'col_1': col_1, 'col_2': col_2}
        compound = predicates.CompoundPredicate(simple_predicates)
        return compound(record)

    def test_escapes_colon(self):
        '''
        Values containing a plain colon yield four distinct keys.
        '''
        block_val = self._compound_block_keys(
            ['foo:', 'foo'],
            [':bar', 'bar'],
        )
        expected = ['foo\\::\\:bar', 'foo\\::bar', 'foo:\\:bar', 'foo:bar']
        assert len(set(block_val)) == 4
        assert block_val == expected

    def test_escapes_escaped_colon(self):
        '''
        Values that already contain a backslash-colon pair yield four
        distinct keys.
        '''
        block_val = self._compound_block_keys(
            ['foo\\:', 'foo'],
            ['\\:bar', 'bar'],
        )
        expected = ['foo\\\\::\\\\:bar', 'foo\\\\::bar', 'foo:\\\\:bar', 'foo:bar']
        assert len(set(block_val)) == 4
        assert block_val == expected


if __name__ == '__main__':
    # Let the test module be executed directly as a script.
    unittest.main()

0 comments on commit 6c85d53

Please sign in to comment.