Skip to content

Commit

Permalink
make sure that metaphoneToken returns unique bkv
Browse files Browse the repository at this point in the history
  • Loading branch information
fgregg committed Feb 20, 2015
1 parent 39a9599 commit 530ddc3
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 5 deletions.
8 changes: 4 additions & 4 deletions dedupe/predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,10 @@ def doubleMetaphone(field) :
return [metaphone for metaphone in doublemetaphone(field) if metaphone]

def metaphoneToken(field) :
    """Return the set of unique, non-empty metaphone codes for the tokens of *field*.

    *field* is split on whitespace; every distinct token is passed to
    ``doublemetaphone`` and all non-empty codes it yields are collected.
    A set is returned so that each blocking key value appears only once,
    even when several tokens map to the same code.
    """
    codes = set()
    # Deduplicate tokens first so repeated words are only encoded once.
    for token in set(field.split()) :
        # doublemetaphone may yield empty codes; those are not usable keys.
        codes.update(code for code in doublemetaphone(token) if code)
    return codes

def existsPredicate(field) :
try :
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
setup(
name='dedupe',
url='https://github.com/datamade/dedupe',
version='0.7.7.1.0',
version='0.7.7.1.1',
description='A python library for accurate and scaleable data deduplication and entity-resolution',
packages=['dedupe', 'dedupe.variables'],
ext_modules=[Extension('dedupe.cpredicates', ['src/cpredicates.c'])],
Expand Down
5 changes: 5 additions & 0 deletions tests/test_predicates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import unittest
from dedupe import predicates

class TestMetaphone(unittest.TestCase):
    """Checks for the metaphone-based predicates."""

    def test_metaphone_token(self) :
        """metaphoneToken must return a set of unique block key values."""
        block_val = predicates.metaphoneToken('9301 S. State St. ')
        # assertEqual (rather than a bare assert) survives `python -O`
        # and reports the differing set members on failure.
        self.assertEqual(block_val, set([u'STT', u'SS']))

class TestWholeSet(unittest.TestCase):
def setUp(self):
self.s1 = set(['red', 'blue', 'green'])
Expand Down

0 comments on commit 530ddc3

Please sign in to comment.