Skip to content

Commit

Permalink
Add levenshtein
Browse files Browse the repository at this point in the history
  • Loading branch information
axiak committed Feb 10, 2012
1 parent a04db87 commit ed25dcc
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 3 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,6 +1,8 @@
*.egg-info
/dist/
/lib/
/bin/
/include/
*~
*.pyc
*#*
Binary file added cities.gz
Binary file not shown.
16 changes: 13 additions & 3 deletions fuzzyset/__init__.py
@@ -1,6 +1,7 @@
import re
import math
import collections
import Levenshtein

_non_word_re = re.compile(r'[^\w, ]+')

Expand Down Expand Up @@ -44,6 +45,9 @@ def __getitem__(self, value):
results = [(match_score / (norm * self.items[idx][0]), self.items[idx][1])
for idx, match_score in matches.items()]
results.sort(reverse=True)
results = [(Levenshtein.distance(matched, value), matched)
for _, matched in results[:50]]
results.sort()
if results:
return [result for result in results
if result[0] == results[0][0]]
Expand All @@ -67,18 +71,24 @@ def _iterate_grams(value, gram_size=2):
for i in range(len(simplified) - gram_size + 1):
yield simplified[i:i + gram_size]

if __name__ == '__main__':
with open('./cities') as input_file:
def _interactive_test():
    """Prompt for town names forever and print the fuzzy matches for each.

    Builds a ``FuzzySet`` with ``gram_size=2`` from the stripped lines of
    ``./cities.gz``, then repeatedly reads a name from standard input and
    prints the result of looking it up with ``f[town]``.

    The loop has no exit condition of its own; interrupt the process
    (Ctrl-C or end-of-input) to leave it.
    """
    import gzip
    with gzip.GzipFile('./cities.gz') as input_file:
        f = FuzzySet((line.strip() for line in input_file), gram_size=2)

    while True:
        town = raw_input("Enter town name: ")
        # A parenthesised single argument prints identically with the
        # Python 2 print statement and the Python 3 print function.
        print(f[town])

def _other_test(fuzzy_set=None):
    """Look up every line of ``./origin_cities`` and report the outcome.

    For each stripped line the set's ``get`` method is called.  A ``None``
    result is reported as "Could not find"; a list result is printed next to
    the queried name.  Any other result type prints nothing.

    Args:
        fuzzy_set: Object exposing ``get(name)``.  When omitted, a
            ``FuzzySet`` with ``gram_size=2`` is built from ``./cities.gz``,
            so the function no longer depends on a variable that only exists
            inside ``_interactive_test``.
    """
    if fuzzy_set is None:
        import gzip
        with gzip.GzipFile('./cities.gz') as input_file:
            fuzzy_set = FuzzySet((line.strip() for line in input_file),
                                 gram_size=2)

    with open('./origin_cities') as cities:
        for line in cities:
            name = line.strip()
            result = fuzzy_set.get(name)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            if result is None:
                print("{}: Could not find".format(name))
            elif isinstance(result, list):
                print("{}: {}".format(name, result))

# Script entry point: running the module directly starts the interactive
# prompt; the batch check on the last line is left disabled.
if __name__ == '__main__':
_interactive_test()
#_other_test()
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -15,6 +15,7 @@ def read(fname):
url = "https://github.com/axiak/fuzzyset/",
packages=['fuzzyset'],
long_description=read('README.rst'),
install_requires=['python-levenshtein'],
classifiers=[
"Development Status :: 3 - Alpha",
"License :: OSI Approved :: BSD License",
Expand Down

0 comments on commit ed25dcc

Please sign in to comment.