Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Levenshtein distance from Jellyfish library #1389

Merged
merged 1 commit into from
Apr 6, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions beets/autotag/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from beets import plugins
from beets import config
from beets.autotag import mb
from beets.util import levenshtein
from jellyfish import levenshtein_distance
from unidecode import unidecode

log = logging.getLogger('beets')
Expand Down Expand Up @@ -209,7 +209,7 @@ def _string_dist_basic(str1, str2):
str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
if not str1 and not str2:
return 0.0
return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2)))


def string_dist(str1, str2):
Expand Down
23 changes: 0 additions & 23 deletions beets/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,29 +564,6 @@ def as_string(value):
return unicode(value)


def levenshtein(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    A dynamic-programming implementation adapted from Wikibooks:
    http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/
    Levenshtein_distance#Python

    The result is the minimum number of single-element insertions,
    deletions and substitutions needed to turn `s1` into `s2`. The
    arguments may be given in either order; the distance is symmetric.
    Only two rows of the DP table are held at any time.
    """
    # Make `s1` the longer sequence so that each DP row (sized by `s2`)
    # is as short as possible.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    if not s1:
        # `s1` is the longer one, so both sequences are empty here.
        return len(s2)

    # Row 0: turning the empty prefix of `s1` into each prefix of `s2`
    # costs one insertion per element. `range` (materialised as a list)
    # is used instead of the Python-2-only `xrange`.
    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        # First cell: deleting the first i + 1 elements of `s1`.
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # The comparison is a bool, which adds as 0 (match) or 1.
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def plurality(objs):
"""Given a sequence of comparable objects, returns the object that
is most common in the set and the frequency of that object. The
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def _read(fn):
'unidecode',
'musicbrainzngs>=0.4',
'pyyaml',
'jellyfish',
] + (['colorama'] if (sys.platform == 'win32') else []) +
(['ordereddict'] if sys.version_info < (2, 7, 0) else []),

Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ deps =
responses
pathlib
pyxdg
jellyfish
commands =
nosetests {posargs}

Expand Down