Skip to content

Commit

Permalink
New dependency: fuzzywuzzy.
Browse files Browse the repository at this point in the history
We now have a fuzzy matching method for tags. It comes at the cost of
another dependency, but it's pure Python, and it could come in really handy
when filtering on tags when you have a couple of variations of
essentially the same stupid thing.
  • Loading branch information
dotsdl committed Mar 19, 2016
1 parent 4207b8e commit 9d0eb6d
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 3 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@
scripts=[],
license='BSD',
long_description=open('README.rst').read(),
install_requires=['asciitree', 'pathlib', 'scandir', 'six']
install_requires=['asciitree', 'pathlib', 'scandir', 'six', 'fuzzywuzzy']
)
46 changes: 44 additions & 2 deletions src/datreant/core/agglimbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
"""
from six import string_types, with_metaclass

from fuzzywuzzy import process

from . import filesystem
from . import _AGGTREELIMBS, _AGGLIMBS
from .collections import Bundle
Expand Down Expand Up @@ -57,10 +59,10 @@ def __init__(self, collection):
super(AggTags, self).__init__(collection)

def __repr__(self):
return "<AggTags({})>".format(self.all)
return "<AggTags({})>".format(list(self.all))

def __str__(self):
tags = self.all
tags = list(self.all)
agg = "Tags"
majsep = "="
seplength = len(agg)
Expand Down Expand Up @@ -134,6 +136,46 @@ def clear(self):
for member in self._collection:
member.tags.clear()

def fuzzy(self, tag, threshold=80, scope='all'):
    """Get a tuple of existing tags that fuzzily match the given tag(s).

    Parameters
    ----------
    tag : str or list
        Tag or tags to get fuzzy matches for.
    threshold : int
        Lowest match score to return; the bound is inclusive, so a tag
        scoring exactly `threshold` is returned. Setting to 0 will return
        every tag, while setting to 100 will return only the
        highest-scoring (exact) matches.
    scope : {'all', 'any'}
        Tags to use. 'all' will use only tags found within all Treants in
        collection, while 'any' will use tags found within at least one
        Treant in collection.

    Returns
    -------
    matches : tuple
        Tuple of tags that match. Results for each queried tag are
        concatenated in the order the queries were given, so an existing
        tag that matches more than one query appears more than once.

    Raises
    ------
    ValueError
        If `scope` is neither 'all' nor 'any'.

    """
    # Normalize the input to a list of queries; a distinct local name is
    # used so the `tag` parameter is not rebound by the loop below.
    if isinstance(tag, string_types):
        queries = [tag]
    else:
        queries = tag

    if scope == 'all':
        choices = self.all
    elif scope == 'any':
        choices = self.any
    else:
        raise ValueError("Scope can only be 'any' or 'all'")

    matches = []

    for query in queries:
        # Each result is indexed as (choice, score, ...). The comparison is
        # inclusive so that `threshold` really is the lowest score returned
        # (a strict `>` made threshold=100 match nothing).
        matches.extend(
            result[0]
            for result in process.extract(query, choices, limit=None)
            if result[1] >= threshold)

    return tuple(matches)


class AggCategories(AggLimb):
"""Interface to categories.
Expand Down
31 changes: 31 additions & 0 deletions src/datreant/core/limbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from six import string_types, with_metaclass
from collections import defaultdict

from fuzzywuzzy import process

from . import filesystem
from .collections import Bundle
from . import _TREELIMBS, _LIMBS
Expand Down Expand Up @@ -260,6 +262,35 @@ def clear(self):
with self._treant._write:
self._treant._state['tags'] = list()

def fuzzy(self, tag, threshold=80):
    """Get a tuple of existing tags that fuzzily match the given tag(s).

    Parameters
    ----------
    tag : str or list
        Tag or tags to get fuzzy matches for.
    threshold : int
        Lowest match score to return; the bound is inclusive, so a tag
        scoring exactly `threshold` is returned. Setting to 0 will return
        every tag, while setting to 100 will return only the
        highest-scoring (exact) matches.

    Returns
    -------
    matches : tuple
        Tuple of tags that match. Results for each queried tag are
        concatenated in the order the queries were given, so an existing
        tag that matches more than one query appears more than once.

    """
    # Normalize the input to a list of queries; a distinct local name is
    # used so the `tag` parameter is not rebound by the loop below.
    if isinstance(tag, string_types):
        queries = [tag]
    else:
        queries = tag

    matches = []

    for query in queries:
        # The candidates passed to the matcher are this object's own tags
        # (`self`). Each result is indexed as (choice, score, ...). The
        # comparison is inclusive so that `threshold` really is the lowest
        # score returned (a strict `>` made threshold=100 match nothing).
        matches.extend(
            result[0]
            for result in process.extract(query, self, limit=None)
            if result[1] >= threshold)

    return tuple(matches)


class Categories(Limb):
"""Interface to categories.
Expand Down

0 comments on commit 9d0eb6d

Please sign in to comment.