Skip to content

Commit

Permalink
check that there are some matches, closes #565
Browse files: browse the repository at this point in the history
  • Loading branch information
fgregg committed Jan 20, 2022
1 parent 3cec4ff commit 8cc41b0
Showing 1 changed file with 2 additions and 4 deletions.
6 changes: 2 additions & 4 deletions dedupe/training.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,6 +28,8 @@ def learn(self, matches, recall, candidate_types='simple'):
Takes in a set of training pairs and predicates and tries to find
a good set of blocking rules.
'''
assert matches, "You must supply at least one pair of matching records to learn blocking rules."

comparison_cover = self.comparison_cover
match_cover = self.cover(matches)

Expand All @@ -41,7 +43,6 @@ def learn(self, matches, recall, candidate_types='simple'):
target_cover = int(recall * len(matches))

if len(coverable_dupes) < target_cover:
logger.warning(OUT_OF_PREDICATES_WARNING)
logger.debug(uncoverable_dupes)
target_cover = len(coverable_dupes)

Expand Down Expand Up @@ -364,6 +365,3 @@ def __call__(self, iterable: Iterable) -> frozenset:
for k in iterable
if k in self.replacements)
return frozenset(result)


OUT_OF_PREDICATES_WARNING = "Ran out of predicates: Dedupe tries to find blocking rules that will work well with your data. Sometimes it can't find great ones, and you'll get this warning. It means that there are some pairs of true records that dedupe may never compare. If you are getting bad results, try increasing the `max_comparison` argument to the train method" # noqa: E501

0 comments on commit 8cc41b0

Please sign in to comment.