Skip to content

Commit

Permalink
Merge pull request #1065 from NickCrews/labeler-rename
Browse files: browse the repository at this point in the history
Refactor labeler.py
  • Loading branch information
fgregg committed Sep 2, 2022
2 parents (5531945 + 1212a7b); commit 5742efc
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 240 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
.env
.coverage*
htmlcov
cpredicates.c
*.code-workspace
libdistance-0.2.1
Expand Down
8 changes: 6 additions & 2 deletions dedupe/_typing.py
Expand Up @@ -81,10 +81,14 @@ class TrainingData(TypedDict):


class Classifier(Protocol):
def fit(self, X: object, y: object) -> None:
"""Takes an array of pairwise distances and computes the likelihood they are a pair."""

def fit(self, X: numpy.typing.NDArray[numpy.float_], y: LabelsLike) -> None:
...

def predict_proba(self, X: object) -> numpy.typing.NDArray[numpy.float_]:
def predict_proba(
self, X: numpy.typing.NDArray[numpy.float_]
) -> numpy.typing.NDArray[numpy.float_]:
...


Expand Down
8 changes: 2 additions & 6 deletions dedupe/api.py
Expand Up @@ -1285,8 +1285,6 @@ class Dedupe(ActiveMatching, DedupeMatching):
entity.
"""

ActiveLearner = labeler.DedupeDisagreementLearner

def prepare_training(
self,
data: Data,
Expand Down Expand Up @@ -1326,7 +1324,7 @@ def prepare_training(
# existing training data, so add them to data dictionary
examples, y = flatten_training(self.training_pairs)

self.active_learner = self.ActiveLearner(
self.active_learner = labeler.DedupeDisagreementLearner(
self.data_model,
data,
index_include=examples,
Expand All @@ -1346,8 +1344,6 @@ class Link(ActiveMatching):
Mixin Class for Active Learning Record Linkage
"""

ActiveLearner = labeler.RecordLinkDisagreementLearner

def prepare_training(
self,
data_1: Data,
Expand Down Expand Up @@ -1395,7 +1391,7 @@ def prepare_training(
# existing training data, so add them to data dictionaries
examples, y = flatten_training(self.training_pairs)

self.active_learner = self.ActiveLearner(
self.active_learner = labeler.RecordLinkDisagreementLearner(
self.data_model,
data_1,
data_2,
Expand Down

0 comments on commit 5742efc

Please sign in to comment.