Skip to content

Commit

Permalink
implements min max score printing as method
Browse files Browse the repository at this point in the history
  • Loading branch information
fritshermans committed Mar 5, 2022
1 parent 430d1a3 commit 328d6f9
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions deduplipy/active_learning/active_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ def _print_score_histogram(self, X: pd.DataFrame) -> None:
hist = pd.DataFrame({'count': count, 'score': division[1:]})
print(hist[['score', 'count']].to_string(index=False))

def _print_min_max_scores(self, X):
    """
    Print the lowest and highest score predicted by the learner.

    The rows of `self.train_samples` and `X` are concatenated, their
    'similarities' values are passed to `self.learner.predict_proba`, and
    the column-wise maxima of the result are used for the two printed lines.

    Args:
        X: pandas DataFrame with a 'similarities' column

    """
    combined = pd.concat((self.train_samples, X))
    similarities = combined['similarities'].tolist()
    probabilities = self.learner.predict_proba(similarities)
    column_maxima = probabilities.max(axis=0)
    # NOTE(review): presumably column 0 is the "no match" class and column 1
    # the "match" class, so 1 - max(column 0) is the lowest match score --
    # confirm against the learner's class ordering.
    lowest_score = 1 - column_maxima[0]
    print(f'lowest score: {lowest_score:.2f}')
    # Column 1 is read only after the first line has been printed.
    highest_score = column_maxima[1]
    print(f'highest score: {highest_score:.2f}')

def fit(self, X: pd.DataFrame) -> 'ActiveStringMatchLearner':
"""
Fit ActiveStringMatchLearner instance on pairs of strings
Expand All @@ -125,9 +131,7 @@ def fit(self, X: pd.DataFrame) -> 'ActiveStringMatchLearner':
uncertainty = 1 - (self.learner.predict_proba(query_inst)[0]).max()
self.uncertainties.append(uncertainty)
if self.verbose >= 2:
pred_max = self.learner.predict_proba(X['similarities'].tolist()).max(axis=0)
print(f'lowest score: {1-pred_max[0]:.2f}')
print(f'highest score: {pred_max[1]:.2f}')
self._print_min_max_scores(X)
except:
pass
y_new = self._get_active_learning_input(X.iloc[query_idx])
Expand Down

0 comments on commit 328d6f9

Please sign in to comment.