Skip to content
Permalink
Browse files

Use optimal matching for the drop eval and add appropriate tests. (#2853)

* Use optimal matching for the drop eval and add appropriate tests.

* Variable names for pylint

* more pylint
  • Loading branch information...
kentonl authored and matt-gardner committed May 16, 2019
1 parent c59a3a8 commit b20c12ba4a68f5e155fc0cf320dab73fdb077264
Showing with 18 additions and 22 deletions.
  1. +6 −0 allennlp/tests/tools/drop_eval_test.py
  2. +12 −22 allennlp/tools/drop_eval.py
@@ -73,6 +73,12 @@ def test_multi_span_overlap_in_incorrect_cases(self):
assert get_metrics(["ottoman", "Kantakouzenous"],
["ottoman", "army of Kantakouzenous"]) == (0.0, 0.75)

def test_order_invariance(self):
assert get_metrics(["a"], ["a", "b"]) == (0, 0.5)
assert get_metrics(["b"], ["a", "b"]) == (0, 0.5)
assert get_metrics(["b"], ["b", "a"]) == (0, 0.5)


class TestDropEvalFunctional:
def test_json_loader(self):
annotation = {"pid1": {"qa_pairs":[{"answer": {"number": "1"}, "validated_answers": \
@@ -8,6 +8,7 @@
import re

import numpy as np
from scipy.optimize import linear_sum_assignment


# From here through _normalize_answer was originally copied from:
@@ -72,31 +73,20 @@ def _answer_to_bags(answer: Union[str, List[str], Tuple[str, ...]]) -> Tuple[Set

def _align_bags(predicted: List[Set[str]], gold: List[Set[str]]) -> List[float]:
"""
Takes gold and predicted answer sets and first finds a greedy 1-1 alignment
Takes gold and predicted answer sets and first finds the optimal 1-1 alignment
between them and gets maximum metric values over all the answers
"""
f1_scores = []
scores = np.zeros([len(gold), len(predicted)])
for gold_index, gold_item in enumerate(gold):
max_f1 = 0.0
max_index = None
best_alignment: Tuple[Set[str], Set[str]] = (set(), set())
if predicted:
for pred_index, pred_item in enumerate(predicted):
current_f1 = _compute_f1(pred_item, gold_item)
if current_f1 >= max_f1:
best_alignment = (gold_item, pred_item)
max_f1 = current_f1
max_index = pred_index
match_flag = _match_numbers_if_present(*best_alignment)
gold[gold_index] = set()
predicted[max_index] = set()
else:
match_flag = False
if match_flag:
f1_scores.append(max_f1)
else:
f1_scores.append(0.0)
return f1_scores
for pred_index, pred_item in enumerate(predicted):
if _match_numbers_if_present(gold_item, pred_item):
scores[gold_index, pred_index] = _compute_f1(pred_item, gold_item)
row_ind, col_ind = linear_sum_assignment(-scores)

max_scores = np.zeros([len(gold)])
for row, column in zip(row_ind, col_ind):
max_scores[row] = max(max_scores[row], scores[row, column])
return max_scores


def _compute_f1(predicted_bag: Set[str], gold_bag: Set[str]) -> float:

0 comments on commit b20c12b

Please sign in to comment.
You can’t perform that action at this time.