
Commit b20c12b

Use optimal matching for the drop eval and add appropriate tests. (#2853)

* Use optimal matching for the drop eval and add appropriate tests.

* Variable names for pylint

* more pylint
kentonl authored and matt-gardner committed May 16, 2019
1 parent c59a3a8 commit b20c12b
Showing 2 changed files with 18 additions and 22 deletions.
6 changes: 6 additions & 0 deletions allennlp/tests/tools/drop_eval_test.py
@@ -73,6 +73,12 @@ def test_multi_span_overlap_in_incorrect_cases(self):
         assert get_metrics(["ottoman", "Kantakouzenous"],
                            ["ottoman", "army of Kantakouzenous"]) == (0.0, 0.75)
 
+    def test_order_invariance(self):
+        assert get_metrics(["a"], ["a", "b"]) == (0, 0.5)
+        assert get_metrics(["b"], ["a", "b"]) == (0, 0.5)
+        assert get_metrics(["b"], ["b", "a"]) == (0, 0.5)
+
+
 class TestDropEvalFunctional:
     def test_json_loader(self):
         annotation = {"pid1": {"qa_pairs":[{"answer": {"number": "1"}, "validated_answers": \
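The new test pins down an order-invariance bug in the old greedy alignment: gold bags were walked in order and each one consumed its best-scoring predicted bag, so the metric could depend on the order in which answers were listed. Optimal matching removes that dependence. Below is a minimal standalone sketch, not the repository's code (the helpers greedy_alignment_score and optimal_alignment_score are hypothetical names), contrasting the two strategies on a toy score matrix:

import numpy as np
from scipy.optimize import linear_sum_assignment

def greedy_alignment_score(scores: np.ndarray) -> float:
    # Each gold row, taken in order, grabs its best remaining predicted column.
    scores = scores.copy()
    total = 0.0
    for row in scores:
        best = int(np.argmax(row))
        total += float(row[best])
        scores[:, best] = 0.0  # that predicted bag is now consumed
    return total

def optimal_alignment_score(scores: np.ndarray) -> float:
    # linear_sum_assignment minimizes total cost, so negate the scores to maximize.
    row_ind, col_ind = linear_sum_assignment(-scores)
    return float(scores[row_ind, col_ind].sum())

pairwise_f1 = np.array([[1.0, 0.9],   # gold bag 0 scored against predicted bags 0 and 1
                        [1.0, 0.0]])  # gold bag 1 scored against predicted bags 0 and 1
print(greedy_alignment_score(pairwise_f1))   # 1.0 -- gold 0 takes predicted 0 and starves gold 1
print(optimal_alignment_score(pairwise_f1))  # 1.9 -- gold 0 -> predicted 1, gold 1 -> predicted 0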
34 changes: 12 additions & 22 deletions allennlp/tools/drop_eval.py
@@ -8,6 +8,7 @@
 import re
 
 import numpy as np
+from scipy.optimize import linear_sum_assignment
 
 
 # From here through _normalize_answer was originally copied from:
@@ -72,31 +73,20 @@ def _answer_to_bags(answer: Union[str, List[str], Tuple[str, ...]]) -> Tuple[Set
 
 def _align_bags(predicted: List[Set[str]], gold: List[Set[str]]) -> List[float]:
     """
-    Takes gold and predicted answer sets and first finds a greedy 1-1 alignment
+    Takes gold and predicted answer sets and first finds the optimal 1-1 alignment
     between them and gets maximum metric values over all the answers
     """
-    f1_scores = []
+    scores = np.zeros([len(gold), len(predicted)])
     for gold_index, gold_item in enumerate(gold):
-        max_f1 = 0.0
-        max_index = None
-        best_alignment: Tuple[Set[str], Set[str]] = (set(), set())
-        if predicted:
-            for pred_index, pred_item in enumerate(predicted):
-                current_f1 = _compute_f1(pred_item, gold_item)
-                if current_f1 >= max_f1:
-                    best_alignment = (gold_item, pred_item)
-                    max_f1 = current_f1
-                    max_index = pred_index
-            match_flag = _match_numbers_if_present(*best_alignment)
-            gold[gold_index] = set()
-            predicted[max_index] = set()
-        else:
-            match_flag = False
-        if match_flag:
-            f1_scores.append(max_f1)
-        else:
-            f1_scores.append(0.0)
-    return f1_scores
+        for pred_index, pred_item in enumerate(predicted):
+            if _match_numbers_if_present(gold_item, pred_item):
+                scores[gold_index, pred_index] = _compute_f1(pred_item, gold_item)
+    row_ind, col_ind = linear_sum_assignment(-scores)
+
+    max_scores = np.zeros([len(gold)])
+    for row, column in zip(row_ind, col_ind):
+        max_scores[row] = max(max_scores[row], scores[row, column])
+    return max_scores
 
 
 def _compute_f1(predicted_bag: Set[str], gold_bag: Set[str]) -> float:
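For reference, scipy's linear_sum_assignment solves the linear sum assignment problem: given a cost matrix, it selects at most one column per row (and one row per column) so that the total cost is minimized. The evaluator wants to maximize total F1, so the score matrix is negated before the call; gold bags left unmatched (e.g. when there are fewer predicted bags than gold bags) simply keep a score of zero in max_scores. A quick usage sketch with a made-up score matrix:

import numpy as np
from scipy.optimize import linear_sum_assignment

# f1[g, p] = F1 of gold bag g against predicted bag p (toy values).
f1 = np.array([[0.0, 1.0, 0.5],
               [1.0, 0.0, 0.0]])
row_ind, col_ind = linear_sum_assignment(-f1)  # negate: the solver minimizes cost
print(row_ind, col_ind)            # [0 1] [1 0] -- the pairing with maximum total F1
print(f1[row_ind, col_ind].sum())  # 2.0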
