Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,6 @@ def make_marginal_drop_instance(question_tokens: List[Token],
# We cannot use `number_indices` field for creating that, because the `ListField` will not be empty
# when we want to create a new empty field. That will lead to error.
numbers_in_passage_field = TextField(number_tokens, token_indexers)
fields["numbers_in_passage"] = numbers_in_passage_field
metadata = {"original_passage": passage_text,
"passage_token_offsets": passage_offsets,
"question_token_offsets": question_offsets,
Expand Down
1 change: 1 addition & 0 deletions allennlp/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from allennlp.models.event2mind import Event2Mind
from allennlp.models.encoder_decoders.simple_seq2seq import SimpleSeq2Seq
from allennlp.models.reading_comprehension.bidaf import BidirectionalAttentionFlow
from allennlp.models.reading_comprehension.naqanet import NumericallyAugmentedQaNet
from allennlp.models.reading_comprehension.qanet import QaNet
from allennlp.models.semantic_parsing.nlvr.nlvr_coverage_semantic_parser import NlvrCoverageSemanticParser
from allennlp.models.semantic_parsing.nlvr.nlvr_direct_semantic_parser import NlvrDirectSemanticParser
Expand Down
1 change: 1 addition & 0 deletions allennlp/models/reading_comprehension/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
from allennlp.models.reading_comprehension.bidaf import BidirectionalAttentionFlow
from allennlp.models.reading_comprehension.bidaf_ensemble import BidafEnsemble
from allennlp.models.reading_comprehension.dialog_qa import DialogQA
from allennlp.models.reading_comprehension.naqanet import NumericallyAugmentedQaNet
from allennlp.models.reading_comprehension.qanet import QaNet
505 changes: 505 additions & 0 deletions allennlp/models/reading_comprehension/naqanet.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def test_read_from_file(self, lazy):
'question',
'passage',
'number_indices',
'numbers_in_passage',
'answer_as_passage_spans',
'answer_as_question_spans',
'answer_as_add_sub_expressions',
Expand All @@ -33,13 +32,13 @@ def test_read_from_file(self, lazy):

# Note that the last number in here is added as padding in case we don't find any numbers
# in a particular passage.
# Just FYI, these are the actual numbers that the indices correspond to:
# [ "1", "25", "2014", "5", "2018", "1", "2", "1", "54", "52", "6", "60", "58", "2010",
# "67", "2010", "1996", "3", "1", "6", "1", "0"]
assert [f.sequence_index for f in instance["number_indices"]] == [
16, 30, 36, 41, 52, 64, 80, 89, 147, 153, 166, 174, 177, 206, 245, 252, 267, 279,
283, 288, 296, -1
]
assert [t.text for t in instance["numbers_in_passage"]] == [
"1", "25", "2014", "5", "2018", "1", "2", "1", "54", "52", "6", "60", "58", "2010",
"67", "2010", "1996", "3", "1", "6", "1", "0"]
assert len(instance["answer_as_passage_spans"]) == 1
assert instance["answer_as_passage_spans"][0] == (46, 47)
assert len(instance["answer_as_question_spans"]) == 1
Expand Down
139 changes: 139 additions & 0 deletions allennlp/tests/fixtures/naqanet/experiment.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
{
"dataset_reader": {
"type": "drop",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 5
}
},
"passage_length_limit": 400,
"question_length_limit": 50,
"skip_when_all_empty": ["passage_span", "question_span", "addition_subtraction", "counting"],
"instance_format": "drop"
},
"validation_dataset_reader": {
"type": "drop",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 2
}
},
"passage_length_limit": 1000,
"question_length_limit": 100,
"skip_when_all_empty": [],
"instance_format": "drop"
},
"train_data_path": "allennlp/tests/fixtures/data/drop.json",
"validation_data_path": "allennlp/tests/fixtures/data/drop.json",
"model": {
"type": "naqanet",
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 10,
"trainable": false
},
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 4
},
"encoder": {
"type": "cnn",
"embedding_dim": 4,
"num_filters": 20,
"ngram_filter_sizes": [
2
]
}
}
}
},
"num_highway_layers": 1,
"phrase_layer": {
"type": "qanet_encoder",
"input_dim": 12,
"hidden_dim": 12,
"attention_projection_dim": 12,
"feedforward_hidden_dim": 12,
"num_blocks": 1,
"num_convs_per_block": 4,
"conv_kernel_size": 7,
"num_attention_heads": 6,
"dropout_prob": 0.1,
"layer_dropout_undecayed_prob": 0.1,
"attention_dropout_prob": 0
},
"matrix_attention_layer": {
"type": "linear",
"tensor_1_dim": 12,
"tensor_2_dim": 12,
"combination": "x,y"
},
"modeling_layer": {
"type": "qanet_encoder",
"input_dim": 12,
"hidden_dim": 12,
"attention_projection_dim": 12,
"feedforward_hidden_dim": 12,
"num_blocks": 1,
"num_convs_per_block": 2,
"conv_kernel_size": 5,
"num_attention_heads": 6,
"dropout_prob": 0.1,
"layer_dropout_undecayed_prob": 0.1,
"attention_dropout_prob": 0
},
"dropout_prob": 0.1,
"regularizer": [
[
".*",
{
"type": "l2",
"alpha": 1e-07
}
]
],
"answering_abilities": [
"passage_span_extraction",
"question_span_extraction",
"addition_subtraction",
"counting"
]
},
"iterator": {
"type": "basic",
"batch_size": 16
},
"trainer": {
"num_epochs": 1,
"grad_norm": 5,
"patience": 10,
"validation_metric": "+f1",
"cuda_device": -1,
"optimizer": {
"type": "adam",
"lr": 0.001,
"betas": [
0.8,
0.999
],
"eps": 1e-07
},
"moving_average": {
"type": "exponential",
"decay": 0.9999
}
}
}
16 changes: 16 additions & 0 deletions allennlp/tests/models/reading_comprehension/naqanet_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pylint: disable=unused-import
from flaky import flaky

from allennlp.common.testing import ModelTestCase


class NumericallyAugmentedQaNetTest(ModelTestCase):
    """
    End-to-end smoke test for the NAQANet model: trains for one epoch on the
    tiny DROP fixture, saves the model, reloads it, and checks predictions match.
    """
    def setUp(self):
        super().setUp()
        # Removed a leftover debug ``print(self.FIXTURES_ROOT)`` here.
        self.set_up_model(self.FIXTURES_ROOT / "naqanet" / "experiment.json",
                          self.FIXTURES_ROOT / "data" / "drop.json")

    @flaky
    def test_model_can_train_save_and_load(self):
        # Marked flaky: training a tiny model for one epoch can occasionally
        # produce non-deterministic numerical differences on save/load.
        self.ensure_model_can_train_save_and_load(self.param_file)
68 changes: 68 additions & 0 deletions allennlp/training/metrics/drop_em_and_f1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import Tuple, List, Union

from overrides import overrides

from allennlp.tools.squad_eval import metric_max_over_ground_truths
from allennlp.tools.drop_eval import (get_metrics as drop_em_and_f1,
answer_json_to_strings)
from allennlp.training.metrics.metric import Metric


@Metric.register("drop")
class DropEmAndF1(Metric):
    """
    Accumulates exact-match and F1 scores for a model's best answer string(s)
    against the labeled answer annotations, using the official DROP evaluator
    (which has special handling for numbers and for questions with multiple
    answer spans, among other things).
    """
    def __init__(self) -> None:
        self._total_em = 0.0
        self._total_f1 = 0.0
        self._count = 0

    @overrides
    def __call__(self, prediction: Union[str, List], ground_truths: List):  # type: ignore
        """
        Parameters
        ----------
        prediction: ``Union[str, List]``
            The predicted answer from the model evaluated. This could be a string, or a list of string
            when multiple spans are predicted as answer.
        ground_truths: ``List``
            All the ground truth answer annotations.
        """
        # ``answer_json_to_strings`` returns ``(answer_strings, answer_type)``;
        # we keep only element [0]. To break metrics down by answer type,
        # group on element [1] instead of discarding it.
        answer_strings = [answer_json_to_strings(annotation)[0] for annotation in ground_truths]
        em, f1 = metric_max_over_ground_truths(drop_em_and_f1, prediction, answer_strings)
        self._total_em += em
        self._total_f1 += f1
        self._count += 1

    @overrides
    def get_metric(self, reset: bool = False) -> Tuple[float, float]:
        """
        Returns
        -------
        Average exact match and F1 score (in that order) as computed by the official DROP script
        over all inputs.
        """
        count = self._count
        exact_match = self._total_em / count if count > 0 else 0
        f1_score = self._total_f1 / count if count > 0 else 0
        if reset:
            self.reset()
        return exact_match, f1_score

    @overrides
    def reset(self):
        self._total_em = 0.0
        self._total_f1 = 0.0
        self._count = 0

    def __str__(self):
        return f"DropEmAndF1(em={self._total_em}, f1={self._total_f1})"
5 changes: 5 additions & 0 deletions doc/api/allennlp.models.reading_comprehension.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ allennlp.models.reading_comprehension
:undoc-members:
:show-inheritance:

.. automodule:: allennlp.models.reading_comprehension.naqanet
:members:
:undoc-members:
:show-inheritance:

.. automodule:: allennlp.models.reading_comprehension.util
:members:
:undoc-members:
Expand Down
7 changes: 7 additions & 0 deletions doc/api/allennlp.training.metrics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ allennlp.training.metrics
* :ref:`CategoricalAccuracy<categorical-accuracy>`
* :ref:`ConllCorefScores<conll-coref-scores>`
* :ref:`Covariance<covariance>`
* :ref:`DropEmAndF1<drop_em_and_f1>`
* :ref:`Entropy<entropy>`
* :ref:`EvalbBracketingScorer<evalb>`
* :ref:`F1Measure<f1-measure>`
Expand Down Expand Up @@ -73,6 +74,12 @@ allennlp.training.metrics
:undoc-members:
:show-inheritance:

.. _drop_em_and_f1:
.. automodule:: allennlp.training.metrics.drop_em_and_f1
:members:
:undoc-members:
:show-inheritance:

.. _entropy:
.. automodule:: allennlp.training.metrics.entropy
:members:
Expand Down