Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NAQANet model for DROP #2560

Merged
merged 12 commits into from Mar 2, 2019
@@ -314,7 +314,6 @@ def make_marginal_drop_instance(question_tokens: List[Token],
# We cannot use `number_indices` field for creating that, because the `ListField` will not be empty
# when we want to create a new empty field. That will lead to error.
numbers_in_passage_field = TextField(number_tokens, token_indexers)
fields["numbers_in_passage"] = numbers_in_passage_field
metadata = {"original_passage": passage_text,
"passage_token_offsets": passage_offsets,
"question_token_offsets": question_offsets,
Copy path View file
@@ -14,6 +14,7 @@
from allennlp.models.event2mind import Event2Mind
from allennlp.models.encoder_decoders.simple_seq2seq import SimpleSeq2Seq
from allennlp.models.reading_comprehension.bidaf import BidirectionalAttentionFlow
from allennlp.models.reading_comprehension.naqanet import NumericallyAugmentedQaNet
from allennlp.models.reading_comprehension.qanet import QaNet
from allennlp.models.semantic_parsing.nlvr.nlvr_coverage_semantic_parser import NlvrCoverageSemanticParser
from allennlp.models.semantic_parsing.nlvr.nlvr_direct_semantic_parser import NlvrDirectSemanticParser
@@ -8,4 +8,5 @@
from allennlp.models.reading_comprehension.bidaf import BidirectionalAttentionFlow
from allennlp.models.reading_comprehension.bidaf_ensemble import BidafEnsemble
from allennlp.models.reading_comprehension.dialog_qa import DialogQA
from allennlp.models.reading_comprehension.naqanet import NumericallyAugmentedQaNet
from allennlp.models.reading_comprehension.qanet import QaNet

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -19,7 +19,6 @@ def test_read_from_file(self, lazy):
'question',
'passage',
'number_indices',
'numbers_in_passage',
'answer_as_passage_spans',
'answer_as_question_spans',
'answer_as_add_sub_expressions',
@@ -33,13 +32,13 @@ def test_read_from_file(self, lazy):

# Note that the last number in here is added as padding in case we don't find any numbers
# in a particular passage.
# Just FYI, these are the actual numbers that the indices correspond to:
# [ "1", "25", "2014", "5", "2018", "1", "2", "1", "54", "52", "6", "60", "58", "2010",
# "67", "2010", "1996", "3", "1", "6", "1", "0"]
assert [f.sequence_index for f in instance["number_indices"]] == [
16, 30, 36, 41, 52, 64, 80, 89, 147, 153, 166, 174, 177, 206, 245, 252, 267, 279,
283, 288, 296, -1
]
assert [t.text for t in instance["numbers_in_passage"]] == [
"1", "25", "2014", "5", "2018", "1", "2", "1", "54", "52", "6", "60", "58", "2010",
"67", "2010", "1996", "3", "1", "6", "1", "0"]
assert len(instance["answer_as_passage_spans"]) == 1
assert instance["answer_as_passage_spans"][0] == (46, 47)
assert len(instance["answer_as_question_spans"]) == 1
@@ -0,0 +1,139 @@
{
"dataset_reader": {
"type": "drop",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 5
}
},
"passage_length_limit": 400,
"question_length_limit": 50,
"skip_when_all_empty": ["passage_span", "question_span", "addition_subtraction", "counting"],
"instance_format": "drop"
},
"validation_dataset_reader": {
"type": "drop",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 2
}
},
"passage_length_limit": 1000,
"question_length_limit": 100,
"skip_when_all_empty": [],
"instance_format": "drop"
},
"train_data_path": "allennlp/tests/fixtures/data/drop.json",
"validation_data_path": "allennlp/tests/fixtures/data/drop.json",
"model": {
"type": "naqanet",
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 10,
"trainable": false
},
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 4
},
"encoder": {
"type": "cnn",
"embedding_dim": 4,
"num_filters": 20,
"ngram_filter_sizes": [
2
]
}
}
}
},
"num_highway_layers": 1,
"phrase_layer": {
"type": "qanet_encoder",
"input_dim": 12,
"hidden_dim": 12,
"attention_projection_dim": 12,
"feedforward_hidden_dim": 12,
"num_blocks": 1,
"num_convs_per_block": 4,
"conv_kernel_size": 7,
"num_attention_heads": 6,
"dropout_prob": 0.1,
"layer_dropout_undecayed_prob": 0.1,
"attention_dropout_prob": 0
},
"matrix_attention_layer": {
"type": "linear",
"tensor_1_dim": 12,
"tensor_2_dim": 12,
"combination": "x,y"
},
"modeling_layer": {
"type": "qanet_encoder",
"input_dim": 12,
"hidden_dim": 12,
"attention_projection_dim": 12,
"feedforward_hidden_dim": 12,
"num_blocks": 1,
"num_convs_per_block": 2,
"conv_kernel_size": 5,
"num_attention_heads": 6,
"dropout_prob": 0.1,
"layer_dropout_undecayed_prob": 0.1,
"attention_dropout_prob": 0
},
"dropout_prob": 0.1,
"regularizer": [
[
".*",
{
"type": "l2",
"alpha": 1e-07
}
]
],
"answering_abilities": [
"passage_span_extraction",
"question_span_extraction",
"addition_subtraction",
"counting"
]
},
"iterator": {
"type": "basic",
"batch_size": 16
},
"trainer": {
"num_epochs": 1,
"grad_norm": 5,
"patience": 10,
"validation_metric": "+f1",
"cuda_device": -1,
"optimizer": {
"type": "adam",
"lr": 0.001,
"betas": [
0.8,
0.999
],
"eps": 1e-07
},
"moving_average": {
"type": "exponential",
"decay": 0.9999
}
}
}
@@ -0,0 +1,16 @@
#pylint: disable=unused-import
from flaky import flaky

from allennlp.common.testing import ModelTestCase


class NumericallyAugmentedQaNetTest(ModelTestCase):
    """Smoke test for the NAQANet model: it can train on the tiny DROP fixture, then save and load."""
    def setUp(self):
        super().setUp()
        # NOTE: removed a leftover debug ``print(self.FIXTURES_ROOT)`` — test output should stay quiet.
        self.set_up_model(self.FIXTURES_ROOT / "naqanet" / "experiment.json",
                          self.FIXTURES_ROOT / "data" / "drop.json")

    # Training involves randomness (dropout, initialization), so allow a retry on spurious failure.
    @flaky
    def test_model_can_train_save_and_load(self):
        self.ensure_model_can_train_save_and_load(self.param_file)
@@ -0,0 +1,68 @@
from typing import Tuple, List, Union

from overrides import overrides

from allennlp.tools.squad_eval import metric_max_over_ground_truths
from allennlp.tools.drop_eval import (get_metrics as drop_em_and_f1,
answer_json_to_strings)
from allennlp.training.metrics.metric import Metric


@Metric.register("drop")
class DropEmAndF1(Metric):
    """
    This :class:`Metric` takes the best span string computed by a model, along with the answer
    strings labeled in the data, and computes exact match and F1 score using the official DROP
    evaluator (which has special handling for numbers and for questions with multiple answer spans,
    among other things).
    """
    def __init__(self) -> None:
        # Running sums of per-instance EM / F1, and the number of instances seen.
        self._total_em = 0.0
        self._total_f1 = 0.0
        self._count = 0

    @overrides
    def __call__(self, prediction: Union[str, List], ground_truths: List):  # type: ignore
        """
        Update the running exact-match and F1 totals with one prediction.

        Parameters
        ----------
        prediction: ``Union[str, List]``
            The predicted answer from the model evaluated. This could be a string, or a list of string
            when multiple spans are predicted as answer.
        ground_truths: ``List``
            All the ground truth answer annotations.
        """
        # If you wanted to split this out by answer type, you could look at [1] here and group by
        # that, instead of only keeping [0].
        ground_truth_answer_strings = [answer_json_to_strings(annotation)[0] for annotation in ground_truths]
        # Score against every annotation and keep the best, per the official evaluation protocol.
        exact_match, f1_score = metric_max_over_ground_truths(
            drop_em_and_f1,
            prediction,
            ground_truth_answer_strings
        )
        self._total_em += exact_match
        self._total_f1 += f1_score
        self._count += 1

    @overrides
    def get_metric(self, reset: bool = False) -> Tuple[float, float]:
        """
        Returns
        -------
        Average exact match and F1 score (in that order) as computed by the official DROP script
        over all inputs.
        """
        # Use 0.0 (not int 0) when no instances have been seen, so the declared
        # Tuple[float, float] return type holds in every case.
        exact_match = self._total_em / self._count if self._count > 0 else 0.0
        f1_score = self._total_f1 / self._count if self._count > 0 else 0.0
        if reset:
            self.reset()
        return exact_match, f1_score

    @overrides
    def reset(self):
        """Clear all accumulated statistics."""
        self._total_em = 0.0
        self._total_f1 = 0.0
        self._count = 0

    def __str__(self):
        # Reports raw totals (not averages); intended for debugging/logging.
        return f"DropEmAndF1(em={self._total_em}, f1={self._total_f1})"
@@ -26,6 +26,11 @@ allennlp.models.reading_comprehension
:undoc-members:
:show-inheritance:

.. automodule:: allennlp.models.reading_comprehension.naqanet
:members:
:undoc-members:
:show-inheritance:

.. automodule:: allennlp.models.reading_comprehension.util
:members:
:undoc-members:
@@ -13,6 +13,7 @@ allennlp.training.metrics
* :ref:`CategoricalAccuracy<categorical-accuracy>`
* :ref:`ConllCorefScores<conll-coref-scores>`
* :ref:`Covariance<covariance>`
* :ref:`DropEmAndF1<drop_em_and_f1>`
* :ref:`Entropy<entropy>`
* :ref:`EvalbBracketingScorer<evalb>`
* :ref:`F1Measure<f1-measure>`
@@ -73,6 +74,12 @@ allennlp.training.metrics
:undoc-members:
:show-inheritance:

.. _drop_em_and_f1:
.. automodule:: allennlp.training.metrics.drop_em_and_f1
:members:
:undoc-members:
:show-inheritance:

.. _entropy:
.. automodule:: allennlp.training.metrics.entropy
:members:
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.