Fix #928 - Minor naming inconsistency in tutorial json files (#929)

* change stacked_encoder to encoder

* remove log file

* new default model
sebastianGehrmann authored and matt-gardner committed Feb 28, 2018
1 parent 0cb2f60 commit acf21a53e7001bb6c7c3f7113c0d2805ffccceb2
@@ -35,7 +35,7 @@
'machine-comprehension'
),
'semantic-role-labeling': DemoModel(
- 'https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2017.09.05.tar.gz', # pylint: disable=line-too-long
+ 'https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.02.27.tar.gz', # pylint: disable=line-too-long
'semantic-role-labeling'
),
'textual-entailment': DemoModel(
@@ -36,7 +36,7 @@ class SemanticRoleLabeler(Model):
A Vocabulary, required in order to compute sizes for input/output projections.
text_field_embedder : ``TextFieldEmbedder``, required
Used to embed the ``tokens`` ``TextField`` we get as input to the model.
- stacked_encoder : ``Seq2SeqEncoder``
+ encoder : ``Seq2SeqEncoder``
The encoder (with its own internal stacking) that we will use in between embedding tokens
and predicting output tags.
binary_feature_dim : int, required.
@@ -48,7 +48,7 @@ class SemanticRoleLabeler(Model):
"""
def __init__(self, vocab: Vocabulary,
text_field_embedder: TextFieldEmbedder,
- stacked_encoder: Seq2SeqEncoder,
+ encoder: Seq2SeqEncoder,
binary_feature_dim: int,
embedding_dropout: float = 0.0,
initializer: InitializerApplicator = InitializerApplicator(),
@@ -62,15 +62,15 @@ def __init__(self, vocab: Vocabulary,
# for verb, because the verb index is provided to the model.
self.span_metric = SpanBasedF1Measure(vocab, tag_namespace="labels", ignore_classes=["V"])

- self.stacked_encoder = stacked_encoder
+ self.encoder = encoder
# There are exactly 2 binary features for the verb predicate embedding.
self.binary_feature_embedding = Embedding(2, binary_feature_dim)
- self.tag_projection_layer = TimeDistributed(Linear(self.stacked_encoder.get_output_dim(),
+ self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
self.num_classes))
self.embedding_dropout = Dropout(p=embedding_dropout)

check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
- stacked_encoder.get_input_dim(),
+ encoder.get_input_dim(),
"text embedding dim + verb indicator embedding dim",
"encoder input dim")
initializer(self)
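The rename does not change the dimension contract enforced by `check_dimensions_match` above: the encoder's input size must still equal the text embedding width plus the verb-indicator embedding width. A minimal sketch of that relationship, with illustrative numbers consistent with the baseline SRL config shown further down in this diff (the variable names here are just for illustration, not from the codebase):

```python
# Illustrative only: 100-dim GloVe embeddings plus a 100-dim verb-indicator
# embedding have to add up to the encoder's configured "input_size" of 200.
text_embedding_dim = 100   # output dim of the text_field_embedder
binary_feature_dim = 100   # verb-indicator ("binary feature") embedding
encoder_input_size = 200   # "input_size" of the "encoder" block in the config

# check_dimensions_match raises a ConfigurationError on a mismatch;
# conceptually it is just this assertion.
assert text_embedding_dim + binary_feature_dim == encoder_input_size
```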
@@ -121,7 +121,7 @@ def forward(self, # type: ignore
embedded_text_with_verb_indicator = torch.cat([embedded_text_input, embedded_verb_indicator], -1)
batch_size, sequence_length, _ = embedded_text_with_verb_indicator.size()

- encoded_text = self.stacked_encoder(embedded_text_with_verb_indicator, mask)
+ encoded_text = self.encoder(embedded_text_with_verb_indicator, mask)

logits = self.tag_projection_layer(encoded_text)
reshaped_log_probs = logits.view(-1, self.num_classes)
@@ -204,15 +204,15 @@ def get_viterbi_pairwise_potentials(self):
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SemanticRoleLabeler':
embedder_params = params.pop("text_field_embedder")
text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
- stacked_encoder = Seq2SeqEncoder.from_params(params.pop("stacked_encoder"))
+ encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
binary_feature_dim = params.pop_int("binary_feature_dim")

initializer = InitializerApplicator.from_params(params.pop('initializer', []))
regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
params.assert_empty(cls.__name__)
return cls(vocab=vocab,
text_field_embedder=text_field_embedder,
- stacked_encoder=stacked_encoder,
+ encoder=encoder,
binary_feature_dim=binary_feature_dim,
initializer=initializer,
regularizer=regularizer)
@@ -28,7 +28,7 @@ class SimpleTagger(Model):
A Vocabulary, required in order to compute sizes for input/output projections.
text_field_embedder : ``TextFieldEmbedder``, required
Used to embed the ``tokens`` ``TextField`` we get as input to the model.
- stacked_encoder : ``Seq2SeqEncoder``
+ encoder : ``Seq2SeqEncoder``
The encoder (with its own internal stacking) that we will use in between embedding tokens
and predicting output tags.
initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
@@ -39,18 +39,18 @@ class SimpleTagger(Model):

def __init__(self, vocab: Vocabulary,
text_field_embedder: TextFieldEmbedder,
- stacked_encoder: Seq2SeqEncoder,
+ encoder: Seq2SeqEncoder,
initializer: InitializerApplicator = InitializerApplicator(),
regularizer: Optional[RegularizerApplicator] = None) -> None:
super(SimpleTagger, self).__init__(vocab, regularizer)

self.text_field_embedder = text_field_embedder
self.num_classes = self.vocab.get_vocab_size("labels")
- self.stacked_encoder = stacked_encoder
- self.tag_projection_layer = TimeDistributed(Linear(self.stacked_encoder.get_output_dim(),
+ self.encoder = encoder
+ self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
self.num_classes))

- check_dimensions_match(text_field_embedder.get_output_dim(), stacked_encoder.get_input_dim(),
+ check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
"text field embedding dim", "encoder input dim")
self.metrics = {
"accuracy": CategoricalAccuracy(),
@@ -96,7 +96,7 @@ def forward(self, # type: ignore
embedded_text_input = self.text_field_embedder(tokens)
batch_size, sequence_length, _ = embedded_text_input.size()
mask = get_text_field_mask(tokens)
- encoded_text = self.stacked_encoder(embedded_text_input, mask)
+ encoded_text = self.encoder(embedded_text_input, mask)

logits = self.tag_projection_layer(encoded_text)
reshaped_log_probs = logits.view(-1, self.num_classes)
@@ -143,13 +143,13 @@ def get_metrics(self, reset: bool = False) -> Dict[str, float]:
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SimpleTagger':
embedder_params = params.pop("text_field_embedder")
text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
- stacked_encoder = Seq2SeqEncoder.from_params(params.pop("stacked_encoder"))
+ encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))

initializer = InitializerApplicator.from_params(params.pop('initializer', []))
regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
params.assert_empty(cls.__name__)
return cls(vocab=vocab,
text_field_embedder=text_field_embedder,
- stacked_encoder=stacked_encoder,
+ encoder=encoder,
initializer=initializer,
regularizer=regularizer)
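For reference, a minimal, hypothetical sketch (not part of this commit) of loading a `SimpleTagger` after the rename, assuming the `from_params` signature shown above and the AllenNLP config format of this era:

```python
# Hypothetical sketch, assuming the AllenNLP API at the time of this commit.
from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.models import SimpleTagger

# Tiny toy vocabulary so the embedder and tag projection have sizes to work with.
vocab = Vocabulary()
vocab.add_token_to_namespace("the", namespace="tokens")
vocab.add_token_to_namespace("DET", namespace="labels")

params = Params({
    "text_field_embedder": {
        "tokens": {"type": "embedding", "embedding_dim": 5}
    },
    "encoder": {            # formerly "stacked_encoder"
        "type": "lstm",
        "input_size": 5,    # must match the embedding dim (see check_dimensions_match above)
        "hidden_size": 7
    }
})
model = SimpleTagger.from_params(vocab, params)
```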
@@ -21,7 +21,7 @@ def setUp(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -26,7 +26,7 @@ def test_train_model(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -55,7 +55,7 @@ def test_train_with_test_set(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -133,7 +133,7 @@ def test_other_modules(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -209,7 +209,7 @@ def test_train_model(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -238,7 +238,7 @@ def test_train_with_test_set(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -13,7 +13,7 @@
"trainable": true
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 2,
"hidden_size": 4,
@@ -13,7 +13,7 @@
"trainable": true
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 2,
"hidden_size": 4,
@@ -12,7 +12,7 @@
"trainable": true
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 150,
"hidden_size": 10,
Binary file not shown.
Binary file not shown.
@@ -23,7 +23,7 @@ def setUp(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -35,7 +35,7 @@ def test_forward_pass_runs_correctly(self):

def test_mismatching_dimensions_throws_configuration_error(self):
params = Params.from_file(self.param_file)
- # Make the stacked_encoder wrong - it should be 2 to match
+ # Make the encoder wrong - it should be 2 to match
# the embedding dimension from the text_field_embedder.
params["model"]["encoder"]["input_size"] = 10
with pytest.raises(ConfigurationError):
@@ -72,6 +72,6 @@ def test_mismatching_dimensions_throws_configuration_error(self):
params = Params.from_file(self.param_file)
# Make the phrase layer wrong - it should be 150 to match
# the embedding + binary feature dimensions.
params["model"]["stacked_encoder"]["input_size"] = 10
params["model"]["encoder"]["input_size"] = 10
with pytest.raises(ConfigurationError):
Model.from_params(self.vocab, params.pop("model"))
@@ -35,9 +35,9 @@ def test_forward_pass_runs_correctly(self):

def test_mismatching_dimensions_throws_configuration_error(self):
params = Params.from_file(self.param_file)
- # Make the stacked_encoder wrong - it should be 2 to match
+ # Make the encoder wrong - it should be 2 to match
# the embedding dimension from the text_field_embedder.
params["model"]["stacked_encoder"]["input_size"] = 10
params["model"]["encoder"]["input_size"] = 10
with pytest.raises(ConfigurationError):
Model.from_params(self.vocab, params.pop("model"))

@@ -19,7 +19,7 @@ def setUp(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -30,7 +30,7 @@ def setUp(self):
"embedding_dim": 5
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 5,
"hidden_size": 7,
@@ -20,7 +20,7 @@
}
]
],
"stacked_encoder": {
"encoder": {
"type": "alternating_lstm",
"input_size": 200,
"hidden_size": 300,
@@ -264,7 +264,7 @@ which needs to be a
[`Seq2SeqEncoder`](http://docs.allennlp.org/en/latest/api/allennlp.modules.seq2seq_encoders.html#allennlp.modules.seq2seq_encoders.seq2seq_encoder.Seq2SeqEncoder):
```js
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 100,
"hidden_size": 100,
@@ -37,7 +37,7 @@
"dropout": 0.2
}
},
"stacked_encoder": {
"encoder": {
"type": "lstm",
"input_size": 100,
"hidden_size": 100,
@@ -64,7 +64,7 @@ The baseline model uses a 200 dimensional input (100 dimensional GloVe embedding
ELMo provides a 1024 dimension representation so the new `input_size` is 1224.
```json
"stacked_encoder": {
"encoder": {
"type": "alternating_lstm",
"input_size": 1224,
"hidden_size": 300,
