
Remove constraint_type parameter from CRF Tagger (#2208)

schmmd committed Dec 19, 2018
1 parent eff25a3 commit 4cc4b6b62773df49c394180841f43b7e66dac14b
@@ -1,5 +1,4 @@
from typing import Dict, Optional, List, Any
- import warnings

from overrides import overrides
import torch
@@ -39,17 +38,6 @@ class CrfTagger(Model):
Label encoding to use when calculating span f1 and constraining
the CRF at decoding time. Valid options are "BIO", "BIOUL", "IOB1", "BMES".
Required if ``calculate_span_f1`` or ``constrain_crf_decoding`` is true.
- constraint_type : ``str``, optional (default=``None``)
- If provided, the CRF will be constrained at decoding time
- to produce valid labels based on the specified type
- (e.g. "BIO", or "BIOUL").
- .. deprecated:: 0.6.1
- ``constraint_type`` was deprecated and replaced with
- ``label_encoding``, ``constrain_crf_decoding``, and
- ``calculate_span_f1`` in version 0.6.1. It will be removed
- in version 0.8.
include_start_end_transitions : ``bool``, optional (default=``True``)
Whether to include start and end transition parameters in the CRF.
constrain_crf_decoding : ``bool``, optional (default=``None``)
@@ -81,7 +69,6 @@ def __init__(self, vocab: Vocabulary,
label_namespace: str = "labels",
feedforward: Optional[FeedForward] = None,
label_encoding: Optional[str] = None,
- constraint_type: Optional[str] = None,
include_start_end_transitions: bool = True,
constrain_crf_decoding: bool = None,
calculate_span_f1: bool = None,
@@ -109,13 +96,6 @@ def __init__(self, vocab: Vocabulary,
self.tag_projection_layer = TimeDistributed(Linear(output_dim,
self.num_tags))

- if constraint_type is not None:
- warnings.warn("'constraint_type' was removed and replaced with"
- "'label_encoding', 'constrain_crf_decoding', and "
- "'calculate_span_f1' in version 0.6.1. It will be "
- "removed in version 0.8.", DeprecationWarning)
- label_encoding = constraint_type
-
# if constrain_crf_decoding and calculate_span_f1 are not
# provided, (i.e., they're None), set them to True
# if label_encoding is provided and False if it isn't.
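The comment above describes the replacement defaulting rule; a small self-contained sketch of that behavior follows (the helper name resolve_crf_flags is hypothetical and not part of the file; it only paraphrases the logic):

from typing import Optional, Tuple

def resolve_crf_flags(label_encoding: Optional[str],
                      constrain_crf_decoding: Optional[bool] = None,
                      calculate_span_f1: Optional[bool] = None) -> Tuple[bool, bool]:
    # Flags left unset (None) follow whether a label_encoding was supplied.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None
    return constrain_crf_decoding, calculate_span_f1

# A config that only sets label_encoding gets both behaviors enabled:
assert resolve_crf_flags("BIO") == (True, True)
assert resolve_crf_flags(None) == (False, False)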
@@ -152,11 +132,6 @@ def __init__(self, vocab: Vocabulary,
self._f1_metric = SpanBasedF1Measure(vocab,
tag_namespace=label_namespace,
label_encoding=label_encoding)
- elif constraint_type is not None:
- # Maintain deprecated behavior if constraint_type is provided
- self._f1_metric = SpanBasedF1Measure(vocab,
- tag_namespace=label_namespace,
- label_encoding=constraint_type)

check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
"text field embedding dim", "encoder input dim")
@@ -47,7 +47,7 @@ def neural_coreference_resolution_lee_2017() -> predictors.CorefPredictor:
return model.predictor() # type: ignore

def named_entity_recognition_with_elmo_peters_2018() -> predictors.SentenceTaggerPredictor:
- model = PretrainedModel('https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.04.30.tar.gz',
+ model = PretrainedModel('https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2019.12.18.tar.gz',
'sentence-tagger')
return model.predictor() # type: ignore
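For reference, a minimal usage sketch of the updated helper; it assumes these functions live in the allennlp.pretrained module and that the returned SentenceTaggerPredictor exposes the predict(sentence=...) convenience method, and it is not part of this commit:

import allennlp.pretrained as pretrained  # assumed module path for the helpers shown above

# Downloads the retrained archive referenced above and builds a sentence-tagger predictor.
predictor = pretrained.named_entity_recognition_with_elmo_peters_2018()
result = predictor.predict(sentence="AllenNLP was built by the Allen Institute for AI in Seattle.")
print(list(zip(result["words"], result["tags"])))  # per-token NER tags from the CRF tagger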

@@ -22,7 +22,7 @@
"validation_data_path": "allennlp/tests/fixtures/data/quarel.jsonl",
"model": {
"type": "crf_tagger",
"constraint_type": "BIO",
"label_encoding": "BIO",
"dropout": 0.2,
"include_start_end_transitions": false,
"text_field_embedder": {
@@ -25,34 +25,6 @@ def test_simple_tagger_can_train_save_and_conll2000(self):
self.ensure_model_can_train_save_and_load(
self.FIXTURES_ROOT / 'crf_tagger' / 'experiment_conll2000.json')

- def test_simple_tagger_constraint_type_deprecated(self):
- params = Params({"model": {
- "type": "crf_tagger",
- "constraint_type": "IOB1",
- "text_field_embedder": {
- "token_embedders": {
- "tokens": {
- "type": "embedding",
- "embedding_dim": 50
- },
- }
- },
- "encoder": {
- "type": "gru",
- "input_size": 50,
- "hidden_size": 10,
- "num_layers": 2,
- "dropout": 0.5,
- "bidirectional": True
- }}})
- with pytest.warns(DeprecationWarning):
- model = Model.from_params(vocab=self.vocab,
- params=params.pop("model"))
- assert model._f1_metric is not None
- assert model._f1_metric._label_encoding == "IOB1"
- assert model.label_encoding == "IOB1"
- assert model.crf._constraint_mask.sum().item() != (model.num_tags + 2)**2
-
@flaky
def test_batch_predictions_are_consistent(self):
self.ensure_batch_predictions_are_consistent()
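The deleted test's assertions still apply to the replacement parameter; below is a hedged sketch of an equivalent test written against label_encoding, mirroring the removed test above. It is illustrative only and not part of this commit:

def test_simple_tagger_label_encoding_constrains_crf(self):
    # Same fixture as the removed test, with 'constraint_type' swapped
    # for the replacement 'label_encoding' parameter.
    params = Params({"model": {
            "type": "crf_tagger",
            "label_encoding": "IOB1",
            "text_field_embedder": {
                    "token_embedders": {
                            "tokens": {
                                    "type": "embedding",
                                    "embedding_dim": 50
                            },
                    }
            },
            "encoder": {
                    "type": "gru",
                    "input_size": 50,
                    "hidden_size": 10,
                    "num_layers": 2,
                    "dropout": 0.5,
                    "bidirectional": True
            }}})
    # With label_encoding given, constrain_crf_decoding and
    # calculate_span_f1 default to True (see the comment in __init__).
    model = Model.from_params(vocab=self.vocab, params=params.pop("model"))
    assert model._f1_metric is not None
    assert model._f1_metric._label_encoding == "IOB1"
    assert model.label_encoding == "IOB1"
    # A constrained CRF masks out some transitions, so the mask is not all ones.
    assert model.crf._constraint_mask.sum().item() != (model.num_tags + 2)**2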
@@ -31,7 +31,7 @@
"validation_data_path": std.extVar("NER_TEST_A_PATH"),
"model": {
"type": "crf_tagger",
"constraint_type": "BIOUL",
"label_encoding": "BIOUL",
"dropout": 0.5,
"include_start_end_transitions": false,
"text_field_embedder": {
@@ -23,7 +23,7 @@
"validation_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/quarel/quarel-v1-dev.jsonl",
"model": {
"type": "crf_tagger",
"constraint_type": "BIO",
"label_encoding": "BIO",
"dropout": 0.2,
"include_start_end_transitions": false,
"text_field_embedder": {
@@ -177,8 +177,6 @@ We don't *need* to, but we also make a few other changes
treated as out-of-vocabulary at evaluation time. The second flag just evaluates
the model on the test set when training stops. Use this flag cautiously;
when you're doing real science, you don't want to evaluate on your test set too often.
- * if you want to specify constraints for the CRF, you can add a `"model.constraint_type"`
- to your config that indicates what sort of constraints the CRF tagger should use.
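With constraint_type gone, the same effect comes from the replacement keys documented in the model's docstring; here is a minimal sketch of the equivalent model-config entries (illustrative only, with the embedder, encoder, and other required settings elided):

# Hedged sketch: the keys below replace the removed "constraint_type".
model_config = {
    "type": "crf_tagger",
    "label_encoding": "BIO",         # which tagging scheme the labels follow
    "constrain_crf_decoding": True,  # constrain decoding to valid BIO sequences
    "calculate_span_f1": True,       # span-based F1 uses the same encoding
}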


## Putting It All Together
