Skip to content
Permalink
Browse files

Match the vocab generation of the currently online Event2Mind model. (#1978)

- I noticed this difference while retraining the model to account for #1944.
- With this change we generate exactly the same vocab that was used for the 10/5 model and that is now also used for the new 10/26 model.
- In the long run we'll want to be more principled with our vocab. In the short term I'm more concerned with ensuring that we can retrain consistently.
  • Loading branch information...
brendan-ai2 committed Oct 29, 2018
1 parent bc97ce8 commit c262ef5a0f805d7f0fd7ef52b2ab382a43a0b972
@@ -104,14 +104,15 @@ def _read(self, file_path):
)
# Generate instances where each token of input appears once.
else:
# To the extent that sources are duplicated in the dataset
# (which appears common), we will duplicate them here.
yield self.text_to_instance(source_sequence, "none", "none", "none")
for xintent in xintents:
# NOTE: source_sequence should really be broken out and deduplicated. We're
# adding it here to ensure we generate the same vocabulary as the model at
# https://s3-us-west-2.amazonaws.com/allennlp/models/event2mind-2018.10.05.tar.gz
# was trained against.
yield self.text_to_instance(source_sequence, xintent, "none", "none")
for xreact in xreacts:
# Since "none" is a special token we don't mind it
# appearing a disproportionate number of times.
yield self.text_to_instance("none", xintent, "none", "none")
for xreact in xreacts:
yield self.text_to_instance("none", "none", xreact, "none")
for oreact in oreacts:
yield self.text_to_instance("none", "none", "none", oreact)
@@ -74,42 +74,36 @@ def test_read_with_dummy_instances_for_vocab_generation(self, lazy):
)
instances = ensure_list(instances)

assert len(instances) == 21
assert len(instances) == 17
instance = instances[0]
assert get_text("source", instance) == ["@start@", "it", "is", "personx", "'s",
"favorite", "animal", "@end@"]
assert get_text("xintent", instance) == ["@start@", "none", "@end@"]
assert get_text("xreact", instance) == ["@start@", "none", "@end@"]
assert get_text("oreact", instance) == ["@start@", "none", "@end@"]

instance = instances[6]
instance = instances[5]
assert get_text("source", instance) == ["@start@", "personx", "drives",
"persony", "'s", "truck", "@end@"]
assert get_text("xintent", instance) == ["@start@", "none", "@end@"]
assert get_text("xreact", instance) == ["@start@", "none", "@end@"]
assert get_text("oreact", instance) == ["@start@", "none", "@end@"]

instance = instances[7]
assert get_text("source", instance) == ["@start@", "none", "@end@"]
assert get_text("xintent", instance) == ["@start@", "move", "@end@"]
assert get_text("xreact", instance) == ["@start@", "none", "@end@"]
assert get_text("oreact", instance) == ["@start@", "none", "@end@"]

instance = instances[9]
instance = instances[7]
assert get_text("source", instance) == ["@start@", "none", "@end@"]
assert get_text("xintent", instance) == ["@start@", "none", "@end@"]
assert get_text("xreact", instance) == ["@start@", "grateful", "@end@"]
assert get_text("oreact", instance) == ["@start@", "none", "@end@"]

instance = instances[11]
instance = instances[9]
assert get_text("source", instance) == ["@start@", "none", "@end@"]
assert get_text("xintent", instance) == ["@start@", "none", "@end@"]
assert get_text("xreact", instance) == ["@start@", "none", "@end@"]
assert get_text("oreact", instance) == ["@start@", "charitable", "@end@"]

instance = instances[13]
assert get_text("source", instance) == ["@start@", "personx", "gets", "persony",
"'s", "mother", "@end@"]
assert get_text("xintent", instance) == ["@start@", "none", "@end@"]
instance = instances[14]
assert get_text("source", instance) == ["@start@", "personx", "drives",
"persony", "'s", "truck", "@end@"]
assert get_text("xintent", instance) == ["@start@", "for", "fun", "@end@"]
assert get_text("xreact", instance) == ["@start@", "none", "@end@"]
assert get_text("oreact", instance) == ["@start@", "none", "@end@"]

0 comments on commit c262ef5

Please sign in to comment.
You can’t perform that action at this time.