Commit 602399c: update with master
epwalsh committed Nov 11, 2020
2 parents ffafaf6 + 1e7492d
Showing 20 changed files with 911 additions and 122 deletions.
21 changes: 14 additions & 7 deletions CHANGELOG.md
@@ -35,22 +35,29 @@ dataset at every epoch) and a `MultiTaskScheduler` (for ordering the instances w
the `MultiProcessDataLoading` this is controlled by the `max_instances_in_memory` setting.
- `TensorField` is now implemented in terms of torch tensors, not numpy.

### Fixed

- Ignore `*args` when constructing classes with `FromParams`.
- Ensured some consistency in the types of the values that metrics return.
- `PretrainedTransformerTokenizer` will now never return a sequence that's longer than `max_length`, even with special tokens.


## Unreleased (1.x branch)

## [v1.2.1](https://github.com/allenai/allennlp/releases/tag/v1.2.1) - 2020-11-10

### Added

- Added an optional `seed` parameter to `ModelTestCase.set_up_model` which sets the random
seed for `random`, `numpy`, and `torch`.
- Added support for a global plugins file at `~/.allennlp/plugins`.
- Added more documentation about plugins.
- Added sampler class and parameter in beam search for non-deterministic search, with several
implementations, including `MultinomialSampler`, `TopKSampler`, `TopPSampler`, and
`GumbelMaxSampler`. Utilizing `GumbelMaxSampler` will give [Stochastic Beam Search](https://api.semanticscholar.org/CorpusID:76662039).
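  As a rough sketch of the new API (the constructor signatures here are assumed from this entry, not verified against the code):

  ```python
  # Hedged sketch: `sampler` is the new BeamSearch parameter described above;
  # the TopKSampler signature is an assumption based on this changelog entry.
  from allennlp.nn.beam_search import BeamSearch, TopKSampler

  beam_search = BeamSearch(
      end_index=2,                # index of the end-of-sequence token
      max_steps=50,
      beam_size=5,
      sampler=TopKSampler(k=10),  # sample among the 10 most likely next tokens
  )
  ```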

### Changed

- Pass batch metrics to `BatchCallback`.

### Fixed

- Fixed a bug where forward hooks were not cleaned up with saliency interpreters if there
was an exception.
- Fixed the computation of saliency maps in the Interpret code when using mismatched indexing.
Previously, we would compute gradients from the top of the transformer, after aggregation from
wordpieces to tokens, which gives results that are not very informative. Now, we compute gradients
@@ -59,7 +66,7 @@ dataset at every epoch) and a `MultiTaskScheduler` (for ordering the instances w
`transformers` library broke our old heuristic.
- Fixed typo with registered name of ROUGE metric. Previously was `rogue`, fixed to `rouge`.
- Fixed default masks that were erroneously created on the CPU even when a GPU is available.
- Fixed pretrained embeddings for transformers that don't use end tokens.

## [v1.2.0](https://github.com/allenai/allennlp/releases/tag/v1.2.0) - 2020-10-29

2 changes: 2 additions & 0 deletions Makefile
@@ -110,6 +110,8 @@ $(MD_DOCS_ROOT)README.md : README.md
	cp $< $@
	# Alter the relative path of the README image for the docs.
	$(SED) -i '1s/docs/./' $@
	# Alter external doc links to relative links.
	$(SED) -i 's|https://docs.allennlp.org/master/api/|/api/|' $@

$(MD_DOCS_ROOT)%.md : %.md
	cp $< $@
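For instance, this rewrite turns a README link such as `https://docs.allennlp.org/master/api/common/plugins/` into the site-relative `/api/common/plugins/` when the file is copied into the docs tree.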
19 changes: 19 additions & 0 deletions README.md
@@ -60,6 +60,25 @@ In addition, there are external tutorials:

And others on the [AI2 AllenNLP blog](https://medium.com/ai2-blog/allennlp/home).

## Plugins

AllenNLP supports loading "plugins" dynamically. A plugin is just a Python package that
provides custom registered classes or additional `allennlp` subcommands.

There are several official [default plugins](https://docs.allennlp.org/master/api/common/plugins/#default_plugins) and an ecosystem of third-party plugins, including:

* [allennlp-optuna](https://github.com/himkt/allennlp-optuna)

In order for AllenNLP to find personal or third-party plugins, you have to create either a local plugins file named `.allennlp_plugins`
in the directory where the `allennlp` command is run, or a global plugins file at `~/.allennlp/plugins`.
The file should list the plugin modules that you want to be loaded, one per line.
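
For example, a plugins file might look like this (the first two entries are the default plugins named in `allennlp/common/plugins.py`; the last is a hypothetical project-local module):

```
allennlp_models
allennlp_server
my_project.my_module
```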

To test that your plugins can be found and imported by AllenNLP, you can run the `allennlp test-install` command.
Each discovered plugin will be logged to the terminal.

For more information about plugins, see the [plugins API docs](https://docs.allennlp.org/master/api/common/plugins/). For information on how to create a custom subcommand
to distribute as a plugin, see the [subcommand API docs](https://docs.allennlp.org/master/api/commands/subcommand/).

## Package Overview

<table>
5 changes: 3 additions & 2 deletions allennlp/commands/subcommand.py
@@ -26,8 +26,9 @@ class Subcommand(Registrable):

     requires_plugins: bool = True
     """
-    If `True`, the sub-command will trigger a call to `import_plugins` and will also
-    have an additional `--include-package` flag.
+    If `True`, the sub-command will trigger a call to `import_plugins()` (except for custom
+    subcommands which come from plugins, since plugins will already have been imported by the
+    time the subcommand is discovered), and will also have an additional `--include-package` flag.
     """

     _reverse_registry: Dict[Type, str] = {}
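As a sketch of the flag in context, a hypothetical plugin-provided subcommand might look like the following (the command name and argument are invented; `Subcommand.register` and `add_subparser` are the extension points described in the subcommand API docs):

```python
import argparse

from allennlp.commands.subcommand import Subcommand


@Subcommand.register("count-instances")
class CountInstances(Subcommand):
    # Plugins are already imported by the time a plugin-provided subcommand
    # is discovered, so there is no need to trigger import_plugins() again.
    requires_plugins = False

    def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
        subparser = parser.add_parser(self.name, description="Count instances in a dataset.")
        subparser.add_argument("data_path", type=str)
        subparser.set_defaults(func=lambda args: print(args.data_path))
        return subparser
```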
60 changes: 44 additions & 16 deletions allennlp/common/plugins.py
@@ -1,46 +1,74 @@
"""
Plugin management.
# Plugin management.
AllenNLP supports loading "plugins" dynamically. A plugin is just a Python package that
can be found and imported by AllenNLP. This is done by creating a file named `.allennlp_plugins`
in the directory where the `allennlp` command is run that lists the modules that should be loaded,
one per line.
provides custom registered classes or additional `allennlp` subcommands.
In order for AllenNLP to find your plugins, you have to create either a local plugins
file named `.allennlp_plugins` in the directory where the `allennlp` command is run, or a global
plugins file at `~/.allennlp/plugins`. The file should list the plugin modules that you want to
be loaded, one per line.
"""

import importlib
import logging
import os
from pathlib import Path
import sys
-from typing import Iterable
+from typing import Iterable, Set

from allennlp.common.util import push_python_path, import_module_and_submodules


logger = logging.getLogger(__name__)


LOCAL_PLUGINS_FILENAME = ".allennlp_plugins"
"""
Local plugin files should have this name.
"""

GLOBAL_PLUGINS_FILENAME = str(Path.home() / ".allennlp" / "plugins")
"""
The global plugins file will be found here.
"""

DEFAULT_PLUGINS = ("allennlp_models", "allennlp_server")
"""
Default plugins do not need to be declared in a plugins file. They will always
be imported when they are installed in the current Python environment.
"""


def discover_file_plugins(plugins_filename: str = ".allennlp_plugins") -> Iterable[str]:
def discover_file_plugins(plugins_filename: str = LOCAL_PLUGINS_FILENAME) -> Iterable[str]:
"""
Returns an iterable of the plugins found, declared within a file whose path is `plugins_filename`.
"""
if os.path.isfile(plugins_filename):
with open(plugins_filename) as file_:
for module_name in file_.readlines():
module_name = module_name.strip()
if module_name:
yield module_name
else:
return []
with open(plugins_filename) as file_:
for module_name in file_.readlines():
module_name = module_name.strip()
if module_name:
yield module_name


 def discover_plugins() -> Iterable[str]:
     """
     Returns an iterable of the plugins found.
     """
-    with push_python_path("."):
-        yield from discover_file_plugins()
+    plugins: Set[str] = set()
+    if os.path.isfile(LOCAL_PLUGINS_FILENAME):
+        with push_python_path("."):
+            for plugin in discover_file_plugins(LOCAL_PLUGINS_FILENAME):
+                if plugin in plugins:
+                    continue
+                yield plugin
+                plugins.add(plugin)
+    if os.path.isfile(GLOBAL_PLUGINS_FILENAME):
+        for plugin in discover_file_plugins(GLOBAL_PLUGINS_FILENAME):
+            if plugin in plugins:
+                continue
+            yield plugin
+            plugins.add(plugin)


def import_plugins() -> None:
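A short usage sketch of the two functions above (what gets printed depends on your environment and plugins files):

```python
from allennlp.common.plugins import discover_plugins, import_plugins

# Import default plugins plus everything declared in the local
# `.allennlp_plugins` file and the global `~/.allennlp/plugins` file.
import_plugins()

# Or just list what would be discovered, without importing anything:
for module_name in discover_plugins():
    print(module_name)
```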
@@ -164,7 +164,9 @@ def _postprocess_output(self, output: IndexedTokenList) -> IndexedTokenList:

indices = output["token_ids"]
# Strips original special tokens
indices = indices[self._num_added_start_tokens : -self._num_added_end_tokens]
indices = indices[
self._num_added_start_tokens : len(indices) - self._num_added_end_tokens
]
# Folds indices
folded_indices = [
indices[i : i + self._effective_max_length]
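The motivation for this change: when `_num_added_end_tokens` is 0 (a transformer that adds no end token), the old negative upper bound collapses to `-0 == 0` and the slice comes back empty. A minimal illustration:

```python
indices = [101, 7, 8, 9]  # illustrative IDs: one added start token, no end token
num_start, num_end = 1, 0

print(indices[num_start:-num_end])                # [] -- the old, buggy slice
print(indices[num_start:len(indices) - num_end])  # [7, 8, 9] -- the fixed slice
```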
11 changes: 11 additions & 0 deletions allennlp/data/tokenizers/pretrained_transformer_tokenizer.py
@@ -118,6 +118,12 @@ def _reverse_engineer_special_tokens(
            return_token_type_ids=True,
            return_attention_mask=False,
        )
        if len(dummy_output["token_type_ids"]) != len(dummy_output["input_ids"]):
            logger.warning(
                "Tokenizer library did not return valid token type ids. We will assume they are all zero."
            )
            dummy_output["token_type_ids"] = [0] * len(dummy_output["input_ids"])

        dummy_a = self.tokenizer.encode(token_a, add_special_tokens=False)[0]
        assert dummy_a in dummy_output["input_ids"]
        dummy_b = self.tokenizer.encode(token_b, add_special_tokens=False)[0]
dummy_b = self.tokenizer.encode(token_b, add_special_tokens=False)[0]
@@ -176,6 +182,11 @@ def _reverse_engineer_special_tokens(
            return_token_type_ids=True,
            return_attention_mask=False,
        )
        if len(dummy_output["token_type_ids"]) != len(dummy_output["input_ids"]):
            logger.warning(
                "Tokenizer library did not return valid token type ids. We will assume they are all zero."
            )
            dummy_output["token_type_ids"] = [0] * len(dummy_output["input_ids"])

        seen_dummy_a = False
        for token_id, token_type_id in zip(
@@ -88,9 +88,11 @@ def _integrate_gradients(self, instance: Instance) -> Dict[str, numpy.ndarray]:
             # Hook for modifying embedding value
             handles = self._register_hooks(alpha, embeddings_list, token_offsets)

-            grads = self.predictor.get_gradients([instance])[0]
-            for handle in handles:
-                handle.remove()
+            try:
+                grads = self.predictor.get_gradients([instance])[0]
+            finally:
+                for handle in handles:
+                    handle.remove()

             # Running sum of gradients
             if ig_grads == {}:
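This try/finally pattern (repeated in the two interpreters below) guarantees that forward hooks are removed even when the gradient computation raises, matching the fix described in the changelog. A standalone sketch of the idea with a plain PyTorch module:

```python
import torch

model = torch.nn.Linear(4, 2)
captured = []

# Register a forward hook that records module outputs.
handle = model.register_forward_hook(
    lambda module, inputs, output: captured.append(output.detach())
)
try:
    model(torch.randn(3, 4)).sum().backward()
finally:
    # Runs even if the forward or backward pass raises,
    # so no stale hooks accumulate across calls.
    handle.remove()
```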
8 changes: 5 additions & 3 deletions allennlp/interpret/saliency_interpreters/simple_gradient.py
@@ -30,9 +30,11 @@ def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:

             # Hook used for saving embeddings
             handles = self._register_hooks(embeddings_list, token_offsets)
-            grads = self.predictor.get_gradients([instance])[0]
-            for handle in handles:
-                handle.remove()
+            try:
+                grads = self.predictor.get_gradients([instance])[0]
+            finally:
+                for handle in handles:
+                    handle.remove()

             # Gradients come back in the reverse order that they were sent into the network
             embeddings_list.reverse()
6 changes: 4 additions & 2 deletions allennlp/interpret/saliency_interpreters/smooth_gradient.py
@@ -72,8 +72,10 @@ def _smooth_grads(self, instance: Instance) -> Dict[str, numpy.ndarray]:
         total_gradients: Dict[str, Any] = {}
         for _ in range(self.num_samples):
             handle = self._register_forward_hook(self.stdev)
-            grads = self.predictor.get_gradients([instance])[0]
-            handle.remove()
+            try:
+                grads = self.predictor.get_gradients([instance])[0]
+            finally:
+                handle.remove()

             # Sum gradients
             if total_gradients == {}:
@@ -322,7 +322,7 @@ def lengths_to_mask(lengths, max_len, device):

         embeddings = embeddings.reshape(batch_size, num_segments, self._max_length, embedding_size)
         embeddings = embeddings[
-            :, :, self._num_added_start_tokens : -self._num_added_end_tokens, :
+            :, :, self._num_added_start_tokens : embeddings.size(2) - self._num_added_end_tokens, :
         ]  # truncate segment-level start/end tokens
         embeddings = embeddings.reshape(batch_size, -1, embedding_size)  # flatten

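This is the tensor analogue of the indexer fix above: with zero end tokens, `-0` as a slice bound empties the dimension, so the upper bound must be computed explicitly. A quick check:

```python
import torch

x = torch.arange(24).reshape(1, 2, 4, 3)
num_start, num_end = 1, 0

print(x[:, :, num_start:-num_end, :].shape)             # torch.Size([1, 2, 0, 3]) -- empty
print(x[:, :, num_start:x.size(2) - num_end, :].shape)  # torch.Size([1, 2, 3, 3]) -- fixed
```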
