Merge branch 'master' into vision
epwalsh committed Dec 16, 2020
2 parents 147fefe + a3732d0 commit 3da8e62
Showing 3 changed files with 14 additions and 15 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -76,7 +76,7 @@ dataset at every epoch) and a `MultiTaskScheduler` (for ordering the instances w
- Improved the band-aid solution for segmentation faults and the "ImportError: dlopen: cannot load any more object with static TLS"
by adding a `transformers` import.
- Added safety checks for extracting tar files

- Turned superfluous warning to info when extending the vocab in the embedding matrix, if no pretrained file was provided

## [v1.2.2](https://github.com/allenai/allennlp/releases/tag/v1.2.2) - 2020-11-17

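For context, the band-aid entry above works by importing `transformers` purely for its side effects, before other imports that load native extensions, presumably so that the underlying shared libraries get loaded in a safer order. A minimal sketch of that pattern (a hypothetical package `__init__.py`, not the actual AllenNLP source) might look like:

    # Hypothetical package __init__.py illustrating the band-aid described in the
    # CHANGELOG entry above. On some systems, importing transformers up front helps
    # avoid segmentation faults and the
    # "ImportError: dlopen: cannot load any more object with static TLS" error.
    import transformers  # noqa: F401  (imported only for its side effects)
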
2 changes: 1 addition & 1 deletion Makefile
@@ -20,7 +20,7 @@ DOCKER_TEST_TORCH_VERSION = 'torch==1.7.0+cu110 torchvision==0.8.1+cu110 -f http
DOCKER_TEST_DETECTRON_VERSION = 'detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu110/torch1.7/index.html'
DOCKER_RUN_CMD = docker run --rm \
        -v $$HOME/.allennlp:/root/.allennlp \
-       -v $$HOME/.cache/torch:/root/.cache/torch \
+       -v $$HOME/.cache/huggingface:/root/.cache/huggingface \
        -v $$HOME/nltk_data:/root/nltk_data

ifeq ($(shell uname),Darwin)
25 changes: 12 additions & 13 deletions allennlp/modules/token_embedders/embedding.py
@@ -253,7 +253,7 @@ def extend_vocab(
        vocab_namespace = vocab_namespace or self._vocab_namespace
        if not vocab_namespace:
            # It's not safe to default to "tokens" or any other namespace.
-           logging.info(
+           logger.info(
                "Loading a model trained before embedding extension was implemented; "
                "pass an explicit vocab namespace if you want to extend the vocabulary."
            )
@@ -285,19 +285,18 @@ def extend_vocab(
        elif is_url_or_existing_file(self._pretrained_file):
            extension_pretrained_file = self._pretrained_file
        # Case 4: no file is available, hope that pretrained embeddings weren't used in the first place and warn
-       else:
-           extra_info = (
-               f"Originally pretrained_file was at " f"{self._pretrained_file}. "
-               if self._pretrained_file
-               else ""
-           )
-           # It's better to warn here and not give error because there is no way to distinguish between
-           # whether pretrained-file wasn't used during training or user forgot to pass / passed incorrect
-           # mapping. Raising an error would prevent fine-tuning in the former case.
-           logging.warning(
+       elif self._pretrained_file is not None:
+           # Warn here instead of an exception to allow a fine-tuning even without the original pretrained_file
+           logger.warning(
                f"Embedding at model_path, {model_path} cannot locate the pretrained_file. "
-               f"{extra_info} If you are fine-tuning and want to use using pretrained_file for "
-               f"embedding extension, please pass the mapping by --embedding-sources argument."
+               f"Originally pretrained_file was at '{self._pretrained_file}'."
            )
+       else:
+           # When loading a model from archive there is no way to distinguish between whether a pretrained-file
+           # was or wasn't used during the original training. So we leave an info here.
+           logger.info(
+               "If you are fine-tuning and want to use a pretrained_file for "
+               "embedding extension, please pass the mapping by --embedding-sources argument."
+           )

        embedding_dim = self.weight.data.shape[-1]
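
Read alongside the CHANGELOG entry, the new branching warns only when a pretrained file was recorded at training time but can no longer be located, and otherwise just logs an informational message. A standalone sketch of that decision (hypothetical function name, not the actual `Embedding.extend_vocab` method) could look like:

    import logging
    from typing import Optional

    logger = logging.getLogger(__name__)

    def report_missing_pretrained_file(model_path: str, pretrained_file: Optional[str]) -> None:
        # Sketch of the warn-vs-info decision introduced by this commit.
        if pretrained_file is not None:
            # A pretrained file was recorded but cannot be found now; warn instead
            # of raising so that fine-tuning can still proceed without it.
            logger.warning(
                f"Embedding at model_path, {model_path} cannot locate the pretrained_file. "
                f"Originally pretrained_file was at '{pretrained_file}'."
            )
        else:
            # There is no way to tell from an archived model whether a pretrained
            # file was used during the original training, so only log at info level.
            logger.info(
                "If you are fine-tuning and want to use a pretrained_file for "
                "embedding extension, please pass the mapping by --embedding-sources argument."
            )

Warning rather than raising is the deliberate choice here: it keeps fine-tuning possible when the original pretrained file is simply absent, which is the rationale spelled out in the diff's comments.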
