Minor fixes for interpret code (#3260)
* Minor fixes for interpret code

* pylint caught a bug!
matt-gardner committed Sep 18, 2019
1 parent 05be16a · commit 3e0bad4
Showing 2 changed files with 18 additions and 2 deletions.
13 changes: 13 additions & 0 deletions allennlp/data/token_indexers/pretrained_transformer_indexer.py
@@ -48,6 +48,7 @@ def __init__(self,
         elif model_name.endswith("-uncased") and not do_lowercase:
             logger.warning("Your pretrained model appears to be uncased, "
                            "but your indexer is not lowercasing tokens.")
+        self._model_name = model_name
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=do_lowercase)
         self._namespace = namespace
         self._added_to_vocabulary = False
@@ -91,3 +92,15 @@ def as_padded_tensor(self,
                                                              desired_num_tokens[key],
                                                              default_value=lambda: self._padding_value))
                 for key, val in tokens.items()}
+
+    def __eq__(self, other):
+        if isinstance(other, PretrainedTransformerIndexer):
+            for key in self.__dict__:
+                if key == 'tokenizer':
+                    # This is a reference to a function in the huggingface code, which we can't
+                    # really modify to make this clean. So we special-case it.
+                    continue
+                if self.__dict__[key] != other.__dict__[key]:
+                    return False
+            return True
+        return NotImplemented
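For context, a minimal usage sketch of what the new __eq__ (together with the newly stored _model_name) enables: two indexers constructed with the same arguments now compare equal, with the tokenizer attribute deliberately excluded from the comparison. The import path and constructor arguments below are assumptions about the AllenNLP API of this era, not part of the commit.

    # Hypothetical usage sketch, not part of the commit.
    from allennlp.data.token_indexers import PretrainedTransformerIndexer

    a = PretrainedTransformerIndexer(model_name="bert-base-uncased", do_lowercase=True)
    b = PretrainedTransformerIndexer(model_name="bert-base-uncased", do_lowercase=True)
    assert a == b  # compared field by field; the tokenizer object itself is skipped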
7 changes: 5 additions & 2 deletions allennlp/nn/util.py
@@ -1484,11 +1484,14 @@ def find_embedding_layer(model: torch.nn.Module) -> torch.nn.Module:
     """
     # We'll look for a few special cases in a first pass, then fall back to just finding a
     # TextFieldEmbedder in a second pass if we didn't find a special case.
-    from pytorch_pretrained_bert.modeling import BertEmbeddings
+    from pytorch_pretrained_bert.modeling import BertEmbeddings as BertEmbeddingsOld
     from pytorch_transformers.modeling_gpt2 import GPT2Model
+    from pytorch_transformers.modeling_bert import BertEmbeddings as BertEmbeddingsNew
     from allennlp.modules.text_field_embedders.text_field_embedder import TextFieldEmbedder
     for module in model.modules():
-        if isinstance(module, BertEmbeddings):
+        if isinstance(module, BertEmbeddingsOld):
             return module.word_embeddings
+        if isinstance(module, BertEmbeddingsNew):
+            return module.word_embeddings
         if isinstance(module, GPT2Model):
             return module.wte
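For context, a hedged sketch of how find_embedding_layer is typically invoked after this fix. It walks model.modules() and now recognizes BertEmbeddings from both the old pytorch_pretrained_bert package and the newer pytorch_transformers package. The BertModel.from_pretrained call below is an assumption about the pytorch_transformers API of the time, shown for illustration only.

    # Hypothetical usage sketch, not part of the commit.
    from pytorch_transformers import BertModel
    from allennlp.nn.util import find_embedding_layer

    model = BertModel.from_pretrained("bert-base-uncased")
    # Matches the BertEmbeddingsNew case and returns its word_embeddings,
    # a torch.nn.Embedding module.
    embedding_layer = find_embedding_layer(model)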
