This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Merge branch 'TransformerToolkitUpdates' into Tango
dirkgr committed Jun 18, 2021
2 parents e93ef1d + 32fda86 commit 43a200b
Showing 4 changed files with 9 additions and 27 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `on_backward` training callback which allows for control over backpropagation and gradient manipulation.
- Added `AdversarialBiasMitigator`, a Model wrapper to adversarially mitigate biases in predictions produced by a pretrained model for a downstream task.
- Added `which_loss` parameter to `ensure_model_can_train_save_and_load` in `ModelTestCase` to specify which loss to test.
- The activation layer in the transformer toolkit can now be queried for its output dimension (see the sketch after this file's diff).
- `TransformerEmbeddings` now takes, but ignores, a parameter for the attention mask. This is needed for compatibility with some other modules that get called the same way and use the mask.
- `TransformerPooler` can now be instantiated from a pretrained transformer module, just like the other modules in the transformer toolkit.

### Fixed

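The activation-layer entry above refers to code that is not among the files changed in this diff. As a rough sketch only, assuming the accessor follows AllenNLP's usual `get_output_dim()` convention (the exact method name is not confirmed here) and mirroring the `ActivationLayer` constructor call visible in `transformer_pooler.py` further down:

# Sketch only: the import path and get_output_dim() are assumptions based on
# AllenNLP conventions; the positional arguments (hidden_size, intermediate_size,
# activation) mirror the super().__init__ call shown in transformer_pooler.py below.
from allennlp.modules.transformer.activation_layer import ActivationLayer

layer = ActivationLayer(768, 3072, "relu")
print(layer.get_output_dim())  # expected to report the layer's output width, i.e. 3072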
3 changes: 2 additions & 1 deletion allennlp/modules/transformer/transformer_embeddings.py
@@ -104,7 +104,7 @@ class TransformerEmbeddings(Embeddings):
Optionally apply a linear transform after the dropout, projecting to `output_size`.
"""

_pretrained_relevant_module = ["embeddings", "bert.embeddings"]
_pretrained_relevant_module = ["embeddings", "bert.embeddings", "roberta.embeddings"]
_pretrained_mapping = {
"LayerNorm": "layer_norm",
"word_embeddings": "embeddings.word_embeddings",
@@ -163,6 +163,7 @@ def forward( # type: ignore
input_ids: torch.Tensor,
token_type_ids: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:

"""
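To make the `attention_mask` change above concrete: the parameter is accepted purely for signature compatibility and has no effect on the output. A minimal sketch, assuming `from_pretrained_module` accepts a Hugging Face model name here as it does for other toolkit modules, and that the weights can be downloaded:

import torch

from allennlp.modules.transformer import TransformerEmbeddings

# Assumption: loading by model name works for this module; the relevant
# submodule is located via the _pretrained_relevant_module candidates above.
embeddings = TransformerEmbeddings.from_pretrained_module("bert-base-cased")
embeddings.eval()  # disable dropout so the two calls below are comparable

input_ids = torch.tensor([[101, 2023, 102]])  # arbitrary valid token ids
mask = torch.ones_like(input_ids)

# The mask is accepted but ignored, so both calls produce the same embeddings.
with_mask = embeddings(input_ids, attention_mask=mask)
without_mask = embeddings(input_ids)
assert torch.allclose(with_mask, without_mask)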
28 changes: 3 additions & 25 deletions allennlp/modules/transformer/transformer_pooler.py
@@ -1,4 +1,4 @@
from typing import Dict, Optional, Any, Union, TYPE_CHECKING
from typing import Union, TYPE_CHECKING

import torch

@@ -11,7 +11,7 @@

class TransformerPooler(ActivationLayer, FromParams):

_pretrained_relevant_module = ["pooler", "bert.pooler"]
_pretrained_relevant_module = ["pooler", "bert.pooler", "roberta.pooler"]

def __init__(
self,
@@ -21,28 +21,6 @@ def __init__(
):
super().__init__(hidden_size, intermediate_size, activation, pool=True)

@classmethod
def _get_input_arguments(
cls,
pretrained_module: torch.nn.Module,
source: str = "huggingface",
mapping: Optional[Dict[str, str]] = None,
**kwargs,
) -> Dict[str, Any]:
final_kwargs = {}

final_kwargs["hidden_size"] = pretrained_module.dense.in_features
final_kwargs["intermediate_size"] = pretrained_module.dense.out_features
final_kwargs["activation"] = pretrained_module.activation

final_kwargs.update(kwargs)

return final_kwargs

@classmethod
def _from_config(cls, config: "PretrainedConfig", **kwargs):
return cls(
config.hidden_size,
config.hidden_size,
"tanh" # BERT has this hardcoded
)
return cls(config.hidden_size, config.hidden_size, "tanh") # BERT has this hardcoded
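The upshot of this file's diff: `TransformerPooler` no longer needs its own `_get_input_arguments` override, because construction from a pretrained checkpoint now goes through the shared `_from_config` path, and the new "roberta.pooler" candidate lets that work for RoBERTa-style checkpoints as well. A usage sketch under the same assumption as above (that `from_pretrained_module` takes a model name):

import torch

from allennlp.modules.transformer import TransformerPooler

# Assumption: loading by Hugging Face model name; "roberta-base" exercises the
# newly added "roberta.pooler" entry in _pretrained_relevant_module.
pooler = TransformerPooler.from_pretrained_module("roberta-base")

# Per _from_config, hidden and intermediate sizes are both config.hidden_size
# (768 for roberta-base) and the activation is tanh.
hidden_states = torch.randn(2, 8, 768)  # (batch, seq_len, hidden_size)
pooled = pooler(hidden_states)          # expected shape: (2, 768)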
2 changes: 1 addition & 1 deletion allennlp/modules/transformer/transformer_stack.py
@@ -56,7 +56,7 @@ class TransformerStack(TransformerModule, FromParams):
"""

_pretrained_mapping = {"layer": "layers"}
_pretrained_relevant_module = ["encoder", "bert.encoder"]
_pretrained_relevant_module = ["encoder", "bert.encoder", "roberta.encoder"]

def __init__(
self,
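For context, `_pretrained_relevant_module` lists candidate submodule names that the toolkit's loading machinery searches for inside a pretrained model (roughly, the first candidate that matches is used, with `_pretrained_mapping` handling renames such as "layer" -> "layers"). Adding the roberta.* names in this commit lets the embeddings, stack, and pooler find their counterparts in RoBERTa-style checkpoints. A hedged sketch, again assuming loading by model name:

from allennlp.modules.transformer import TransformerStack

# Assumption: from_pretrained_module accepts a Hugging Face model name. With
# "roberta.encoder" listed as a candidate, the loader can locate the encoder
# inside a RoBERTa checkpoint and remap "layer" -> "layers" per _pretrained_mapping.
stack = TransformerStack.from_pretrained_module("roberta-base")
print(len(stack.layers))  # .layers inferred from the mapping above; 12 for roberta-base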
