Add TAPAS MLM-only models #13408

Merged
2 changes: 2 additions & 0 deletions src/transformers/__init__.py
@@ -1132,6 +1132,7 @@
             "TapasForSequenceClassification",
             "TapasModel",
             "TapasPreTrainedModel",
+            "load_tf_weights_in_tapas",
         ]
     )
     _import_structure["models.transfo_xl"].extend(
@@ -2771,6 +2772,7 @@
         TapasForSequenceClassification,
         TapasModel,
         TapasPreTrainedModel,
+        load_tf_weights_in_tapas,
     )
     from .models.transfo_xl import (
         TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST,
2 changes: 2 additions & 0 deletions src/transformers/models/tapas/__init__.py
@@ -34,6 +34,7 @@
         "TapasForSequenceClassification",
         "TapasModel",
         "TapasPreTrainedModel",
+        "load_tf_weights_in_tapas",
     ]


@@ -49,6 +50,7 @@
         TapasForSequenceClassification,
         TapasModel,
         TapasPreTrainedModel,
+        load_tf_weights_in_tapas,
     )

 else:
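
With both __init__.py files updated, load_tf_weights_in_tapas is importable from the top-level package. A minimal sketch of what the export enables (checkpoint path hypothetical, TensorFlow required at runtime):

    from transformers import TapasConfig, TapasForMaskedLM, load_tf_weights_in_tapas

    config = TapasConfig()
    model = TapasForMaskedLM(config)
    # populate the randomly initialized model from an original TF checkpoint
    model = load_tf_weights_in_tapas(model, config, "/path/to/tapas_masklm_base/model.ckpt")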
src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py
@@ -81,22 +81,21 @@ def convert_tf_checkpoint_to_pytorch(
         model = TapasForMaskedLM(config=config)
     elif task == "INTERMEDIATE_PRETRAINING":
         model = TapasModel(config=config)
+    else:
+        raise ValueError(f"Task {task} not supported.")
 
     print(f"Building PyTorch model from configuration: {config}")
 
     # Load weights from tf checkpoint
     load_tf_weights_in_tapas(model, config, tf_checkpoint_path)
 
     # Save pytorch-model (weights and configuration)
     print(f"Save PyTorch model to {pytorch_dump_path}")
-    model.save_pretrained(pytorch_dump_path[:-17])
+    model.save_pretrained(pytorch_dump_path)
 
     # Save tokenizer files
-    dir_name = r"C:\Users\niels.rogge\Documents\Python projecten\tensorflow\Tensorflow models\SQA\Base\tapas_sqa_inter_masklm_base_reset"
-    tokenizer = TapasTokenizer(vocab_file=dir_name + r"\vocab.txt", model_max_length=512)
-
     print(f"Save tokenizer files to {pytorch_dump_path}")
-    tokenizer.save_pretrained(pytorch_dump_path[:-17])
+    tokenizer = TapasTokenizer(vocab_file=tf_checkpoint_path[:-10] + "vocab.txt", model_max_length=512)
+    tokenizer.save_pretrained(pytorch_dump_path)
 
     print("Used relative position embeddings:", model.config.reset_position_index_per_cell)
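
A note on the new tokenizer line: tf_checkpoint_path[:-10] assumes the checkpoint file is literally named model.ckpt (10 characters) with vocab.txt sitting next to it, replacing the hard-coded Windows path that was removed. A quick sketch of that assumption (path hypothetical):

    tf_checkpoint_path = "/data/tapas_masklm_base_reset/model.ckpt"  # hypothetical
    assert tf_checkpoint_path.endswith("model.ckpt")  # len("model.ckpt") == 10
    vocab_file = tf_checkpoint_path[:-10] + "vocab.txt"
    # vocab_file == "/data/tapas_masklm_base_reset/vocab.txt"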
70 changes: 64 additions & 6 deletions src/transformers/models/tapas/modeling_tapas.py
@@ -192,6 +192,11 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
             if any(n in ["output_bias", "output_weights", "output_bias_cls", "output_weights_cls"] for n in name):
                 logger.info(f"Skipping {'/'.join(name)}")
                 continue
+        # in case the model is TapasForMaskedLM, we skip the pooler
+        if isinstance(model, TapasForMaskedLM):
+            if any(n in ["pooler"] for n in name):
+                logger.info(f"Skipping {'/'.join(name)}")
+                continue
         # if first scope name starts with "bert", change it to "tapas"
         if name[0] == "bert":
             name[0] = "tapas"
@@ -207,7 +212,10 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
                 pointer = getattr(pointer, "bias")
             # cell selection heads
             elif scope_names[0] == "output_bias":
-                pointer = getattr(pointer, "output_bias")
+                if not isinstance(model, TapasForMaskedLM):
+                    pointer = getattr(pointer, "output_bias")
+                else:
+                    pointer = getattr(pointer, "bias")
             elif scope_names[0] == "output_weights":
                 pointer = getattr(pointer, "output_weights")
             elif scope_names[0] == "column_output_bias":
@@ -697,6 +705,56 @@ def forward(self, hidden_states):
         return pooled_output
 
 
+# Copied from transformers.models.bert.modeling_bert.BertPredictionHeadTransform with Bert->Tapas
+class TapasPredictionHeadTransform(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
+        if isinstance(config.hidden_act, str):
+            self.transform_act_fn = ACT2FN[config.hidden_act]
+        else:
+            self.transform_act_fn = config.hidden_act
+        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+
+    def forward(self, hidden_states):
+        hidden_states = self.dense(hidden_states)
+        hidden_states = self.transform_act_fn(hidden_states)
+        hidden_states = self.LayerNorm(hidden_states)
+        return hidden_states
+
+
+# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->Tapas
+class TapasLMPredictionHead(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.transform = TapasPredictionHeadTransform(config)
+
+        # The output weights are the same as the input embeddings, but there is
+        # an output-only bias for each token.
+        self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+
+        self.bias = nn.Parameter(torch.zeros(config.vocab_size))
+
+        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
+        self.decoder.bias = self.bias
+
+    def forward(self, hidden_states):
+        hidden_states = self.transform(hidden_states)
+        hidden_states = self.decoder(hidden_states)
+        return hidden_states
+
+
+# Copied from transformers.models.bert.modeling_bert.BertOnlyMLMHead with Bert->Tapas
+class TapasOnlyMLMHead(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.predictions = TapasLMPredictionHead(config)
+
+    def forward(self, sequence_output):
+        prediction_scores = self.predictions(sequence_output)
+        return prediction_scores
+
+
 class TapasPreTrainedModel(PreTrainedModel):
     """
     An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
@@ -947,15 +1005,15 @@ def __init__(self, config):
         super().__init__(config)
 
         self.tapas = TapasModel(config, add_pooling_layer=False)
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        self.cls = TapasOnlyMLMHead(config)
 
         self.init_weights()
 
     def get_output_embeddings(self):
-        return self.lm_head
+        return self.cls.predictions.decoder
 
-    def set_output_embeddings(self, word_embeddings):
-        self.lm_head = word_embeddings
+    def set_output_embeddings(self, new_embeddings):
+        self.cls.predictions.decoder = new_embeddings
 
     @add_start_docstrings_to_model_forward(TAPAS_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC)
@@ -1020,7 +1078,7 @@ def forward(
         )
 
         sequence_output = outputs[0]
-        prediction_scores = self.lm_head(sequence_output)
+        prediction_scores = self.cls(sequence_output)
 
         masked_lm_loss = None
         if labels is not None:
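
The new head mirrors BERT's stack (TapasOnlyMLMHead -> TapasLMPredictionHead -> TapasPredictionHeadTransform), with the decoder tied to the input embeddings and a standalone bias linked through decoder.bias; skipping the pooler in load_tf_weights_in_tapas matches the add_pooling_layer=False above. A hedged sketch of the expected tying behavior (not part of the PR):

    from transformers import TapasConfig, TapasForMaskedLM

    model = TapasForMaskedLM(TapasConfig())

    # get_output_embeddings now returns the decoder of the new head
    assert model.get_output_embeddings() is model.cls.predictions.decoder

    # the decoder.bias = self.bias link keeps the bias in sync when the
    # vocabulary is resized via resize_token_embeddings
    model.resize_token_embeddings(model.config.vocab_size + 8)
    assert model.get_output_embeddings().bias.shape[0] == model.config.vocab_size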
4 changes: 4 additions & 0 deletions src/transformers/utils/dummy_pt_objects.py
@@ -3406,6 +3406,10 @@ def from_pretrained(cls, *args, **kwargs):
     requires_backends(cls, ["torch"])
 
 
+def load_tf_weights_in_tapas(*args, **kwargs):
+    requires_backends(load_tf_weights_in_tapas, ["torch"])
+
+
 TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = None
 
 
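
For completeness, a hedged end-to-end smoke test once a checkpoint has been converted (paths and table contents hypothetical):

    import torch
    import pandas as pd
    from transformers import TapasForMaskedLM, TapasTokenizer

    model = TapasForMaskedLM.from_pretrained("/path/to/pytorch_dump")  # hypothetical path
    tokenizer = TapasTokenizer.from_pretrained("/path/to/pytorch_dump")

    table = pd.DataFrame({"Actors": ["Brad Pitt", "Leonardo Di Caprio"], "Age": ["56", "45"]})
    inputs = tokenizer(table=table, queries="How old is [MASK] Pitt?", return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits  # (batch_size, seq_len, vocab_size)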