huggingface · patrickvonplaten · Jul 7, 2020 · Jun 30, 2020 · Jun 30, 2020 · Jun 30, 2020
diff --git a/examples/token-classification/run_tf_ner.py b/examples/token-classification/run_tf_ner.py
@@ -184,7 +184,7 @@ def align_predictions(predictions: np.ndarray, label_ids: np.ndarray) -> Tuple[L
 
         for i in range(batch_size):
             for j in range(seq_len):
-                if label_ids[i, j] != -1:
+                if label_ids[i, j] != -100:
                     out_label_list[i].append(label_map[label_ids[i][j]])
                     preds_list[i].append(label_map[preds[i][j]])
 

diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -421,13 +421,19 @@
         TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
         TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
         TF_MODEL_WITH_LM_HEAD_MAPPING,
+        TF_MODEL_FOR_CAUSAL_LM_MAPPING,
+        TF_MODEL_FOR_MASKED_LM_MAPPING,
+        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
         TFAutoModel,
         TFAutoModelForMultipleChoice,
         TFAutoModelForPreTraining,
         TFAutoModelForQuestionAnswering,
         TFAutoModelForSequenceClassification,
         TFAutoModelForTokenClassification,
         TFAutoModelWithLMHead,
+        TFAutoModelForCausalLM,
+        TFAutoModelForMaskedLM,
+        TFAutoModelForSeq2SeqLM,
     )
 
     from .modeling_tf_albert import (
@@ -446,6 +452,7 @@
     from .modeling_tf_bert import (
         TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST,
         TFBertEmbeddings,
+        TFBertLMHeadModel,
         TFBertForMaskedLM,
         TFBertForMultipleChoice,
         TFBertForNextSentencePrediction,

diff --git a/src/transformers/modeling_auto.py b/src/transformers/modeling_auto.py
@@ -73,6 +73,7 @@
 from .modeling_camembert import (
     CamembertForMaskedLM,
     CamembertForMultipleChoice,
+    CamembertForQuestionAnswering,
     CamembertForSequenceClassification,
     CamembertForTokenClassification,
     CamembertModel,
@@ -300,6 +301,7 @@
     [
         (DistilBertConfig, DistilBertForQuestionAnswering),
         (AlbertConfig, AlbertForQuestionAnswering),
+        (CamembertConfig, CamembertForQuestionAnswering),
         (BartConfig, BartForQuestionAnswering),
         (LongformerConfig, LongformerForQuestionAnswering),
         (XLMRobertaConfig, XLMRobertaForQuestionAnswering),
@@ -329,7 +331,6 @@
     ]
 )
 
-
 MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
     [
         (CamembertConfig, CamembertForMultipleChoice),

diff --git a/src/transformers/modeling_tf_albert.py b/src/transformers/modeling_tf_albert.py
@@ -29,6 +29,7 @@
 )
 from .modeling_tf_bert import ACT2FN, TFBertSelfAttention
 from .modeling_tf_utils import (
+    TFMaskedLanguageModelingLoss,
     TFMultipleChoiceLoss,
     TFPreTrainedModel,
     TFQuestionAnsweringLoss,
@@ -822,7 +823,7 @@ def call(self, pooled_output, training: bool):
 
 
 @add_start_docstrings("""Albert Model with a `language modeling` head on top. """, ALBERT_START_DOCSTRING)
-class TFAlbertForMaskedLM(TFAlbertPreTrainedModel):
+class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
 
@@ -834,8 +835,26 @@ def get_output_embeddings(self):
 
     @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
-    def call(self, inputs, **kwargs):
+    def call(
+        self,
+        inputs=None,
+        attention_mask=None,
+        token_type_ids=None,
+        position_ids=None,
+        head_mask=None,
+        inputs_embeds=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        labels=None,
+        training=False,
+    ):
         r"""
+        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`):
+            Labels for computing the masked language modeling loss.
+            Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
+            Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels
+            in ``[0, ..., config.vocab_size]``
+
     Returns:
         :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.AlbertConfig`) and inputs:
         prediction_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`
@@ -852,14 +871,35 @@ def call(self, inputs, **kwargs):
             Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
             heads.
         """
-        outputs = self.albert(inputs, **kwargs)
+        if isinstance(inputs, (tuple, list)):
+            labels = inputs[8] if len(inputs) > 8 else labels
+            if len(inputs) > 8:
+                inputs = inputs[:8]
+        elif isinstance(inputs, (dict, BatchEncoding)):
+            labels = inputs.pop("labels", labels)
+
+        outputs = self.albert(
+            inputs,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            training=training,
+        )
 
         sequence_output = outputs[0]
-        prediction_scores = self.predictions(sequence_output, training=kwargs.get("training", False))
+        prediction_scores = self.predictions(sequence_output, training=training)
 
         # Add hidden states and attention if they are here
         outputs = (prediction_scores,) + outputs[2:]
 
+        if labels is not None:
+            loss = self.compute_loss(labels, prediction_scores)
+            outputs = (loss,) + outputs
+
         return outputs  # prediction_scores, (hidden_states), (attentions)