diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
index ff40551da12820..32313e1d8f21a4 100644
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -278,23 +278,9 @@ def booleans_processing(config, **kwargs):
         if "use_cache" in kwargs:
             final_booleans["use_cache"] = kwargs["use_cache"] if kwargs["use_cache"] is not None else config.use_cache
-
     else:
-        if (
-            kwargs["output_attentions"] is not None
-            or kwargs["output_hidden_states"] is not None
-            or ("use_cache" in kwargs and kwargs["use_cache"] is not None)
-        ):
-            tf.print(
-                "The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model."
-                "They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`)."
-            )
-
         final_booleans["output_attentions"] = config.output_attentions
         final_booleans["output_hidden_states"] = config.output_hidden_states
-
-        if kwargs["return_dict"] is not None:
-            tf.print("The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.")
         final_booleans["return_dict"] = True
 
         if "use_cache" in kwargs:
             final_booleans["use_cache"] = config.use_cache
diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py
index 108f55dcf602c2..54bb69eba4f775 100644
--- a/src/transformers/models/albert/modeling_tf_albert.py
+++ b/src/transformers/models/albert/modeling_tf_albert.py
@@ -802,12 +802,15 @@ class TFAlbertForPreTrainingOutput(ModelOutput):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py
index ad72b06bd0b30c..8c5e641a59ff55 100644
--- a/src/transformers/models/bart/modeling_tf_bart.py
+++ b/src/transformers/models/bart/modeling_tf_bart.py
@@ -574,12 +574,15 @@ def serving(self, inputs):
             decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py
index 01d51ddaa52dd8..890b362ac49ca7 100644
--- a/src/transformers/models/bert/modeling_tf_bert.py
+++ b/src/transformers/models/bert/modeling_tf_bert.py
@@ -881,12 +881,15 @@ class TFBertForPreTrainingOutput(ModelOutput):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py
index 669c211c560183..cffc6095ad174e 100644
--- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py
+++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py
@@ -571,12 +571,15 @@ def serving(self, inputs):
             decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
@@ -661,12 +664,18 @@ def call(
                 into associated vectors than the model's internal embedding lookup matrix.
             output_attentions (:obj:`bool`, `optional`):
                 Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
-                returned tensors for more detail.
+                returned tensors for more detail. This argument can be used only in eager mode; in graph mode the value
+                in the config will be used instead.
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
         """
         inputs = input_processing(
             func=self.call,
@@ -854,12 +863,18 @@ def call(
                 into associated vectors than the model's internal embedding lookup matrix.
             output_attentions (:obj:`bool`, `optional`):
                 Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
-                returned tensors for more detail.
+                returned tensors for more detail. This argument can be used only in eager mode; in graph mode the value
+                in the config will be used instead.
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
""" inputs = input_processing( func=self.call, diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index 710697f58adf29..a1b5d26dbdd7bd 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -576,12 +576,15 @@ def serving(self, inputs): decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -666,12 +669,18 @@ def call( into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, @@ -859,12 +868,18 @@ def call( into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. 
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
         """
         inputs = input_processing(
             func=self.call,
diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py
index 8b29c7f4bc9930..81930b83fb6090 100644
--- a/src/transformers/models/ctrl/modeling_tf_ctrl.py
+++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py
@@ -516,12 +516,15 @@ class TFCTRLPreTrainedModel(TFPreTrainedModel):
             ``past``).
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py
index 64786f3ed96bc6..3a439766213d6a 100644
--- a/src/transformers/models/distilbert/modeling_tf_distilbert.py
+++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py
@@ -585,12 +585,15 @@ def serving(self, inputs):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py
index 2d6e01a7b9e6df..cc595b85cda378 100644
--- a/src/transformers/models/dpr/modeling_tf_dpr.py
+++ b/src/transformers/models/dpr/modeling_tf_dpr.py
@@ -523,12 +523,18 @@ def serving(self, inputs):
            vectors than the model's internal embedding lookup matrix.
        output_attentions (:obj:`bool`, `optional`):
            Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-           tensors for more detail.
+           tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+           config will be used instead.
        output_hidden_states (:obj:`bool`, `optional`):
            Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-           more detail.
+           more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+           used instead.
        return_dict (:obj:`bool`, `optional`):
-           Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+           Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+           argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+       training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+           Whether or not to use the model in training mode (some modules like dropout modules have different
+           behaviors between training and evaluation).
 """
 
 TF_DPR_READER_INPUTS_DOCSTRING = r"""
@@ -556,14 +562,16 @@ def serving(self, inputs):
            Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded
            representation. This is useful if you want more control over how to convert :obj:`input_ids` indices into
            associated vectors than the model's internal embedding lookup matrix.
-       output_attentions (:obj:`bool`, `optional`):
-           Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-           tensors for more detail.
        output_hidden_states (:obj:`bool`, `optional`):
-           Whether or not to rturn the hidden states of all layers. See ``hidden_states`` under returned tensors for
-           more detail.
+           Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
+           more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+           used instead.
        return_dict (:obj:`bool`, `optional`):
-           Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+           Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+           argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+       training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+           Whether or not to use the model in training mode (some modules like dropout modules have different
+           behaviors between training and evaluation).
""" diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index a75c406170092d..5198943f9b847e 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -773,12 +773,15 @@ class TFElectraForPreTrainingOutput(ModelOutput): vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index f24dfa747380a0..db30f188a8c3c8 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -156,12 +156,15 @@ vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index 819b553d3fb4e9..676b9b769ab9e4 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -1131,12 +1131,15 @@ class TFFunnelForPreTrainingOutput(ModelOutput): vectors than the model's internal embedding lookup matrix. 
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py
index 2012cbb0a66c9d..6c6fcacb0ad098 100644
--- a/src/transformers/models/gpt2/modeling_tf_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py
@@ -552,12 +552,15 @@ class TFGPT2DoubleHeadsModelOutput(ModelOutput):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py
index f4a0a524058474..9896d0bc2ec016 100644
--- a/src/transformers/models/led/modeling_tf_led.py
+++ b/src/transformers/models/led/modeling_tf_led.py
@@ -1451,12 +1451,15 @@ class TFLEDSeq2SeqLMOutput(ModelOutput):
             decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py
index c9f2838fb2e56f..4f4e449c56d3ea 100644
--- a/src/transformers/models/longformer/modeling_tf_longformer.py
+++ b/src/transformers/models/longformer/modeling_tf_longformer.py
@@ -2007,12 +2007,15 @@ def serving(self, inputs):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py
index 16b72f2466be0a..0493ef5cdddd4c 100644
--- a/src/transformers/models/lxmert/modeling_tf_lxmert.py
+++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py
@@ -1015,12 +1015,15 @@ def serving(self, inputs):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py
index 78fac03a5d398e..16ec9269ab3526 100644
--- a/src/transformers/models/marian/modeling_tf_marian.py
+++ b/src/transformers/models/marian/modeling_tf_marian.py
@@ -595,12 +595,15 @@ def serving(self, inputs):
             decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
@@ -683,12 +686,18 @@ def call(
                 into associated vectors than the model's internal embedding lookup matrix.
             output_attentions (:obj:`bool`, `optional`):
                 Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
-                returned tensors for more detail.
+                returned tensors for more detail. This argument can be used only in eager mode; in graph mode the value
+                in the config will be used instead.
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
         """
         inputs = input_processing(
             func=self.call,
@@ -872,12 +881,18 @@ def call(
                 into associated vectors than the model's internal embedding lookup matrix.
             output_attentions (:obj:`bool`, `optional`):
                 Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
-                returned tensors for more detail.
+                returned tensors for more detail. This argument can be used only in eager mode; in graph mode the value
+                in the config will be used instead.
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
         """
         inputs = input_processing(
             func=self.call,
diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py
index 6f77248f70a534..8562b0eb173abd 100644
--- a/src/transformers/models/mbart/modeling_tf_mbart.py
+++ b/src/transformers/models/mbart/modeling_tf_mbart.py
@@ -552,12 +552,15 @@ def serving(self, inputs):
             decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
@@ -672,12 +675,18 @@ def call(
                 into associated vectors than the model's internal embedding lookup matrix.
             output_attentions (:obj:`bool`, `optional`):
                 Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
-                returned tensors for more detail.
+                returned tensors for more detail. This argument can be used only in eager mode; in graph mode the value
+                in the config will be used instead.
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
         """
         inputs = input_processing(
             func=self.call,
@@ -867,12 +876,18 @@ def call(
                 into associated vectors than the model's internal embedding lookup matrix.
             output_attentions (:obj:`bool`, `optional`):
                 Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
-                returned tensors for more detail.
+                returned tensors for more detail. This argument can be used only in eager mode; in graph mode the value
+                in the config will be used instead.
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
         """
         inputs = input_processing(
             func=self.call,
diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py
index 40351514057d60..386dfbee6b108f 100644
--- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py
+++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py
@@ -1009,12 +1009,15 @@ class TFMobileBertForPreTrainingOutput(ModelOutput):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py
index e1ff0ba7015a17..33f90cfcf9dbba 100644
--- a/src/transformers/models/mpnet/modeling_tf_mpnet.py
+++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py
@@ -748,12 +748,15 @@ def call(
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py
index 6a725e116e2604..1c4729fc6cbb33 100644
--- a/src/transformers/models/openai/modeling_tf_openai.py
+++ b/src/transformers/models/openai/modeling_tf_openai.py
@@ -485,12 +485,15 @@ class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py
index b36e36c4371300..8d009329cb8a33 100644
--- a/src/transformers/models/pegasus/modeling_tf_pegasus.py
+++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py
@@ -598,14 +598,17 @@ def serving(self, inputs):
         use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
             If set to :obj:`True`, :obj:`past_key_values` key value states are returned and can be used to speed up
             decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
@@ -689,12 +691,18 @@ def call(
                 into associated vectors than the model's internal embedding lookup matrix.
             output_attentions (:obj:`bool`, `optional`):
                 Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
-                returned tensors for more detail.
+                returned tensors for more detail. This argument can be used only in eager mode; in graph mode the value
+                in the config will be used instead.
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
         """
         inputs = input_processing(
             func=self.call,
@@ -881,12 +889,18 @@ def call(
                 into associated vectors than the model's internal embedding lookup matrix.
             output_attentions (:obj:`bool`, `optional`):
                 Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
-                returned tensors for more detail.
+                returned tensors for more detail. This argument can be used only in eager mode; in graph mode the value
+                in the config will be used instead.
             output_hidden_states (:obj:`bool`, `optional`):
                 Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
-                for more detail.
+                for more detail. This argument can be used only in eager mode; in graph mode the value in the config
+                will be used instead.
             return_dict (:obj:`bool`, `optional`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+                argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
+            training (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not to use the model in training mode (some modules like dropout modules have different
+                behaviors between training and evaluation).
         """
         inputs = input_processing(
             func=self.call,
diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py
index e8f3e18880f5aa..9580d6b02c34be 100644
--- a/src/transformers/models/roberta/modeling_tf_roberta.py
+++ b/src/transformers/models/roberta/modeling_tf_roberta.py
@@ -784,12 +784,15 @@ def serving(self, inputs):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py
index 50c792fbc5fa3f..9e6b16bfc10cd6 100644
--- a/src/transformers/models/t5/modeling_tf_t5.py
+++ b/src/transformers/models/t5/modeling_tf_t5.py
@@ -999,12 +999,15 @@ def _shift_right(self, input_ids):
             decoding (see :obj:`past_key_values`).
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py
index 959e80d65b6162..6936030af01862 100644
--- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py
+++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py
@@ -828,12 +828,15 @@ class TFTransfoXLSequenceClassifierOutputWithPast(ModelOutput):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py
index 8cd3c7ef4814fb..e7812846bfd3de 100644
--- a/src/transformers/models/xlm/modeling_tf_xlm.py
+++ b/src/transformers/models/xlm/modeling_tf_xlm.py
@@ -669,12 +669,15 @@ class TFXLMWithLMHeadModelOutput(ModelOutput):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py
index 8269bb4ddae4b5..56fc4ecd2d007c 100644
--- a/src/transformers/models/xlnet/modeling_tf_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py
@@ -1133,12 +1133,15 @@ class TFXLNetForQuestionAnsweringSimpleOutput(ModelOutput):
             vectors than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
             Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
-            tensors for more detail.
+            tensors for more detail. This argument can be used only in eager mode; in graph mode the value in the
+            config will be used instead.
         output_hidden_states (:obj:`bool`, `optional`):
             Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
-            more detail.
+            more detail. This argument can be used only in eager mode; in graph mode the value in the config will be
+            used instead.
         return_dict (:obj:`bool`, `optional`):
-            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This
+            argument can be used in eager mode; in graph mode the value will always be set to :obj:`True`.
         training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
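For reviewers, a short sketch of the behavior these docstrings describe (illustrative only, not part of the patch; the checkpoint name is an arbitrary example): in eager mode the call-time booleans are honored, while in graph mode the values from the config are used and `return_dict` is forced to `True`, so the booleans should be set on the config before tracing.

```python
import tensorflow as tf
from transformers import BertConfig, TFBertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
batch = dict(tokenizer("Hello world", return_tensors="tf"))

# Eager mode: the call-time argument takes effect.
model = TFBertModel.from_pretrained("bert-base-uncased")
outputs = model(batch, output_attentions=True)
print(outputs.attentions is not None)  # True

# Graph mode: the value from the config wins, so set it there instead.
config = BertConfig.from_pretrained("bert-base-uncased", output_attentions=True)
model = TFBertModel.from_pretrained("bert-base-uncased", config=config)

@tf.function
def forward(inputs):
    # A call-time `output_attentions=...` here would be ignored in favor of the
    # config, and the output is always a ModelOutput (return_dict=True).
    return model(inputs)

graph_outputs = forward(batch)
```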