diff --git a/src/transformers/models/data2vec/modeling_data2vec_audio.py b/src/transformers/models/data2vec/modeling_data2vec_audio.py
index 1b531c048961..3f255248c1ba 100755
--- a/src/transformers/models/data2vec/modeling_data2vec_audio.py
+++ b/src/transformers/models/data2vec/modeling_data2vec_audio.py
@@ -977,13 +977,13 @@ def _mask_hidden_states(
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, Data2VecAudioBaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1085,13 +1085,13 @@ def freeze_feature_encoder(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
             Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
@@ -1221,13 +1221,13 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1342,12 +1342,12 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1519,13 +1519,13 @@ def _conv_out_length(input_length, kernel_size, stride):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, XVectorOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py
index 00ac994dd632..5af0197fb95c 100755
--- a/src/transformers/models/hubert/modeling_hubert.py
+++ b/src/transformers/models/hubert/modeling_hubert.py
@@ -1008,13 +1008,13 @@ def _mask_hidden_states(
     @replace_return_docstrings(output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutput]:
         """

         Returns:
@@ -1135,13 +1135,13 @@ def freeze_feature_encoder(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
             Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
@@ -1271,13 +1271,13 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py
index e3561f780b5b..ac2a6293cb95 100644
--- a/src/transformers/models/sew/modeling_sew.py
+++ b/src/transformers/models/sew/modeling_sew.py
@@ -914,13 +914,13 @@ def _mask_hidden_states(
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1018,13 +1018,13 @@ def freeze_feature_encoder(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
             Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
@@ -1154,13 +1154,13 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
diff --git a/src/transformers/models/sew_d/modeling_sew_d.py b/src/transformers/models/sew_d/modeling_sew_d.py
index 4470a10ebcd1..a297e4c7b25b 100644
--- a/src/transformers/models/sew_d/modeling_sew_d.py
+++ b/src/transformers/models/sew_d/modeling_sew_d.py
@@ -1427,13 +1427,13 @@ def _mask_hidden_states(
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1531,13 +1531,13 @@ def freeze_feature_encoder(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
             Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
@@ -1667,13 +1667,13 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py
index e7206b587524..94f8f0e3d2e6 100755
--- a/src/transformers/models/unispeech/modeling_unispeech.py
+++ b/src/transformers/models/unispeech/modeling_unispeech.py
@@ -1160,13 +1160,13 @@ def _mask_hidden_states(
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, UniSpeechBaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1274,12 +1274,12 @@ def compute_contrastive_logits(
     @replace_return_docstrings(output_type=UniSpeechForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, UniSpeechForPreTrainingOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
@@ -1412,13 +1412,13 @@ def freeze_feature_encoder(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
             Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
@@ -1548,13 +1548,13 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
index ae5f57d8a1e5..ea010b264794 100755
--- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
+++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
@@ -1199,13 +1199,13 @@ def _mask_hidden_states(
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, UniSpeechSatBaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1318,12 +1318,12 @@ def compute_contrastive_logits(
     @replace_return_docstrings(output_type=UniSpeechSatForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, UniSpeechSatForPreTrainingOutput]:
         r"""
         Returns:

@@ -1440,13 +1440,13 @@ def freeze_feature_encoder(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
             Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
@@ -1576,13 +1576,13 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1697,12 +1697,12 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1874,13 +1874,13 @@ def _conv_out_length(input_length, kernel_size, stride):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, XVectorOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
index 08a86d540311..f58ec9a3363e 100755
--- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
@@ -1331,13 +1331,13 @@ def _mask_hidden_states(
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, Wav2Vec2BaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1448,14 +1448,14 @@ def compute_contrastive_logits(
     @replace_return_docstrings(output_type=Wav2Vec2ForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        sampled_negative_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.BoolTensor] = None,
+        sampled_negative_indices: Optional[torch.BoolTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, Wav2Vec2ForPreTrainingOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
@@ -1696,13 +1696,13 @@ def freeze_feature_encoder(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
             Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
@@ -1831,13 +1831,13 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1951,12 +1951,12 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -2125,13 +2125,13 @@ def _conv_out_length(input_length, kernel_size, stride):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, XVectorOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
diff --git a/src/transformers/models/wavlm/modeling_wavlm.py b/src/transformers/models/wavlm/modeling_wavlm.py
index e6b47056ce2e..c2eb193160ad 100755
--- a/src/transformers/models/wavlm/modeling_wavlm.py
+++ b/src/transformers/models/wavlm/modeling_wavlm.py
@@ -1282,13 +1282,13 @@ def _mask_hidden_states(
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        mask_time_indices=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        mask_time_indices: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, WavLMBaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1390,13 +1390,13 @@ def freeze_feature_encoder(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
             Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
@@ -1526,13 +1526,13 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1647,12 +1647,12 @@ def freeze_base_model(self):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1824,13 +1824,13 @@ def _conv_out_length(input_length, kernel_size, stride):
     )
     def forward(
         self,
-        input_values,
-        attention_mask=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-    ):
+        input_values: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, XVectorOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
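
Every hunk in this patch makes the same mechanical change: the untyped `forward` signatures gain `Optional[...]` parameter annotations and a `Union[Tuple, <ModelOutput>]` return annotation, with no change to runtime behavior. As a minimal sketch of what the annotated signature promises at a call site (the checkpoint name, dummy input, and printed shape below are illustrative assumptions, not part of this diff):

    import torch
    from transformers import Wav2Vec2Model

    model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base")

    # A batch of raw audio: (batch_size, sequence_length) float values sampled at 16 kHz.
    input_values = torch.randn(1, 16_000)

    # With return_dict=True the call resolves to the Wav2Vec2BaseModelOutput branch
    # of the Union[Tuple, Wav2Vec2BaseModelOutput] return annotation added above;
    # with return_dict=False it would resolve to the plain Tuple branch instead.
    outputs = model(input_values, return_dict=True)
    print(outputs.last_hidden_state.shape)  # expected: torch.Size([1, 49, 768]) for the base checkpoint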