From 76cfebe19cdd37921ff25d36d1ccd05091386ae0 Mon Sep 17 00:00:00 2001
From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com>
Date: Wed, 15 May 2024 20:06:03 +0000
Subject: [PATCH] Remove more kwargs

---
 .../models/cohere/modeling_cohere.py          |  2 --
 .../modeling_conditional_detr.py              | 24 +------------------
 src/transformers/models/detr/modeling_detr.py | 24 +------------------
 .../models/llama/modeling_llama.py            |  3 ---
 .../models/maskformer/modeling_maskformer.py  | 17 +------------
 src/transformers/models/olmo/modeling_olmo.py |  3 ---
 .../modeling_table_transformer.py             | 13 +---------
 7 files changed, 4 insertions(+), 82 deletions(-)

diff --git a/src/transformers/models/cohere/modeling_cohere.py b/src/transformers/models/cohere/modeling_cohere.py
index f194993c04c51..b0eaa71bd4cf3 100644
--- a/src/transformers/models/cohere/modeling_cohere.py
+++ b/src/transformers/models/cohere/modeling_cohere.py
@@ -634,7 +634,6 @@ def forward(
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
         cache_position: Optional[torch.LongTensor] = None,
-        **kwargs,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
         """
         Args:
@@ -663,7 +662,6 @@ def forward(
             output_attentions=output_attentions,
             use_cache=use_cache,
             cache_position=cache_position,
-            **kwargs,
         )
 
         # Fully Connected
diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py
index 11eaffeb4e3bd..7fd04b8b43b72 100644
--- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py
+++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py
@@ -556,9 +556,7 @@ def __init__(
     def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
         return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
 
-    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
+    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
         return tensor if object_queries is None else tensor + object_queries
 
     def forward(
@@ -569,12 +567,8 @@ def forward(
         key_value_states: Optional[torch.Tensor] = None,
         spatial_position_embeddings: Optional[torch.Tensor] = None,
         output_attentions: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         """Input shape: Batch x Time x Channel"""
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         # if key_value_states are provided this layer is used as a cross-attention layer
         # for the decoder
         is_cross_attention = key_value_states is not None
@@ -798,7 +792,6 @@ def forward(
         attention_mask: torch.Tensor,
         object_queries: torch.Tensor = None,
         output_attentions: bool = False,
-        **kwargs,
     ):
         """
         Args:
@@ -812,9 +805,6 @@ def forward(
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more detail.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         residual = hidden_states
         hidden_states, attn_weights = self.self_attn(
             hidden_states=hidden_states,
@@ -903,7 +893,6 @@ def forward(
         encoder_attention_mask: Optional[torch.Tensor] = None,
         output_attentions: Optional[bool] = False,
         is_first: Optional[bool] = False,
-        **kwargs,
     ):
         """
         Args:
@@ -926,9 +915,6 @@ def forward(
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more detail.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         residual = hidden_states
 
         # ========== Begin of Self-Attention =============
@@ -1170,7 +1156,6 @@ def forward(
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         Args:
@@ -1197,9 +1182,6 @@ def forward(
             return_dict (`bool`, *optional*):
                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1298,7 +1280,6 @@ def forward(
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         Args:
@@ -1335,9 +1316,6 @@ def forward(
             return_dict (`bool`, *optional*):
                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py
index 732d403563aca..37da6b6ee5918 100644
--- a/src/transformers/models/detr/modeling_detr.py
+++ b/src/transformers/models/detr/modeling_detr.py
@@ -524,9 +524,7 @@ def __init__(
     def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
         return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
 
-    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
+    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
         return tensor if object_queries is None else tensor + object_queries
 
     def forward(
@@ -537,12 +535,8 @@ def forward(
         key_value_states: Optional[torch.Tensor] = None,
         spatial_position_embeddings: Optional[torch.Tensor] = None,
         output_attentions: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         """Input shape: Batch x Time x Channel"""
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         # if key_value_states are provided this layer is used as a cross-attention layer
         # for the decoder
         is_cross_attention = key_value_states is not None
@@ -648,7 +642,6 @@ def forward(
         attention_mask: torch.Tensor,
         object_queries: torch.Tensor = None,
         output_attentions: bool = False,
-        **kwargs,
     ):
         """
         Args:
@@ -662,9 +655,6 @@ def forward(
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more detail.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         residual = hidden_states
         hidden_states, attn_weights = self.self_attn(
             hidden_states=hidden_states,
@@ -734,7 +724,6 @@ def forward(
         encoder_hidden_states: Optional[torch.Tensor] = None,
         encoder_attention_mask: Optional[torch.Tensor] = None,
         output_attentions: Optional[bool] = False,
-        **kwargs,
     ):
         """
         Args:
@@ -757,9 +746,6 @@ def forward(
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more detail.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         residual = hidden_states
 
         # Self Attention
@@ -929,7 +915,6 @@ def forward(
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         Args:
@@ -956,9 +941,6 @@ def forward(
             return_dict (`bool`, *optional*):
                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1050,7 +1032,6 @@ def forward(
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         Args:
@@ -1088,9 +1069,6 @@ def forward(
             return_dict (`bool`, *optional*):
                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py
index f80643139276f..c6f4b174e48cc 100644
--- a/src/transformers/models/llama/modeling_llama.py
+++ b/src/transformers/models/llama/modeling_llama.py
@@ -103,9 +103,6 @@ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, s
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         # For BC we register cos and sin cached
         self.max_seq_len_cached = max_position_embeddings
-        t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
-        t = t / self.scaling_factor
-        freqs = torch.outer(t, self.inv_freq)
 
     @torch.no_grad()
     def forward(self, x, position_ids):
diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py
index 3132c76a37af8..2c2746603ac63 100644
--- a/src/transformers/models/maskformer/modeling_maskformer.py
+++ b/src/transformers/models/maskformer/modeling_maskformer.py
@@ -440,10 +440,7 @@ def __init__(
     def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
         return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
 
-    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
+    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
         return tensor if object_queries is None else tensor + object_queries
 
     def forward(
@@ -454,12 +451,8 @@ def forward(
         key_value_states: Optional[torch.Tensor] = None,
         spatial_position_embeddings: Optional[torch.Tensor] = None,
         output_attentions: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         """Input shape: Batch x Time x Channel"""
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         # if key_value_states are provided this layer is used as a cross-attention layer
         # for the decoder
         is_cross_attention = key_value_states is not None
@@ -577,7 +570,6 @@ def forward(
         encoder_hidden_states: Optional[torch.Tensor] = None,
         encoder_attention_mask: Optional[torch.Tensor] = None,
         output_attentions: Optional[bool] = False,
-        **kwargs,
     ):
         """
         Args:
@@ -600,9 +592,6 @@ def forward(
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more detail.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         residual = hidden_states
 
         # Self Attention
@@ -690,7 +679,6 @@ def forward(
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         Args:
@@ -727,9 +715,6 @@ def forward(
             return_dict (`bool`, *optional*):
                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
diff --git a/src/transformers/models/olmo/modeling_olmo.py b/src/transformers/models/olmo/modeling_olmo.py
index e4f9a0f2ea10a..4c3169107796b 100644
--- a/src/transformers/models/olmo/modeling_olmo.py
+++ b/src/transformers/models/olmo/modeling_olmo.py
@@ -100,9 +100,6 @@ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, s
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         # For BC we register cos and sin cached
         self.max_seq_len_cached = max_position_embeddings
-        t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
-        t = t / self.scaling_factor
-        freqs = torch.outer(t, self.inv_freq)
 
     @torch.no_grad()
     def forward(self, x, position_ids):
diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py
index f004fff71b76f..3bfdde8907928 100644
--- a/src/transformers/models/table_transformer/modeling_table_transformer.py
+++ b/src/transformers/models/table_transformer/modeling_table_transformer.py
@@ -461,10 +461,7 @@ def __init__(
     def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
         return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
 
-    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
+    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
         return tensor if object_queries is None else tensor + object_queries
 
     def forward(
@@ -475,12 +472,8 @@ def forward(
         key_value_states: Optional[torch.Tensor] = None,
         spatial_position_embeddings: Optional[torch.Tensor] = None,
         output_attentions: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         """Input shape: Batch x Time x Channel"""
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         # if key_value_states are provided this layer is used as a cross-attention layer
         # for the decoder
         is_cross_attention = key_value_states is not None
@@ -981,7 +974,6 @@ def forward(
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         Args:
@@ -1019,9 +1011,6 @@ def forward(
             return_dict (`bool`, *optional*):
                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states