From 7ee957205dd4df381f6eb2f1d33a20d1d54bce38 Mon Sep 17 00:00:00 2001
From: hugehope <166521727+hugehope@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:49:36 +0800
Subject: [PATCH] chore: remove repetitive words (#30174)

Signed-off-by: hugehope
---
 src/transformers/models/canine/modeling_canine.py    | 2 +-
 src/transformers/models/mamba/configuration_mamba.py | 2 +-
 src/transformers/models/rwkv/configuration_rwkv.py   | 2 +-
 src/transformers/optimization.py                     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/canine/modeling_canine.py b/src/transformers/models/canine/modeling_canine.py
index 023287153afc3..39d89c6e0b3da 100644
--- a/src/transformers/models/canine/modeling_canine.py
+++ b/src/transformers/models/canine/modeling_canine.py
@@ -608,7 +608,7 @@ def forward(
                 chunk_end = min(from_seq_length, chunk_start + self.attend_from_chunk_width)
                 from_chunks.append((chunk_start, chunk_end))

-            # Determine the chunks (windows) that will will attend *to*.
+            # Determine the chunks (windows) that will attend *to*.
             to_chunks = []
             if self.first_position_attends_to_all:
                 to_chunks.append((0, to_seq_length))
diff --git a/src/transformers/models/mamba/configuration_mamba.py b/src/transformers/models/mamba/configuration_mamba.py
index 695d9a62737dc..b3e9b4eb946b9 100644
--- a/src/transformers/models/mamba/configuration_mamba.py
+++ b/src/transformers/models/mamba/configuration_mamba.py
@@ -67,7 +67,7 @@ class MambaConfig(PretrainedConfig):
         residual_in_fp32 (`bool`, *optional*, defaults to `True`):
             Whether or not residuals should be in `float32`. If set to `False` residuals will keep the same `dtype` as the rest of the model
         time_step_rank (`Union[int,str]`, *optional*, defaults to `"auto"`):
-            Rank of the the discretization projection matrix. `"auto"` means that it will default to `math.ceil(self.hidden_size / 16)`
+            Rank of the discretization projection matrix. `"auto"` means that it will default to `math.ceil(self.hidden_size / 16)`
         time_step_scale (`float`, *optional*, defaults to 1.0):
             Scale used used to scale `dt_proj.bias`.
         time_step_min (`float`, *optional*, defaults to 0.001):
diff --git a/src/transformers/models/rwkv/configuration_rwkv.py b/src/transformers/models/rwkv/configuration_rwkv.py
index a6abfc549e667..5e0598dad5c42 100644
--- a/src/transformers/models/rwkv/configuration_rwkv.py
+++ b/src/transformers/models/rwkv/configuration_rwkv.py
@@ -41,7 +41,7 @@ class RwkvConfig(PretrainedConfig):
             Vocabulary size of the RWKV model. Defines the number of different tokens that can be represented by the
             `inputs_ids` passed when calling [`RwkvModel`].
         context_length (`int`, *optional*, defaults to 1024):
-            The maximum sequence length that this model can be be used with in a single forward (using it in RNN mode
+            The maximum sequence length that this model can be used with in a single forward (using it in RNN mode
             lets use any sequence length).
         hidden_size (`int`, *optional*, defaults to 4096):
             Dimensionality of the embeddings and hidden states.
diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py
index ce9f9b78dcebe..3727784fba9ee 100644
--- a/src/transformers/optimization.py
+++ b/src/transformers/optimization.py
@@ -273,7 +273,7 @@ def get_polynomial_decay_schedule_with_warmup(

     lr_init = optimizer.defaults["lr"]
     if not (lr_init > lr_end):
-        raise ValueError(f"lr_end ({lr_end}) must be be smaller than initial lr ({lr_init})")
+        raise ValueError(f"lr_end ({lr_end}) must be smaller than initial lr ({lr_init})")

     lr_lambda = partial(
         _get_polynomial_decay_schedule_with_warmup_lr_lambda,
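
As a quick illustration (not part of the patch itself), the `optimization.py` hunk above touches the validation in `get_polynomial_decay_schedule_with_warmup`, which requires `lr_end` to be smaller than the optimizer's initial learning rate. Below is a minimal sketch of how that check surfaces in practice; the toy model and `torch.optim.AdamW` optimizer are assumptions chosen purely for illustration.

```python
import torch
from torch.optim import AdamW
from transformers import get_polynomial_decay_schedule_with_warmup

# Toy setup, purely illustrative.
model = torch.nn.Linear(8, 2)
optimizer = AdamW(model.parameters(), lr=5e-5)

# Valid: lr_end (1e-7) is smaller than the initial lr (5e-5).
scheduler = get_polynomial_decay_schedule_with_warmup(
    optimizer, num_warmup_steps=10, num_training_steps=100, lr_end=1e-7
)

# Invalid: lr_end >= initial lr raises the (now correctly worded) error:
#   ValueError: lr_end (0.001) must be smaller than initial lr (5e-05)
# get_polynomial_decay_schedule_with_warmup(
#     optimizer, num_warmup_steps=10, num_training_steps=100, lr_end=1e-3
# )
```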