diff --git a/tests/commands/test_serving.py b/tests/commands/test_serving.py index 1838e37eddcf..9fbd79464b17 100644 --- a/tests/commands/test_serving.py +++ b/tests/commands/test_serving.py @@ -72,7 +72,7 @@ def test_parsed_args(self): def test_build_chat_completion_chunk(self): """ - Tests that the chunks are correctly built for the Chat Completion API. The `choices` checks implictly + Tests that the chunks are correctly built for the Chat Completion API. The `choices` checks implicitly confirm that empty fields are not emitted. """ dummy = ServeCommand.__new__(ServeCommand) diff --git a/tests/generation/test_stopping_criteria.py b/tests/generation/test_stopping_criteria.py index b258f0e82d27..b9527327b71d 100644 --- a/tests/generation/test_stopping_criteria.py +++ b/tests/generation/test_stopping_criteria.py @@ -239,7 +239,7 @@ def test_single_letter_stop_string(self): for input_ids in false_input_ids["input_ids"]: self.assertFalse(criteria(input_ids.unsqueeze(0), scores)) - def test_criterias_per_row(self): + def test_criteria_per_row(self): text = "They completed the challenging puzzle, revealing the hidden image at the end" stop_strings = ["end"] @@ -261,7 +261,7 @@ def test_criterias_per_row(self): # return False when neither is satisfied self.assertFalse(criteria(inputs["input_ids"][:, :-1], scores)) - def test_criterias_per_row_batched(self): + def test_criteria_per_row_batched(self): text = [ "They completed the challenging puzzle, revealing the hidden image at the end", "Today a dragon flew over France", diff --git a/tests/models/auto/test_image_processing_auto.py b/tests/models/auto/test_image_processing_auto.py index bb45b5abca38..bd0c9a2c76ef 100644 --- a/tests/models/auto/test_image_processing_auto.py +++ b/tests/models/auto/test_image_processing_auto.py @@ -91,7 +91,7 @@ def test_image_processor_from_local_directory_from_config(self): with tempfile.TemporaryDirectory() as tmpdirname: model_config = CLIPConfig() - # Create a dummy config file with image_proceesor_type + # Create a dummy config file with image_processor_type processor_tmpfile = Path(tmpdirname) / "preprocessor_config.json" config_tmpfile = Path(tmpdirname) / "config.json" json.dump( diff --git a/tests/models/auto/test_video_processing_auto.py b/tests/models/auto/test_video_processing_auto.py index 1aa89d92cb2d..06325f148c00 100644 --- a/tests/models/auto/test_video_processing_auto.py +++ b/tests/models/auto/test_video_processing_auto.py @@ -83,7 +83,7 @@ def test_video_processor_from_local_directory_from_config(self): with tempfile.TemporaryDirectory() as tmpdirname: model_config = LlavaOnevisionConfig() - # Create a dummy config file with image_proceesor_type + # Create a dummy config file with image_processor_type processor_tmpfile = Path(tmpdirname) / "video_preprocessor_config.json" config_tmpfile = Path(tmpdirname) / "config.json" json.dump( diff --git a/tests/models/bart/test_tokenization_bart.py b/tests/models/bart/test_tokenization_bart.py index 81f142a8a523..9651efbde21d 100644 --- a/tests/models/bart/test_tokenization_bart.py +++ b/tests/models/bart/test_tokenization_bart.py @@ -157,7 +157,7 @@ def test_special_tokens(self): def test_pretokenized_inputs(self): pass - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) diff --git 
a/tests/models/bert/test_tokenization_bert.py b/tests/models/bert/test_tokenization_bert.py index 6da10d0fe3a5..b9d46dea3a55 100644 --- a/tests/models/bert/test_tokenization_bert.py +++ b/tests/models/bert/test_tokenization_bert.py @@ -307,8 +307,8 @@ def test_offsets_with_special_characters(self): self.assertEqual([e[0] for e in expected_results], tokens["offset_mapping"]) def test_change_tokenize_chinese_chars(self): - list_of_commun_chinese_char = ["的", "人", "有"] - text_with_chinese_char = "".join(list_of_commun_chinese_char) + list_of_common_chinese_char = ["的", "人", "有"] + text_with_chinese_char = "".join(list_of_common_chinese_char) for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): kwargs["tokenize_chinese_chars"] = True @@ -322,8 +322,8 @@ def test_change_tokenize_chinese_chars(self): tokens_without_spe_char_p = tokenizer_p.convert_ids_to_tokens(ids_without_spe_char_p) # it is expected that each Chinese character is not preceded by "##" - self.assertListEqual(tokens_without_spe_char_p, list_of_commun_chinese_char) - self.assertListEqual(tokens_without_spe_char_r, list_of_commun_chinese_char) + self.assertListEqual(tokens_without_spe_char_p, list_of_common_chinese_char) + self.assertListEqual(tokens_without_spe_char_r, list_of_common_chinese_char) kwargs["tokenize_chinese_chars"] = False tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) @@ -337,7 +337,7 @@ def test_change_tokenize_chinese_chars(self): # it is expected that only the first Chinese character is not preceded by "##". expected_tokens = [ - f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_commun_chinese_char) + f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_common_chinese_char) ] self.assertListEqual(tokens_without_spe_char_p, expected_tokens) self.assertListEqual(tokens_without_spe_char_r, expected_tokens) diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py index 2f6df2aab27e..0b3ab74d519c 100644 --- a/tests/models/blip_2/test_modeling_blip_2.py +++ b/tests/models/blip_2/test_modeling_blip_2.py @@ -513,7 +513,7 @@ def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention". - In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model + In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. See https://github.com/huggingface/transformers/pull/32238 for more info @@ -949,7 +949,7 @@ def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention". - In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model + In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. 
See https://github.com/huggingface/transformers/pull/32238 for more info diff --git a/tests/models/deepseek_v2/test_modeling_deepseek_v2.py b/tests/models/deepseek_v2/test_modeling_deepseek_v2.py index 26bc36b9af1a..930f2504dee8 100644 --- a/tests/models/deepseek_v2/test_modeling_deepseek_v2.py +++ b/tests/models/deepseek_v2/test_modeling_deepseek_v2.py @@ -98,7 +98,9 @@ def test_model_rope_scaling_frequencies(self): long_input_length = int(config.max_position_embeddings * 1.5) # Inputs - x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device + x = torch.randn( + 1, dtype=torch.float32, device=torch_device + ) # used exclusively to get the dtype and the device position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device) position_ids_short = position_ids_short.unsqueeze(0) position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device) @@ -161,7 +163,7 @@ def test_past_key_values_format(self): super().test_past_key_values_format(custom_all_cache_shapes=all_cache_shapes) def _check_past_key_values_for_generate(self, batch_size, decoder_past_key_values, cache_length, config): - """Needs to be overriden as deepseek has special MLA cache format (though we don't really use the MLA)""" + """Needs to be overridden as deepseek has special MLA cache format (though we don't really use the MLA)""" self.assertIsInstance(decoder_past_key_values, Cache) # (batch, head, seq_length, head_features) diff --git a/tests/models/dia/test_modeling_dia.py b/tests/models/dia/test_modeling_dia.py index 900bb0cef73d..989608d686ea 100644 --- a/tests/models/dia/test_modeling_dia.py +++ b/tests/models/dia/test_modeling_dia.py @@ -250,7 +250,7 @@ def skip_non_greedy_generate(self): self.skipTest(reason="Dia only supports greedy search / sampling with one sequence.") def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): - """Overriden to account for the 2D flattened structure""" + """Overridden to account for the 2D flattened structure""" inputs_dict = copy.deepcopy(inputs_dict) if return_labels: diff --git a/tests/models/electra/test_tokenization_electra.py b/tests/models/electra/test_tokenization_electra.py index 4c736e167082..f2ac66e21ae9 100644 --- a/tests/models/electra/test_tokenization_electra.py +++ b/tests/models/electra/test_tokenization_electra.py @@ -300,8 +300,8 @@ def test_offsets_with_special_characters(self): self.assertEqual([e[0] for e in expected_results], tokens["offset_mapping"]) def test_change_tokenize_chinese_chars(self): - list_of_commun_chinese_char = ["的", "人", "有"] - text_with_chinese_char = "".join(list_of_commun_chinese_char) + list_of_common_chinese_char = ["的", "人", "有"] + text_with_chinese_char = "".join(list_of_common_chinese_char) for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): kwargs["tokenize_chinese_chars"] = True @@ -315,8 +315,8 @@ def test_change_tokenize_chinese_chars(self): tokens_without_spe_char_p = tokenizer_p.convert_ids_to_tokens(ids_without_spe_char_p) # it is expected that each Chinese character is not preceded by "##" - self.assertListEqual(tokens_without_spe_char_p, list_of_commun_chinese_char) - self.assertListEqual(tokens_without_spe_char_r, list_of_commun_chinese_char) + self.assertListEqual(tokens_without_spe_char_p, list_of_common_chinese_char) + self.assertListEqual(tokens_without_spe_char_r, list_of_common_chinese_char) 
kwargs["tokenize_chinese_chars"] = False tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) @@ -330,7 +330,7 @@ def test_change_tokenize_chinese_chars(self): # it is expected that only the first Chinese character is not preceded by "##". expected_tokens = [ - f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_commun_chinese_char) + f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_common_chinese_char) ] self.assertListEqual(tokens_without_spe_char_p, expected_tokens) self.assertListEqual(tokens_without_spe_char_r, expected_tokens) diff --git a/tests/models/gemma2/test_modeling_gemma2.py b/tests/models/gemma2/test_modeling_gemma2.py index 9717ea942972..28ef2eeb8b57 100644 --- a/tests/models/gemma2/test_modeling_gemma2.py +++ b/tests/models/gemma2/test_modeling_gemma2.py @@ -387,7 +387,7 @@ def test_generation_beyond_sliding_window(self, attn_implementation: str): self.skipTest("FlashAttention2 is required for this test.") if torch_device == "xpu" and attn_implementation == "flash_attention_2": - self.skipTest(reason="Intel XPU doesn't support falsh_attention_2 as of now.") + self.skipTest(reason="Intel XPU doesn't support flash_attention_2 as of now.") model_id = "google/gemma-2-2b" EXPECTED_COMPLETIONS = [ @@ -433,7 +433,7 @@ def test_generation_beyond_sliding_window_dynamic(self, attn_implementation: str self.skipTest("FlashAttention2 is required for this test.") if torch_device == "xpu" and attn_implementation == "flash_attention_2": - self.skipTest(reason="Intel XPU doesn't support falsh_attention_2 as of now.") + self.skipTest(reason="Intel XPU doesn't support flash_attention_2 as of now.") model_id = "google/gemma-2-2b" EXPECTED_COMPLETIONS = [ diff --git a/tests/models/grounding_dino/test_modeling_grounding_dino.py b/tests/models/grounding_dino/test_modeling_grounding_dino.py index b68b5810ac17..4802604efb55 100644 --- a/tests/models/grounding_dino/test_modeling_grounding_dino.py +++ b/tests/models/grounding_dino/test_modeling_grounding_dino.py @@ -638,7 +638,7 @@ def test_tied_weights_keys(self): # GroundingDino when sharing weights also uses the shared ones in GroundingDinoDecoder # Therefore, differently from DeformableDetr, we expect the group lens to be 2 - # one for self.bbox_embed in GroundingDinoForObejectDetection and another one + # one for self.bbox_embed in GroundingDinoForObjectDetection and another one # in the decoder tied_params = [group for group in tied_params if len(group) > 2] self.assertListEqual( diff --git a/tests/models/instructblipvideo/test_modeling_instructblipvideo.py b/tests/models/instructblipvideo/test_modeling_instructblipvideo.py index 270ba8bcc63f..a91d31082da9 100644 --- a/tests/models/instructblipvideo/test_modeling_instructblipvideo.py +++ b/tests/models/instructblipvideo/test_modeling_instructblipvideo.py @@ -669,8 +669,8 @@ def _prepare_model_kwargs(input_ids, attention_mask, signature): def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. - This tests only by looking at layer names, as usually SDPA layers are calles "SDPAAttention". - In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model + This tests only by looking at layer names, as usually SDPA layers call "SDPAAttention". 
+ In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. See https://github.com/huggingface/transformers/pull/32238 for more info diff --git a/tests/models/kosmos2/test_modeling_kosmos2.py b/tests/models/kosmos2/test_modeling_kosmos2.py index 6ce8f8d02f36..ac16e62c55f3 100644 --- a/tests/models/kosmos2/test_modeling_kosmos2.py +++ b/tests/models/kosmos2/test_modeling_kosmos2.py @@ -481,7 +481,7 @@ def test_sdpa_padding_matches_padding_free_with_position_ids(self): @pytest.mark.generate def test_left_padding_compatibility(self): - # Overwrite because Kosmos-2 need to padd pixel values and pad image-attn-mask + # Overwrite because Kosmos-2 needs to pad pixel values and pad image-attn-mask def _prepare_model_kwargs(input_ids, attention_mask, pad_size, signature): model_kwargs = {"input_ids": input_ids, "attention_mask": attention_mask} diff --git a/tests/models/kosmos2_5/test_modeling_kosmos2_5.py b/tests/models/kosmos2_5/test_modeling_kosmos2_5.py index 5d000f1634d6..c2a18cb5b690 100644 --- a/tests/models/kosmos2_5/test_modeling_kosmos2_5.py +++ b/tests/models/kosmos2_5/test_modeling_kosmos2_5.py @@ -570,7 +570,7 @@ def test_generate_from_inputs_embeds(self): @pytest.mark.generate def test_left_padding_compatibility(self): - # Overwrite because Kosmos-2.5 need to padd pixel values and pad image-attn-mask + # Overwrite because Kosmos-2.5 needs to pad pixel values and pad image-attn-mask def _prepare_model_kwargs(input_ids, attention_mask, pad_size, signature): model_kwargs = {"input_ids": input_ids, "attention_mask": attention_mask} diff --git a/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py b/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py index ab34b1007d68..c87e345a542d 100644 --- a/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py @@ -1337,7 +1337,7 @@ def test_tokenization_python_rust_equals(self): ): self.assertSequenceEqual(input_p[key], input_r[key][0]) - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): if not self.test_slow_tokenizer: # as we don't have a slow version, we can't compare the outputs between slow and fast versions self.skipTest(reason="test_slow_tokenizer is set to False") @@ -1733,7 +1733,7 @@ def test_batch_encode_dynamic_overflowing(self): self.assertEqual(tokens[key].shape[-1], 4) @unittest.skip(reason="TO DO: overwrite this very extensive test.") - def test_alignement_methods(self): + def test_alignment_methods(self): pass def get_clean_sequence(self, tokenizer, with_prefix_space=False, max_length=20, min_length=5): diff --git a/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py b/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py index b07768c65ac1..ae484e7459dc 100644 --- a/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py @@ -1222,7 +1222,7 @@ def test_tokenization_python_rust_equals(self): ): self.assertSequenceEqual(input_p[key], input_r[key][0]) - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): if not self.test_slow_tokenizer: # as we don't have a slow version, we can't compare the outputs between slow and fast versions self.skipTest(reason="test_slow_tokenizer is set to False") @@ -1623,7 +1623,7 @@ def test_batch_encode_dynamic_overflowing(self):
self.assertEqual(tokens[key].shape[-1], 4) @unittest.skip(reason="TO DO: overwrite this very extensive test.") - def test_alignement_methods(self): + def test_alignment_methods(self): pass def get_clean_sequence(self, tokenizer, with_prefix_space=False, max_length=20, min_length=5): diff --git a/tests/models/layoutxlm/test_tokenization_layoutxlm.py b/tests/models/layoutxlm/test_tokenization_layoutxlm.py index 506ddd1f2418..185525702c63 100644 --- a/tests/models/layoutxlm/test_tokenization_layoutxlm.py +++ b/tests/models/layoutxlm/test_tokenization_layoutxlm.py @@ -1266,7 +1266,7 @@ def test_tokenization_python_rust_equals(self): ): self.assertSequenceEqual(input_p[key], input_r[key][0]) - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): if not self.test_slow_tokenizer: # as we don't have a slow version, we can't compare the outputs between slow and fast versions self.skipTest(reason="test_slow_tokenizer is set to False") @@ -1734,7 +1734,7 @@ def test_save_pretrained(self): shutil.rmtree(tmpdirname2) @unittest.skip(reason="TO DO: overwrite this very extensive test.") - def test_alignement_methods(self): + def test_alignment_methods(self): pass @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.") diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py index 60a10eb860b2..a8e47955dfa1 100644 --- a/tests/models/led/test_modeling_led.py +++ b/tests/models/led/test_modeling_led.py @@ -515,7 +515,7 @@ def _long_tensor(tok_lst): class LEDModelIntegrationTests(unittest.TestCase): """All the below results were obtained with the original checkpoints and code base from https://github.com/allenai/longformer. - IMPORTANT: Note that the original checkpoints include a `postion_embeddings` "hack" + IMPORTANT: Note that the original checkpoints include a `position_embeddings` "hack" and have to be cut to have the correct shape. See: https://github.com/huggingface/transformers/pull/9278#issue-544709661. 
""" diff --git a/tests/models/led/test_tokenization_led.py b/tests/models/led/test_tokenization_led.py index 5290138d5730..6d647c4785e9 100644 --- a/tests/models/led/test_tokenization_led.py +++ b/tests/models/led/test_tokenization_led.py @@ -164,7 +164,7 @@ def test_global_attention_mask(self): def test_pretokenized_inputs(self): pass - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) diff --git a/tests/models/longformer/test_tokenization_longformer.py b/tests/models/longformer/test_tokenization_longformer.py index bad745e982bf..d1b4447930fc 100644 --- a/tests/models/longformer/test_tokenization_longformer.py +++ b/tests/models/longformer/test_tokenization_longformer.py @@ -174,7 +174,7 @@ def test_space_encoding(self): def test_pretokenized_inputs(self): pass - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) diff --git a/tests/models/luke/test_tokenization_luke.py b/tests/models/luke/test_tokenization_luke.py index 8db058f882d0..d85075d2ee29 100644 --- a/tests/models/luke/test_tokenization_luke.py +++ b/tests/models/luke/test_tokenization_luke.py @@ -134,7 +134,7 @@ def test_space_encoding(self): def test_pretokenized_inputs(self): pass - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) diff --git a/tests/models/markuplm/test_tokenization_markuplm.py b/tests/models/markuplm/test_tokenization_markuplm.py index 3bf526407f93..253f525faa63 100644 --- a/tests/models/markuplm/test_tokenization_markuplm.py +++ b/tests/models/markuplm/test_tokenization_markuplm.py @@ -1107,7 +1107,7 @@ def test_tokenization_python_rust_equals(self): ): self.assertSequenceEqual(input_p[key], input_r[key][0]) - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): if not self.test_slow_tokenizer: # as we don't have a slow version, we can't compare the outputs between slow and fast versions self.skipTest(reason="test_slow_tokenizer is set to False") @@ -1508,7 +1508,7 @@ def test_batch_encode_dynamic_overflowing(self): self.assertEqual(tokens[key].shape[-2], 6) @unittest.skip(reason="TO DO: overwrite this very extensive test.") - def test_alignement_methods(self): + def test_alignment_methods(self): pass def get_clean_sequence(self, tokenizer, with_prefix_space=False, max_length=20, min_length=5): diff --git a/tests/models/mask2former/test_image_processing_mask2former.py b/tests/models/mask2former/test_image_processing_mask2former.py index 526f481eb93b..439a111db8f2 100644 --- a/tests/models/mask2former/test_image_processing_mask2former.py +++ b/tests/models/mask2former/test_image_processing_mask2former.py @@ -477,16 +477,16 @@ def test_binary_mask_to_rle(self): def test_post_process_semantic_segmentation(self): for image_processing_class in self.image_processor_list: - fature_extractor = image_processing_class(num_labels=self.image_processor_tester.num_classes) + feature_extractor = 
image_processing_class(num_labels=self.image_processor_tester.num_classes) outputs = self.image_processor_tester.get_fake_mask2former_outputs() - segmentation = fature_extractor.post_process_semantic_segmentation(outputs) + segmentation = feature_extractor.post_process_semantic_segmentation(outputs) self.assertEqual(len(segmentation), self.image_processor_tester.batch_size) self.assertEqual(segmentation[0].shape, (384, 384)) target_sizes = [(1, 4) for i in range(self.image_processor_tester.batch_size)] - segmentation = fature_extractor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes) + segmentation = feature_extractor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes) self.assertEqual(segmentation[0].shape, target_sizes[0]) diff --git a/tests/models/mluke/test_tokenization_mluke.py b/tests/models/mluke/test_tokenization_mluke.py index 262188e82204..39b6f94e6aea 100644 --- a/tests/models/mluke/test_tokenization_mluke.py +++ b/tests/models/mluke/test_tokenization_mluke.py @@ -97,7 +97,7 @@ def get_clean_sequence(self, tokenizer, max_length=20) -> tuple[str, list]: def test_pretokenized_inputs(self): pass - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) diff --git a/tests/models/mm_grounding_dino/test_modeling_mm_grounding_dino.py b/tests/models/mm_grounding_dino/test_modeling_mm_grounding_dino.py index 1d380bc3e097..a84c4bb17078 100644 --- a/tests/models/mm_grounding_dino/test_modeling_mm_grounding_dino.py +++ b/tests/models/mm_grounding_dino/test_modeling_mm_grounding_dino.py @@ -645,7 +645,7 @@ def test_tied_weights_keys(self): # MMGroundingDino when sharing weights also uses the shared ones in MMGroundingDinoDecoder # Therefore, differently from DeformableDetr, we expect the group lens to be 2 - # one for self.bbox_embed in MMGroundingDinoForObejectDetection and another one + # one for self.bbox_embed in MMGroundingDinoForObjectDetection and another one # in the decoder tied_params = [group for group in tied_params if len(group) > 2] self.assertListEqual( @@ -746,7 +746,7 @@ def test_inference_object_detection_head_equivalence_cpu_gpu(self): ) # HACK: the issue happens during top-k (k=900) after the encoder # there are some flips between cpu and gpu query ordering (idxs 195<->196 and 267<->268 on my machine) - # which causes different query position embedding assingments + # which causes different query position embedding assignments # which in turn significantly changes the decoder pass due to self attention model.config.num_queries = 100 model.model.query_position_embeddings.weight.data = model.model.query_position_embeddings.weight.data[:100] @@ -788,7 +788,7 @@ def test_cross_attention_mask(self): ).to(torch_device) # HACK: the issue happens during top-k (k=900) after the encoder # there are some flips between cpu and gpu query ordering - # which causes different query position embedding assingments + # which causes different query position embedding assignments # which in turn significantly changes the decoder pass due to self attention model.config.num_queries = 100 model.model.query_position_embeddings.weight.data = model.model.query_position_embeddings.weight.data[:100] diff --git a/tests/models/mobilebert/test_tokenization_mobilebert.py b/tests/models/mobilebert/test_tokenization_mobilebert.py index 
5c344f9d4388..c0a179a8ff1c 100644 --- a/tests/models/mobilebert/test_tokenization_mobilebert.py +++ b/tests/models/mobilebert/test_tokenization_mobilebert.py @@ -326,8 +326,8 @@ def test_offsets_with_special_characters(self): # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_change_tokenize_chinese_chars def test_change_tokenize_chinese_chars(self): - list_of_commun_chinese_char = ["的", "人", "有"] - text_with_chinese_char = "".join(list_of_commun_chinese_char) + list_of_common_chinese_char = ["的", "人", "有"] + text_with_chinese_char = "".join(list_of_common_chinese_char) for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): kwargs["tokenize_chinese_chars"] = True @@ -341,8 +341,8 @@ def test_change_tokenize_chinese_chars(self): tokens_without_spe_char_p = tokenizer_p.convert_ids_to_tokens(ids_without_spe_char_p) # it is expected that each Chinese character is not preceded by "##" - self.assertListEqual(tokens_without_spe_char_p, list_of_commun_chinese_char) - self.assertListEqual(tokens_without_spe_char_r, list_of_commun_chinese_char) + self.assertListEqual(tokens_without_spe_char_p, list_of_common_chinese_char) + self.assertListEqual(tokens_without_spe_char_r, list_of_common_chinese_char) kwargs["tokenize_chinese_chars"] = False tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) @@ -356,7 +356,7 @@ def test_change_tokenize_chinese_chars(self): # it is expected that only the first Chinese character is not preceded by "##". expected_tokens = [ - f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_commun_chinese_char) + f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_common_chinese_char) ] self.assertListEqual(tokens_without_spe_char_p, expected_tokens) self.assertListEqual(tokens_without_spe_char_r, expected_tokens) diff --git a/tests/models/moshi/test_tokenization_moshi.py b/tests/models/moshi/test_tokenization_moshi.py index d1e5287b233c..2b6030f6d792 100644 --- a/tests/models/moshi/test_tokenization_moshi.py +++ b/tests/models/moshi/test_tokenization_moshi.py @@ -312,7 +312,7 @@ def test_training_new_tokenizer_with_special_tokens_change(self): self.assertEqual(expected_result, decoded_input) - def test_alignement_methods(self): + def test_alignment_methods(self): # TODO: @ArthurZucker - alignment is broken pass diff --git a/tests/models/mvp/test_tokenization_mvp.py b/tests/models/mvp/test_tokenization_mvp.py index 62906138f3c9..4ba264388e2d 100644 --- a/tests/models/mvp/test_tokenization_mvp.py +++ b/tests/models/mvp/test_tokenization_mvp.py @@ -156,7 +156,7 @@ def test_special_tokens(self): def test_pretokenized_inputs(self): pass - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) diff --git a/tests/models/nougat/test_image_processing_nougat.py b/tests/models/nougat/test_image_processing_nougat.py index 1a174ebc0f48..2a4f4ad6139b 100644 --- a/tests/models/nougat/test_image_processing_nougat.py +++ b/tests/models/nougat/test_image_processing_nougat.py @@ -315,7 +315,7 @@ def test_slow_fast_equivalence(self): encoding_slow = image_processor_slow(dummy_image, return_tensors="pt") encoding_fast = image_processor_fast(dummy_image, return_tensors="pt") - # Adding a larget than usual tolerance 
because the slow processor uses reducing_gap=2.0 during resizing. + # Adding a larger than usual tolerance because the slow processor uses reducing_gap=2.0 during resizing. torch.testing.assert_close(encoding_slow.pixel_values, encoding_fast.pixel_values, atol=2e-1, rtol=0) self.assertLessEqual( torch.mean(torch.abs(encoding_slow.pixel_values - encoding_fast.pixel_values)).item(), 2e-2 diff --git a/tests/models/oneformer/test_image_processing_oneformer.py b/tests/models/oneformer/test_image_processing_oneformer.py index d201c704091c..4fe89959bf0b 100644 --- a/tests/models/oneformer/test_image_processing_oneformer.py +++ b/tests/models/oneformer/test_image_processing_oneformer.py @@ -273,7 +273,7 @@ def test_binary_mask_to_rle(self): def test_post_process_semantic_segmentation(self): for image_processing_class in self.image_processor_list: - fature_extractor = image_processing_class( + feature_extractor = image_processing_class( num_labels=self.image_processor_tester.num_classes, max_seq_length=77, task_seq_length=77, @@ -283,7 +283,7 @@ def test_post_process_semantic_segmentation(self): ) outputs = self.image_processor_tester.get_fake_oneformer_outputs() - segmentation = fature_extractor.post_process_semantic_segmentation(outputs) + segmentation = feature_extractor.post_process_semantic_segmentation(outputs) self.assertEqual(len(segmentation), self.image_processor_tester.batch_size) self.assertEqual( @@ -295,7 +295,7 @@ def test_post_process_semantic_segmentation(self): ) target_sizes = [(1, 4) for i in range(self.image_processor_tester.batch_size)] - segmentation = fature_extractor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes) + segmentation = feature_extractor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes) self.assertEqual(segmentation[0].shape, target_sizes[0]) diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py index 9557e936eb9e..6cbdba8e26c0 100644 --- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py @@ -235,7 +235,7 @@ def test_mismatching_num_image_tokens(self): for model_class in self.all_model_classes: model = model_class(config).to(torch_device) curr_input_dict = copy.deepcopy(input_dict) - _ = model(**curr_input_dict) # successfull forward with no modifications + _ = model(**curr_input_dict) # successful forward with no modifications # remove one image but leave the image token in text patch_size = config.vision_config.patch_size diff --git a/tests/models/qwen2_vl/test_video_processing_qwen2_vl.py b/tests/models/qwen2_vl/test_video_processing_qwen2_vl.py index 4ffb70fc40df..a9e800734712 100644 --- a/tests/models/qwen2_vl/test_video_processing_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_video_processing_qwen2_vl.py @@ -156,7 +156,7 @@ def test_video_processor_properties(self): self.assertTrue(hasattr(video_processing, "image_std")) self.assertTrue(hasattr(video_processing, "do_convert_rgb")) - # OVERRIDEN BECAUSE QWEN2_VL HAS SPECIAL OUTPUT SHAPES + # OVERRIDDEN BECAUSE QWEN2_VL HAS SPECIAL OUTPUT SHAPES def test_video_processor_from_dict_with_kwargs(self): for video_processing_class in self.video_processor_list: video_processor = video_processing_class(**self.video_processor_dict) diff --git a/tests/models/reformer/test_modeling_reformer.py b/tests/models/reformer/test_modeling_reformer.py index 3937edc0e354..8f2b1cdc9957 100644 --- a/tests/models/reformer/test_modeling_reformer.py +++ 
b/tests/models/reformer/test_modeling_reformer.py @@ -620,7 +620,7 @@ def test_model_from_pretrained(self): def _check_attentions_for_generate( self, batch_size, attentions, prompt_length, output_length, config, decoder_past_key_values ): - # NOTE (joao): this function is substancially different from the original, the attention has different + # NOTE (joao): this function is substantially different from the original, the attention has different # *number* of shapes in certain conditions self.assertIsInstance(attentions, tuple) self.assertListEqual( @@ -663,7 +663,7 @@ def _check_attentions_for_generate( def _check_hidden_states_for_generate( self, batch_size, hidden_states, prompt_length, output_length, config, use_cache=False ): - # NOTE (joao): this function is substancially different from the original, the hidden states have different + # NOTE (joao): this function is substantially different from the original, the hidden states have different # length in certain conditions self.assertIsInstance(hidden_states, tuple) self.assertListEqual( @@ -798,7 +798,7 @@ def setUp(self): def _check_attentions_for_generate( self, batch_size, attentions, prompt_length, output_length, config, decoder_past_key_values ): - # NOTE (joao): this function is substancially different from the original, the attention has different + # NOTE (joao): this function is substantially different from the original, the attention has different # *number* of shapes in certain conditions self.assertIsInstance(attentions, tuple) self.assertListEqual( @@ -841,7 +841,7 @@ def _check_attentions_for_generate( def _check_hidden_states_for_generate( self, batch_size, hidden_states, prompt_length, output_length, config, use_cache=False ): - # NOTE (joao): this function is substancially different from the original, the hidden states have different + # NOTE (joao): this function is substantially different from the original, the hidden states have different # length in certain conditions self.assertIsInstance(hidden_states, tuple) self.assertListEqual( diff --git a/tests/models/roberta/test_tokenization_roberta.py b/tests/models/roberta/test_tokenization_roberta.py index 0990dfd9c796..bf55614d4c87 100644 --- a/tests/models/roberta/test_tokenization_roberta.py +++ b/tests/models/roberta/test_tokenization_roberta.py @@ -172,7 +172,7 @@ def test_space_encoding(self): def test_pretokenized_inputs(self): pass - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) diff --git a/tests/models/roc_bert/test_tokenization_roc_bert.py b/tests/models/roc_bert/test_tokenization_roc_bert.py index 11083e614a92..3b7c4d0eb228 100644 --- a/tests/models/roc_bert/test_tokenization_roc_bert.py +++ b/tests/models/roc_bert/test_tokenization_roc_bert.py @@ -255,8 +255,8 @@ def test_offsets_with_special_characters(self): # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_change_tokenize_chinese_chars def test_change_tokenize_chinese_chars(self): - list_of_commun_chinese_char = ["的", "人", "有"] - text_with_chinese_char = "".join(list_of_commun_chinese_char) + list_of_common_chinese_char = ["的", "人", "有"] + text_with_chinese_char = "".join(list_of_common_chinese_char) for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): 
kwargs["tokenize_chinese_chars"] = True @@ -270,8 +270,8 @@ def test_change_tokenize_chinese_chars(self): tokens_without_spe_char_p = tokenizer_p.convert_ids_to_tokens(ids_without_spe_char_p) # it is expected that each Chinese character is not preceded by "##" - self.assertListEqual(tokens_without_spe_char_p, list_of_commun_chinese_char) - self.assertListEqual(tokens_without_spe_char_r, list_of_commun_chinese_char) + self.assertListEqual(tokens_without_spe_char_p, list_of_common_chinese_char) + self.assertListEqual(tokens_without_spe_char_r, list_of_common_chinese_char) kwargs["tokenize_chinese_chars"] = False tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) @@ -285,7 +285,7 @@ def test_change_tokenize_chinese_chars(self): # it is expected that only the first Chinese character is not preceded by "##". expected_tokens = [ - f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_commun_chinese_char) + f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_common_chinese_char) ] self.assertListEqual(tokens_without_spe_char_p, expected_tokens) self.assertListEqual(tokens_without_spe_char_r, expected_tokens) diff --git a/tests/models/sam/test_modeling_sam.py b/tests/models/sam/test_modeling_sam.py index 4ee29697dba4..5923ce5bc8a5 100644 --- a/tests/models/sam/test_modeling_sam.py +++ b/tests/models/sam/test_modeling_sam.py @@ -666,7 +666,7 @@ def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention". - In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model + In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. See https://github.com/huggingface/transformers/pull/32238 for more info diff --git a/tests/models/sam2/test_modeling_sam2.py b/tests/models/sam2/test_modeling_sam2.py index eac076d7d424..b1456a3eb273 100644 --- a/tests/models/sam2/test_modeling_sam2.py +++ b/tests/models/sam2/test_modeling_sam2.py @@ -563,7 +563,7 @@ def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention". - In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model + In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. 
See https://github.com/huggingface/transformers/pull/32238 for more info @@ -696,7 +696,7 @@ def flash_attn_inference_equivalence(self, attn_implementation: str, padding_sid else: assert torch.allclose(logits_fa[:-1], logits[:-1], atol=4e-2, rtol=4e-2) - # Override as diffence slightly higher than the threshold + # Override as difference slightly higher than the threshold def test_batching_equivalence(self, atol=5e-4, rtol=5e-4): super().test_batching_equivalence(atol=atol, rtol=rtol) diff --git a/tests/models/sam_hq/test_modeling_sam_hq.py b/tests/models/sam_hq/test_modeling_sam_hq.py index c2587ac67523..d008b788f6ad 100644 --- a/tests/models/sam_hq/test_modeling_sam_hq.py +++ b/tests/models/sam_hq/test_modeling_sam_hq.py @@ -714,7 +714,7 @@ def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. This tests only by looking at layer names, as usually SDPA layers are calles "SDPAAttention". - In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model + In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. See https://github.com/huggingface/transformers/pull/32238 for more info diff --git a/tests/models/udop/test_tokenization_udop.py b/tests/models/udop/test_tokenization_udop.py index 7badff1b4b27..5ca837bd9f8e 100644 --- a/tests/models/udop/test_tokenization_udop.py +++ b/tests/models/udop/test_tokenization_udop.py @@ -1210,7 +1210,7 @@ def test_tokenization_python_rust_equals(self): ): self.assertSequenceEqual(input_p[key], input_r[key][0]) - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): if not self.test_slow_tokenizer: # as we don't have a slow version, we can't compare the outputs between slow and fast versions self.skipTest(reason="test_slow_tokenizer is set to False") @@ -1606,7 +1606,7 @@ def test_batch_encode_dynamic_overflowing(self): self.assertEqual(tokens[key].shape[-1], 4) @unittest.skip(reason="TO DO: overwrite this very extensive test.") - def test_alignement_methods(self): + def test_alignment_methods(self): pass @unittest.skip(reason="UDOP tokenizer requires boxes besides sequences.") diff --git a/tests/models/umt5/test_modeling_umt5.py b/tests/models/umt5/test_modeling_umt5.py index 6887f4c7e5fa..426dfe96021e 100644 --- a/tests/models/umt5/test_modeling_umt5.py +++ b/tests/models/umt5/test_modeling_umt5.py @@ -144,7 +144,7 @@ def prepare_config_and_inputs(self): # all pad tokens have pos id = 2 and rest are between 2..seq_length # and the seq_length here is seq_length - num_pad_tokens # but when using past, there is no way of knowing if the past input ids had - # pad tokens in them, which results in incorrect seq_lenth and which in turn results in + # pad tokens in them, which results in incorrect seq_length and which in turn results in # position_ids being off by num_pad_tokens in past input input_ids = input_ids.clamp(self.pad_token_id + 2) input_ids[:, -1] = self.eos_token_id # Eos Token diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index 16e4c177ddaa..4debe1da5991 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -1404,8 +1404,8 @@ def test_small_en_logits_librispeech(self): input_speech = 
self._load_datasamples(1) - feaure_extractor = WhisperFeatureExtractor() - input_features = feaure_extractor(input_speech, return_tensors="pt").input_features.to(torch_device) + feature_extractor = WhisperFeatureExtractor() + input_features = feature_extractor(input_speech, return_tensors="pt").input_features.to(torch_device) logits = model( input_features, diff --git a/tests/pipelines/test_pipelines_mask_generation.py b/tests/pipelines/test_pipelines_mask_generation.py index 011ef7b8b1cc..3fd8f7b5c44a 100644 --- a/tests/pipelines/test_pipelines_mask_generation.py +++ b/tests/pipelines/test_pipelines_mask_generation.py @@ -110,9 +110,9 @@ def test_small_model_pt(self): outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", points_per_batch=256) # Shortening by hashing - new_outupt = [] + new_output = [] for i, o in enumerate(outputs["masks"]): - new_outupt += [{"mask": mask_to_test_readable(o), "scores": outputs["scores"][i]}] + new_output += [{"mask": mask_to_test_readable(o), "scores": outputs["scores"][i]}] # fmt: off last_output = Expectations({ @@ -121,7 +121,7 @@ def test_small_model_pt(self): }).get_expectation() self.assertEqual( - nested_simplify(new_outupt, decimals=4), + nested_simplify(new_output, decimals=4), [ {'mask': {'hash': '115ad19f5f', 'shape': (480, 640)}, 'scores': 1.0444}, {'mask': {'hash': '6affa964c6', 'shape': (480, 640)}, 'scores': 1.021}, @@ -168,12 +168,12 @@ def test_threshold(self): ) # Shortening by hashing - new_outupt = [] + new_output = [] for i, o in enumerate(outputs["masks"]): - new_outupt += [{"mask": mask_to_test_readable(o), "scores": outputs["scores"][i]}] + new_output += [{"mask": mask_to_test_readable(o), "scores": outputs["scores"][i]}] self.assertEqual( - nested_simplify(new_outupt, decimals=4), + nested_simplify(new_output, decimals=4), [ {"mask": {"hash": "115ad19f5f", "shape": (480, 640)}, "scores": 1.0444}, {"mask": {"hash": "6affa964c6", "shape": (480, 640)}, "scores": 1.0210}, diff --git a/tests/quantization/autoawq/test_awq.py b/tests/quantization/autoawq/test_awq.py index 8789427e0377..cca65df73064 100644 --- a/tests/quantization/autoawq/test_awq.py +++ b/tests/quantization/autoawq/test_awq.py @@ -336,7 +336,7 @@ def test_raise_save_pretrained(self): def test_fused_modules_to_not_convert(self): """ - Test if fused + modules to_not_covnert work as expected + Test if fused + modules to_not_convert work as expected """ model_id = "hf-internal-testing/Mixtral-tiny-AWQ" diff --git a/tests/quantization/quark_integration/test_quark.py b/tests/quantization/quark_integration/test_quark.py index 206b89b24d63..45de422adc43 100644 --- a/tests/quantization/quark_integration/test_quark.py +++ b/tests/quantization/quark_integration/test_quark.py @@ -36,7 +36,7 @@ @require_quark class QuarkConfigTest(unittest.TestCase): - def test_commmon_args(self): + def test_common_args(self): config = AutoConfig.from_pretrained("amd/Llama-3.1-8B-Instruct-w-int8-a-int8-sym-test") QuarkConfig(**config.quantization_config) diff --git a/tests/repo_utils/test_tests_fetcher.py b/tests/repo_utils/test_tests_fetcher.py index 8f3f64c1a25c..727cb2affa08 100644 --- a/tests/repo_utils/test_tests_fetcher.py +++ b/tests/repo_utils/test_tests_fetcher.py @@ -177,14 +177,14 @@ def patch_transformer_repo_path(new_folder): old_repo_path = tests_fetcher.PATH_TO_REPO tests_fetcher.PATH_TO_REPO = Path(new_folder).resolve() tests_fetcher.PATH_TO_EXAMPLES = tests_fetcher.PATH_TO_REPO / "examples" - tests_fetcher.PATH_TO_TRANFORMERS = tests_fetcher.PATH_TO_REPO 
/ "src/transformers" + tests_fetcher.PATH_TO_TRANSFORMERS = tests_fetcher.PATH_TO_REPO / "src/transformers" tests_fetcher.PATH_TO_TESTS = tests_fetcher.PATH_TO_REPO / "tests" try: yield finally: tests_fetcher.PATH_TO_REPO = old_repo_path tests_fetcher.PATH_TO_EXAMPLES = tests_fetcher.PATH_TO_REPO / "examples" - tests_fetcher.PATH_TO_TRANFORMERS = tests_fetcher.PATH_TO_REPO / "src/transformers" + tests_fetcher.PATH_TO_TRANSFORMERS = tests_fetcher.PATH_TO_REPO / "src/transformers" tests_fetcher.PATH_TO_TESTS = tests_fetcher.PATH_TO_REPO / "tests" diff --git a/tests/sagemaker/README.md b/tests/sagemaker/README.md index 70dc301f9c33..e25873e54aea 100644 --- a/tests/sagemaker/README.md +++ b/tests/sagemaker/README.md @@ -12,7 +12,7 @@ This document explains the testing strategy for releasing the new Hugging Face D Before we can run the tests we need to adjust the `requirements.txt` for PyTorch under `/tests/sagemaker/scripts/pytorch` and for TensorFlow under `/tests/sagemaker/scripts/pytorch`. We adjust the branch to the new RC-tag. ``` -git+https://github.com/huggingface/transformers.git@v4.5.0.rc0 # install main or adjust ist with vX.X.X for installing version specific-transforms +git+https://github.com/huggingface/transformers.git@v4.5.0.rc0 # install main or adjust it with vX.X.X for installing version specific-transforms ``` After we adjusted the `requirements.txt` we can run Amazon SageMaker tests with: @@ -140,9 +140,9 @@ images: | ID | Description | Platform | #GPUs | Collected & evaluated metrics | |-------------------------------------|-------------------------------------------------------------------|-----------------------------|-------|------------------------------------------| -| pytorch-transfromers-test-single | test bert finetuning using BERT fromtransformerlib+PT | SageMaker createTrainingJob | 1 | train_runtime, eval_accuracy & eval_loss | -| pytorch-transfromers-test-2-ddp | test bert finetuning using BERT from transformer lib+ PT DPP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | -| pytorch-transfromers-test-2-smd | test bert finetuning using BERT from transformer lib+ PT SM DDP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | -| pytorch-transfromers-test-1-smp | test roberta finetuning using BERT from transformer lib+ PT SM MP | SageMaker createTrainingJob | 8 | train_runtime, eval_accuracy & eval_loss | -| tensorflow-transfromers-test-single | Test bert finetuning using BERT from transformer lib+TF | SageMaker createTrainingJob | 1 | train_runtime, eval_accuracy & eval_loss | -| tensorflow-transfromers-test-2-smd | test bert finetuning using BERT from transformer lib+ TF SM DDP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | +| pytorch-transformers-test-single | test bert finetuning using BERT fromtransformerlib+PT | SageMaker createTrainingJob | 1 | train_runtime, eval_accuracy & eval_loss | +| pytorch-transformers-test-2-ddp | test bert finetuning using BERT from transformer lib+ PT DPP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | +| pytorch-transformers-test-2-smd | test bert finetuning using BERT from transformer lib+ PT SM DDP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | +| pytorch-transformers-test-1-smp | test roberta finetuning using BERT from transformer lib+ PT SM MP | SageMaker createTrainingJob | 8 | train_runtime, eval_accuracy & eval_loss | +| tensorflow-transformers-test-single | Test bert 
finetuning using BERT from transformer lib+TF | SageMaker createTrainingJob | 1 | train_runtime, eval_accuracy & eval_loss | +| tensorflow-transformers-test-2-smd | test bert finetuning using BERT from transformer lib+ TF SM DDP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | diff --git a/tests/sagemaker/conftest.py b/tests/sagemaker/conftest.py index 9d987bc8d006..89b89966d542 100644 --- a/tests/sagemaker/conftest.py +++ b/tests/sagemaker/conftest.py @@ -45,7 +45,7 @@ def metric_definitions(self) -> str: @property def base_job_name(self) -> str: - return f"{self.framework}-transfromers-test" + return f"{self.framework}-transformers-test" @property def test_path(self) -> str: diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 98c095f96804..8330c19a5e6b 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -3238,7 +3238,7 @@ def test_problem_types(self): def test_load_with_mismatched_shapes(self): if not self.test_mismatched_shapes: - self.skipTest(reason="test_missmatched_shapes is set to False") + self.skipTest(reason="test_mismatched_shapes is set to False") config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: @@ -3282,7 +3282,7 @@ def test_load_with_mismatched_shapes(self): def test_mismatched_shapes_have_properly_initialized_weights(self): if not self.test_mismatched_shapes: - self.skipTest(reason="test_missmatched_shapes is set to False") + self.skipTest(reason="test_mismatched_shapes is set to False") config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() configs_no_init = _config_zero_init(config) @@ -3715,7 +3715,7 @@ def test_attn_implementation_composite_models(self): model = model_class(config) self.assertTrue(model.config.get_text_config(decoder=True)._attn_implementation == "eager") - # Test that using `dict` atttention implementation works with `from_pretrained` + # Test that using `dict` attention implementation works with `from_pretrained` # Set all backbones to "eager" because "eager" attention is always available with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname) @@ -3770,7 +3770,7 @@ def test_sdpa_can_dispatch_composite_models(self): """ Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model. This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention". - In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model + In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model is loaded, because we manually replicate requested attn implementation on each sub-config when loading. 
See https://github.com/huggingface/transformers/pull/32238 for more info diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index eef97c5b06c7..b5c74c8f25d0 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -289,7 +289,7 @@ def test_processor_text_has_no_visual(self): if "videos" in inputs_dict: processing_kwargs["do_sample_frames"] = False - # Firts call processor with all inputs and use nested input type, which is the format supported by all multimodal processors + # First call processor with all inputs and use nested input type, which is the format supported by all multimodal processors image_inputs_nested = [[image] if not isinstance(image, list) else image for image in image_inputs] video_inputs_nested = [[video] for video in video_inputs] inputs_dict_nested = {"text": text, "images": image_inputs_nested, "videos": video_inputs_nested} @@ -1172,7 +1172,7 @@ def test_apply_chat_template_video_frame_sampling(self): # 3 frames are inferred from input video's length and FPS, so can be hardcoded self.assertEqual(len(out_dict_with_video[self.videos_input_name][0]), 3) - # Whan `do_sample_frames=False` no sampling is done and whole video is loaded, even if number of frames is passed + # When `do_sample_frames=False` no sampling is done and whole video is loaded, even if number of frames is passed fps = 10 out_dict_with_video = processor.apply_chat_template( messages, diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 251dab178af5..c1b3bd796b40 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -3212,7 +3212,7 @@ def test_fast_only_inputs(self): self.assertRaises(TypeError, tokenizer_r.encode_plus, None) self.assertRaises(TypeError, tokenizer_r.batch_encode_plus, None) - def test_alignement_methods(self): + def test_alignment_methods(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs) @@ -4101,7 +4101,7 @@ def test_save_pretrained(self): shutil.rmtree(tmpdirname2) - def test_embeded_special_tokens(self): + def test_embedded_special_tokens(self): if not self.test_slow_tokenizer: # as we don't have a slow version, we can't compare the outputs between slow and fast versions self.skipTest(reason="test_slow_tokenizer is set to False") diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 683c76032dd0..7c8f633d54b8 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1652,7 +1652,7 @@ def test_train_and_eval_dataloaders(self): if torch_device in ["cuda"]: n_gpu = max(1, backend_device_count(torch_device)) else: - # DP is decprecated by PyTorch, accelerators like XPU doesn't support DP + # DP is deprecated by PyTorch, accelerators like XPU doesn't support DP n_gpu = 1 tmp_dir = self.get_auto_remove_tmp_dir() @@ -4980,7 +4980,7 @@ def test_best_model_checkpoint_behavior(self): assert len(os.listdir(tmpdir)) == trainer.state.global_step // 2 - def test_special_token_aligment(self): + def test_special_token_alignment(self): """ Tests that special token changes in the tokenizer result in model configs updates when using the trainer, to ensure special tokens are aligned across configs diff --git a/tests/utils/test_masking_utils.py b/tests/utils/test_masking_utils.py index 98ce87189fab..d7c2734f34fd 100644 --- a/tests/utils/test_masking_utils.py +++ 
b/tests/utils/test_masking_utils.py @@ -138,7 +138,7 @@ def test_find_packed_sequence_indices(self): self.assertTrue((find_packed_sequence_indices(position_ids) == EXPECTED_SEQUENCE_INDICES).all()) def test_chunked_mask_with_left_padding_and_large_prefill(self): - # Make sur we have an attention_chunk_size in the config + # Make sure we have an attention_chunk_size in the config config = LlamaConfig(attention_chunk_size=3, attn_implementation="sdpa") batch_size = 2 @@ -193,7 +193,7 @@ def test_chunked_mask_with_left_padding_and_large_prefill(self): self.assertTrue((chunked_attention_mask == EXPECTED_CHUNKED_MASK).all()) def test_chunked_mask_with_left_padding_decoding(self): - # Make sur we have an attention_chunk_size in the config + # Make sure we have an attention_chunk_size in the config config = LlamaConfig(attention_chunk_size=4, attn_implementation="sdpa", num_hidden_layers=1) cache = DynamicCache(config=config) diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index f15b95ed70cb..7f24c9882540 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -714,9 +714,9 @@ def test_model_from_pretrained_attn_implementation(self): def test_model_from_config_attn_implementation(self): # test that the model can be instantiated with attn_implementation of either - # 1. config created with explicit attn_implementatation and from_config + # 1. config created with explicit attn_implementation and from_config # 2. explicit from_config's attn_implementation argument with a config argument - # 3. config created with explicit attn_implementatation and from_config overriding with explicit attn_implementation argument + # 3. config created with explicit attn_implementation and from_config overriding with explicit attn_implementation argument attn_implementation_available = ["eager", "sdpa"] if is_flash_attn_available(): @@ -1588,7 +1588,7 @@ def test_modifying_model_config_gets_moved_to_generation_config(self): with tempfile.TemporaryDirectory() as tmp_dir: model.save_pretrained(tmp_dir) # 1 - That parameter will be removed from `model.config`. We don't want to use `model.config` to store - # generative parameters, and the old default (1.0) would no longer relect the user's wishes. + # generative parameters, and the old default (1.0) would no longer reflect the user's wishes. self.assertTrue(model.config.repetition_penalty is None) # 2 - That parameter will be set in `model.generation_config` instead. self.assertTrue(model.generation_config.repetition_penalty == 3.0) diff --git a/utils/check_repo.py b/utils/check_repo.py index eeec1aec1bc6..8a73468a1e49 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -151,13 +151,13 @@ "ChameleonVQVAE", # VQVAE here is used only for encoding (discretizing) and is tested as part of bigger model "Qwen2VLModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2VLForConditionalGeneration. "Qwen2_5_VLModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5_VLForConditionalGeneration. - "Qwen2_5OmniForConditionalGeneration", # Not a regular model. Testted in Qwen2_5OmniModelIntergrationTest - "Qwen2_5OmniTalkerForConditionalGeneration", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntergrationTest. - "Qwen2_5OmniTalkerModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntergrationTest. - "Qwen2_5OmniThinkerTextModel", # Building part of bigger (tested) model. 
Tested implicitly through Qwen2_5OmniModelIntergrationTest. - "Qwen2_5OmniToken2WavModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntergrationTest. - "Qwen2_5OmniToken2WavDiTModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntergrationTest. - "Qwen2_5OmniToken2WavBigVGANModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntergrationTest. + "Qwen2_5OmniForConditionalGeneration", # Not a regular model. Tested in Qwen2_5OmniModelIntegrationTest + "Qwen2_5OmniTalkerForConditionalGeneration", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntegrationTest. + "Qwen2_5OmniTalkerModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntegrationTest. + "Qwen2_5OmniThinkerTextModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntegrationTest. + "Qwen2_5OmniToken2WavModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntegrationTest. + "Qwen2_5OmniToken2WavDiTModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntegrationTest. + "Qwen2_5OmniToken2WavBigVGANModel", # Building part of bigger (tested) model. Tested implicitly through Qwen2_5OmniModelIntegrationTest. "MllamaTextModel", # Building part of bigger (tested) model. # TODO: add tests "MllamaVisionModel", # Building part of bigger (tested) model. # TODO: add tests "Llama4TextModel", # Building part of bigger (tested) model. # TODO: add tests @@ -891,7 +891,7 @@ def check_all_auto_mappings_importable(): def check_objects_being_equally_in_main_init(): """ - Check if a (TensorFlow or Flax) object is in the main __init__ iif its counterpart in PyTorch is. + Check if a (TensorFlow or Flax) object is in the main __init__ if its counterpart in PyTorch is.
""" attrs = dir(transformers) diff --git a/utils/create_dependency_mapping.py b/utils/create_dependency_mapping.py index debcd0ae7d9d..0e0df9b66ef3 100644 --- a/utils/create_dependency_mapping.py +++ b/utils/create_dependency_mapping.py @@ -37,7 +37,7 @@ def topological_sort(dependencies: dict) -> list[list[str]]: leaf_nodes = {node for node in graph if len(graph[node]) == 0} # Add them to the list as next level sorting_list.append([name_mapping[node] for node in leaf_nodes]) - # Remove the leafs from the graph (and from the deps of other nodes) + # Remove the leaves from the graph (and from the deps of other nodes) graph = {node: deps - leaf_nodes for node, deps in graph.items() if node not in leaf_nodes} return sorting_list diff --git a/utils/create_dummy_models.py b/utils/create_dummy_models.py index c8e2bd4557ab..53ee7597d89c 100644 --- a/utils/create_dummy_models.py +++ b/utils/create_dummy_models.py @@ -1458,7 +1458,7 @@ def create_tiny_models( all_build_args.append((c, models_to_create, os.path.join(output_path, c.model_type))) with multiprocessing.Pool() as pool: results = pool.starmap(build, all_build_args) - results = {buid_args[0].__name__: result for buid_args, result in zip(all_build_args, results)} + results = {build_args[0].__name__: result for build_args, result in zip(all_build_args, results)} if upload: if organization is None: diff --git a/utils/custom_init_isort.py b/utils/custom_init_isort.py index 3d476c809e86..7bea9310f31b 100644 --- a/utils/custom_init_isort.py +++ b/utils/custom_init_isort.py @@ -252,7 +252,7 @@ def sort_imports(file: str, check_only: bool = True): code, start_prompt="_import_structure = {", end_prompt="if TYPE_CHECKING:" ) - # We ignore block 0 (everything untils start_prompt) and the last block (everything after end_prompt). + # We ignore block 0 (everything until start_prompt) and the last block (everything after end_prompt). for block_idx in range(1, len(main_blocks) - 1): # Check if the block contains some `_import_structure`s thingy to sort. 
block = main_blocks[block_idx] diff --git a/utils/get_ci_error_statistics.py b/utils/get_ci_error_statistics.py index eb8ffa37b803..ca907aa6a2e6 100644 --- a/utils/get_ci_error_statistics.py +++ b/utils/get_ci_error_statistics.py @@ -62,14 +62,16 @@ def get_job_links(workflow_run_id, token=None): return {} -def get_artifacts_links(worflow_run_id, token=None): +def get_artifacts_links(workflow_run_id, token=None): """Get all artifact links from a workflow run""" headers = None if token is not None: headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} - url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{worflow_run_id}/artifacts?per_page=100" + url = ( + f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/artifacts?per_page=100" + ) result = requests.get(url, headers=headers).json() artifacts = {} diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py index 97d3696465cf..23c2359491f2 100644 --- a/utils/get_previous_daily_ci.py +++ b/utils/get_previous_daily_ci.py @@ -103,7 +103,7 @@ def get_last_daily_ci_artifacts( token, workflow_run_id=workflow_run_id, workflow_id=workflow_id, commit_sha=commit_sha ) if workflow_run_id is not None: - artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token) + artifacts_links = get_artifacts_links(workflow_run_id=workflow_run_id, token=token) if artifact_names is None: artifact_names = artifacts_links.keys() diff --git a/utils/modular_model_converter.py b/utils/modular_model_converter.py index 54520b6aed0d..b039a86870f3 100644 --- a/utils/modular_model_converter.py +++ b/utils/modular_model_converter.py @@ -105,7 +105,7 @@ class ReplaceNameTransformer(m.MatcherDecoratableTransformer): - llama -> my_new_model and my_new_model -> llama - Llama -> MyNewModel and MyNewModel -> Llama - LLAMA -> MY_NEW_MODEL and MY_NEW_MODEL -> LLAMA - - LLaMa -> MyNewModel abd MyNewModel -> Llama + - LLaMa -> MyNewModel and MyNewModel -> Llama """ def __init__(self, old_name: str, new_name: str, original_new_model_name: str = "", only_doc: bool = False): @@ -378,7 +378,7 @@ def find_all_dependencies( If provided, entities already present in `initial_checked_dependencies` will not be part of the returned dependencies. return_parent (bool, *optional*): If `True`, will return a list consisting of tuples (dependency, parent) instead of a simple set of dependencies. Note - that the order of the items in the list reflects the traversal order. Thus, no parent can ever appear before childs. + that the order of the items in the list reflects the traversal order. Thus, no parent can ever appear before children. Returns: A set of all the dependencies, or a list of tuples `(dependency, parent)` if `return_parent=True`. @@ -864,7 +864,7 @@ def replace_class_node( """ Replace a class node which inherits from another modeling class. 
This function works in the following way: - start from the methods and class attributes of the original modeling code node, and replace their definition - if overriden in the modular + if overridden in the modular - append all new methods and class attributes defined in the child class - all potential method/class docstrings and decorators use the ones found in modular if any, else in original modeling - replace all calls to super() with the unravelled code diff --git a/utils/release.py b/utils/release.py index d8fee77733e7..0b62a6060274 100644 --- a/utils/release.py +++ b/utils/release.py @@ -110,7 +110,7 @@ def update_version_in_examples(version: str, patch: bool = False): for fname in fnames: if fname.endswith(".py"): if UV_SCRIPT_MARKER in Path(folder, fname).read_text(): - # Update the depdendencies in UV scripts + # Update the dependencies in UV scripts uv_script_file_type = "uv_script_dev" if ".dev" in version else "uv_script_release" update_version_in_file(os.path.join(folder, fname), version, file_type=uv_script_file_type) if not patch: diff --git a/utils/scan_skipped_tests.py b/utils/scan_skipped_tests.py index 20d794f733dc..c796be0092cc 100644 --- a/utils/scan_skipped_tests.py +++ b/utils/scan_skipped_tests.py @@ -161,7 +161,7 @@ def summarize_all_tests( def main() -> None: parser = argparse.ArgumentParser( - description="Scan model tests for overridden or skipped common or generat tests.", + description="Scan model tests for overridden or skipped common or generate tests.", ) parser.add_argument( "--output_dir", diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index 6dc7618eb31f..d200fc83b742 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -67,7 +67,7 @@ PATH_TO_REPO = Path(__file__).parent.parent.resolve() PATH_TO_EXAMPLES = PATH_TO_REPO / "examples" -PATH_TO_TRANFORMERS = PATH_TO_REPO / "src/transformers" +PATH_TO_TRANSFORMERS = PATH_TO_REPO / "src/transformers" PATH_TO_TESTS = PATH_TO_REPO / "tests" # The value is just a heuristic to determine if we `guess` all models are impacted. @@ -734,7 +734,7 @@ def create_reverse_dependency_tree() -> list[tuple[str, str]]: Create a list of all edges (a, b) which mean that modifying a impacts b with a going over all module and test files. """ cache = {} - all_modules = list(PATH_TO_TRANFORMERS.glob("**/*.py")) + all_modules = list(PATH_TO_TRANSFORMERS.glob("**/*.py")) all_modules = [x for x in all_modules if not ("models" in x.parts and x.parts[-1].startswith("convert_"))] all_modules += list(PATH_TO_TESTS.glob("**/*.py")) all_modules = [str(mod.relative_to(PATH_TO_REPO)) for mod in all_modules] @@ -820,7 +820,7 @@ def init_test_examples_dependencies() -> tuple[dict[str, list[str]], list[str]]: for framework in ["flax", "pytorch", "tensorflow"]: test_files = list((PATH_TO_EXAMPLES / framework).glob("test_*.py")) all_examples.extend(test_files) - # Remove the files at the root of examples/framework since they are not proper examples (they are eith utils + # Remove the files at the root of examples/framework since they are not proper examples (they are either utils # or example test files). examples = [ f for f in (PATH_TO_EXAMPLES / framework).glob("**/*.py") if f.parent != PATH_TO_EXAMPLES / framework @@ -854,7 +854,7 @@ def create_reverse_dependency_map() -> dict[str, list[str]]: # Start from the example deps init. 
example_deps, examples = init_test_examples_dependencies() # Add all modules and all tests to all examples - all_modules = list(PATH_TO_TRANFORMERS.glob("**/*.py")) + all_modules = list(PATH_TO_TRANSFORMERS.glob("**/*.py")) all_modules = [x for x in all_modules if not ("models" in x.parts and x.parts[-1].startswith("convert_"))] all_modules += list(PATH_TO_TESTS.glob("**/*.py")) + examples all_modules = [str(mod.relative_to(PATH_TO_REPO)) for mod in all_modules] diff --git a/utils/update_metadata.py b/utils/update_metadata.py index e188bac152f3..9f04300382e4 100755 --- a/utils/update_metadata.py +++ b/utils/update_metadata.py @@ -152,9 +152,9 @@ def get_frameworks_table() -> pd.DataFrame: modules. """ # Dictionary model names to config. - config_maping_names = transformers_module.models.auto.configuration_auto.CONFIG_MAPPING_NAMES + config_mapping_names = transformers_module.models.auto.configuration_auto.CONFIG_MAPPING_NAMES model_prefix_to_model_type = { - config.replace("Config", ""): model_type for model_type, config in config_maping_names.items() + config.replace("Config", ""): model_type for model_type, config in config_mapping_names.items() } # Dictionaries flagging if each model prefix has a backend in PT/TF/Flax.
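A short aside on the `utils/create_dependency_mapping.py` hunk earlier in this patch: the corrected comment ("Remove the leaves from the graph") describes topological sorting by repeatedly peeling off leaf nodes, i.e. nodes with no unresolved dependencies. The sketch below is an illustrative reconstruction of that loop, not the converter's actual code; the function name and data shapes are assumed for the example.

def topological_levels(dependencies: dict[str, set[str]]) -> list[list[str]]:
    """Group nodes into levels so that each level only depends on earlier ones."""
    graph = {node: set(deps) for node, deps in dependencies.items()}
    levels = []
    while graph:
        # Leaf nodes have no unresolved dependencies left.
        leaves = {node for node, deps in graph.items() if not deps}
        if not leaves:
            raise ValueError("dependency cycle detected")
        levels.append(sorted(leaves))
        # Remove the leaves from the graph (and from the deps of other nodes).
        graph = {node: deps - leaves for node, deps in graph.items() if node not in leaves}
    return levels

print(topological_levels({"llama": set(), "mistral": {"llama"}, "mixtral": {"mistral"}}))
# expected output: [['llama'], ['mistral'], ['mixtral']]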
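The `get_artifacts_links` rename in `utils/get_ci_error_statistics.py` goes hand in hand with the call-site update in `utils/get_previous_daily_ci.py`: once the keyword parameter is spelled `workflow_run_id`, any caller still passing `worflow_run_id` fails at runtime. A minimal sketch with a hypothetical stand-in function (not the repo's implementation) shows why the two hunks must land together.

def fetch_artifacts(workflow_run_id, token=None):
    # Stand-in for get_artifacts_links: only the keyword name matters here.
    return (
        "https://api.github.com/repos/huggingface/transformers/actions/runs/"
        f"{workflow_run_id}/artifacts?per_page=100"
    )

print(fetch_artifacts(workflow_run_id=12345))      # updated call site works

try:
    fetch_artifacts(worflow_run_id=12345)          # stale, misspelled keyword
except TypeError as exc:
    print(f"stale call site fails: {exc}")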
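For the `utils/tests_fetcher.py` hunks, the renamed `PATH_TO_TRANSFORMERS` constant is only used to enumerate Python modules under `src/transformers` while skipping model conversion scripts. A rough, self-contained sketch of that filtering, assuming it is run from a checkout root (it mirrors the lines shown in the diff rather than importing the utility):

from pathlib import Path

PATH_TO_REPO = Path(".").resolve()               # assumed checkout root
PATH_TO_TRANSFORMERS = PATH_TO_REPO / "src/transformers"
PATH_TO_TESTS = PATH_TO_REPO / "tests"

# Collect every Python module, dropping convert_* scripts inside models/ packages.
all_modules = list(PATH_TO_TRANSFORMERS.glob("**/*.py"))
all_modules = [x for x in all_modules if not ("models" in x.parts and x.parts[-1].startswith("convert_"))]
all_modules += list(PATH_TO_TESTS.glob("**/*.py"))
all_modules = [str(mod.relative_to(PATH_TO_REPO)) for mod in all_modules]
print(f"{len(all_modules)} modules collected")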
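Finally, the `utils/update_metadata.py` hunk just above only renames `config_maping_names`; the mapping itself inverts `CONFIG_MAPPING_NAMES` (model type to config class name) into a class-name-prefix to model-type lookup. A toy version with a hardcoded subset of entries (the real table lives in `transformers.models.auto.configuration_auto`):

# Toy subset of CONFIG_MAPPING_NAMES: model type -> config class name.
config_mapping_names = {
    "bert": "BertConfig",
    "llama": "LlamaConfig",
    "clip": "CLIPConfig",
}

# Strip the "Config" suffix to get the class-name prefix, then invert the mapping
# so names like "BertModel" or "CLIPVisionModel" can be matched back to a model type.
model_prefix_to_model_type = {
    config.replace("Config", ""): model_type
    for model_type, config in config_mapping_names.items()
}

assert model_prefix_to_model_type["Bert"] == "bert"
assert model_prefix_to_model_type["CLIP"] == "clip"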