2 changes: 1 addition & 1 deletion tests/commands/test_serving.py
@@ -72,7 +72,7 @@ def test_parsed_args(self):

def test_build_chat_completion_chunk(self):
"""
- Tests that the chunks are correctly built for the Chat Completion API. The `choices` checks implictly
+ Tests that the chunks are correctly built for the Chat Completion API. The `choices` checks implicitly
confirm that empty fields are not emitted.
"""
dummy = ServeCommand.__new__(ServeCommand)
4 changes: 2 additions & 2 deletions tests/generation/test_stopping_criteria.py
@@ -239,7 +239,7 @@ def test_single_letter_stop_string(self):
for input_ids in false_input_ids["input_ids"]:
self.assertFalse(criteria(input_ids.unsqueeze(0), scores))

- def test_criterias_per_row(self):
+ def test_criteria_per_row(self):
text = "They completed the challenging puzzle, revealing the hidden image at the end"
stop_strings = ["end"]

@@ -261,7 +261,7 @@ def test_criterias_per_row(self):
# return False when neither is satisfied
self.assertFalse(criteria(inputs["input_ids"][:, :-1], scores))

- def test_criterias_per_row_batched(self):
+ def test_criteria_per_row_batched(self):
text = [
"They completed the challenging puzzle, revealing the hidden image at the end",
"Today a dragon flew over France",
2 changes: 1 addition & 1 deletion tests/models/auto/test_image_processing_auto.py
@@ -91,7 +91,7 @@ def test_image_processor_from_local_directory_from_config(self):
with tempfile.TemporaryDirectory() as tmpdirname:
model_config = CLIPConfig()

- # Create a dummy config file with image_proceesor_type
+ # Create a dummy config file with image_processor_type
processor_tmpfile = Path(tmpdirname) / "preprocessor_config.json"
config_tmpfile = Path(tmpdirname) / "config.json"
json.dump(
2 changes: 1 addition & 1 deletion tests/models/auto/test_video_processing_auto.py
@@ -83,7 +83,7 @@ def test_video_processor_from_local_directory_from_config(self):
with tempfile.TemporaryDirectory() as tmpdirname:
model_config = LlavaOnevisionConfig()

- # Create a dummy config file with image_proceesor_type
+ # Create a dummy config file with image_processor_type
processor_tmpfile = Path(tmpdirname) / "video_preprocessor_config.json"
config_tmpfile = Path(tmpdirname) / "config.json"
json.dump(
2 changes: 1 addition & 1 deletion tests/models/bart/test_tokenization_bart.py
@@ -157,7 +157,7 @@ def test_special_tokens(self):
def test_pretokenized_inputs(self):
pass

- def test_embeded_special_tokens(self):
+ def test_embedded_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs)
10 changes: 5 additions & 5 deletions tests/models/bert/test_tokenization_bert.py
@@ -307,8 +307,8 @@ def test_offsets_with_special_characters(self):
self.assertEqual([e[0] for e in expected_results], tokens["offset_mapping"])

def test_change_tokenize_chinese_chars(self):
- list_of_commun_chinese_char = ["的", "人", "有"]
- text_with_chinese_char = "".join(list_of_commun_chinese_char)
+ list_of_common_chinese_char = ["的", "人", "有"]
+ text_with_chinese_char = "".join(list_of_common_chinese_char)
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
kwargs["tokenize_chinese_chars"] = True
@@ -322,8 +322,8 @@ def test_change_tokenize_chinese_chars(self):
tokens_without_spe_char_p = tokenizer_p.convert_ids_to_tokens(ids_without_spe_char_p)

# it is expected that each Chinese character is not preceded by "##"
- self.assertListEqual(tokens_without_spe_char_p, list_of_commun_chinese_char)
- self.assertListEqual(tokens_without_spe_char_r, list_of_commun_chinese_char)
+ self.assertListEqual(tokens_without_spe_char_p, list_of_common_chinese_char)
+ self.assertListEqual(tokens_without_spe_char_r, list_of_common_chinese_char)

kwargs["tokenize_chinese_chars"] = False
tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs)
@@ -337,7 +337,7 @@ def test_change_tokenize_chinese_chars(self):

# it is expected that only the first Chinese character is not preceded by "##".
expected_tokens = [
f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_commun_chinese_char)
f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_common_chinese_char)
]
self.assertListEqual(tokens_without_spe_char_p, expected_tokens)
self.assertListEqual(tokens_without_spe_char_r, expected_tokens)
4 changes: 2 additions & 2 deletions tests/models/blip_2/test_modeling_blip_2.py
@@ -513,7 +513,7 @@ def test_sdpa_can_dispatch_composite_models(self):
"""
Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model.
This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention".
- In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model
+ In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model
is loaded, because we manually replicate requested attn implementation on each sub-config when loading.
See https://github.com/huggingface/transformers/pull/32238 for more info

@@ -949,7 +949,7 @@ def test_sdpa_can_dispatch_composite_models(self):
"""
Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model.
This tests only by looking at layer names, as usually SDPA layers are called "SDPAAttention".
- In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model
+ In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model
is loaded, because we manually replicate requested attn implementation on each sub-config when loading.
See https://github.com/huggingface/transformers/pull/32238 for more info

6 changes: 4 additions & 2 deletions tests/models/deepseek_v2/test_modeling_deepseek_v2.py
@@ -98,7 +98,9 @@ def test_model_rope_scaling_frequencies(self):
long_input_length = int(config.max_position_embeddings * 1.5)

# Inputs
- x = torch.randn(1, dtype=torch.float32, device=torch_device) # used exlusively to get the dtype and the device
+ x = torch.randn(
+     1, dtype=torch.float32, device=torch_device
+ ) # used exclusively to get the dtype and the device
position_ids_short = torch.arange(short_input_length, dtype=torch.long, device=torch_device)
position_ids_short = position_ids_short.unsqueeze(0)
position_ids_long = torch.arange(long_input_length, dtype=torch.long, device=torch_device)
@@ -161,7 +163,7 @@ def test_past_key_values_format(self):
super().test_past_key_values_format(custom_all_cache_shapes=all_cache_shapes)

def _check_past_key_values_for_generate(self, batch_size, decoder_past_key_values, cache_length, config):
"""Needs to be overriden as deepseek has special MLA cache format (though we don't really use the MLA)"""
"""Needs to be overridden as deepseek has special MLA cache format (though we don't really use the MLA)"""
self.assertIsInstance(decoder_past_key_values, Cache)

# (batch, head, seq_length, head_features)
2 changes: 1 addition & 1 deletion tests/models/dia/test_modeling_dia.py
@@ -250,7 +250,7 @@ def skip_non_greedy_generate(self):
self.skipTest(reason="Dia only supports greedy search / sampling with one sequence.")

def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
"""Overriden to account for the 2D flattened structure"""
"""Overridden to account for the 2D flattened structure"""
inputs_dict = copy.deepcopy(inputs_dict)

if return_labels:
10 changes: 5 additions & 5 deletions tests/models/electra/test_tokenization_electra.py
@@ -300,8 +300,8 @@ def test_offsets_with_special_characters(self):
self.assertEqual([e[0] for e in expected_results], tokens["offset_mapping"])

def test_change_tokenize_chinese_chars(self):
- list_of_commun_chinese_char = ["的", "人", "有"]
- text_with_chinese_char = "".join(list_of_commun_chinese_char)
+ list_of_common_chinese_char = ["的", "人", "有"]
+ text_with_chinese_char = "".join(list_of_common_chinese_char)
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
kwargs["tokenize_chinese_chars"] = True
@@ -315,8 +315,8 @@ def test_change_tokenize_chinese_chars(self):
tokens_without_spe_char_p = tokenizer_p.convert_ids_to_tokens(ids_without_spe_char_p)

# it is expected that each Chinese character is not preceded by "##"
- self.assertListEqual(tokens_without_spe_char_p, list_of_commun_chinese_char)
- self.assertListEqual(tokens_without_spe_char_r, list_of_commun_chinese_char)
+ self.assertListEqual(tokens_without_spe_char_p, list_of_common_chinese_char)
+ self.assertListEqual(tokens_without_spe_char_r, list_of_common_chinese_char)

kwargs["tokenize_chinese_chars"] = False
tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs)
@@ -330,7 +330,7 @@ def test_change_tokenize_chinese_chars(self):

# it is expected that only the first Chinese character is not preceded by "##".
expected_tokens = [
f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_commun_chinese_char)
f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_common_chinese_char)
]
self.assertListEqual(tokens_without_spe_char_p, expected_tokens)
self.assertListEqual(tokens_without_spe_char_r, expected_tokens)
4 changes: 2 additions & 2 deletions tests/models/gemma2/test_modeling_gemma2.py
@@ -387,7 +387,7 @@ def test_generation_beyond_sliding_window(self, attn_implementation: str):
self.skipTest("FlashAttention2 is required for this test.")

if torch_device == "xpu" and attn_implementation == "flash_attention_2":
self.skipTest(reason="Intel XPU doesn't support falsh_attention_2 as of now.")
self.skipTest(reason="Intel XPU doesn't support flash_attention_2 as of now.")

model_id = "google/gemma-2-2b"
EXPECTED_COMPLETIONS = [
@@ -433,7 +433,7 @@ def test_generation_beyond_sliding_window_dynamic(self, attn_implementation: str
self.skipTest("FlashAttention2 is required for this test.")

if torch_device == "xpu" and attn_implementation == "flash_attention_2":
self.skipTest(reason="Intel XPU doesn't support falsh_attention_2 as of now.")
self.skipTest(reason="Intel XPU doesn't support flash_attention_2 as of now.")

model_id = "google/gemma-2-2b"
EXPECTED_COMPLETIONS = [
@@ -638,7 +638,7 @@ def test_tied_weights_keys(self):

# GroundingDino when sharing weights also uses the shared ones in GroundingDinoDecoder
# Therefore, differently from DeformableDetr, we expect the group lens to be 2
- # one for self.bbox_embed in GroundingDinoForObejectDetection and another one
+ # one for self.bbox_embed in GroundingDinoForObjectDetection and another one
# in the decoder
tied_params = [group for group in tied_params if len(group) > 2]
self.assertListEqual(
@@ -669,8 +669,8 @@ def _prepare_model_kwargs(input_ids, attention_mask, signature):
def test_sdpa_can_dispatch_composite_models(self):
"""
Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model.
- This tests only by looking at layer names, as usually SDPA layers are calles "SDPAAttention".
- In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model
+ This tests only by looking at layer names, as usually SDPA layers call "SDPAAttention".
+ In contrast to the above test, this one checks if the "config._attn_implementation" is a dict after the model
is loaded, because we manually replicate requested attn implementation on each sub-config when loading.
See https://github.com/huggingface/transformers/pull/32238 for more info

2 changes: 1 addition & 1 deletion tests/models/kosmos2/test_modeling_kosmos2.py
@@ -481,7 +481,7 @@ def test_sdpa_padding_matches_padding_free_with_position_ids(self):

@pytest.mark.generate
def test_left_padding_compatibility(self):
- # Overwrite because Kosmos-2 need to padd pixel values and pad image-attn-mask
+ # Overwrite because Kosmos-2 need to pad pixel values and pad image-attn-mask

def _prepare_model_kwargs(input_ids, attention_mask, pad_size, signature):
model_kwargs = {"input_ids": input_ids, "attention_mask": attention_mask}
2 changes: 1 addition & 1 deletion tests/models/kosmos2_5/test_modeling_kosmos2_5.py
@@ -570,7 +570,7 @@ def test_generate_from_inputs_embeds(self):

@pytest.mark.generate
def test_left_padding_compatibility(self):
- # Overwrite because Kosmos-2.5 need to padd pixel values and pad image-attn-mask
+ # Overwrite because Kosmos-2.5 need to pad pixel values and pad image-attn-mask

def _prepare_model_kwargs(input_ids, attention_mask, pad_size, signature):
model_kwargs = {"input_ids": input_ids, "attention_mask": attention_mask}
4 changes: 2 additions & 2 deletions tests/models/layoutlmv2/test_tokenization_layoutlmv2.py
@@ -1337,7 +1337,7 @@ def test_tokenization_python_rust_equals(self):
):
self.assertSequenceEqual(input_p[key], input_r[key][0])

- def test_embeded_special_tokens(self):
+ def test_embedded_special_tokens(self):
if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
self.skipTest(reason="test_slow_tokenizer is set to False")
@@ -1733,7 +1733,7 @@ def test_batch_encode_dynamic_overflowing(self):
self.assertEqual(tokens[key].shape[-1], 4)

@unittest.skip(reason="TO DO: overwrite this very extensive test.")
- def test_alignement_methods(self):
+ def test_alignment_methods(self):
pass

def get_clean_sequence(self, tokenizer, with_prefix_space=False, max_length=20, min_length=5):
4 changes: 2 additions & 2 deletions tests/models/layoutlmv3/test_tokenization_layoutlmv3.py
@@ -1222,7 +1222,7 @@ def test_tokenization_python_rust_equals(self):
):
self.assertSequenceEqual(input_p[key], input_r[key][0])

- def test_embeded_special_tokens(self):
+ def test_embedded_special_tokens(self):
if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
self.skipTest(reason="test_slow_tokenizer is set to False")
@@ -1623,7 +1623,7 @@ def test_batch_encode_dynamic_overflowing(self):
self.assertEqual(tokens[key].shape[-1], 4)

@unittest.skip(reason="TO DO: overwrite this very extensive test.")
- def test_alignement_methods(self):
+ def test_alignment_methods(self):
pass

def get_clean_sequence(self, tokenizer, with_prefix_space=False, max_length=20, min_length=5):
4 changes: 2 additions & 2 deletions tests/models/layoutxlm/test_tokenization_layoutxlm.py
@@ -1266,7 +1266,7 @@ def test_tokenization_python_rust_equals(self):
):
self.assertSequenceEqual(input_p[key], input_r[key][0])

- def test_embeded_special_tokens(self):
+ def test_embedded_special_tokens(self):
if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
self.skipTest(reason="test_slow_tokenizer is set to False")
@@ -1734,7 +1734,7 @@ def test_save_pretrained(self):
shutil.rmtree(tmpdirname2)

@unittest.skip(reason="TO DO: overwrite this very extensive test.")
- def test_alignement_methods(self):
+ def test_alignment_methods(self):
pass

@unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
2 changes: 1 addition & 1 deletion tests/models/led/test_modeling_led.py
@@ -515,7 +515,7 @@ def _long_tensor(tok_lst):
class LEDModelIntegrationTests(unittest.TestCase):
"""All the below results were obtained with the original checkpoints and code
base from https://github.com/allenai/longformer.
- IMPORTANT: Note that the original checkpoints include a `postion_embeddings` "hack"
+ IMPORTANT: Note that the original checkpoints include a `position_embeddings` "hack"
and have to be cut to have the correct shape.
See: https://github.com/huggingface/transformers/pull/9278#issue-544709661.
"""
2 changes: 1 addition & 1 deletion tests/models/led/test_tokenization_led.py
@@ -164,7 +164,7 @@ def test_global_attention_mask(self):
def test_pretokenized_inputs(self):
pass

- def test_embeded_special_tokens(self):
+ def test_embedded_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs)
2 changes: 1 addition & 1 deletion tests/models/longformer/test_tokenization_longformer.py
@@ -174,7 +174,7 @@ def test_space_encoding(self):
def test_pretokenized_inputs(self):
pass

- def test_embeded_special_tokens(self):
+ def test_embedded_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs)
2 changes: 1 addition & 1 deletion tests/models/luke/test_tokenization_luke.py
@@ -134,7 +134,7 @@ def test_space_encoding(self):
def test_pretokenized_inputs(self):
pass

- def test_embeded_special_tokens(self):
+ def test_embedded_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs)
4 changes: 2 additions & 2 deletions tests/models/markuplm/test_tokenization_markuplm.py
@@ -1107,7 +1107,7 @@ def test_tokenization_python_rust_equals(self):
):
self.assertSequenceEqual(input_p[key], input_r[key][0])

- def test_embeded_special_tokens(self):
+ def test_embedded_special_tokens(self):
if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
self.skipTest(reason="test_slow_tokenizer is set to False")
@@ -1508,7 +1508,7 @@ def test_batch_encode_dynamic_overflowing(self):
self.assertEqual(tokens[key].shape[-2], 6)

@unittest.skip(reason="TO DO: overwrite this very extensive test.")
- def test_alignement_methods(self):
+ def test_alignment_methods(self):
pass

def get_clean_sequence(self, tokenizer, with_prefix_space=False, max_length=20, min_length=5):
6 changes: 3 additions & 3 deletions tests/models/mask2former/test_image_processing_mask2former.py
@@ -477,16 +477,16 @@ def test_binary_mask_to_rle(self):

def test_post_process_semantic_segmentation(self):
for image_processing_class in self.image_processor_list:
- fature_extractor = image_processing_class(num_labels=self.image_processor_tester.num_classes)
+ feature_extractor = image_processing_class(num_labels=self.image_processor_tester.num_classes)
outputs = self.image_processor_tester.get_fake_mask2former_outputs()

- segmentation = fature_extractor.post_process_semantic_segmentation(outputs)
+ segmentation = feature_extractor.post_process_semantic_segmentation(outputs)

self.assertEqual(len(segmentation), self.image_processor_tester.batch_size)
self.assertEqual(segmentation[0].shape, (384, 384))

target_sizes = [(1, 4) for i in range(self.image_processor_tester.batch_size)]
- segmentation = fature_extractor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes)
+ segmentation = feature_extractor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes)

self.assertEqual(segmentation[0].shape, target_sizes[0])
