diff --git a/setup.py b/setup.py
index c635db02e33c..e4d73a3d956f 100644
--- a/setup.py
+++ b/setup.py
@@ -79,14 +79,12 @@
 stale_egg_info = Path(__file__).parent / "transformers.egg-info"
 if stale_egg_info.exists():
     print(
-        (
-            "Warning: {} exists.\n\n"
-            "If you recently updated transformers to 3.0 or later, this is expected,\n"
-            "but it may prevent transformers from installing in editable mode.\n\n"
-            "This directory is automatically generated by Python's packaging tools.\n"
-            "I will remove it now.\n\n"
-            "See https://github.com/pypa/pip/issues/5466 for details.\n"
-        ).format(stale_egg_info)
+        f"Warning: {stale_egg_info} exists.\n\n"
+        "If you recently updated transformers to 3.0 or later, this is expected,\n"
+        "but it may prevent transformers from installing in editable mode.\n\n"
+        "This directory is automatically generated by Python's packaging tools.\n"
+        "I will remove it now.\n\n"
+        "See https://github.com/pypa/pip/issues/5466 for details.\n"
     )
     shutil.rmtree(stale_egg_info)
diff --git a/src/transformers/models/blip_2/configuration_blip_2.py b/src/transformers/models/blip_2/configuration_blip_2.py
index ff7e629887c1..23145ffc543f 100644
--- a/src/transformers/models/blip_2/configuration_blip_2.py
+++ b/src/transformers/models/blip_2/configuration_blip_2.py
@@ -312,6 +312,7 @@ def __init__(
         self.image_token_index = image_token_index
         self.qformer_config.encoder_hidden_size = self.vision_config.hidden_size
         self.use_decoder_only_language_model = self.text_config.model_type in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+        self.is_encoder_decoder = self.text_config.is_encoder_decoder
         self.initializer_factor = 1.0
         self.initializer_range = 0.02
diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
index 85841c557df1..b01733192cd9 100644
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -336,7 +336,7 @@ def test_greedy_generate(self):
             model = model_class(config).to(torch_device).eval()
             output_generate = self._greedy_generate(model=model, inputs_dict=inputs_dict)
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.shape[1] == self.max_new_tokens + 1)
             else:
                 self.assertTrue(output_generate.shape[1] == self.max_new_tokens + inputs_dict["input_ids"].shape[1])
@@ -360,7 +360,7 @@ def test_greedy_generate_dict_outputs(self):
                 use_cache=False,
             )
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
                 self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
                 # Retrocompatibility check
@@ -400,7 +400,7 @@ def test_greedy_generate_dict_outputs_use_cache(self):
                 use_cache=True,  # Enable cache
             )
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
             else:
                 self.assertTrue(
@@ -417,7 +417,7 @@ def test_sample_generate(self):
             model = model_class(config).to(torch_device).eval()
             output_generate = self._sample_generate(model=model, inputs_dict=inputs_dict, num_return_sequences=1)
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.shape[1] == self.max_new_tokens + 1)
             else:
                 self.assertTrue(output_generate.shape[1] == self.max_new_tokens + inputs_dict["input_ids"].shape[1])
@@ -442,7 +442,7 @@ def test_sample_generate_dict_output(self):
                 use_cache=False,
             )
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
                 self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
                 # Retrocompatibility check
@@ -467,7 +467,7 @@ def test_beam_search_generate(self):
             beam_kwargs = self._get_beam_kwargs()
             output_generate = self._beam_search_generate(model=model, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs)
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.shape[1] == self.max_new_tokens + 1)
             else:
                 self.assertTrue(output_generate.shape[1] == self.max_new_tokens + inputs_dict["input_ids"].shape[1])
@@ -492,7 +492,7 @@ def test_beam_search_generate_dict_output(self):
                 return_dict_in_generate=True,
                 use_cache=False,
             )
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
                 self.assertIsInstance(output_generate, GenerateBeamEncoderDecoderOutput)
                 # Retrocompatibility check
@@ -541,7 +541,7 @@ def test_beam_search_generate_dict_outputs_use_cache(self):
                 use_cache=True,  # Enable cache
             )
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
             else:
                 self.assertTrue(
@@ -594,7 +594,7 @@ def test_beam_sample_generate(self):
                 beam_kwargs=beam_kwargs,
             )
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.shape[1] == self.max_new_tokens + 1)
             else:
                 self.assertTrue(output_generate.shape[1] == self.max_new_tokens + inputs_dict["input_ids"].shape[1])
@@ -621,7 +621,7 @@ def test_beam_sample_generate_dict_output(self):
                 use_cache=False,
             )
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
                 self.assertIsInstance(output_generate, GenerateBeamEncoderDecoderOutput)
                 # Retrocompatibility check
@@ -1791,7 +1791,7 @@ def test_generate_compilation_all_outputs(self):
             else:
                 self.assertTrue(hasattr(model, "_compiled_call"))  # our auto compile should have been called
 
-            if model.config.get_text_config(decoder=True).is_encoder_decoder:
+            if model.config.is_encoder_decoder:
                 self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
                 self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
             else:
diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py
index adc63c536b28..2f6df2aab27e 100644
--- a/tests/models/blip_2/test_modeling_blip_2.py
+++ b/tests/models/blip_2/test_modeling_blip_2.py
@@ -21,6 +21,7 @@
 import numpy as np
 import pytest
 import requests
+from parameterized import parameterized
 
 from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig
 from transformers.testing_utils import (
@@ -40,6 +41,7 @@
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
+    TEST_EAGER_MATCHES_SDPA_INFERENCE_PARAMETERIZATION,
     ModelTesterMixin,
     _config_zero_init,
     floats_tensor,
@@ -1094,6 +1096,11 @@ def test_initialization(self):
     def test_internal_model_config_and_subconfig_are_same(self):
         pass
 
+    @parameterized.expand(TEST_EAGER_MATCHES_SDPA_INFERENCE_PARAMETERIZATION)
+    @unittest.skip("Won't fix: Blip2 + T5 backbone needs custom input preparation for this test")
+    def test_eager_matches_sdpa_inference(self, *args):
+        pass
+
 
 class Blip2TextModelWithProjectionTester:
     def __init__(self, parent, vision_kwargs=None, qformer_kwargs=None, is_training=True):
@@ -1849,7 +1856,10 @@ def test_inference_t5_multi_accelerator(self):
         # Test output
         expected_ids_and_text = Expectations(
             {
-                ("cuda", None): ([0, 2335, 1556, 28, 1782, 30, 8, 2608, 1], "woman playing with dog on the beach"),
+                ("cuda", None): (
+                    [0, 3, 9, 2335, 19, 1556, 28, 160, 1782, 30, 8, 2608, 1],
+                    "a woman is playing with her dog on the beach",
+                ),
                 ("rocm", (9, 5)): (
                     [0, 3, 9, 2335, 19, 1556, 28, 160, 1782, 30, 8, 2608, 1],
                     "a woman is playing with her dog on the beach",
@@ -1869,11 +1879,8 @@ def test_inference_t5_multi_accelerator(self):
         # Test output
         expected_ids_and_text = Expectations(
             {
-                ("cuda", None): ([0, 3, 7, 152, 67, 839, 1], "san diego"),
-                ("rocm", (9, 5)): (
-                    [0, 3, 7, 152, 2515, 11389, 3523, 1],
-                    "san francisco",  # TODO: check if this is ok
-                ),
+                ("cuda", None): ([0, 3, 7, 152, 2515, 11389, 3523, 1], "san francisco"),
+                ("rocm", (9, 5)): ([0, 3, 7, 152, 2515, 11389, 3523, 1], "san francisco"),
             }
         ).get_expectation()
         self.assertEqual(predictions[0].tolist(), expected_ids_and_text[0])