[tests] fix blip2 edge case #40699
Changes from all commits
Changes to the shared generation tests (`GenerationTesterMixin`):
@@ -336,7 +336,7 @@ def test_greedy_generate(self):
         model = model_class(config).to(torch_device).eval()
         output_generate = self._greedy_generate(model=model, inputs_dict=inputs_dict)
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
             self.assertTrue(output_generate.shape[1] == self.max_new_tokens + 1)
         else:
             self.assertTrue(output_generate.shape[1] == self.max_new_tokens + inputs_dict["input_ids"].shape[1])
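The same one-line change to the encoder-decoder check repeats across the generation tests below. For a composite model such as Blip2, the top-level flag and the decoder text sub-config's flag live on different config objects and need not agree, which is the edge case the PR title points at. A minimal sketch for inspecting both; the checkpoint name is only an illustrative assumption, not something stated in the PR:

```python
# Sketch: inspect the two flags the tests switch between in this PR.
# The checkpoint name is an example, not taken from the diff.
from transformers import Blip2Config

config = Blip2Config.from_pretrained("Salesforce/blip2-flan-t5-xl")  # Blip2 with a T5 text backbone
print(config.is_encoder_decoder)                                 # top-level config flag
print(config.get_text_config(decoder=True).is_encoder_decoder)   # decoder text sub-config flag
```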
@@ -360,7 +360,7 @@ def test_greedy_generate_dict_outputs(self):
             use_cache=False,
         )
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
            self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
            # Retrocompatibility check
@@ -400,7 +400,7 @@ def test_greedy_generate_dict_outputs_use_cache(self):
             use_cache=True,  # Enable cache
         )
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
        else:
            self.assertTrue(

Review comment on lines -403 to 404: cool thanks, can you check if Florence2 model works with these changes? It is same as blip and uses Bart as backbone
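One way to answer that question locally is to run the Florence2 generation tests against this branch; the test file path below is an assumption, not part of this PR:

```python
# Hypothetical local check for the Florence2 question; the test path is assumed.
import pytest

pytest.main(["tests/models/florence2/test_modeling_florence2.py", "-k", "generate", "-q"])
```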
@@ -417,7 +417,7 @@ def test_sample_generate(self):
         model = model_class(config).to(torch_device).eval()
         output_generate = self._sample_generate(model=model, inputs_dict=inputs_dict, num_return_sequences=1)
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.shape[1] == self.max_new_tokens + 1)
        else:
            self.assertTrue(output_generate.shape[1] == self.max_new_tokens + inputs_dict["input_ids"].shape[1])
@@ -442,7 +442,7 @@ def test_sample_generate_dict_output(self):
             use_cache=False,
         )
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
            self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
            # Retrocompatibility check
@@ -467,7 +467,7 @@ def test_beam_search_generate(self):
         beam_kwargs = self._get_beam_kwargs()
         output_generate = self._beam_search_generate(model=model, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs)
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.shape[1] == self.max_new_tokens + 1)
        else:
            self.assertTrue(output_generate.shape[1] == self.max_new_tokens + inputs_dict["input_ids"].shape[1])
@@ -492,7 +492,7 @@ def test_beam_search_generate_dict_output(self):
             return_dict_in_generate=True,
             use_cache=False,
         )
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
            self.assertIsInstance(output_generate, GenerateBeamEncoderDecoderOutput)
            # Retrocompatibility check
@@ -541,7 +541,7 @@ def test_beam_search_generate_dict_outputs_use_cache(self):
             use_cache=True,  # Enable cache
         )
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
        else:
            self.assertTrue(
@@ -594,7 +594,7 @@ def test_beam_sample_generate(self):
             beam_kwargs=beam_kwargs,
         )
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.shape[1] == self.max_new_tokens + 1)
        else:
            self.assertTrue(output_generate.shape[1] == self.max_new_tokens + inputs_dict["input_ids"].shape[1])
@@ -621,7 +621,7 @@ def test_beam_sample_generate_dict_output(self):
             use_cache=False,
         )
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
            self.assertIsInstance(output_generate, GenerateBeamEncoderDecoderOutput)
            # Retrocompatibility check
@@ -1791,7 +1791,7 @@ def test_generate_compilation_all_outputs(self):
         else:
             self.assertTrue(hasattr(model, "_compiled_call"))  # our auto compile should have been called
 
-        if model.config.get_text_config(decoder=True).is_encoder_decoder:
+        if model.config.is_encoder_decoder:
            self.assertTrue(output_generate.sequences.shape[1] == self.max_new_tokens + 1)
            self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
        else:
Changes to the Blip2 model tests:
@@ -21,6 +21,7 @@
 import numpy as np
 import pytest
 import requests
+from parameterized import parameterized
 
 from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig
 from transformers.testing_utils import (
@@ -40,6 +41,7 @@
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
+    TEST_EAGER_MATCHES_SDPA_INFERENCE_PARAMETERIZATION,
     ModelTesterMixin,
     _config_zero_init,
     floats_tensor,
@@ -1094,6 +1096,11 @@ def test_initialization(self):
     def test_internal_model_config_and_subconfig_are_same(self):
         pass
 
+    @parameterized.expand(TEST_EAGER_MATCHES_SDPA_INFERENCE_PARAMETERIZATION)
+    @unittest.skip("Won't fix: Blip2 + T5 backbone needs custom input preparation for this test")
+    def test_eager_matches_sdpa_inference(self, *args):
+        pass
+
 
 class Blip2TextModelWithProjectionTester:
     def __init__(self, parent, vision_kwargs=None, qformer_kwargs=None, is_training=True):

Review comment: can be fixed, but I don't think it's worth it 👀
Reply: agreed, let's assume that t5 passes the test so we are fine :)
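The override keeps the `parameterized.expand` decorator, presumably so the expanded method names shadow the parameterized tests inherited from the common test mixin while the skip applies to every case. A rough, self-contained sketch of that pattern; the class and case names are illustrative, not from the PR:

```python
# Rough sketch of skipping an inherited parameterized test; names are made up.
import unittest

from parameterized import parameterized

CASES = [("eager",), ("sdpa",)]  # stand-in for TEST_EAGER_MATCHES_SDPA_INFERENCE_PARAMETERIZATION


class CommonMixin:
    @parameterized.expand(CASES)
    def test_eager_matches_sdpa_inference(self, attn_implementation):
        # the real mixin compares eager and SDPA outputs here
        self.assertIsNotNone(attn_implementation)


class Blip2LikeModelTest(CommonMixin, unittest.TestCase):
    # Re-expanding with the same cases generates the same method names as the mixin,
    # so every expanded case resolves to this skipped override.
    @parameterized.expand(CASES)
    @unittest.skip("Won't fix: backbone needs custom input preparation for this test")
    def test_eager_matches_sdpa_inference(self, *args):
        pass


if __name__ == "__main__":
    unittest.main()
```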
@@ -1849,7 +1856,10 @@ def test_inference_t5_multi_accelerator(self):
         # Test output
         expected_ids_and_text = Expectations(
             {
-                ("cuda", None): ([0, 2335, 1556, 28, 1782, 30, 8, 2608, 1], "woman playing with dog on the beach"),
+                ("cuda", None): (
+                    [0, 3, 9, 2335, 19, 1556, 28, 160, 1782, 30, 8, 2608, 1],
+                    "a woman is playing with her dog on the beach",
+                ),
                 ("rocm", (9, 5)): (
                     [0, 3, 9, 2335, 19, 1556, 28, 160, 1782, 30, 8, 2608, 1],
                     "a woman is playing with her dog on the beach",
@@ -1869,11 +1879,8 @@ def test_inference_t5_multi_accelerator(self):
         # Test output
         expected_ids_and_text = Expectations(
             {
-                ("cuda", None): ([0, 3, 7, 152, 67, 839, 1], "san diego"),
-                ("rocm", (9, 5)): (
-                    [0, 3, 7, 152, 2515, 11389, 3523, 1],
-                    "san francisco",  # TODO: check if this is ok
-                ),
+                ("cuda", None): ([0, 3, 7, 152, 2515, 11389, 3523, 1], "san francisco"),
+                ("rocm", (9, 5)): ([0, 3, 7, 152, 2515, 11389, 3523, 1], "san francisco"),
             }
         ).get_expectation()
         self.assertEqual(predictions[0].tolist(), expected_ids_and_text[0])

Review comment: i believe the CUDA expectation incorrect from the beginning
Reply: yes, I agree 👀
Review comment: make fixup corrected this 👀
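As a sanity check on the updated expectation, the new token ids can be decoded with the text backbone's tokenizer; the checkpoint name below is an assumption about what this slow test loads, not something stated in the diff:

```python
# Decode the updated expected ids; the checkpoint name is assumed, not taken from the PR.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Salesforce/blip2-flan-t5-xl")
ids = [0, 3, 7, 152, 2515, 11389, 3523, 1]
print(tokenizer.decode(ids, skip_special_tokens=True))  # expected to read "san francisco"
```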