From 958224c13e50cc95fa493d46515bf1dc6d372d70 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Sat, 17 Aug 2024 00:59:15 +0200
Subject: [PATCH 1/3] fix xformers tests

---
 src/diffusers/models/attention_processor.py |  5 +++
 .../test_animatediff_controlnet.py          |  8 +++++
 .../test_animatediff_sparsectrl.py          |  8 +++++
 tests/pipelines/cogvideox/test_cogvideox.py | 36 ++++++++++++++++++-
 tests/pipelines/test_pipelines_common.py    | 10 +++++-
 5 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
index e2ab1606b345..7733e3d2a366 100644
--- a/src/diffusers/models/attention_processor.py
+++ b/src/diffusers/models/attention_processor.py
@@ -2011,6 +2011,11 @@ def __call__(
         key = attn.head_to_batch_dim(key).contiguous()
         value = attn.head_to_batch_dim(value).contiguous()

+        if attn.norm_q is not None:
+            query = attn.norm_q(query)
+        if attn.norm_k is not None:
+            key = attn.norm_k(key)
+
         hidden_states = xformers.ops.memory_efficient_attention(
             query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=attn.scale
         )
diff --git a/tests/pipelines/animatediff/test_animatediff_controlnet.py b/tests/pipelines/animatediff/test_animatediff_controlnet.py
index 72315bd0c965..3035fc1e3c61 100644
--- a/tests/pipelines/animatediff/test_animatediff_controlnet.py
+++ b/tests/pipelines/animatediff/test_animatediff_controlnet.py
@@ -20,6 +20,7 @@
 )
 from diffusers.models.attention import FreeNoiseTransformerBlock
 from diffusers.utils import logging
+from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import torch_device

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
@@ -329,6 +330,13 @@ def test_prompt_embeds(self):
         inputs["prompt_embeds"] = torch.randn((1, 4, pipe.text_encoder.config.hidden_size), device=torch_device)
         pipe(**inputs)

+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_xformers_attention_forwardGenerator_pass(self):
+        super()._test_xformers_attention_forwardGenerator_pass(test_mean_pixel_difference=False)
+
     def test_free_init(self):
         components = self.get_dummy_components()
         pipe: AnimateDiffControlNetPipeline = self.pipeline_class(**components)
diff --git a/tests/pipelines/animatediff/test_animatediff_sparsectrl.py b/tests/pipelines/animatediff/test_animatediff_sparsectrl.py
index 5d8a7228118d..e4cc06e1e797 100644
--- a/tests/pipelines/animatediff/test_animatediff_sparsectrl.py
+++ b/tests/pipelines/animatediff/test_animatediff_sparsectrl.py
@@ -19,6 +19,7 @@
     UNetMotionModel,
 )
 from diffusers.utils import logging
+from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import torch_device

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
@@ -393,6 +394,13 @@ def test_prompt_embeds(self):
         inputs["prompt_embeds"] = torch.randn((1, 4, pipe.text_encoder.config.hidden_size), device=torch_device)
         pipe(**inputs)

+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_xformers_attention_forwardGenerator_pass(self):
+        super()._test_xformers_attention_forwardGenerator_pass(test_mean_pixel_difference=False)
+
     def test_free_init(self):
         components = self.get_dummy_components()
         pipe: AnimateDiffSparseControlNetPipeline = self.pipeline_class(**components)
diff --git a/tests/pipelines/cogvideox/test_cogvideox.py b/tests/pipelines/cogvideox/test_cogvideox.py
index 3ae500eb9567..2196dcf86d40 100644
--- a/tests/pipelines/cogvideox/test_cogvideox.py
+++ b/tests/pipelines/cogvideox/test_cogvideox.py
@@ -30,7 +30,7 @@
 )

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
-from ..test_pipelines_common import PipelineTesterMixin, to_np
+from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference, to_np


 enable_full_determinism()
@@ -275,6 +275,40 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
             "VAE tiling should not affect the inference results",
         )

+    def test_xformers_attention_forwardGenerator_pass(
+        self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-4
+    ):
+        if not self.test_xformers_attention:
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(torch_device)
+        output_without_offload = pipe(**inputs)[0].permute(0, 1, 3, 4, 2)
+        output_without_offload = (
+            output_without_offload.cpu() if torch.is_tensor(output_without_offload) else output_without_offload
+        )
+
+        pipe.enable_xformers_memory_efficient_attention()
+        inputs = self.get_dummy_inputs(torch_device)
+        output_with_offload = pipe(**inputs)[0].permute(0, 1, 3, 4, 2)  # [B, F, H, W, C]
+        output_with_offload = (
+            output_with_offload.cpu() if torch.is_tensor(output_with_offload) else output_with_offload
+        )
+
+        if test_max_difference:
+            max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
+            self.assertLess(max_diff, expected_max_diff, "XFormers attention should not affect the inference results")
+
+        if test_mean_pixel_difference:
+            assert_mean_pixel_difference(to_np(output_with_offload[0][0]), to_np(output_without_offload[0][0]))
+

 @slow
 @require_torch_gpu
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index c3384e6b4664..cb6574802d00 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -1687,7 +1687,15 @@ def _test_xformers_attention_forwardGenerator_pass(
         self.assertLess(max_diff, expected_max_diff, "XFormers attention should not affect the inference results")

         if test_mean_pixel_difference:
-            assert_mean_pixel_difference(output_with_offload[0], output_without_offload[0])
+            if torch.is_tensor(output_without_offload):
+                if output_without_offload.ndim == 5:
+                    # Educated guess that the original format here is [B, F, C, H, W] and we
+                    # permute to [B, F, H, W, C] to make input compatible with mean pixel difference
+                    output_without_offload = output_without_offload.permute(0, 1, 3, 4, 2)[0]
+                    output_with_offload = output_with_offload.permute(0, 1, 3, 4, 2)[0]
+                output_without_offload = to_np(output_without_offload)
+                output_with_offload = to_np(output_with_offload)
+            assert_mean_pixel_difference(to_np(output_with_offload[0]), to_np(output_without_offload[0]))

     def test_progress_bar(self):
         components = self.get_dummy_components()

From a360039ac1a33a179455354dadb3ef5d952c6965 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Sat, 17 Aug 2024 01:03:10 +0200
Subject: [PATCH 2/3] remove unnecessary modifications to cogvideox tests

---
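Note: the override removed below duplicated work the shared PipelineTesterMixin already does. The mixin's `test_xformers_attention` flag (consulted at the top of `_test_xformers_attention_forwardGenerator_pass`, as visible in the removed hunk) lets a subclass opt out without any override. A minimal sketch, assuming a hypothetical tester class name; the flag and helper names are the real mixin attributes shown in the diffs:

    import unittest

    from ..test_pipelines_common import PipelineTesterMixin


    class MyVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        # Hypothetical tester class for illustration. Setting the mixin flag to
        # False makes _test_xformers_attention_forwardGenerator_pass return
        # early, so no per-pipeline override of the test is needed.
        test_xformers_attention = False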
 tests/pipelines/cogvideox/test_cogvideox.py | 36 +--------------------
 1 file changed, 1 insertion(+), 35 deletions(-)

diff --git a/tests/pipelines/cogvideox/test_cogvideox.py b/tests/pipelines/cogvideox/test_cogvideox.py
index 2196dcf86d40..3ae500eb9567 100644
--- a/tests/pipelines/cogvideox/test_cogvideox.py
+++ b/tests/pipelines/cogvideox/test_cogvideox.py
@@ -30,7 +30,7 @@
 )

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
-from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference, to_np
+from ..test_pipelines_common import PipelineTesterMixin, to_np


 enable_full_determinism()
@@ -275,40 +275,6 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
             "VAE tiling should not affect the inference results",
         )

-    def test_xformers_attention_forwardGenerator_pass(
-        self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-4
-    ):
-        if not self.test_xformers_attention:
-            return
-
-        components = self.get_dummy_components()
-        pipe = self.pipeline_class(**components)
-        for component in pipe.components.values():
-            if hasattr(component, "set_default_attn_processor"):
-                component.set_default_attn_processor()
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-
-        inputs = self.get_dummy_inputs(torch_device)
-        output_without_offload = pipe(**inputs)[0].permute(0, 1, 3, 4, 2)
-        output_without_offload = (
-            output_without_offload.cpu() if torch.is_tensor(output_without_offload) else output_without_offload
-        )
-
-        pipe.enable_xformers_memory_efficient_attention()
-        inputs = self.get_dummy_inputs(torch_device)
-        output_with_offload = pipe(**inputs)[0].permute(0, 1, 3, 4, 2)  # [B, F, H, W, C]
-        output_with_offload = (
-            output_with_offload.cpu() if torch.is_tensor(output_with_offload) else output_with_offload
-        )
-
-        if test_max_difference:
-            max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
-            self.assertLess(max_diff, expected_max_diff, "XFormers attention should not affect the inference results")
-
-        if test_mean_pixel_difference:
-            assert_mean_pixel_difference(to_np(output_with_offload[0][0]), to_np(output_without_offload[0][0]))
-

 @slow
 @require_torch_gpu

From 836bb0244fe195a9308b889b42eb636684c3b249 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Wed, 21 Aug 2024 22:48:11 +0200
Subject: [PATCH 3/3] update

---
 src/diffusers/models/attention_processor.py |  5 -----
 tests/pipelines/cogvideox/test_cogvideox.py |  4 ++++
 tests/pipelines/latte/test_latte.py         |  8 ++++++++
 tests/pipelines/test_pipelines_common.py    | 10 +---------
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
index def6d6cd68f7..fc225567ddc1 100644
--- a/src/diffusers/models/attention_processor.py
+++ b/src/diffusers/models/attention_processor.py
@@ -1926,11 +1926,6 @@ def __call__(
         key = attn.head_to_batch_dim(key).contiguous()
         value = attn.head_to_batch_dim(value).contiguous()

-        if attn.norm_q is not None:
-            query = attn.norm_q(query)
-        if attn.norm_k is not None:
-            key = attn.norm_k(key)
-
         hidden_states = xformers.ops.memory_efficient_attention(
             query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=attn.scale
         )
diff --git a/tests/pipelines/cogvideox/test_cogvideox.py b/tests/pipelines/cogvideox/test_cogvideox.py
index 3ae500eb9567..17d0d8f21d5c 100644
--- a/tests/pipelines/cogvideox/test_cogvideox.py
+++ b/tests/pipelines/cogvideox/test_cogvideox.py
@@ -275,6 +275,10 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
             "VAE tiling should not affect the inference results",
         )

+    @unittest.skip("xformers attention processor does not exist for CogVideoX")
+    def test_xformers_attention_forwardGenerator_pass(self):
+        pass
+

 @slow
 @require_torch_gpu
diff --git a/tests/pipelines/latte/test_latte.py b/tests/pipelines/latte/test_latte.py
index 94ff7fc0faf9..9667ebff249d 100644
--- a/tests/pipelines/latte/test_latte.py
+++ b/tests/pipelines/latte/test_latte.py
@@ -28,6 +28,7 @@
     LattePipeline,
     LatteTransformer3DModel,
 )
+from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
     numpy_cosine_similarity_distance,
@@ -256,6 +257,13 @@ def test_save_load_optional_components(self):
         max_diff = np.abs(to_np(output) - to_np(output_loaded)).max()
         self.assertLess(max_diff, 1.0)

+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_xformers_attention_forwardGenerator_pass(self):
+        super()._test_xformers_attention_forwardGenerator_pass(test_mean_pixel_difference=False)
+

 @slow
 @require_torch_gpu
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index cb6574802d00..c3384e6b4664 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -1687,15 +1687,7 @@ def _test_xformers_attention_forwardGenerator_pass(
         self.assertLess(max_diff, expected_max_diff, "XFormers attention should not affect the inference results")

         if test_mean_pixel_difference:
-            if torch.is_tensor(output_without_offload):
-                if output_without_offload.ndim == 5:
-                    # Educated guess that the original format here is [B, F, C, H, W] and we
-                    # permute to [B, F, H, W, C] to make input compatible with mean pixel difference
-                    output_without_offload = output_without_offload.permute(0, 1, 3, 4, 2)[0]
-                    output_with_offload = output_with_offload.permute(0, 1, 3, 4, 2)[0]
-                output_without_offload = to_np(output_without_offload)
-                output_with_offload = to_np(output_with_offload)
-            assert_mean_pixel_difference(to_np(output_with_offload[0]), to_np(output_without_offload[0]))
+            assert_mean_pixel_difference(output_with_offload[0], output_without_offload[0])

     def test_progress_bar(self):
         components = self.get_dummy_components()
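
For context beyond the diffs above: the xformers path these tests exercise reduces to one `xformers.ops.memory_efficient_attention` call on query/key/value tensors in the `(batch * heads, seq_len, head_dim)` layout that `head_to_batch_dim` produces, as the attention_processor.py context lines show. A minimal standalone sketch of that call; the shapes, dtype, and device are illustrative assumptions, not values from the test suite:

    import torch
    import xformers.ops

    # Assumed illustrative layout: 2 samples x 8 heads folded into the batch
    # dimension, 64 tokens, head dimension 40 -> (batch * heads, seq_len, head_dim).
    # Requires a CUDA device with `xformers` installed, matching the skip
    # condition used in the tests above.
    query = torch.randn(16, 64, 40, device="cuda", dtype=torch.float16).contiguous()
    key = torch.randn(16, 64, 40, device="cuda", dtype=torch.float16).contiguous()
    value = torch.randn(16, 64, 40, device="cuda", dtype=torch.float16).contiguous()

    # attn_bias may be None or an additive mask broadcastable to
    # (batch * heads, seq_len, seq_len); scale=None uses the default
    # 1/sqrt(head_dim).
    out = xformers.ops.memory_efficient_attention(query, key, value, attn_bias=None, scale=None)
    print(out.shape)  # torch.Size([16, 64, 40])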