From 33bce3044e94b610c7828d0e609621a0f566d774 Mon Sep 17 00:00:00 2001 From: Ilias A Date: Thu, 26 Oct 2023 18:38:37 +0200 Subject: [PATCH 1/5] Enable lora for sdxl adapters too. Issue #5516 --- .../pipeline_stable_diffusion_xl_adapter.py | 75 +++++++++++++++++++ .../test_stable_diffusion_xl_adapter.py | 70 ++++++++++++++++- 2 files changed, 144 insertions(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 2a3fca7f4603..5e4424bf3322 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -13,6 +13,7 @@ # limitations under the License. import inspect +import os from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -1067,3 +1068,77 @@ def __call__( return (image,) return StableDiffusionXLPipelineOutput(images=image) + + + # Overrride to properly handle the loading and unloading of the additional text encoder. + # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.load_lora_weights + def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs): + # We could have accessed the unet config from `lora_state_dict()` too. We pass + # it here explicitly to be able to tell that it's coming from an SDXL + # pipeline. + state_dict, network_alphas = self.lora_state_dict( + pretrained_model_name_or_path_or_dict, + unet_config=self.unet.config, + **kwargs, + ) + self.load_lora_into_unet(state_dict, network_alphas=network_alphas, unet=self.unet) + + text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k} + if len(text_encoder_state_dict) > 0: + self.load_lora_into_text_encoder( + text_encoder_state_dict, + network_alphas=network_alphas, + text_encoder=self.text_encoder, + prefix="text_encoder", + lora_scale=self.lora_scale, + ) + + text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k} + if len(text_encoder_2_state_dict) > 0: + self.load_lora_into_text_encoder( + text_encoder_2_state_dict, + network_alphas=network_alphas, + text_encoder=self.text_encoder_2, + prefix="text_encoder_2", + lora_scale=self.lora_scale, + ) + + @classmethod + # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.save_lora_weights + def save_lora_weights( + self, + save_directory: Union[str, os.PathLike], + unet_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None, + text_encoder_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None, + text_encoder_2_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + ): + state_dict = {} + + def pack_weights(layers, prefix): + layers_weights = layers.state_dict() if isinstance(layers, torch.nn.Module) else layers + layers_state_dict = {f"{prefix}.{module_name}": param for module_name, param in layers_weights.items()} + return layers_state_dict + + state_dict.update(pack_weights(unet_lora_layers, "unet")) + + if text_encoder_lora_layers and text_encoder_2_lora_layers: + state_dict.update(pack_weights(text_encoder_lora_layers, "text_encoder")) + state_dict.update(pack_weights(text_encoder_2_lora_layers, "text_encoder_2")) + + self.write_lora_layers( + state_dict=state_dict, + save_directory=save_directory, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline._remove_text_encoder_monkey_patch + def _remove_text_encoder_monkey_patch(self): + self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder) + self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder_2) \ No newline at end of file diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py index 616aec6392f6..20b1bcd03be9 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py @@ -14,6 +14,7 @@ # limitations under the License. import random +import gc import unittest import numpy as np @@ -29,10 +30,13 @@ StableDiffusionXLAdapterPipeline, T2IAdapter, UNet2DConditionModel, + EulerAncestralDiscreteScheduler, + ) from diffusers.utils import logging from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, torch_device - +from diffusers.utils import load_image, randn_tensor, torch_device +from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow from ..pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS from ..test_pipelines_common import ( PipelineTesterMixin, @@ -560,3 +564,67 @@ def test_inference_batch_single_identical( if test_mean_pixel_difference: assert_mean_pixel_difference(output_batch[0][0], output[0][0]) + + +@slow +@require_torch_gpu +class AdapterSDXLPipelineSlowTests(unittest.TestCase): + def tearDown(self): + super().tearDown() + gc.collect() + torch.cuda.empty_cache() + + def test_canny(self): + adapter = T2IAdapter.from_pretrained( + "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16" + ) + pipe = StableDiffusionXLAdapterPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0",adapter=adapter + ) + pipe.enable_sequential_cpu_offload() + pipe.set_progress_bar_config(disable=None) + + generator = torch.Generator(device="cpu").manual_seed(0) + prompt = "toy" + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/blob/main/t2i_adapter/toy_canny.png" + ) + + images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images + + assert images[0].shape == (768, 512, 3) + + original_image = images[0, -3:, -3:, -1].flatten() + expected_image = np.array([0.4185, 0.4127, 0.4089, 0.4046, 0.4115, 0.4096, 0.4081, 0.4112, 0.3913])#TODO : Get real values + assert np.allclose(original_image, expected_image, atol=1e-04) + + + def test_canny_lora(self): + # load adapter + adapter = T2IAdapter.from_pretrained( + "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16" + ) + + # load euler_a scheduler + model_id = 'stabilityai/stable-diffusion-xl-base-1.0' + euler_a = EulerAncestralDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler") + vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16) + pipe = StableDiffusionXLAdapterPipeline.from_pretrained( + model_id, vae=vae, adapter=adapter, scheduler=euler_a, + ) + pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors") + pipe.enable_sequential_cpu_offload() + + generator = torch.Generator(device="cpu").manual_seed(0) + prompt = "toy" + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/blob/main/t2i_adapter/toy_canny.png" + ) + + images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images + + assert images[0].shape == (768, 512, 3) + + original_image = images[0, -3:, -3:, -1].flatten() + expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])#TODO : Get real values + assert np.allclose(original_image, expected_image, atol=1e-04) \ No newline at end of file From 17deb78d42aaaa8ffe890987b2f1772ae48dc1e2 Mon Sep 17 00:00:00 2001 From: Ilias A Date: Fri, 27 Oct 2023 12:27:04 +0200 Subject: [PATCH 2/5] fix: assertion values. --- .../test_stable_diffusion_xl_adapter.py | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py index 20b1bcd03be9..bd15635668cc 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py @@ -576,18 +576,19 @@ def tearDown(self): def test_canny(self): adapter = T2IAdapter.from_pretrained( - "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16" - ) + "TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16 + ).to("cpu") pipe = StableDiffusionXLAdapterPipeline.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0",adapter=adapter + 'stabilityai/stable-diffusion-xl-base-1.0', adapter=adapter, torch_dtype=torch.float16, variant="fp16", ) + pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors") pipe.enable_sequential_cpu_offload() pipe.set_progress_bar_config(disable=None) generator = torch.Generator(device="cpu").manual_seed(0) prompt = "toy" image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/blob/main/t2i_adapter/toy_canny.png" + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png" ) images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images @@ -595,30 +596,25 @@ def test_canny(self): assert images[0].shape == (768, 512, 3) original_image = images[0, -3:, -3:, -1].flatten() - expected_image = np.array([0.4185, 0.4127, 0.4089, 0.4046, 0.4115, 0.4096, 0.4081, 0.4112, 0.3913])#TODO : Get real values + expected_image = np.array([0.5226907, 0.5272584, 0.52255356, 0.50762856, 0.50497484, 0.50656825, 0.5065469, 0.51809096, 0.48973307]) assert np.allclose(original_image, expected_image, atol=1e-04) def test_canny_lora(self): - # load adapter adapter = T2IAdapter.from_pretrained( - "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16" - ) - - # load euler_a scheduler - model_id = 'stabilityai/stable-diffusion-xl-base-1.0' - euler_a = EulerAncestralDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler") - vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16) + "TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16 + ).to("cpu") pipe = StableDiffusionXLAdapterPipeline.from_pretrained( - model_id, vae=vae, adapter=adapter, scheduler=euler_a, + 'stabilityai/stable-diffusion-xl-base-1.0', adapter=adapter, torch_dtype=torch.float16, variant="fp16", ) pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors") pipe.enable_sequential_cpu_offload() + pipe.set_progress_bar_config(disable=None) generator = torch.Generator(device="cpu").manual_seed(0) prompt = "toy" image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/blob/main/t2i_adapter/toy_canny.png" + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png" ) images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images @@ -626,5 +622,6 @@ def test_canny_lora(self): assert images[0].shape == (768, 512, 3) original_image = images[0, -3:, -3:, -1].flatten() - expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])#TODO : Get real values - assert np.allclose(original_image, expected_image, atol=1e-04) \ No newline at end of file + expected_image = np.array([0.50346327, 0.50708383, 0.50719553, 0.5135172, 0.5155377, 0.5066059, 0.49680984, 0.5005894, 0.48509413]) + assert np.allclose(original_image, expected_image, atol=1e-04) + From 0528498e14179a0575467b3a1d74d7968b4d4380 Mon Sep 17 00:00:00 2001 From: ilisparrow <4880273+ilisparrow@users.noreply.github.com> Date: Fri, 27 Oct 2023 15:46:45 +0200 Subject: [PATCH 3/5] Use numpy_cosine_similarity_distance on the arrays Co-authored-by: Dhruv Nair --- .../stable_diffusion_xl/test_stable_diffusion_xl_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py index bd15635668cc..90b3f44f0797 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py @@ -623,5 +623,5 @@ def test_canny_lora(self): original_image = images[0, -3:, -3:, -1].flatten() expected_image = np.array([0.50346327, 0.50708383, 0.50719553, 0.5135172, 0.5155377, 0.5066059, 0.49680984, 0.5005894, 0.48509413]) - assert np.allclose(original_image, expected_image, atol=1e-04) + assert numpy_cosine_similarity_distance(original_image, expected_image) < 1e-4 From ec6bf81c98f396dcedd1d4d3fef21a36deb280f9 Mon Sep 17 00:00:00 2001 From: ilisparrow <4880273+ilisparrow@users.noreply.github.com> Date: Fri, 27 Oct 2023 15:46:56 +0200 Subject: [PATCH 4/5] Use numpy_cosine_similarity_distance on the arrays Co-authored-by: Dhruv Nair --- .../stable_diffusion_xl/test_stable_diffusion_xl_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py index 90b3f44f0797..3fba58883f8b 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py @@ -596,7 +596,7 @@ def test_canny(self): assert images[0].shape == (768, 512, 3) original_image = images[0, -3:, -3:, -1].flatten() - expected_image = np.array([0.5226907, 0.5272584, 0.52255356, 0.50762856, 0.50497484, 0.50656825, 0.5065469, 0.51809096, 0.48973307]) + assert numpy_cosine_similarity_distance(original_image, expected_image) < 1e-4 assert np.allclose(original_image, expected_image, atol=1e-04) From 8f0b5ffa01862be41312456cca68f08d9108010c Mon Sep 17 00:00:00 2001 From: ilisparrow <4880273+ilisparrow@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:19:53 +0100 Subject: [PATCH 5/5] Changed imports orders to pass tests Co-authored-by: Dhruv Nair --- .../stable_diffusion_xl/test_stable_diffusion_xl_adapter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py index 3fba58883f8b..10ff9ff36901 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py @@ -35,8 +35,9 @@ ) from diffusers.utils import logging from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, torch_device -from diffusers.utils import load_image, randn_tensor, torch_device -from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow +from diffusers.utils import load_image +from diffusers.utils.torch_utils import randn_tensor +from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device from ..pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS from ..test_pipelines_common import ( PipelineTesterMixin,