From 33bce3044e94b610c7828d0e609621a0f566d774 Mon Sep 17 00:00:00 2001
From: Ilias A <iliasamri00@gmail.com>
Date: Thu, 26 Oct 2023 18:38:37 +0200
Subject: [PATCH 1/5] Enable lora for sdxl adapters too.

Issue #5516
---
 .../pipeline_stable_diffusion_xl_adapter.py   | 75 +++++++++++++++++++
 .../test_stable_diffusion_xl_adapter.py       | 70 ++++++++++++++++-
 2 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
index 2a3fca7f4603..5e4424bf3322 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import inspect
+import os
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
@@ -1067,3 +1068,77 @@ def __call__(
             return (image,)
 
         return StableDiffusionXLPipelineOutput(images=image)
+
+
+    # Override to properly handle the loading and unloading of the additional text encoder.
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.load_lora_weights
+    def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
+        # We could have accessed the unet config from `lora_state_dict()` too. We pass
+        # it here explicitly to be able to tell that it's coming from an SDXL
+        # pipeline.
+        state_dict, network_alphas = self.lora_state_dict(
+            pretrained_model_name_or_path_or_dict,
+            unet_config=self.unet.config,
+            **kwargs,
+        )
+        self.load_lora_into_unet(state_dict, network_alphas=network_alphas, unet=self.unet)
+
+        text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k}
+        if len(text_encoder_state_dict) > 0:
+            self.load_lora_into_text_encoder(
+                text_encoder_state_dict,
+                network_alphas=network_alphas,
+                text_encoder=self.text_encoder,
+                prefix="text_encoder",
+                lora_scale=self.lora_scale,
+            )
+
+        text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k}
+        if len(text_encoder_2_state_dict) > 0:
+            self.load_lora_into_text_encoder(
+                text_encoder_2_state_dict,
+                network_alphas=network_alphas,
+                text_encoder=self.text_encoder_2,
+                prefix="text_encoder_2",
+                lora_scale=self.lora_scale,
+            )
+
+    @classmethod
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.save_lora_weights
+    def save_lora_weights(
+        self,
+        save_directory: Union[str, os.PathLike],
+        unet_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+        text_encoder_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+        text_encoder_2_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+        is_main_process: bool = True,
+        weight_name: str = None,
+        save_function: Callable = None,
+        safe_serialization: bool = True,
+    ):
+        state_dict = {}
+
+        def pack_weights(layers, prefix):
+            layers_weights = layers.state_dict() if isinstance(layers, torch.nn.Module) else layers
+            layers_state_dict = {f"{prefix}.{module_name}": param for module_name, param in layers_weights.items()}
+            return layers_state_dict
+
+        state_dict.update(pack_weights(unet_lora_layers, "unet"))
+
+        if text_encoder_lora_layers and text_encoder_2_lora_layers:
+            state_dict.update(pack_weights(text_encoder_lora_layers, "text_encoder"))
+            state_dict.update(pack_weights(text_encoder_2_lora_layers, "text_encoder_2"))
+
+        self.write_lora_layers(
+            state_dict=state_dict,
+            save_directory=save_directory,
+            is_main_process=is_main_process,
+            weight_name=weight_name,
+            save_function=save_function,
+            safe_serialization=safe_serialization,
+        )
+
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline._remove_text_encoder_monkey_patch
+    def _remove_text_encoder_monkey_patch(self):
+        self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder)
+        self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder_2)
\ No newline at end of file
diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
index 616aec6392f6..20b1bcd03be9 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import random
+import gc
 import unittest
 
 import numpy as np
@@ -29,10 +30,13 @@
     StableDiffusionXLAdapterPipeline,
     T2IAdapter,
     UNet2DConditionModel,
+    EulerAncestralDiscreteScheduler,
+
 )
 from diffusers.utils import logging
 from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, torch_device
-
+from diffusers.utils import load_image, randn_tensor, torch_device
+from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow
 from ..pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS
 from ..test_pipelines_common import (
     PipelineTesterMixin,
@@ -560,3 +564,67 @@ def test_inference_batch_single_identical(
 
         if test_mean_pixel_difference:
             assert_mean_pixel_difference(output_batch[0][0], output[0][0])
+
+
+@slow
+@require_torch_gpu
+class AdapterSDXLPipelineSlowTests(unittest.TestCase):
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    def test_canny(self):
+        adapter = T2IAdapter.from_pretrained(
+            "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16"
+            )
+        pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-xl-base-1.0",adapter=adapter
+        )
+        pipe.enable_sequential_cpu_offload()
+        pipe.set_progress_bar_config(disable=None)
+
+        generator = torch.Generator(device="cpu").manual_seed(0)
+        prompt = "toy"
+        image = load_image(
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/blob/main/t2i_adapter/toy_canny.png"
+        )
+
+        images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
+
+        assert images[0].shape == (768, 512, 3)
+
+        original_image = images[0, -3:, -3:, -1].flatten()
+        expected_image = np.array([0.4185, 0.4127, 0.4089, 0.4046, 0.4115, 0.4096, 0.4081, 0.4112, 0.3913])#TODO : Get real values
+        assert np.allclose(original_image, expected_image, atol=1e-04)
+
+
+    def test_canny_lora(self):
+        # load adapter
+        adapter = T2IAdapter.from_pretrained(
+            "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16"
+        )
+
+        # load euler_a scheduler
+        model_id = 'stabilityai/stable-diffusion-xl-base-1.0'
+        euler_a = EulerAncestralDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
+        vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+        pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
+            model_id, vae=vae, adapter=adapter, scheduler=euler_a,
+        )
+        pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors")
+        pipe.enable_sequential_cpu_offload()
+
+        generator = torch.Generator(device="cpu").manual_seed(0)
+        prompt = "toy"
+        image = load_image(
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/blob/main/t2i_adapter/toy_canny.png"
+        )
+
+        images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
+
+        assert images[0].shape == (768, 512, 3)
+
+        original_image = images[0, -3:, -3:, -1].flatten()
+        expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])#TODO : Get real values
+        assert np.allclose(original_image, expected_image, atol=1e-04)
\ No newline at end of file

From 17deb78d42aaaa8ffe890987b2f1772ae48dc1e2 Mon Sep 17 00:00:00 2001
From: Ilias A <iliasamri00@gmail.com>
Date: Fri, 27 Oct 2023 12:27:04 +0200
Subject: [PATCH 2/5] fix: assertion values.

---
 .../test_stable_diffusion_xl_adapter.py       | 31 +++++++++----------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
index 20b1bcd03be9..bd15635668cc 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
@@ -576,18 +576,19 @@ def tearDown(self):
 
     def test_canny(self):
         adapter = T2IAdapter.from_pretrained(
-            "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16"
-            )
+            "TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16
+        ).to("cpu")
         pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0",adapter=adapter
+        'stabilityai/stable-diffusion-xl-base-1.0', adapter=adapter, torch_dtype=torch.float16, variant="fp16", 
         )
+        pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors")
         pipe.enable_sequential_cpu_offload()
         pipe.set_progress_bar_config(disable=None)
 
         generator = torch.Generator(device="cpu").manual_seed(0)
         prompt = "toy"
         image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/blob/main/t2i_adapter/toy_canny.png"
+                    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png"
         )
 
         images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
@@ -595,30 +596,25 @@ def test_canny(self):
         assert images[0].shape == (768, 512, 3)
 
         original_image = images[0, -3:, -3:, -1].flatten()
-        expected_image = np.array([0.4185, 0.4127, 0.4089, 0.4046, 0.4115, 0.4096, 0.4081, 0.4112, 0.3913])#TODO : Get real values
+        expected_image = np.array([0.5226907,  0.5272584,  0.52255356, 0.50762856, 0.50497484, 0.50656825, 0.5065469,  0.51809096, 0.48973307])
         assert np.allclose(original_image, expected_image, atol=1e-04)
 
 
     def test_canny_lora(self):
-        # load adapter
         adapter = T2IAdapter.from_pretrained(
-            "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16"
-        )
-
-        # load euler_a scheduler
-        model_id = 'stabilityai/stable-diffusion-xl-base-1.0'
-        euler_a = EulerAncestralDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
-        vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+            "TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16
+        ).to("cpu")
         pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
-            model_id, vae=vae, adapter=adapter, scheduler=euler_a,
+        'stabilityai/stable-diffusion-xl-base-1.0', adapter=adapter, torch_dtype=torch.float16, variant="fp16", 
         )
         pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors")
         pipe.enable_sequential_cpu_offload()
+        pipe.set_progress_bar_config(disable=None)
 
         generator = torch.Generator(device="cpu").manual_seed(0)
         prompt = "toy"
         image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/blob/main/t2i_adapter/toy_canny.png"
+                    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png"
         )
 
         images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
@@ -626,5 +622,6 @@ def test_canny_lora(self):
         assert images[0].shape == (768, 512, 3)
 
         original_image = images[0, -3:, -3:, -1].flatten()
-        expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])#TODO : Get real values
-        assert np.allclose(original_image, expected_image, atol=1e-04)
\ No newline at end of file
+        expected_image = np.array([0.50346327, 0.50708383, 0.50719553, 0.5135172,  0.5155377,  0.5066059, 0.49680984, 0.5005894,  0.48509413])
+        assert np.allclose(original_image, expected_image, atol=1e-04)
+

From 0528498e14179a0575467b3a1d74d7968b4d4380 Mon Sep 17 00:00:00 2001
From: ilisparrow <4880273+ilisparrow@users.noreply.github.com>
Date: Fri, 27 Oct 2023 15:46:45 +0200
Subject: [PATCH 3/5] Use numpy_cosine_similarity_distance on the arrays

Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
---
 .../stable_diffusion_xl/test_stable_diffusion_xl_adapter.py     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
index bd15635668cc..90b3f44f0797 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
@@ -623,5 +623,5 @@ def test_canny_lora(self):
 
         original_image = images[0, -3:, -3:, -1].flatten()
         expected_image = np.array([0.50346327, 0.50708383, 0.50719553, 0.5135172,  0.5155377,  0.5066059, 0.49680984, 0.5005894,  0.48509413])
-        assert np.allclose(original_image, expected_image, atol=1e-04)
+        assert numpy_cosine_similarity_distance(original_image, expected_image) < 1e-4
 

From ec6bf81c98f396dcedd1d4d3fef21a36deb280f9 Mon Sep 17 00:00:00 2001
From: ilisparrow <4880273+ilisparrow@users.noreply.github.com>
Date: Fri, 27 Oct 2023 15:46:56 +0200
Subject: [PATCH 4/5] Use numpy_cosine_similarity_distance on the arrays

Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
---
 .../stable_diffusion_xl/test_stable_diffusion_xl_adapter.py     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
index 90b3f44f0797..3fba58883f8b 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
@@ -596,7 +596,7 @@ def test_canny(self):
         assert images[0].shape == (768, 512, 3)
 
         original_image = images[0, -3:, -3:, -1].flatten()
-        expected_image = np.array([0.5226907,  0.5272584,  0.52255356, 0.50762856, 0.50497484, 0.50656825, 0.5065469,  0.51809096, 0.48973307])
+        assert numpy_cosine_similarity_distance(original_image, expected_image) < 1e-4
         assert np.allclose(original_image, expected_image, atol=1e-04)
 
 

From 8f0b5ffa01862be41312456cca68f08d9108010c Mon Sep 17 00:00:00 2001
From: ilisparrow <4880273+ilisparrow@users.noreply.github.com>
Date: Mon, 30 Oct 2023 15:19:53 +0100
Subject: [PATCH 5/5] Changed imports orders to pass tests

Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
---
 .../stable_diffusion_xl/test_stable_diffusion_xl_adapter.py  | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
index 3fba58883f8b..10ff9ff36901 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py
@@ -35,8 +35,9 @@
 )
 from diffusers.utils import logging
 from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, torch_device
-from diffusers.utils import load_image, randn_tensor, torch_device
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow
+from diffusers.utils import load_image
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.utils.testing_utils import enable_full_determinism, numpy_cosine_similarity_distance, require_torch_gpu, slow, torch_device
 from ..pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS
 from ..test_pipelines_common import (
     PipelineTesterMixin,