From d54d6dbcd05ec2600819ed88f7c09136ad90c36d Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Mon, 9 Oct 2023 16:52:08 +0000 Subject: [PATCH 1/3] move xformers to dedicated runner --- .github/workflows/build_docker_images.yml | 1 + .github/workflows/push_tests.yml | 40 + .../diffusers-pytorch-compile-cuda/Dockerfile | 3 +- docker/diffusers-pytorch-cuda/Dockerfile | 7 +- .../Dockerfile | 47 + tests/lora/test_lora_layers_old_backend.py | 56 +- tests/lora/test_lora_layers_peft.py | 1296 +++++++++++++++-- tests/models/test_modeling_common.py | 31 +- 8 files changed, 1360 insertions(+), 121 deletions(-) create mode 100644 docker/diffusers-pytorch-xformers-cuda/Dockerfile diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index f8bd15c46cdd..937ad07496b9 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -27,6 +27,7 @@ jobs: - diffusers-pytorch-cpu - diffusers-pytorch-cuda - diffusers-pytorch-compile-cuda + - diffusers-pytorch-xformers-cuda - diffusers-flax-cpu - diffusers-flax-tpu - diffusers-onnxruntime-cpu diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index a15a5412c4e4..5fadd095be35 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -290,6 +290,46 @@ jobs: name: torch_compile_test_reports path: reports + run_xformers_tests: + name: PyTorch xformers CUDA tests + + runs-on: docker-gpu + + container: + image: diffusers/diffusers-pytorch-xformers-cuda + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ + + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: NVIDIA-SMI + run: | + nvidia-smi + - name: Install dependencies + run: | + python -m pip install -e .[quality,test,training] + - name: Environment + run: | + python utils/print_env.py + - name: Run example tests on GPU + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/ + - name: Failure short reports + if: ${{ failure() }} + run: cat reports/tests_torch_xformers_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: torch_xformers_test_reports + path: reports + run_examples_tests: name: Examples PyTorch CUDA tests on Ubuntu diff --git a/docker/diffusers-pytorch-compile-cuda/Dockerfile b/docker/diffusers-pytorch-compile-cuda/Dockerfile index a41be50f9d58..1f7fe063b70d 100644 --- a/docker/diffusers-pytorch-compile-cuda/Dockerfile +++ b/docker/diffusers-pytorch-compile-cuda/Dockerfile @@ -42,7 +42,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \ tensorboard \ transformers \ omegaconf \ - pytorch-lightning \ - xformers + pytorch-lightning CMD ["/bin/bash"] diff --git a/docker/diffusers-pytorch-cuda/Dockerfile b/docker/diffusers-pytorch-cuda/Dockerfile index 4c447749da7b..b8e2af01f995 100644 --- a/docker/diffusers-pytorch-cuda/Dockerfile +++ b/docker/diffusers-pytorch-cuda/Dockerfile @@ -25,8 +25,8 @@ ENV PATH="/opt/venv/bin:$PATH" # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) RUN python3 -m pip install --no-cache-dir --upgrade pip && \ python3 -m pip install --no-cache-dir \ - torch==2.0.1 \ - torchvision==0.15.2 \ + torch \ + torchvision \ torchaudio \ invisible_watermark && \ python3 -m pip 
install --no-cache-dir \
@@ -41,7 +41,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
     tensorboard \
     transformers \
     omegaconf \
-    pytorch-lightning \
-    xformers
+    pytorch-lightning
 
 CMD ["/bin/bash"]
diff --git a/docker/diffusers-pytorch-xformers-cuda/Dockerfile b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
new file mode 100644
index 000000000000..4c447749da7b
--- /dev/null
+++ b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
@@ -0,0 +1,47 @@
+FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
+LABEL maintainer="Hugging Face"
+LABEL repository="diffusers"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt update && \
+    apt install -y bash \
+    build-essential \
+    git \
+    git-lfs \
+    curl \
+    ca-certificates \
+    libsndfile1-dev \
+    libgl1 \
+    python3.8 \
+    python3-pip \
+    python3.8-venv && \
+    rm -rf /var/lib/apt/lists
+
+# make sure to use venv
+RUN python3 -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
+RUN python3 -m pip install --no-cache-dir --upgrade pip && \
+    python3 -m pip install --no-cache-dir \
+    torch==2.0.1 \
+    torchvision==0.15.2 \
+    torchaudio \
+    invisible_watermark && \
+    python3 -m pip install --no-cache-dir \
+    accelerate \
+    datasets \
+    hf-doc-builder \
+    huggingface-hub \
+    Jinja2 \
+    librosa \
+    numpy \
+    scipy \
+    tensorboard \
+    transformers \
+    omegaconf \
+    pytorch-lightning \
+    xformers
+
+CMD ["/bin/bash"]
diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index d616ef8c78b8..02353cdbbb4d 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -293,8 +293,11 @@ def create_lora_weight_file(self, tmpdirname):
         )
         self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))
 
-    @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda")
-    def test_stable_diffusion_attn_processors(self):
+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_stable_diffusion_xformers_attn_processors(self):
         # disable_full_determinism()
         device = "cuda"  # ensure determinism for the device-dependent torch.Generator
         components, _ = self.get_dummy_components()
@@ -304,12 +307,34 @@ def test_stable_diffusion_attn_processors(self):
         sd_pipe = StableDiffusionPipeline(**components)
         sd_pipe = sd_pipe.to(device)
         sd_pipe.set_progress_bar_config(disable=None)
 
         _, _, inputs = self.get_dummy_inputs()
 
-        # run normal sd pipe
+        # run xformers attention
+        sd_pipe.enable_xformers_memory_efficient_attention()
         image = sd_pipe(**inputs).images
         assert image.shape == (1, 64, 64, 3)
 
-        # run xformers attention
-        sd_pipe.enable_xformers_memory_efficient_attention()
+        # run lora xformers attention
+        attn_processors, _ = create_unet_lora_layers(sd_pipe.unet)
+        attn_processors = {
+            k: LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim)
+            for k, v in attn_processors.items()
+        }
+        attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()}
+        sd_pipe.unet.set_attn_processor(attn_processors)
+        image = sd_pipe(**inputs).images
+        assert image.shape == (1, 64, 64, 3)
+
+    @unittest.skipIf(not torch.cuda.is_available(), reason="Test needs to run on GPU")
+    def test_stable_diffusion_attn_processors(self):
+        # disable_full_determinism()
+        device = "cuda"  # ensure determinism for the device-dependent torch.Generator
+        components, _ = self.get_dummy_components()
+        sd_pipe = StableDiffusionPipeline(**components)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        _, _, inputs = self.get_dummy_inputs()
+
+        # run normal sd pipe
+        image = sd_pipe(**inputs).images
+        assert image.shape == (1, 64, 64, 3)
@@ -329,18 +354,6 @@ def test_stable_diffusion_attn_processors(self):
         sd_pipe.unet.set_attn_processor(attn_processors)
         image = sd_pipe(**inputs).images
         assert image.shape == (1, 64, 64, 3)
-
-        # run lora xformers attention
-        attn_processors, _ = create_unet_lora_layers(sd_pipe.unet)
-        attn_processors = {
-            k: LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim)
-            for k, v in attn_processors.items()
-        }
-        attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()}
-        sd_pipe.unet.set_attn_processor(attn_processors)
-        image = sd_pipe(**inputs).images
-        assert image.shape == (1, 64, 64, 3)
-
         # enable_full_determinism()
 
     def test_stable_diffusion_lora(self):
@@ -631,7 +644,10 @@ def test_lora_unet_attn_processors_with_xformers(self):
             if isinstance(module, Attention):
                 self.assertIsInstance(module.processor, XFormersAttnProcessor)
 
-    @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
     def test_lora_save_load_with_xformers(self):
         pipeline_components, lora_components = self.get_dummy_components()
         sd_pipe = StableDiffusionPipeline(**pipeline_components)
@@ -2209,7 +2225,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
         lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
         lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
 
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
         pipe.enable_model_cpu_offload()
@@ -2223,7 +2239,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
 
         del pipe
 
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
         pipe.fuse_lora()
         pipe.enable_model_cpu_offload()
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 1862437fce88..0b73a2551bc5 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -12,39 +12,65 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import copy import os import tempfile +import time import unittest import numpy as np import torch import torch.nn as nn import torch.nn.functional as F +from huggingface_hub.repocard import RepoCard from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer from diffusers import ( AutoencoderKL, + ControlNetModel, DDIMScheduler, + DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, + StableDiffusionXLControlNetPipeline, StableDiffusionXLPipeline, UNet2DConditionModel, ) from diffusers.loaders import AttnProcsLayers -from diffusers.models.attention_processor import ( - LoRAAttnProcessor, - LoRAAttnProcessor2_0, +from diffusers.models.attention_processor import LoRAAttnProcessor, LoRAAttnProcessor2_0 +from diffusers.utils.import_utils import is_accelerate_available, is_peft_available +from diffusers.utils.testing_utils import ( + floats_tensor, + load_image, + nightly, + require_peft_backend, + require_torch_gpu, + slow, + torch_device, ) -from diffusers.utils.import_utils import is_peft_available -from diffusers.utils.testing_utils import floats_tensor, require_peft_backend, require_torch_gpu, slow +if is_accelerate_available(): + from accelerate.utils import release_memory + if is_peft_available(): from peft import LoraConfig from peft.tuners.tuners_utils import BaseTunerLayer from peft.utils import get_peft_model_state_dict +def state_dicts_almost_equal(sd1, sd2): + sd1 = dict(sorted(sd1.items())) + sd2 = dict(sorted(sd2.items())) + + models_are_equal = True + for ten1, ten2 in zip(sd1.values(), sd2.values()): + if (ten1 - ten2).abs().max() > 1e-3: + models_are_equal = False + + return models_are_equal + + def create_unet_lora_layers(unet: nn.Module): lora_attn_procs = {} for name in unet.attn_processors.keys(): @@ -94,6 +120,10 @@ def get_dummy_components(self): r=4, lora_alpha=4, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"], init_lora_weights=False ) + unet_lora_config = LoraConfig( + r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"], init_lora_weights=False + ) + unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) if self.has_two_text_encoders: @@ -120,7 +150,7 @@ def get_dummy_components(self): "unet_lora_layers": unet_lora_layers, "unet_lora_attn_procs": unet_lora_attn_procs, } - return pipeline_components, lora_components, text_lora_config + return pipeline_components, lora_components, text_lora_config, unet_lora_config def get_dummy_inputs(self, with_generator=True): batch_size = 1 @@ -166,7 +196,7 @@ def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected """ - components, _, _ = self.get_dummy_components() + components, _, _, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -180,7 +210,7 @@ def test_simple_inference_with_text_lora(self): Tests a simple inference with lora attached on the text encoder and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -208,7 +238,7 @@ def test_simple_inference_with_text_lora_and_scale(self): Tests a simple inference with lora attached on the text encoder + scale argument and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + 
components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -252,7 +282,7 @@ def test_simple_inference_with_text_lora_fused(self): Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -289,7 +319,7 @@ def test_simple_inference_with_text_lora_unloaded(self): Tests a simple inference with lora attached to text encoder, then unloads the lora weights and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -327,7 +357,7 @@ def test_simple_inference_with_text_lora_save_load(self): """ Tests a simple usecase where users could use saving utilities for LoRA. """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -387,7 +417,7 @@ def test_simple_inference_save_pretrained(self): """ Tests a simple usecase where users could use saving utilities for LoRA through save_pretrained """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -431,109 +461,589 @@ def test_simple_inference_save_pretrained(self): "Loading from saved checkpoints should give same results.", ) + def test_simple_inference_with_text_unet_lora_save_load(self): + """ + Tests a simple usecase where users could use saving utilities for LoRA for Unet + text encoder + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) -class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): - pipeline_class = StableDiffusionPipeline - scheduler_cls = DDIMScheduler - scheduler_kwargs = { - "beta_start": 0.00085, - "beta_end": 0.012, - "beta_schedule": "scaled_linear", - "clip_sample": False, - "set_alpha_to_one": False, - "steps_offset": 1, - } - unet_kwargs = { - "block_out_channels": (32, 64), - "layers_per_block": 2, - "sample_size": 32, - "in_channels": 4, - "out_channels": 4, - "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), - "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"), - "cross_attention_dim": 32, - } - vae_kwargs = { - "block_out_channels": [32, 64], - "in_channels": 3, - "out_channels": 3, - "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], - "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], - "latent_channels": 4, - } + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) - @slow - @require_torch_gpu - def 
test_integration_logits_with_scale(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id) - pipe = pipe.to("cuda") + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + + with tempfile.TemporaryDirectory() as tmpdirname: + text_encoder_state_dict = get_peft_model_state_dict(pipe.text_encoder) + unet_state_dict = get_peft_model_state_dict(pipe.unet) + if self.has_two_text_encoders: + text_encoder_2_state_dict = get_peft_model_state_dict(pipe.text_encoder_2) + + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, + text_encoder_lora_layers=text_encoder_state_dict, + text_encoder_2_lora_layers=text_encoder_2_state_dict, + unet_lora_layers=unet_state_dict, + safe_serialization=False, + ) + else: + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, + text_encoder_lora_layers=text_encoder_state_dict, + unet_lora_layers=unet_state_dict, + safe_serialization=False, + ) + + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) + pipe.unload_lora_weights() + + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) + + images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder 2", + np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3), + "Loading from saved checkpoints should give same results.", ) - prompt = "a red sks dog" + def test_simple_inference_with_text_unet_lora_and_scale(self): + """ + Tests a simple inference with lora attached on the text encoder + Unet + scale argument + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - images = pipe( - prompt=prompt, - num_inference_steps=15, - cross_attention_kwargs={"scale": 0.5}, - generator=torch.manual_seed(0), - output_type="np", + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + 
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue( + not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" + ) + + output_lora_scale = pipe( + **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5} ).images + self.assertTrue( + not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3), + "Lora + scale should change the output", + ) - expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321]) + output_lora_0_scale = pipe( + **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.0} + ).images + self.assertTrue( + np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3), + "Lora + 0 scale should lead to same result as no LoRA", + ) - predicted_slice = images[0, -3:, -3:, -1].flatten() + self.assertTrue( + pipe.text_encoder.text_model.encoder.layers[0].self_attn.q_proj.scaling["default"] == 1.0, + "The scaling parameter has not been correctly restored!", + ) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + def test_simple_inference_with_text_lora_unet_fused(self): + """ + Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model + and makes sure it works as expected - with unet + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - @slow - @require_torch_gpu - def test_integration_logits_no_scale(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id) - pipe = pipe.to("cuda") + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.fuse_lora() + # Fusing should still keep the LoRA layers + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in unet") + + if self.has_two_text_encoders: + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertFalse( + np.allclose(ouput_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" + ) + 
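+    # Illustrative sketch of the user-level round-trip the fuse/unfuse tests in this
+    # mixin exercise; `pipe` and `inputs` are assumed to come from the dummy helpers
+    # above, and `fused` is a hypothetical name used only for this summary:
+    #
+    #     pipe.fuse_lora()                 # fold the LoRA deltas into the base weights
+    #     fused = pipe(**inputs).images    # LoRA layers are still reported as set
+    #     pipe.unfuse_lora()               # restore the original base weights
+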
+ def test_simple_inference_with_text_unet_lora_unloaded(self): + """ + Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.unload_lora_weights() + # unloading should remove the LoRA layers + self.assertFalse( + self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder" + ) + self.assertFalse(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet") + + if self.has_two_text_encoders: + self.assertFalse( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly unloaded in text encoder 2" + ) + ouput_unloaded = pipe(**inputs, generator=torch.manual_seed(0)).images self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder", + np.allclose(ouput_unloaded, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" ) - prompt = "a red sks dog" + def test_simple_inference_with_text_unet_lora_unfused(self): + """ + Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) - expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084]) + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - predicted_slice = images[0, -3:, -3:, -1].flatten() + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + pipe.fuse_lora() + output_fused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images -class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): - has_two_text_encoders = True - pipeline_class = StableDiffusionXLPipeline - scheduler_cls = 
EulerDiscreteScheduler - scheduler_kwargs = { - "beta_start": 0.00085, - "beta_end": 0.012, - "beta_schedule": "scaled_linear", - "timestep_spacing": "leading", - "steps_offset": 1, - } - unet_kwargs = { - "block_out_channels": (32, 64), - "layers_per_block": 2, - "sample_size": 32, + pipe.unfuse_lora() + + output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + # unloading should remove the LoRA layers + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Unfuse should still keep LoRA layers") + + if self.has_two_text_encoders: + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers" + ) + + # Fuse and unfuse should lead to the same results + self.assertTrue( + np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3), + "Fused lora should change the output", + ) + + def test_simple_inference_with_text_unet_multi_adapter(self): + """ + Tests a simple inference with lora attached to text encoder and unet, attaches + multiple adapters and set them + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + + pipe.unet.add_adapter(unet_lora_config, "adapter-1") + pipe.unet.add_adapter(unet_lora_config, "adapter-2") + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.set_adapters("adapter-1") + + output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.set_adapters("adapter-2") + output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.set_adapters(["adapter-1", "adapter-2"]) + + output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images + + # Fuse and unfuse should lead to the same results + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), + "Adapter 1 and 2 should give different results", + ) + + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 1 and mixed adapters should give different results", + ) + + self.assertFalse( + np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 2 and mixed adapters should give different results", + ) + + pipe.disable_lora() + + output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images + + self.assertTrue( + np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", + ) + + @unittest.skip("This is failing for now - need to investigate") + def 
test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): + """ + Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) + pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True) + + if self.has_two_text_encoders: + pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) + + # Just makes sure it works.. + _ = pipe(**inputs, generator=torch.manual_seed(0)).images + + +class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): + pipeline_class = StableDiffusionPipeline + scheduler_cls = DDIMScheduler + scheduler_kwargs = { + "beta_start": 0.00085, + "beta_end": 0.012, + "beta_schedule": "scaled_linear", + "clip_sample": False, + "set_alpha_to_one": False, + "steps_offset": 1, + } + unet_kwargs = { + "block_out_channels": (32, 64), + "layers_per_block": 2, + "sample_size": 32, + "in_channels": 4, + "out_channels": 4, + "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), + "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"), + "cross_attention_dim": 32, + } + vae_kwargs = { + "block_out_channels": [32, 64], + "in_channels": 3, + "out_channels": 3, + "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], + "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], + "latent_channels": 4, + } + + @slow + @require_torch_gpu + def test_integration_move_lora_cpu(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" + + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id, adapter_name="adapter-1") + pipe.load_lora_weights(lora_id, adapter_name="adapter-2") + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder", + ) + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.unet), + "Lora not correctly set in text encoder", + ) + + pipe.set_lora_device(["adapter-1"], "cpu") + + for name, module in pipe.unet.named_modules(): + if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + self.assertTrue(module.weight.device == torch.device("cpu")) + elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + # import pdb; pdb.set_trace() + self.assertTrue(module.weight.device != torch.device("cpu")) + + for name, module in pipe.text_encoder.named_modules(): + if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + self.assertTrue(module.weight.device == 
torch.device("cpu")) + elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + # import pdb; pdb.set_trace() + self.assertTrue(module.weight.device != torch.device("cpu")) + + pipe.set_lora_device(["adapter-1"], 0) + + for n, m in pipe.unet.named_modules(): + if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + for n, m in pipe.text_encoder.named_modules(): + if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + pipe.set_lora_device(["adapter-1", "adapter-2"], "cuda") + + for n, m in pipe.unet.named_modules(): + if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + for n, m in pipe.text_encoder.named_modules(): + if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + @slow + @require_torch_gpu + def test_integration_logits_with_scale(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" + + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id) + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder 2", + ) + + prompt = "a red sks dog" + + images = pipe( + prompt=prompt, + num_inference_steps=15, + cross_attention_kwargs={"scale": 0.5}, + generator=torch.manual_seed(0), + output_type="np", + ).images + + expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321]) + + predicted_slice = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + @slow + @require_torch_gpu + def test_integration_logits_no_scale(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" + + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id) + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder", + ) + + prompt = "a red sks dog" + + images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images + + expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084]) + + predicted_slice = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + @nightly + @require_torch_gpu + def test_integration_logits_multi_adapter(self): + path = "stabilityai/stable-diffusion-xl-base-1.0" + lora_id = "CiroN2022/toy-face" + + pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16) + pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy") + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.unet), + "Lora not correctly set in Unet", + ) + + prompt = "toy_face of a hacker with a hoodie" + + lora_scale = 0.9 + + images = pipe( + prompt=prompt, + num_inference_steps=30, + generator=torch.manual_seed(0), + cross_attention_kwargs={"scale": lora_scale}, + output_type="np", + ).images + expected_slice_scale = 
np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539]) + + predicted_slice = images[0, -3:, -3:, -1].flatten() + # import pdb; pdb.set_trace() + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") + pipe.set_adapters("pixel") + + prompt = "pixel art, a hacker with a hoodie, simple, flat colors" + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": lora_scale}, + generator=torch.manual_seed(0), + output_type="np", + ).images + + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array( + [0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281, 0.62258327, 0.6259889] + ) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + # multi-adapter inference + pipe.set_adapters(["pixel", "toy"], unet_weights=[0.5, 1.0]) + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": 1.0}, + generator=torch.manual_seed(0), + output_type="np", + ).images + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array([0.5977, 0.5985, 0.6039, 0.5976, 0.6025, 0.6036, 0.5946, 0.5979, 0.5998]) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + # Lora disabled + pipe.disable_lora() + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": lora_scale}, + generator=torch.manual_seed(0), + output_type="np", + ).images + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array([0.54625, 0.5473, 0.5495, 0.5465, 0.5476, 0.5461, 0.5452, 0.5485, 0.5493]) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + +class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): + has_two_text_encoders = True + pipeline_class = StableDiffusionXLPipeline + scheduler_cls = EulerDiscreteScheduler + scheduler_kwargs = { + "beta_start": 0.00085, + "beta_end": 0.012, + "beta_schedule": "scaled_linear", + "timestep_spacing": "leading", + "steps_offset": 1, + } + unet_kwargs = { + "block_out_channels": (32, 64), + "layers_per_block": 2, + "sample_size": 32, "in_channels": 4, "out_channels": 4, "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), @@ -555,3 +1065,605 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): "latent_channels": 4, "sample_size": 128, } + + +@slow +@require_torch_gpu +class LoraIntegrationTests(unittest.TestCase): + def tearDown(self): + import gc + + gc.collect() + torch.cuda.empty_cache() + gc.collect() + + def test_dreambooth_old_format(self): + generator = torch.Generator("cpu").manual_seed(0) + + lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example" + card = RepoCard.load(lora_model_id) + base_model_id = card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe( + "A photo of a sks dog floating in the river", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.7207, 0.6787, 0.6010, 0.7478, 0.6838, 0.6064, 0.6984, 0.6443, 0.5785]) + + 
self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_dreambooth_text_encoder_new_format(self): + generator = torch.Generator().manual_seed(0) + + lora_model_id = "hf-internal-testing/lora-trained" + card = RepoCard.load(lora_model_id) + base_model_id = card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe("A photo of a sks dog", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.6628, 0.6138, 0.5390, 0.6625, 0.6130, 0.5463, 0.6166, 0.5788, 0.5359]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_a1111(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None).to( + torch_device + ) + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_lycoris(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/Amixx", safety_checker=None, use_safetensors=True, variant="fp16" + ).to(torch_device) + lora_model_id = "hf-internal-testing/edgLycorisMugler-light" + lora_filename = "edgLycorisMugler-light.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.6463, 0.658, 0.599, 0.6542, 0.6512, 0.6213, 0.658, 0.6485, 0.6017]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_a1111_with_model_cpu_offload(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) + pipe.enable_model_cpu_offload() + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_a1111_with_sequential_cpu_offload(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) + pipe.enable_sequential_cpu_offload() + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, 
weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_kohya_sd_v15_with_higher_dimensions(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + lora_model_id = "hf-internal-testing/urushisato-lora" + lora_filename = "urushisato_v15.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.7165, 0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_vanilla_funetuning(self): + generator = torch.Generator().manual_seed(0) + + lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4" + card = RepoCard.load(lora_model_id) + base_model_id = card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe("A pokemon with blue eyes.", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.7406, 0.699, 0.5963, 0.7493, 0.7045, 0.6096, 0.6886, 0.6388, 0.583]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_unload_kohya_lora(self): + generator = torch.manual_seed(0) + prompt = "masterpiece, best quality, mountain" + num_inference_steps = 2 + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + initial_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + initial_images = initial_images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" + lora_filename = "Colored_Icons_by_vizsumit.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images = lora_images[0, -3:, -3:, -1].flatten() + + pipe.unload_lora_weights() + generator = torch.manual_seed(0) + unloaded_lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() + + self.assertFalse(np.allclose(initial_images, lora_images)) + self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) + release_memory(pipe) + + def test_load_unload_load_kohya_lora(self): + # This test ensures that a Kohya-style LoRA can be safely unloaded and then loaded + # without introducing any side-effects. Even though the test uses a Kohya-style + # LoRA, the underlying adapter handling mechanism is format-agnostic. 
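+        # At a glance, the sequence exercised below (an illustrative summary of this
+        # test, using the names defined in its body):
+        #     pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)  # 1. load
+        #     pipe.unload_lora_weights()                                        # 2. unload
+        #     pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)  # 3. load again
+        # The images produced by step 1 and step 3 must match.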
+ generator = torch.manual_seed(0) + prompt = "masterpiece, best quality, mountain" + num_inference_steps = 2 + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + initial_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + initial_images = initial_images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" + lora_filename = "Colored_Icons_by_vizsumit.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images = lora_images[0, -3:, -3:, -1].flatten() + + pipe.unload_lora_weights() + generator = torch.manual_seed(0) + unloaded_lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() + + self.assertFalse(np.allclose(initial_images, lora_images)) + self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) + + # make sure we can load a LoRA again after unloading and they don't have + # any undesired effects. + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images_again = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images_again = lora_images_again[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3)) + release_memory(pipe) + + +@slow +@require_torch_gpu +class LoraSDXLIntegrationTests(unittest.TestCase): + def tearDown(self): + import gc + + gc.collect() + torch.cuda.empty_cache() + gc.collect() + + def test_sdxl_0_9_lora_one(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora" + lora_filename = "daiton-xl-lora-test.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_0_9_lora_two(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora" + lora_filename = "saijo.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_0_9_lora_three(self): + generator = torch.Generator().manual_seed(0) + + pipe = 
DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora" + lora_filename = "kame_sdxl_v2-000020-16rank.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468]) + + self.assertTrue(np.allclose(images, expected, atol=5e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_sdxl_1_0_lora_fusion(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + # This way we also test equivalence between LoRA fusion and the non-fusion behaviour. 
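+        # (The slice below deliberately matches the one in test_sdxl_1_0_lora:
+        # fusing the LoRA into the base weights must not change the output pixels.)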
+ expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_sdxl_1_0_lora_unfusion(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.fuse_lora() + + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_with_fusion = images[0, -3:, -3:, -1].flatten() + + pipe.unfuse_lora() + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_without_fusion = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora_unfusion_effectivity(self): + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + original_image_slice = images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + generator = torch.Generator().manual_seed(0) + _ = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + pipe.unfuse_lora() + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_without_fusion_slice = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora_fusion_efficiency(self): + generator = torch.Generator().manual_seed(0) + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) + pipe.enable_model_cpu_offload() + + start_time = time.time() + for _ in range(3): + pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + end_time = time.time() + elapsed_time_non_fusion = end_time - start_time + + del pipe + + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` 
led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + pipe.enable_model_cpu_offload() + + start_time = time.time() + generator = torch.Generator().manual_seed(0) + for _ in range(3): + pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + end_time = time.time() + elapsed_time_fusion = end_time - start_time + + self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion) + release_memory(pipe) + + def test_sdxl_1_0_last_ben(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe.enable_model_cpu_offload() + lora_model_id = "TheLastBen/Papercut_SDXL" + lora_filename = "papercut.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe("papercut.safetensors", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.5244, 0.4347, 0.4312, 0.4246, 0.4398, 0.4409, 0.4884, 0.4938, 0.4094]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_fuse_unfuse_all(self): + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict()) + text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict()) + unet_sd = copy.deepcopy(pipe.unet.state_dict()) + + pipe.load_lora_weights( + "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16 + ) + + fused_te_state_dict = pipe.text_encoder.state_dict() + fused_te_2_state_dict = pipe.text_encoder_2.state_dict() + unet_state_dict = pipe.unet.state_dict() + + for key, value in text_encoder_1_sd.items(): + self.assertTrue(torch.allclose(fused_te_state_dict[key], value)) + + for key, value in text_encoder_2_sd.items(): + self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value)) + + for key, value in unet_state_dict.items(): + self.assertTrue(torch.allclose(unet_state_dict[key], value)) + + pipe.fuse_lora() + pipe.unload_lora_weights() + + assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict()) + assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict()) + assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict()) + release_memory(pipe) + + def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_sequential_cpu_offload() + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_canny_lora(self): + controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0") + + pipe = StableDiffusionXLControlNetPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", 
controlnet=controlnet + ) + pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors") + pipe.enable_sequential_cpu_offload() + + generator = torch.Generator(device="cpu").manual_seed(0) + prompt = "corgi" + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" + ) + + images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images + + assert images[0].shape == (768, 512, 3) + + original_image = images[0, -3:, -3:, -1].flatten() + expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333]) + assert np.allclose(original_image, expected_image, atol=1e-04) + release_memory(pipe) + + @nightly + def test_sequential_fuse_unfuse(self): + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + + # 1. round + pipe.load_lora_weights("Pclanglais/TintinIA") + pipe.fuse_lora() + + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + image_slice = images[0, -3:, -3:, -1].flatten() + + pipe.unfuse_lora() + + # 2. round + pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style") + pipe.fuse_lora() + pipe.unfuse_lora() + + # 3. round + pipe.load_lora_weights("ostris/crayon_style_lora_sdxl") + pipe.fuse_lora() + pipe.unfuse_lora() + + # 4. back to 1st round + pipe.load_lora_weights("Pclanglais/TintinIA") + pipe.fuse_lora() + + generator = torch.Generator().manual_seed(0) + images_2 = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + image_slice_2 = images_2[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(image_slice, image_slice_2, atol=1e-3)) + release_memory(pipe) diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py index 8b6d279bbe6d..80c97978723c 100644 --- a/tests/models/test_modeling_common.py +++ b/tests/models/test_modeling_common.py @@ -30,7 +30,7 @@ from diffusers.models import UNet2DConditionModel from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0, XFormersAttnProcessor from diffusers.training_utils import EMAModel -from diffusers.utils import logging +from diffusers.utils import is_xformers_available, logging from diffusers.utils.testing_utils import ( CaptureLogger, require_python39_or_higher, @@ -269,6 +269,32 @@ def test_getattr_is_correct(self): assert str(error.exception) == f"'{type(model).__name__}' object has no attribute 'does_not_exist'" + @unittest.skipIf( + torch_device != "cuda" or not is_xformers_available(), + reason="XFormers attention is only available with CUDA and `xformers` installed", + ) + def test_set_xformers_attn_processor_for_determinism(self): + torch.use_deterministic_algorithms(False) + init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + model = self.model_class(**init_dict) + model.to(torch_device) + + if not hasattr(model, "set_attn_processor"): + # If not has `set_attn_processor`, skip test + return + + model.set_default_attn_processor() + assert all(type(proc) == AttnProcessor for proc in model.attn_processors.values()) + with torch.no_grad(): + output = model(**inputs_dict)[0] + + model.enable_xformers_memory_efficient_attention() + assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values()) + with torch.no_grad(): + 
output_2 = model(**inputs_dict)[0] + + assert torch.allclose(output, output_2, atol=self.base_precision) + @require_torch_gpu def test_set_attn_processor_for_determinism(self): torch.use_deterministic_algorithms(False) @@ -292,7 +318,7 @@ def test_set_attn_processor_for_determinism(self): model.enable_xformers_memory_efficient_attention() assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values()) with torch.no_grad(): - output_3 = model(**inputs_dict)[0] + model(**inputs_dict)[0] model.set_attn_processor(AttnProcessor2_0()) assert all(type(proc) == AttnProcessor2_0 for proc in model.attn_processors.values()) @@ -313,7 +339,6 @@ def test_set_attn_processor_for_determinism(self): # make sure that outputs match assert torch.allclose(output_2, output_1, atol=self.base_precision) - assert torch.allclose(output_2, output_3, atol=self.base_precision) assert torch.allclose(output_2, output_4, atol=self.base_precision) assert torch.allclose(output_2, output_5, atol=self.base_precision) assert torch.allclose(output_2, output_6, atol=self.base_precision) From 4a484f80ba58c7a7ed4fcf044c780e1746117520 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Mon, 9 Oct 2023 17:42:18 +0000 Subject: [PATCH 2/3] fix --- tests/lora/test_lora_layers_old_backend.py | 39 +- tests/lora/test_lora_layers_peft.py | 1266 ++------------------ 2 files changed, 94 insertions(+), 1211 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 02353cdbbb4d..893429670daa 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -293,10 +293,7 @@ def create_lora_weight_file(self, tmpdirname): ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - @unittest.skipIf( - torch.cuda.is_available() != "cuda" or not is_xformers_available(), - reason="XFormers attention is only available with CUDA and `xformers` installed", - ) + @unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda") def test_stable_diffusion_xformers_attn_processors(self): # disable_full_determinism() device = "cuda" # ensure determinism for the device-dependent torch.Generator @@ -312,18 +309,7 @@ def test_stable_diffusion_xformers_attn_processors(self): image = sd_pipe(**inputs).images assert image.shape == (1, 64, 64, 3) - # run lora xformers attention - attn_processors, _ = create_unet_lora_layers(sd_pipe.unet) - attn_processors = { - k: LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim) - for k, v in attn_processors.items() - } - attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()} - sd_pipe.unet.set_attn_processor(attn_processors) - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - @unittest.skipIf(not torch.cuda.is_available(), reason="Test needs to run on GPU") + @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda") def test_stable_diffusion_attn_processors(self): # disable_full_determinism() device = "cuda" # ensure determinism for the device-dependent torch.Generator @@ -354,6 +340,18 @@ def test_stable_diffusion_attn_processors(self): sd_pipe.unet.set_attn_processor(attn_processors) image = sd_pipe(**inputs).images assert image.shape == (1, 64, 64, 3) + + # run lora xformers attention + attn_processors, _ = create_unet_lora_layers(sd_pipe.unet) + attn_processors = { + k: 
LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim) + for k, v in attn_processors.items() + } + attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()} + sd_pipe.unet.set_attn_processor(attn_processors) + image = sd_pipe(**inputs).images + assert image.shape == (1, 64, 64, 3) + # enable_full_determinism() def test_stable_diffusion_lora(self): @@ -644,10 +642,7 @@ def test_lora_unet_attn_processors_with_xformers(self): if isinstance(module, Attention): self.assertIsInstance(module.processor, XFormersAttnProcessor) - @unittest.skipIf( - torch.cuda.is_available() != "cuda" or not is_xformers_available(), - reason="XFormers attention is only available with CUDA and `xformers` installed", - ) + @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU") def test_lora_save_load_with_xformers(self): pipeline_components, lora_components = self.get_dummy_components() sd_pipe = StableDiffusionPipeline(**pipeline_components) @@ -2225,7 +2220,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.enable_model_cpu_offload() @@ -2239,7 +2234,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): del pipe - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.fuse_lora() pipe.enable_model_cpu_offload() diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 0b73a2551bc5..1862437fce88 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -12,65 +12,39 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
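Note on the skip-condition fix in the old-backend hunks above: `torch.cuda.is_available()` returns a bool, so the earlier comparison against the string "cuda" was always unequal and the decorator skipped the test unconditionally. A minimal sketch of the corrected guard, expressed as a reusable named decorator (the test class and body here are illustrative; `is_xformers_available` is the existing diffusers helper):

import unittest

import torch

from diffusers.utils.import_utils import is_xformers_available

requires_cuda_and_xformers = unittest.skipIf(
    not torch.cuda.is_available() or not is_xformers_available(),
    reason="xformers attention requires a CUDA device and the `xformers` package",
)


@requires_cuda_and_xformers
class XFormersGuardedTests(unittest.TestCase):
    def test_guard(self):
        # Only runs when both conditions hold, so CUDA is guaranteed here.
        self.assertTrue(torch.cuda.is_available())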
-import copy import os import tempfile -import time import unittest import numpy as np import torch import torch.nn as nn import torch.nn.functional as F -from huggingface_hub.repocard import RepoCard from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer from diffusers import ( AutoencoderKL, - ControlNetModel, DDIMScheduler, - DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, - StableDiffusionXLControlNetPipeline, StableDiffusionXLPipeline, UNet2DConditionModel, ) from diffusers.loaders import AttnProcsLayers -from diffusers.models.attention_processor import LoRAAttnProcessor, LoRAAttnProcessor2_0 -from diffusers.utils.import_utils import is_accelerate_available, is_peft_available -from diffusers.utils.testing_utils import ( - floats_tensor, - load_image, - nightly, - require_peft_backend, - require_torch_gpu, - slow, - torch_device, +from diffusers.models.attention_processor import ( + LoRAAttnProcessor, + LoRAAttnProcessor2_0, ) +from diffusers.utils.import_utils import is_peft_available +from diffusers.utils.testing_utils import floats_tensor, require_peft_backend, require_torch_gpu, slow -if is_accelerate_available(): - from accelerate.utils import release_memory - if is_peft_available(): from peft import LoraConfig from peft.tuners.tuners_utils import BaseTunerLayer from peft.utils import get_peft_model_state_dict -def state_dicts_almost_equal(sd1, sd2): - sd1 = dict(sorted(sd1.items())) - sd2 = dict(sorted(sd2.items())) - - models_are_equal = True - for ten1, ten2 in zip(sd1.values(), sd2.values()): - if (ten1 - ten2).abs().max() > 1e-3: - models_are_equal = False - - return models_are_equal - - def create_unet_lora_layers(unet: nn.Module): lora_attn_procs = {} for name in unet.attn_processors.keys(): @@ -120,10 +94,6 @@ def get_dummy_components(self): r=4, lora_alpha=4, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"], init_lora_weights=False ) - unet_lora_config = LoraConfig( - r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"], init_lora_weights=False - ) - unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) if self.has_two_text_encoders: @@ -150,7 +120,7 @@ def get_dummy_components(self): "unet_lora_layers": unet_lora_layers, "unet_lora_attn_procs": unet_lora_attn_procs, } - return pipeline_components, lora_components, text_lora_config, unet_lora_config + return pipeline_components, lora_components, text_lora_config def get_dummy_inputs(self, with_generator=True): batch_size = 1 @@ -196,7 +166,7 @@ def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected """ - components, _, _, _ = self.get_dummy_components() + components, _, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -210,7 +180,7 @@ def test_simple_inference_with_text_lora(self): Tests a simple inference with lora attached on the text encoder and makes sure it works as expected """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -238,7 +208,7 @@ def test_simple_inference_with_text_lora_and_scale(self): Tests a simple inference with lora attached on the text encoder + scale argument and makes sure it works as expected """ - components, _, text_lora_config, _ = 
self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -282,7 +252,7 @@ def test_simple_inference_with_text_lora_fused(self): Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model and makes sure it works as expected """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -319,7 +289,7 @@ def test_simple_inference_with_text_lora_unloaded(self): Tests a simple inference with lora attached to text encoder, then unloads the lora weights and makes sure it works as expected """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -357,7 +327,7 @@ def test_simple_inference_with_text_lora_save_load(self): """ Tests a simple usecase where users could use saving utilities for LoRA. """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -417,7 +387,7 @@ def test_simple_inference_save_pretrained(self): """ Tests a simple usecase where users could use saving utilities for LoRA through save_pretrained """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -461,572 +431,92 @@ def test_simple_inference_save_pretrained(self): "Loading from saved checkpoints should give same results.", ) - def test_simple_inference_with_text_unet_lora_save_load(self): - """ - Tests a simple usecase where users could use saving utilities for LoRA for Unet + text encoder - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) - - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - - with tempfile.TemporaryDirectory() as tmpdirname: - text_encoder_state_dict = get_peft_model_state_dict(pipe.text_encoder) - unet_state_dict = get_peft_model_state_dict(pipe.unet) - if self.has_two_text_encoders: - text_encoder_2_state_dict = get_peft_model_state_dict(pipe.text_encoder_2) - - 
self.pipeline_class.save_lora_weights( - save_directory=tmpdirname, - text_encoder_lora_layers=text_encoder_state_dict, - text_encoder_2_lora_layers=text_encoder_2_state_dict, - unet_lora_layers=unet_state_dict, - safe_serialization=False, - ) - else: - self.pipeline_class.save_lora_weights( - save_directory=tmpdirname, - text_encoder_lora_layers=text_encoder_state_dict, - unet_lora_layers=unet_state_dict, - safe_serialization=False, - ) - - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - pipe.unload_lora_weights() - - pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) - - images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - self.assertTrue( - np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3), - "Loading from saved checkpoints should give same results.", - ) - - def test_simple_inference_with_text_unet_lora_and_scale(self): - """ - Tests a simple inference with lora attached on the text encoder + Unet + scale argument - and makes sure it works as expected - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) +class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): + pipeline_class = StableDiffusionPipeline + scheduler_cls = DDIMScheduler + scheduler_kwargs = { + "beta_start": 0.00085, + "beta_end": 0.012, + "beta_schedule": "scaled_linear", + "clip_sample": False, + "set_alpha_to_one": False, + "steps_offset": 1, + } + unet_kwargs = { + "block_out_channels": (32, 64), + "layers_per_block": 2, + "sample_size": 32, + "in_channels": 4, + "out_channels": 4, + "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), + "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"), + "cross_attention_dim": 32, + } + vae_kwargs = { + "block_out_channels": [32, 64], + "in_channels": 3, + "out_channels": 3, + "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], + "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], + "latent_channels": 4, + } - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + @slow + @require_torch_gpu + def test_integration_logits_with_scale(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id) + 
pipe = pipe.to("cuda") - output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images self.assertTrue( - not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder 2", ) - output_lora_scale = pipe( - **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5} - ).images - self.assertTrue( - not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3), - "Lora + scale should change the output", - ) + prompt = "a red sks dog" - output_lora_0_scale = pipe( - **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.0} + images = pipe( + prompt=prompt, + num_inference_steps=15, + cross_attention_kwargs={"scale": 0.5}, + generator=torch.manual_seed(0), + output_type="np", ).images - self.assertTrue( - np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3), - "Lora + 0 scale should lead to same result as no LoRA", - ) - - self.assertTrue( - pipe.text_encoder.text_model.encoder.layers[0].self_attn.q_proj.scaling["default"] == 1.0, - "The scaling parameter has not been correctly restored!", - ) - - def test_simple_inference_with_text_lora_unet_fused(self): - """ - Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model - and makes sure it works as expected - with unet - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) - - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - pipe.fuse_lora() - # Fusing should still keep the LoRA layers - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in unet") - - if self.has_two_text_encoders: - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertFalse( - np.allclose(ouput_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" - ) - - def test_simple_inference_with_text_unet_lora_unloaded(self): - """ - Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights - and makes sure it works as expected - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - 
self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321]) - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + predicted_slice = images[0, -3:, -3:, -1].flatten() - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - pipe.unload_lora_weights() - # unloading should remove the LoRA layers - self.assertFalse( - self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder" - ) - self.assertFalse(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet") + @slow + @require_torch_gpu + def test_integration_logits_no_scale(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" - if self.has_two_text_encoders: - self.assertFalse( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly unloaded in text encoder 2" - ) + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id) + pipe = pipe.to("cuda") - ouput_unloaded = pipe(**inputs, generator=torch.manual_seed(0)).images self.assertTrue( - np.allclose(ouput_unloaded, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder", ) - def test_simple_inference_with_text_unet_lora_unfused(self): - """ - Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights - and makes sure it works as expected - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - pipe.fuse_lora() + prompt = "a red sks dog" - output_fused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images - pipe.unfuse_lora() + expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084]) - output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - # unloading should remove the LoRA layers - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Unfuse 
should still keep LoRA layers") + predicted_slice = images[0, -3:, -3:, -1].flatten() - if self.has_two_text_encoders: - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers" - ) - - # Fuse and unfuse should lead to the same results - self.assertTrue( - np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3), - "Fused lora should change the output", - ) - - def test_simple_inference_with_text_unet_multi_adapter(self): - """ - Tests a simple inference with lora attached to text encoder and unet, attaches - multiple adapters and set them - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") - - pipe.unet.add_adapter(unet_lora_config, "adapter-1") - pipe.unet.add_adapter(unet_lora_config, "adapter-2") - - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - pipe.set_adapters("adapter-1") - - output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images - - pipe.set_adapters("adapter-2") - output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images - - pipe.set_adapters(["adapter-1", "adapter-2"]) - - output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images - - # Fuse and unfuse should lead to the same results - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), - "Adapter 1 and 2 should give different results", - ) - - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 1 and mixed adapters should give different results", - ) - - self.assertFalse( - np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 2 and mixed adapters should give different results", - ) - - pipe.disable_lora() - - output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images - - self.assertTrue( - np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), - "output with no lora and output with lora disabled should give same results", - ) - - @unittest.skip("This is failing for now - need to investigate") - def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): - """ - Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights - and makes sure it works as expected - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - - 
self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) - pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True) - - if self.has_two_text_encoders: - pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) - - # Just makes sure it works.. - _ = pipe(**inputs, generator=torch.manual_seed(0)).images - - -class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): - pipeline_class = StableDiffusionPipeline - scheduler_cls = DDIMScheduler - scheduler_kwargs = { - "beta_start": 0.00085, - "beta_end": 0.012, - "beta_schedule": "scaled_linear", - "clip_sample": False, - "set_alpha_to_one": False, - "steps_offset": 1, - } - unet_kwargs = { - "block_out_channels": (32, 64), - "layers_per_block": 2, - "sample_size": 32, - "in_channels": 4, - "out_channels": 4, - "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), - "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"), - "cross_attention_dim": 32, - } - vae_kwargs = { - "block_out_channels": [32, 64], - "in_channels": 3, - "out_channels": 3, - "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], - "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], - "latent_channels": 4, - } - - @slow - @require_torch_gpu - def test_integration_move_lora_cpu(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" - - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id, adapter_name="adapter-1") - pipe.load_lora_weights(lora_id, adapter_name="adapter-2") - pipe = pipe.to("cuda") - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder", - ) - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.unet), - "Lora not correctly set in text encoder", - ) - - pipe.set_lora_device(["adapter-1"], "cpu") - - for name, module in pipe.unet.named_modules(): - if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - self.assertTrue(module.weight.device == torch.device("cpu")) - elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - # import pdb; pdb.set_trace() - self.assertTrue(module.weight.device != torch.device("cpu")) - - for name, module in pipe.text_encoder.named_modules(): - if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - self.assertTrue(module.weight.device == torch.device("cpu")) - elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - # import pdb; pdb.set_trace() - self.assertTrue(module.weight.device != torch.device("cpu")) - - pipe.set_lora_device(["adapter-1"], 0) - - for n, m in pipe.unet.named_modules(): - if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): - self.assertTrue(m.weight.device != torch.device("cpu")) - - for n, m in pipe.text_encoder.named_modules(): - if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): - self.assertTrue(m.weight.device != torch.device("cpu")) - - 
pipe.set_lora_device(["adapter-1", "adapter-2"], "cuda") - - for n, m in pipe.unet.named_modules(): - if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): - self.assertTrue(m.weight.device != torch.device("cpu")) - - for n, m in pipe.text_encoder.named_modules(): - if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): - self.assertTrue(m.weight.device != torch.device("cpu")) - - @slow - @require_torch_gpu - def test_integration_logits_with_scale(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" - - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id) - pipe = pipe.to("cuda") - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder 2", - ) - - prompt = "a red sks dog" - - images = pipe( - prompt=prompt, - num_inference_steps=15, - cross_attention_kwargs={"scale": 0.5}, - generator=torch.manual_seed(0), - output_type="np", - ).images - - expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321]) - - predicted_slice = images[0, -3:, -3:, -1].flatten() - - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - @slow - @require_torch_gpu - def test_integration_logits_no_scale(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" - - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id) - pipe = pipe.to("cuda") - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder", - ) - - prompt = "a red sks dog" - - images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images - - expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084]) - - predicted_slice = images[0, -3:, -3:, -1].flatten() - - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - @nightly - @require_torch_gpu - def test_integration_logits_multi_adapter(self): - path = "stabilityai/stable-diffusion-xl-base-1.0" - lora_id = "CiroN2022/toy-face" - - pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16) - pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy") - pipe = pipe.to("cuda") - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.unet), - "Lora not correctly set in Unet", - ) - - prompt = "toy_face of a hacker with a hoodie" - - lora_scale = 0.9 - - images = pipe( - prompt=prompt, - num_inference_steps=30, - generator=torch.manual_seed(0), - cross_attention_kwargs={"scale": lora_scale}, - output_type="np", - ).images - expected_slice_scale = np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539]) - - predicted_slice = images[0, -3:, -3:, -1].flatten() - # import pdb; pdb.set_trace() - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") - pipe.set_adapters("pixel") - - prompt = "pixel art, a hacker with a hoodie, simple, flat colors" - images = pipe( - prompt, - num_inference_steps=30, - guidance_scale=7.5, - cross_attention_kwargs={"scale": lora_scale}, - 
generator=torch.manual_seed(0), - output_type="np", - ).images - - predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array( - [0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281, 0.62258327, 0.6259889] - ) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - # multi-adapter inference - pipe.set_adapters(["pixel", "toy"], unet_weights=[0.5, 1.0]) - images = pipe( - prompt, - num_inference_steps=30, - guidance_scale=7.5, - cross_attention_kwargs={"scale": 1.0}, - generator=torch.manual_seed(0), - output_type="np", - ).images - predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array([0.5977, 0.5985, 0.6039, 0.5976, 0.6025, 0.6036, 0.5946, 0.5979, 0.5998]) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - # Lora disabled - pipe.disable_lora() - images = pipe( - prompt, - num_inference_steps=30, - guidance_scale=7.5, - cross_attention_kwargs={"scale": lora_scale}, - generator=torch.manual_seed(0), - output_type="np", - ).images - predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array([0.54625, 0.5473, 0.5495, 0.5465, 0.5476, 0.5461, 0.5452, 0.5485, 0.5493]) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): @@ -1065,605 +555,3 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): "latent_channels": 4, "sample_size": 128, } - - -@slow -@require_torch_gpu -class LoraIntegrationTests(unittest.TestCase): - def tearDown(self): - import gc - - gc.collect() - torch.cuda.empty_cache() - gc.collect() - - def test_dreambooth_old_format(self): - generator = torch.Generator("cpu").manual_seed(0) - - lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example" - card = RepoCard.load(lora_model_id) - base_model_id = card.data.to_dict()["base_model"] - - pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) - pipe = pipe.to(torch_device) - pipe.load_lora_weights(lora_model_id) - - images = pipe( - "A photo of a sks dog floating in the river", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - - expected = np.array([0.7207, 0.6787, 0.6010, 0.7478, 0.6838, 0.6064, 0.6984, 0.6443, 0.5785]) - - self.assertTrue(np.allclose(images, expected, atol=1e-4)) - release_memory(pipe) - - def test_dreambooth_text_encoder_new_format(self): - generator = torch.Generator().manual_seed(0) - - lora_model_id = "hf-internal-testing/lora-trained" - card = RepoCard.load(lora_model_id) - base_model_id = card.data.to_dict()["base_model"] - - pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) - pipe = pipe.to(torch_device) - pipe.load_lora_weights(lora_model_id) - - images = pipe("A photo of a sks dog", output_type="np", generator=generator, num_inference_steps=2).images - - images = images[0, -3:, -3:, -1].flatten() - - expected = np.array([0.6628, 0.6138, 0.5390, 0.6625, 0.6130, 0.5463, 0.6166, 0.5788, 0.5359]) - - self.assertTrue(np.allclose(images, expected, atol=1e-4)) - release_memory(pipe) - - def test_a1111(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", 
safety_checker=None).to( - torch_device - ) - lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" - lora_filename = "light_and_shadow.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_lycoris(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/Amixx", safety_checker=None, use_safetensors=True, variant="fp16" - ).to(torch_device) - lora_model_id = "hf-internal-testing/edgLycorisMugler-light" - lora_filename = "edgLycorisMugler-light.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.6463, 0.658, 0.599, 0.6542, 0.6512, 0.6213, 0.658, 0.6485, 0.6017]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_a1111_with_model_cpu_offload(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) - pipe.enable_model_cpu_offload() - lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" - lora_filename = "light_and_shadow.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_a1111_with_sequential_cpu_offload(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) - pipe.enable_sequential_cpu_offload() - lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" - lora_filename = "light_and_shadow.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_kohya_sd_v15_with_higher_dimensions(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( - torch_device - ) - lora_model_id = "hf-internal-testing/urushisato-lora" - lora_filename = "urushisato_v15.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.7165, 
0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_vanilla_funetuning(self): - generator = torch.Generator().manual_seed(0) - - lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4" - card = RepoCard.load(lora_model_id) - base_model_id = card.data.to_dict()["base_model"] - - pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) - pipe = pipe.to(torch_device) - pipe.load_lora_weights(lora_model_id) - - images = pipe("A pokemon with blue eyes.", output_type="np", generator=generator, num_inference_steps=2).images - - images = images[0, -3:, -3:, -1].flatten() - - expected = np.array([0.7406, 0.699, 0.5963, 0.7493, 0.7045, 0.6096, 0.6886, 0.6388, 0.583]) - - self.assertTrue(np.allclose(images, expected, atol=1e-4)) - release_memory(pipe) - - def test_unload_kohya_lora(self): - generator = torch.manual_seed(0) - prompt = "masterpiece, best quality, mountain" - num_inference_steps = 2 - - pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( - torch_device - ) - initial_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - initial_images = initial_images[0, -3:, -3:, -1].flatten() - - lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" - lora_filename = "Colored_Icons_by_vizsumit.safetensors" - - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - generator = torch.manual_seed(0) - lora_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - lora_images = lora_images[0, -3:, -3:, -1].flatten() - - pipe.unload_lora_weights() - generator = torch.manual_seed(0) - unloaded_lora_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() - - self.assertFalse(np.allclose(initial_images, lora_images)) - self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) - release_memory(pipe) - - def test_load_unload_load_kohya_lora(self): - # This test ensures that a Kohya-style LoRA can be safely unloaded and then loaded - # without introducing any side-effects. Even though the test uses a Kohya-style - # LoRA, the underlying adapter handling mechanism is format-agnostic. 
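The test this comment introduces (like test_unload_kohya_lora just above it) pins down an invariant that carries over to the PEFT backend: loading a LoRA, unloading it, and loading it again must leave the base weights fully restored, with no residue from the first load. A minimal sketch of that round trip outside the test harness (the model and LoRA ids are the ones these tests use; the seed and step count are illustrative):

import numpy as np
import torch

from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", safety_checker=None
).to("cuda")


def render():
    # Re-seed on each call so the three runs are directly comparable.
    return pipe(
        "masterpiece, best quality, mountain",
        output_type="np",
        generator=torch.manual_seed(0),
        num_inference_steps=2,
    ).images


base = render()

pipe.load_lora_weights(
    "hf-internal-testing/civitai-colored-icons-lora",
    weight_name="Colored_Icons_by_vizsumit.safetensors",
)
with_lora = render()

pipe.unload_lora_weights()
after_unload = render()

assert not np.allclose(base, with_lora)  # the LoRA changed the output
assert np.allclose(base, after_unload, atol=1e-3)  # unloading restored the base model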
- generator = torch.manual_seed(0) - prompt = "masterpiece, best quality, mountain" - num_inference_steps = 2 - - pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( - torch_device - ) - initial_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - initial_images = initial_images[0, -3:, -3:, -1].flatten() - - lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" - lora_filename = "Colored_Icons_by_vizsumit.safetensors" - - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - generator = torch.manual_seed(0) - lora_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - lora_images = lora_images[0, -3:, -3:, -1].flatten() - - pipe.unload_lora_weights() - generator = torch.manual_seed(0) - unloaded_lora_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() - - self.assertFalse(np.allclose(initial_images, lora_images)) - self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) - - # make sure we can load a LoRA again after unloading and they don't have - # any undesired effects. - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - generator = torch.manual_seed(0) - lora_images_again = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - lora_images_again = lora_images_again[0, -3:, -3:, -1].flatten() - - self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3)) - release_memory(pipe) - - -@slow -@require_torch_gpu -class LoraSDXLIntegrationTests(unittest.TestCase): - def tearDown(self): - import gc - - gc.collect() - torch.cuda.empty_cache() - gc.collect() - - def test_sdxl_0_9_lora_one(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora" - lora_filename = "daiton-xl-lora-test.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.enable_model_cpu_offload() - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_sdxl_0_9_lora_two(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora" - lora_filename = "saijo.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.enable_model_cpu_offload() - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_sdxl_0_9_lora_three(self): - generator = torch.Generator().manual_seed(0) - - pipe = 
DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora" - lora_filename = "kame_sdxl_v2-000020-16rank.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.enable_model_cpu_offload() - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468]) - - self.assertTrue(np.allclose(images, expected, atol=5e-3)) - release_memory(pipe) - - def test_sdxl_1_0_lora(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.enable_model_cpu_offload() - lora_model_id = "hf-internal-testing/sdxl-1.0-lora" - lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) - - self.assertTrue(np.allclose(images, expected, atol=1e-4)) - release_memory(pipe) - - def test_sdxl_1_0_lora_fusion(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - lora_model_id = "hf-internal-testing/sdxl-1.0-lora" - lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - pipe.fuse_lora() - # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being - # silently deleted - otherwise this will CPU OOM - pipe.unload_lora_weights() - - pipe.enable_model_cpu_offload() - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - # This way we also test equivalence between LoRA fusion and the non-fusion behaviour. 
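Every fusion test removed in this block follows the same recipe, and the ordering is load-bearing: fuse first, then drop the now-redundant adapter weights before enabling offload, since the adapter copies retained by the newer `fuse_lora` API can otherwise exhaust host memory. A minimal sketch of the recipe (the ids are the ones used in these tests; the prompt and step count are illustrative):

import torch

from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.load_lora_weights(
    "hf-internal-testing/sdxl-1.0-lora",
    weight_name="sd_xl_offset_example-lora_1.0.safetensors",
)

pipe.fuse_lora()  # bake the LoRA into the base weights
pipe.unload_lora_weights()  # free the adapter copies the fused pipeline no longer needs
pipe.enable_model_cpu_offload()

image = pipe(
    "masterpiece, best quality, mountain",
    generator=torch.manual_seed(0),
    num_inference_steps=2,
).images[0]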
-        expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
-
-        self.assertTrue(np.allclose(images, expected, atol=1e-4))
-        release_memory(pipe)
-
-    def test_sdxl_1_0_lora_unfusion(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-        lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
-        lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-        pipe.fuse_lora()
-
-        pipe.enable_model_cpu_offload()
-
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        images_with_fusion = images[0, -3:, -3:, -1].flatten()
-
-        pipe.unfuse_lora()
-        generator = torch.Generator().manual_seed(0)
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        images_without_fusion = images[0, -3:, -3:, -1].flatten()
-
-        self.assertTrue(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3))
-        release_memory(pipe)
-
-    def test_sdxl_1_0_lora_unfusion_effectivity(self):
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-        pipe.enable_model_cpu_offload()
-
-        generator = torch.Generator().manual_seed(0)
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        original_image_slice = images[0, -3:, -3:, -1].flatten()
-
-        lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
-        lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-        pipe.fuse_lora()
-        # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
-        # silently deleted - otherwise this will CPU OOM
-        pipe.unload_lora_weights()
-
-        generator = torch.Generator().manual_seed(0)
-        _ = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-
-        pipe.unfuse_lora()
-        generator = torch.Generator().manual_seed(0)
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        images_without_fusion_slice = images[0, -3:, -3:, -1].flatten()
-
-        self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3))
-        release_memory(pipe)
-
-    def test_sdxl_1_0_lora_fusion_efficiency(self):
-        generator = torch.Generator().manual_seed(0)
-        lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
-        lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16)
-        pipe.enable_model_cpu_offload()
-
-        start_time = time.time()
-        for _ in range(3):
-            pipe(
-                "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-            ).images
-        end_time = time.time()
-        elapsed_time_non_fusion = end_time - start_time
-
-        del pipe
-
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16)
-        pipe.fuse_lora()
-        # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
-        # silently deleted - otherwise this will CPU OOM
-        pipe.unload_lora_weights()
-
-        pipe.enable_model_cpu_offload()
-
-        start_time = time.time()
-        generator = torch.Generator().manual_seed(0)
-        for _ in range(3):
-            pipe(
-                "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-            ).images
-        end_time = time.time()
-        elapsed_time_fusion = end_time - start_time
-
-        self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion)
-        release_memory(pipe)
-
-    def test_sdxl_1_0_last_ben(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda")
-        pipe.enable_model_cpu_offload()
-        lora_model_id = "TheLastBen/Papercut_SDXL"
-        lora_filename = "papercut.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
-        images = pipe("papercut.safetensors", output_type="np", generator=generator, num_inference_steps=2).images
-
-        images = images[0, -3:, -3:, -1].flatten()
-        expected = np.array([0.5244, 0.4347, 0.4312, 0.4246, 0.4398, 0.4409, 0.4884, 0.4938, 0.4094])
-
-        self.assertTrue(np.allclose(images, expected, atol=1e-3))
-        release_memory(pipe)
-
-    def test_sdxl_1_0_fuse_unfuse_all(self):
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
-        text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
-        text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
-        unet_sd = copy.deepcopy(pipe.unet.state_dict())
-
-        pipe.load_lora_weights(
-            "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16
-        )
-
-        fused_te_state_dict = pipe.text_encoder.state_dict()
-        fused_te_2_state_dict = pipe.text_encoder_2.state_dict()
-        unet_state_dict = pipe.unet.state_dict()
-
-        for key, value in text_encoder_1_sd.items():
-            self.assertTrue(torch.allclose(fused_te_state_dict[key], value))
-
-        for key, value in text_encoder_2_sd.items():
-            self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value))
-
-        for key, value in unet_state_dict.items():
-            self.assertTrue(torch.allclose(unet_state_dict[key], value))
-
-        pipe.fuse_lora()
-        pipe.unload_lora_weights()
-
-        assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict())
-        assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())
-        assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict())
-        release_memory(pipe)
-
-    def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-        pipe.enable_sequential_cpu_offload()
-        lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
-        lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-
-        images = images[0, -3:, -3:, -1].flatten()
-        expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
-
-        self.assertTrue(np.allclose(images, expected, atol=1e-3))
-        release_memory(pipe)
-
-    def test_canny_lora(self):
-        controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
-
-        pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
-        )
-        pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors")
-        pipe.enable_sequential_cpu_offload()
-
-        generator = torch.Generator(device="cpu").manual_seed(0)
-        prompt = "corgi"
-        image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
-        )
-
-        images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
-
-        assert images[0].shape == (768, 512, 3)
-
-        original_image = images[0, -3:, -3:, -1].flatten()
-        expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])
-        assert np.allclose(original_image, expected_image, atol=1e-04)
-        release_memory(pipe)
-
-    @nightly
-    def test_sequential_fuse_unfuse(self):
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-
-        # 1. round
-        pipe.load_lora_weights("Pclanglais/TintinIA")
-        pipe.fuse_lora()
-
-        generator = torch.Generator().manual_seed(0)
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        image_slice = images[0, -3:, -3:, -1].flatten()
-
-        pipe.unfuse_lora()
-
-        # 2. round
-        pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style")
-        pipe.fuse_lora()
-        pipe.unfuse_lora()
-
-        # 3. round
-        pipe.load_lora_weights("ostris/crayon_style_lora_sdxl")
-        pipe.fuse_lora()
-        pipe.unfuse_lora()
-
-        # 4. back to 1st round
-        pipe.load_lora_weights("Pclanglais/TintinIA")
-        pipe.fuse_lora()
-
-        generator = torch.Generator().manual_seed(0)
-        images_2 = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        image_slice_2 = images_2[0, -3:, -3:, -1].flatten()
-
-        self.assertTrue(np.allclose(image_slice, image_slice_2, atol=1e-3))
-        release_memory(pipe)
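The deleted tests above all pin down the same pipeline-level LoRA fusion contract: `fuse_lora` merges the loaded LoRA deltas into the base weights in place, and `unfuse_lora` backs them out again while leaving the LoRA layers loaded, so a fused run and an unfused run from the same seed should agree. A rough sketch of that round trip, reusing the checkpoint and LoRA IDs from the tests; the dtype, offloading, and tolerance here are illustrative rather than the exact test setup:

import numpy as np
import torch

from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.load_lora_weights(
    "hf-internal-testing/sdxl-1.0-lora",
    weight_name="sd_xl_offset_example-lora_1.0.safetensors",
)


def render(pipe):
    # fixed seed so the fused and unfused runs are directly comparable
    generator = torch.Generator().manual_seed(0)
    images = pipe(
        "masterpiece, best quality, mountain",
        output_type="np",
        generator=generator,
        num_inference_steps=2,
    ).images
    return images[0, -3:, -3:, -1].flatten()


pipe.fuse_lora()  # merge the LoRA deltas into the base weights in place
fused_slice = render(pipe)

pipe.unfuse_lora()  # restore the base weights; the LoRA layers stay loaded
unfused_slice = render(pipe)

# unfusing keeps the LoRA active as separate layers, so outputs should match
assert np.allclose(fused_slice, unfused_slice, atol=1e-3)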
From 06d0b8ac92dc77525048ea4b486b3d62257f78b5 Mon Sep 17 00:00:00 2001
From: Dhruv Nair
Date: Thu, 12 Oct 2023 15:54:21 +0000
Subject: [PATCH 3/3] remove ptl from test runner images

---
 docker/diffusers-pytorch-compile-cuda/Dockerfile  | 3 +--
 docker/diffusers-pytorch-cuda/Dockerfile          | 3 +--
 docker/diffusers-pytorch-xformers-cuda/Dockerfile | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/docker/diffusers-pytorch-compile-cuda/Dockerfile b/docker/diffusers-pytorch-compile-cuda/Dockerfile
index 1f7fe063b70d..ecdd5f7b9785 100644
--- a/docker/diffusers-pytorch-compile-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-compile-cuda/Dockerfile
@@ -41,7 +41,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
     scipy \
     tensorboard \
     transformers \
-    omegaconf \
-    pytorch-lightning
+    omegaconf

 CMD ["/bin/bash"]
diff --git a/docker/diffusers-pytorch-cuda/Dockerfile b/docker/diffusers-pytorch-cuda/Dockerfile
index b8e2af01f995..3a2de5167946 100644
--- a/docker/diffusers-pytorch-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-cuda/Dockerfile
@@ -40,7 +40,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
     scipy \
     tensorboard \
     transformers \
-    omegaconf \
-    pytorch-lightning
+    omegaconf

 CMD ["/bin/bash"]
diff --git a/docker/diffusers-pytorch-xformers-cuda/Dockerfile b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
index 4c447749da7b..95fe933798bc 100644
--- a/docker/diffusers-pytorch-xformers-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
@@ -41,7 +41,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
     tensorboard \
     transformers \
     omegaconf \
-    pytorch-lightning \
     xformers

 CMD ["/bin/bash"]
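With pytorch-lightning dropped everywhere, the dedicated runner image is now the only one of these three that still installs `xformers`. A cheap smoke check for the built image is to enable memory-efficient attention, which fails immediately if the package is absent or was compiled against a mismatched torch/CUDA pair. A minimal sketch, assuming a CUDA device; the tiny checkpoint ID is illustrative:

import torch

from diffusers import DiffusionPipeline
from diffusers.utils.import_utils import is_xformers_available

assert is_xformers_available(), "xformers did not make it into the image"

# a tiny checkpoint keeps the check fast; any SD-style pipeline would do
pipe = DiffusionPipeline.from_pretrained(
    "hf-internal-testing/tiny-stable-diffusion-torch"
).to("cuda")

pipe.enable_xformers_memory_efficient_attention()  # raises if xformers is unusable
images = pipe("smoke test", num_inference_steps=2, output_type="np").images
assert images.shape[0] == 1
pipe.disable_xformers_memory_efficient_attention()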