From bb0ac6dbf73732ca325a5352abf65f7587aaba05 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Sun, 24 Dec 2023 20:04:35 +0530
Subject: [PATCH 1/2] fix: lora peft dummy components

---
 tests/lora/test_lora_layers_peft.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 180d45b6803e..92c5d7ff15b9 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -113,11 +113,14 @@ def get_dummy_components(self, scheduler_cls=None):
         scheduler_cls = self.scheduler_cls if scheduler_cls is None else LCMScheduler
         rank = 4
 
-        torch.manual_seed(0)
-        unet = UNet2DConditionModel(**self.unet_kwargs)
-        scheduler = scheduler_cls(**self.scheduler_kwargs)
-        torch.manual_seed(0)
-        vae = AutoencoderKL(**self.vae_kwargs)
+        if self.unet_kwargs is not None:
+            torch.manual_seed(0)
+            unet = UNet2DConditionModel(**self.unet_kwargs)
+        if self.scheduler_kwargs is not None:
+            scheduler = scheduler_cls(**self.scheduler_kwargs)
+        if self.vae_kwargs is not None:
+            torch.manual_seed(0)
+            vae = AutoencoderKL(**self.vae_kwargs)
 
         text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
         tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")

From d40bea464e6db60066e8e523f1e21cc6ddb43922 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Sun, 24 Dec 2023 20:19:19 +0530
Subject: [PATCH 2/2] fix: dummy components

---
 tests/lora/test_lora_layers_peft.py | 81 ++++++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 92c5d7ff15b9..38e55b9ed7b4 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -113,14 +113,14 @@ def get_dummy_components(self, scheduler_cls=None):
         scheduler_cls = self.scheduler_cls if scheduler_cls is None else LCMScheduler
         rank = 4
 
-        if self.unet_kwargs is not None:
-            torch.manual_seed(0)
-            unet = UNet2DConditionModel(**self.unet_kwargs)
-        if self.scheduler_kwargs is not None:
-            scheduler = scheduler_cls(**self.scheduler_kwargs)
-        if self.vae_kwargs is not None:
-            torch.manual_seed(0)
-            vae = AutoencoderKL(**self.vae_kwargs)
+        torch.manual_seed(0)
+        unet = UNet2DConditionModel(**self.unet_kwargs)
+
+        scheduler = scheduler_cls(**self.scheduler_kwargs)
+
+        torch.manual_seed(0)
+        vae = AutoencoderKL(**self.vae_kwargs)
+
 
         text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
         tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
@@ -1405,6 +1405,35 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
 @slow
 @require_torch_gpu
 class LoraIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
+    pipeline_class = StableDiffusionPipeline
+    scheduler_cls = DDIMScheduler
+    scheduler_kwargs = {
+        "beta_start": 0.00085,
+        "beta_end": 0.012,
+        "beta_schedule": "scaled_linear",
+        "clip_sample": False,
+        "set_alpha_to_one": False,
+        "steps_offset": 1,
+    }
+    unet_kwargs = {
+        "block_out_channels": (32, 64),
+        "layers_per_block": 2,
+        "sample_size": 32,
+        "in_channels": 4,
+        "out_channels": 4,
+        "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
+        "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
+        "cross_attention_dim": 32,
+    }
+    vae_kwargs = {
+        "block_out_channels": [32, 64],
+        "in_channels": 3,
+        "out_channels": 3,
+        "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
+        "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
+        "latent_channels": 4,
+    }
+
     def tearDown(self):
         import gc
 
@@ -1658,6 +1687,42 @@ def test_load_unload_load_kohya_lora(self):
 @slow
 @require_torch_gpu
 class LoraSDXLIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
+    has_two_text_encoders = True
+    pipeline_class = StableDiffusionXLPipeline
+    scheduler_cls = EulerDiscreteScheduler
+    scheduler_kwargs = {
+        "beta_start": 0.00085,
+        "beta_end": 0.012,
+        "beta_schedule": "scaled_linear",
+        "timestep_spacing": "leading",
+        "steps_offset": 1,
+    }
+    unet_kwargs = {
+        "block_out_channels": (32, 64),
+        "layers_per_block": 2,
+        "sample_size": 32,
+        "in_channels": 4,
+        "out_channels": 4,
+        "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
+        "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
+        "attention_head_dim": (2, 4),
+        "use_linear_projection": True,
+        "addition_embed_type": "text_time",
+        "addition_time_embed_dim": 8,
+        "transformer_layers_per_block": (1, 2),
+        "projection_class_embeddings_input_dim": 80,  # 6 * 8 + 32
+        "cross_attention_dim": 64,
+    }
+    vae_kwargs = {
+        "block_out_channels": [32, 64],
+        "in_channels": 3,
+        "out_channels": 3,
+        "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
+        "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
+        "latent_channels": 4,
+        "sample_size": 128,
+    }
+
     def tearDown(self):
         import gc