From bb0ac6dbf73732ca325a5352abf65f7587aaba05 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Sun, 24 Dec 2023 20:04:35 +0530
Subject: [PATCH 1/2] fix: lora peft dummy components

---
 tests/lora/test_lora_layers_peft.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 180d45b6803e..92c5d7ff15b9 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -113,11 +113,14 @@ def get_dummy_components(self, scheduler_cls=None):
         scheduler_cls = self.scheduler_cls if scheduler_cls is None else LCMScheduler
         rank = 4
 
-        torch.manual_seed(0)
-        unet = UNet2DConditionModel(**self.unet_kwargs)
-        scheduler = scheduler_cls(**self.scheduler_kwargs)
-        torch.manual_seed(0)
-        vae = AutoencoderKL(**self.vae_kwargs)
+        if self.unet_kwargs is not None:
+            torch.manual_seed(0)
+            unet = UNet2DConditionModel(**self.unet_kwargs)
+        if self.scheduler_kwargs is not None:
+            scheduler = scheduler_cls(**self.scheduler_kwargs)
+        if self.vae_kwargs is not None:
+            torch.manual_seed(0)
+            vae = AutoencoderKL(**self.vae_kwargs)
 
         text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
         tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")

From d40bea464e6db60066e8e523f1e21cc6ddb43922 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Sun, 24 Dec 2023 20:19:19 +0530
Subject: [PATCH 2/2] fix: dummy components

---
 tests/lora/test_lora_layers_peft.py | 81 ++++++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 92c5d7ff15b9..38e55b9ed7b4 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -113,14 +113,14 @@ def get_dummy_components(self, scheduler_cls=None):
         scheduler_cls = self.scheduler_cls if scheduler_cls is None else LCMScheduler
         rank = 4
 
-        if self.unet_kwargs is not None:
-            torch.manual_seed(0)
-            unet = UNet2DConditionModel(**self.unet_kwargs)
-        if self.scheduler_kwargs is not None:
-            scheduler = scheduler_cls(**self.scheduler_kwargs)
-        if self.vae_kwargs is not None:
-            torch.manual_seed(0)
-            vae = AutoencoderKL(**self.vae_kwargs)
+        torch.manual_seed(0)
+        unet = UNet2DConditionModel(**self.unet_kwargs)
+
+        scheduler = scheduler_cls(**self.scheduler_kwargs)
+
+        torch.manual_seed(0)
+        vae = AutoencoderKL(**self.vae_kwargs)
+
 
         text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
         tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
@@ -1405,6 +1405,35 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
 @slow
 @require_torch_gpu
 class LoraIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
+    pipeline_class = StableDiffusionPipeline
+    scheduler_cls = DDIMScheduler
+    scheduler_kwargs = {
+        "beta_start": 0.00085,
+        "beta_end": 0.012,
+        "beta_schedule": "scaled_linear",
+        "clip_sample": False,
+        "set_alpha_to_one": False,
+        "steps_offset": 1,
+    }
+    unet_kwargs = {
+        "block_out_channels": (32, 64),
+        "layers_per_block": 2,
+        "sample_size": 32,
+        "in_channels": 4,
+        "out_channels": 4,
+        "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
+        "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
+        "cross_attention_dim": 32,
+    }
+    vae_kwargs = {
+        "block_out_channels": [32, 64],
+        "in_channels": 3,
+        "out_channels": 3,
+        "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
+        "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
+        "latent_channels": 4,
+    }
+
     def tearDown(self):
         import gc
 
@@ -1658,6 +1687,42 @@ def test_load_unload_load_kohya_lora(self):
 @slow
 @require_torch_gpu
 class LoraSDXLIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
+    has_two_text_encoders = True
+    pipeline_class = StableDiffusionXLPipeline
+    scheduler_cls = EulerDiscreteScheduler
+    scheduler_kwargs = {
+        "beta_start": 0.00085,
+        "beta_end": 0.012,
+        "beta_schedule": "scaled_linear",
+        "timestep_spacing": "leading",
+        "steps_offset": 1,
+    }
+    unet_kwargs = {
+        "block_out_channels": (32, 64),
+        "layers_per_block": 2,
+        "sample_size": 32,
+        "in_channels": 4,
+        "out_channels": 4,
+        "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
+        "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
+        "attention_head_dim": (2, 4),
+        "use_linear_projection": True,
+        "addition_embed_type": "text_time",
+        "addition_time_embed_dim": 8,
+        "transformer_layers_per_block": (1, 2),
+        "projection_class_embeddings_input_dim": 80,  # 6 * 8 + 32
+        "cross_attention_dim": 64,
+    }
+    vae_kwargs = {
+        "block_out_channels": [32, 64],
+        "in_channels": 3,
+        "out_channels": 3,
+        "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
+        "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
+        "latent_channels": 4,
+        "sample_size": 128,
+    }
+
     def tearDown(self):
         import gc