From d54d6dbcd05ec2600819ed88f7c09136ad90c36d Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Mon, 9 Oct 2023 16:52:08 +0000 Subject: [PATCH 1/3] move xformers to dedicated runner --- .github/workflows/build_docker_images.yml | 1 + .github/workflows/push_tests.yml | 40 + .../diffusers-pytorch-compile-cuda/Dockerfile | 3 +- docker/diffusers-pytorch-cuda/Dockerfile | 7 +- .../Dockerfile | 47 + tests/lora/test_lora_layers_old_backend.py | 56 +- tests/lora/test_lora_layers_peft.py | 1296 +++++++++++++++-- tests/models/test_modeling_common.py | 31 +- 8 files changed, 1360 insertions(+), 121 deletions(-) create mode 100644 docker/diffusers-pytorch-xformers-cuda/Dockerfile diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index f8bd15c46cdd..937ad07496b9 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -27,6 +27,7 @@ jobs: - diffusers-pytorch-cpu - diffusers-pytorch-cuda - diffusers-pytorch-compile-cuda + - diffusers-pytorch-xformers-cuda - diffusers-flax-cpu - diffusers-flax-tpu - diffusers-onnxruntime-cpu diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index a15a5412c4e4..5fadd095be35 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -290,6 +290,46 @@ jobs: name: torch_compile_test_reports path: reports + run_xformers_tests: + name: PyTorch xformers CUDA tests + + runs-on: docker-gpu + + container: + image: diffusers/diffusers-pytorch-xformers-cuda + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ + + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: NVIDIA-SMI + run: | + nvidia-smi + - name: Install dependencies + run: | + python -m pip install -e .[quality,test,training] + - name: Environment + run: | + python utils/print_env.py + - name: Run example tests on GPU + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/ + - name: Failure short reports + if: ${{ failure() }} + run: cat reports/tests_torch_xformers_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: torch_xformers_test_reports + path: reports + run_examples_tests: name: Examples PyTorch CUDA tests on Ubuntu diff --git a/docker/diffusers-pytorch-compile-cuda/Dockerfile b/docker/diffusers-pytorch-compile-cuda/Dockerfile index a41be50f9d58..1f7fe063b70d 100644 --- a/docker/diffusers-pytorch-compile-cuda/Dockerfile +++ b/docker/diffusers-pytorch-compile-cuda/Dockerfile @@ -42,7 +42,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \ tensorboard \ transformers \ omegaconf \ - pytorch-lightning \ - xformers + pytorch-lightning CMD ["/bin/bash"] diff --git a/docker/diffusers-pytorch-cuda/Dockerfile b/docker/diffusers-pytorch-cuda/Dockerfile index 4c447749da7b..b8e2af01f995 100644 --- a/docker/diffusers-pytorch-cuda/Dockerfile +++ b/docker/diffusers-pytorch-cuda/Dockerfile @@ -25,8 +25,8 @@ ENV PATH="/opt/venv/bin:$PATH" # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) RUN python3 -m pip install --no-cache-dir --upgrade pip && \ python3 -m pip install --no-cache-dir \ - torch==2.0.1 \ - torchvision==0.15.2 \ + torch \ + torchvision \ torchaudio \ invisible_watermark && \ python3 -m pip 
install --no-cache-dir \
@@ -41,7 +41,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
     tensorboard \
     transformers \
     omegaconf \
-    pytorch-lightning \
-    xformers
+    pytorch-lightning
 
 CMD ["/bin/bash"]
diff --git a/docker/diffusers-pytorch-xformers-cuda/Dockerfile b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
new file mode 100644
index 000000000000..4c447749da7b
--- /dev/null
+++ b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
@@ -0,0 +1,47 @@
+FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
+LABEL maintainer="Hugging Face"
+LABEL repository="diffusers"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt update && \
+    apt install -y bash \
+    build-essential \
+    git \
+    git-lfs \
+    curl \
+    ca-certificates \
+    libsndfile1-dev \
+    libgl1 \
+    python3.8 \
+    python3-pip \
+    python3.8-venv && \
+    rm -rf /var/lib/apt/lists
+
+# make sure to use venv
+RUN python3 -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
+RUN python3 -m pip install --no-cache-dir --upgrade pip && \
+    python3 -m pip install --no-cache-dir \
+    torch==2.0.1 \
+    torchvision==0.15.2 \
+    torchaudio \
+    invisible_watermark && \
+    python3 -m pip install --no-cache-dir \
+    accelerate \
+    datasets \
+    hf-doc-builder \
+    huggingface-hub \
+    Jinja2 \
+    librosa \
+    numpy \
+    scipy \
+    tensorboard \
+    transformers \
+    omegaconf \
+    pytorch-lightning \
+    xformers
+
+CMD ["/bin/bash"]
diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index d616ef8c78b8..02353cdbbb4d 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -293,8 +293,11 @@ def create_lora_weight_file(self, tmpdirname):
         )
         self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))
 
-    @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda")
-    def test_stable_diffusion_attn_processors(self):
+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_stable_diffusion_xformers_attn_processors(self):
         # disable_full_determinism()
         device = "cuda"  # ensure determinism for the device-dependent torch.Generator
         components, _ = self.get_dummy_components()
@@ -304,12 +307,34 @@ def test_stable_diffusion_attn_processors(self):
         sd_pipe = StableDiffusionPipeline(**components)
         sd_pipe = sd_pipe.to(device)
         sd_pipe.set_progress_bar_config(disable=None)
 
         _, _, inputs = self.get_dummy_inputs()
 
-        # run normal sd pipe
+        # run xformers attention
+        sd_pipe.enable_xformers_memory_efficient_attention()
         image = sd_pipe(**inputs).images
         assert image.shape == (1, 64, 64, 3)
 
-        # run xformers attention
-        sd_pipe.enable_xformers_memory_efficient_attention()
+        # run lora xformers attention
+        attn_processors, _ = create_unet_lora_layers(sd_pipe.unet)
+        attn_processors = {
+            k: LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim)
+            for k, v in attn_processors.items()
+        }
+        attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()}
+        sd_pipe.unet.set_attn_processor(attn_processors)
+        image = sd_pipe(**inputs).images
+        assert image.shape == (1, 64, 64, 3)
+
+    @unittest.skipIf(not torch.cuda.is_available(), reason="Test needs to run on GPU")
+    def test_stable_diffusion_attn_processors(self):
+        # disable_full_determinism()
+        device = "cuda"  # ensure determinism for the device-dependent torch.Generator
+        components, _ = self.get_dummy_components()
+        sd_pipe = StableDiffusionPipeline(**components)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        _, _, inputs = self.get_dummy_inputs()
+
+        # run normal sd pipe
+        image = sd_pipe(**inputs).images
+        assert image.shape == (1, 64, 64, 3)
@@ -329,18 +354,6 @@ def test_stable_diffusion_attn_processors(self):
         sd_pipe.unet.set_attn_processor(attn_processors)
         image = sd_pipe(**inputs).images
         assert image.shape == (1, 64, 64, 3)
-
-        # run lora xformers attention
-        attn_processors, _ = create_unet_lora_layers(sd_pipe.unet)
-        attn_processors = {
-            k: LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim)
-            for k, v in attn_processors.items()
-        }
-        attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()}
-        sd_pipe.unet.set_attn_processor(attn_processors)
-        image = sd_pipe(**inputs).images
-        assert image.shape == (1, 64, 64, 3)
-
         # enable_full_determinism()
 
     def test_stable_diffusion_lora(self):
@@ -631,7 +644,10 @@ def test_lora_unet_attn_processors_with_xformers(self):
             if isinstance(module, Attention):
                 self.assertIsInstance(module.processor, XFormersAttnProcessor)
 
-    @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
     def test_lora_save_load_with_xformers(self):
         pipeline_components, lora_components = self.get_dummy_components()
         sd_pipe = StableDiffusionPipeline(**pipeline_components)
@@ -2209,7 +2225,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
         lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
         lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
 
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
         pipe.enable_model_cpu_offload()
@@ -2223,7 +2239,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
 
         del pipe
 
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
         pipe.fuse_lora()
         pipe.enable_model_cpu_offload()
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 1862437fce88..0b73a2551bc5 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -12,39 +12,65 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import copy import os import tempfile +import time import unittest import numpy as np import torch import torch.nn as nn import torch.nn.functional as F +from huggingface_hub.repocard import RepoCard from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer from diffusers import ( AutoencoderKL, + ControlNetModel, DDIMScheduler, + DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, + StableDiffusionXLControlNetPipeline, StableDiffusionXLPipeline, UNet2DConditionModel, ) from diffusers.loaders import AttnProcsLayers -from diffusers.models.attention_processor import ( - LoRAAttnProcessor, - LoRAAttnProcessor2_0, +from diffusers.models.attention_processor import LoRAAttnProcessor, LoRAAttnProcessor2_0 +from diffusers.utils.import_utils import is_accelerate_available, is_peft_available +from diffusers.utils.testing_utils import ( + floats_tensor, + load_image, + nightly, + require_peft_backend, + require_torch_gpu, + slow, + torch_device, ) -from diffusers.utils.import_utils import is_peft_available -from diffusers.utils.testing_utils import floats_tensor, require_peft_backend, require_torch_gpu, slow +if is_accelerate_available(): + from accelerate.utils import release_memory + if is_peft_available(): from peft import LoraConfig from peft.tuners.tuners_utils import BaseTunerLayer from peft.utils import get_peft_model_state_dict +def state_dicts_almost_equal(sd1, sd2): + sd1 = dict(sorted(sd1.items())) + sd2 = dict(sorted(sd2.items())) + + models_are_equal = True + for ten1, ten2 in zip(sd1.values(), sd2.values()): + if (ten1 - ten2).abs().max() > 1e-3: + models_are_equal = False + + return models_are_equal + + def create_unet_lora_layers(unet: nn.Module): lora_attn_procs = {} for name in unet.attn_processors.keys(): @@ -94,6 +120,10 @@ def get_dummy_components(self): r=4, lora_alpha=4, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"], init_lora_weights=False ) + unet_lora_config = LoraConfig( + r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"], init_lora_weights=False + ) + unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) if self.has_two_text_encoders: @@ -120,7 +150,7 @@ def get_dummy_components(self): "unet_lora_layers": unet_lora_layers, "unet_lora_attn_procs": unet_lora_attn_procs, } - return pipeline_components, lora_components, text_lora_config + return pipeline_components, lora_components, text_lora_config, unet_lora_config def get_dummy_inputs(self, with_generator=True): batch_size = 1 @@ -166,7 +196,7 @@ def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected """ - components, _, _ = self.get_dummy_components() + components, _, _, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -180,7 +210,7 @@ def test_simple_inference_with_text_lora(self): Tests a simple inference with lora attached on the text encoder and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -208,7 +238,7 @@ def test_simple_inference_with_text_lora_and_scale(self): Tests a simple inference with lora attached on the text encoder + scale argument and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + 
components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -252,7 +282,7 @@ def test_simple_inference_with_text_lora_fused(self): Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -289,7 +319,7 @@ def test_simple_inference_with_text_lora_unloaded(self): Tests a simple inference with lora attached to text encoder, then unloads the lora weights and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -327,7 +357,7 @@ def test_simple_inference_with_text_lora_save_load(self): """ Tests a simple usecase where users could use saving utilities for LoRA. """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -387,7 +417,7 @@ def test_simple_inference_save_pretrained(self): """ Tests a simple usecase where users could use saving utilities for LoRA through save_pretrained """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -431,109 +461,589 @@ def test_simple_inference_save_pretrained(self): "Loading from saved checkpoints should give same results.", ) + def test_simple_inference_with_text_unet_lora_save_load(self): + """ + Tests a simple usecase where users could use saving utilities for LoRA for Unet + text encoder + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) -class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): - pipeline_class = StableDiffusionPipeline - scheduler_cls = DDIMScheduler - scheduler_kwargs = { - "beta_start": 0.00085, - "beta_end": 0.012, - "beta_schedule": "scaled_linear", - "clip_sample": False, - "set_alpha_to_one": False, - "steps_offset": 1, - } - unet_kwargs = { - "block_out_channels": (32, 64), - "layers_per_block": 2, - "sample_size": 32, - "in_channels": 4, - "out_channels": 4, - "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), - "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"), - "cross_attention_dim": 32, - } - vae_kwargs = { - "block_out_channels": [32, 64], - "in_channels": 3, - "out_channels": 3, - "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], - "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], - "latent_channels": 4, - } + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) - @slow - @require_torch_gpu - def 
test_integration_logits_with_scale(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id) - pipe = pipe.to("cuda") + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + + with tempfile.TemporaryDirectory() as tmpdirname: + text_encoder_state_dict = get_peft_model_state_dict(pipe.text_encoder) + unet_state_dict = get_peft_model_state_dict(pipe.unet) + if self.has_two_text_encoders: + text_encoder_2_state_dict = get_peft_model_state_dict(pipe.text_encoder_2) + + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, + text_encoder_lora_layers=text_encoder_state_dict, + text_encoder_2_lora_layers=text_encoder_2_state_dict, + unet_lora_layers=unet_state_dict, + safe_serialization=False, + ) + else: + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, + text_encoder_lora_layers=text_encoder_state_dict, + unet_lora_layers=unet_state_dict, + safe_serialization=False, + ) + + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) + pipe.unload_lora_weights() + + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) + + images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder 2", + np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3), + "Loading from saved checkpoints should give same results.", ) - prompt = "a red sks dog" + def test_simple_inference_with_text_unet_lora_and_scale(self): + """ + Tests a simple inference with lora attached on the text encoder + Unet + scale argument + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - images = pipe( - prompt=prompt, - num_inference_steps=15, - cross_attention_kwargs={"scale": 0.5}, - generator=torch.manual_seed(0), - output_type="np", + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + 
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue( + not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" + ) + + output_lora_scale = pipe( + **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5} ).images + self.assertTrue( + not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3), + "Lora + scale should change the output", + ) - expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321]) + output_lora_0_scale = pipe( + **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.0} + ).images + self.assertTrue( + np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3), + "Lora + 0 scale should lead to same result as no LoRA", + ) - predicted_slice = images[0, -3:, -3:, -1].flatten() + self.assertTrue( + pipe.text_encoder.text_model.encoder.layers[0].self_attn.q_proj.scaling["default"] == 1.0, + "The scaling parameter has not been correctly restored!", + ) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + def test_simple_inference_with_text_lora_unet_fused(self): + """ + Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model + and makes sure it works as expected - with unet + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - @slow - @require_torch_gpu - def test_integration_logits_no_scale(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id) - pipe = pipe.to("cuda") + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.fuse_lora() + # Fusing should still keep the LoRA layers + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in unet") + + if self.has_two_text_encoders: + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertFalse( + np.allclose(ouput_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" + ) + 
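+    # Illustrative sketch of the user-level round-trip the fuse/unfuse tests in this
+    # mixin exercise; `pipe` and `inputs` are assumed to come from the dummy helpers
+    # above, and `fused` is a hypothetical name used only for this summary:
+    #
+    #     pipe.fuse_lora()                 # fold the LoRA deltas into the base weights
+    #     fused = pipe(**inputs).images    # LoRA layers are still reported as set
+    #     pipe.unfuse_lora()               # restore the original base weights
+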
+ def test_simple_inference_with_text_unet_lora_unloaded(self): + """ + Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.unload_lora_weights() + # unloading should remove the LoRA layers + self.assertFalse( + self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder" + ) + self.assertFalse(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet") + + if self.has_two_text_encoders: + self.assertFalse( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly unloaded in text encoder 2" + ) + ouput_unloaded = pipe(**inputs, generator=torch.manual_seed(0)).images self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder", + np.allclose(ouput_unloaded, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" ) - prompt = "a red sks dog" + def test_simple_inference_with_text_unet_lora_unfused(self): + """ + Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) - expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084]) + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - predicted_slice = images[0, -3:, -3:, -1].flatten() + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + pipe.fuse_lora() + output_fused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images -class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): - has_two_text_encoders = True - pipeline_class = StableDiffusionXLPipeline - scheduler_cls = 
EulerDiscreteScheduler - scheduler_kwargs = { - "beta_start": 0.00085, - "beta_end": 0.012, - "beta_schedule": "scaled_linear", - "timestep_spacing": "leading", - "steps_offset": 1, - } - unet_kwargs = { - "block_out_channels": (32, 64), - "layers_per_block": 2, - "sample_size": 32, + pipe.unfuse_lora() + + output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + # unloading should remove the LoRA layers + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Unfuse should still keep LoRA layers") + + if self.has_two_text_encoders: + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers" + ) + + # Fuse and unfuse should lead to the same results + self.assertTrue( + np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3), + "Fused lora should change the output", + ) + + def test_simple_inference_with_text_unet_multi_adapter(self): + """ + Tests a simple inference with lora attached to text encoder and unet, attaches + multiple adapters and set them + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + + pipe.unet.add_adapter(unet_lora_config, "adapter-1") + pipe.unet.add_adapter(unet_lora_config, "adapter-2") + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.set_adapters("adapter-1") + + output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.set_adapters("adapter-2") + output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.set_adapters(["adapter-1", "adapter-2"]) + + output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images + + # Fuse and unfuse should lead to the same results + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), + "Adapter 1 and 2 should give different results", + ) + + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 1 and mixed adapters should give different results", + ) + + self.assertFalse( + np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 2 and mixed adapters should give different results", + ) + + pipe.disable_lora() + + output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images + + self.assertTrue( + np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", + ) + + @unittest.skip("This is failing for now - need to investigate") + def 
test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): + """ + Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) + pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True) + + if self.has_two_text_encoders: + pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) + + # Just makes sure it works.. + _ = pipe(**inputs, generator=torch.manual_seed(0)).images + + +class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): + pipeline_class = StableDiffusionPipeline + scheduler_cls = DDIMScheduler + scheduler_kwargs = { + "beta_start": 0.00085, + "beta_end": 0.012, + "beta_schedule": "scaled_linear", + "clip_sample": False, + "set_alpha_to_one": False, + "steps_offset": 1, + } + unet_kwargs = { + "block_out_channels": (32, 64), + "layers_per_block": 2, + "sample_size": 32, + "in_channels": 4, + "out_channels": 4, + "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), + "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"), + "cross_attention_dim": 32, + } + vae_kwargs = { + "block_out_channels": [32, 64], + "in_channels": 3, + "out_channels": 3, + "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], + "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], + "latent_channels": 4, + } + + @slow + @require_torch_gpu + def test_integration_move_lora_cpu(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" + + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id, adapter_name="adapter-1") + pipe.load_lora_weights(lora_id, adapter_name="adapter-2") + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder", + ) + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.unet), + "Lora not correctly set in text encoder", + ) + + pipe.set_lora_device(["adapter-1"], "cpu") + + for name, module in pipe.unet.named_modules(): + if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + self.assertTrue(module.weight.device == torch.device("cpu")) + elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + # import pdb; pdb.set_trace() + self.assertTrue(module.weight.device != torch.device("cpu")) + + for name, module in pipe.text_encoder.named_modules(): + if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + self.assertTrue(module.weight.device == 
torch.device("cpu")) + elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + # import pdb; pdb.set_trace() + self.assertTrue(module.weight.device != torch.device("cpu")) + + pipe.set_lora_device(["adapter-1"], 0) + + for n, m in pipe.unet.named_modules(): + if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + for n, m in pipe.text_encoder.named_modules(): + if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + pipe.set_lora_device(["adapter-1", "adapter-2"], "cuda") + + for n, m in pipe.unet.named_modules(): + if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + for n, m in pipe.text_encoder.named_modules(): + if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + @slow + @require_torch_gpu + def test_integration_logits_with_scale(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" + + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id) + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder 2", + ) + + prompt = "a red sks dog" + + images = pipe( + prompt=prompt, + num_inference_steps=15, + cross_attention_kwargs={"scale": 0.5}, + generator=torch.manual_seed(0), + output_type="np", + ).images + + expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321]) + + predicted_slice = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + @slow + @require_torch_gpu + def test_integration_logits_no_scale(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" + + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id) + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder", + ) + + prompt = "a red sks dog" + + images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images + + expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084]) + + predicted_slice = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + @nightly + @require_torch_gpu + def test_integration_logits_multi_adapter(self): + path = "stabilityai/stable-diffusion-xl-base-1.0" + lora_id = "CiroN2022/toy-face" + + pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16) + pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy") + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.unet), + "Lora not correctly set in Unet", + ) + + prompt = "toy_face of a hacker with a hoodie" + + lora_scale = 0.9 + + images = pipe( + prompt=prompt, + num_inference_steps=30, + generator=torch.manual_seed(0), + cross_attention_kwargs={"scale": lora_scale}, + output_type="np", + ).images + expected_slice_scale = 
np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539]) + + predicted_slice = images[0, -3:, -3:, -1].flatten() + # import pdb; pdb.set_trace() + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") + pipe.set_adapters("pixel") + + prompt = "pixel art, a hacker with a hoodie, simple, flat colors" + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": lora_scale}, + generator=torch.manual_seed(0), + output_type="np", + ).images + + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array( + [0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281, 0.62258327, 0.6259889] + ) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + # multi-adapter inference + pipe.set_adapters(["pixel", "toy"], unet_weights=[0.5, 1.0]) + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": 1.0}, + generator=torch.manual_seed(0), + output_type="np", + ).images + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array([0.5977, 0.5985, 0.6039, 0.5976, 0.6025, 0.6036, 0.5946, 0.5979, 0.5998]) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + # Lora disabled + pipe.disable_lora() + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": lora_scale}, + generator=torch.manual_seed(0), + output_type="np", + ).images + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array([0.54625, 0.5473, 0.5495, 0.5465, 0.5476, 0.5461, 0.5452, 0.5485, 0.5493]) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + +class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): + has_two_text_encoders = True + pipeline_class = StableDiffusionXLPipeline + scheduler_cls = EulerDiscreteScheduler + scheduler_kwargs = { + "beta_start": 0.00085, + "beta_end": 0.012, + "beta_schedule": "scaled_linear", + "timestep_spacing": "leading", + "steps_offset": 1, + } + unet_kwargs = { + "block_out_channels": (32, 64), + "layers_per_block": 2, + "sample_size": 32, "in_channels": 4, "out_channels": 4, "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), @@ -555,3 +1065,605 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): "latent_channels": 4, "sample_size": 128, } + + +@slow +@require_torch_gpu +class LoraIntegrationTests(unittest.TestCase): + def tearDown(self): + import gc + + gc.collect() + torch.cuda.empty_cache() + gc.collect() + + def test_dreambooth_old_format(self): + generator = torch.Generator("cpu").manual_seed(0) + + lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example" + card = RepoCard.load(lora_model_id) + base_model_id = card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe( + "A photo of a sks dog floating in the river", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.7207, 0.6787, 0.6010, 0.7478, 0.6838, 0.6064, 0.6984, 0.6443, 0.5785]) + + 
self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_dreambooth_text_encoder_new_format(self): + generator = torch.Generator().manual_seed(0) + + lora_model_id = "hf-internal-testing/lora-trained" + card = RepoCard.load(lora_model_id) + base_model_id = card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe("A photo of a sks dog", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.6628, 0.6138, 0.5390, 0.6625, 0.6130, 0.5463, 0.6166, 0.5788, 0.5359]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_a1111(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None).to( + torch_device + ) + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_lycoris(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/Amixx", safety_checker=None, use_safetensors=True, variant="fp16" + ).to(torch_device) + lora_model_id = "hf-internal-testing/edgLycorisMugler-light" + lora_filename = "edgLycorisMugler-light.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.6463, 0.658, 0.599, 0.6542, 0.6512, 0.6213, 0.658, 0.6485, 0.6017]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_a1111_with_model_cpu_offload(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) + pipe.enable_model_cpu_offload() + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_a1111_with_sequential_cpu_offload(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) + pipe.enable_sequential_cpu_offload() + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, 
weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_kohya_sd_v15_with_higher_dimensions(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + lora_model_id = "hf-internal-testing/urushisato-lora" + lora_filename = "urushisato_v15.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.7165, 0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_vanilla_funetuning(self): + generator = torch.Generator().manual_seed(0) + + lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4" + card = RepoCard.load(lora_model_id) + base_model_id = card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe("A pokemon with blue eyes.", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.7406, 0.699, 0.5963, 0.7493, 0.7045, 0.6096, 0.6886, 0.6388, 0.583]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_unload_kohya_lora(self): + generator = torch.manual_seed(0) + prompt = "masterpiece, best quality, mountain" + num_inference_steps = 2 + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + initial_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + initial_images = initial_images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" + lora_filename = "Colored_Icons_by_vizsumit.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images = lora_images[0, -3:, -3:, -1].flatten() + + pipe.unload_lora_weights() + generator = torch.manual_seed(0) + unloaded_lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() + + self.assertFalse(np.allclose(initial_images, lora_images)) + self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) + release_memory(pipe) + + def test_load_unload_load_kohya_lora(self): + # This test ensures that a Kohya-style LoRA can be safely unloaded and then loaded + # without introducing any side-effects. Even though the test uses a Kohya-style + # LoRA, the underlying adapter handling mechanism is format-agnostic. 
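+        # At a glance, the sequence exercised below (an illustrative summary of this
+        # test, using the names defined in its body):
+        #     pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)  # 1. load
+        #     pipe.unload_lora_weights()                                        # 2. unload
+        #     pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)  # 3. load again
+        # The images produced by step 1 and step 3 must match.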
+ generator = torch.manual_seed(0) + prompt = "masterpiece, best quality, mountain" + num_inference_steps = 2 + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + initial_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + initial_images = initial_images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" + lora_filename = "Colored_Icons_by_vizsumit.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images = lora_images[0, -3:, -3:, -1].flatten() + + pipe.unload_lora_weights() + generator = torch.manual_seed(0) + unloaded_lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() + + self.assertFalse(np.allclose(initial_images, lora_images)) + self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) + + # make sure we can load a LoRA again after unloading and they don't have + # any undesired effects. + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images_again = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images_again = lora_images_again[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3)) + release_memory(pipe) + + +@slow +@require_torch_gpu +class LoraSDXLIntegrationTests(unittest.TestCase): + def tearDown(self): + import gc + + gc.collect() + torch.cuda.empty_cache() + gc.collect() + + def test_sdxl_0_9_lora_one(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora" + lora_filename = "daiton-xl-lora-test.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_0_9_lora_two(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora" + lora_filename = "saijo.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_0_9_lora_three(self): + generator = torch.Generator().manual_seed(0) + + pipe = 
DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora" + lora_filename = "kame_sdxl_v2-000020-16rank.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468]) + + self.assertTrue(np.allclose(images, expected, atol=5e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_sdxl_1_0_lora_fusion(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + # This way we also test equivalence between LoRA fusion and the non-fusion behaviour. 
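+        # (The slice below deliberately matches the one in test_sdxl_1_0_lora:
+        # fusing the LoRA into the base weights must not change the output pixels.)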
+ expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_sdxl_1_0_lora_unfusion(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.fuse_lora() + + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_with_fusion = images[0, -3:, -3:, -1].flatten() + + pipe.unfuse_lora() + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_without_fusion = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora_unfusion_effectivity(self): + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + original_image_slice = images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + generator = torch.Generator().manual_seed(0) + _ = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + pipe.unfuse_lora() + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_without_fusion_slice = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora_fusion_efficiency(self): + generator = torch.Generator().manual_seed(0) + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) + pipe.enable_model_cpu_offload() + + start_time = time.time() + for _ in range(3): + pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + end_time = time.time() + elapsed_time_non_fusion = end_time - start_time + + del pipe + + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` 
led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + pipe.enable_model_cpu_offload() + + start_time = time.time() + generator = torch.Generator().manual_seed(0) + for _ in range(3): + pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + end_time = time.time() + elapsed_time_fusion = end_time - start_time + + self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion) + release_memory(pipe) + + def test_sdxl_1_0_last_ben(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe.enable_model_cpu_offload() + lora_model_id = "TheLastBen/Papercut_SDXL" + lora_filename = "papercut.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe("papercut.safetensors", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.5244, 0.4347, 0.4312, 0.4246, 0.4398, 0.4409, 0.4884, 0.4938, 0.4094]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_fuse_unfuse_all(self): + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict()) + text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict()) + unet_sd = copy.deepcopy(pipe.unet.state_dict()) + + pipe.load_lora_weights( + "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16 + ) + + fused_te_state_dict = pipe.text_encoder.state_dict() + fused_te_2_state_dict = pipe.text_encoder_2.state_dict() + unet_state_dict = pipe.unet.state_dict() + + for key, value in text_encoder_1_sd.items(): + self.assertTrue(torch.allclose(fused_te_state_dict[key], value)) + + for key, value in text_encoder_2_sd.items(): + self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value)) + + for key, value in unet_state_dict.items(): + self.assertTrue(torch.allclose(unet_state_dict[key], value)) + + pipe.fuse_lora() + pipe.unload_lora_weights() + + assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict()) + assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict()) + assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict()) + release_memory(pipe) + + def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_sequential_cpu_offload() + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_canny_lora(self): + controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0") + + pipe = StableDiffusionXLControlNetPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", 
controlnet=controlnet + ) + pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors") + pipe.enable_sequential_cpu_offload() + + generator = torch.Generator(device="cpu").manual_seed(0) + prompt = "corgi" + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" + ) + + images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images + + assert images[0].shape == (768, 512, 3) + + original_image = images[0, -3:, -3:, -1].flatten() + expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333]) + assert np.allclose(original_image, expected_image, atol=1e-04) + release_memory(pipe) + + @nightly + def test_sequential_fuse_unfuse(self): + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + + # 1. round + pipe.load_lora_weights("Pclanglais/TintinIA") + pipe.fuse_lora() + + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + image_slice = images[0, -3:, -3:, -1].flatten() + + pipe.unfuse_lora() + + # 2. round + pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style") + pipe.fuse_lora() + pipe.unfuse_lora() + + # 3. round + pipe.load_lora_weights("ostris/crayon_style_lora_sdxl") + pipe.fuse_lora() + pipe.unfuse_lora() + + # 4. back to 1st round + pipe.load_lora_weights("Pclanglais/TintinIA") + pipe.fuse_lora() + + generator = torch.Generator().manual_seed(0) + images_2 = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + image_slice_2 = images_2[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(image_slice, image_slice_2, atol=1e-3)) + release_memory(pipe) diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py index 8b6d279bbe6d..80c97978723c 100644 --- a/tests/models/test_modeling_common.py +++ b/tests/models/test_modeling_common.py @@ -30,7 +30,7 @@ from diffusers.models import UNet2DConditionModel from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0, XFormersAttnProcessor from diffusers.training_utils import EMAModel -from diffusers.utils import logging +from diffusers.utils import is_xformers_available, logging from diffusers.utils.testing_utils import ( CaptureLogger, require_python39_or_higher, @@ -269,6 +269,32 @@ def test_getattr_is_correct(self): assert str(error.exception) == f"'{type(model).__name__}' object has no attribute 'does_not_exist'" + @unittest.skipIf( + torch_device != "cuda" or not is_xformers_available(), + reason="XFormers attention is only available with CUDA and `xformers` installed", + ) + def test_set_xformers_attn_processor_for_determinism(self): + torch.use_deterministic_algorithms(False) + init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + model = self.model_class(**init_dict) + model.to(torch_device) + + if not hasattr(model, "set_attn_processor"): + # If not has `set_attn_processor`, skip test + return + + model.set_default_attn_processor() + assert all(type(proc) == AttnProcessor for proc in model.attn_processors.values()) + with torch.no_grad(): + output = model(**inputs_dict)[0] + + model.enable_xformers_memory_efficient_attention() + assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values()) + with torch.no_grad(): + 
output_2 = model(**inputs_dict)[0] + + assert torch.allclose(output, output_2, atol=self.base_precision) + @require_torch_gpu def test_set_attn_processor_for_determinism(self): torch.use_deterministic_algorithms(False) @@ -292,7 +318,7 @@ def test_set_attn_processor_for_determinism(self): model.enable_xformers_memory_efficient_attention() assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values()) with torch.no_grad(): - output_3 = model(**inputs_dict)[0] + model(**inputs_dict)[0] model.set_attn_processor(AttnProcessor2_0()) assert all(type(proc) == AttnProcessor2_0 for proc in model.attn_processors.values()) @@ -313,7 +339,6 @@ def test_set_attn_processor_for_determinism(self): # make sure that outputs match assert torch.allclose(output_2, output_1, atol=self.base_precision) - assert torch.allclose(output_2, output_3, atol=self.base_precision) assert torch.allclose(output_2, output_4, atol=self.base_precision) assert torch.allclose(output_2, output_5, atol=self.base_precision) assert torch.allclose(output_2, output_6, atol=self.base_precision) From 4a484f80ba58c7a7ed4fcf044c780e1746117520 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Mon, 9 Oct 2023 17:42:18 +0000 Subject: [PATCH 2/3] fix --- tests/lora/test_lora_layers_old_backend.py | 39 +- tests/lora/test_lora_layers_peft.py | 1266 ++------------------ 2 files changed, 94 insertions(+), 1211 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 02353cdbbb4d..893429670daa 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -293,10 +293,7 @@ def create_lora_weight_file(self, tmpdirname): ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - @unittest.skipIf( - torch.cuda.is_available() != "cuda" or not is_xformers_available(), - reason="XFormers attention is only available with CUDA and `xformers` installed", - ) + @unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda") def test_stable_diffusion_xformers_attn_processors(self): # disable_full_determinism() device = "cuda" # ensure determinism for the device-dependent torch.Generator @@ -312,18 +309,7 @@ def test_stable_diffusion_xformers_attn_processors(self): image = sd_pipe(**inputs).images assert image.shape == (1, 64, 64, 3) - # run lora xformers attention - attn_processors, _ = create_unet_lora_layers(sd_pipe.unet) - attn_processors = { - k: LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim) - for k, v in attn_processors.items() - } - attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()} - sd_pipe.unet.set_attn_processor(attn_processors) - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - @unittest.skipIf(not torch.cuda.is_available(), reason="Test needs to run on GPU") + @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda") def test_stable_diffusion_attn_processors(self): # disable_full_determinism() device = "cuda" # ensure determinism for the device-dependent torch.Generator @@ -354,6 +340,18 @@ def test_stable_diffusion_attn_processors(self): sd_pipe.unet.set_attn_processor(attn_processors) image = sd_pipe(**inputs).images assert image.shape == (1, 64, 64, 3) + + # run lora xformers attention + attn_processors, _ = create_unet_lora_layers(sd_pipe.unet) + attn_processors = { + k: 
LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim) + for k, v in attn_processors.items() + } + attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()} + sd_pipe.unet.set_attn_processor(attn_processors) + image = sd_pipe(**inputs).images + assert image.shape == (1, 64, 64, 3) + # enable_full_determinism() def test_stable_diffusion_lora(self): @@ -644,10 +642,7 @@ def test_lora_unet_attn_processors_with_xformers(self): if isinstance(module, Attention): self.assertIsInstance(module.processor, XFormersAttnProcessor) - @unittest.skipIf( - torch.cuda.is_available() != "cuda" or not is_xformers_available(), - reason="XFormers attention is only available with CUDA and `xformers` installed", - ) + @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU") def test_lora_save_load_with_xformers(self): pipeline_components, lora_components = self.get_dummy_components() sd_pipe = StableDiffusionPipeline(**pipeline_components) @@ -2225,7 +2220,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.enable_model_cpu_offload() @@ -2239,7 +2234,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): del pipe - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.fuse_lora() pipe.enable_model_cpu_offload() diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 0b73a2551bc5..1862437fce88 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -12,65 +12,39 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
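Note on the skip-condition fix in the old-backend hunks above: `torch.cuda.is_available()` returns a bool, so the earlier comparison against the string "cuda" was always unequal and the decorator skipped the test unconditionally. A minimal sketch of the corrected guard, expressed as a reusable named decorator (the test class and body here are illustrative; `is_xformers_available` is the existing diffusers helper):

import unittest

import torch

from diffusers.utils.import_utils import is_xformers_available

requires_cuda_and_xformers = unittest.skipIf(
    not torch.cuda.is_available() or not is_xformers_available(),
    reason="xformers attention requires a CUDA device and the `xformers` package",
)


@requires_cuda_and_xformers
class XFormersGuardedTests(unittest.TestCase):
    def test_guard(self):
        # Only runs when both conditions hold, so CUDA is guaranteed here.
        self.assertTrue(torch.cuda.is_available())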
-import copy import os import tempfile -import time import unittest import numpy as np import torch import torch.nn as nn import torch.nn.functional as F -from huggingface_hub.repocard import RepoCard from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer from diffusers import ( AutoencoderKL, - ControlNetModel, DDIMScheduler, - DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, - StableDiffusionXLControlNetPipeline, StableDiffusionXLPipeline, UNet2DConditionModel, ) from diffusers.loaders import AttnProcsLayers -from diffusers.models.attention_processor import LoRAAttnProcessor, LoRAAttnProcessor2_0 -from diffusers.utils.import_utils import is_accelerate_available, is_peft_available -from diffusers.utils.testing_utils import ( - floats_tensor, - load_image, - nightly, - require_peft_backend, - require_torch_gpu, - slow, - torch_device, +from diffusers.models.attention_processor import ( + LoRAAttnProcessor, + LoRAAttnProcessor2_0, ) +from diffusers.utils.import_utils import is_peft_available +from diffusers.utils.testing_utils import floats_tensor, require_peft_backend, require_torch_gpu, slow -if is_accelerate_available(): - from accelerate.utils import release_memory - if is_peft_available(): from peft import LoraConfig from peft.tuners.tuners_utils import BaseTunerLayer from peft.utils import get_peft_model_state_dict -def state_dicts_almost_equal(sd1, sd2): - sd1 = dict(sorted(sd1.items())) - sd2 = dict(sorted(sd2.items())) - - models_are_equal = True - for ten1, ten2 in zip(sd1.values(), sd2.values()): - if (ten1 - ten2).abs().max() > 1e-3: - models_are_equal = False - - return models_are_equal - - def create_unet_lora_layers(unet: nn.Module): lora_attn_procs = {} for name in unet.attn_processors.keys(): @@ -120,10 +94,6 @@ def get_dummy_components(self): r=4, lora_alpha=4, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"], init_lora_weights=False ) - unet_lora_config = LoraConfig( - r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"], init_lora_weights=False - ) - unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) if self.has_two_text_encoders: @@ -150,7 +120,7 @@ def get_dummy_components(self): "unet_lora_layers": unet_lora_layers, "unet_lora_attn_procs": unet_lora_attn_procs, } - return pipeline_components, lora_components, text_lora_config, unet_lora_config + return pipeline_components, lora_components, text_lora_config def get_dummy_inputs(self, with_generator=True): batch_size = 1 @@ -196,7 +166,7 @@ def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected """ - components, _, _, _ = self.get_dummy_components() + components, _, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -210,7 +180,7 @@ def test_simple_inference_with_text_lora(self): Tests a simple inference with lora attached on the text encoder and makes sure it works as expected """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -238,7 +208,7 @@ def test_simple_inference_with_text_lora_and_scale(self): Tests a simple inference with lora attached on the text encoder + scale argument and makes sure it works as expected """ - components, _, text_lora_config, _ = 
self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -282,7 +252,7 @@ def test_simple_inference_with_text_lora_fused(self): Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model and makes sure it works as expected """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -319,7 +289,7 @@ def test_simple_inference_with_text_lora_unloaded(self): Tests a simple inference with lora attached to text encoder, then unloads the lora weights and makes sure it works as expected """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -357,7 +327,7 @@ def test_simple_inference_with_text_lora_save_load(self): """ Tests a simple usecase where users could use saving utilities for LoRA. """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -417,7 +387,7 @@ def test_simple_inference_save_pretrained(self): """ Tests a simple usecase where users could use saving utilities for LoRA through save_pretrained """ - components, _, text_lora_config, _ = self.get_dummy_components() + components, _, text_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -461,572 +431,92 @@ def test_simple_inference_save_pretrained(self): "Loading from saved checkpoints should give same results.", ) - def test_simple_inference_with_text_unet_lora_save_load(self): - """ - Tests a simple usecase where users could use saving utilities for LoRA for Unet + text encoder - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) - - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - - with tempfile.TemporaryDirectory() as tmpdirname: - text_encoder_state_dict = get_peft_model_state_dict(pipe.text_encoder) - unet_state_dict = get_peft_model_state_dict(pipe.unet) - if self.has_two_text_encoders: - text_encoder_2_state_dict = get_peft_model_state_dict(pipe.text_encoder_2) - - 
self.pipeline_class.save_lora_weights( - save_directory=tmpdirname, - text_encoder_lora_layers=text_encoder_state_dict, - text_encoder_2_lora_layers=text_encoder_2_state_dict, - unet_lora_layers=unet_state_dict, - safe_serialization=False, - ) - else: - self.pipeline_class.save_lora_weights( - save_directory=tmpdirname, - text_encoder_lora_layers=text_encoder_state_dict, - unet_lora_layers=unet_state_dict, - safe_serialization=False, - ) - - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - pipe.unload_lora_weights() - - pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) - - images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - self.assertTrue( - np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3), - "Loading from saved checkpoints should give same results.", - ) - - def test_simple_inference_with_text_unet_lora_and_scale(self): - """ - Tests a simple inference with lora attached on the text encoder + Unet + scale argument - and makes sure it works as expected - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) +class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): + pipeline_class = StableDiffusionPipeline + scheduler_cls = DDIMScheduler + scheduler_kwargs = { + "beta_start": 0.00085, + "beta_end": 0.012, + "beta_schedule": "scaled_linear", + "clip_sample": False, + "set_alpha_to_one": False, + "steps_offset": 1, + } + unet_kwargs = { + "block_out_channels": (32, 64), + "layers_per_block": 2, + "sample_size": 32, + "in_channels": 4, + "out_channels": 4, + "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), + "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"), + "cross_attention_dim": 32, + } + vae_kwargs = { + "block_out_channels": [32, 64], + "in_channels": 3, + "out_channels": 3, + "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], + "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], + "latent_channels": 4, + } - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + @slow + @require_torch_gpu + def test_integration_logits_with_scale(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id) + 
pipe = pipe.to("cuda") - output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images self.assertTrue( - not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder 2", ) - output_lora_scale = pipe( - **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5} - ).images - self.assertTrue( - not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3), - "Lora + scale should change the output", - ) + prompt = "a red sks dog" - output_lora_0_scale = pipe( - **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.0} + images = pipe( + prompt=prompt, + num_inference_steps=15, + cross_attention_kwargs={"scale": 0.5}, + generator=torch.manual_seed(0), + output_type="np", ).images - self.assertTrue( - np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3), - "Lora + 0 scale should lead to same result as no LoRA", - ) - - self.assertTrue( - pipe.text_encoder.text_model.encoder.layers[0].self_attn.q_proj.scaling["default"] == 1.0, - "The scaling parameter has not been correctly restored!", - ) - - def test_simple_inference_with_text_lora_unet_fused(self): - """ - Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model - and makes sure it works as expected - with unet - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) - - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - pipe.fuse_lora() - # Fusing should still keep the LoRA layers - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in unet") - - if self.has_two_text_encoders: - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images - self.assertFalse( - np.allclose(ouput_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" - ) - - def test_simple_inference_with_text_unet_lora_unloaded(self): - """ - Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights - and makes sure it works as expected - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - 
self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321]) - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + predicted_slice = images[0, -3:, -3:, -1].flatten() - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - pipe.unload_lora_weights() - # unloading should remove the LoRA layers - self.assertFalse( - self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder" - ) - self.assertFalse(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet") + @slow + @require_torch_gpu + def test_integration_logits_no_scale(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" - if self.has_two_text_encoders: - self.assertFalse( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly unloaded in text encoder 2" - ) + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id) + pipe = pipe.to("cuda") - ouput_unloaded = pipe(**inputs, generator=torch.manual_seed(0)).images self.assertTrue( - np.allclose(ouput_unloaded, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder", ) - def test_simple_inference_with_text_unet_lora_unfused(self): - """ - Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights - and makes sure it works as expected - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - pipe.fuse_lora() + prompt = "a red sks dog" - output_fused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images - pipe.unfuse_lora() + expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084]) - output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - # unloading should remove the LoRA layers - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Unfuse 
should still keep LoRA layers") + predicted_slice = images[0, -3:, -3:, -1].flatten() - if self.has_two_text_encoders: - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers" - ) - - # Fuse and unfuse should lead to the same results - self.assertTrue( - np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3), - "Fused lora should change the output", - ) - - def test_simple_inference_with_text_unet_multi_adapter(self): - """ - Tests a simple inference with lora attached to text encoder and unet, attaches - multiple adapters and set them - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images - - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") - - pipe.unet.add_adapter(unet_lora_config, "adapter-1") - pipe.unet.add_adapter(unet_lora_config, "adapter-2") - - self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - pipe.set_adapters("adapter-1") - - output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images - - pipe.set_adapters("adapter-2") - output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images - - pipe.set_adapters(["adapter-1", "adapter-2"]) - - output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images - - # Fuse and unfuse should lead to the same results - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), - "Adapter 1 and 2 should give different results", - ) - - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 1 and mixed adapters should give different results", - ) - - self.assertFalse( - np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 2 and mixed adapters should give different results", - ) - - pipe.disable_lora() - - output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images - - self.assertTrue( - np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), - "output with no lora and output with lora disabled should give same results", - ) - - @unittest.skip("This is failing for now - need to investigate") - def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): - """ - Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights - and makes sure it works as expected - """ - components, _, text_lora_config, unet_lora_config = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe = pipe.to(self.torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - pipe.text_encoder.add_adapter(text_lora_config) - pipe.unet.add_adapter(unet_lora_config) - - 
self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") - - if self.has_two_text_encoders: - pipe.text_encoder_2.add_adapter(text_lora_config) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) - - pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) - pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True) - - if self.has_two_text_encoders: - pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) - - # Just makes sure it works.. - _ = pipe(**inputs, generator=torch.manual_seed(0)).images - - -class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): - pipeline_class = StableDiffusionPipeline - scheduler_cls = DDIMScheduler - scheduler_kwargs = { - "beta_start": 0.00085, - "beta_end": 0.012, - "beta_schedule": "scaled_linear", - "clip_sample": False, - "set_alpha_to_one": False, - "steps_offset": 1, - } - unet_kwargs = { - "block_out_channels": (32, 64), - "layers_per_block": 2, - "sample_size": 32, - "in_channels": 4, - "out_channels": 4, - "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"), - "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"), - "cross_attention_dim": 32, - } - vae_kwargs = { - "block_out_channels": [32, 64], - "in_channels": 3, - "out_channels": 3, - "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], - "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], - "latent_channels": 4, - } - - @slow - @require_torch_gpu - def test_integration_move_lora_cpu(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" - - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id, adapter_name="adapter-1") - pipe.load_lora_weights(lora_id, adapter_name="adapter-2") - pipe = pipe.to("cuda") - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder", - ) - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.unet), - "Lora not correctly set in text encoder", - ) - - pipe.set_lora_device(["adapter-1"], "cpu") - - for name, module in pipe.unet.named_modules(): - if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - self.assertTrue(module.weight.device == torch.device("cpu")) - elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - # import pdb; pdb.set_trace() - self.assertTrue(module.weight.device != torch.device("cpu")) - - for name, module in pipe.text_encoder.named_modules(): - if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - self.assertTrue(module.weight.device == torch.device("cpu")) - elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - # import pdb; pdb.set_trace() - self.assertTrue(module.weight.device != torch.device("cpu")) - - pipe.set_lora_device(["adapter-1"], 0) - - for n, m in pipe.unet.named_modules(): - if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): - self.assertTrue(m.weight.device != torch.device("cpu")) - - for n, m in pipe.text_encoder.named_modules(): - if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): - self.assertTrue(m.weight.device != torch.device("cpu")) - - 
pipe.set_lora_device(["adapter-1", "adapter-2"], "cuda") - - for n, m in pipe.unet.named_modules(): - if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): - self.assertTrue(m.weight.device != torch.device("cpu")) - - for n, m in pipe.text_encoder.named_modules(): - if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): - self.assertTrue(m.weight.device != torch.device("cpu")) - - @slow - @require_torch_gpu - def test_integration_logits_with_scale(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" - - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id) - pipe = pipe.to("cuda") - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder 2", - ) - - prompt = "a red sks dog" - - images = pipe( - prompt=prompt, - num_inference_steps=15, - cross_attention_kwargs={"scale": 0.5}, - generator=torch.manual_seed(0), - output_type="np", - ).images - - expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321]) - - predicted_slice = images[0, -3:, -3:, -1].flatten() - - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - @slow - @require_torch_gpu - def test_integration_logits_no_scale(self): - path = "runwayml/stable-diffusion-v1-5" - lora_id = "takuma104/lora-test-text-encoder-lora-target" - - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) - pipe.load_lora_weights(lora_id) - pipe = pipe.to("cuda") - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), - "Lora not correctly set in text encoder", - ) - - prompt = "a red sks dog" - - images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images - - expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084]) - - predicted_slice = images[0, -3:, -3:, -1].flatten() - - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - @nightly - @require_torch_gpu - def test_integration_logits_multi_adapter(self): - path = "stabilityai/stable-diffusion-xl-base-1.0" - lora_id = "CiroN2022/toy-face" - - pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16) - pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy") - pipe = pipe.to("cuda") - - self.assertTrue( - self.check_if_lora_correctly_set(pipe.unet), - "Lora not correctly set in Unet", - ) - - prompt = "toy_face of a hacker with a hoodie" - - lora_scale = 0.9 - - images = pipe( - prompt=prompt, - num_inference_steps=30, - generator=torch.manual_seed(0), - cross_attention_kwargs={"scale": lora_scale}, - output_type="np", - ).images - expected_slice_scale = np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539]) - - predicted_slice = images[0, -3:, -3:, -1].flatten() - # import pdb; pdb.set_trace() - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") - pipe.set_adapters("pixel") - - prompt = "pixel art, a hacker with a hoodie, simple, flat colors" - images = pipe( - prompt, - num_inference_steps=30, - guidance_scale=7.5, - cross_attention_kwargs={"scale": lora_scale}, - 
generator=torch.manual_seed(0), - output_type="np", - ).images - - predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array( - [0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281, 0.62258327, 0.6259889] - ) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - # multi-adapter inference - pipe.set_adapters(["pixel", "toy"], unet_weights=[0.5, 1.0]) - images = pipe( - prompt, - num_inference_steps=30, - guidance_scale=7.5, - cross_attention_kwargs={"scale": 1.0}, - generator=torch.manual_seed(0), - output_type="np", - ).images - predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array([0.5977, 0.5985, 0.6039, 0.5976, 0.6025, 0.6036, 0.5946, 0.5979, 0.5998]) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - - # Lora disabled - pipe.disable_lora() - images = pipe( - prompt, - num_inference_steps=30, - guidance_scale=7.5, - cross_attention_kwargs={"scale": lora_scale}, - generator=torch.manual_seed(0), - output_type="np", - ).images - predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array([0.54625, 0.5473, 0.5495, 0.5465, 0.5476, 0.5461, 0.5452, 0.5485, 0.5493]) - self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): @@ -1065,605 +555,3 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): "latent_channels": 4, "sample_size": 128, } - - -@slow -@require_torch_gpu -class LoraIntegrationTests(unittest.TestCase): - def tearDown(self): - import gc - - gc.collect() - torch.cuda.empty_cache() - gc.collect() - - def test_dreambooth_old_format(self): - generator = torch.Generator("cpu").manual_seed(0) - - lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example" - card = RepoCard.load(lora_model_id) - base_model_id = card.data.to_dict()["base_model"] - - pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) - pipe = pipe.to(torch_device) - pipe.load_lora_weights(lora_model_id) - - images = pipe( - "A photo of a sks dog floating in the river", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - - expected = np.array([0.7207, 0.6787, 0.6010, 0.7478, 0.6838, 0.6064, 0.6984, 0.6443, 0.5785]) - - self.assertTrue(np.allclose(images, expected, atol=1e-4)) - release_memory(pipe) - - def test_dreambooth_text_encoder_new_format(self): - generator = torch.Generator().manual_seed(0) - - lora_model_id = "hf-internal-testing/lora-trained" - card = RepoCard.load(lora_model_id) - base_model_id = card.data.to_dict()["base_model"] - - pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) - pipe = pipe.to(torch_device) - pipe.load_lora_weights(lora_model_id) - - images = pipe("A photo of a sks dog", output_type="np", generator=generator, num_inference_steps=2).images - - images = images[0, -3:, -3:, -1].flatten() - - expected = np.array([0.6628, 0.6138, 0.5390, 0.6625, 0.6130, 0.5463, 0.6166, 0.5788, 0.5359]) - - self.assertTrue(np.allclose(images, expected, atol=1e-4)) - release_memory(pipe) - - def test_a1111(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", 
safety_checker=None).to( - torch_device - ) - lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" - lora_filename = "light_and_shadow.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_lycoris(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/Amixx", safety_checker=None, use_safetensors=True, variant="fp16" - ).to(torch_device) - lora_model_id = "hf-internal-testing/edgLycorisMugler-light" - lora_filename = "edgLycorisMugler-light.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.6463, 0.658, 0.599, 0.6542, 0.6512, 0.6213, 0.658, 0.6485, 0.6017]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_a1111_with_model_cpu_offload(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) - pipe.enable_model_cpu_offload() - lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" - lora_filename = "light_and_shadow.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_a1111_with_sequential_cpu_offload(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) - pipe.enable_sequential_cpu_offload() - lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" - lora_filename = "light_and_shadow.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_kohya_sd_v15_with_higher_dimensions(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( - torch_device - ) - lora_model_id = "hf-internal-testing/urushisato-lora" - lora_filename = "urushisato_v15.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.7165, 
0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_vanilla_funetuning(self): - generator = torch.Generator().manual_seed(0) - - lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4" - card = RepoCard.load(lora_model_id) - base_model_id = card.data.to_dict()["base_model"] - - pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) - pipe = pipe.to(torch_device) - pipe.load_lora_weights(lora_model_id) - - images = pipe("A pokemon with blue eyes.", output_type="np", generator=generator, num_inference_steps=2).images - - images = images[0, -3:, -3:, -1].flatten() - - expected = np.array([0.7406, 0.699, 0.5963, 0.7493, 0.7045, 0.6096, 0.6886, 0.6388, 0.583]) - - self.assertTrue(np.allclose(images, expected, atol=1e-4)) - release_memory(pipe) - - def test_unload_kohya_lora(self): - generator = torch.manual_seed(0) - prompt = "masterpiece, best quality, mountain" - num_inference_steps = 2 - - pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( - torch_device - ) - initial_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - initial_images = initial_images[0, -3:, -3:, -1].flatten() - - lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" - lora_filename = "Colored_Icons_by_vizsumit.safetensors" - - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - generator = torch.manual_seed(0) - lora_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - lora_images = lora_images[0, -3:, -3:, -1].flatten() - - pipe.unload_lora_weights() - generator = torch.manual_seed(0) - unloaded_lora_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() - - self.assertFalse(np.allclose(initial_images, lora_images)) - self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) - release_memory(pipe) - - def test_load_unload_load_kohya_lora(self): - # This test ensures that a Kohya-style LoRA can be safely unloaded and then loaded - # without introducing any side-effects. Even though the test uses a Kohya-style - # LoRA, the underlying adapter handling mechanism is format-agnostic. 
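The test this comment introduces (like test_unload_kohya_lora just above it) pins down an invariant that carries over to the PEFT backend: loading a LoRA, unloading it, and loading it again must leave the base weights fully restored, with no residue from the first load. A minimal sketch of that round trip outside the test harness (the model and LoRA ids are the ones these tests use; the seed and step count are illustrative):

import numpy as np
import torch

from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", safety_checker=None
).to("cuda")


def render():
    # Re-seed on each call so the three runs are directly comparable.
    return pipe(
        "masterpiece, best quality, mountain",
        output_type="np",
        generator=torch.manual_seed(0),
        num_inference_steps=2,
    ).images


base = render()

pipe.load_lora_weights(
    "hf-internal-testing/civitai-colored-icons-lora",
    weight_name="Colored_Icons_by_vizsumit.safetensors",
)
with_lora = render()

pipe.unload_lora_weights()
after_unload = render()

assert not np.allclose(base, with_lora)  # the LoRA changed the output
assert np.allclose(base, after_unload, atol=1e-3)  # unloading restored the base model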
- generator = torch.manual_seed(0) - prompt = "masterpiece, best quality, mountain" - num_inference_steps = 2 - - pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( - torch_device - ) - initial_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - initial_images = initial_images[0, -3:, -3:, -1].flatten() - - lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" - lora_filename = "Colored_Icons_by_vizsumit.safetensors" - - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - generator = torch.manual_seed(0) - lora_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - lora_images = lora_images[0, -3:, -3:, -1].flatten() - - pipe.unload_lora_weights() - generator = torch.manual_seed(0) - unloaded_lora_images = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() - - self.assertFalse(np.allclose(initial_images, lora_images)) - self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) - - # make sure we can load a LoRA again after unloading and they don't have - # any undesired effects. - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - generator = torch.manual_seed(0) - lora_images_again = pipe( - prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps - ).images - lora_images_again = lora_images_again[0, -3:, -3:, -1].flatten() - - self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3)) - release_memory(pipe) - - -@slow -@require_torch_gpu -class LoraSDXLIntegrationTests(unittest.TestCase): - def tearDown(self): - import gc - - gc.collect() - torch.cuda.empty_cache() - gc.collect() - - def test_sdxl_0_9_lora_one(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora" - lora_filename = "daiton-xl-lora-test.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.enable_model_cpu_offload() - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_sdxl_0_9_lora_two(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora" - lora_filename = "saijo.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.enable_model_cpu_offload() - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - release_memory(pipe) - - def test_sdxl_0_9_lora_three(self): - generator = torch.Generator().manual_seed(0) - - pipe = 
DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora" - lora_filename = "kame_sdxl_v2-000020-16rank.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.enable_model_cpu_offload() - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468]) - - self.assertTrue(np.allclose(images, expected, atol=5e-3)) - release_memory(pipe) - - def test_sdxl_1_0_lora(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.enable_model_cpu_offload() - lora_model_id = "hf-internal-testing/sdxl-1.0-lora" - lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) - - self.assertTrue(np.allclose(images, expected, atol=1e-4)) - release_memory(pipe) - - def test_sdxl_1_0_lora_fusion(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - lora_model_id = "hf-internal-testing/sdxl-1.0-lora" - lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - pipe.fuse_lora() - # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being - # silently deleted - otherwise this will CPU OOM - pipe.unload_lora_weights() - - pipe.enable_model_cpu_offload() - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - # This way we also test equivalence between LoRA fusion and the non-fusion behaviour. 
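Every fusion test removed in this block follows the same recipe, and the ordering is load-bearing: fuse first, then drop the now-redundant adapter weights before enabling offload, since the adapter copies retained by the newer `fuse_lora` API can otherwise exhaust host memory. A minimal sketch of the recipe (the ids are the ones used in these tests; the prompt and step count are illustrative):

import torch

from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.load_lora_weights(
    "hf-internal-testing/sdxl-1.0-lora",
    weight_name="sd_xl_offset_example-lora_1.0.safetensors",
)

pipe.fuse_lora()  # bake the LoRA into the base weights
pipe.unload_lora_weights()  # free the adapter copies the fused pipeline no longer needs
pipe.enable_model_cpu_offload()

image = pipe(
    "masterpiece, best quality, mountain",
    generator=torch.manual_seed(0),
    num_inference_steps=2,
).images[0]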
-        expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
-
-        self.assertTrue(np.allclose(images, expected, atol=1e-4))
-        release_memory(pipe)
-
-    def test_sdxl_1_0_lora_unfusion(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-        lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
-        lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-        pipe.fuse_lora()
-
-        pipe.enable_model_cpu_offload()
-
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        images_with_fusion = images[0, -3:, -3:, -1].flatten()
-
-        pipe.unfuse_lora()
-        generator = torch.Generator().manual_seed(0)
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        images_without_fusion = images[0, -3:, -3:, -1].flatten()
-
-        self.assertTrue(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3))
-        release_memory(pipe)
-
-    def test_sdxl_1_0_lora_unfusion_effectivity(self):
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-        pipe.enable_model_cpu_offload()
-
-        generator = torch.Generator().manual_seed(0)
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        original_image_slice = images[0, -3:, -3:, -1].flatten()
-
-        lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
-        lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-        pipe.fuse_lora()
-        # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
-        # silently deleted - otherwise this will CPU OOM
-        pipe.unload_lora_weights()
-
-        generator = torch.Generator().manual_seed(0)
-        _ = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-
-        pipe.unfuse_lora()
-        generator = torch.Generator().manual_seed(0)
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        images_without_fusion_slice = images[0, -3:, -3:, -1].flatten()
-
-        self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3))
-        release_memory(pipe)
-
-    def test_sdxl_1_0_lora_fusion_efficiency(self):
-        generator = torch.Generator().manual_seed(0)
-        lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
-        lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16)
-        pipe.enable_model_cpu_offload()
-
-        start_time = time.time()
-        for _ in range(3):
-            pipe(
-                "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-            ).images
-        end_time = time.time()
-        elapsed_time_non_fusion = end_time - start_time
-
-        del pipe
-
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16)
-        pipe.fuse_lora()
-        # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
-        # silently deleted - otherwise this will CPU OOM
-        pipe.unload_lora_weights()
-
-        pipe.enable_model_cpu_offload()
-
-        start_time = time.time()
-        generator = torch.Generator().manual_seed(0)
-        for _ in range(3):
-            pipe(
-                "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-            ).images
-        end_time = time.time()
-        elapsed_time_fusion = end_time - start_time
-
-        self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion)
-        release_memory(pipe)
-
-    def test_sdxl_1_0_last_ben(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda")
-        pipe.enable_model_cpu_offload()
-        lora_model_id = "TheLastBen/Papercut_SDXL"
-        lora_filename = "papercut.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
-        images = pipe("papercut.safetensors", output_type="np", generator=generator, num_inference_steps=2).images
-
-        images = images[0, -3:, -3:, -1].flatten()
-        expected = np.array([0.5244, 0.4347, 0.4312, 0.4246, 0.4398, 0.4409, 0.4884, 0.4938, 0.4094])
-
-        self.assertTrue(np.allclose(images, expected, atol=1e-3))
-        release_memory(pipe)
-
-    def test_sdxl_1_0_fuse_unfuse_all(self):
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
-        text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
-        text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
-        unet_sd = copy.deepcopy(pipe.unet.state_dict())
-
-        pipe.load_lora_weights(
-            "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16
-        )
-
-        fused_te_state_dict = pipe.text_encoder.state_dict()
-        fused_te_2_state_dict = pipe.text_encoder_2.state_dict()
-        unet_state_dict = pipe.unet.state_dict()
-
-        for key, value in text_encoder_1_sd.items():
-            self.assertTrue(torch.allclose(fused_te_state_dict[key], value))
-
-        for key, value in text_encoder_2_sd.items():
-            self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value))
-
-        for key, value in unet_state_dict.items():
-            self.assertTrue(torch.allclose(unet_state_dict[key], value))
-
-        pipe.fuse_lora()
-        pipe.unload_lora_weights()
-
-        assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict())
-        assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())
-        assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict())
-        release_memory(pipe)
-
-    def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-        pipe.enable_sequential_cpu_offload()
-        lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
-        lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-
-        images = images[0, -3:, -3:, -1].flatten()
-        expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
-
-        self.assertTrue(np.allclose(images, expected, atol=1e-3))
-        release_memory(pipe)
-
-    def test_canny_lora(self):
-        controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
-
-        pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
-        )
-        pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors")
-        pipe.enable_sequential_cpu_offload()
-
-        generator = torch.Generator(device="cpu").manual_seed(0)
-        prompt = "corgi"
-        image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
-        )
-
-        images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
-
-        assert images[0].shape == (768, 512, 3)
-
-        original_image = images[0, -3:, -3:, -1].flatten()
-        expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])
-        assert np.allclose(original_image, expected_image, atol=1e-04)
-        release_memory(pipe)
-
-    @nightly
-    def test_sequential_fuse_unfuse(self):
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-
-        # 1. round
-        pipe.load_lora_weights("Pclanglais/TintinIA")
-        pipe.fuse_lora()
-
-        generator = torch.Generator().manual_seed(0)
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        image_slice = images[0, -3:, -3:, -1].flatten()
-
-        pipe.unfuse_lora()
-
-        # 2. round
-        pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style")
-        pipe.fuse_lora()
-        pipe.unfuse_lora()
-
-        # 3. round
-        pipe.load_lora_weights("ostris/crayon_style_lora_sdxl")
-        pipe.fuse_lora()
-        pipe.unfuse_lora()
-
-        # 4. back to 1st round
-        pipe.load_lora_weights("Pclanglais/TintinIA")
-        pipe.fuse_lora()
-
-        generator = torch.Generator().manual_seed(0)
-        images_2 = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-        image_slice_2 = images_2[0, -3:, -3:, -1].flatten()
-
-        self.assertTrue(np.allclose(image_slice, image_slice_2, atol=1e-3))
-        release_memory(pipe)
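The deleted tests above all pin down the same pipeline-level LoRA fusion contract: `fuse_lora` merges the loaded LoRA deltas into the base weights in place, and `unfuse_lora` backs them out again while leaving the LoRA layers loaded, so a fused run and an unfused run from the same seed should agree. A rough sketch of that round trip, reusing the checkpoint and LoRA IDs from the tests; the dtype, offloading, and tolerance here are illustrative rather than the exact test setup:

import numpy as np
import torch

from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.load_lora_weights(
    "hf-internal-testing/sdxl-1.0-lora",
    weight_name="sd_xl_offset_example-lora_1.0.safetensors",
)


def render(pipe):
    # fixed seed so the fused and unfused runs are directly comparable
    generator = torch.Generator().manual_seed(0)
    images = pipe(
        "masterpiece, best quality, mountain",
        output_type="np",
        generator=generator,
        num_inference_steps=2,
    ).images
    return images[0, -3:, -3:, -1].flatten()


pipe.fuse_lora()  # merge the LoRA deltas into the base weights in place
fused_slice = render(pipe)

pipe.unfuse_lora()  # restore the base weights; the LoRA layers stay loaded
unfused_slice = render(pipe)

# unfusing keeps the LoRA active as separate layers, so outputs should match
assert np.allclose(fused_slice, unfused_slice, atol=1e-3)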
From 06d0b8ac92dc77525048ea4b486b3d62257f78b5 Mon Sep 17 00:00:00 2001
From: Dhruv Nair
Date: Thu, 12 Oct 2023 15:54:21 +0000
Subject: [PATCH 3/3] remove ptl from test runner images

---
 docker/diffusers-pytorch-compile-cuda/Dockerfile  | 3 +--
 docker/diffusers-pytorch-cuda/Dockerfile          | 3 +--
 docker/diffusers-pytorch-xformers-cuda/Dockerfile | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/docker/diffusers-pytorch-compile-cuda/Dockerfile b/docker/diffusers-pytorch-compile-cuda/Dockerfile
index 1f7fe063b70d..ecdd5f7b9785 100644
--- a/docker/diffusers-pytorch-compile-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-compile-cuda/Dockerfile
@@ -41,7 +41,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
     scipy \
     tensorboard \
     transformers \
-    omegaconf \
-    pytorch-lightning
+    omegaconf

 CMD ["/bin/bash"]
diff --git a/docker/diffusers-pytorch-cuda/Dockerfile b/docker/diffusers-pytorch-cuda/Dockerfile
index b8e2af01f995..3a2de5167946 100644
--- a/docker/diffusers-pytorch-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-cuda/Dockerfile
@@ -40,7 +40,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
     scipy \
     tensorboard \
     transformers \
-    omegaconf \
-    pytorch-lightning
+    omegaconf

 CMD ["/bin/bash"]
diff --git a/docker/diffusers-pytorch-xformers-cuda/Dockerfile b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
index 4c447749da7b..95fe933798bc 100644
--- a/docker/diffusers-pytorch-xformers-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
@@ -41,7 +41,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
     tensorboard \
     transformers \
     omegaconf \
-    pytorch-lightning \
     xformers

 CMD ["/bin/bash"]
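With pytorch-lightning dropped everywhere, the dedicated runner image is now the only one of these three that still installs `xformers`. A cheap smoke check for the built image is to enable memory-efficient attention, which fails immediately if the package is absent or was compiled against a mismatched torch/CUDA pair. A minimal sketch, assuming a CUDA device; the tiny checkpoint ID is illustrative:

import torch

from diffusers import DiffusionPipeline
from diffusers.utils.import_utils import is_xformers_available

assert is_xformers_available(), "xformers did not make it into the image"

# a tiny checkpoint keeps the check fast; any SD-style pipeline would do
pipe = DiffusionPipeline.from_pretrained(
    "hf-internal-testing/tiny-stable-diffusion-torch"
).to("cuda")

pipe.enable_xformers_memory_efficient_attention()  # raises if xformers is unusable
images = pipe("smoke test", num_inference_steps=2, output_type="np").images
assert images.shape[0] == 1
pipe.disable_xformers_memory_efficient_attention()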