diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index f8bd15c46cdd..937ad07496b9 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -27,6 +27,7 @@ jobs: - diffusers-pytorch-cpu - diffusers-pytorch-cuda - diffusers-pytorch-compile-cuda + - diffusers-pytorch-xformers-cuda - diffusers-flax-cpu - diffusers-flax-tpu - diffusers-onnxruntime-cpu diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index a15a5412c4e4..5fadd095be35 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -290,6 +290,46 @@ jobs: name: torch_compile_test_reports path: reports + run_xformers_tests: + name: PyTorch xformers CUDA tests + + runs-on: docker-gpu + + container: + image: diffusers/diffusers-pytorch-xformers-cuda + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ + + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: NVIDIA-SMI + run: | + nvidia-smi + - name: Install dependencies + run: | + python -m pip install -e .[quality,test,training] + - name: Environment + run: | + python utils/print_env.py + - name: Run example tests on GPU + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/ + - name: Failure short reports + if: ${{ failure() }} + run: cat reports/tests_torch_xformers_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: torch_xformers_test_reports + path: reports + run_examples_tests: name: Examples PyTorch CUDA tests on Ubuntu diff --git a/docker/diffusers-pytorch-compile-cuda/Dockerfile b/docker/diffusers-pytorch-compile-cuda/Dockerfile index a41be50f9d58..ecdd5f7b9785 100644 --- a/docker/diffusers-pytorch-compile-cuda/Dockerfile +++ b/docker/diffusers-pytorch-compile-cuda/Dockerfile @@ -41,8 +41,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \ scipy \ tensorboard \ transformers \ - omegaconf \ - pytorch-lightning \ - xformers + omegaconf CMD ["/bin/bash"] diff --git a/docker/diffusers-pytorch-cuda/Dockerfile b/docker/diffusers-pytorch-cuda/Dockerfile index 4c447749da7b..3a2de5167946 100644 --- a/docker/diffusers-pytorch-cuda/Dockerfile +++ b/docker/diffusers-pytorch-cuda/Dockerfile @@ -25,8 +25,8 @@ ENV PATH="/opt/venv/bin:$PATH" # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) RUN python3 -m pip install --no-cache-dir --upgrade pip && \ python3 -m pip install --no-cache-dir \ - torch==2.0.1 \ - torchvision==0.15.2 \ + torch \ + torchvision \ torchaudio \ invisible_watermark && \ python3 -m pip install --no-cache-dir \ @@ -40,8 +40,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \ scipy \ tensorboard \ transformers \ - omegaconf \ - pytorch-lightning \ - xformers + omegaconf CMD ["/bin/bash"] diff --git a/docker/diffusers-pytorch-xformers-cuda/Dockerfile b/docker/diffusers-pytorch-xformers-cuda/Dockerfile new file mode 100644 index 000000000000..95fe933798bc --- /dev/null +++ b/docker/diffusers-pytorch-xformers-cuda/Dockerfile @@ -0,0 +1,46 @@ +FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04 +LABEL maintainer="Hugging Face" +LABEL repository="diffusers" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && \ + apt install -y bash \ + build-essential \ + git \ + git-lfs \ + curl \ + ca-certificates \ + libsndfile1-dev \ + libgl1 \ + python3.8 \ + python3-pip \ + python3.8-venv && \ + rm -rf /var/lib/apt/lists + +# make sure to use venv +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir \ + torch==2.0.1 \ + torchvision==0.15.2 \ + torchaudio \ + invisible_watermark && \ + python3 -m pip install --no-cache-dir \ + accelerate \ + datasets \ + hf-doc-builder \ + huggingface-hub \ + Jinja2 \ + librosa \ + numpy \ + scipy \ + tensorboard \ + transformers \ + omegaconf \ + xformers + +CMD ["/bin/bash"] diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index d616ef8c78b8..893429670daa 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -293,8 +293,8 @@ def create_lora_weight_file(self, tmpdirname): ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda") - def test_stable_diffusion_attn_processors(self): + @unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda") + def test_stable_diffusion_xformers_attn_processors(self): # disable_full_determinism() device = "cuda" # ensure determinism for the device-dependent torch.Generator components, _ = self.get_dummy_components() @@ -304,12 +304,23 @@ def test_stable_diffusion_attn_processors(self): _, _, inputs = self.get_dummy_inputs() - # run normal sd pipe + # run xformers attention + sd_pipe.enable_xformers_memory_efficient_attention() image = sd_pipe(**inputs).images assert image.shape == (1, 64, 64, 3) - # run xformers attention - sd_pipe.enable_xformers_memory_efficient_attention() + @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda") + def test_stable_diffusion_attn_processors(self): + # disable_full_determinism() + device = "cuda" # ensure determinism for the device-dependent torch.Generator + components, _ = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + _, _, inputs = self.get_dummy_inputs() + + # run normal sd pipe image = sd_pipe(**inputs).images assert image.shape == (1, 64, 64, 3) diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py index 8b6d279bbe6d..80c97978723c 100644 --- a/tests/models/test_modeling_common.py +++ b/tests/models/test_modeling_common.py @@ -30,7 +30,7 @@ from diffusers.models import UNet2DConditionModel from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0, XFormersAttnProcessor from diffusers.training_utils import EMAModel -from diffusers.utils import logging +from diffusers.utils import is_xformers_available, logging from diffusers.utils.testing_utils import ( CaptureLogger, require_python39_or_higher, @@ -269,6 +269,32 @@ def test_getattr_is_correct(self): assert str(error.exception) == f"'{type(model).__name__}' object has no attribute 'does_not_exist'" + @unittest.skipIf( + torch_device != "cuda" or not is_xformers_available(), + reason="XFormers attention is only available with CUDA and `xformers` installed", + ) + def test_set_xformers_attn_processor_for_determinism(self): + torch.use_deterministic_algorithms(False) + init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + model = self.model_class(**init_dict) + model.to(torch_device) + + if not hasattr(model, "set_attn_processor"): + # If not has `set_attn_processor`, skip test + return + + model.set_default_attn_processor() + assert all(type(proc) == AttnProcessor for proc in model.attn_processors.values()) + with torch.no_grad(): + output = model(**inputs_dict)[0] + + model.enable_xformers_memory_efficient_attention() + assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values()) + with torch.no_grad(): + output_2 = model(**inputs_dict)[0] + + assert torch.allclose(output, output_2, atol=self.base_precision) + @require_torch_gpu def test_set_attn_processor_for_determinism(self): torch.use_deterministic_algorithms(False) @@ -292,7 +318,7 @@ def test_set_attn_processor_for_determinism(self): model.enable_xformers_memory_efficient_attention() assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values()) with torch.no_grad(): - output_3 = model(**inputs_dict)[0] + model(**inputs_dict)[0] model.set_attn_processor(AttnProcessor2_0()) assert all(type(proc) == AttnProcessor2_0 for proc in model.attn_processors.values()) @@ -313,7 +339,6 @@ def test_set_attn_processor_for_determinism(self): # make sure that outputs match assert torch.allclose(output_2, output_1, atol=self.base_precision) - assert torch.allclose(output_2, output_3, atol=self.base_precision) assert torch.allclose(output_2, output_4, atol=self.base_precision) assert torch.allclose(output_2, output_5, atol=self.base_precision) assert torch.allclose(output_2, output_6, atol=self.base_precision)