1 change: 1 addition & 0 deletions .github/workflows/build_docker_images.yml
@@ -27,6 +27,7 @@ jobs:
- diffusers-pytorch-cpu
- diffusers-pytorch-cuda
- diffusers-pytorch-compile-cuda
+ - diffusers-pytorch-xformers-cuda
- diffusers-flax-cpu
- diffusers-flax-tpu
- diffusers-onnxruntime-cpu
40 changes: 40 additions & 0 deletions .github/workflows/push_tests.yml
@@ -290,6 +290,46 @@ jobs:
name: torch_compile_test_reports
path: reports

+  run_xformers_tests:
+    name: PyTorch xformers CUDA tests
+
+    runs-on: docker-gpu
+
+    container:
+      image: diffusers/diffusers-pytorch-xformers-cuda
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
+
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: NVIDIA-SMI
+      run: |
+        nvidia-smi
+    - name: Install dependencies
+      run: |
+        python -m pip install -e .[quality,test,training]
+    - name: Environment
+      run: |
+        python utils/print_env.py
+    - name: Run example tests on GPU
+      env:
+        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+      run: |
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
Review comment (Contributor):

ok for now, but I'm not a huge fan of using the -k "xformers" option in our runners as this is quite brittle. I'd prefer to start marking the tests like we do in Transformers: https://github.com/huggingface/transformers/blob/1e3c9ddacc7fc4142253bc9ddcba85c4d5b977e7/tests/test_modeling_common.py#L2746 so that we can then run:

pytest -m xformers tests/....
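
For illustration, a minimal sketch of the marker-based approach the reviewer describes, in the spirit of the linked Transformers helper. The decorator name `require_xformers` and the marker registration are assumptions made for this sketch, not something this PR adds:

    # Hypothetical helper, modeled on the Transformers pattern linked above;
    # not part of this PR.
    import unittest

    import pytest

    from diffusers.utils import is_xformers_available


    def require_xformers(test_case):
        """Tag a test with the `xformers` pytest marker and skip it
        outright when xformers is not installed."""
        test_case = pytest.mark.xformers(test_case)
        return unittest.skipUnless(is_xformers_available(), "test requires xformers")(test_case)

    # The marker would also need to be registered, e.g. in setup.cfg or pytest.ini:
    #
    #     [pytest]
    #     markers =
    #         xformers: marks tests that exercise xformers attention

With such a decorator in place, the CI step above could select tests with `pytest -m xformers tests/` instead of relying on brittle `-k "xformers"` name matching.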

+    - name: Failure short reports
+      if: ${{ failure() }}
+      run: cat reports/tests_torch_xformers_cuda_failures_short.txt
+
+    - name: Test suite reports artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: torch_xformers_test_reports
+        path: reports

run_examples_tests:
name: Examples PyTorch CUDA tests on Ubuntu

4 changes: 1 addition & 3 deletions docker/diffusers-pytorch-compile-cuda/Dockerfile
@@ -41,8 +41,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
scipy \
tensorboard \
transformers \
-    omegaconf \
-    pytorch-lightning \
-    xformers
+    omegaconf

CMD ["/bin/bash"]
8 changes: 3 additions & 5 deletions docker/diffusers-pytorch-cuda/Dockerfile
@@ -25,8 +25,8 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
-    torch==2.0.1 \
-    torchvision==0.15.2 \
+    torch \
+    torchvision \
torchaudio \
invisible_watermark && \
python3 -m pip install --no-cache-dir \
@@ -40,8 +40,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
scipy \
tensorboard \
transformers \
-    omegaconf \
-    pytorch-lightning \
-    xformers
+    omegaconf

CMD ["/bin/bash"]
46 changes: 46 additions & 0 deletions docker/diffusers-pytorch-xformers-cuda/Dockerfile
@@ -0,0 +1,46 @@
+FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
+LABEL maintainer="Hugging Face"
+LABEL repository="diffusers"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt update && \
+    apt install -y bash \
+    build-essential \
+    git \
+    git-lfs \
+    curl \
+    ca-certificates \
+    libsndfile1-dev \
+    libgl1 \
+    python3.8 \
+    python3-pip \
+    python3.8-venv && \
+    rm -rf /var/lib/apt/lists
+
+# make sure to use venv
+RUN python3 -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
+RUN python3 -m pip install --no-cache-dir --upgrade pip && \
+    python3 -m pip install --no-cache-dir \
+    torch==2.0.1 \
+    torchvision==0.15.2 \
+    torchaudio \
+    invisible_watermark && \
+    python3 -m pip install --no-cache-dir \
+    accelerate \
+    datasets \
+    hf-doc-builder \
+    huggingface-hub \
+    Jinja2 \
+    librosa \
+    numpy \
+    scipy \
+    tensorboard \
+    transformers \
+    omegaconf \
+    xformers
+
+CMD ["/bin/bash"]
21 changes: 16 additions & 5 deletions tests/lora/test_lora_layers_old_backend.py
@@ -293,8 +293,8 @@ def create_lora_weight_file(self, tmpdirname):
)
self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))

-    @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda")
-    def test_stable_diffusion_attn_processors(self):
+    @unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda")
+    def test_stable_diffusion_xformers_attn_processors(self):
# disable_full_determinism()
device = "cuda" # ensure determinism for the device-dependent torch.Generator
components, _ = self.get_dummy_components()
@@ -304,12 +304,23 @@ def test_stable_diffusion_attn_processors(self):

_, _, inputs = self.get_dummy_inputs()

-        # run normal sd pipe
+        # run xformers attention
+        sd_pipe.enable_xformers_memory_efficient_attention()
image = sd_pipe(**inputs).images
assert image.shape == (1, 64, 64, 3)

-        # run xformers attention
-        sd_pipe.enable_xformers_memory_efficient_attention()
+    @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda")
+    def test_stable_diffusion_attn_processors(self):
+        # disable_full_determinism()
+        device = "cuda"  # ensure determinism for the device-dependent torch.Generator
+        components, _ = self.get_dummy_components()
+        sd_pipe = StableDiffusionPipeline(**components)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        _, _, inputs = self.get_dummy_inputs()
+
+        # run normal sd pipe
image = sd_pipe(**inputs).images
assert image.shape == (1, 64, 64, 3)

31 changes: 28 additions & 3 deletions tests/models/test_modeling_common.py
@@ -30,7 +30,7 @@
from diffusers.models import UNet2DConditionModel
from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0, XFormersAttnProcessor
from diffusers.training_utils import EMAModel
-from diffusers.utils import logging
+from diffusers.utils import is_xformers_available, logging
from diffusers.utils.testing_utils import (
CaptureLogger,
require_python39_or_higher,
@@ -269,6 +269,32 @@ def test_getattr_is_correct(self):

assert str(error.exception) == f"'{type(model).__name__}' object has no attribute 'does_not_exist'"

+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_set_xformers_attn_processor_for_determinism(self):
+        torch.use_deterministic_algorithms(False)
+        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
+        model = self.model_class(**init_dict)
+        model.to(torch_device)
+
+        if not hasattr(model, "set_attn_processor"):
+            # If not has `set_attn_processor`, skip test
+            return
+
+        model.set_default_attn_processor()
+        assert all(type(proc) == AttnProcessor for proc in model.attn_processors.values())
+        with torch.no_grad():
+            output = model(**inputs_dict)[0]
+
+        model.enable_xformers_memory_efficient_attention()
+        assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values())
+        with torch.no_grad():
+            output_2 = model(**inputs_dict)[0]
+
+        assert torch.allclose(output, output_2, atol=self.base_precision)
+
@require_torch_gpu
def test_set_attn_processor_for_determinism(self):
torch.use_deterministic_algorithms(False)
@@ -292,7 +318,7 @@ def test_set_attn_processor_for_determinism(self):
model.enable_xformers_memory_efficient_attention()
assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values())
with torch.no_grad():
-            output_3 = model(**inputs_dict)[0]
+            model(**inputs_dict)[0]

model.set_attn_processor(AttnProcessor2_0())
assert all(type(proc) == AttnProcessor2_0 for proc in model.attn_processors.values())
@@ -313,7 +339,6 @@

# make sure that outputs match
assert torch.allclose(output_2, output_1, atol=self.base_precision)
-        assert torch.allclose(output_2, output_3, atol=self.base_precision)
assert torch.allclose(output_2, output_4, atol=self.base_precision)
assert torch.allclose(output_2, output_5, atol=self.base_precision)
assert torch.allclose(output_2, output_6, atol=self.base_precision)