diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml
index c31e179c7628..a15a5412c4e4 100644
--- a/.github/workflows/push_tests.yml
+++ b/.github/workflows/push_tests.yml
@@ -1,10 +1,11 @@
-name: Slow tests on main
+name: Slow Tests on main
 
 on:
   push:
     branches:
       - main
 
+
 env:
   DIFFUSERS_IS_CI: yes
   HF_HOME: /mnt/cache
@@ -12,53 +13,115 @@ env:
   MKL_NUM_THREADS: 8
   PYTEST_TIMEOUT: 600
   RUN_SLOW: yes
+  PIPELINE_USAGE_CUTOFF: 50000
 
 jobs:
-  run_slow_tests:
+  setup_torch_cuda_pipeline_matrix:
+    name: Setup Torch Pipelines CUDA Slow Tests Matrix
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-pytorch-cpu # this is a CPU image, but we need it to fetch the matrix
+      options: --shm-size "16gb" --ipc host
+    outputs:
+      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+    - name: Install dependencies
+      run: |
+        apt-get update && apt-get install libsndfile1-dev libgl1 -y
+        python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate.git
+
+    - name: Environment
+      run: |
+        python utils/print_env.py
+
+    - name: Fetch Pipeline Matrix
+      id: fetch_pipeline_matrix
+      run: |
+        matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
+        echo $matrix
+        echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT
+
+    - name: Pipeline Tests Artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: test-pipelines.json
+        path: reports
+
+  torch_pipelines_cuda_tests:
+    name: Torch Pipelines CUDA Slow Tests
+    needs: setup_torch_cuda_pipeline_matrix
     strategy:
       fail-fast: false
       max-parallel: 1
       matrix:
-        config:
-          - name: Slow PyTorch CUDA tests on Ubuntu
-            framework: pytorch
-            runner: docker-gpu
-            image: diffusers/diffusers-pytorch-cuda
-            report: torch_cuda
-          - name: Slow Flax TPU tests on Ubuntu
-            framework: flax
-            runner: docker-tpu
-            image: diffusers/diffusers-flax-tpu
-            report: flax_tpu
-          - name: Slow ONNXRuntime CUDA tests on Ubuntu
-            framework: onnxruntime
-            runner: docker-gpu
-            image: diffusers/diffusers-onnxruntime-cuda
-            report: onnx_cuda
-
-    name: ${{ matrix.config.name }}
-
-    runs-on: ${{ matrix.config.runner }}
-
+        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
+    runs-on: docker-gpu
     container:
-      image: ${{ matrix.config.image }}
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ ${{ matrix.config.runner == 'docker-tpu' && '--privileged' || '--gpus 0'}}
-
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+    - name: NVIDIA-SMI
+      run: |
+        nvidia-smi
+    - name: Install dependencies
+      run: |
+        apt-get update && apt-get install libsndfile1-dev libgl1 -y
+        python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate.git
+    - name: Environment
+      run: |
+        python utils/print_env.py
+    - name: Slow PyTorch CUDA checkpoint tests on Ubuntu
+      env:
+        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+        CUBLAS_WORKSPACE_CONFIG: :16:8
+      run: |
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+          -s -v -k "not Flax and not Onnx" \
+          --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
+          tests/pipelines/${{ matrix.module }}
+    - name: Failure short reports
+      if: ${{ failure() }}
+      run: |
+        cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
+        cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
+
+    - name: Test suite reports artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: pipeline_${{ matrix.module }}_test_reports
+        path: reports
+
+  torch_cuda_tests:
+    name: Torch CUDA Tests
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
     defaults:
       run:
         shell: bash
-
+    strategy:
+      matrix:
+        module: [models, schedulers, lora, others]
     steps:
     - name: Checkout diffusers
      uses: actions/checkout@v3
       with:
         fetch-depth: 2
 
-    - name: NVIDIA-SMI
-      if : ${{ matrix.config.runner == 'docker-gpu' }}
-      run: |
-        nvidia-smi
-
     - name: Install dependencies
       run: |
         apt-get update && apt-get install libsndfile1-dev libgl1 -y
@@ -70,47 +133,121 @@ jobs:
         python utils/print_env.py
 
     - name: Run slow PyTorch CUDA tests
-      if: ${{ matrix.config.framework == 'pytorch' }}
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
         # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
         CUBLAS_WORKSPACE_CONFIG: :16:8
-
       run: |
         python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-          -s -v -k "not Flax and not Onnx and not compile" \
-          --make-reports=tests_${{ matrix.config.report }} \
-          tests/
+          -s -v -k "not Flax and not Onnx" \
+          --make-reports=tests_torch_cuda \
+          tests/${{ matrix.module }}
+
+    - name: Failure short reports
+      if: ${{ failure() }}
+      run: |
+        cat reports/tests_torch_cuda_stats.txt
+        cat reports/tests_torch_cuda_failures_short.txt
+
+    - name: Test suite reports artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: torch_cuda_test_reports
+        path: reports
+
+  flax_tpu_tests:
+    name: Flax TPU Tests
+    runs-on: docker-tpu
+    container:
+      image: diffusers/diffusers-flax-tpu
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
+    defaults:
+      run:
+        shell: bash
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: Install dependencies
+      run: |
+        apt-get update && apt-get install libsndfile1-dev libgl1 -y
+        python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate.git
+
+    - name: Environment
+      run: |
+        python utils/print_env.py
 
     - name: Run slow Flax TPU tests
-      if: ${{ matrix.config.framework == 'flax' }}
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       run: |
         python -m pytest -n 0 \
           -s -v -k "Flax" \
-          --make-reports=tests_${{ matrix.config.report }} \
+          --make-reports=tests_flax_tpu \
           tests/
 
+    - name: Failure short reports
+      if: ${{ failure() }}
+      run: |
+        cat reports/tests_flax_tpu_stats.txt
+        cat reports/tests_flax_tpu_failures_short.txt
+
+    - name: Test suite reports artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: flax_tpu_test_reports
+        path: reports
+
+  onnx_cuda_tests:
+    name: ONNX CUDA Tests
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-onnxruntime-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+    defaults:
+      run:
+        shell: bash
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: Install dependencies
+      run: |
+        apt-get update && apt-get install libsndfile1-dev libgl1 -y
+        python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate.git
+
+    - name: Environment
+      run: |
+        python utils/print_env.py
+
     - name: Run slow ONNXRuntime CUDA tests
-      if: ${{ matrix.config.framework == 'onnxruntime' }}
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       run: |
         python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
           -s -v -k "Onnx" \
-          --make-reports=tests_${{ matrix.config.report }} \
+          --make-reports=tests_onnx_cuda \
           tests/
 
     - name: Failure short reports
       if: ${{ failure() }}
-      run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt
+      run: |
+        cat reports/tests_onnx_cuda_stats.txt
+        cat reports/tests_onnx_cuda_failures_short.txt
 
     - name: Test suite reports artifacts
       if: ${{ always() }}
       uses: actions/upload-artifact@v2
       with:
-        name: ${{ matrix.config.report }}_test_reports
+        name: onnx_cuda_test_reports
         path: reports
 
   run_torch_compile_tests:
@@ -131,21 +268,17 @@ jobs:
     - name: NVIDIA-SMI
       run: |
         nvidia-smi
-
    - name: Install dependencies
       run: |
         python -m pip install -e .[quality,test,training]
-
     - name: Environment
       run: |
         python utils/print_env.py
-
     - name: Run example tests on GPU
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       run: |
         python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
-
     - name: Failure short reports
       if: ${{ failure() }}
       run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -192,11 +325,13 @@ jobs:
 
     - name: Failure short reports
       if: ${{ failure() }}
-      run: cat reports/examples_torch_cuda_failures_short.txt
+      run: |
+        cat reports/examples_torch_cuda_stats.txt
+        cat reports/examples_torch_cuda_failures_short.txt
 
     - name: Test suite reports artifacts
       if: ${{ always() }}
       uses: actions/upload-artifact@v2
       with:
         name: examples_test_reports
-        path: reports
+        path: reports
\ No newline at end of file
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
index e10a8624f068..58f003960e99 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
@@ -213,7 +213,7 @@ def prepare_control_image(
             do_center_crop=False,
             do_normalize=False,
             return_tensors="pt",
-        )["pixel_values"].to(self.device)
+        )["pixel_values"].to(device)
 
         image_batch_size = image.shape[0]
         if image_batch_size == 1:
@@ -365,7 +365,7 @@ def __call__(
             height=height,
             batch_size=batch_size,
             num_images_per_prompt=1,
-            device=self.device,
+            device=device,
             dtype=self.controlnet.dtype,
             do_classifier_free_guidance=do_classifier_free_guidance,
         )
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
index 9fbdbafd52dd..adf950a1df29 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -765,8 +765,9 @@ def __call__(
 
         if needs_upcasting:
             self.upcast_vae()
-            latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
 
+        # Ensure latents are always the same type as the VAE
+        latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
         image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
 
         # cast back to fp16 if needed
diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index ae90f8b6a4b8..d616ef8c78b8 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -1554,7 +1554,7 @@ def test_lora_on_off(self, expected_max_diff=1e-3):
         torch_device != "cuda" or not is_xformers_available(),
         reason="XFormers attention is only available with CUDA and `xformers` installed",
     )
-    def test_lora_xformers_on_off(self, expected_max_diff=1e-4):
+    def test_lora_xformers_on_off(self, expected_max_diff=6e-4):
         # enable deterministic behavior for gradient checkpointing
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
 
diff --git a/tests/pipelines/controlnet/test_controlnet_inpaint.py b/tests/pipelines/controlnet/test_controlnet_inpaint.py
index 1ec1f493b9f0..a9140f3d5a31 100644
--- a/tests/pipelines/controlnet/test_controlnet_inpaint.py
+++ b/tests/pipelines/controlnet/test_controlnet_inpaint.py
@@ -39,6 +39,7 @@
     enable_full_determinism,
     floats_tensor,
     load_numpy,
+    numpy_cosine_similarity_distance,
     require_torch_gpu,
     slow,
     torch_device,
@@ -550,7 +551,7 @@ def make_inpaint_condition(image, image_mask):
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/boy_ray_ban.npy"
         )
 
-        assert np.abs(expected_image - image).max() < 0.9e-1
+        assert numpy_cosine_similarity_distance(expected_image.flatten(), image.flatten()) < 1e-2
 
     def test_load_local(self):
         controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
diff --git a/tests/pipelines/kandinsky_v22/__init__.py b/tests/pipelines/kandinsky2_2/__init__.py
similarity index 100%
rename from tests/pipelines/kandinsky_v22/__init__.py
rename to tests/pipelines/kandinsky2_2/__init__.py
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky.py b/tests/pipelines/kandinsky2_2/test_kandinsky.py
similarity index 100%
rename from tests/pipelines/kandinsky_v22/test_kandinsky.py
rename to tests/pipelines/kandinsky2_2/test_kandinsky.py
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_combined.py b/tests/pipelines/kandinsky2_2/test_kandinsky_combined.py
similarity index 100%
rename from tests/pipelines/kandinsky_v22/test_kandinsky_combined.py
rename to tests/pipelines/kandinsky2_2/test_kandinsky_combined.py
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_controlnet.py b/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py
similarity index 98%
rename from tests/pipelines/kandinsky_v22/test_kandinsky_controlnet.py
rename to tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py
index cec209c7cfec..74a912faa33f 100644
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_controlnet.py
+++ b/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py
@@ -221,6 +221,9 @@ def test_kandinsky_controlnet(self):
     def test_float16_inference(self):
         super().test_float16_inference(expected_max_diff=1e-1)
 
+    def test_inference_batch_single_identical(self):
+        super().test_inference_batch_single_identical(expected_max_diff=5e-4)
+
 
 @nightly
 @require_torch_gpu
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_controlnet_img2img.py b/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet_img2img.py
similarity index 100%
rename from tests/pipelines/kandinsky_v22/test_kandinsky_controlnet_img2img.py
rename to tests/pipelines/kandinsky2_2/test_kandinsky_controlnet_img2img.py
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_img2img.py b/tests/pipelines/kandinsky2_2/test_kandinsky_img2img.py
similarity index 100%
rename from tests/pipelines/kandinsky_v22/test_kandinsky_img2img.py
rename to tests/pipelines/kandinsky2_2/test_kandinsky_img2img.py
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_inpaint.py b/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py
similarity index 100%
rename from tests/pipelines/kandinsky_v22/test_kandinsky_inpaint.py
rename to tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py b/tests/pipelines/kandinsky2_2/test_kandinsky_prior.py
similarity index 100%
rename from tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
rename to tests/pipelines/kandinsky2_2/test_kandinsky_prior.py
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py b/tests/pipelines/kandinsky2_2/test_kandinsky_prior_emb2emb.py
similarity index 100%
rename from tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
rename to tests/pipelines/kandinsky2_2/test_kandinsky_prior_emb2emb.py
diff --git a/tests/pipelines/test_pipelines_flax.py b/tests/pipelines/test_pipelines_flax.py
index 294dad5ff0f1..fa2283d7a6b9 100644
--- a/tests/pipelines/test_pipelines_flax.py
+++ b/tests/pipelines/test_pipelines_flax.py
@@ -110,7 +110,7 @@ def test_stable_diffusion_v1_4(self):
         assert images.shape == (num_samples, 1, 512, 512, 3)
 
         if jax.device_count() == 8:
-            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.05652401)) < 1e-3
+            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.05652401)) < 1e-2
             assert np.abs((np.abs(images, dtype=np.float32).sum() - 2383808.2)) < 5e-1
 
     def test_stable_diffusion_v1_4_bfloat_16(self):
@@ -139,7 +139,7 @@ def test_stable_diffusion_v1_4_bfloat_16(self):
         assert images.shape == (num_samples, 1, 512, 512, 3)
 
         if jax.device_count() == 8:
-            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.04003906)) < 1e-3
+            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.04003906)) < 5e-2
             assert np.abs((np.abs(images, dtype=np.float32).sum() - 2373516.75)) < 5e-1
 
     def test_stable_diffusion_v1_4_bfloat_16_with_safety(self):
@@ -168,7 +168,7 @@ def test_stable_diffusion_v1_4_bfloat_16_with_safety(self):
         assert images.shape == (num_samples, 1, 512, 512, 3)
 
         if jax.device_count() == 8:
-            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.04003906)) < 1e-3
+            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.04003906)) < 5e-2
             assert np.abs((np.abs(images, dtype=np.float32).sum() - 2373516.75)) < 5e-1
 
     def test_stable_diffusion_v1_4_bfloat_16_ddim(self):
@@ -212,7 +212,7 @@ def test_stable_diffusion_v1_4_bfloat_16_ddim(self):
         assert images.shape == (num_samples, 1, 512, 512, 3)
 
         if jax.device_count() == 8:
-            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.045043945)) < 1e-3
+            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.045043945)) < 5e-2
             assert np.abs((np.abs(images, dtype=np.float32).sum() - 2347693.5)) < 5e-1
 
     def test_jax_memory_efficient_attention(self):
diff --git a/tests/pipelines/text_to_video/__init__.py b/tests/pipelines/text_to_video_synthesis/__init__.py
similarity index 100%
rename from tests/pipelines/text_to_video/__init__.py
rename to tests/pipelines/text_to_video_synthesis/__init__.py
diff --git a/tests/pipelines/text_to_video/test_text_to_video.py b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py
similarity index 100%
rename from tests/pipelines/text_to_video/test_text_to_video.py
rename to tests/pipelines/text_to_video_synthesis/test_text_to_video.py
diff --git a/tests/pipelines/text_to_video/test_text_to_video_zero.py b/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py
similarity index 100%
rename from tests/pipelines/text_to_video/test_text_to_video_zero.py
rename to tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py
diff --git a/tests/pipelines/text_to_video/test_video_to_video.py b/tests/pipelines/text_to_video_synthesis/test_video_to_video.py
similarity index 100%
rename from tests/pipelines/text_to_video/test_video_to_video.py
rename to tests/pipelines/text_to_video_synthesis/test_video_to_video.py
diff --git a/utils/fetch_torch_cuda_pipeline_test_matrix.py b/utils/fetch_torch_cuda_pipeline_test_matrix.py
new file mode 100644
index 000000000000..41a9c1c8270d
--- /dev/null
+++ b/utils/fetch_torch_cuda_pipeline_test_matrix.py
@@ -0,0 +1,96 @@
+import json
+import logging
+import os
+from collections import defaultdict
+from pathlib import Path
+
+from huggingface_hub import HfApi, ModelFilter
+
+import diffusers
+
+
+PATH_TO_REPO = Path(__file__).parent.parent.resolve()
+ALWAYS_TEST_PIPELINE_MODULES = [
+    "controlnet",
+    "stable_diffusion",
+    "stable_diffusion_2",
+    "stable_diffusion_xl",
+    "deepfloyd_if",
+    "kandinsky",
+    "kandinsky2_2",
+    "text_to_video_synthesis",
+    "wuerstchen",
+]
+PIPELINE_USAGE_CUTOFF = int(os.getenv("PIPELINE_USAGE_CUTOFF", 50000))
+
+logger = logging.getLogger(__name__)
+api = HfApi()
+filter = ModelFilter(library="diffusers")
+
+
+def filter_pipelines(usage_dict, usage_cutoff=10000):
+    output = []
+    for diffusers_object, usage in usage_dict.items():
+        if usage < usage_cutoff:
+            continue
+
+        if "Pipeline" in diffusers_object:
+            output.append(diffusers_object)
+
+    return output
+
+
+def fetch_pipeline_objects():
+    models = api.list_models(filter=filter)
+    downloads = defaultdict(int)
+
+    for model in models:
+        is_counted = False
+        for tag in model.tags:
+            if tag.startswith("diffusers:"):
+                is_counted = True
+                downloads[tag[len("diffusers:") :]] += model.downloads
+
+        if not is_counted:
+            downloads["other"] += model.downloads
+
+    # Remove 0 downloads
+    downloads = {k: v for k, v in downloads.items() if v > 0}
+    pipeline_objects = filter_pipelines(downloads, PIPELINE_USAGE_CUTOFF)
+
+    return pipeline_objects
+
+
+def fetch_pipeline_modules_to_test():
+    try:
+        pipeline_objects = fetch_pipeline_objects()
+    except Exception as e:
+        logger.error(e)
+        raise RuntimeError("Unable to fetch model list from HuggingFace Hub.")
+
+    test_modules = []
+    for pipeline_name in pipeline_objects:
+        module = getattr(diffusers, pipeline_name)
+        test_module = module.__module__.split(".")[-2].strip()
+        test_modules.append(test_module)
+
+    return test_modules
+
+
+def main():
+    test_modules = fetch_pipeline_modules_to_test()
+    test_modules.extend(ALWAYS_TEST_PIPELINE_MODULES)
+
+    # Get unique modules
+    test_modules = list(set(test_modules))
+    print(json.dumps(test_modules))
+
+    save_path = f"{PATH_TO_REPO}/reports"
+    os.makedirs(save_path, exist_ok=True)
+
+    with open(f"{save_path}/test-pipelines.json", "w") as f:
+        json.dump({"pipeline_test_modules": test_modules}, f)
+
+
+if __name__ == "__main__":
+    main()
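
Note on the new matrix script: a minimal sketch of how its download-count cutoff behaves, assuming it is run from the repository root with `diffusers` and `huggingface_hub` installed (as in CI); the usage numbers and the `SomeNichePipeline` name below are hypothetical.

    import sys

    sys.path.append("utils")  # assumption: executed from the repository root

    from fetch_torch_cuda_pipeline_test_matrix import filter_pipelines

    # Hypothetical download counts keyed by diffusers object name.
    usage = {
        "StableDiffusionPipeline": 1_200_000,  # Pipeline class above the cutoff -> kept
        "KandinskyV22Pipeline": 80_000,        # Pipeline class above the cutoff -> kept
        "SomeNichePipeline": 42,               # below the cutoff -> dropped
        "AutoencoderKL": 900_000,              # not a Pipeline class -> dropped
    }

    print(filter_pipelines(usage, usage_cutoff=50_000))
    # ['StableDiffusionPipeline', 'KandinskyV22Pipeline']

The script then maps each surviving pipeline class to its test module directory, unions the result with ALWAYS_TEST_PIPELINE_MODULES, and prints the list as JSON, which the `setup_torch_cuda_pipeline_matrix` job feeds through `fromJson(...)` to build the `module` matrix for `torch_pipelines_cuda_tests`.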