huggingface · DN6 · Oct 4, 2023 · Sep 20, 2023 · Sep 20, 2023 · Sep 20, 2023
diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml
@@ -1,64 +1,127 @@
-name: Slow tests on main
+name: Slow Tests on main
 
 on:
   push:
     branches:
       - main
 
+
 env:
   DIFFUSERS_IS_CI: yes
   HF_HOME: /mnt/cache
   OMP_NUM_THREADS: 8
   MKL_NUM_THREADS: 8
   PYTEST_TIMEOUT: 600
   RUN_SLOW: yes
+  PIPELINE_USAGE_CUTOFF: 50000
 
 jobs:
-  run_slow_tests:
+  # Runs utils/fetch_torch_cuda_pipeline_test_matrix.py and exposes its stdout
+  # as the `pipeline_test_matrix` job output, which torch_pipelines_cuda_tests
+  # expands with fromJson() into its job matrix.
+  setup_torch_cuda_pipeline_matrix:
+    name: Setup Torch Pipelines CUDA Slow Tests Matrix
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-pytorch-cpu # this is a CPU image, but we need it to fetch the matrix
+      options: --shm-size "16gb" --ipc host
+    outputs:
+      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: Install dependencies
+        run: |
+          apt-get update && apt-get install libsndfile1-dev libgl1 -y
+          python -m pip install -e .[quality,test]
+          python -m pip install git+https://github.com/huggingface/accelerate.git
+
+      - name: Environment
+        run: |
+          python utils/print_env.py
+
+      - name: Fetch Pipeline Matrix
+        id: fetch_pipeline_matrix
+        run: |
+          matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
+          # Quote every expansion so the matrix string is logged and written
+          # verbatim: unquoted, the shell would word-split it and treat the
+          # JSON brackets as glob patterns.
+          echo "$matrix"
+          echo "pipeline_test_matrix=$matrix" >> "$GITHUB_OUTPUT"
+
+      # Upload the reports directory even when an earlier step failed.
+      - name: Pipeline Tests Artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: test-pipelines.json
+          path: reports
+
+  # Slow PyTorch CUDA tests for tests/pipelines, one matrix job per module
+  # name published by setup_torch_cuda_pipeline_matrix.
+  torch_pipelines_cuda_tests:
+    name: Torch Pipelines CUDA Slow Tests
+    needs: setup_torch_cuda_pipeline_matrix
     strategy:
+      # A failing module does not cancel the others (fail-fast: false), and
+      # only one matrix job runs at a time (max-parallel: 1).
       fail-fast: false
       max-parallel: 1
       matrix:
-        config:
-          - name: Slow PyTorch CUDA tests on Ubuntu
-            framework: pytorch
-            runner: docker-gpu
-            image: diffusers/diffusers-pytorch-cuda
-            report: torch_cuda
-          - name: Slow Flax TPU tests on Ubuntu
-            framework: flax
-            runner: docker-tpu
-            image: diffusers/diffusers-flax-tpu
-            report: flax_tpu
-          - name: Slow ONNXRuntime CUDA tests on Ubuntu
-            framework: onnxruntime
-            runner: docker-gpu
-            image: diffusers/diffusers-onnxruntime-cuda
-            report: onnx_cuda
-
-    name: ${{ matrix.config.name }}
-
-    runs-on: ${{ matrix.config.runner }}
-
+        # Parsed from the JSON string emitted by the setup job's
+        # `pipeline_test_matrix` output.
+        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
+    runs-on: docker-gpu
     container:
-      image: ${{ matrix.config.image }}
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ ${{ matrix.config.runner == 'docker-tpu' && '--privileged' || '--gpus 0'}}
-
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+      - name: Install dependencies
+        run: |
+          apt-get update && apt-get install libsndfile1-dev libgl1 -y
+          python -m pip install -e .[quality,test]
+          python -m pip install git+https://github.com/huggingface/accelerate.git
+      - name: Environment
+        run: |
+          python utils/print_env.py
+      # Runs pytest on tests/pipelines/<module> only, deselecting tests that
+      # match the Flax or Onnx keywords; the report id embeds the module name.
+      - name: Slow PyTorch CUDA checkpoint tests on Ubuntu
+        env:
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+          CUBLAS_WORKSPACE_CONFIG: :16:8
+        run: |
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+            -s -v -k "not Flax and not Onnx" \
+            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
+            tests/pipelines/${{ matrix.module }}
+      # Only on failure: print this module's stats and short failure reports.
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: |
+          cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
+          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
+
+      # Always upload reports/; the artifact name is unique per module.
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: pipeline_${{ matrix.module }}_test_reports
+          path: reports
+
+  # Slow PyTorch CUDA tests for the non-pipeline suites: one matrix job per
+  # directory under tests/ (models, schedulers, lora, others).
+  torch_cuda_tests:
+    name: Torch CUDA Tests
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
     defaults:
       run:
         shell: bash
-
+    strategy:
+      # Keep the remaining modules running when one module fails, matching
+      # torch_pipelines_cuda_tests.
+      fail-fast: false
+      matrix:
+        module: [models, schedulers, lora, others]
     steps:
     - name: Checkout diffusers
       uses: actions/checkout@v3
       with:
         fetch-depth: 2
 
-    - name: NVIDIA-SMI
-      if : ${{ matrix.config.runner == 'docker-gpu' }}
-      run: |
-        nvidia-smi
-
     - name: Install dependencies
       run: |
         apt-get update && apt-get install libsndfile1-dev libgl1 -y
@@ -70,47 +133,121 @@ jobs:
         python utils/print_env.py
 
+    # The report id and the artifact name below both carry the module name, so
+    # the matrix jobs do not overwrite each other's report files when their
+    # uploads land in the same workflow run.
     - name: Run slow PyTorch CUDA tests
-      if: ${{ matrix.config.framework == 'pytorch' }}
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
         # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
         CUBLAS_WORKSPACE_CONFIG: :16:8
-
       run: |
         python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-          -s -v -k "not Flax and not Onnx and not compile" \
-          --make-reports=tests_${{ matrix.config.report }} \
-          tests/
+          -s -v -k "not Flax and not Onnx" \
+          --make-reports=tests_torch_cuda_${{ matrix.module }} \
+          tests/${{ matrix.module }}
+
+    - name: Failure short reports
+      if: ${{ failure() }}
+      run: |
+        cat reports/tests_torch_cuda_${{ matrix.module }}_stats.txt
+        cat reports/tests_torch_cuda_${{ matrix.module }}_failures_short.txt
+
+    - name: Test suite reports artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: torch_cuda_test_reports_${{ matrix.module }}
+        path: reports
+
+  # Slow Flax tests: runs pytest over tests/ selecting only tests that match
+  # the "Flax" keyword, inside the diffusers-flax-tpu image on the docker-tpu
+  # runner (container started with --privileged).
+  flax_tpu_tests:
+    name: Flax TPU Tests
+    runs-on: docker-tpu
+    container:
+      image: diffusers/diffusers-flax-tpu
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
+    defaults:
+      run:
+        shell: bash
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: Install dependencies
+      run: |
+        apt-get update && apt-get install libsndfile1-dev libgl1 -y
+        python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate.git
+
+    - name: Environment
+      run: |
+        python utils/print_env.py
 
+    # `-n 0` is passed to pytest here, unlike the `-n 1` used by the CUDA jobs.
     - name: Run slow Flax TPU tests
-      if: ${{ matrix.config.framework == 'flax' }}
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       run: |
         python -m pytest -n 0 \
           -s -v -k "Flax" \
-          --make-reports=tests_${{ matrix.config.report }} \
+          --make-reports=tests_flax_tpu \
           tests/
 
+    # Only on failure: print the stats and short failure reports.
+    - name: Failure short reports
+      if: ${{ failure() }}
+      run: |
+        cat reports/tests_flax_tpu_stats.txt
+        cat reports/tests_flax_tpu_failures_short.txt
+
+    # Always upload reports/, whether or not the tests passed.
+    - name: Test suite reports artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: flax_tpu_test_reports
+        path: reports
+
+  # Slow ONNXRuntime tests: runs pytest over tests/ selecting only tests that
+  # match the "Onnx" keyword, inside the diffusers-onnxruntime-cuda image on
+  # the docker-gpu runner.
+  onnx_cuda_tests:
+    name: ONNX CUDA Tests
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-onnxruntime-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+    defaults:
+      run:
+        shell: bash
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: Install dependencies
+      run: |
+        apt-get update && apt-get install libsndfile1-dev libgl1 -y
+        python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate.git
+
+    - name: Environment
+      run: |
+        python utils/print_env.py
+
     - name: Run slow ONNXRuntime CUDA tests
-      if: ${{ matrix.config.framework == 'onnxruntime' }}
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       run: |
         python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
           -s -v -k "Onnx" \
-          --make-reports=tests_${{ matrix.config.report }} \
+          --make-reports=tests_onnx_cuda \
           tests/
 
+    # Only on failure: print the stats and short failure reports.
     - name: Failure short reports
       if: ${{ failure() }}
-      run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt
+      run: |
+        cat reports/tests_onnx_cuda_stats.txt
+        cat reports/tests_onnx_cuda_failures_short.txt
 
+    # Always upload reports/, whether or not the tests passed.
     - name: Test suite reports artifacts
       if: ${{ always() }}
       uses: actions/upload-artifact@v2
       with:
-        name: ${{ matrix.config.report }}_test_reports
+        name: onnx_cuda_test_reports
         path: reports
 
   run_torch_compile_tests:
@@ -131,21 +268,17 @@ jobs:
     - name: NVIDIA-SMI
       run: |
         nvidia-smi
-
     - name: Install dependencies
       run: |
         python -m pip install -e .[quality,test,training]
-
     - name: Environment
       run: |
         python utils/print_env.py
-
     - name: Run example tests on GPU
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       run: |
         python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
-
     - name: Failure short reports
       if: ${{ failure() }}
       run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -192,11 +325,13 @@ jobs:
 
     - name: Failure short reports
       if: ${{ failure() }}
-      run: cat reports/examples_torch_cuda_failures_short.txt
+      run: |
+        cat reports/examples_torch_cuda_stats.txt
+        cat reports/examples_torch_cuda_failures_short.txt
 
     - name: Test suite reports artifacts
       if: ${{ always() }}
       uses: actions/upload-artifact@v2
       with:
         name: examples_test_reports
-        path: reports
+        path: reports
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
@@ -213,7 +213,7 @@ def prepare_control_image(
             do_center_crop=False,
             do_normalize=False,
             return_tensors="pt",
-        )["pixel_values"].to(self.device)
+        )["pixel_values"].to(device)
         image_batch_size = image.shape[0]
 
         if image_batch_size == 1:
@@ -365,7 +365,7 @@ def __call__(
             height=height,
             batch_size=batch_size,
             num_images_per_prompt=1,
-            device=self.device,
+            device=device,
             dtype=self.controlnet.dtype,
             do_classifier_free_guidance=do_classifier_free_guidance,
         )

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -765,8 +765,9 @@ def __call__(
 
             if needs_upcasting:
                 self.upcast_vae()
-                latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
 
+            # Ensure latents are always the same type as the VAE
+            latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
 
             # cast back to fp16 if needed

diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
@@ -1554,7 +1554,7 @@ def test_lora_on_off(self, expected_max_diff=1e-3):
         torch_device != "cuda" or not is_xformers_available(),
         reason="XFormers attention is only available with CUDA and `xformers` installed",
     )
-    def test_lora_xformers_on_off(self, expected_max_diff=1e-4):
+    def test_lora_xformers_on_off(self, expected_max_diff=6e-4):
         # enable deterministic behavior for gradient checkpointing
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
 

diff --git a/tests/pipelines/controlnet/test_controlnet_inpaint.py b/tests/pipelines/controlnet/test_controlnet_inpaint.py
@@ -39,6 +39,7 @@
     enable_full_determinism,
     floats_tensor,
     load_numpy,
+    numpy_cosine_similarity_distance,
     require_torch_gpu,
     slow,
     torch_device,
@@ -550,7 +551,7 @@ def make_inpaint_condition(image, image_mask):
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/boy_ray_ban.npy"
         )
 
-        assert np.abs(expected_image - image).max() < 0.9e-1
+        assert numpy_cosine_similarity_distance(expected_image.flatten(), image.flatten()) < 1e-2
 
     def test_load_local(self):
         controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")

diff --git a/tests/pipelines/kandinsky_v22/__init__.py → tests/pipelines/kandinsky2_2/__init__.py b/tests/pipelines/kandinsky_v22/__init__.py → tests/pipelines/kandinsky2_2/__init__.py
diff --git a/...pipelines/kandinsky_v22/test_kandinsky.py → .../pipelines/kandinsky2_2/test_kandinsky.py b/...pipelines/kandinsky_v22/test_kandinsky.py → .../pipelines/kandinsky2_2/test_kandinsky.py
diff --git a/.../kandinsky_v22/test_kandinsky_combined.py → ...s/kandinsky2_2/test_kandinsky_combined.py b/.../kandinsky_v22/test_kandinsky_combined.py → ...s/kandinsky2_2/test_kandinsky_combined.py
diff --git a/...andinsky_v22/test_kandinsky_controlnet.py → ...kandinsky2_2/test_kandinsky_controlnet.py b/...andinsky_v22/test_kandinsky_controlnet.py → ...kandinsky2_2/test_kandinsky_controlnet.py
@@ -221,6 +221,9 @@ def test_kandinsky_controlnet(self):
     def test_float16_inference(self):
+        # Delegates to the parent-class test, passing expected_max_diff=1e-1.
+        # NOTE(review): presumably a looser tolerance than the base default — confirm.
         super().test_float16_inference(expected_max_diff=1e-1)
 
+    def test_inference_batch_single_identical(self):
+        # Delegates to the parent-class test, passing expected_max_diff=5e-4.
+        # NOTE(review): presumably a looser tolerance than the base default — confirm.
+        super().test_inference_batch_single_identical(expected_max_diff=5e-4)
+
 
 @nightly
 @require_torch_gpu

diff --git a/..._v22/test_kandinsky_controlnet_img2img.py → ...y2_2/test_kandinsky_controlnet_img2img.py b/..._v22/test_kandinsky_controlnet_img2img.py → ...y2_2/test_kandinsky_controlnet_img2img.py
diff --git a/...s/kandinsky_v22/test_kandinsky_img2img.py → ...es/kandinsky2_2/test_kandinsky_img2img.py b/...s/kandinsky_v22/test_kandinsky_img2img.py → ...es/kandinsky2_2/test_kandinsky_img2img.py
diff --git a/...s/kandinsky_v22/test_kandinsky_inpaint.py → ...es/kandinsky2_2/test_kandinsky_inpaint.py b/...s/kandinsky_v22/test_kandinsky_inpaint.py → ...es/kandinsky2_2/test_kandinsky_inpaint.py
diff --git a/...nes/kandinsky_v22/test_kandinsky_prior.py → ...ines/kandinsky2_2/test_kandinsky_prior.py b/...nes/kandinsky_v22/test_kandinsky_prior.py → ...ines/kandinsky2_2/test_kandinsky_prior.py
diff --git a/...insky_v22/test_kandinsky_prior_emb2emb.py → ...dinsky2_2/test_kandinsky_prior_emb2emb.py b/...insky_v22/test_kandinsky_prior_emb2emb.py → ...dinsky2_2/test_kandinsky_prior_emb2emb.py