diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml index 7925401a3017..70ee1d47294e 100644 --- a/.github/workflows/nightly_tests.yml +++ b/.github/workflows/nightly_tests.yml @@ -12,110 +12,96 @@ env: PYTEST_TIMEOUT: 600 RUN_SLOW: yes RUN_NIGHTLY: yes + PIPELINE_USAGE_CUTOFF: 5000 SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} jobs: - run_nightly_tests: + setup_torch_cuda_pipeline_matrix: + name: Setup Torch Pipelines Matrix + runs-on: ubuntu-latest + outputs: + pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }} + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + - name: Install dependencies + run: | + pip install -e . + pip install huggingface_hub + - name: Fetch Pipeline Matrix + id: fetch_pipeline_matrix + run: | + matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py) + echo $matrix + echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT + + - name: Pipeline Tests Artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: test-pipelines.json + path: reports + + run_nightly_tests_for_torch_pipelines: + name: Torch Pipelines CUDA Nightly Tests + needs: setup_torch_cuda_pipeline_matrix strategy: fail-fast: false matrix: - config: - - name: Nightly PyTorch CUDA tests on Ubuntu - framework: pytorch - runner: docker-gpu - image: diffusers/diffusers-pytorch-cuda - report: torch_cuda - - name: Nightly Flax TPU tests on Ubuntu - framework: flax - runner: docker-tpu - image: diffusers/diffusers-flax-tpu - report: flax_tpu - - name: Nightly ONNXRuntime CUDA tests on Ubuntu - framework: onnxruntime - runner: docker-gpu - image: diffusers/diffusers-onnxruntime-cuda - report: onnx_cuda - - name: ${{ matrix.config.name }} - - runs-on: ${{ matrix.config.runner }} - + module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }} + runs-on: [single-gpu, nvidia-gpu, t4, ci] container: - image: ${{ matrix.config.image }} - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ ${{ matrix.config.runner == 'docker-tpu' && '--privileged' || '--gpus 0'}} - - defaults: - run: - shell: bash - + image: diffusers/diffusers-pytorch-cuda + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 steps: - name: Checkout diffusers uses: actions/checkout@v3 with: fetch-depth: 2 - - name: NVIDIA-SMI - if: ${{ matrix.config.runner == 'docker-gpu' }} - run: | - nvidia-smi - + run: nvidia-smi + - name: Install dependencies run: | + apt-get update && apt-get install libsndfile1-dev libgl1 -y python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" python -m uv pip install -e [quality,test] - python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers - python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate + python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install pytest-reportlog - + - name: Environment run: | python utils/print_env.py - - - name: Run nightly PyTorch CUDA tests - if: ${{ matrix.config.framework == 'pytorch' }} + + - name: Nightly PyTorch CUDA checkpoint (pipelines) tests env: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms + CUBLAS_WORKSPACE_CONFIG: :16:8 run: | - python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ -s -v -k "not Flax and not Onnx" \ - --make-reports=tests_${{ matrix.config.report }} \ - --report-log=${{ matrix.config.report }}.log \ - tests/ - - - name: Run nightly Flax TPU tests - if: ${{ matrix.config.framework == 'flax' }} - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - run: | - python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" - python -m pytest -n 0 \ - -s -v -k "Flax" \ - --make-reports=tests_${{ matrix.config.report }} \ - --report-log=${{ matrix.config.report }}.log \ - tests/ - - - name: Run nightly ONNXRuntime CUDA tests - if: ${{ matrix.config.framework == 'onnxruntime' }} - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - run: | - python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ - -s -v -k "Onnx" \ - --make-reports=tests_${{ matrix.config.report }} \ - --report-log=${{ matrix.config.report }}.log \ - tests/ - + --make-reports=tests_pipeline_${{ matrix.module }}_cuda \ + --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \ + tests/pipelines/${{ matrix.module }} + - name: Failure short reports if: ${{ failure() }} - run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt + run: | + cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt + cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt - name: Test suite reports artifacts if: ${{ always() }} uses: actions/upload-artifact@v2 with: - name: ${{ matrix.config.report }}_test_reports + name: pipeline_${{ matrix.module }}_test_reports path: reports - name: Generate Report and Notify Channel @@ -124,6 +110,248 @@ jobs: pip install slack_sdk tabulate python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + run_nightly_tests_for_other_torch_modules: + name: Torch Non-Pipelines CUDA Nightly Tests + runs-on: docker-gpu + container: + image: diffusers/diffusers-pytorch-cuda + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 + defaults: + run: + shell: bash + strategy: + matrix: + module: [models, schedulers, others, examples] + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Install dependencies + run: | + apt-get update && apt-get install libsndfile1-dev libgl1 -y + python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" + python -m uv pip install -e [quality,test] + python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git + python -m uv pip install pytest-reportlog + + - name: Environment + run: python utils/print_env.py + + - name: Run nightly PyTorch CUDA tests for non-pipeline modules + if: ${{ matrix.module != 'examples'}} + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms + CUBLAS_WORKSPACE_CONFIG: :16:8 + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "not Flax and not Onnx" \ + --make-reports=tests_torch_${{ matrix.module }}_cuda \ + --report-log=tests_torch_${{ matrix.module }}_cuda.log \ + tests/${{ matrix.module }} + + - name: Run nightly example tests with Torch + if: ${{ matrix.module == 'examples' }} + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms + CUBLAS_WORKSPACE_CONFIG: :16:8 + run: | + python -m uv pip install peft@git+https://github.com/huggingface/peft.git + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v --make-reports=examples_torch_cuda \ + --report-log=examples_torch_cuda.log \ + examples/ + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt + cat reports/tests_torch_${{ matrix.module }}_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: torch_${{ matrix.module }}_cuda_test_reports + path: reports + + - name: Generate Report and Notify Channel + if: always() + run: | + pip install slack_sdk tabulate + python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + + run_lora_nightly_tests: + name: Nightly LoRA Tests with PEFT and TORCH + runs-on: docker-gpu + container: + image: diffusers/diffusers-pytorch-cuda + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 + defaults: + run: + shell: bash + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Install dependencies + run: | + apt-get update && apt-get install libsndfile1-dev libgl1 -y + python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" + python -m uv pip install -e [quality,test] + python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git + python -m uv pip install peft@git+https://github.com/huggingface/peft.git + python -m uv pip install pytest-reportlog + + - name: Environment + run: python utils/print_env.py + + - name: Run nightly LoRA tests with PEFT and Torch + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms + CUBLAS_WORKSPACE_CONFIG: :16:8 + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "not Flax and not Onnx" \ + --make-reports=tests_torch_lora_cuda \ + --report-log=tests_torch_lora_cuda.log \ + tests/lora + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_torch_lora_cuda_stats.txt + cat reports/tests_torch_lora_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: torch_lora_cuda_test_reports + path: reports + + - name: Generate Report and Notify Channel + if: always() + run: | + pip install slack_sdk tabulate + python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + + run_flax_tpu_tests: + name: Nightly Flax TPU Tests + runs-on: docker-tpu + container: + image: diffusers/diffusers-flax-tpu + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged + defaults: + run: + shell: bash + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Install dependencies + run: | + apt-get update && apt-get install libsndfile1-dev libgl1 -y + python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" + python -m uv pip install -e [quality,test] + python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git + python -m uv pip install pytest-reportlog + + - name: Environment + run: python utils/print_env.py + + - name: Run nightly Flax TPU tests + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 0 \ + -s -v -k "Flax" \ + --make-reports=tests_flax_tpu \ + --report-log=tests_flax_tpu.log \ + tests/ + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_flax_tpu_stats.txt + cat reports/tests_flax_tpu_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: flax_tpu_test_reports + path: reports + + - name: Generate Report and Notify Channel + if: always() + run: | + pip install slack_sdk tabulate + python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + + run_nightly_onnx_tests: + name: Nightly ONNXRuntime CUDA tests on Ubuntu + runs-on: docker-gpu + container: + image: diffusers/diffusers-onnxruntime-cuda + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ + + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: NVIDIA-SMI + run: nvidia-smi + + - name: Install dependencies + run: | + python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" + python -m uv pip install -e [quality,test] + python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git + python -m uv pip install pytest-reportlog + + - name: Environment + run: python utils/print_env.py + + - name: Run nightly ONNXRuntime CUDA tests + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "Onnx" \ + --make-reports=tests_onnx_cuda \ + --report-log=tests_onnx_cuda.log \ + tests/ + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_onnx_cuda_stats.txt + cat reports/tests_onnx_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.config.report }}_test_reports + path: reports + + - name: Generate Report and Notify Channel + if: always() + run: | + pip install slack_sdk tabulate + python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + run_nightly_tests_apple_m1: name: Nightly PyTorch MPS tests on MacOS runs-on: [ self-hosted, apple-m1 ]