From 5a75304c137559e1b0daf16b801ff2e2ca5bfe37 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Thu, 26 Oct 2023 15:40:35 +0000 Subject: [PATCH 01/29] Try merge tests --- .github/workflows/run_merge_tests.yml | 28 ++++++----- .../self_hosted_integration_tests.yml | 48 ++++++++++--------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index 7dacab8c508..49e2c15fe24 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -18,31 +18,33 @@ jobs: options: --gpus all --shm-size "16gb" defaults: run: - working-directory: accelerate/ shell: bash steps: - - name: Update clone & pip install - run: | - source activate accelerate - git config --global --add safe.directory '*' - git fetch && git checkout ${{ github.sha }} - pip install -e .[testing,test_trackers] -U - pip install pytest-reportlog tabulate + - name: Install accelerate + run: + source activate accelerate; + git clone -b ${{ github.sha }} https://github.com/huggingface/accelerate + cd accelerate + pip install -e .[testing, test_trackers] -U; + pip install pytest-reportlog tabulate - - name: Run CLI tests + - name: Run CLI tests (use make cli) run: | source activate accelerate - make test_cli + cd accelerate + echo "Hello world!" - name: Run test on GPUs if: always() run: | source activate accelerate + cd accelerate make test - name: Run examples on GPUs if: always() run: | source activate accelerate + cd accelerate pip uninstall comet_ml -y make test_examples @@ -50,6 +52,7 @@ jobs: if: always() run: | pip install tabulate + cd accelerate python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: @@ -73,17 +76,20 @@ jobs: - name: Run test on GPUs run: | source activate accelerate + cd accelerate make test - name: Run examples on GPUs if: always() run: | source activate accelerate + cd accelerate pip uninstall comet_ml -y make test_examples - name: Generate Report if: always() run: | - pip install tabulate + source activate accelerate + cd accelerate python utils/log_reports.py >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 7b4d8f6b813..aeaba71a103 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -38,24 +38,25 @@ jobs: "0,1" ] steps: - - name: Update accelerate clone and pip install - working-directory: accelerate/ - run: - source activate accelerate; - git config --global --add safe.directory '*'; - git checkout main && git fetch && git checkout ${{ github.sha }}; - pip install -e .; - - - name: Update transformers clone & pip install - working-directory: transformers/ + - name: Install transformers run: | source activate accelerate - git config --global --add safe.directory '*' - git checkout main && git pull && git fetch --tags + git clone --filter=blob:none --no-checkout https://github.com/huggingface/transformers + cd transformers if [[ ${{ matrix.transformers-version }} = pypi ]]; then - git checkout $(git tag --sort=taggerdate | tail -1) + git checkout $(git tag --sort=taggerdate | tail -1); else + git checkout main fi pip install .[torch,deepspeed-testing] + cd .. + + - name: Install accelerate + run: + source activate accelerate; + git clone -b ${{ github.sha }} https://github.com/huggingface/accelerate + cd accelerate + pip install -e .[testing]; + cd .. - name: Show installed libraries run: | @@ -94,25 +95,26 @@ jobs: github ] steps: - - name: Update accelerate clone and pip install - working-directory: accelerate/ + - name: Install accelerate run: source activate accelerate; - git config --global --add safe.directory '*'; - git checkout main && git fetch && git checkout ${{ github.sha }}; - pip install -e .; + git clone -b ${{ github.sha }} https://github.com/huggingface/accelerate + cd accelerate + pip install -e .[testing]; + cd .. - - name: Update skorch clone & pip install - working-directory: skorch/ + - name: Install skorch run: | source activate accelerate - git config --global --add safe.directory '*' - git checkout master && git pull + git clone --filter=blob:none --no-checkout https://github.com/skorch-dev/skorch + cd skorch if [[ ${{ matrix.skorch-version }} = pypi ]]; then - git checkout $(git describe --tags `git rev-list --tags --max-count=1`) + git checkout $(git describe --tags `git rev-list --tags --max-count=1`); else + git checkout main fi pip install .[testing] pip install flaky + cd .. - name: Show installed libraries run: | From 0eb6faeddddfb25e2da0efeb835e2f0cdf759a5b Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Thu, 26 Oct 2023 15:48:05 +0000 Subject: [PATCH 02/29] Fix --- .github/workflows/run_merge_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index 49e2c15fe24..98669b621a4 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -21,7 +21,7 @@ jobs: shell: bash steps: - name: Install accelerate - run: + run: | source activate accelerate; git clone -b ${{ github.sha }} https://github.com/huggingface/accelerate cd accelerate From 47ce4a626eaf84a14f17594dc00d86a0d58c5638 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Thu, 26 Oct 2023 15:53:24 +0000 Subject: [PATCH 03/29] Checkout branch --- .github/workflows/run_merge_tests.yml | 3 ++- .github/workflows/self_hosted_integration_tests.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index 98669b621a4..f6a23fac080 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -23,8 +23,9 @@ jobs: - name: Install accelerate run: | source activate accelerate; - git clone -b ${{ github.sha }} https://github.com/huggingface/accelerate + git clone https://github.com/huggingface/accelerate cd accelerate + git checkout ${{ github.sha }} pip install -e .[testing, test_trackers] -U; pip install pytest-reportlog tabulate diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index aeaba71a103..68a0d9d5fec 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -53,8 +53,9 @@ jobs: - name: Install accelerate run: source activate accelerate; - git clone -b ${{ github.sha }} https://github.com/huggingface/accelerate + git clone https://github.com/huggingface/accelerate cd accelerate + git checkout ${{ github.sha }} pip install -e .[testing]; cd .. From 95e26e9ec36b452c975c69e60ad5b34262028a18 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Thu, 26 Oct 2023 15:58:38 +0000 Subject: [PATCH 04/29] Fix pip install --- .github/workflows/run_merge_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index f6a23fac080..e0a51f419a7 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -25,8 +25,8 @@ jobs: source activate accelerate; git clone https://github.com/huggingface/accelerate cd accelerate - git checkout ${{ github.sha }} - pip install -e .[testing, test_trackers] -U; + git checkout ${{ github.sha }}; + pip install -e .[testing,test_trackers] -U; pip install pytest-reportlog tabulate - name: Run CLI tests (use make cli) From 64b0595732b287133a495e3b3dd781543352aba7 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:11:12 +0000 Subject: [PATCH 05/29] rebase --- .github/workflows/run_merge_tests.yml | 4 ++-- .github/workflows/self_hosted_integration_tests.yml | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index e0a51f419a7..3f4011cec34 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -30,10 +30,10 @@ jobs: pip install pytest-reportlog tabulate - name: Run CLI tests (use make cli) + working-directory: accelerate run: | source activate accelerate - cd accelerate - echo "Hello world!" + make test_cli - name: Run test on GPUs if: always() diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 68a0d9d5fec..57cda14a407 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -99,8 +99,9 @@ jobs: - name: Install accelerate run: source activate accelerate; - git clone -b ${{ github.sha }} https://github.com/huggingface/accelerate + git clone https://github.com/huggingface/accelerate cd accelerate + git checkout ${{ github.sha }}; pip install -e .[testing]; cd .. From c3355101affce07850cda1409d7f2bf866628c13 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:14:55 +0000 Subject: [PATCH 06/29] Colons --- .github/workflows/run_merge_tests.yml | 44 +++++++++++++-------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index 3f4011cec34..f1d6495fa3a 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -23,37 +23,37 @@ jobs: - name: Install accelerate run: | source activate accelerate; - git clone https://github.com/huggingface/accelerate - cd accelerate + git clone https://github.com/huggingface/accelerate; + cd accelerate; git checkout ${{ github.sha }}; pip install -e .[testing,test_trackers] -U; - pip install pytest-reportlog tabulate + pip install pytest-reportlog tabulate ; - name: Run CLI tests (use make cli) working-directory: accelerate run: | - source activate accelerate + source activate accelerate; make test_cli - name: Run test on GPUs if: always() run: | - source activate accelerate - cd accelerate + source activate accelerate; + cd accelerate; make test - name: Run examples on GPUs if: always() run: | - source activate accelerate - cd accelerate - pip uninstall comet_ml -y + source activate accelerate; + cd accelerate; + pip uninstall comet_ml -y; make test_examples - name: Generate Report if: always() run: | - pip install tabulate - cd accelerate + pip install tabulate; + cd accelerate; python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: @@ -68,29 +68,29 @@ jobs: steps: - name: Update clone run: | - source activate accelerate - git config --global --add safe.directory '*' - git fetch && git checkout ${{ github.sha }} - pip install -e .[testing,test_trackers] -U + source activate accelerate; + git config --global --add safe.directory '*'; + git fetch && git checkout ${{ github.sha }}; + pip install -e .[testing,test_trackers] -U; pip install pytest-reportlog tabulate - name: Run test on GPUs run: | - source activate accelerate - cd accelerate + source activate accelerate; + cd accelerate; make test - name: Run examples on GPUs if: always() run: | - source activate accelerate - cd accelerate - pip uninstall comet_ml -y + source activate accelerate; + cd accelerate; + pip uninstall comet_ml -y; make test_examples - name: Generate Report if: always() run: | - source activate accelerate - cd accelerate + source activate accelerate; + cd accelerate; python utils/log_reports.py >> $GITHUB_STEP_SUMMARY \ No newline at end of file From 0ea347c18929143bb8f8a5e786e74ecdecca99b8 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:20:52 +0000 Subject: [PATCH 07/29] right one --- .../self_hosted_integration_tests.yml | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 57cda14a407..7aa89ed8f46 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -40,24 +40,24 @@ jobs: steps: - name: Install transformers run: | - source activate accelerate - git clone --filter=blob:none --no-checkout https://github.com/huggingface/transformers - cd transformers + source activate accelerate; + git clone --filter=blob:none --no-checkout https://github.com/huggingface/transformers; + cd transformers; if [[ ${{ matrix.transformers-version }} = pypi ]]; then git checkout $(git tag --sort=taggerdate | tail -1); else git checkout main - fi - pip install .[torch,deepspeed-testing] - cd .. + fi; + pip install .[torch,deepspeed-testing]; + cd ..; - name: Install accelerate run: source activate accelerate; - git clone https://github.com/huggingface/accelerate - cd accelerate - git checkout ${{ github.sha }} + git clone https://github.com/huggingface/accelerate; + cd accelerate; + git checkout ${{ github.sha }} ; pip install -e .[testing]; - cd .. + cd ..; - name: Show installed libraries run: | @@ -99,23 +99,23 @@ jobs: - name: Install accelerate run: source activate accelerate; - git clone https://github.com/huggingface/accelerate - cd accelerate + git clone https://github.com/huggingface/accelerate; + cd accelerate; git checkout ${{ github.sha }}; pip install -e .[testing]; cd .. - name: Install skorch run: | - source activate accelerate - git clone --filter=blob:none --no-checkout https://github.com/skorch-dev/skorch - cd skorch + source activate accelerate; + git clone --filter=blob:none --no-checkout https://github.com/skorch-dev/skorch; + cd skorch; if [[ ${{ matrix.skorch-version }} = pypi ]]; then git checkout $(git describe --tags `git rev-list --tags --max-count=1`); else git checkout main - fi - pip install .[testing] - pip install flaky + fi; + pip install .[testing]; + pip install flaky; cd .. - name: Show installed libraries From d0c90ace4a00b3e5b110ed11ac3b0e214471dc8d Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:22:05 +0000 Subject: [PATCH 08/29] use master --- .github/workflows/self_hosted_integration_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 7aa89ed8f46..897d2c19bb2 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -112,7 +112,7 @@ jobs: cd skorch; if [[ ${{ matrix.skorch-version }} = pypi ]]; then git checkout $(git describe --tags `git rev-list --tags --max-count=1`); else - git checkout main + git checkout master fi; pip install .[testing]; pip install flaky; From e090dcca8b9cd901253692062532db8ed471a423 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:24:55 +0000 Subject: [PATCH 09/29] Rm --- .github/workflows/self_hosted_integration_tests.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 897d2c19bb2..87f6d63d53c 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -21,6 +21,17 @@ defaults: shell: bash jobs: + clean-env: + container: + image: huggingface/accelerate-gpu:latest + options: --gpus all --shm-size "16gb" + runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + steps: + - name: Clean local files + run: | + rm -rf accelerate + rm -rf transformers + rm -rf skorch run-trainer-tests: container: image: huggingface/accelerate-gpu:latest From 1fe4f5e735e2fdacb5e9667dae6a386a100c0b1a Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:25:40 +0000 Subject: [PATCH 10/29] Add needs --- .github/workflows/self_hosted_integration_tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 87f6d63d53c..543791aacd3 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -37,6 +37,7 @@ jobs: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + needs: [clean-env] strategy: fail-fast: false matrix: @@ -99,6 +100,7 @@ jobs: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + needs: [clean-env] strategy: fail-fast: false matrix: From de8fc5917947fc73a6bd5b7221fc0db1c0f38f37 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:29:14 +0000 Subject: [PATCH 11/29] Better clean --- .../self_hosted_integration_tests.yml | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 543791aacd3..510e6f1f7f6 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -21,23 +21,11 @@ defaults: shell: bash jobs: - clean-env: - container: - image: huggingface/accelerate-gpu:latest - options: --gpus all --shm-size "16gb" - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] - steps: - - name: Clean local files - run: | - rm -rf accelerate - rm -rf transformers - rm -rf skorch run-trainer-tests: container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] - needs: [clean-env] strategy: fail-fast: false matrix: @@ -94,13 +82,15 @@ jobs: run: | source activate accelerate; pytest -sv tests/deepspeed + - name: Clean local files + run: | + rm -rf accelerate transformers run-skorch-tests: container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] - needs: [clean-env] strategy: fail-fast: false matrix: @@ -140,4 +130,8 @@ jobs: working-directory: skorch/ run: | source activate accelerate; - pytest -sv -k TestAccelerate \ No newline at end of file + pytest -sv -k TestAccelerate + + - name: Clean local files + run: | + rm -rf accelerate skorch \ No newline at end of file From be067d38bc41a37f8f0a26dabca1e9380d1a2a65 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:46:49 +0000 Subject: [PATCH 12/29] always --- .github/workflows/self_hosted_integration_tests.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 510e6f1f7f6..b7cf1e3a7bd 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -83,6 +83,7 @@ jobs: source activate accelerate; pytest -sv tests/deepspeed - name: Clean local files + if: always() run: | rm -rf accelerate transformers @@ -102,7 +103,9 @@ jobs: - name: Install accelerate run: source activate accelerate; - git clone https://github.com/huggingface/accelerate; + if [! -d "accelerate"]; then + git clone https://github.com/huggingface/accelerate; + fi; cd accelerate; git checkout ${{ github.sha }}; pip install -e .[testing]; @@ -133,5 +136,6 @@ jobs: pytest -sv -k TestAccelerate - name: Clean local files + if: always() run: | rm -rf accelerate skorch \ No newline at end of file From bb1a57f012394fce63d16277b9c1a9078bfc09c5 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 1 Nov 2023 13:49:29 +0000 Subject: [PATCH 13/29] Forgot other --- .github/workflows/self_hosted_integration_tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index b7cf1e3a7bd..82f22754b70 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -103,9 +103,7 @@ jobs: - name: Install accelerate run: source activate accelerate; - if [! -d "accelerate"]; then - git clone https://github.com/huggingface/accelerate; - fi; + git clone https://github.com/huggingface/accelerate; cd accelerate; git checkout ${{ github.sha }}; pip install -e .[testing]; From 4af61d0140d96e064d6b50e15d80245f1067d8e8 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Thu, 2 Nov 2023 10:36:06 +0100 Subject: [PATCH 14/29] test on AWS --- .github/workflows/run_merge_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index f1d6495fa3a..3462a8f5f7e 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -10,7 +10,7 @@ env: jobs: run_all_tests_single_gpu: - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + runs-on: [self-hosted, docker-gpu, multi-gpu] env: CUDA_VISIBLE_DEVICES: "0" container: @@ -93,4 +93,4 @@ jobs: run: | source activate accelerate; cd accelerate; - python utils/log_reports.py >> $GITHUB_STEP_SUMMARY \ No newline at end of file + python utils/log_reports.py >> $GITHUB_STEP_SUMMARY From a3812932e039eed34918ce4b8ce26b3386eb1340 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Thu, 2 Nov 2023 10:54:28 +0100 Subject: [PATCH 15/29] update all labels --- .github/workflows/run_merge_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index 3462a8f5f7e..b25b8810f98 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -10,7 +10,7 @@ env: jobs: run_all_tests_single_gpu: - runs-on: [self-hosted, docker-gpu, multi-gpu] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4] env: CUDA_VISIBLE_DEVICES: "0" container: @@ -57,7 +57,7 @@ jobs: python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4] container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" From 65117944d1e230920912d839b1147cdbebac4aa0 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Thu, 2 Nov 2023 11:13:35 +0100 Subject: [PATCH 16/29] fix multi-gpu working directory --- .github/workflows/run_merge_tests.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index b25b8810f98..525a4f2dcf3 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -36,24 +36,24 @@ jobs: make test_cli - name: Run test on GPUs + working-directory: accelerate if: always() run: | source activate accelerate; - cd accelerate; make test - name: Run examples on GPUs + working-directory: accelerate if: always() run: | source activate accelerate; - cd accelerate; pip uninstall comet_ml -y; make test_examples - name: Generate Report + working-directory: accelerate if: always() run: | pip install tabulate; - cd accelerate; python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: @@ -63,34 +63,34 @@ jobs: options: --gpus all --shm-size "16gb" defaults: run: - working-directory: accelerate/ shell: bash steps: - name: Update clone run: | source activate accelerate; - git config --global --add safe.directory '*'; - git fetch && git checkout ${{ github.sha }}; + git clone https://github.com/huggingface/accelerate; + cd accelerate; + git checkout ${{ github.sha }}; pip install -e .[testing,test_trackers] -U; pip install pytest-reportlog tabulate - name: Run test on GPUs + working-directory: accelerate run: | source activate accelerate; - cd accelerate; make test - name: Run examples on GPUs + working-directory: accelerate if: always() run: | source activate accelerate; - cd accelerate; pip uninstall comet_ml -y; make test_examples - name: Generate Report + working-directory: accelerate if: always() run: | source activate accelerate; - cd accelerate; python utils/log_reports.py >> $GITHUB_STEP_SUMMARY From 8e0c4f915c915f8ed1e67d7d0e15bb716532e181 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Thu, 2 Nov 2023 11:45:01 +0100 Subject: [PATCH 17/29] limit to 2 GPU --- .github/workflows/run_merge_tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index 525a4f2dcf3..daa5d3dbeda 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -58,6 +58,8 @@ jobs: run_all_tests_multi_gpu: runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4] + env: + CUDA_VISIBLE_DEVICES: 0,1 container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" From 938b5503b7dafa5d1d07d46d80b009048d0e36c2 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Thu, 2 Nov 2023 19:18:17 +0100 Subject: [PATCH 18/29] force run on kube --- .github/workflows/run_merge_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index daa5d3dbeda..ef6ea84bc7e 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -10,7 +10,7 @@ env: jobs: run_all_tests_single_gpu: - runs-on: [self-hosted, single-gpu, nvidia-gpu, t4] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci] env: CUDA_VISIBLE_DEVICES: "0" container: @@ -57,7 +57,7 @@ jobs: python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] env: CUDA_VISIBLE_DEVICES: 0,1 container: From 114bcabc5c3a37c676f16c5a0d0898006ec047a1 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Mon, 6 Nov 2023 13:22:19 +0100 Subject: [PATCH 19/29] move build docker image to new ci --- .github/workflows/build_docker_images.yml | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 59f3e4dda61..68125d9ac50 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -11,19 +11,9 @@ concurrency: cancel-in-progress: false jobs: - clean-storage: - name: "Clean docker image storage" - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] - steps: - - name: Clean storage - run: | - docker image prune --all -f --filter "until=48h" - docker system prune --all -f --filter "until=48h" - latest-cpu: name: "Latest Accelerate CPU [dev]" - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] - needs: clean-storage + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 @@ -41,8 +31,7 @@ jobs: latest-cuda: name: "Latest Accelerate GPU [dev]" - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] - needs: clean-storage + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 From 09356c59b495e3f87d109b0f677f9b8d827a804b Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Mon, 6 Nov 2023 15:07:46 +0100 Subject: [PATCH 20/29] test build on CPU instance --- .github/workflows/build_docker_images.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 68125d9ac50..2a8e687fa27 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -13,7 +13,7 @@ concurrency: jobs: latest-cpu: name: "Latest Accelerate CPU [dev]" - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] + runs-on: [self-hosted, intel-cpu, 8-cpu, ci] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 @@ -31,7 +31,7 @@ jobs: latest-cuda: name: "Latest Accelerate GPU [dev]" - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] + runs-on: [self-hosted, intel-cpu, 8-cpu, ci] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 From 20f78039b6435fefe6cc4c5501c368d59c56e85e Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Mon, 6 Nov 2023 15:38:54 +0100 Subject: [PATCH 21/29] move build docker image release to new ci --- .github/workflows/build-docker-images-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-docker-images-release.yml b/.github/workflows/build-docker-images-release.yml index 2d3a8a6fbfb..6b7c2f361e0 100644 --- a/.github/workflows/build-docker-images-release.yml +++ b/.github/workflows/build-docker-images-release.yml @@ -21,7 +21,7 @@ jobs: version-cpu: name: "Latest Accelerate CPU [version]" - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + runs-on: [self-hosted, intel-cpu, 8-cpu, ci] needs: get-version steps: - name: Set up Docker Buildx @@ -41,7 +41,7 @@ jobs: version-cuda: name: "Latest Accelerate GPU [version]" - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + runs-on: [self-hosted, intel-cpu, 8-cpu, ci] needs: get-version steps: - name: Set up Docker Buildx From 230b5a62dcc2e1d82b8cff9fb65332888ccd12ef Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Tue, 14 Nov 2023 08:58:58 +0100 Subject: [PATCH 22/29] move scheduled slow tests to new ci --- .github/workflows/nightly.yml | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 09e64e060e3..3e7f77bf9fd 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -13,7 +13,7 @@ env: jobs: run_all_tests_single_gpu: - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci] env: CUDA_VISIBLE_DEVICES: "0" TEST_TYPE: "single_gpu" @@ -22,23 +22,25 @@ jobs: options: --gpus all --shm-size "16gb" defaults: run: - working-directory: accelerate/ shell: bash steps: - name: Update clone & pip install run: | source activate accelerate - git config --global --add safe.directory '*' - git fetch && git checkout ${{ github.sha }} + git clone https://github.com/huggingface/accelerate; + cd accelerate; + git checkout ${{ github.sha }}; pip install -e . --no-deps pip install pytest-reportlog tabulate - name: Run test on GPUs + working-directory: accelerate run: | source activate accelerate make test - name: Run examples on GPUs + working-directory: accelerate if: always() run: | source activate accelerate @@ -46,13 +48,14 @@ jobs: make test_examples - name: Generate Report + working-directory: accelerate if: always() run: | pip install slack_sdk tabulate python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] env: CUDA_VISIBLE_DEVICES: "0,1" TEST_TYPE: "multi_gpu" @@ -61,18 +64,19 @@ jobs: options: --gpus all --shm-size "16gb" defaults: run: - working-directory: accelerate/ shell: bash steps: - name: Update clone run: | source activate accelerate - git config --global --add safe.directory '*' - git fetch && git checkout ${{ github.sha }} + git clone https://github.com/huggingface/accelerate; + cd accelerate; + git checkout ${{ github.sha }}; pip install -e . --no-deps pip install pytest-reportlog tabulate - name: Run core and big modeling tests on GPUs + working-directory: accelerate run: | source activate accelerate make test_core @@ -80,12 +84,14 @@ jobs: make test_cli - name: Run Integration tests on GPUs + working-directory: accelerate if: always() run: | source activate accelerate make test_integrations - name: Run examples on GPUs + working-directory: accelerate if: always() run: | source activate accelerate @@ -93,6 +99,7 @@ jobs: make test_examples - name: Generate Report + working-directory: accelerate if: always() run: | pip install slack_sdk tabulate From aef04137a779de61b803dc5f5b6a01e57d5f6310 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Tue, 14 Nov 2023 09:02:14 +0100 Subject: [PATCH 23/29] move integration test to new ci --- .github/workflows/self_hosted_integration_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 82f22754b70..c094797af89 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -25,7 +25,7 @@ jobs: container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] strategy: fail-fast: false matrix: @@ -91,7 +91,7 @@ jobs: container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" - runs-on: [self-hosted, docker-gpu, multi-gpu, gcp] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] strategy: fail-fast: false matrix: From 56d1206c3453d7318a01aceeea7f7660e009a7a7 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Mon, 20 Nov 2023 11:12:13 -0500 Subject: [PATCH 24/29] Comments --- .github/workflows/build-docker-images-release.yml | 4 ++-- .github/workflows/build_docker_images.yml | 4 ++-- .github/workflows/self_hosted_integration_tests.yml | 11 +---------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build-docker-images-release.yml b/.github/workflows/build-docker-images-release.yml index 97bff6d35d3..0d4a68f5496 100644 --- a/.github/workflows/build-docker-images-release.yml +++ b/.github/workflows/build-docker-images-release.yml @@ -21,7 +21,7 @@ jobs: version-cpu: name: "Latest Accelerate CPU [version]" - runs-on: [self-hosted, intel-cpu, 8-cpu, ci] + runs-on: [self-hosted, docker-cpu, intel-cpu] needs: get-version steps: - name: Set up Docker Buildx @@ -41,7 +41,7 @@ jobs: version-cuda: name: "Latest Accelerate GPU [version]" - runs-on: [self-hosted, intel-cpu, 8-cpu, ci] + runs-on: [self-hosted, docker-gpu] needs: get-version steps: - name: Set up Docker Buildx diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 2a8e687fa27..437dd0354dd 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -13,7 +13,7 @@ concurrency: jobs: latest-cpu: name: "Latest Accelerate CPU [dev]" - runs-on: [self-hosted, intel-cpu, 8-cpu, ci] + runs-on: [self-hosted, docker-cpu, intel-cpu] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 @@ -31,7 +31,7 @@ jobs: latest-cuda: name: "Latest Accelerate GPU [dev]" - runs-on: [self-hosted, intel-cpu, 8-cpu, ci] + runs-on: [self-hosted, docker-gpu] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index ab9df5122b0..3c12b51e259 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -74,10 +74,6 @@ jobs: run: | source activate accelerate; pytest -sv tests/deepspeed - - name: Clean local files - if: always() - run: | - rm -rf accelerate transformers - name: Run transformers examples tests working-directory: transformers/ @@ -123,9 +119,4 @@ jobs: working-directory: skorch/ run: | source activate accelerate; - pytest -sv -k TestAccelerate - - - name: Clean local files - if: always() - run: | - rm -rf accelerate skorch \ No newline at end of file + pytest -sv -k TestAccelerate \ No newline at end of file From 0653214add2ebb516fca2aa6d5ed0b913d1c1cef Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Mon, 20 Nov 2023 11:24:54 -0500 Subject: [PATCH 25/29] Right CPU tags --- .github/workflows/build-docker-images-release.yml | 2 +- .github/workflows/build_docker_images.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-docker-images-release.yml b/.github/workflows/build-docker-images-release.yml index 0d4a68f5496..c10d22d33be 100644 --- a/.github/workflows/build-docker-images-release.yml +++ b/.github/workflows/build-docker-images-release.yml @@ -21,7 +21,7 @@ jobs: version-cpu: name: "Latest Accelerate CPU [version]" - runs-on: [self-hosted, docker-cpu, intel-cpu] + runs-on: [self-hosted, intel-cpu, 8-cpu, ci] needs: get-version steps: - name: Set up Docker Buildx diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 437dd0354dd..2e18aeaf76d 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -13,7 +13,7 @@ concurrency: jobs: latest-cpu: name: "Latest Accelerate CPU [dev]" - runs-on: [self-hosted, docker-cpu, intel-cpu] + runs-on: [self-hosted, intel-cpu, 8-cpu, ci] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 From 2c4ac5dc71746252ed362f666fea46e2036bd2a4 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Mon, 20 Nov 2023 11:28:28 -0500 Subject: [PATCH 26/29] Right machines --- .github/workflows/nightly.yml | 4 ++-- .github/workflows/run_merge_tests.yml | 4 ++-- .github/workflows/self_hosted_integration_tests.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 3e7f77bf9fd..a06cae176c7 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -13,7 +13,7 @@ env: jobs: run_all_tests_single_gpu: - runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci] env: CUDA_VISIBLE_DEVICES: "0" TEST_TYPE: "single_gpu" @@ -55,7 +55,7 @@ jobs: python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, daily-ci] env: CUDA_VISIBLE_DEVICES: "0,1" TEST_TYPE: "multi_gpu" diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index ef6ea84bc7e..42bf11e7516 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -10,7 +10,7 @@ env: jobs: run_all_tests_single_gpu: - runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, push-ci] env: CUDA_VISIBLE_DEVICES: "0" container: @@ -57,7 +57,7 @@ jobs: python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci] env: CUDA_VISIBLE_DEVICES: 0,1 container: diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index 3c12b51e259..cd82295e4e2 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -25,7 +25,7 @@ jobs: container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci] strategy: fail-fast: false matrix: @@ -89,7 +89,7 @@ jobs: container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci] strategy: fail-fast: false steps: From 49fbe3384a61be6235b1090b298e0e3b7d0ae5b0 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Mon, 20 Nov 2023 11:36:24 -0500 Subject: [PATCH 27/29] PR comments --- .github/workflows/build-docker-images-release.yml | 2 +- .github/workflows/build_docker_images.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-docker-images-release.yml b/.github/workflows/build-docker-images-release.yml index c10d22d33be..efb6a95da6c 100644 --- a/.github/workflows/build-docker-images-release.yml +++ b/.github/workflows/build-docker-images-release.yml @@ -41,7 +41,7 @@ jobs: version-cuda: name: "Latest Accelerate GPU [version]" - runs-on: [self-hosted, docker-gpu] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci] needs: get-version steps: - name: Set up Docker Buildx diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 2e18aeaf76d..75b9fb9eefe 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -31,7 +31,7 @@ jobs: latest-cuda: name: "Latest Accelerate GPU [dev]" - runs-on: [self-hosted, docker-gpu] + runs-on: [self-hosted, nvidia-gpu, t4, daily-ci] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 From dad64a76cd7bb0119a7d3e0bf2f08c1e0c080f3b Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Mon, 20 Nov 2023 12:15:59 -0500 Subject: [PATCH 28/29] Fix issues --- .github/workflows/nightly.yml | 4 ++-- .github/workflows/run_merge_tests.yml | 4 ++-- .github/workflows/self_hosted_integration_tests.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a06cae176c7..3e7f77bf9fd 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -13,7 +13,7 @@ env: jobs: run_all_tests_single_gpu: - runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci] env: CUDA_VISIBLE_DEVICES: "0" TEST_TYPE: "single_gpu" @@ -55,7 +55,7 @@ jobs: python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, daily-ci] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] env: CUDA_VISIBLE_DEVICES: "0,1" TEST_TYPE: "multi_gpu" diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml index 42bf11e7516..ef6ea84bc7e 100644 --- a/.github/workflows/run_merge_tests.yml +++ b/.github/workflows/run_merge_tests.yml @@ -10,7 +10,7 @@ env: jobs: run_all_tests_single_gpu: - runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, push-ci] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci] env: CUDA_VISIBLE_DEVICES: "0" container: @@ -57,7 +57,7 @@ jobs: python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_all_tests_multi_gpu: - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] env: CUDA_VISIBLE_DEVICES: 0,1 container: diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml index cd82295e4e2..3c12b51e259 100644 --- a/.github/workflows/self_hosted_integration_tests.yml +++ b/.github/workflows/self_hosted_integration_tests.yml @@ -25,7 +25,7 @@ jobs: container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] strategy: fail-fast: false matrix: @@ -89,7 +89,7 @@ jobs: container: image: huggingface/accelerate-gpu:latest options: --gpus all --shm-size "16gb" - runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci] + runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] strategy: fail-fast: false steps: From ebfa910f08e50199ced51d2c116ef142674b6ea0 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Mon, 20 Nov 2023 12:19:25 -0500 Subject: [PATCH 29/29] Some trailers --- .github/workflows/build-docker-images-release.yml | 2 +- .github/workflows/build_docker_images.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-docker-images-release.yml b/.github/workflows/build-docker-images-release.yml index efb6a95da6c..20a9ea51e5a 100644 --- a/.github/workflows/build-docker-images-release.yml +++ b/.github/workflows/build-docker-images-release.yml @@ -41,7 +41,7 @@ jobs: version-cuda: name: "Latest Accelerate GPU [version]" - runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci] + runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci] needs: get-version steps: - name: Set up Docker Buildx diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 75b9fb9eefe..557032a0de2 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -31,7 +31,7 @@ jobs: latest-cuda: name: "Latest Accelerate GPU [dev]" - runs-on: [self-hosted, nvidia-gpu, t4, daily-ci] + runs-on: [self-hosted, nvidia-gpu, t4, ci] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2