Skip to content

Commit

Permalink
New CI Runners (huggingface#2087)
Browse files (browse the repository at this point in the history)
* Try merge tests

* Fix

* Checkout branch

* Fix pip install

* rebase

* Colons

* right one

* use master

* Rm

* Add needs

* Better clean

* always

* Forgot other

* test on AWS

* update all labels

* fix multi-gpu working directory

* limit to 2 GPU

* force run on kube

* move build docker image to new ci

* test build on CPU instance

* move build docker image release to new ci

* move scheduled slow tests to new ci

* move integration test to new ci

* Comments

* Right CPU tags

* Right machines

* PR comments

---------

Co-authored-by: Guillaume LEGENDRE <glegendre01@gmail.com>
  • Loading branch information
muellerzr and glegendre01 committed Nov 20, 2023
1 parent 427ef8b commit ca300c0
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 70 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build-docker-images-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

version-cpu:
name: "Latest Accelerate CPU [version]"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, intel-cpu, 8-cpu, ci]
needs: get-version
steps:
- name: Set up Docker Buildx
Expand All @@ -41,7 +41,7 @@ jobs:

version-cuda:
name: "Latest Accelerate GPU [version]"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci]
needs: get-version
steps:
- name: Set up Docker Buildx
Expand Down
15 changes: 2 additions & 13 deletions .github/workflows/build_docker_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,9 @@ concurrency:
cancel-in-progress: false

jobs:
clean-storage:
name: "Clean docker image storage"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
steps:
- name: Clean storage
run: |
docker image prune --all -f --filter "until=48h"
docker system prune --all -f --filter "until=48h"
latest-cpu:
name: "Latest Accelerate CPU [dev]"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
needs: clean-storage
runs-on: [self-hosted, intel-cpu, 8-cpu, ci]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
Expand All @@ -41,8 +31,7 @@ jobs:

latest-cuda:
name: "Latest Accelerate GPU [dev]"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
needs: clean-storage
runs-on: [self-hosted, nvidia-gpu, t4, daily-ci]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
Expand Down
23 changes: 15 additions & 8 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ env:

jobs:
run_all_tests_single_gpu:
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci]
env:
CUDA_VISIBLE_DEVICES: "0"
TEST_TYPE: "single_gpu"
Expand All @@ -22,37 +22,40 @@ jobs:
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone & pip install
run: |
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e . --no-deps
pip install pytest-reportlog tabulate
- name: Run test on GPUs
working-directory: accelerate
run: |
source activate accelerate
make test
- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples
- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
run_all_tests_multi_gpu:
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, daily-ci]
env:
CUDA_VISIBLE_DEVICES: "0,1"
TEST_TYPE: "multi_gpu"
Expand All @@ -61,38 +64,42 @@ jobs:
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone
run: |
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e . --no-deps
pip install pytest-reportlog tabulate
- name: Run core and big modeling tests on GPUs
working-directory: accelerate
run: |
source activate accelerate
make test_core
make test_big_modeling
make test_cli
- name: Run Integration tests on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
make test_integrations
- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples
- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
Expand Down
59 changes: 34 additions & 25 deletions .github/workflows/run_merge_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,80 +10,89 @@ env:

jobs:
run_all_tests_single_gpu:
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, push-ci]
env:
CUDA_VISIBLE_DEVICES: "0"
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone & pip install
- name: Install accelerate
run: |
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
pip install -e .[testing,test_trackers] -U
pip install pytest-reportlog tabulate
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing,test_trackers] -U;
pip install pytest-reportlog tabulate ;
- name: Run CLI tests
- name: Run CLI tests (use make cli)
working-directory: accelerate
run: |
source activate accelerate
source activate accelerate;
make test_cli
- name: Run test on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
source activate accelerate;
make test
- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
source activate accelerate;
pip uninstall comet_ml -y;
make test_examples
- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install tabulate
pip install tabulate;
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
run_all_tests_multi_gpu:
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
env:
CUDA_VISIBLE_DEVICES: 0,1
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone
run: |
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
pip install -e .[testing,test_trackers] -U
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing,test_trackers] -U;
pip install pytest-reportlog tabulate
- name: Run test on GPUs
working-directory: accelerate
run: |
source activate accelerate
source activate accelerate;
make test
- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
source activate accelerate;
pip uninstall comet_ml -y;
make test_examples
- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
source activate accelerate;
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
44 changes: 22 additions & 22 deletions .github/workflows/self_hosted_integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
strategy:
fail-fast: false
matrix:
Expand All @@ -34,22 +34,22 @@ jobs:
"0,1"
]
steps:
- name: Update accelerate clone and pip install
working-directory: accelerate/
run:
- name: Install transformers
run: |
source activate accelerate;
git config --global --add safe.directory '*';
git checkout main && git fetch && git checkout ${{ github.sha }};
pip install -e .;
git clone https://github.com/huggingface/transformers --depth 1;
cd transformers;
pip install .[torch,deepspeed-testing];
cd ..;
- name: Update transformers clone & pip install
working-directory: transformers/
- name: Install accelerate
run: |
source activate accelerate
git config --global --add safe.directory '*'
git checkout main && git pull
pip install .[torch,deepspeed-testing]
pip uninstall comet_ml wandb -y
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }} ;
pip install -e .[testing];
cd ..;
- name: Show installed libraries
run: |
Expand Down Expand Up @@ -89,20 +89,20 @@ jobs:
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
strategy:
fail-fast: false
steps:
- name: Update accelerate clone and pip install
working-directory: accelerate/
- name: Install accelerate
run:
source activate accelerate;
git config --global --add safe.directory '*';
git checkout main && git fetch && git checkout ${{ github.sha }};
pip install -e .;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing];
cd ..

- name: Update skorch clone & pip install
working-directory: skorch/
- name: Install skorch
run: |
source activate accelerate
git config --global --add safe.directory '*'
Expand Down

0 comments on commit ca300c0

Please sign in to comment.