Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New CI Runners #2087

Merged
merged 29 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build-docker-images-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

version-cpu:
name: "Latest Accelerate CPU [version]"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, intel-cpu, 8-cpu, ci]
needs: get-version
steps:
- name: Set up Docker Buildx
Expand All @@ -41,7 +41,7 @@ jobs:

version-cuda:
name: "Latest Accelerate GPU [version]"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, docker-gpu]
muellerzr marked this conversation as resolved.
Show resolved Hide resolved
needs: get-version
steps:
- name: Set up Docker Buildx
Expand Down
15 changes: 2 additions & 13 deletions .github/workflows/build_docker_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,9 @@ concurrency:
cancel-in-progress: false

jobs:
clean-storage:
name: "Clean docker image storage"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
steps:
- name: Clean storage
run: |
docker image prune --all -f --filter "until=48h"
docker system prune --all -f --filter "until=48h"

latest-cpu:
name: "Latest Accelerate CPU [dev]"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
needs: clean-storage
runs-on: [self-hosted, intel-cpu, 8-cpu, ci]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
Expand All @@ -41,8 +31,7 @@ jobs:

latest-cuda:
name: "Latest Accelerate GPU [dev]"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
needs: clean-storage
runs-on: [self-hosted, docker-gpu]
steps:
muellerzr marked this conversation as resolved.
Show resolved Hide resolved
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
Expand Down
23 changes: 15 additions & 8 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ env:

jobs:
run_all_tests_single_gpu:
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci]
env:
CUDA_VISIBLE_DEVICES: "0"
TEST_TYPE: "single_gpu"
Expand All @@ -22,37 +22,40 @@ jobs:
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone & pip install
run: |
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e . --no-deps
pip install pytest-reportlog tabulate

- name: Run test on GPUs
working-directory: accelerate
run: |
source activate accelerate
make test

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

run_all_tests_multi_gpu:
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, daily-ci]
env:
CUDA_VISIBLE_DEVICES: "0,1"
TEST_TYPE: "multi_gpu"
Expand All @@ -61,38 +64,42 @@ jobs:
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone
run: |
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e . --no-deps
pip install pytest-reportlog tabulate

- name: Run core and big modeling tests on GPUs
working-directory: accelerate
run: |
source activate accelerate
make test_core
make test_big_modeling
make test_cli

- name: Run Integration tests on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
make test_integrations

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
Expand Down
59 changes: 34 additions & 25 deletions .github/workflows/run_merge_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,80 +10,89 @@ env:

jobs:
run_all_tests_single_gpu:
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, push-ci]
env:
CUDA_VISIBLE_DEVICES: "0"
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone & pip install
- name: Install accelerate
run: |
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
pip install -e .[testing,test_trackers] -U
pip install pytest-reportlog tabulate
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing,test_trackers] -U;
pip install pytest-reportlog tabulate ;

- name: Run CLI tests
- name: Run CLI tests (use make cli)
working-directory: accelerate
run: |
source activate accelerate
source activate accelerate;
make test_cli

- name: Run test on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
source activate accelerate;
make test
- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
source activate accelerate;
pip uninstall comet_ml -y;
make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install tabulate
pip install tabulate;
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

run_all_tests_multi_gpu:
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
env:
CUDA_VISIBLE_DEVICES: 0,1
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone
run: |
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
pip install -e .[testing,test_trackers] -U
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing,test_trackers] -U;
pip install pytest-reportlog tabulate

- name: Run test on GPUs
working-directory: accelerate
run: |
source activate accelerate
source activate accelerate;
make test

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
source activate accelerate;
pip uninstall comet_ml -y;
make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
source activate accelerate;
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
44 changes: 22 additions & 22 deletions .github/workflows/self_hosted_integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
strategy:
fail-fast: false
matrix:
Expand All @@ -34,22 +34,22 @@ jobs:
"0,1"
]
steps:
- name: Update accelerate clone and pip install
working-directory: accelerate/
run:
- name: Install transformers
run: |
source activate accelerate;
git config --global --add safe.directory '*';
git checkout main && git fetch && git checkout ${{ github.sha }};
pip install -e .;
git clone https://github.com/huggingface/transformers --depth 1;
cd transformers;
pip install .[torch,deepspeed-testing];
cd ..;

- name: Update transformers clone & pip install
working-directory: transformers/
- name: Install accelerate
run: |
source activate accelerate
git config --global --add safe.directory '*'
git checkout main && git pull
pip install .[torch,deepspeed-testing]
pip uninstall comet_ml wandb -y
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }} ;
pip install -e .[testing];
cd ..;

- name: Show installed libraries
run: |
Expand Down Expand Up @@ -89,20 +89,20 @@ jobs:
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
strategy:
fail-fast: false
steps:
- name: Update accelerate clone and pip install
working-directory: accelerate/
- name: Install accelerate
run:
source activate accelerate;
git config --global --add safe.directory '*';
git checkout main && git fetch && git checkout ${{ github.sha }};
pip install -e .;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing];
cd ..

- name: Update skorch clone & pip install
working-directory: skorch/
- name: Install skorch
run: |
source activate accelerate
git config --global --add safe.directory '*'
Expand Down
Loading