Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "New CI Runners" #2172

Merged
merged 1 commit into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build-docker-images-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

version-cpu:
name: "Latest Accelerate CPU [version]"
runs-on: [self-hosted, intel-cpu, 8-cpu, ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
needs: get-version
steps:
- name: Set up Docker Buildx
Expand All @@ -41,7 +41,7 @@ jobs:

version-cuda:
name: "Latest Accelerate GPU [version]"
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
needs: get-version
steps:
- name: Set up Docker Buildx
Expand Down
15 changes: 13 additions & 2 deletions .github/workflows/build_docker_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,19 @@ concurrency:
cancel-in-progress: false

jobs:
clean-storage:
name: "Clean docker image storage"
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
steps:
- name: Clean storage
run: |
docker image prune --all -f --filter "until=48h"
docker system prune --all -f --filter "until=48h"

latest-cpu:
name: "Latest Accelerate CPU [dev]"
runs-on: [self-hosted, intel-cpu, 8-cpu, ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
needs: clean-storage
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
Expand All @@ -31,7 +41,8 @@ jobs:

latest-cuda:
name: "Latest Accelerate GPU [dev]"
runs-on: [self-hosted, nvidia-gpu, t4, daily-ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
needs: clean-storage
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
Expand Down
23 changes: 8 additions & 15 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ env:

jobs:
run_all_tests_single_gpu:
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
env:
CUDA_VISIBLE_DEVICES: "0"
TEST_TYPE: "single_gpu"
Expand All @@ -22,40 +22,37 @@ jobs:
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone & pip install
run: |
source activate accelerate
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
pip install -e . --no-deps
pip install pytest-reportlog tabulate

- name: Run test on GPUs
working-directory: accelerate
run: |
source activate accelerate
make test

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

run_all_tests_multi_gpu:
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, daily-ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
env:
CUDA_VISIBLE_DEVICES: "0,1"
TEST_TYPE: "multi_gpu"
Expand All @@ -64,42 +61,38 @@ jobs:
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone
run: |
source activate accelerate
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
pip install -e . --no-deps
pip install pytest-reportlog tabulate

- name: Run core and big modeling tests on GPUs
working-directory: accelerate
run: |
source activate accelerate
make test_core
make test_big_modeling
make test_cli

- name: Run Integration tests on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
make test_integrations

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
Expand Down
59 changes: 25 additions & 34 deletions .github/workflows/run_merge_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,89 +10,80 @@ env:

jobs:
run_all_tests_single_gpu:
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, push-ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
env:
CUDA_VISIBLE_DEVICES: "0"
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Install accelerate
- name: Update clone & pip install
run: |
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing,test_trackers] -U;
pip install pytest-reportlog tabulate ;
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
pip install -e .[testing,test_trackers] -U
pip install pytest-reportlog tabulate

- name: Run CLI tests (use make cli)
working-directory: accelerate
- name: Run CLI tests
run: |
source activate accelerate;
source activate accelerate
make test_cli

- name: Run test on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate;
source activate accelerate
make test
- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate;
pip uninstall comet_ml -y;
source activate accelerate
pip uninstall comet_ml -y
make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install tabulate;
pip install tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

run_all_tests_multi_gpu:
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
env:
CUDA_VISIBLE_DEVICES: 0,1
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
defaults:
run:
working-directory: accelerate/
shell: bash
steps:
- name: Update clone
run: |
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing,test_trackers] -U;
source activate accelerate
git config --global --add safe.directory '*'
git fetch && git checkout ${{ github.sha }}
pip install -e .[testing,test_trackers] -U
pip install pytest-reportlog tabulate

- name: Run test on GPUs
working-directory: accelerate
run: |
source activate accelerate;
source activate accelerate
make test

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate;
pip uninstall comet_ml -y;
source activate accelerate
pip uninstall comet_ml -y
make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
source activate accelerate;
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
pip install tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
44 changes: 22 additions & 22 deletions .github/workflows/self_hosted_integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
strategy:
fail-fast: false
matrix:
Expand All @@ -34,22 +34,22 @@ jobs:
"0,1"
]
steps:
- name: Install transformers
run: |
- name: Update accelerate clone and pip install
working-directory: accelerate/
run:
source activate accelerate;
git clone https://github.com/huggingface/transformers --depth 1;
cd transformers;
pip install .[torch,deepspeed-testing];
cd ..;
git config --global --add safe.directory '*';
git checkout main && git fetch && git checkout ${{ github.sha }};
pip install -e .;

- name: Install accelerate
- name: Update transformers clone & pip install
working-directory: transformers/
run: |
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }} ;
pip install -e .[testing];
cd ..;
source activate accelerate
git config --global --add safe.directory '*'
git checkout main && git pull
pip install .[torch,deepspeed-testing]
pip uninstall comet_ml wandb -y

- name: Show installed libraries
run: |
Expand Down Expand Up @@ -89,20 +89,20 @@ jobs:
container:
image: huggingface/accelerate-gpu:latest
options: --gpus all --shm-size "16gb"
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
strategy:
fail-fast: false
steps:
- name: Install accelerate
- name: Update accelerate clone and pip install
working-directory: accelerate/
run:
source activate accelerate;
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e .[testing];
cd ..
git config --global --add safe.directory '*';
git checkout main && git fetch && git checkout ${{ github.sha }};
pip install -e .;

- name: Install skorch
- name: Update skorch clone & pip install
working-directory: skorch/
run: |
source activate accelerate
git config --global --add safe.directory '*'
Expand Down
Loading