Revert "New CI Runners (#2087)" (#2172)

This reverts commit ca300c0.
huggingface · Nov 20, 2023 · commit 2b25b8b
1 parent: ca300c0

Showing 5 changed files with 70 additions and 75 deletions.
diff --git a/.github/workflows/build-docker-images-release.yml b/.github/workflows/build-docker-images-release.yml
@@ -21,7 +21,7 @@ jobs:
 
   version-cpu:
     name: "Latest Accelerate CPU [version]"
-    runs-on: [self-hosted, intel-cpu, 8-cpu, ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
     needs: get-version
     steps:
       - name: Set up Docker Buildx
@@ -41,7 +41,7 @@ jobs:
 
   version-cuda:
     name: "Latest Accelerate GPU [version]"
-    runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
     needs: get-version
     steps:
       - name: Set up Docker Buildx

diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml
@@ -11,9 +11,19 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
+  clean-storage:
+    name: "Clean docker image storage"
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
+    steps:
+      - name: Clean storage
+        run: |
+          docker image prune --all -f --filter "until=48h"
+          docker system prune --all -f --filter "until=48h"
+
   latest-cpu:
     name: "Latest Accelerate CPU [dev]"
-    runs-on: [self-hosted, intel-cpu, 8-cpu, ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
+    needs: clean-storage
     steps:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v2
@@ -31,7 +41,8 @@ jobs:
 
   latest-cuda:
     name: "Latest Accelerate GPU [dev]"
-    runs-on: [self-hosted, nvidia-gpu, t4, daily-ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
+    needs: clean-storage
     steps:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v2

diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
@@ -13,7 +13,7 @@ env:
 
 jobs:
   run_all_tests_single_gpu:
-    runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, daily-ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
     env:
       CUDA_VISIBLE_DEVICES: "0"
       TEST_TYPE: "single_gpu"
@@ -22,40 +22,37 @@ jobs:
       options: --gpus all --shm-size "16gb"
     defaults:
       run:
+        working-directory: accelerate/
         shell: bash
     steps:
       - name: Update clone & pip install
         run: |
           source activate accelerate
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }};
+          git config --global --add safe.directory '*'
+          git fetch && git checkout ${{ github.sha }} 
           pip install -e . --no-deps
           pip install pytest-reportlog tabulate
 
       - name: Run test on GPUs
-        working-directory: accelerate
         run: |
           source activate accelerate
           make test
           
       - name: Run examples on GPUs
-        working-directory: accelerate
         if: always()
         run: |
           source activate accelerate
           pip uninstall comet_ml -y
           make test_examples
           
       - name: Generate Report
-        working-directory: accelerate
         if: always()
         run: |
           pip install slack_sdk tabulate
           python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
   run_all_tests_multi_gpu:
-    runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, daily-ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
     env:
       CUDA_VISIBLE_DEVICES: "0,1"
       TEST_TYPE: "multi_gpu"
@@ -64,42 +61,38 @@ jobs:
       options: --gpus all --shm-size "16gb"
     defaults:
       run:
+        working-directory: accelerate/
         shell: bash
     steps:
       - name: Update clone
         run: |
           source activate accelerate
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }};
+          git config --global --add safe.directory '*'
+          git fetch && git checkout ${{ github.sha }}
           pip install -e . --no-deps
           pip install pytest-reportlog tabulate
 
       - name: Run core and big modeling tests on GPUs
-        working-directory: accelerate
         run: |
           source activate accelerate
           make test_core
           make test_big_modeling
           make test_cli
 
       - name: Run Integration tests on GPUs
-        working-directory: accelerate
         if: always()
         run: |
           source activate accelerate
           make test_integrations
 
       - name: Run examples on GPUs
-        working-directory: accelerate
         if: always()
         run: |
           source activate accelerate
           pip uninstall comet_ml -y
           make test_examples
 
       - name: Generate Report
-        working-directory: accelerate
         if: always()
         run: |
           pip install slack_sdk tabulate

diff --git a/.github/workflows/run_merge_tests.yml b/.github/workflows/run_merge_tests.yml
@@ -10,89 +10,80 @@ env:
 
 jobs:
   run_all_tests_single_gpu:
-    runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, push-ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
     env:
       CUDA_VISIBLE_DEVICES: "0"
     container:
       image: huggingface/accelerate-gpu:latest
       options: --gpus all --shm-size "16gb"
     defaults:
       run:
+        working-directory: accelerate/
         shell: bash
     steps:
-      - name: Install accelerate
+      - name: Update clone & pip install
         run: |
-          source activate accelerate;
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }};
-          pip install -e .[testing,test_trackers] -U;
-          pip install pytest-reportlog tabulate  ;
+          source activate accelerate
+          git config --global --add safe.directory '*'
+          git fetch && git checkout ${{ github.sha }}
+          pip install -e .[testing,test_trackers] -U
+          pip install pytest-reportlog tabulate
 
-      - name: Run CLI tests (use make cli)
-        working-directory: accelerate
+      - name: Run CLI tests
         run: |
-          source activate accelerate;
+          source activate accelerate
           make test_cli
           
       - name: Run test on GPUs
-        working-directory: accelerate
         if: always()
         run: |
-          source activate accelerate;
+          source activate accelerate
           make test
       - name: Run examples on GPUs
-        working-directory: accelerate
         if: always()
         run: |
-          source activate accelerate;
-          pip uninstall comet_ml -y;
+          source activate accelerate
+          pip uninstall comet_ml -y
           make test_examples
 
       - name: Generate Report
-        working-directory: accelerate
         if: always()
         run: |
-          pip install tabulate;
+          pip install tabulate
           python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
   run_all_tests_multi_gpu:
-    runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
-    env:
-      CUDA_VISIBLE_DEVICES: 0,1
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
     container:
       image: huggingface/accelerate-gpu:latest
       options: --gpus all --shm-size "16gb"
     defaults:
       run:
+        working-directory: accelerate/
         shell: bash
     steps:
       - name: Update clone
         run: |
-          source activate accelerate;
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }};
-          pip install -e .[testing,test_trackers] -U;
+          source activate accelerate
+          git config --global --add safe.directory '*'
+          git fetch && git checkout ${{ github.sha }}
+          pip install -e .[testing,test_trackers] -U
           pip install pytest-reportlog tabulate
 
       - name: Run test on GPUs
-        working-directory: accelerate
         run: |
-          source activate accelerate;
+          source activate accelerate
           make test
 
       - name: Run examples on GPUs
-        working-directory: accelerate
         if: always()
         run: |
-          source activate accelerate;
-          pip uninstall comet_ml -y;
+          source activate accelerate
+          pip uninstall comet_ml -y
           make test_examples
 
       - name: Generate Report
-        working-directory: accelerate
         if: always()
         run: |
-          source activate accelerate;
-          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+          pip install tabulate
+          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
diff --git a/.github/workflows/self_hosted_integration_tests.yml b/.github/workflows/self_hosted_integration_tests.yml
@@ -25,7 +25,7 @@ jobs:
     container:
       image: huggingface/accelerate-gpu:latest
       options: --gpus all --shm-size "16gb"
-    runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
     strategy:
       fail-fast: false
       matrix:
@@ -34,22 +34,22 @@ jobs:
           "0,1"
         ]
     steps:
-      - name: Install transformers
-        run: |
+      - name: Update accelerate clone and pip install
+        working-directory: accelerate/
+        run: 
           source activate accelerate;
-          git clone https://github.com/huggingface/transformers --depth 1;
-          cd transformers;
-          pip install .[torch,deepspeed-testing];
-          cd ..;
+          git config --global --add safe.directory '*';
+          git checkout main && git fetch && git checkout ${{ github.sha }};
+          pip install -e .;
 
-      - name: Install accelerate
+      - name: Update transformers clone & pip install
+        working-directory: transformers/
         run: |
-          source activate accelerate;
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }} ;
-          pip install -e .[testing];
-          cd ..;
+          source activate accelerate
+          git config --global --add safe.directory '*'
+          git checkout main && git pull
+          pip install .[torch,deepspeed-testing]
+          pip uninstall comet_ml wandb -y
       
       - name: Show installed libraries
         run: |
@@ -89,20 +89,20 @@ jobs:
     container:
       image: huggingface/accelerate-gpu:latest
       options: --gpus all --shm-size "16gb"
-    runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, push-ci]
+    runs-on: [self-hosted, docker-gpu, multi-gpu, gcp]
     strategy:
       fail-fast: false
     steps:
-      - name: Install accelerate
+      - name: Update accelerate clone and pip install
+        working-directory: accelerate/
         run: 
           source activate accelerate;
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }};
-          pip install -e .[testing];
-          cd ..
+          git config --global --add safe.directory '*';
+          git checkout main && git fetch && git checkout ${{ github.sha }};
+          pip install -e .;
 
-      - name: Install skorch
+      - name: Update skorch clone & pip install
+        working-directory: skorch/
         run: |
           source activate accelerate
           git config --global --add safe.directory '*'