huggingface · ydshieh · Sep 16, 2025 · Sep 15, 2025 · Sep 15, 2025
diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml
@@ -12,9 +12,6 @@ on:
       slice_id:
         required: true
         type: number
-      runner_map:
-        required: false
-        type: string
       docker:
         required: true
         type: string
@@ -54,10 +51,12 @@ jobs:
       matrix:
         folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
     runs-on:
-      group: ${{ fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type] }}
+      group: '${{ inputs.machine_type }}'
     container:
       image: ${{ inputs.docker }}
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    outputs:
+      machine_type: ${{ steps.set_machine_type.outputs.machine_type }}
     steps:
       - name: Echo input and matrix info
         shell: bash
@@ -111,6 +110,7 @@ jobs:
         run: pip freeze
 
       - name: Set `machine_type` for report and artifact names
+        id: set_machine_type
         working-directory: /transformers
         shell: bash
         run: |
@@ -126,6 +126,7 @@ jobs:
 
           echo "$machine_type"
           echo "machine_type=$machine_type" >> $GITHUB_ENV
+          echo "machine_type=$machine_type" >> $GITHUB_OUTPUT
 
       - name: Run all tests on GPU
         working-directory: /transformers
@@ -159,5 +160,5 @@ jobs:
       job: run_models_gpu
       report_repo_id: ${{ inputs.report_repo_id }}
       gpu_name: ${{ inputs.runner_type }}
-      machine_type: ${{ inputs.machine_type }}
+      machine_type: ${{ needs.run_models_gpu.outputs.machine_type }}
     secrets: inherit
diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml
@@ -88,6 +88,7 @@ jobs:
       job: run_trainer_and_fsdp_gpu
       slack_report_channel: "#transformers-ci-daily-training"
       docker: huggingface/transformers-all-latest-gpu
+      runner_type: "a10"
       ci_event: Daily CI
       report_repo_id: hf-internal-testing/transformers_daily_ci
       commit_sha: ${{ github.sha }}

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
@@ -68,7 +68,6 @@ jobs:
     outputs:
       folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
       slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
-      runner_map: ${{ steps.set-matrix.outputs.runner_map }}
       quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
     steps:
       - name: Update clone
@@ -95,7 +94,6 @@ jobs:
           if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
             echo "folder_slices=$(python3 ../utils/split_model_tests.py --models '${{ inputs.models }}' --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
             echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
-            echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT
           elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
             echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
             echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
@@ -119,14 +117,13 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        machine_type: [single-gpu, multi-gpu]
+        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
         slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
     uses: ./.github/workflows/model_jobs.yml
     with:
       folder_slices: ${{ needs.setup.outputs.folder_slices }}
       machine_type: ${{ matrix.machine_type }}
       slice_id: ${{ matrix.slice_id }}
-      runner_map: ${{ needs.setup.outputs.runner_map }}
       docker: ${{ inputs.docker }}
       commit_sha: ${{ inputs.commit_sha || github.sha }}
       runner_type: ${{ inputs.runner_type }}
@@ -147,9 +144,10 @@ jobs:
       folder_slices: ${{ needs.setup.outputs.folder_slices }}
       machine_type: ${{ matrix.machine_type }}
       slice_id: ${{ matrix.slice_id }}
-      runner_map: ${{ needs.setup.outputs.runner_map }}
       docker: ${{ inputs.docker }}
       commit_sha: ${{ inputs.commit_sha || github.sha }}
+      runner_type: ${{ inputs.runner_type }}
+      report_repo_id: ${{ inputs.report_repo_id }}
       report_name_prefix: run_trainer_and_fsdp_gpu
     secrets: inherit
 

diff --git a/utils/get_runner_map.py b/utils/get_runner_map.py