diff --git a/.github/workflows/execute-test-script.yml b/.github/workflows/execute-test-script.yml index 422b8ef..20ac472 100644 --- a/.github/workflows/execute-test-script.yml +++ b/.github/workflows/execute-test-script.yml @@ -30,11 +30,6 @@ on: required: true default: spr type: string - shutdown_cloud_runner: - description: Whether to shutdown AWS cloud runner - required: false - default: false - type: boolean test_script: description: Test script to run required: false @@ -57,7 +52,7 @@ jobs: runner_labels: ${{ steps.set_up_vars.outputs.runner_labels }} results_name: ${{ steps.set_up_vars.outputs.results_name }} steps: - - name: Get conda env and results name for AWS runners + - name: Get conda env and results name for cloud runners id: set_up_vars shell: bash -el {0} run: | @@ -139,24 +134,19 @@ jobs: fi - - name: Upload results.db to artifacts when running in AWS - if: contains(fromJSON(needs.set_up_vars.outputs.runner_labels), 'aws') + - name: Upload results.db to artifacts when running in the cloud + if: ${{ inputs.runner_type != 'spr' }} uses: actions/upload-artifact@v3 with: name: ${{ needs.set_up_vars.outputs.results_name }} path: results.db - - name: Shutdown AWS cloud runner - if: contains(fromJSON(needs.set_up_vars.outputs.runner_labels), 'aws') && inputs.shutdown_cloud_runner - shell: bash -el {0} - run: sudo shutdown -h +2 - parse_results: runs-on: [self-hosted, glados, spr] needs: - set_up_vars - mlp_test - if: contains(fromJSON(needs.set_up_vars.outputs.runner_labels), 'aws') + if: ${{ inputs.runner_type != 'spr' }} steps: - uses: actions/checkout@v4 - name: Download results.db from artifacts @@ -170,7 +160,7 @@ jobs: run: | ls -l - sudo apt update && sudo apt install sqlite3 + sudo apt update && sudo apt install -y sqlite3 ./db_tools/export_sqlite2csv.sh source ${CONDA}/bin/activate diff --git a/dl_bench/utils.py b/dl_bench/utils.py index 9300fa2..4d5f673 100644 --- a/dl_bench/utils.py +++ b/dl_bench/utils.py @@ -152,6 +152,10 @@ def to_device(self, x: torch.Tensor): else: raise ValueError("Unknown device") + def sync(self): + if self.device_name == 'cuda': + torch.cuda.synchronize() + def prepare_eval_transformer(self, model): model = model.to(memory_format=torch.channels_last) @@ -390,6 +394,7 @@ def inference(self, backend: Backend): # Duration is inconsistent now with tm.timeit("duration_s"): for i, x in enumerate(test_loader): + backend.sync() s = get_time() x = backend.to_device(x) if backend.dtype != torch.float32: @@ -405,6 +410,7 @@ def inference(self, backend: Backend): start = time.perf_counter() continue + backend.sync() fw_times.append(get_time() - s) n_items += len(x) outputs.append(y)