From 2ce7e21adaee505c5d10b24067a7aefcf5e76d31 Mon Sep 17 00:00:00 2001
From: Łukasz Stolarczuk
Date: Wed, 15 Oct 2025 12:12:00 +0200
Subject: [PATCH] [CI][Bench] Cleanup in benchmarks jobs and scripts

---
 .../workflows/sycl-benchmark-aggregate.yml    | 52 -------------------
 .../workflows/sycl-ur-perf-benchmarking.yml   | 19 +++----
 devops/scripts/benchmarks/CONTRIB.md          |  4 +-
 devops/scripts/benchmarks/README.md           | 28 +++++-----
 devops/scripts/benchmarks/benches/compute.py  |  2 +-
 devops/scripts/benchmarks/compare.py          |  2 +-
 devops/scripts/benchmarks/main.py             |  2 +-
 devops/scripts/benchmarks/options.py          |  5 ++
 8 files changed, 35 insertions(+), 79 deletions(-)
 delete mode 100644 .github/workflows/sycl-benchmark-aggregate.yml

diff --git a/.github/workflows/sycl-benchmark-aggregate.yml b/.github/workflows/sycl-benchmark-aggregate.yml
deleted file mode 100644
index d417a0fe9893d..0000000000000
--- a/.github/workflows/sycl-benchmark-aggregate.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-name: Aggregate compute-benchmark averages from historical data
-
-# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on
-# how the benchmark results compare to a historical average: This historical
-# average is calculated in this workflow, which aggregates historical data and
-# produces measures of central tendency (median in this case) used for this
-# purpose.
-
-on:
-  workflow_dispatch:
-    inputs:
-      lookback_days:
-        description: |
-          Number of days from today to look back in historical results for:
-          This sets the age limit of data used in average calculation: Any
-          benchmark results created before `lookback_days` from today is
-          excluded from being aggregated in the historical average.
-        type: number
-        required: true
-  workflow_call:
-    inputs:
-      lookback_days:
-        type: number
-        required: true
-    secrets:
-      LLVM_SYCL_BENCHMARK_TOKEN:
-        description: |
-          Github token used by the faceless account to push newly calculated
-          medians.
-        required: true
-
-
-permissions:
-  contents: read
-
-jobs:
-  aggregate:
-    name: Aggregate average (median) value for all metrics
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-        with:
-          sparse-checkout: |
-            devops/scripts/benchmarking
-            devops/benchmarking
-            devops/actions/benchmarking
-      - name: Aggregate benchmark results and produce historical average
-        uses: ./devops/actions/benchmarking/aggregate
-        with:
-          lookback_days: ${{ inputs.lookback_days }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml
index b5b14cf34ade1..c154bb53413cb 100644
--- a/.github/workflows/sycl-ur-perf-benchmarking.yml
+++ b/.github/workflows/sycl-ur-perf-benchmarking.yml
@@ -15,8 +15,8 @@ on:
         PR no. to build SYCL from if specified:
         SYCL will be built from HEAD of incoming branch used by the
         specified PR no.

-        If both pr_no and commit_hash are empty, the latest SYCL nightly build
-        will be used.
+        If both pr_no and commit_hash are empty, the latest commit in
+        the deployment branch will be used.
       required: false
       default: ''
     commit_hash:
@@ -64,33 +64,34 @@ on:
     pr_no:
       type: string
       description: |
-        PR no. to build SYCL from:
-
-        SYCL will be built from HEAD of incoming branch.
+        PR no. to build SYCL from - it will be built from the HEAD of the incoming branch.
+
+        Leave both pr_no and commit_hash empty to use the latest commit from the branch/tag this workflow was started from.
      required: false
       default: ''
     commit_hash:
       type: string
       description: |
-        Commit hash (within intel/llvm) to build SYCL from:
+        Commit hash (within intel/llvm) to build SYCL from.

-        Leave both pr_no and commit_hash empty to use latest commit.
+        Leave both pr_no and commit_hash empty to use the latest commit from the branch/tag this workflow was started from.
       required: false
       default: ''
     save_name:
       type: string
       description: |
-        Name to use for the benchmark result:
+        Name to use for the benchmark result
       required: false
       default: ''
     upload_results:
-      description: 'Save and upload results (to https://intel.github.io/llvm/benchmarks)'
+      description: Save and upload results (to https://intel.github.io/llvm/benchmarks)
       type: choice
       options:
       - false
       - true
       default: true
     runner:
+      description: Self-hosted runner to use for the benchmarks
       type: choice
       options:
       - '["PVC_PERF"]'
diff --git a/devops/scripts/benchmarks/CONTRIB.md b/devops/scripts/benchmarks/CONTRIB.md
index ed11f007a7fc6..7d1d1150e1353 100644
--- a/devops/scripts/benchmarks/CONTRIB.md
+++ b/devops/scripts/benchmarks/CONTRIB.md
@@ -2,7 +2,7 @@

 ## Architecture

-The suite is structured around three main components: Suites, Benchmarks, and Results.
+The suite is structured around four main components: Suites, Benchmarks, Results, and BenchmarkMetadata.

 1. **Suites:**
    * Collections of related benchmarks (e.g., `ComputeBench`, `LlamaCppBench`).
@@ -170,7 +170,7 @@ The benchmark suite generates an interactive HTML dashboard that visualizes `Res
    * If adding to an existing category, modify the corresponding `Suite` class (e.g., `benches/compute.py`) to instantiate and return your new benchmark in its `benchmarks()` method.
    * If creating a new category, create a new `Suite` class inheriting from `benches.base.Suite`. Implement `name()` and `benchmarks()`. Add necessary `setup()` if the suite requires shared setup. Add group metadata via `additional_metadata()` if needed.
 3. **Register Suite:** Import and add your new `Suite` instance to the `suites` list in `main.py`.
-4. **Add to Presets:** If adding a new suite, add its `name()` to the relevant lists in `presets.py` (e.g., "Full", "Normal") so it runs with those presets.
+4. **Add to Presets:** If adding a new suite, add its `name()` to the relevant lists in `presets.py` (e.g., "Full", "Normal") so it runs with those presets. Update `README.md` to include the new suite in the presets' descriptions.

 ## Recommendations

diff --git a/devops/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md
index 900826bac9de3..4f7341e3b1e8a 100644
--- a/devops/scripts/benchmarks/README.md
+++ b/devops/scripts/benchmarks/README.md
@@ -1,6 +1,6 @@
-# Unified Runtime Benchmark Runner
+# SYCL and Unified Runtime Benchmark Runner

-Scripts for running performance tests on SYCL and Unified Runtime.
+Scripts for running benchmarks on SYCL and Unified Runtime.

 ## Benchmarks

@@ -31,9 +31,9 @@
 $ pip install -r requirements.txt
 $ ./main.py ~/benchmarks_workdir/ --sycl ~/llvm/build/ --ur ~/ur_install --adapter adapter_name
 ```
-This last command will **download and build** everything in `~/benchmarks_workdir/`
-using the built compiler located in `~/llvm/build/`,
-UR **install directory** from `~/ur`,
+This last command will **download and build** everything in `~/benchmarks_workdir/`
+using the compiler built in `~/llvm/build/` and
+the Unified Runtime installed in `~/ur_install`,
 and then **run** the benchmarks for `adapter_name` adapter.

 >NOTE: By default `level_zero` adapter is used.
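For reference, the workflow inputs changed above (`pr_no`, `commit_hash`, `save_name`, `upload_results`, `runner`) can be supplied when dispatching the workflow manually. A minimal sketch using the GitHub CLI; the PR number and save name below are placeholder values, and the workflow file name is assumed to match the one patched above:

```
# Dispatch the benchmarking workflow for a hypothetical PR 12345; leaving
# pr_no and commit_hash empty instead would benchmark the latest commit of
# the branch/tag the workflow is started from (see descriptions above).
$ gh workflow run sycl-ur-perf-benchmarking.yml \
    --repo intel/llvm \
    -f pr_no=12345 \
    -f save_name=pr12345-perf \
    -f upload_results=true \
    -f runner='["PVC_PERF"]'
```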
@@ -41,9 +41,11 @@ and then **run** the benchmarks for `adapter_name` adapter.

 >NOTE: Pay attention to the `--ur` parameter. It points directly to the directory where UR is installed. To install Unified Runtime in the predefined location, use the `-DCMAKE_INSTALL_PREFIX`.

-UR build example:
+UR build and install example:
 ```
 $ cmake -DCMAKE_BUILD_TYPE=Release -S~/llvm/unified-runtime -B~/ur_build -DCMAKE_INSTALL_PREFIX=~/ur_install -DUR_BUILD_ADAPTER_L0=ON -DUR_BUILD_ADAPTER_L0_V2=ON
+$ cmake --build ~/ur_build -j $(nproc)
+$ cmake --install ~/ur_build
 ```

 ### Rebuild
@@ -95,11 +97,12 @@

 In addition to the above parameters, there are also additional options that help

 `--preset
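Presets, selected via the `--preset` option mentioned above, are the suite lists defined in `presets.py` (CONTRIB.md above names "Full" and "Normal" as examples). A minimal usage sketch, assuming the option accepts one of those preset names:

```
# Run only the suites grouped under the "Normal" preset in presets.py;
# the workdir and --sycl paths reuse the example values from the
# Running section above.
$ ./main.py ~/benchmarks_workdir/ --sycl ~/llvm/build/ --preset Normal
```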