From 2ce7e21adaee505c5d10b24067a7aefcf5e76d31 Mon Sep 17 00:00:00 2001
From: Łukasz Stolarczuk
Date: Wed, 15 Oct 2025 12:12:00 +0200
Subject: [PATCH] [CI][Bench] Cleanup in benchmarks jobs and scripts

---
 .../workflows/sycl-benchmark-aggregate.yml    | 52 -------------------
 .../workflows/sycl-ur-perf-benchmarking.yml   | 19 +++----
 devops/scripts/benchmarks/CONTRIB.md          |  4 +-
 devops/scripts/benchmarks/README.md           | 28 +++++-----
 devops/scripts/benchmarks/benches/compute.py  |  2 +-
 devops/scripts/benchmarks/compare.py          |  2 +-
 devops/scripts/benchmarks/main.py             |  2 +-
 devops/scripts/benchmarks/options.py          |  5 ++
 8 files changed, 35 insertions(+), 79 deletions(-)
 delete mode 100644 .github/workflows/sycl-benchmark-aggregate.yml

diff --git a/.github/workflows/sycl-benchmark-aggregate.yml b/.github/workflows/sycl-benchmark-aggregate.yml
deleted file mode 100644
index d417a0fe9893d..0000000000000
--- a/.github/workflows/sycl-benchmark-aggregate.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-name: Aggregate compute-benchmark averages from historical data
-
-# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on
-# how the benchmark results compare to a historical average: This historical
-# average is calculated in this workflow, which aggregates historical data and
-# produces measures of central tendency (median in this case) used for this
-# purpose.
-
-on:
-  workflow_dispatch:
-    inputs:
-      lookback_days:
-        description: |
-          Number of days from today to look back in historical results for:
-          This sets the age limit of data used in average calculation: Any
-          benchmark results created before `lookback_days` from today is
-          excluded from being aggregated in the historical average.
-        type: number
-        required: true
-  workflow_call:
-    inputs:
-      lookback_days:
-        type: number
-        required: true
-    secrets:
-      LLVM_SYCL_BENCHMARK_TOKEN:
-        description: |
-          Github token used by the faceless account to push newly calculated
-          medians.
-        required: true
-
-
-permissions:
-  contents: read
-
-jobs:
-  aggregate:
-    name: Aggregate average (median) value for all metrics
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-        with:
-          sparse-checkout: |
-            devops/scripts/benchmarking
-            devops/benchmarking
-            devops/actions/benchmarking
-      - name: Aggregate benchmark results and produce historical average
-        uses: ./devops/actions/benchmarking/aggregate
-        with:
-          lookback_days: ${{ inputs.lookback_days }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml
index b5b14cf34ade1..c154bb53413cb 100644
--- a/.github/workflows/sycl-ur-perf-benchmarking.yml
+++ b/.github/workflows/sycl-ur-perf-benchmarking.yml
@@ -15,8 +15,8 @@ on:
         PR no. to build SYCL from if specified:
         SYCL will be built from HEAD of incoming branch used by the
         specified PR no.

-        If both pr_no and commit_hash are empty, the latest SYCL nightly build
-        will be used.
+        If both pr_no and commit_hash are empty, the latest commit in
+        the deployment branch will be used.
       required: false
       default: ''
     commit_hash:
@@ -64,33 +64,34 @@ on:
     pr_no:
       type: string
       description: |
-        PR no. to build SYCL from:
-
-        SYCL will be built from HEAD of incoming branch.
+        PR no. to build SYCL from - it will be built from the HEAD of the incoming branch.
+
+        Leave both pr_no and commit_hash empty to use the latest commit from the branch/tag this workflow was started from.
      required: false
       default: ''
     commit_hash:
       type: string
       description: |
-        Commit hash (within intel/llvm) to build SYCL from:
+        Commit hash (within intel/llvm) to build SYCL from.

-        Leave both pr_no and commit_hash empty to use latest commit.
+        Leave both pr_no and commit_hash empty to use the latest commit from the branch/tag this workflow was started from.
       required: false
       default: ''
     save_name:
       type: string
       description: |
-        Name to use for the benchmark result:
+        Name to use for the benchmark result
       required: false
       default: ''
     upload_results:
-      description: 'Save and upload results (to https://intel.github.io/llvm/benchmarks)'
+      description: Save and upload results (to https://intel.github.io/llvm/benchmarks)
       type: choice
       options:
       - false
       - true
       default: true
     runner:
+      description: Self-hosted runner to use for the benchmarks
       type: choice
       options:
       - '["PVC_PERF"]'
diff --git a/devops/scripts/benchmarks/CONTRIB.md b/devops/scripts/benchmarks/CONTRIB.md
index ed11f007a7fc6..7d1d1150e1353 100644
--- a/devops/scripts/benchmarks/CONTRIB.md
+++ b/devops/scripts/benchmarks/CONTRIB.md
@@ -2,7 +2,7 @@

 ## Architecture

-The suite is structured around three main components: Suites, Benchmarks, and Results.
+The suite is structured around four main components: Suites, Benchmarks, Results, and BenchmarkMetadata.

 1. **Suites:**
    * Collections of related benchmarks (e.g., `ComputeBench`, `LlamaCppBench`).
@@ -170,7 +170,7 @@ The benchmark suite generates an interactive HTML dashboard that visualizes `Res
    * If adding to an existing category, modify the corresponding `Suite` class (e.g., `benches/compute.py`) to instantiate and return your new benchmark in its `benchmarks()` method.
    * If creating a new category, create a new `Suite` class inheriting from `benches.base.Suite`. Implement `name()` and `benchmarks()`. Add necessary `setup()` if the suite requires shared setup. Add group metadata via `additional_metadata()` if needed.
 3. **Register Suite:** Import and add your new `Suite` instance to the `suites` list in `main.py`.
-4. **Add to Presets:** If adding a new suite, add its `name()` to the relevant lists in `presets.py` (e.g., "Full", "Normal") so it runs with those presets.
+4. **Add to Presets:** If adding a new suite, add its `name()` to the relevant lists in `presets.py` (e.g., "Full", "Normal") so it runs with those presets. Update `README.md` to include the new suite in the presets' descriptions.

 ## Recommendations

diff --git a/devops/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md
index 900826bac9de3..4f7341e3b1e8a 100644
--- a/devops/scripts/benchmarks/README.md
+++ b/devops/scripts/benchmarks/README.md
@@ -1,6 +1,6 @@
-# Unified Runtime Benchmark Runner
+# SYCL and Unified Runtime Benchmark Runner

-Scripts for running performance tests on SYCL and Unified Runtime.
+Scripts for running benchmarks on SYCL and Unified Runtime.

 ## Benchmarks

@@ -31,9 +31,9 @@
 $ pip install -r requirements.txt
 $ ./main.py ~/benchmarks_workdir/ --sycl ~/llvm/build/ --ur ~/ur_install --adapter adapter_name
 ```
-This last command will **download and build** everything in `~/benchmarks_workdir/`
-using the built compiler located in `~/llvm/build/`,
-UR **install directory** from `~/ur`,
+This last command will **download and build** everything in `~/benchmarks_workdir/`
+using the compiler built in `~/llvm/build/` and
+the Unified Runtime installed in `~/ur_install`,
 and then **run** the benchmarks for `adapter_name` adapter.

 >NOTE: By default `level_zero` adapter is used.
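For reference, the workflow inputs changed above (`pr_no`, `commit_hash`, `save_name`, `upload_results`, `runner`) can be supplied when dispatching the workflow manually. A minimal sketch using the GitHub CLI; the PR number and save name below are placeholder values, and the workflow file name is assumed to match the one patched above:

```
# Dispatch the benchmarking workflow for a hypothetical PR 12345; leaving
# pr_no and commit_hash empty instead would benchmark the latest commit of
# the branch/tag the workflow is started from (see descriptions above).
$ gh workflow run sycl-ur-perf-benchmarking.yml \
    --repo intel/llvm \
    -f pr_no=12345 \
    -f save_name=pr12345-perf \
    -f upload_results=true \
    -f runner='["PVC_PERF"]'
```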
@@ -41,9 +41,11 @@ and then **run** the benchmarks for `adapter_name` adapter.

 >NOTE: Pay attention to the `--ur` parameter. It points directly to the directory where UR is installed. To install Unified Runtime in the predefined location, use the `-DCMAKE_INSTALL_PREFIX`.

-UR build example:
+UR build and install example:
 ```
 $ cmake -DCMAKE_BUILD_TYPE=Release -S~/llvm/unified-runtime -B~/ur_build -DCMAKE_INSTALL_PREFIX=~/ur_install -DUR_BUILD_ADAPTER_L0=ON -DUR_BUILD_ADAPTER_L0_V2=ON
+$ cmake --build ~/ur_build -j $(nproc)
+$ cmake --install ~/ur_build
 ```

 ### Rebuild
@@ -95,11 +97,12 @@

 In addition to the above parameters, there are also additional options that help

 `--preset
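Presets, selected via the `--preset` option mentioned above, are the suite lists defined in `presets.py` (CONTRIB.md above names "Full" and "Normal" as examples). A minimal usage sketch, assuming the option accepts one of those preset names:

```
# Run only the suites grouped under the "Normal" preset in presets.py;
# the workdir and --sycl paths reuse the example values from the
# Running section above.
$ ./main.py ~/benchmarks_workdir/ --sycl ~/llvm/build/ --preset Normal
```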