From 21fa7fb1569e02b253b37d1b8e7c6c15eed49501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Mon, 20 Oct 2025 18:08:58 +0000 Subject: [PATCH 1/9] [Benchmarks] Pin benchmarks to small set of cores For better results stability, pin benchmark binaries to four cores with the maximum available frequency. --- devops/actions/run-tests/benchmark/action.yml | 4 ++-- devops/scripts/benchmarks/benches/compute.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 6c070f83470c2..b14bfb52af540 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -79,14 +79,14 @@ runs: shell: bash run: | # Compute the core range for the first NUMA node; second node is used by - # UMF. Skip the first 4 cores as the kernel is likely to schedule more + # UMF. Skip the first 3 cores as the kernel is likely to schedule more # work on these. CORES="$(lscpu | awk ' /NUMA node0 CPU|On-line CPU/ {line=$0} END { split(line, a, " ") split(a[4], b, ",") - sub(/^0/, "4", b[1]) + sub(/^0/, "3", b[1]) print b[1] }')" echo "CPU core range to use: $CORES" diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 6a2aea460ddb9..fabf422130ee7 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -10,6 +10,7 @@ from enum import Enum from itertools import product from pathlib import Path +from psutil import Process from git_project import GitProject from options import options @@ -417,6 +418,24 @@ def run( command += self.bin_args(run_trace) env_vars.update(self.extra_env_vars()) + # Pin compute benchmarks to a CPU cores set to ensure consistent results + # and non-zero CPU count measurements (e.g. avoid E-cores). 
4 max freq cores + # are pinned by default to satisfy multiple threads benchmarks. + available_cores = Process().cpu_affinity() + # Get 4 cores with the highest available frequency. + core_frequencies = [] + for core in available_cores: + with open( + f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq" + ) as f: + freq = int(f.read().strip()) + core_frequencies.append((core, freq)) + core_frequencies.sort(key=lambda x: x[1], reverse=True) + available_cores = [core for core, _ in core_frequencies[:4]] + cores_list = ",".join([str(core) for core in available_cores]) + + command = ["taskset", "-c", cores_list] + command + result = self.run_bench( command, env_vars, run_trace=run_trace, force_trace=force_trace ) From 2b495faac02c5384e1db3b2d9be396b4579504e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Wed, 22 Oct 2025 10:32:37 +0000 Subject: [PATCH 2/9] Pin benchmarks to 4 cores in all suites --- devops/scripts/benchmarks/benches/base.py | 33 +++++++++++++++++--- devops/scripts/benchmarks/benches/compute.py | 20 ------------ 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index bbbedaf629bf5..d6554942182e8 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -6,15 +6,18 @@ import os import shutil import subprocess -from pathlib import Path +from abc import ABC, abstractmethod from enum import Enum -from utils.result import BenchmarkMetadata, BenchmarkTag, Result +from pathlib import Path + +from psutil import Process + from options import options -from utils.utils import download, run -from abc import ABC, abstractmethod -from utils.unitrace import get_unitrace from utils.flamegraph import get_flamegraph from utils.logger import log +from utils.result import BenchmarkMetadata, BenchmarkTag, Result +from utils.unitrace import get_unitrace +from utils.utils import download, run 
class TracingType(Enum): @@ -167,6 +170,8 @@ def run_bench( log.debug(f"FlameGraph perf data: {perf_data_file}") log.debug(f"FlameGraph command: {' '.join(command)}") + command = self.taskset_cmd() + command + try: result = run( command=command, @@ -268,6 +273,24 @@ def get_metadata(self) -> dict[str, BenchmarkMetadata]: ) } + def taskset_cmd(self) -> list[str]: + """Returns a list of strings with taskset usage for core pinning. + Pin compute benchmarks to a CPU cores set to ensure consistent results + and non-zero CPU count measurements (e.g. avoid E-cores). Exactly 4 cores + with the maximum frequency are pinned by default to satisfy multiple threads benchmarks. + """ + available_cores = Process().cpu_affinity() + core_frequencies = [] + for core in available_cores: # type: ignore + with open( + f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq" + ) as f: + freq = int(f.read().strip()) + core_frequencies.append((core, freq)) + core_frequencies.sort(key=lambda x: x[1], reverse=True) + cores_list = ",".join([str(core) for core, _ in core_frequencies[:4]]) + return ["taskset", "-c", cores_list] + class Suite(ABC): @abstractmethod diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index fabf422130ee7..2bb08402fd533 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -3,14 +3,12 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import copy import csv import io import math from enum import Enum from itertools import product from pathlib import Path -from psutil import Process from git_project import GitProject from options import options @@ -418,24 +416,6 @@ def run( command += self.bin_args(run_trace) env_vars.update(self.extra_env_vars()) - # Pin compute benchmarks to a CPU cores set to ensure consistent results - # and non-zero CPU count measurements (e.g. avoid E-cores). 
4 max freq cores - # are pinned by default to satisfy multiple threads benchmarks. - available_cores = Process().cpu_affinity() - # Get 4 cores with the highest available frequency. - core_frequencies = [] - for core in available_cores: - with open( - f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq" - ) as f: - freq = int(f.read().strip()) - core_frequencies.append((core, freq)) - core_frequencies.sort(key=lambda x: x[1], reverse=True) - available_cores = [core for core, _ in core_frequencies[:4]] - cores_list = ",".join([str(core) for core in available_cores]) - - command = ["taskset", "-c", cores_list] + command - result = self.run_bench( command, env_vars, run_trace=run_trace, force_trace=force_trace ) From 670adfcfd797c5a6f86ba0408e514ff137dd23f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Wed, 22 Oct 2025 15:50:57 +0000 Subject: [PATCH 3/9] Review updates --- devops/scripts/benchmarks/benches/base.py | 8 ++++++-- devops/scripts/benchmarks/requirements.txt | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index d6554942182e8..ec85b0562a53f 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -287,8 +287,12 @@ def taskset_cmd(self) -> list[str]: ) as f: freq = int(f.read().strip()) core_frequencies.append((core, freq)) - core_frequencies.sort(key=lambda x: x[1], reverse=True) - cores_list = ",".join([str(core) for core, _ in core_frequencies[:4]]) + selected = core_frequencies[:4] # first ones have highest frequency + if len({freq for _, freq in selected}) > 1: + log.warning( + f"Selected cores for pinning have differing max frequencies: {selected}" + ) + cores_list = ",".join([str(core) for core, _ in selected]) return ["taskset", "-c", cores_list] diff --git a/devops/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt index 
9283c797eaf47..d9c7264a92c6e 100644 --- a/devops/scripts/benchmarks/requirements.txt +++ b/devops/scripts/benchmarks/requirements.txt @@ -3,3 +3,4 @@ mpld3==0.5.10 dataclasses-json==0.6.7 PyYAML==6.0.1 Mako==1.3.0 +psutil>=7.0.0 From a0aacd5c24ab6e17a02761b4211424f820bdee06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Fri, 24 Oct 2025 10:23:27 +0000 Subject: [PATCH 4/9] taskset_cmd refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Ślusarczyk --- devops/scripts/benchmarks/benches/base.py | 27 +++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index ec85b0562a53f..5707929b12898 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -277,23 +277,22 @@ def taskset_cmd(self) -> list[str]: """Returns a list of strings with taskset usage for core pinning. Pin compute benchmarks to a CPU cores set to ensure consistent results and non-zero CPU count measurements (e.g. avoid E-cores). Exactly 4 cores - with the maximum frequency are pinned by default to satisfy multiple threads benchmarks. + are pinned by default to satisfy multiple threads benchmarks. It is assumed + that they have the maximum, or at least the same, frequency. 
""" - available_cores = Process().cpu_affinity() - core_frequencies = [] - for core in available_cores: # type: ignore - with open( - f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq" - ) as f: - freq = int(f.read().strip()) - core_frequencies.append((core, freq)) - selected = core_frequencies[:4] # first ones have highest frequency - if len({freq for _, freq in selected}) > 1: + get_core_frequency = ( + lambda num: open( + f"/sys/devices/system/cpu/cpu{num}/cpufreq/cpuinfo_max_freq" + ) + .read() + .strip() + ) + selected_cores = [str(core) for core in Process().cpu_affinity()[:4]] # type: ignore + if len({get_core_frequency(core) for core in selected_cores}) > 1: log.warning( - f"Selected cores for pinning have differing max frequencies: {selected}" + f"Selected cores for pinning have differing max frequencies: {selected_cores}" ) - cores_list = ",".join([str(core) for core, _ in selected]) - return ["taskset", "-c", cores_list] + return ["taskset", "-c", ",".join(selected_cores)] class Suite(ABC): From fc66416beae343df8e71723c3a46fe467caff232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Wed, 29 Oct 2025 13:35:37 +0000 Subject: [PATCH 5/9] Bind benchmarks framework run to whole numa node Cores for running benchmark scenarios are selected in the framework's logic now. --- devops/actions/run-tests/benchmark/action.yml | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index b14bfb52af540..4eecc16968655 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -75,25 +75,13 @@ runs: python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" [ "$?" 
-ne 0 ] && exit 1 # Stop workflow if invalid preset echo "PRESET=$PRESET" >> $GITHUB_ENV - - name: Compute CPU core range to run benchmarks on + - name: Set NUMA node to run benchmarks on shell: bash run: | - # Compute the core range for the first NUMA node; second node is used by - # UMF. Skip the first 3 cores as the kernel is likely to schedule more - # work on these. - CORES="$(lscpu | awk ' - /NUMA node0 CPU|On-line CPU/ {line=$0} - END { - split(line, a, " ") - split(a[4], b, ",") - sub(/^0/, "3", b[1]) - print b[1] - }')" - echo "CPU core range to use: $CORES" - echo "CORES=$CORES" >> $GITHUB_ENV - - ZE_AFFINITY_MASK=0 - echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + # Set CPU and GPU affinity for the first NUMA node; second node is used by UMF + NUMA_NODE=0 + echo "ZE_AFFINITY_MASK=$NUMA_NODE" >> $GITHUB_ENV + echo "NUMA_NODE=$NUMA_NODE" >> $GITHUB_ENV # Compute-benchmarks relies on UR static libraries, cmake config files, etc. # DPC++ doesn't ship with these files. The easiest way of obtaining these @@ -192,7 +180,8 @@ runs: sycl-ls echo "-----" - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + numactl --cpunodebind "$NUMA_NODE" --membind "$NUMA_NODE" \ + ./devops/scripts/benchmarks/main.py \ "$(realpath ./llvm_test_workdir)" \ --sycl "$(realpath ./toolchain)" \ --ur "$(realpath ./ur/install)" \ From 48d964fa644254e82a6b88b18797bca6f8262a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Wed, 29 Oct 2025 14:06:30 +0000 Subject: [PATCH 6/9] Add numactl to images for binding benchmarks --- devops/scripts/install_build_tools.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devops/scripts/install_build_tools.sh b/devops/scripts/install_build_tools.sh index 1a1aa6dccda63..c7e097c356f1f 100755 --- a/devops/scripts/install_build_tools.sh +++ b/devops/scripts/install_build_tools.sh @@ -28,7 +28,8 @@ apt update && apt install -yqq \ libzstd-dev \ linux-tools-generic \ linux-tools-common \ - time 
+ time \ + numactl # To obtain latest release of spriv-tool. # Same as what's done in SPRIV-LLVM-TRANSLATOR: From c9767f596725211b0b8e12b74e05d67ef1c8cfc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Wed, 5 Nov 2025 10:29:45 +0000 Subject: [PATCH 7/9] Remove check for same frequency The CPU driver sometimes slightly changes the maximum frequency of the selected cores. This, however, does not significantly affect benchmark results. Assuming that the first cores have the highest frequency is sufficient. --- devops/scripts/benchmarks/benches/base.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 5707929b12898..aef661a85a3f5 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -278,20 +278,9 @@ def taskset_cmd(self) -> list[str]: Pin compute benchmarks to a CPU cores set to ensure consistent results and non-zero CPU count measurements (e.g. avoid E-cores). Exactly 4 cores are pinned by default to satisfy multiple threads benchmarks. It is assumed - that they have the maximum, or at least the same, frequency. + that they have the maximum, or at least similar, frequency.
""" - get_core_frequency = ( - lambda num: open( - f"/sys/devices/system/cpu/cpu{num}/cpufreq/cpuinfo_max_freq" - ) - .read() - .strip() - ) selected_cores = [str(core) for core in Process().cpu_affinity()[:4]] # type: ignore - if len({get_core_frequency(core) for core in selected_cores}) > 1: - log.warning( - f"Selected cores for pinning have differing max frequencies: {selected_cores}" - ) return ["taskset", "-c", ",".join(selected_cores)] From 127b51985d010199431ea4c96832c008903a0da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Tue, 4 Nov 2025 14:19:11 +0000 Subject: [PATCH 8/9] [TEST] Install numactl --- devops/actions/run-tests/benchmark/action.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 1ef32cbd086d9..937803d777969 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -44,6 +44,12 @@ runs: # composite actions don't make use of 'name', so copy-paste names as a comment in the first line of each step using: "composite" steps: + - name: Install numactl + shell: bash + run: | + # Install numactl for NUMA support + sudo apt-get update + sudo apt-get install -y numactl - name: Check specified runner type / target backend shell: bash env: From b09adca948bd7d3533a4d94cb56e8a7c5d42d66c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Thu, 13 Nov 2025 13:06:37 +0000 Subject: [PATCH 9/9] Revert "[TEST] Install numactl" This reverts commit 127b51985d010199431ea4c96832c008903a0da7. 
--- devops/actions/run-tests/benchmark/action.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index d43d4d9cbf42c..bb6a161be6065 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -42,12 +42,6 @@ runs: # composite actions don't make use of 'name', so copy-paste names as a comment in the first line of each step using: "composite" steps: - - name: Install numactl - shell: bash - run: | - # Install numactl for NUMA support - sudo apt-get update - sudo apt-get install -y numactl - name: Check specified runner type / target backend shell: bash env: