From 21fa7fb1569e02b253b37d1b8e7c6c15eed49501 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Mon, 20 Oct 2025 18:08:58 +0000
Subject: [PATCH 1/9] [Benchmarks] Pin benchmarks to small set of cores

For better result stability, pin benchmark binaries to the four cores with the highest available maximum frequency.
---
 devops/actions/run-tests/benchmark/action.yml |  4 ++--
 devops/scripts/benchmarks/benches/compute.py  | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 6c070f83470c2..b14bfb52af540 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -79,14 +79,14 @@ runs:
     shell: bash
     run: |
       # Compute the core range for the first NUMA node; second node is used by
-      # UMF. Skip the first 4 cores as the kernel is likely to schedule more
+      # UMF. Skip the first 3 cores as the kernel is likely to schedule more
       # work on these.
       CORES="$(lscpu | awk '
         /NUMA node0 CPU|On-line CPU/ {line=$0}
         END {
           split(line, a, " ")
           split(a[4], b, ",")
-          sub(/^0/, "4", b[1])
+          sub(/^0/, "3", b[1])
           print b[1]
         }')"
       echo "CPU core range to use: $CORES"
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 6a2aea460ddb9..fabf422130ee7 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -10,6 +10,7 @@
 from enum import Enum
 from itertools import product
 from pathlib import Path
+from psutil import Process
 
 from git_project import GitProject
 from options import options
@@ -417,6 +418,24 @@ def run(
         command += self.bin_args(run_trace)
         env_vars.update(self.extra_env_vars())
 
+        # Pin compute benchmarks to a CPU cores set to ensure consistent results
+        # and non-zero CPU count measurements (e.g. avoid E-cores). 4 max freq cores
+        # are pinned by default to satisfy multiple threads benchmarks.
+        available_cores = Process().cpu_affinity()
+        # Get 4 cores with the highest available frequency.
+        core_frequencies = []
+        for core in available_cores:
+            with open(
+                f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq"
+            ) as f:
+                freq = int(f.read().strip())
+                core_frequencies.append((core, freq))
+        core_frequencies.sort(key=lambda x: x[1], reverse=True)
+        available_cores = [core for core, _ in core_frequencies[:4]]
+        cores_list = ",".join([str(core) for core in available_cores])
+
+        command = ["taskset", "-c", cores_list] + command
+
         result = self.run_bench(
             command, env_vars, run_trace=run_trace, force_trace=force_trace
         )

From 2b495faac02c5384e1db3b2d9be396b4579504e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Wed, 22 Oct 2025 10:32:37 +0000
Subject: [PATCH 2/9] Pin benchmarks to 4 cores in all suites

---
 devops/scripts/benchmarks/benches/base.py    | 33 +++++++++++++++++---
 devops/scripts/benchmarks/benches/compute.py | 20 ------------
 2 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index bbbedaf629bf5..d6554942182e8 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -6,15 +6,18 @@
 import os
 import shutil
 import subprocess
-from pathlib import Path
+from abc import ABC, abstractmethod
 from enum import Enum
-from utils.result import BenchmarkMetadata, BenchmarkTag, Result
+from pathlib import Path
+
+from psutil import Process
+
 from options import options
-from utils.utils import download, run
-from abc import ABC, abstractmethod
-from utils.unitrace import get_unitrace
 from utils.flamegraph import get_flamegraph
 from utils.logger import log
+from utils.result import BenchmarkMetadata, BenchmarkTag, Result
+from utils.unitrace import get_unitrace
+from utils.utils import download, run
 
 
 class TracingType(Enum):
@@ -167,6 +170,8 @@ def run_bench(
             log.debug(f"FlameGraph perf data: {perf_data_file}")
             log.debug(f"FlameGraph command: {' '.join(command)}")
 
+        command = self.taskset_cmd() + command
+
         try:
             result = run(
                 command=command,
@@ -268,6 +273,24 @@ def get_metadata(self) -> dict[str, BenchmarkMetadata]:
             )
         }
 
+    def taskset_cmd(self) -> list[str]:
+        """Returns a list of strings with taskset usage for core pinning.
+        Pin compute benchmarks to a CPU cores set to ensure consistent results
+        and non-zero CPU count measurements (e.g. avoid E-cores). Exactly 4 cores
+        with the maximum frequency are pinned by default to satisfy multiple threads benchmarks.
+        """
+        available_cores = Process().cpu_affinity()
+        core_frequencies = []
+        for core in available_cores:  # type: ignore
+            with open(
+                f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq"
+            ) as f:
+                freq = int(f.read().strip())
+                core_frequencies.append((core, freq))
+        core_frequencies.sort(key=lambda x: x[1], reverse=True)
+        cores_list = ",".join([str(core) for core, _ in core_frequencies[:4]])
+        return ["taskset", "-c", cores_list]
+
 
 class Suite(ABC):
     @abstractmethod
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index fabf422130ee7..2bb08402fd533 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -3,14 +3,12 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-import copy
 import csv
 import io
 import math
 from enum import Enum
 from itertools import product
 from pathlib import Path
-from psutil import Process
 
 from git_project import GitProject
 from options import options
@@ -418,24 +416,6 @@ def run(
         command += self.bin_args(run_trace)
         env_vars.update(self.extra_env_vars())
 
-        # Pin compute benchmarks to a CPU cores set to ensure consistent results
-        # and non-zero CPU count measurements (e.g. avoid E-cores). 4 max freq cores
-        # are pinned by default to satisfy multiple threads benchmarks.
-        available_cores = Process().cpu_affinity()
-        # Get 4 cores with the highest available frequency.
-        core_frequencies = []
-        for core in available_cores:
-            with open(
-                f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq"
-            ) as f:
-                freq = int(f.read().strip())
-                core_frequencies.append((core, freq))
-        core_frequencies.sort(key=lambda x: x[1], reverse=True)
-        available_cores = [core for core, _ in core_frequencies[:4]]
-        cores_list = ",".join([str(core) for core in available_cores])
-
-        command = ["taskset", "-c", cores_list] + command
-
         result = self.run_bench(
             command, env_vars, run_trace=run_trace, force_trace=force_trace
         )

From 670adfcfd797c5a6f86ba0408e514ff137dd23f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Wed, 22 Oct 2025 15:50:57 +0000
Subject: [PATCH 3/9] Review updates

---
 devops/scripts/benchmarks/benches/base.py  | 8 ++++++--
 devops/scripts/benchmarks/requirements.txt | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index d6554942182e8..ec85b0562a53f 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -287,8 +287,12 @@ def taskset_cmd(self) -> list[str]:
             ) as f:
                 freq = int(f.read().strip())
                 core_frequencies.append((core, freq))
-        core_frequencies.sort(key=lambda x: x[1], reverse=True)
-        cores_list = ",".join([str(core) for core, _ in core_frequencies[:4]])
+        selected = core_frequencies[:4]  # first ones have highest frequency
+        if len({freq for _, freq in selected}) > 1:
+            log.warning(
+                f"Selected cores for pinning have differing max frequencies: {selected}"
+            )
+        cores_list = ",".join([str(core) for core, _ in selected])
         return ["taskset", "-c", cores_list]
 
 
diff --git a/devops/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt
index 9283c797eaf47..d9c7264a92c6e 100644
--- a/devops/scripts/benchmarks/requirements.txt
+++ b/devops/scripts/benchmarks/requirements.txt
@@ -3,3 +3,4 @@ mpld3==0.5.10
 dataclasses-json==0.6.7
 PyYAML==6.0.1
 Mako==1.3.0
+psutil>=7.0.0

From a0aacd5c24ab6e17a02761b4211424f820bdee06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Fri, 24 Oct 2025 10:23:27 +0000
Subject: [PATCH 4/9] taskset_cmd refactor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Łukasz Ślusarczyk <lukasz.slusarczyk@intel.com>
---
 devops/scripts/benchmarks/benches/base.py | 27 +++++++++++------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index ec85b0562a53f..5707929b12898 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -277,23 +277,22 @@ def taskset_cmd(self) -> list[str]:
         """Returns a list of strings with taskset usage for core pinning.
         Pin compute benchmarks to a CPU cores set to ensure consistent results
         and non-zero CPU count measurements (e.g. avoid E-cores). Exactly 4 cores
-        with the maximum frequency are pinned by default to satisfy multiple threads benchmarks.
+        are pinned by default to satisfy multiple threads benchmarks. It is assumed
+        that they have the maximum, or at least the same, frequency.
         """
-        available_cores = Process().cpu_affinity()
-        core_frequencies = []
-        for core in available_cores:  # type: ignore
-            with open(
-                f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq"
-            ) as f:
-                freq = int(f.read().strip())
-                core_frequencies.append((core, freq))
-        selected = core_frequencies[:4]  # first ones have highest frequency
-        if len({freq for _, freq in selected}) > 1:
+        get_core_frequency = (
+            lambda num: open(
+                f"/sys/devices/system/cpu/cpu{num}/cpufreq/cpuinfo_max_freq"
+            )
+            .read()
+            .strip()
+        )
+        selected_cores = [str(core) for core in Process().cpu_affinity()[:4]]  # type: ignore
+        if len({get_core_frequency(core) for core in selected_cores}) > 1:
             log.warning(
-                f"Selected cores for pinning have differing max frequencies: {selected}"
+                f"Selected cores for pinning have differing max frequencies: {selected_cores}"
             )
-        cores_list = ",".join([str(core) for core, _ in selected])
-        return ["taskset", "-c", cores_list]
+        return ["taskset", "-c", ",".join(selected_cores)]
 
 
 class Suite(ABC):

From fc66416beae343df8e71723c3a46fe467caff232 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Wed, 29 Oct 2025 13:35:37 +0000
Subject: [PATCH 5/9] Bind benchmarks framework run to whole numa node

Cores for running benchmark scenarios are selected in the framework's logic now.
---
 devops/actions/run-tests/benchmark/action.yml | 25 ++++++-------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index b14bfb52af540..4eecc16968655 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -75,25 +75,13 @@ runs:
       python3 ./devops/scripts/benchmarks/presets.py query "$PRESET"
       [ "$?" -ne 0 ] && exit 1  # Stop workflow if invalid preset
       echo "PRESET=$PRESET" >> $GITHUB_ENV
-  - name: Compute CPU core range to run benchmarks on
+  - name: Set NUMA node to run benchmarks on
     shell: bash
     run: |
-      # Compute the core range for the first NUMA node; second node is used by
-      # UMF. Skip the first 3 cores as the kernel is likely to schedule more
-      # work on these.
-      CORES="$(lscpu | awk '
-        /NUMA node0 CPU|On-line CPU/ {line=$0}
-        END {
-          split(line, a, " ")
-          split(a[4], b, ",")
-          sub(/^0/, "3", b[1])
-          print b[1]
-        }')"
-      echo "CPU core range to use: $CORES"
-      echo "CORES=$CORES" >> $GITHUB_ENV
-
-      ZE_AFFINITY_MASK=0
-      echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
+      # Set CPU and GPU affinity for the first NUMA node; second node is used by UMF
+      NUMA_NODE=0
+      echo "ZE_AFFINITY_MASK=$NUMA_NODE" >> $GITHUB_ENV
+      echo "NUMA_NODE=$NUMA_NODE" >> $GITHUB_ENV
 
   # Compute-benchmarks relies on UR static libraries, cmake config files, etc.
   # DPC++ doesn't ship with these files. The easiest way of obtaining these
@@ -192,7 +180,8 @@ runs:
       sycl-ls
       echo "-----"
 
-      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
+      numactl --cpunodebind "$NUMA_NODE" --membind "$NUMA_NODE" \
+      ./devops/scripts/benchmarks/main.py \
         "$(realpath ./llvm_test_workdir)" \
         --sycl "$(realpath ./toolchain)" \
         --ur "$(realpath ./ur/install)" \

From 48d964fa644254e82a6b88b18797bca6f8262a9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Wed, 29 Oct 2025 14:06:30 +0000
Subject: [PATCH 6/9] Add numactl to images for binding benchmarks

---
 devops/scripts/install_build_tools.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/devops/scripts/install_build_tools.sh b/devops/scripts/install_build_tools.sh
index 1a1aa6dccda63..c7e097c356f1f 100755
--- a/devops/scripts/install_build_tools.sh
+++ b/devops/scripts/install_build_tools.sh
@@ -28,7 +28,8 @@ apt update && apt install -yqq \
       libzstd-dev \
       linux-tools-generic \
       linux-tools-common \
-      time 
+      time \
+      numactl
 
 # To obtain latest release of spriv-tool.
 # Same as what's done in SPRIV-LLVM-TRANSLATOR:

From c9767f596725211b0b8e12b74e05d67ef1c8cfc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Wed, 5 Nov 2025 10:29:45 +0000
Subject: [PATCH 7/9] Remove check for same frequency

The CPU driver sometimes slightly changes the maximum frequency of the selected cores. This, however, doesn't greatly impact benchmark results. The assumption that the first cores have the highest frequency is sufficient.
---
 devops/scripts/benchmarks/benches/base.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 5707929b12898..aef661a85a3f5 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -278,20 +278,9 @@ def taskset_cmd(self) -> list[str]:
         Pin compute benchmarks to a CPU cores set to ensure consistent results
         and non-zero CPU count measurements (e.g. avoid E-cores). Exactly 4 cores
         are pinned by default to satisfy multiple threads benchmarks. It is assumed
-        that they have the maximum, or at least the same, frequency.
+        that they have the maximum, or at least similar, frequency.
         """
-        get_core_frequency = (
-            lambda num: open(
-                f"/sys/devices/system/cpu/cpu{num}/cpufreq/cpuinfo_max_freq"
-            )
-            .read()
-            .strip()
-        )
         selected_cores = [str(core) for core in Process().cpu_affinity()[:4]]  # type: ignore
-        if len({get_core_frequency(core) for core in selected_cores}) > 1:
-            log.warning(
-                f"Selected cores for pinning have differing max frequencies: {selected_cores}"
-            )
         return ["taskset", "-c", ",".join(selected_cores)]
 
 

From 127b51985d010199431ea4c96832c008903a0da7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Tue, 4 Nov 2025 14:19:11 +0000
Subject: [PATCH 8/9] [TEST] Install numactl

---
 devops/actions/run-tests/benchmark/action.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 1ef32cbd086d9..937803d777969 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -44,6 +44,12 @@ runs:
   # composite actions don't make use of 'name', so copy-paste names as a comment in the first line of each step
   using: "composite"
   steps:
+  - name: Install numactl
+    shell: bash
+    run: |
+      # Install numactl for NUMA support
+      sudo apt-get update
+      sudo apt-get install -y numactl
   - name: Check specified runner type / target backend
     shell: bash
     env:

From b09adca948bd7d3533a4d94cb56e8a7c5d42d66c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= <patryk.kaminski@intel.com>
Date: Thu, 13 Nov 2025 13:06:37 +0000
Subject: [PATCH 9/9] Revert "[TEST] Install numactl"

This reverts commit 127b51985d010199431ea4c96832c008903a0da7.
---
 devops/actions/run-tests/benchmark/action.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index d43d4d9cbf42c..bb6a161be6065 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -42,12 +42,6 @@ runs:
   # composite actions don't make use of 'name', so copy-paste names as a comment in the first line of each step
   using: "composite"
   steps:
-  - name: Install numactl
-    shell: bash
-    run: |
-      # Install numactl for NUMA support
-      sudo apt-get update
-      sudo apt-get install -y numactl
   - name: Check specified runner type / target backend
     shell: bash
     env: