From aa1cc9eb272cdea3f2b36119ff287686518a994b Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Fri, 22 Aug 2025 17:30:29 +0200 Subject: [PATCH 01/17] [SYCL] Implement sycl_ext_oneapi_clock Spec: https://github.com/intel/llvm/pull/19842 --- .../sycl/ext/oneapi/experimental/clock.hpp | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/clock.hpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp new file mode 100644 index 0000000000000..50314fa72765c --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -0,0 +1,37 @@ +//==-------- clock.hpp --- SYCL extension for clock() free function --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +enum class clock_scope : int { + // Aligned with SPIR-V Scope values + device = 1, + work_group = 2, + sub_group = 3 +}; + +uint64_t clock(clock_scope scope = clock_scope::sub_group) { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv_ReadClockKHR(static_cast(scope)); +#else + throw sycl::exception( + make_error_code(errc::runtime), + "sycl::ext::oneapi::experimental::clock() is not supported on host."); +#endif // __SYCL_DEVICE_ONLY__ +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl From 3c58917c84b04ad1bce2c2b48810f09025b06901 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Mon, 25 Aug 2025 16:09:02 +0200 Subject: [PATCH 02/17] rest --- sycl/include/sycl/info/aspects.def | 1 + sycl/source/detail/device_impl.hpp | 3 +++ sycl/source/feature_test.hpp.in | 1 + sycl/test-e2e/Experimental/clock.cpp | 40 ++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+) create mode 100644 sycl/test-e2e/Experimental/clock.cpp diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 00816611233d2..845a65c25803c 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -80,4 +80,5 @@ __SYCL_ASPECT(ext_oneapi_async_memory_alloc, 87) __SYCL_ASPECT(ext_intel_device_info_luid, 88) __SYCL_ASPECT(ext_intel_device_info_node_mask, 89) __SYCL_ASPECT(ext_oneapi_exportable_device_mem, 90) +__SYCL_ASPECT(ext_oneapi_clock, 91) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index e46633d9fab45..8775180383812 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1579,6 +1579,9 @@ class device_impl : public std::enable_shared_from_this { UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP>() .value_or(0); } + CASE(ext_oneapi_clock) { + return has_extension("cl_khr_kernel_clock"); + } else { return false; // This device aspect has not been implemented yet. } diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index 5b5cab4e0fc48..86040d75db03a 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -120,6 +120,7 @@ inline namespace _V1 { #define SYCL_KHR_FREE_FUNCTION_COMMANDS 1 #define SYCL_KHR_QUEUE_EMPTY_QUERY 1 #define SYCL_EXT_ONEAPI_MEMORY_EXPORT 1 +#define SYCL_EXT_ONEAPI_CLOCK 1 // In progress yet #define SYCL_EXT_ONEAPI_ATOMIC16 0 #define SYCL_KHR_DEFAULT_CONTEXT 1 diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp new file mode 100644 index 0000000000000..11dd8a608a671 --- /dev/null +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -0,0 +1,40 @@ +// REQUIRES: ext_oneapi_clock + +#include +#include +#include + +int main() { + sycl::queue q; + uint64_t *data = sycl::malloc_shared(3, q); + + q.single_task([=]() { + uint64_t sg_clock_start = sycl::ext::oneapi::experimental::clock( + sycl::ext::oneapi::experimental::clock_scope::sub_group); + uint64_t wg_clock_start = sycl::ext::oneapi::experimental::clock( + sycl::ext::oneapi::experimental::clock_scope::work_group); + uint64_t dev_clock_start = sycl::ext::oneapi::experimental::clock( + sycl::ext::oneapi::experimental::clock_scope::device); + + int count = 0; + for (int i = 0; i < 1e6; ++i) + count++; + + uint64_t sg_clock_end = sycl::ext::oneapi::experimental::clock( + sycl::ext::oneapi::experimental::clock_scope::sub_group); + uint64_t wg_clock_end = sycl::ext::oneapi::experimental::clock( + sycl::ext::oneapi::experimental::clock_scope::work_group); + uint64_t dev_clock_end = sycl::ext::oneapi::experimental::clock( + sycl::ext::oneapi::experimental::clock_scope::device); + data[0] = sg_clock_end - sg_clock_start; + data[1] = wg_clock_end - wg_clock_start; + data[2] = dev_clock_end - dev_clock_start; + }); + q.wait(); + + assert(data[0] > 0); + assert(data[1] > 0); + assert(data[2] > 0); + + return 0; +} \ No newline at end of file From 7f4762d9d31137960a7854dd1ee429dee47dfe71 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Mon, 25 Aug 2025 17:07:44 +0200 Subject: [PATCH 03/17] format --- sycl/source/detail/device_impl.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 8775180383812..81a62d49ac6df 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1579,9 +1579,7 @@ class device_impl : public std::enable_shared_from_this { UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP>() .value_or(0); } - CASE(ext_oneapi_clock) { - return has_extension("cl_khr_kernel_clock"); - } + CASE(ext_oneapi_clock) { return has_extension("cl_khr_kernel_clock"); } else { return false; // This device aspect has not been implemented yet. } From 8dff32b2c43b38227b8dd71939bdadcc06496e98 Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Tue, 26 Aug 2025 15:36:16 +0200 Subject: [PATCH 04/17] Update sycl/test-e2e/Experimental/clock.cpp --- sycl/test-e2e/Experimental/clock.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp index 11dd8a608a671..8ea010a3ea6b2 100644 --- a/sycl/test-e2e/Experimental/clock.cpp +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -37,4 +37,4 @@ int main() { assert(data[2] > 0); return 0; -} \ No newline at end of file +} From 8332e5022ed27656f14f80e168bffdc9c79fd583 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Tue, 26 Aug 2025 17:54:33 +0200 Subject: [PATCH 05/17] throw if aspect it not supported --- sycl/include/sycl/ext/oneapi/experimental/clock.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index 50314fa72765c..13723556a3991 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -22,6 +22,9 @@ enum class clock_scope : int { sub_group = 3 }; +#ifdef __SYCL_DEVICE_ONLY__ +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock)]] +#endif // __SYCL_DEVICE_ONLY__ uint64_t clock(clock_scope scope = clock_scope::sub_group) { #ifdef __SYCL_DEVICE_ONLY__ return __spirv_ReadClockKHR(static_cast(scope)); From 7872221c649006fe30a9723268107acc7b1a5dc9 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Tue, 2 Sep 2025 11:43:32 +0200 Subject: [PATCH 06/17] apply suggestions --- sycl/include/sycl/sycl.hpp | 1 + sycl/test-e2e/Experimental/clock.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 56d8fceb34dc2..a09870dd77c30 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -113,6 +113,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include +#include #include #include #include diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp index 8ea010a3ea6b2..c1b649487ad61 100644 --- a/sycl/test-e2e/Experimental/clock.cpp +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -1,4 +1,4 @@ -// REQUIRES: ext_oneapi_clock +// REQUIRES: aspect-ext_oneapi_clock, aspect-usm_shared_allocations #include #include @@ -35,6 +35,7 @@ int main() { assert(data[0] > 0); assert(data[1] > 0); assert(data[2] > 0); + sycl::free(data, q); return 0; } From 9918c934fa35d7cfb8270c87cfd40f459710ef27 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Wed, 3 Sep 2025 13:10:13 +0200 Subject: [PATCH 07/17] fix some --- sycl/include/sycl/ext/oneapi/experimental/clock.hpp | 2 +- sycl/test-e2e/Experimental/clock.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index 13723556a3991..69b63cc21cbe9 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -25,7 +25,7 @@ enum class clock_scope : int { #ifdef __SYCL_DEVICE_ONLY__ [[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock)]] #endif // __SYCL_DEVICE_ONLY__ -uint64_t clock(clock_scope scope = clock_scope::sub_group) { +uint64_t clock([[maybe_unused]] clock_scope scope = clock_scope::sub_group) { #ifdef __SYCL_DEVICE_ONLY__ return __spirv_ReadClockKHR(static_cast(scope)); #else diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp index c1b649487ad61..2d81153b44590 100644 --- a/sycl/test-e2e/Experimental/clock.cpp +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -1,4 +1,6 @@ // REQUIRES: aspect-ext_oneapi_clock, aspect-usm_shared_allocations +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out #include #include From 1c2112247eac5a8ce29c83847b75e69617721044 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Wed, 3 Sep 2025 14:58:20 +0200 Subject: [PATCH 08/17] more fixes --- sycl/include/sycl/ext/oneapi/experimental/clock.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index 69b63cc21cbe9..7c309e00e29b1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -25,9 +25,16 @@ enum class clock_scope : int { #ifdef __SYCL_DEVICE_ONLY__ [[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock)]] #endif // __SYCL_DEVICE_ONLY__ -uint64_t clock([[maybe_unused]] clock_scope scope = clock_scope::sub_group) { +inline uint64_t +clock([[maybe_unused]] clock_scope scope = clock_scope::sub_group) { #ifdef __SYCL_DEVICE_ONLY__ +#if defined(__SPIR__) || defined(__SPIRV__) return __spirv_ReadClockKHR(static_cast(scope)); +#else + throw sycl::exception(make_error_code(errc::feature_not_supported), + "sycl::ext::oneapi::experimental::clock() is currently " + "supported only on backends with SPIR-V support."); +#endif // defined(__SPIR__) || defined(__SPIRV__) #else throw sycl::exception( make_error_code(errc::runtime), From a565716eeecc8e8970162607e7d99fa488bd9085 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Wed, 3 Sep 2025 15:38:06 +0200 Subject: [PATCH 09/17] add missing include --- sycl/include/sycl/ext/oneapi/experimental/clock.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index 7c309e00e29b1..984339de55716 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include namespace sycl { From af1aab3dc311aca8a153c95d6124869de3763b81 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Wed, 3 Sep 2025 16:05:53 +0200 Subject: [PATCH 10/17] more more fixes --- llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index dbfffb5d490eb..b03b35bff3f42 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -94,6 +94,7 @@ def AspectExt_oneapi_async_memory_alloc : Aspect<"ext_oneapi_async_memory_alloc" def AspectExt_intel_device_info_luid : Aspect<"ext_intel_device_info_luid">; def AspectExt_intel_device_info_node_mask : Aspect<"ext_intel_device_info_node_mask">; def Aspectext_oneapi_exportable_device_mem : Aspect<"ext_oneapi_exportable_device_mem">; +def Aspectext_oneapi_clock : Aspect<"ext_oneapi_clock">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; @@ -168,7 +169,8 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_async_memory_alloc, AspectExt_intel_device_info_luid, AspectExt_intel_device_info_node_mask, - Aspectext_oneapi_exportable_device_mem], + Aspectext_oneapi_exportable_device_mem, + Aspectext_oneapi_clock], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. From 4c75261fee0306ba6e94598da06b92812ef5c83d Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Thu, 4 Sep 2025 14:11:50 +0200 Subject: [PATCH 11/17] dummy for non-spirv --- sycl/include/sycl/ext/oneapi/experimental/clock.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index 984339de55716..4896bb50f1d82 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -29,13 +29,12 @@ enum class clock_scope : int { inline uint64_t clock([[maybe_unused]] clock_scope scope = clock_scope::sub_group) { #ifdef __SYCL_DEVICE_ONLY__ -#if defined(__SPIR__) || defined(__SPIRV__) - return __spirv_ReadClockKHR(static_cast(scope)); +#if defined(__NVPTX__) || defined(__AMDGCN__) + // Currently clock() is not supported on NVPTX and AMDGCN. + return 0; #else - throw sycl::exception(make_error_code(errc::feature_not_supported), - "sycl::ext::oneapi::experimental::clock() is currently " - "supported only on backends with SPIR-V support."); -#endif // defined(__SPIR__) || defined(__SPIRV__) + return __spirv_ReadClockKHR(static_cast(scope)); +#endif // defined(__NVPTX__) || defined(__AMDGCN__) #else throw sycl::exception( make_error_code(errc::runtime), From f63caae053faa738acc4fb359b02bc7087e7017d Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Thu, 4 Sep 2025 18:23:31 +0200 Subject: [PATCH 12/17] aligh with new spec changes --- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 8 ++- .../sycl/ext/oneapi/experimental/clock.hpp | 44 ++++++++++++--- sycl/include/sycl/info/aspects.def | 5 +- sycl/source/detail/device_impl.hpp | 23 +++++++- sycl/test-e2e/Experimental/clock.cpp | 54 +++++++++---------- 5 files changed, 93 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index b03b35bff3f42..b4f7c71af7d9f 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -94,7 +94,9 @@ def AspectExt_oneapi_async_memory_alloc : Aspect<"ext_oneapi_async_memory_alloc" def AspectExt_intel_device_info_luid : Aspect<"ext_intel_device_info_luid">; def AspectExt_intel_device_info_node_mask : Aspect<"ext_intel_device_info_node_mask">; def Aspectext_oneapi_exportable_device_mem : Aspect<"ext_oneapi_exportable_device_mem">; -def Aspectext_oneapi_clock : Aspect<"ext_oneapi_clock">; +def Aspectext_oneapi_clock_sub_group : Aspect<"ext_oneapi_clock_sub_group">; +def Aspectext_oneapi_clock_work_group : Aspect<"ext_oneapi_clock_work_group">; +def Aspectext_oneapi_clock_device : Aspect<"ext_oneapi_clock_device">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; @@ -170,7 +172,9 @@ def : TargetInfo<"__TestAspectList", AspectExt_intel_device_info_luid, AspectExt_intel_device_info_node_mask, Aspectext_oneapi_exportable_device_mem, - Aspectext_oneapi_clock], + Aspectext_oneapi_clock_sub_group, + Aspectext_oneapi_clock_work_group, + Aspectext_oneapi_clock_device], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index 4896bb50f1d82..af1882f6b8c6c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -17,17 +17,14 @@ inline namespace _V1 { namespace ext::oneapi::experimental { enum class clock_scope : int { - // Aligned with SPIR-V Scope values + // Aligned with SPIR-V Scope values. device = 1, work_group = 2, sub_group = 3 }; -#ifdef __SYCL_DEVICE_ONLY__ -[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock)]] -#endif // __SYCL_DEVICE_ONLY__ -inline uint64_t -clock([[maybe_unused]] clock_scope scope = clock_scope::sub_group) { +namespace detail { +inline uint64_t clock_impl(clock_scope scope) { #ifdef __SYCL_DEVICE_ONLY__ #if defined(__NVPTX__) || defined(__AMDGCN__) // Currently clock() is not supported on NVPTX and AMDGCN. @@ -37,10 +34,41 @@ clock([[maybe_unused]] clock_scope scope = clock_scope::sub_group) { #endif // defined(__NVPTX__) || defined(__AMDGCN__) #else throw sycl::exception( - make_error_code(errc::runtime), - "sycl::ext::oneapi::experimental::clock() is not supported on host."); + make_error_code(errc::runtime), + "sycl::ext::oneapi::experimental::clock() is not supported on host."); #endif // __SYCL_DEVICE_ONLY__ } +} // namespace detail + +template +inline uint64_t clock(); + +// Specialization for device. +template <> +#ifdef __SYCL_DEVICE_ONLY__ +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_device)]] +#endif +inline uint64_t clock() { + return detail::clock_impl(clock_scope::device); +} + +// Specialization for work-group. +template <> +#ifdef __SYCL_DEVICE_ONLY__ +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_work_group)]] +#endif +inline uint64_t clock() { + return detail::clock_impl(clock_scope::work_group); +} + +// Specialization for sub-group. +template <> +#ifdef __SYCL_DEVICE_ONLY__ +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_sub_group)]] +#endif +inline uint64_t clock() { + return detail::clock_impl(clock_scope::sub_group); +} } // namespace ext::oneapi::experimental } // namespace _V1 diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 845a65c25803c..d3e97a47a0248 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -80,5 +80,6 @@ __SYCL_ASPECT(ext_oneapi_async_memory_alloc, 87) __SYCL_ASPECT(ext_intel_device_info_luid, 88) __SYCL_ASPECT(ext_intel_device_info_node_mask, 89) __SYCL_ASPECT(ext_oneapi_exportable_device_mem, 90) -__SYCL_ASPECT(ext_oneapi_clock, 91) - +__SYCL_ASPECT(ext_oneapi_clock_sub_group, 91) +__SYCL_ASPECT(ext_oneapi_clock_work_group, 92) +__SYCL_ASPECT(ext_oneapi_clock_device, 93) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 81a62d49ac6df..7e01b1b1533fb 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -1579,7 +1580,27 @@ class device_impl : public std::enable_shared_from_this { UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP>() .value_or(0); } - CASE(ext_oneapi_clock) { return has_extension("cl_khr_kernel_clock"); } + else if constexpr (Aspect == aspect::ext_oneapi_clock_sub_group || + Aspect == aspect::ext_oneapi_clock_work_group || + Aspect == aspect::ext_oneapi_clock_device) { + detail::adapter_impl &Adapter = getAdapter(); + uint32_t ipVersion = 0; + auto res = Adapter.call_nocheck( + getHandleRef(), UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), + &ipVersion, nullptr); + if (res != UR_RESULT_SUCCESS) + return false; + std::string Feature; + if (Aspect == aspect::ext_oneapi_clock_sub_group) { + Feature = "__opencl_c_kernel_clock_scope_sub_group"; + } else if (Aspect == aspect::ext_oneapi_clock_work_group) { + Feature = "__opencl_c_kernel_clock_scope_work_group"; + } else if (Aspect == aspect::ext_oneapi_clock_device) { + Feature = "__opencl_c_kernel_clock_scope_device"; + } + return ext::oneapi::experimental::detail::OpenCLC_Feature_Available( + std::string(Feature), ipVersion); + } else { return false; // This device aspect has not been implemented yet. } diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp index 2d81153b44590..e6a1abe420376 100644 --- a/sycl/test-e2e/Experimental/clock.cpp +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -1,4 +1,4 @@ -// REQUIRES: aspect-ext_oneapi_clock, aspect-usm_shared_allocations +// REQUIRES: aspect-usm_shared_allocations // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -6,38 +6,36 @@ #include #include -int main() { +namespace syclex = sycl::ext::oneapi::experimental; + +template void test() { sycl::queue q; - uint64_t *data = sycl::malloc_shared(3, q); - - q.single_task([=]() { - uint64_t sg_clock_start = sycl::ext::oneapi::experimental::clock( - sycl::ext::oneapi::experimental::clock_scope::sub_group); - uint64_t wg_clock_start = sycl::ext::oneapi::experimental::clock( - sycl::ext::oneapi::experimental::clock_scope::work_group); - uint64_t dev_clock_start = sycl::ext::oneapi::experimental::clock( - sycl::ext::oneapi::experimental::clock_scope::device); - - int count = 0; - for (int i = 0; i < 1e6; ++i) - count++; - - uint64_t sg_clock_end = sycl::ext::oneapi::experimental::clock( - sycl::ext::oneapi::experimental::clock_scope::sub_group); - uint64_t wg_clock_end = sycl::ext::oneapi::experimental::clock( - sycl::ext::oneapi::experimental::clock_scope::work_group); - uint64_t dev_clock_end = sycl::ext::oneapi::experimental::clock( - sycl::ext::oneapi::experimental::clock_scope::device); - data[0] = sg_clock_end - sg_clock_start; - data[1] = wg_clock_end - wg_clock_start; - data[2] = dev_clock_end - dev_clock_start; + if (!q.get_device().has(aspect)) + return; + + uint64_t *data = sycl::malloc_shared(2, q); + + q.parallel_for(2, [=](sycl::id<1> idx) { + if (idx == 0) { + data[0] = syclex::clock(); + int count = 0; + for (int i = 0; i < 1e6; ++i) + count++; + data[1] = syclex::clock(); + } }); q.wait(); - assert(data[0] > 0); - assert(data[1] > 0); - assert(data[2] > 0); + assert(data[1] > data[0]); sycl::free(data, q); +} + +int main() { + test(); + test(); + test(); return 0; } From 1ce9c3d468be2ea10f3c12a506be58f2b51d87a2 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Fri, 5 Sep 2025 15:49:41 +0200 Subject: [PATCH 13/17] format --- sycl/include/sycl/ext/oneapi/experimental/clock.hpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index af1882f6b8c6c..b15824a3c6ed1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -34,14 +34,13 @@ inline uint64_t clock_impl(clock_scope scope) { #endif // defined(__NVPTX__) || defined(__AMDGCN__) #else throw sycl::exception( - make_error_code(errc::runtime), - "sycl::ext::oneapi::experimental::clock() is not supported on host."); + make_error_code(errc::runtime), + "sycl::ext::oneapi::experimental::clock() is not supported on host."); #endif // __SYCL_DEVICE_ONLY__ } } // namespace detail -template -inline uint64_t clock(); +template inline uint64_t clock(); // Specialization for device. template <> From 4e39cdcdd5e4ddedd7ec98d0fdbd488efd01ca60 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Fri, 5 Sep 2025 17:15:59 +0200 Subject: [PATCH 14/17] unused parameter --- sycl/include/sycl/ext/oneapi/experimental/clock.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index b15824a3c6ed1..547680b22cfe1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -24,7 +24,7 @@ enum class clock_scope : int { }; namespace detail { -inline uint64_t clock_impl(clock_scope scope) { +inline uint64_t clock_impl([[maybe_unused]] clock_scope scope) { #ifdef __SYCL_DEVICE_ONLY__ #if defined(__NVPTX__) || defined(__AMDGCN__) // Currently clock() is not supported on NVPTX and AMDGCN. From c6903806f5522d2fff26187a2dae07a664f8d322 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Tue, 9 Sep 2025 14:37:07 +0200 Subject: [PATCH 15/17] dummy UR part --- sycl/source/detail/device_impl.hpp | 31 +++++++++++------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 7e01b1b1533fb..7369418fd36b5 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1580,26 +1580,17 @@ class device_impl : public std::enable_shared_from_this { UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP>() .value_or(0); } - else if constexpr (Aspect == aspect::ext_oneapi_clock_sub_group || - Aspect == aspect::ext_oneapi_clock_work_group || - Aspect == aspect::ext_oneapi_clock_device) { - detail::adapter_impl &Adapter = getAdapter(); - uint32_t ipVersion = 0; - auto res = Adapter.call_nocheck( - getHandleRef(), UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - if (res != UR_RESULT_SUCCESS) - return false; - std::string Feature; - if (Aspect == aspect::ext_oneapi_clock_sub_group) { - Feature = "__opencl_c_kernel_clock_scope_sub_group"; - } else if (Aspect == aspect::ext_oneapi_clock_work_group) { - Feature = "__opencl_c_kernel_clock_scope_work_group"; - } else if (Aspect == aspect::ext_oneapi_clock_device) { - Feature = "__opencl_c_kernel_clock_scope_device"; - } - return ext::oneapi::experimental::detail::OpenCLC_Feature_Available( - std::string(Feature), ipVersion); + CASE(ext_oneapi_clock_sub_group) { + // Will be updated in a follow-up UR patch. + return false; + } + CASE(ext_oneapi_clock_work_group) { + // Will be updated in a follow-up UR patch. + return false; + } + CASE(ext_oneapi_clock_device) { + // Will be updated in a follow-up UR patch. + return false; } else { return false; // This device aspect has not been implemented yet. From 2976ff7eb39d283f2a3954dc67247e6a1bd3c1ba Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Tue, 9 Sep 2025 14:42:15 +0200 Subject: [PATCH 16/17] remove unnecessary include --- sycl/source/detail/device_impl.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 7369418fd36b5..38214254595c6 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -9,7 +9,6 @@ #pragma once #include -#include #include #include #include From 6019f708ba42d7d3df059211715b819309763d7c Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Tue, 9 Sep 2025 17:06:28 +0200 Subject: [PATCH 17/17] apply suggestions --- sycl/include/sycl/ext/oneapi/experimental/clock.hpp | 10 +++++----- sycl/test-e2e/Experimental/clock.cpp | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index 547680b22cfe1..f0cf05b2b3bd9 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -24,13 +24,13 @@ enum class clock_scope : int { }; namespace detail { -inline uint64_t clock_impl([[maybe_unused]] clock_scope scope) { +template inline uint64_t clock_impl() { #ifdef __SYCL_DEVICE_ONLY__ #if defined(__NVPTX__) || defined(__AMDGCN__) // Currently clock() is not supported on NVPTX and AMDGCN. return 0; #else - return __spirv_ReadClockKHR(static_cast(scope)); + return __spirv_ReadClockKHR(static_cast(Scope)); #endif // defined(__NVPTX__) || defined(__AMDGCN__) #else throw sycl::exception( @@ -48,7 +48,7 @@ template <> [[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_device)]] #endif inline uint64_t clock() { - return detail::clock_impl(clock_scope::device); + return detail::clock_impl(); } // Specialization for work-group. @@ -57,7 +57,7 @@ template <> [[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_work_group)]] #endif inline uint64_t clock() { - return detail::clock_impl(clock_scope::work_group); + return detail::clock_impl(); } // Specialization for sub-group. @@ -66,7 +66,7 @@ template <> [[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_sub_group)]] #endif inline uint64_t clock() { - return detail::clock_impl(clock_scope::sub_group); + return detail::clock_impl(); } } // namespace ext::oneapi::experimental diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp index e6a1abe420376..604900d87294a 100644 --- a/sycl/test-e2e/Experimental/clock.cpp +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -1,4 +1,5 @@ // REQUIRES: aspect-usm_shared_allocations +// REQUIRES: aspect-ext_oneapi_clock_sub_group || aspect-ext_oneapi_clock_work_group || aspect-ext_oneapi_clock_device // RUN: %{build} -o %t.out // RUN: %{run} %t.out