diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 38214254595c6..7958f384c09f3 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1580,16 +1580,17 @@ class device_impl : public std::enable_shared_from_this { .value_or(0); } CASE(ext_oneapi_clock_sub_group) { - // Will be updated in a follow-up UR patch. - return false; + return get_info_impl_nocheck() + .value_or(0); } CASE(ext_oneapi_clock_work_group) { - // Will be updated in a follow-up UR patch. - return false; + return get_info_impl_nocheck< + UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP>() + .value_or(0); } CASE(ext_oneapi_clock_device) { - // Will be updated in a follow-up UR patch. - return false; + return get_info_impl_nocheck() + .value_or(0); } else { return false; // This device aspect has not been implemented yet. diff --git a/sycl/source/detail/ur_device_info_ret_types.inc b/sycl/source/detail/ur_device_info_ret_types.inc index e1e724262b85f..11336b2ff4e8c 100644 --- a/sycl/source/detail/ur_device_info_ret_types.inc +++ b/sycl/source/detail/ur_device_info_ret_types.inc @@ -193,4 +193,7 @@ MAP(UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES, ur_kernel_launch_properties_flags_t) MAP(UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP, ur_bool_t) +MAP(UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP, ur_bool_t) +MAP(UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP, ur_bool_t) +MAP(UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP, ur_bool_t) // clang-format on diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index f78714b4e06aa..b3874c8cb4490 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -2429,6 +2429,15 @@ typedef enum ur_device_info_t { /// [::ur_bool_t] returns true if the device supports enqueueing of /// allocations and frees. 
UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP = 0x2050, + /// [::ur_bool_t] returns true if the device supports sampling values from + /// the sub-group clock. + UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP = 0x2060, + /// [::ur_bool_t] returns true if the device supports sampling values from + /// the work-group clock. + UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP = 0x2061, + /// [::ur_bool_t] returns true if the device supports sampling values from + /// the device clock. + UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP = 0x2062, /// [::ur_bool_t] Returns true if the device supports the USM P2P /// experimental feature. UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP = 0x4000, diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 15c50dd0eb479..62f653a640e08 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -3119,6 +3119,15 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP: os << "UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP"; break; + case UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP: + os << "UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP: + os << "UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP: + os << "UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP: os << "UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP"; break; @@ -5257,6 +5266,45 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, os << ")"; } break; + case UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + 
case UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { diff --git a/unified-runtime/scripts/core/EXP-CLOCK.rst b/unified-runtime/scripts/core/EXP-CLOCK.rst new file mode 100644 index 0000000000000..327e6840f6920 --- /dev/null +++ b/unified-runtime/scripts/core/EXP-CLOCK.rst @@ -0,0 +1,60 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-clock: + +================================================================================ +Clock +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- +This experimental extension enables the sycl_ext_oneapi_clock feature: +https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_clock.asciidoc +It introduces descriptors to query sub-group/work-group/device clock support. 
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}_device_info_t + * ${X}_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP + * ${X}_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP + * ${X}_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP + +Changelog +-------------------------------------------------------------------------------- + ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return ${X}_RESULT_SUCCESS from +the ${x}DeviceGetInfo call with the new ${X}_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP, +${X}_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP or ${X}_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP +device descriptors. + +Contributors +-------------------------------------------------------------------------------- + +* Kornev, Nikita `nikita.kornev@intel.com <nikita.kornev@intel.com>`_ diff --git a/unified-runtime/scripts/core/exp-clock.yml b/unified-runtime/scripts/core/exp-clock.yml new file mode 100644 index 0000000000000..323c14135d1e5 --- /dev/null +++ b/unified-runtime/scripts/core/exp-clock.yml @@ -0,0 +1,31 @@ +# +# Copyright (C) 2025 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +# Exceptions. 
+# See LICENSE.TXT +# +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental APIs for Clock" +ordinal: "99" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums for $x_device_info_t to support clock queries." +name: $x_device_info_t +etors: + - name: CLOCK_SUB_GROUP_SUPPORT_EXP + value: "0x2060" + desc: "[$x_bool_t] returns true if the device supports sampling values from the sub-group clock." + - name: CLOCK_WORK_GROUP_SUPPORT_EXP + value: "0x2061" + desc: "[$x_bool_t] returns true if the device supports sampling values from the work-group clock." + - name: CLOCK_DEVICE_SUPPORT_EXP + value: "0x2062" + desc: "[$x_bool_t] returns true if the device supports sampling values from the device clock." diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp index 4f697b05b5c88..eac2c9fe0bf2c 100644 --- a/unified-runtime/source/adapters/opencl/device.cpp +++ b/unified-runtime/source/adapters/opencl/device.cpp @@ -1480,6 +1480,36 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(nodeMask); } + case UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP: + case UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP: + case UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP: { + bool Supported = false; + size_t ExtSize = 0; + + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->CLDevice, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, + CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + if (ExtStr.find("cl_khr_kernel_clock") != std::string::npos) { + cl_device_kernel_clock_capabilities_khr caps = 0; + + 
CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->CLDevice, CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR, + sizeof(cl_device_kernel_clock_capabilities_khr), &caps, nullptr)); + + if ((propName == UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP && + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR)) || + (propName == UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP && + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR)) || + (propName == UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP && + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR))) + Supported = true; + } + return ReturnValue(Supported); + } // TODO: We can't query to check if these are supported, they will need to be // manually updated if support is ever implemented. case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: diff --git a/unified-runtime/tools/urinfo/urinfo.hpp b/unified-runtime/tools/urinfo/urinfo.hpp index b08661787cccf..3407c57f847d7 100644 --- a/unified-runtime/tools/urinfo/urinfo.hpp +++ b/unified-runtime/tools/urinfo/urinfo.hpp @@ -448,6 +448,14 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, printDeviceInfo(hDevice, UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP); std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP); std::cout << prefix; printDeviceInfo(hDevice,