From b45075f21051f055b72f8ac3140d2d200efc116f Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 31 Oct 2025 18:19:29 +0100 Subject: [PATCH 1/2] [SYCL] Extend no-handler submission path to support kernel properties. --- .../oneapi/experimental/enqueue_functions.hpp | 49 ++++---- .../sycl/khr/free_function_commands.hpp | 55 ++++----- sycl/include/sycl/queue.hpp | 106 +++++++++++------- sycl/source/detail/queue_impl.cpp | 6 + sycl/source/detail/queue_impl.hpp | 7 +- sycl/source/queue.cpp | 12 +- sycl/test/abi/sycl_symbols_linux.dump | 12 +- 7 files changed, 139 insertions(+), 108 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 8c8488a99e354..7dfa19cd41851 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -152,15 +152,12 @@ template void single_task(queue Q, const KernelType &KernelObj, const sycl::detail::code_location &CodeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. - if constexpr (!(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT::value)) { detail::submit_kernel_direct_single_task( - std::move(Q), empty_properties_t{}, KernelObj, CodeLoc); + std::move(Q), KernelObj, empty_properties_t{}, CodeLoc); } else { submit( std::move(Q), @@ -271,17 +268,13 @@ template void nd_launch(queue Q, nd_range Range, const KernelType &KernelObj, ReductionsT &&...Reductions) { - // TODO The handler-less path does not support reductions, kernel - // function properties and kernel functions with the kernel_handler - // type argument yet. + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. if constexpr (sizeof...(ReductionsT) == 0 && - !(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { - detail::submit_kernel_direct_parallel_for( - std::move(Q), empty_properties_t{}, Range, KernelObj); + detail::submit_kernel_direct_parallel_for(std::move(Q), Range, + KernelObj); } else { submit(std::move(Q), [&](handler &CGH) { nd_launch(CGH, Range, KernelObj, @@ -308,13 +301,25 @@ template void nd_launch(queue Q, launch_config, Properties> Config, const KernelType &KernelObj, ReductionsT &&...Reductions) { - // TODO This overload of the nd_launch function takes the kernel function - // properties, which are not yet supported for the handler-less path, - // so it only supports handler based submission for now - submit(std::move(Q), [&](handler &CGH) { - nd_launch(CGH, Config, KernelObj, - std::forward(Reductions)...); - }); + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(ReductionsT) == 0 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + + ext::oneapi::experimental::detail::LaunchConfigAccess, + Properties> + LaunchConfigAccess(Config); + + detail::submit_kernel_direct_parallel_for( + std::move(Q), LaunchConfigAccess.getRange(), KernelObj, + LaunchConfigAccess.getProperties()); + } else { + submit(std::move(Q), [&](handler &CGH) { + nd_launch(CGH, Config, KernelObj, + std::forward(Reductions)...); + }); + } } template diff --git a/sycl/include/sycl/khr/free_function_commands.hpp b/sycl/include/sycl/khr/free_function_commands.hpp index 68dd159bf8211..2165076a83229 100644 --- a/sycl/include/sycl/khr/free_function_commands.hpp +++ b/sycl/include/sycl/khr/free_function_commands.hpp @@ -157,16 +157,12 @@ template r, range<1> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. - if constexpr (!(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT< + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<1>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<1>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<1>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -178,16 +174,12 @@ template r, range<2> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. - if constexpr (!(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT< + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<2>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<2>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<2>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -199,16 +191,12 @@ template r, range<3> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. - if constexpr (!(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT< + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<3>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<3>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<3>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -324,16 +312,13 @@ template ::value) && - !(detail::KernelLambdaHasKernelHandlerArgT::value)) { detail::submit_kernel_direct_single_task( - q, ext::oneapi::experimental::empty_properties_t{}, - std::forward(k), codeLoc); + q, std::forward(k), + ext::oneapi::experimental::empty_properties_t{}, codeLoc); } else { submit(q, [&](handler &h) { launch_task(h, k); }, codeLoc); } diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 4a7f1fac789a3..f36f4da661554 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -68,6 +68,7 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -75,6 +76,7 @@ void __SYCL_EXPORT submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); namespace detail { @@ -159,16 +161,14 @@ class __SYCL_EXPORT SubmissionInfo { template + typename PropertiesT = ext::oneapi::experimental::empty_properties_t, + typename KernelTypeUniversalRef, int Dims> auto submit_kernel_direct( - const queue &Queue, [[maybe_unused]] PropertiesT Props, - const nd_range &Range, KernelTypeUniversalRef &&KernelFunc, + const queue &Queue, const nd_range &Range, + KernelTypeUniversalRef &&KernelFunc, + const PropertiesT &ExtraProps = + ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { - // TODO Properties not supported yet - static_assert( - std::is_same_v, - "Setting properties not supported yet for no-CGH kernel submit."); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); using KernelType = @@ -210,22 +210,42 @@ auto submit_kernel_direct( "-fsycl-host-compiler-options='/std:c++latest' " "might also help."); + detail::KernelPropertyHolderStructTy ParsedProperties; + if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< + const KernelType &>::value) { + // Merge properties via get() and manually specified properties. + // get() method is used for specifying kernel properties but properties + // passed via launch_config (ExtraProps) should be kernel launch properties. + // They are mutually exclusive, so there should not be any conflict when + // merging properties. merge_properties() throws if there's a conflict. + auto MergedProps = + sycl::ext::oneapi::experimental::detail::merge_properties( + ExtraProps, + KernelFunc.get(ext::oneapi::experimental::properties_tag{})); + + ParsedProperties = extractKernelProperties(MergedProps); + } else { + ParsedProperties = extractKernelProperties(ExtraProps); + } + if constexpr (EventNeeded) { return submit_kernel_direct_with_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, + Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } else { submit_kernel_direct_without_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, + Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } } template + typename PropertiesT = ext::oneapi::experimental::empty_properties_t, + typename KernelTypeUniversalRef, int Dims> auto submit_kernel_direct_parallel_for( - const queue &Queue, PropertiesT Props, const nd_range &Range, + const queue &Queue, const nd_range &Range, KernelTypeUniversalRef &&KernelFunc, + const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { using KernelType = @@ -246,21 +266,23 @@ auto submit_kernel_direct_parallel_for( return submit_kernel_direct( - Queue, Props, Range, std::forward(KernelFunc), + Queue, Range, std::forward(KernelFunc), Props, CodeLoc); } template + typename PropertiesT = ext::oneapi::experimental::empty_properties_t, + typename KernelTypeUniversalRef> auto submit_kernel_direct_single_task( - const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc, + const queue &Queue, KernelTypeUniversalRef &&KernelFunc, + const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { return submit_kernel_direct( - Queue, Props, nd_range<1>{1, 1}, - std::forward(KernelFunc), CodeLoc); + Queue, nd_range<1>{1, 1}, + std::forward(KernelFunc), Props, CodeLoc); } } // namespace detail @@ -2775,18 +2797,12 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - // TODO The handler-less path does not support kernel - // function properties and kernel functions with the kernel_handler - // type argument yet. - if constexpr ( - std::is_same_v && - !(ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT::value)) { + // TODO The handler-less path does not support kernel functions + // with the kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { return detail::submit_kernel_direct_single_task( - *this, ext::oneapi::experimental::empty_properties_t{}, KernelFunc, - TlsCodeLocCapture.query()); + *this, KernelFunc, Properties, TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { @@ -3323,11 +3339,22 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.template parallel_for(Range, Properties, Rest...); - }, - TlsCodeLocCapture.query()); + using KernelType = std::tuple_element_t<0, std::tuple>; + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + + return detail::submit_kernel_direct_parallel_for( + *this, Range, Rest..., Properties, TlsCodeLocCapture.query()); + } else + return submit( + [&](handler &CGH) { + CGH.template parallel_for(Range, Properties, Rest...); + }, + TlsCodeLocCapture.query()); } /// parallel_for version with a kernel represented as a lambda + nd_range that @@ -3344,18 +3371,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); using KernelType = std::tuple_element_t<0, std::tuple>; - // TODO The handler-less path does not support reductions, kernel - // function properties and kernel functions with the kernel_handler - // type argument yet. + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. if constexpr (sizeof...(RestT) == 1 && - !(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, ext::oneapi::experimental::empty_properties_t{}, Range, - Rest..., TlsCodeLocCapture.query()); + *this, Range, Rest..., + ext::oneapi::experimental::empty_properties_t{}, + TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d86f6a5c6aac2..dd194ff78d593 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -567,6 +567,7 @@ EventImplPtr queue_impl::submit_command_to_graph( EventImplPtr queue_impl::submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { KernelData KData; @@ -574,6 +575,11 @@ EventImplPtr queue_impl::submit_kernel_direct_impl( KData.setDeviceKernelInfoPtr(DeviceKernelInfo); KData.setNDRDesc(NDRDesc); + // Validate and set kernel launch properties. + KData.validateAndSetKernelLaunchProperties( + Props, getCommandGraph() != nullptr /*HasGraph?*/, + getDeviceImpl() /*device_impl*/); + auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData, bool SchedulerBypass) -> EventImplPtr { if (SchedulerBypass) { diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 49da7aee8c448..031b0a01f56bc 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -363,10 +363,11 @@ class queue_impl : public std::enable_shared_from_this { event submit_kernel_direct_with_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::EventImplPtr EventImpl = submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - true, CodeLoc, IsTopCodeLoc); + true, Props, CodeLoc, IsTopCodeLoc); return createSyclObjFromImpl(EventImpl); } @@ -374,9 +375,10 @@ class queue_impl : public std::enable_shared_from_this { void submit_kernel_direct_without_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - false, CodeLoc, IsTopCodeLoc); + false, Props, CodeLoc, IsTopCodeLoc); } void submit_without_event(const detail::type_erased_cgfo_ty &CGF, @@ -929,6 +931,7 @@ class queue_impl : public std::enable_shared_from_this { EventImplPtr submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index f34da47852266..7fe5649aecc2a 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -476,27 +476,31 @@ event submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( - Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); + Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); } template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -504,27 +508,31 @@ void submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( - Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); + Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); } template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); } // namespace _V1 diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index bfb11f8c79fe8..af3c4d64d194c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,12 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv From 106dacb43ff21200ad5f9e61bb809e654c508b1d Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 3 Nov 2025 17:56:12 +0100 Subject: [PATCH 2/2] Update Windows ABI and address reviews --- sycl/include/sycl/khr/free_function_commands.hpp | 2 +- sycl/source/detail/queue_impl.cpp | 5 ++--- sycl/test/abi/sycl_symbols_windows.dump | 13 +++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/include/sycl/khr/free_function_commands.hpp b/sycl/include/sycl/khr/free_function_commands.hpp index 2165076a83229..f32e493008bfe 100644 --- a/sycl/include/sycl/khr/free_function_commands.hpp +++ b/sycl/include/sycl/khr/free_function_commands.hpp @@ -317,7 +317,7 @@ void launch_task(const sycl::queue &q, KernelType &&k, if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { detail::submit_kernel_direct_single_task( - q, std::forward(k), + q, std::forward(k), ext::oneapi::experimental::empty_properties_t{}, codeLoc); } else { submit(q, [&](handler &h) { launch_task(h, k); }, codeLoc); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index dd194ff78d593..0433451ae6da7 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -576,9 +576,8 @@ EventImplPtr queue_impl::submit_kernel_direct_impl( KData.setNDRDesc(NDRDesc); // Validate and set kernel launch properties. - KData.validateAndSetKernelLaunchProperties( - Props, getCommandGraph() != nullptr /*HasGraph?*/, - getDeviceImpl() /*device_impl*/); + KData.validateAndSetKernelLaunchProperties(Props, hasCommandGraph(), + getDeviceImpl()); auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData, bool SchedulerBypass) -> EventImplPtr { diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index b2c805ac1b9d0..f7e0f0f062223 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -286,12 +286,12 @@ ??$is_image_handle_supported@Usampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z -??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z ??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z ??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z ??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z @@ -4393,6 +4393,7 @@ ?release_external_semaphore@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_semaphore@12345@AEBVqueue@45@@Z ?release_from_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBXAEBVcontext@45@@Z ?release_from_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBXAEBVqueue@45@@Z +?remove@free_function_info_map@detail@_V1@sycl@@YAXPEBQEBDPEBII@Z ?removeDuplicateDevices@detail@_V1@sycl@@YA?BV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV45@@Z ?remquo_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@0PEAH@Z ?remquo_impl@detail@_V1@sycl@@YAMMMPEAH@Z