From 3485beaf37bc0dd636f37f6bbacc5621441e43a1 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 16 May 2024 08:32:45 -0700 Subject: [PATCH 1/8] Use discard events to avoid PI events Signed-off-by: Larsen, Steffen --- .../oneapi/experimental/enqueue_functions.hpp | 20 ++-- sycl/include/sycl/handler.hpp | 41 ++++++- sycl/include/sycl/queue.hpp | 31 ++++++ sycl/include/sycl/reduction.hpp | 5 +- sycl/source/detail/graph_impl.cpp | 9 +- sycl/source/detail/handler_impl.hpp | 10 +- sycl/source/detail/queue_impl.cpp | 43 +++++--- sycl/source/detail/queue_impl.hpp | 101 ++++++++++++------ sycl/source/detail/scheduler/commands.cpp | 25 +++-- sycl/source/detail/scheduler/commands.hpp | 6 ++ .../source/detail/scheduler/graph_builder.cpp | 13 ++- sycl/source/detail/scheduler/scheduler.cpp | 13 +-- sycl/source/detail/scheduler/scheduler.hpp | 4 +- sycl/source/enqueue_functions.cpp | 30 ++++++ sycl/source/handler.cpp | 32 +++++- sycl/source/queue.cpp | 62 +++++++---- sycl/test/abi/sycl_symbols_linux.dump | 6 ++ .../arg_mask/EliminatedArgMask.cpp | 2 +- .../scheduler/AccessorDefaultCtor.cpp | 7 +- sycl/unittests/scheduler/Commands.cpp | 3 +- .../scheduler/EnqueueWithDependsOnDeps.cpp | 5 +- sycl/unittests/scheduler/GraphCleanup.cpp | 15 ++- .../scheduler/InOrderQueueSyncCheck.cpp | 4 + sycl/unittests/scheduler/KernelFusion.cpp | 6 +- sycl/unittests/scheduler/QueueFlushing.cpp | 3 +- .../scheduler/SchedulerTestUtils.hpp | 20 ++-- .../scheduler/StreamInitDependencyOnHost.cpp | 10 +- 27 files changed, 385 insertions(+), 141 deletions(-) create mode 100644 sycl/source/enqueue_functions.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 21d383b6fe2cc..f89b7f278ca73 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -8,8 +8,9 @@ #pragma once -#include // for std::forward +#include +#include #include #include #include @@ -75,8 +76,7 @@ template struct LaunchConfigAccess { template void submit(queue Q, CommandGroupFunc &&CGF) { - // TODO: Use new submit without Events. - Q.submit(std::forward(CGF)); + Q.submit_without_event(std::forward(CGF)); } template @@ -261,9 +261,9 @@ inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) { CGH.memcpy(Dest, Src, NumBytes); } -inline void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes) { - submit(Q, [&](handler &CGH) { memcpy(CGH, Dest, Src, NumBytes); }); -} +__SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()); template void copy(handler &CGH, const T *Src, T *Dest, size_t Count) { @@ -278,9 +278,7 @@ inline void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes) { CGH.memset(Ptr, Value, NumBytes); } -inline void memset(queue Q, void *Ptr, int Value, size_t NumBytes) { - submit(Q, [&](handler &CGH) { memset(CGH, Ptr, Value, NumBytes); }); -} +__SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes); template void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count) { @@ -304,9 +302,7 @@ inline void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice) { CGH.mem_advise(Ptr, NumBytes, Advice); } -inline void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice) { - submit(Q, [&](handler &CGH) { mem_advise(CGH, Ptr, NumBytes, Advice); }); -} +__SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice); inline void barrier(handler &CGH) { CGH.ext_oneapi_barrier(); } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 8b03a1d85c61c..0347d93a2d8ea 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -463,6 +463,7 @@ class __SYCL_EXPORT handler { /// /// \param Queue is a SYCL queue. /// \param IsHost indicates if this handler is created for SYCL host device. + /// TODO: Unused. Remove with ABI break. handler(std::shared_ptr Queue, bool IsHost); /// Constructs SYCL handler from the associated queue and the submission's @@ -474,10 +475,36 @@ class __SYCL_EXPORT handler { /// \param SecondaryQueue is the secondary SYCL queue of the submission. This /// is null if no secondary queue is associated with the submission. /// \param IsHost indicates if this handler is created for SYCL host device. + /// TODO: Unused. Remove with ABI break. handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost); + /// Constructs SYCL handler from queue. + /// + /// \param Queue is a SYCL queue. + /// \param IsHost indicates if this handler is created for SYCL host device. + /// \param CallerNeedsEvent indicates if the event resulting from this handler + /// is needed by the caller. + handler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent); + + /// Constructs SYCL handler from the associated queue and the submission's + /// primary and secondary queue. + /// + /// \param Queue is a SYCL queue. This is equal to either PrimaryQueue or + /// SecondaryQueue. + /// \param PrimaryQueue is the primary SYCL queue of the submission. + /// \param SecondaryQueue is the secondary SYCL queue of the submission. This + /// is null if no secondary queue is associated with the submission. + /// \param IsHost indicates if this handler is created for SYCL host device. + /// \param CallerNeedsEvent indicates if the event resulting from this handler +/// is needed by the caller. + handler(std::shared_ptr Queue, + std::shared_ptr PrimaryQueue, + std::shared_ptr SecondaryQueue, bool IsHost, + bool CallerNeedsEvent); + /// Constructs SYCL handler from Graph. /// /// The hander will add the command-group as a node to the graph rather than @@ -498,7 +525,7 @@ class __SYCL_EXPORT handler { void setType(detail::CG::CGTYPE Type) { MCGType = Type; } - detail::CG::CGTYPE getType() { return MCGType; } + detail::CG::CGTYPE getType() const { return MCGType; } void throwIfActionIsCreated() { if (detail::CG::None != getType()) @@ -575,6 +602,16 @@ class __SYCL_EXPORT handler { /// \return a SYCL event object representing the command group event finalize(); + /// Constructs CG object of specific type, passes it to Scheduler and + /// returns sycl::event object representing the command group. + /// It's expected that the method is the latest method executed before + /// object destruction. + /// \param CallerNeedsEvent Specifies if the caller needs an event + /// representing the work related to this handler. + /// + /// \return a SYCL event object representing the command group + event finalize(bool CallerNeedsEvent); + /// Saves streams associated with this handler. /// /// Streams are then forwarded to command group and flushed in the scheduler. @@ -1180,6 +1217,8 @@ class __SYCL_EXPORT handler { Size == 32 || Size == 64 || Size == 128; } + bool eventNeeded() const; + template struct TransformUserItemType { using type = std::conditional_t< std::is_convertible_v, LambdaArgType>, nd_item, diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index a3819f69b47bc..be7dac22b86e4 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -95,6 +95,9 @@ namespace ext ::oneapi ::experimental { // returned by info::queue::state enum class queue_state { executing, recording }; struct image_descriptor; + +template +void submit(queue Q, CommandGroupFunc &&CGF); } // namespace ext::oneapi::experimental /// Encapsulates a single SYCL queue which schedules kernels on a SYCL device. @@ -2637,6 +2640,34 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { event submit_impl(std::function CGH, queue secondQueue, const detail::code_location &CodeLoc); + /// A template-free version of submit_without_event. + void submit_without_event_impl(std::function CGH, + const detail::code_location &CodeLoc); + + + /// Submits a command group function object to the queue, in order to be + /// scheduled for execution on the device. + /// + /// \param CGF is a function object containing command group. + /// \param CodeLoc is the code location of the submit call (default argument) + template + std::enable_if_t, void> + submit_without_event(T CGF, const detail::code_location &CodeLoc = + detail::code_location::current()) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); +#if __SYCL_USE_FALLBACK_ASSERT + // If post-processing is needed, fall back to the regular submit. + // TODO: Revisit whether we can avoid this. + submit(CGF, CodeLoc); +#else + submit_without_event_impl(CGF, CodeLoc); +#endif // __SYCL_USE_FALLBACK_ASSERT + } + + template + friend void ext::oneapi::experimental::submit(queue Q, + CommandGroupFunc &&CGF); + /// Checks if the event needs to be discarded and if so, discards it and /// returns a discarded event. Otherwise, it returns input event. /// TODO: move to impl class in the next ABI Breaking window diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 5def244a01ae0..98938e2e9bc7b 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -1173,8 +1173,9 @@ namespace reduction { inline void finalizeHandler(handler &CGH) { CGH.finalize(); } template void withAuxHandler(handler &CGH, FunctorTy Func) { event E = CGH.finalize(); - handler AuxHandler(CGH.MQueue, CGH.MIsHost); - AuxHandler.depends_on(E); + handler AuxHandler(CGH.MQueue, CGH.MIsHost, CGH.eventNeeded()); + if (!createSyclObjFromImpl(CGH.MQueue).is_in_order()) + AuxHandler.depends_on(E); AuxHandler.saveCodeLoc(CGH.MCodeLoc); Func(AuxHandler); CGH.MLastEvent = AuxHandler.finalize(); diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 5071c3d982066..7d9759b12928e 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -689,7 +689,8 @@ sycl::detail::pi::PiExtSyncPoint exec_graph_impl::enqueueNode( sycl::detail::EventImplPtr Event = sycl::detail::Scheduler::getInstance().addCG( - Node->getCGCopy(), AllocaQueue, CommandBuffer, Deps); + Node->getCGCopy(), AllocaQueue, /*EventNeeded=*/true, CommandBuffer, + Deps); MCommandMap[Node] = Event->getCommandBufferCommand(); return Event->getSyncPoint(); @@ -927,7 +928,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, CommandBuffer, nullptr, std::move(CGData)); NewEvent = sycl::detail::Scheduler::getInstance().addCG( - std::move(CommandGroup), Queue); + std::move(CommandGroup), Queue, /*EventNeeded=*/true); } NewEvent->setEventFromSubmittedExecCommandBuffer(true); } else if ((CurrentPartition->MSchedule.size() > 0) && @@ -945,7 +946,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, .MQueue = Queue; NewEvent = sycl::detail::Scheduler::getInstance().addCG( - NodeImpl->getCGCopy(), Queue); + NodeImpl->getCGCopy(), Queue, /*EventNeeded=*/true); } else { std::vector> ScheduledEvents; for (auto &NodeImpl : CurrentPartition->MSchedule) { @@ -981,7 +982,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, // dependencies are propagated in findRealDeps sycl::detail::EventImplPtr EventImpl = sycl::detail::Scheduler::getInstance().addCG( - NodeImpl->getCGCopy(), Queue); + NodeImpl->getCGCopy(), Queue, /*EventNeeded=*/true); ScheduledEvents.push_back(EventImpl); } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index e268175781989..53d96a401f8aa 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -31,9 +31,11 @@ enum class HandlerSubmissionState : std::uint8_t { class handler_impl { public: handler_impl(std::shared_ptr SubmissionPrimaryQueue, - std::shared_ptr SubmissionSecondaryQueue) + std::shared_ptr SubmissionSecondaryQueue, + bool EventNeeded) : MSubmissionPrimaryQueue(std::move(SubmissionPrimaryQueue)), - MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)){}; + MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)), + MEventNeeded(EventNeeded){}; handler_impl() = default; @@ -74,6 +76,10 @@ class handler_impl { /// submission is a fallback from a previous submission. std::shared_ptr MSubmissionSecondaryQueue; + /// Bool stores information about whether the event resulting from the + /// corresponding work is required. + bool MEventNeeded = true; + // Stores auxiliary resources used by internal operations. std::vector> MAuxiliaryResources; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 05c579f78a405..7c1677b978a78 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -132,7 +132,8 @@ queue_impl::getExtendDependencyList(const std::vector &DepEvents, event queue_impl::memset(const std::shared_ptr &Self, void *Ptr, int Value, size_t Count, - const std::vector &DepEvents) { + const std::vector &DepEvents, + bool CallerNeedsEvent) { #if XPTI_ENABLE_INSTRUMENTATION // We need a code pointer value and we use the object ptr; if code location // information is available, we will have function name and source file @@ -159,7 +160,8 @@ event queue_impl::memset(const std::shared_ptr &Self, #endif return submitMemOpHelper( - Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, + Self, DepEvents, CallerNeedsEvent, + [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self, Count, Value); } @@ -180,7 +182,7 @@ void report(const code_location &CodeLoc) { event queue_impl::memcpy(const std::shared_ptr &Self, void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, - const code_location &CodeLoc) { + bool CallerNeedsEvent, const code_location &CodeLoc) { #if XPTI_ENABLE_INSTRUMENTATION // We need a code pointer value and we duse the object ptr; If code location // is available, we use the source file information along with the object @@ -211,7 +213,8 @@ event queue_impl::memcpy(const std::shared_ptr &Self, PI_ERROR_INVALID_VALUE); } return submitMemOpHelper( - Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, + Self, DepEvents, CallerNeedsEvent, + [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, [](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self, Count, Dest); } @@ -219,9 +222,10 @@ event queue_impl::memcpy(const std::shared_ptr &Self, event queue_impl::mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, - const std::vector &DepEvents) { + const std::vector &DepEvents, + bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); }, [](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr, Self, Length, Advice); @@ -230,9 +234,9 @@ event queue_impl::mem_advise(const std::shared_ptr &Self, event queue_impl::memcpyToDeviceGlobal( const std::shared_ptr &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, - const std::vector &DepEvents) { + const std::vector &DepEvents, bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope, NumBytes, Offset); @@ -246,9 +250,9 @@ event queue_impl::memcpyToDeviceGlobal( event queue_impl::memcpyFromDeviceGlobal( const std::shared_ptr &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, - size_t Offset, const std::vector &DepEvents) { + size_t Offset, const std::vector &DepEvents, bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope, NumBytes, Offset); @@ -345,6 +349,7 @@ event queue_impl::submitWithHandler(const std::shared_ptr &Self, template event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, const std::vector &DepEvents, + bool CallerNeedsEvent, HandlerFuncT HandlerFunc, MemOpFuncT MemOpFunc, MemOpArgTs... MemOpArgs) { @@ -361,7 +366,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, // handler rather than by-passing the scheduler. if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass( ExpandedDepEvents, MContext)) { - if (MSupportsDiscardingPiEvents) { + if ((MDiscardEvents || !CallerNeedsEvent) && + supportsDiscardingPiEvents()) { MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr); return createDiscardedEvent(); @@ -581,10 +587,17 @@ bool queue_impl::ext_oneapi_empty() const { // the status of the last event. if (isInOrder() && !MDiscardEvents) { std::lock_guard Lock(MMutex); - return !MDefaultGraphDeps.LastEventPtr || - MDefaultGraphDeps.LastEventPtr - ->get_info() == - info::event_command_status::complete; + // If there is no last event we know that no work has been submitted, so it + // must be trivially empty. + if (!MDefaultGraphDeps.LastEventPtr) + return true; + // Otherwise, check if the last event is finished. + // Note that we fall back to the backend query if the event was discarded, + // which may happend despite the queue not being a discard event queue. + if (MDefaultGraphDeps.LastEventPtr->isDiscarded()) + return MDefaultGraphDeps.LastEventPtr + ->get_info() == + info::event_command_status::complete; } // Check the status of the backend queue if this is not a host queue. diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index a4a984c270b0e..6dfcab977c0b1 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -111,8 +111,6 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { if (has_property()) { @@ -292,8 +290,6 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { queue_impl_interop(PiQueue); @@ -313,9 +309,7 @@ class queue_impl { MIsInorder(has_property()), MDiscardEvents( has_property()), - MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)) { + MIsProfilingEnabled(has_property()) { queue_impl_interop(PiQueue); } @@ -370,9 +364,14 @@ class queue_impl { /// \return true if this queue is a SYCL host queue. bool is_host() const { return MHostQueue; } - /// \return true if this queue has discard_events support. + /// \return true if the discard event property was set at time of creation. + bool hasDiscardEventsProperty() const { + return MDiscardEvents; + } + + /// \return true if this queue allows for discarded events. bool supportsDiscardingPiEvents() const { - return MSupportsDiscardingPiEvents; + return MHostQueue ? true : MIsInorder; } bool isInOrder() const { return MIsInorder; } @@ -410,10 +409,12 @@ class queue_impl { const SubmitPostProcessF *PostProcess = nullptr) { event ResEvent; try { - ResEvent = submit_impl(CGF, Self, Self, SecondQueue, Loc, PostProcess); + ResEvent = submit_impl(CGF, Self, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, PostProcess); } catch (...) { - ResEvent = SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, - Loc, PostProcess); + ResEvent = + SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, PostProcess); } return discard_or_return(ResEvent); } @@ -430,10 +431,19 @@ class queue_impl { const std::shared_ptr &Self, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess = nullptr) { - auto ResEvent = submit_impl(CGF, Self, Self, nullptr, Loc, PostProcess); + auto ResEvent = submit_impl(CGF, Self, Self, nullptr, + /*CallerNeedsEvent=*/true, Loc, PostProcess); return discard_or_return(ResEvent); } + void submit_without_event(const std::function &CGF, + const std::shared_ptr &Self, + const detail::code_location &Loc, + const SubmitPostProcessF *PostProcess = nullptr) { + submit_impl(CGF, Self, Self, nullptr, /*CallerNeedsEvent=*/false, Loc, + PostProcess); + } + /// Performs a blocking wait for the completion of all enqueued tasks in the /// queue. /// @@ -637,9 +647,11 @@ class queue_impl { /// \param Count is a number of bytes to fill. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing fill operation. event memset(const std::shared_ptr &Self, void *Ptr, int Value, - size_t Count, const std::vector &DepEvents); + size_t Count, const std::vector &DepEvents, + bool CallerNeedsEvent); /// Copies data from one memory region to another, both pointed by /// USM pointers. /// @@ -649,10 +661,11 @@ class queue_impl { /// \param Count is a number of bytes to copy. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing copy operation. event memcpy(const std::shared_ptr &Self, void *Dest, const void *Src, size_t Count, - const std::vector &DepEvents, + const std::vector &DepEvents, bool CallerNeedsEvent, const code_location &CodeLoc); /// Provides additional information to the underlying runtime about how /// different allocations are used. @@ -663,10 +676,11 @@ class queue_impl { /// \param Advice is a device-defined advice for the specified allocation. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing advise operation. event mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, - const std::vector &DepEvents); + const std::vector &DepEvents, bool CallerNeedsEvent); /// Puts exception to the list of asynchronous ecxeptions. /// @@ -704,13 +718,14 @@ class queue_impl { event memcpyToDeviceGlobal(const std::shared_ptr &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, - size_t Offset, - const std::vector &DepEvents); + size_t Offset, const std::vector &DepEvents, + bool CallerNeedsEvent); event memcpyFromDeviceGlobal(const std::shared_ptr &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, - const std::vector &DepEvents); + const std::vector &DepEvents, + bool CallerNeedsEvent); bool isProfilingFallback() { return MFallbackProfiling; } @@ -762,6 +777,14 @@ class queue_impl { // Hook to the scheduler to clean up any fusion command held on destruction. void cleanup_fusion_cmd(); + EventImplPtr insertHelperBarrier() { + const PluginPtr &Plugin = getPlugin(); + pi_event BarrierPiEvent = 0; + Plugin->call( + getHandleRef(), 0, nullptr, &BarrierPiEvent); + return std::make_shared(BarrierPiEvent, get_context()); + } + // template is needed for proper unit testing template void finalizeHandler(HandlerType &Handler, const CG::CGTYPE &Type, @@ -770,6 +793,10 @@ class queue_impl { // Accessing and changing of an event isn't atomic operation. // Hence, here is the lock for thread-safety. std::lock_guard Lock{MMutex}; + + auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr + : MExtGraphDeps.LastEventPtr; + // This dependency is needed for the following purposes: // - host tasks are handled by the runtime and cannot be implicitly // synchronized by the backend. @@ -777,11 +804,19 @@ class queue_impl { // by a host task. This dependency allows to build the enqueue order in // the RT but will not be passed to the backend. See getPIEvents in // Command. - - auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr - : MExtGraphDeps.LastEventPtr; - if (EventToBuildDeps) - Handler.depends_on(EventToBuildDeps); + if (EventToBuildDeps) { + // In the case where the last event was discarded and we are to run a + // host_task, we insert a barrier into the queue and use the resulting + // event as the dependency for the host_task. + // Note that host_task events can never be discarded, so this will not + // insert barriers between host_task enqueues. + if (EventToBuildDeps->isDiscarded() && + Handler.getType() == CG::CodeplayHostTask) + EventToBuildDeps = insertHelperBarrier(); + + if (!EventToBuildDeps->isDiscarded()) + Handler.depends_on(EventToBuildDeps); + } // If there is an external event set, add it as a dependency and clear it. // We do not need to hold the lock as MLastEventMtx will ensure the last @@ -827,12 +862,15 @@ class queue_impl { /// same as Self. /// \param SecondaryQueue is a pointer to the secondary queue. This may be the /// same as Self. + /// \param CallerNeedsEvent is a boolean indicating whether the event is required + /// by the user after the call. /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. event submit_impl(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, + bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess) { // Flag used to detect nested calls to submit and report an error. @@ -845,7 +883,8 @@ class queue_impl { "function objects should use the sycl::handler API instead."); } - handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue); + handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue, + CallerNeedsEvent); Handler.saveCodeLoc(Loc); PreventSubmit = true; try { @@ -897,6 +936,8 @@ class queue_impl { /// /// \param Self is a shared_ptr to this queue. /// \param DepEvents is a vector of dependencies of the operation. + /// \param CallerNeedsEvent specifies if the caller needs an event from this + /// memory operation. /// \param HandlerFunc is a function that submits the operation with a /// handler. /// \param MemMngrFunc is a function that forwards its arguments to the @@ -909,8 +950,8 @@ class queue_impl { typename... MemMngrArgTs> event submitMemOpHelper(const std::shared_ptr &Self, const std::vector &DepEvents, - HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, - MemMngrArgTs... MemOpArgs); + bool CallerNeedsEvent, HandlerFuncT HandlerFunc, + MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs); // When instrumentation is enabled emits trace event for wait begin and // returns the telemetry event generated for the wait @@ -1002,12 +1043,6 @@ class queue_impl { const bool MIsProfilingEnabled; protected: - // Indicates whether the queue supports discarding PI events for tasks - // submitted to it. This condition is necessary but not sufficient, PI events - // should be discarded only if they also don't represent potential implicit - // dependencies for future tasks in other queues. - const bool MSupportsDiscardingPiEvents; - // Command graph which is associated with this queue for the purposes of // recording commands to it. std::weak_ptr MGraph{}; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index bf7e44062cb5e..f6520160e529c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1925,11 +1925,11 @@ static std::string_view cgTypeToString(detail::CG::CGTYPE Type) { ExecCGCommand::ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer, Dependencies), - MCommandGroup(std::move(CommandGroup)) { + MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { if (MCommandGroup->getType() == detail::CG::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); @@ -2744,11 +2744,15 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { Plugin->call(RawEvents.size(), &RawEvents[0]); } + // We can omit creating a PI event and create a "discarded" event if either + // the queue has the discard property or the command has been explicitly + // marked as not needing an event, e.g. if the user did not ask for one, and + // if the queue supports discarded PI event and there are no requirements. + bool DiscardPiEvent = (MQueue->MDiscardEvents || !MEventNeeded) && + MQueue->supportsDiscardingPiEvents() && + MCommandGroup->getRequirements().size() == 0; sycl::detail::pi::PiEvent *Event = - (MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0) - ? nullptr - : &MEvent->getHandleRef(); + DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); sycl::detail::pi::PiExtSyncPoint OutSyncPoint; sycl::detail::pi::PiExtCommandBufferCommand OutCommand = nullptr; switch (MCommandGroup->getType()) { @@ -2895,8 +2899,13 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { auto RawEvents = getPiEvents(EventImpls); flushCrossQueueDeps(EventImpls, getWorkerQueue()); - bool DiscardPiEvent = (MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0); + // We can omit creating a PI event and create a "discarded" event if either + // the queue has the discard property or the command has been explicitly + // marked as not needing an event, e.g. if the user did not ask for one, and + // if the queue supports discarded PI event and there are no requirements. + bool DiscardPiEvent = (MQueue->MDiscardEvents || !MEventNeeded) && + MQueue->supportsDiscardingPiEvents() && + MCommandGroup->getRequirements().size() == 0; sycl::detail::pi::PiEvent *Event = DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); detail::EventImplPtr EventImpl = DiscardPiEvent ? nullptr : MEvent; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8ba0cceee9e6a..ea1a5b5111149 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -648,6 +648,7 @@ class ExecCGCommand : public Command { public: ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); @@ -672,6 +673,11 @@ class ExecCGCommand : public Command { // necessary. KernelFusionCommand *MFusionCmd = nullptr; + // MEventNeeded is true if the command needs to produce a valid event. The + // implementation may elect to not produce events (native or SYCL) if this + // is false. + bool MEventNeeded = true; + bool producesPiEvent() const final; bool supportsPostEnqueueCleanup() const final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index f0c5dc670aa05..15c98386462b9 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -940,13 +940,15 @@ static void combineAccessModesOfReqs(std::vector &Reqs) { Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, std::vector &ToEnqueue, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { std::vector &Reqs = CommandGroup->getRequirements(); std::vector &Events = CommandGroup->getEvents(); - auto NewCmd = std::make_unique( - std::move(CommandGroup), Queue, CommandBuffer, std::move(Dependencies)); + auto NewCmd = std::make_unique(std::move(CommandGroup), Queue, + EventNeeded, CommandBuffer, + std::move(Dependencies)); if (!NewCmd) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); @@ -1345,7 +1347,8 @@ Command *Scheduler::GraphBuilder::connectDepEvent( CG::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); + std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue(), + /*EventNeeded=*/true); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } @@ -1619,8 +1622,8 @@ Scheduler::GraphBuilder::completeFusion(QueueImplPtr Queue, }), FusedEventDeps.end()); - auto FusedKernelCmd = - std::make_unique(std::move(FusedCG), Queue); + auto FusedKernelCmd = std::make_unique( + std::move(FusedCG), Queue, /*EventNeeded=*/true); // Inherit auxiliary resources from fused command groups Scheduler::getInstance().takeAuxiliaryResources(FusedKernelCmd->getEvent(), diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 7b6c837131658..6cf8e23c90d8b 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -94,7 +94,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, EventImplPtr Scheduler::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { EventImplPtr NewEvent = nullptr; const CG::CGTYPE Type = CommandGroup->getType(); @@ -130,17 +130,18 @@ EventImplPtr Scheduler::addCG( NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { - auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - DefaultHostQueue, AuxiliaryCmds); + auto Result = + MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue, + AuxiliaryCmds, EventNeeded); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; break; } default: - auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - std::move(Queue), AuxiliaryCmds, - CommandBuffer, std::move(Dependencies)); + auto Result = MGraphBuilder.addCG( + std::move(CommandGroup), std::move(Queue), AuxiliaryCmds, EventNeeded, + CommandBuffer, std::move(Dependencies)); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 09437928f1d32..75c85af5c5d83 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -372,6 +372,7 @@ class Scheduler { /// /// \param CommandGroup is a unique_ptr to a command group to be added. /// \param Queue Queue that is registering the command-group. + /// \param EventNeeded Specifies whether an event is explicitly required. /// \param CommandBuffer Optional command buffer to enqueue to instead of /// directly to the queue. /// \param Dependencies Optional list of dependency @@ -379,6 +380,7 @@ class Scheduler { /// \return an event object to wait on for command group completion. EventImplPtr addCG(std::unique_ptr CommandGroup, const QueueImplPtr &Queue, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); @@ -602,7 +604,7 @@ class Scheduler { /// processor right away or not. GraphBuildResult addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - std::vector &ToEnqueue, + std::vector &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); diff --git a/sycl/source/enqueue_functions.cpp b/sycl/source/enqueue_functions.cpp new file mode 100644 index 0000000000000..93e33c309ec25 --- /dev/null +++ b/sycl/source/enqueue_functions.cpp @@ -0,0 +1,30 @@ +//==------ enqueue_functions.hpp ------- SYCL enqueue free functions -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +__SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, + const sycl::detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + auto QueueImplPtr = detail::getSyclObjImpl(Q); + return QueueImplPtr->memcpy(QueueImplPtr, Dest, Src, NumBytes, {}, + /*CallerNeedsEvent=*/false, CodeLoc); +} + +__SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes) { + auto QueueImplPtr = detail::getSyclObjImpl(Q); + return QueueImplPtr->memset(QueueImplPtr, Ptr, Value, NumBytes, {}, + /*CallerNeedsEvent=*/false); +} + +__SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice) { + auto QueueImplPtr = detail::getSyclObjImpl(Q); + return QueueImplPtr->mem_advise(QueueImplPtr, Ptr, NumBytes, + pi_mem_advice(Advice), {}, + /*CallerNeedsEvent=*/false); +} diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 8223c9330814e..044e9d941a89a 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,15 +80,29 @@ void *getValueFromDynamicParameter( } // namespace detail + /// TODO: Unused. Remove with ABI break. handler::handler(std::shared_ptr Queue, bool IsHost) - : handler(Queue, Queue, nullptr, IsHost) {} + : handler(Queue, IsHost, /*CallerNeedsEvent=*/true) {} +/// TODO: Unused. Remove with ABI break. handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost) + : handler(Queue, PrimaryQueue, SecondaryQueue, IsHost, + /*CallerNeedsEvent=*/true) {} + +handler::handler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : handler(Queue, Queue, nullptr, IsHost, CallerNeedsEvent) {} + +handler::handler(std::shared_ptr Queue, + std::shared_ptr PrimaryQueue, + std::shared_ptr SecondaryQueue, + bool IsHost, bool CallerNeedsEvent) : MImpl(std::make_shared(std::move(PrimaryQueue), - std::move(SecondaryQueue))), + std::move(SecondaryQueue), + CallerNeedsEvent)), MQueue(std::move(Queue)), MIsHost(IsHost) {} handler::handler( @@ -327,8 +341,9 @@ event handler::finalize() { return Result; }; - bool DiscardEvent = false; - if (MQueue->supportsDiscardingPiEvents()) { + bool DiscardEvent = (MQueue->MDiscardEvents || !MImpl->MEventNeeded) && + MQueue->supportsDiscardingPiEvents(); + if (DiscardEvent) { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && @@ -341,6 +356,9 @@ event handler::finalize() { if (PI_SUCCESS != EnqueueKernel()) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); + auto EventImpl = std::make_shared( + detail::event_impl::HES_Discarded); + MLastEvent = detail::createSyclObjFromImpl(EventImpl); } else { NewEvent = std::make_shared(MQueue); NewEvent->setWorkerQueue(MQueue); @@ -600,7 +618,7 @@ event handler::finalize() { } detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( - std::move(CommandGroup), std::move(MQueue)); + std::move(CommandGroup), std::move(MQueue), MImpl->MEventNeeded); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; @@ -1728,5 +1746,9 @@ void handler::registerDynamicParameter( } MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex); } + +bool handler::eventNeeded() const { + return MImpl->MEventNeeded; +} } // namespace _V1 } // namespace sycl diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 71ada4a1c5863..b548dc8fc351c 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -106,39 +106,44 @@ void queue::throw_asynchronous() { impl->throw_asynchronous(); } event queue::memset(void *Ptr, int Value, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, {}); + return impl->memset(impl, Ptr, Value, Count, {}, /*CallerNeedsEvent=*/true); } event queue::memset(void *Ptr, int Value, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, {DepEvent}); + return impl->memset(impl, Ptr, Value, Count, {DepEvent}, + /*CallerNeedsEvent=*/true); } event queue::memset(void *Ptr, int Value, size_t Count, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, DepEvents); + return impl->memset(impl, Ptr, Value, Count, DepEvents, + /*CallerNeedsEvent=*/true); } event queue::memcpy(void *Dest, const void *Src, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, {}, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, {}, /*CallerNeedsEvent=*/true, + CodeLoc); } event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, {DepEvent}, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, {DepEvent}, + /*CallerNeedsEvent=*/true, CodeLoc); } event queue::memcpy(void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, DepEvents, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, DepEvents, + /*CallerNeedsEvent=*/true, CodeLoc); } event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, @@ -150,20 +155,23 @@ event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {}); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {}, + /*CallerNeedsEvent=*/true); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {DepEvent}); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {DepEvent}, + /*CallerNeedsEvent=*/true); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents, + /*CallerNeedsEvent=*/true); } event queue::discard_or_return(const event &Event) { @@ -184,6 +192,11 @@ event queue::submit_impl(std::function CGH, queue SecondQueue, return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); } +void queue::submit_without_event_impl(std::function CGH, + const detail::code_location &CodeLoc) { + return impl->submit_without_event(CGH, impl, CodeLoc); +} + event queue::submit_impl_and_postprocess( std::function CGH, const detail::code_location &CodeLoc, const SubmitPostProcessF &PostProcess) { @@ -215,12 +228,14 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { "Should not be called in on graph recording."); auto LastEvent = QueueImpl->getLastEvent(); - if (QueueImpl->MDiscardEvents) { - std::cout << "Discard event enabled" << std::endl; + auto LastEventImpl = detail::getSyclObjImpl(LastEvent); + + // If either the queue discards events or the last event was otherwise + // discarded, we return them as they are. The barrier implementation will fall + // back to enqueuing a barrier in this case. + if (QueueImpl->MDiscardEvents || LastEventImpl->isDiscarded()) return LastEvent; - } - auto LastEventImpl = detail::getSyclObjImpl(LastEvent); // If last event is default constructed event then we want to associate it // with the queue and record submission time if profiling is enabled. Such // event corresponds to NOP and its submit time is same as start time and @@ -240,8 +255,12 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) { - if (is_in_order() && !impl->getCommandGraph()) - return getBarrierEventForInorderQueueHelper(impl); + if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents) { + event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); + // If the last event was discarded, fall back to enqueuing a barrier. + if(!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) + return InOrderLastEvent; + } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc); } @@ -262,8 +281,13 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, auto EventImpl = detail::getSyclObjImpl(Event); return !EventImpl->isContextInitialized() || EventImpl->isNOP(); }); - if (is_in_order() && !impl->getCommandGraph() && AllEventsEmptyOrNop) - return getBarrierEventForInorderQueueHelper(impl); + if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents && + AllEventsEmptyOrNop) { + event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); + // If the last event was discarded, fall back to enqueuing a barrier. + if(!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) + return InOrderLastEvent; + } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }, CodeLoc); @@ -333,7 +357,7 @@ event queue::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, const std::vector &DepEvents) { return impl->memcpyToDeviceGlobal(impl, DeviceGlobalPtr, Src, IsDeviceImageScope, NumBytes, Offset, - DepEvents); + DepEvents, /*CallerNeedsEvent=*/true); } event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, @@ -342,7 +366,7 @@ event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, const std::vector &DepEvents) { return impl->memcpyFromDeviceGlobal(impl, Dest, DeviceGlobalPtr, IsDeviceImageScope, NumBytes, Offset, - DepEvents); + DepEvents, /*CallerNeedsEvent=*/true); } bool queue::device_has(aspect Aspect) const { diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6416e4c247841..e2740fda55f9b 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3126,6 +3126,7 @@ _ZN4sycl3_V15queue20wait_and_throw_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue22memcpyFromDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKSt6vectorINS0_5eventESaIS3_EERKNS0_6detail13code_locationE +_ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN4sycl3_V15queue29ext_oneapi_set_external_eventERKNS0_5eventE @@ -3628,10 +3629,14 @@ _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b +_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb +_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b +_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEb +_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZN4sycl3_V17samplerC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V17samplerC1EP11_cl_samplerRKNS0_7contextE @@ -4223,6 +4228,7 @@ _ZNK4sycl3_V17context8get_infoINS0_4info7context32atomic_memory_scope_capabiliti _ZNK4sycl3_V17context8get_infoINS0_4info7context7devicesEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context8get_infoINS0_4info7context8platformEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context9getNativeEv +_ZNK4sycl3_V17handler11eventNeededEv _ZNK4sycl3_V17handler15getCommandGraphEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 35e353780d450..b4d4e7cdb7535 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -111,7 +111,7 @@ class MockHandler : public sycl::handler { public: MockHandler(std::shared_ptr Queue) - : sycl::handler(Queue, /* IsHost */ false) {} + : sycl::handler(Queue, /* IsHost */ false, /*CallerNeedsEvent*/ true) {} std::unique_ptr finalize() { auto CGH = static_cast(this); diff --git a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp index 473ddcefe75fa..bf200ec660fb9 100644 --- a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp +++ b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp @@ -21,7 +21,8 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) { std::vector ToEnqueue; - MockHandlerCustomFinalize MockCGH(QueueDevImpl, false); + MockHandlerCustomFinalize MockCGH(QueueDevImpl, false, + /*CallerNeedsEvent=*/true); sycl::accessor B; @@ -33,8 +34,8 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) { std::unique_ptr CmdGroup = MockCGH.finalize(); - detail::Command *NewCmd = - MS.addCG(std::move(CmdGroup), QueueDevImpl, ToEnqueue); + detail::Command *NewCmd = MS.addCG(std::move(CmdGroup), QueueDevImpl, + ToEnqueue, /*EventNeeded=*/true); // if MDeps is empty, accessor built from default ctor does not affect // dependency graph in accordance with SYCL 2020 diff --git a/sycl/unittests/scheduler/Commands.cpp b/sycl/unittests/scheduler/Commands.cpp index a995800643421..bd0df10d1309a 100644 --- a/sycl/unittests/scheduler/Commands.cpp +++ b/sycl/unittests/scheduler/Commands.cpp @@ -81,6 +81,7 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { std::unique_ptr CommandGroup(new detail::CGBarrier( std::move(Arg), detail::CG::StorageInitHelper({}, {}, {}, {}, {}), detail::CG::CGTYPE::BarrierWaitlist, {})); - MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl); + MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl, + /*EventNeeded=*/true); } } diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index fc816d1a4f3af..0ca56e98a388b 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -60,7 +60,8 @@ class DependsOnTests : public ::testing::Test { std::vector ToEnqueue; // Emulating processing of command group function - MockHandlerCustomFinalize MockCGH(QueueDevImpl, false); + MockHandlerCustomFinalize MockCGH(QueueDevImpl, false, + /*CallerNeedsEvent=*/true); for (auto EventImpl : Events) MockCGH.depends_on(detail::createSyclObjFromImpl(EventImpl)); @@ -84,7 +85,7 @@ class DependsOnTests : public ::testing::Test { detail::Command *NewCmd = MS.addCG( std::move(CmdGroup), Type == TestCGType::HOST_TASK ? MS.getDefaultHostQueue() : QueueDevImpl, - ToEnqueue); + ToEnqueue, /*EventNeeded=*/true); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 3389769569e5e..4c941258c8669 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -107,7 +107,8 @@ static void checkCleanupOnEnqueue(MockScheduler &MS, /*SharedPtrStorage*/ {}, /*Requirements*/ {&MockReq}, /*Events*/ {}))}; - detail::EventImplPtr Event = MS.addCG(std::move(CG), QueueImpl); + detail::EventImplPtr Event = + MS.addCG(std::move(CG), QueueImpl, /*EventNeeded=*/true); auto *Cmd = static_cast(Event->getCommand()); verifyCleanup(Record, AllocaCmd, MockCmd, CommandDeleted); @@ -332,7 +333,8 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) { AttachSchedulerWrapper AttachScheduler{MSPtr}; detail::EventImplPtr EventImplPtr; { - MockHandlerCustomFinalize MockCGH(QueueImplPtr, false); + MockHandlerCustomFinalize MockCGH(QueueImplPtr, false, + /*CallerNeedsEvent=*/true); kernel_bundle KernelBundle = sycl::get_kernel_bundle( QueueImplPtr->get_context()); @@ -343,7 +345,8 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) { MockCGH.single_task>([] {}); std::unique_ptr CG = MockCGH.finalize(); - EventImplPtr = MSPtr->addCG(std::move(CG), QueueImplPtr); + EventImplPtr = + MSPtr->addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true); } // The buffers should have been released with graph cleanup once the work is @@ -393,7 +396,8 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) { detail::EventImplPtr EventImplPtr; bool MockAuxResourceDeleted = false; { - MockHandlerCustomFinalize MockCGH(QueueImplPtr, false); + MockHandlerCustomFinalize MockCGH(QueueImplPtr, false, + /*CallerNeedsEvent=*/true); kernel_bundle KernelBundle = sycl::get_kernel_bundle( QueueImplPtr->get_context()); @@ -413,7 +417,8 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) { MockCGH.single_task>([] {}); std::unique_ptr CG = MockCGH.finalize(); - EventImplPtr = MSPtr->addCG(std::move(CG), QueueImplPtr); + EventImplPtr = + MSPtr->addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true); } EventCompleted = false; diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index f877042517fd0..3bd77e9fc36d0 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -32,6 +32,10 @@ class LimitedHandler { std::make_shared(); return sycl::detail::createSyclObjFromImpl(NewEvent); } + + sycl::detail::CG::CGTYPE getType() const { + return sycl::detail::CG::CGTYPE::None; + } }; // Needed to use EXPECT_CALL to verify depends_on that originally appends lst diff --git a/sycl/unittests/scheduler/KernelFusion.cpp b/sycl/unittests/scheduler/KernelFusion.cpp index 8b45c03e37f1f..1db16cbda1493 100644 --- a/sycl/unittests/scheduler/KernelFusion.cpp +++ b/sycl/unittests/scheduler/KernelFusion.cpp @@ -22,7 +22,8 @@ template detail::Command *CreateTaskCommand(MockScheduler &MS, detail::QueueImplPtr DevQueue, buffer &buf) { - MockHandlerCustomFinalize MockCGH(DevQueue, false); + MockHandlerCustomFinalize MockCGH(DevQueue, false, + /*CallerNeedsEvent=*/true); auto acc = buf.get_access(static_cast(MockCGH)); @@ -36,7 +37,8 @@ detail::Command *CreateTaskCommand(MockScheduler &MS, auto CmdGrp = MockCGH.finalize(); std::vector ToEnqueue; - detail::Command *NewCmd = MS.addCG(std::move(CmdGrp), DevQueue, ToEnqueue); + detail::Command *NewCmd = + MS.addCG(std::move(CmdGrp), DevQueue, ToEnqueue, /*EventNeeded=*/true); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index c97428b9d55c6..c27e4d672e0fa 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -147,7 +147,8 @@ TEST_F(SchedulerTest, QueueFlushing) { /*SharedPtrStorage*/ {}, /*Requirements*/ {}, /*Events*/ {}))}; - detail::ExecCGCommand ExecCGCmd{std::move(CG), QueueImplA}; + detail::ExecCGCommand ExecCGCmd{std::move(CG), QueueImplA, + /*EventNeeded=*/true}; MockReq.MDims = 1; (void)ExecCGCmd.addDep(detail::DepDesc(&AllocaCmd, &MockReq, &AllocaCmd), ToCleanUp); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 88ced1f25904a..addd9237d4a40 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -195,11 +195,12 @@ class MockScheduler : public sycl::detail::Scheduler { return MGraphBuilder.addEmptyCmd(Cmd, Reqs, Queue, Reason, ToEnqueue); } - sycl::detail::Command * - addCG(std::unique_ptr CommandGroup, - sycl::detail::QueueImplPtr Queue, - std::vector &ToEnqueue) { - return MGraphBuilder.addCG(std::move(CommandGroup), Queue, ToEnqueue) + sycl::detail::Command *addCG(std::unique_ptr CommandGroup, + sycl::detail::QueueImplPtr Queue, + std::vector &ToEnqueue, + bool EventNeeded) { + return MGraphBuilder + .addCG(std::move(CommandGroup), Queue, ToEnqueue, EventNeeded) .NewCmd; } @@ -226,8 +227,9 @@ sycl::detail::Requirement getMockRequirement(const MemObjT &MemObj) { class MockHandler : public sycl::handler { public: - MockHandler(std::shared_ptr Queue, bool IsHost) - : sycl::handler(Queue, IsHost) {} + MockHandler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : sycl::handler(Queue, IsHost, CallerNeedsEvent) {} // Methods using sycl::handler::addReduction; using sycl::handler::getType; @@ -293,8 +295,8 @@ class MockHandler : public sycl::handler { class MockHandlerCustomFinalize : public MockHandler { public: MockHandlerCustomFinalize(std::shared_ptr Queue, - bool IsHost) - : MockHandler(Queue, IsHost) {} + bool IsHost, bool CallerNeedsEvent) + : MockHandler(Queue, IsHost, CallerNeedsEvent) {} std::unique_ptr finalize() { std::unique_ptr CommandGroup; diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 18c0b3e1a8070..10c3ed23dc8ac 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -20,8 +20,9 @@ inline constexpr auto DisableCleanupName = class MockHandlerStreamInit : public MockHandler { public: - MockHandlerStreamInit(std::shared_ptr Queue, bool IsHost) - : MockHandler(Queue, IsHost) {} + MockHandlerStreamInit(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : MockHandler(Queue, IsHost, CallerNeedsEvent) {} std::unique_ptr finalize() { std::unique_ptr CommandGroup; switch (getType()) { @@ -85,7 +86,7 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { /*PropList=*/{})); // Emulating processing of command group function - MockHandlerStreamInit MockCGH(HQueueImpl, true); + MockHandlerStreamInit MockCGH(HQueueImpl, true, /*CallerNeedsEvent=*/true); MockCGH.setType(detail::CG::Kernel); auto EmptyKernel = [](sycl::nd_item<1>) {}; @@ -118,7 +119,8 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { MockScheduler MS; std::vector AuxCmds; - detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds); + detail::Command *NewCmd = + MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds, /*EventNeeded=*/true); ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler"; ASSERT_GT(NewCmd->MDeps.size(), 0u) << "No deps appeared in the new exec kernel command"; From 658d3560c4e22b469c0edaf7cad5227419234c2c Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 13 Jun 2024 01:36:21 -0700 Subject: [PATCH 2/8] Add unittests Signed-off-by: Larsen, Steffen --- .../oneapi/experimental/enqueue_functions.hpp | 6 +- sycl/source/detail/queue_impl.hpp | 14 +- sycl/unittests/Extensions/CMakeLists.txt | 1 + sycl/unittests/Extensions/DiscardEvent.cpp | 83 +++ .../Extensions/EnqueueFunctionsEvents.cpp | 471 ++++++++++++++++++ sycl/unittests/helpers/PiMockPlugin.hpp | 75 ++- .../scheduler/InOrderQueueSyncCheck.cpp | 34 +- 7 files changed, 636 insertions(+), 48 deletions(-) create mode 100644 sycl/unittests/Extensions/DiscardEvent.cpp create mode 100644 sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 6461ff122a19e..7f10dd7f79e85 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -211,7 +211,8 @@ template Range, const KernelType &KernelObj, ReductionsT &&...Reductions) { submit(Q, [&](handler &CGH) { - nd_launch(CGH, Range, KernelObj, std::forward(Reductions)...); + nd_launch(CGH, Range, KernelObj, + std::forward(Reductions)...); }); } @@ -234,7 +235,8 @@ template , Properties> Config, const KernelType &KernelObj, ReductionsT &&...Reductions) { submit(Q, [&](handler &CGH) { - nd_launch(CGH, Config, KernelObj, std::forward(Reductions)...); + nd_launch(CGH, Config, KernelObj, + std::forward(Reductions)...); }); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 46fd926a667b1..ae04551929ac2 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -791,12 +791,12 @@ class queue_impl { // Hook to the scheduler to clean up any fusion command held on destruction. void cleanup_fusion_cmd(); - EventImplPtr insertHelperBarrier() { - const PluginPtr &Plugin = getPlugin(); - pi_event BarrierPiEvent = 0; - Plugin->call( - getHandleRef(), 0, nullptr, &BarrierPiEvent); - return std::make_shared(BarrierPiEvent, get_context()); + template + EventImplPtr insertHelperBarrier(const HandlerType &Handler) { + auto ResEvent = std::make_shared(Handler.MQueue); + getPlugin()->call( + Handler.MQueue->getHandleRef(), 0, nullptr, &ResEvent->getHandleRef()); + return ResEvent; } // template is needed for proper unit testing @@ -825,7 +825,7 @@ class queue_impl { // insert barriers between host_task enqueues. if (EventToBuildDeps->isDiscarded() && Handler.getType() == CG::CodeplayHostTask) - EventToBuildDeps = insertHelperBarrier(); + EventToBuildDeps = insertHelperBarrier(Handler); if (!EventToBuildDeps->isDiscarded()) Handler.depends_on(EventToBuildDeps); diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index 491fa49225a81..c8c0e619ecc55 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -10,6 +10,7 @@ add_sycl_unittest(ExtensionsTests OBJECT USMP2P.cpp CompositeDevice.cpp OneAPIProd.cpp + EnqueueFunctionsEvents.cpp ) add_subdirectory(CommandGraph) diff --git a/sycl/unittests/Extensions/DiscardEvent.cpp b/sycl/unittests/Extensions/DiscardEvent.cpp new file mode 100644 index 0000000000000..01131e9e8e7a5 --- /dev/null +++ b/sycl/unittests/Extensions/DiscardEvent.cpp @@ -0,0 +1,83 @@ +//==------------------------- DiscardEvent.cpp -----------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include +#include +#include + +using namespace sycl; + +namespace oneapiext = ext::oneapi::experimental; + +namespace { + +thread_local size_t counter_piEnqueueKernelLaunch = 0; +inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piEnqueueKernelLaunch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; +thread_local std::chrono::time_point + timestamp_piEnqueueEventsWaitWithBarrier; +inline pi_result after_piEnqueueEventsWaitWithBarrier( + pi_queue, pi_uint32, + const pi_event *, pi_event *) { + ++counter_piEnqueueEventsWaitWithBarrier; + timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); + return PI_SUCCESS; +} + +class DiscardEventTests : public ::testing::Test { +public: + DiscardEventTests() + : Mock{}, Q{context(Mock.getPlatform()), default_selector_v, + property::queue::in_order{}} {} + +protected: + void SetUp() override { + counter_piEnqueueKernelLaunch = 0; + counter_piEnqueueEventsWaitWithBarrier = 0; + } + + unittest::PiMock Mock; + queue Q; +}; + +TEST_F(DiscardEventTests, BarrierBeforeHostTask) { + // Special test for case where host_task need an event after, so a barrier is + // enqueued to create a usable event. + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter( + after_piEnqueueEventsWaitWithBarrier); + + oneapiext::single_task>(Q, []() {}); + + std::chrono::time_point HostTaskTimestamp; + Q.submit([&](handler &CGH) { + CGH.host_task( + [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); + }).wait(); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, 1); + ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); +} + +} // namespace diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp new file mode 100644 index 0000000000000..abc9ca6f3c9b4 --- /dev/null +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -0,0 +1,471 @@ +//==-------------------- EnqueueFunctionsEvents.cpp ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Tests the behavior of enqueue free functions when events can be discarded. + +#include +#include + +#include + +#include +#include +#include +#include + +using namespace sycl; + +namespace oneapiext = ext::oneapi::experimental; + +namespace { + +inline pi_result after_piKernelGetInfo(pi_kernel kernel, + pi_kernel_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + constexpr char MockKernel[] = "TestKernel"; + if (param_name == PI_KERNEL_INFO_FUNCTION_NAME) { + if (param_value) { + assert(param_value_size == sizeof(MockKernel)); + std::memcpy(param_value, MockKernel, sizeof(MockKernel)); + } + if (param_value_size_ret) + *param_value_size_ret = sizeof(MockKernel); + } + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueKernelLaunch = 0; +inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piEnqueueKernelLaunch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemcpy = 0; +inline pi_result redefined_piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *, + const void *, size_t, + pi_uint32, const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueueMemcpy; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemset = 0; +inline pi_result redefined_piextUSMEnqueueMemset(pi_queue, void *, pi_int32, + size_t, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueueMemset; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueuePrefetch = 0; +inline pi_result redefined_piextUSMEnqueuePrefetch(pi_queue, const void *, + size_t, + pi_usm_migration_flags, + pi_uint32, const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueuePrefetch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemAdvise = 0; +inline pi_result redefined_piextUSMEnqueueMemAdvise(pi_queue, const void *, + size_t, pi_mem_advice, + pi_event *event) { + ++counter_piextUSMEnqueueMemAdvise; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; +thread_local std::chrono::time_point + timestamp_piEnqueueEventsWaitWithBarrier; +inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, + const pi_event *, + pi_event *) { + ++counter_piEnqueueEventsWaitWithBarrier; + timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); + return PI_SUCCESS; +} + +class EnqueueFunctionsEventsTests : public ::testing::Test { +public: + EnqueueFunctionsEventsTests() + : Mock{}, Q{context(Mock.getPlatform()), default_selector_v, + property::queue::in_order{}} {} + +protected: + void SetUp() override { + counter_piEnqueueKernelLaunch = 0; + counter_piextUSMEnqueueMemcpy = 0; + counter_piEnqueueEventsWaitWithBarrier = 0; + } + + unittest::PiMock Mock; + queue Q; +}; + +TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::single_task>(CGH, []() {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::single_task>(Q, []() {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, + [&](handler &CGH) { oneapiext::single_task(CGH, Kernel); }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + oneapiext::single_task(Q, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::parallel_for>(CGH, range<1>{32}, [](item<1>) {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::parallel_for>(Q, range<1>{32}, [](item<1>) {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::parallel_for(CGH, range<1>{32}, Kernel); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + oneapiext::parallel_for(Q, range<1>{32}, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::nd_launch>( + CGH, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::nd_launch>(Q, nd_range<1>{range<1>{32}, range<1>{32}}, + [](nd_item<1>) {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::nd_launch(CGH, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::memcpy(CGH, Src, Dst, sizeof(int) * N); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, 1); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::memcpy(Q, Src, Dst, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, 1); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, + [&](handler &CGH) { oneapiext::copy(CGH, Dst, Src, N); }); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, 1); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::memcpy(Q, Dst, Src, N); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, 1); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemset); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::memset(CGH, Dst, int{1}, sizeof(int) * N); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemset, 1); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemset); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::memset(Q, Dst, 1, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueueMemset, 1); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueuePrefetch); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit( + Q, [&](handler &CGH) { oneapiext::prefetch(CGH, Dst, sizeof(int) * N); }); + + ASSERT_EQ(counter_piextUSMEnqueuePrefetch, 1); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueuePrefetch); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::prefetch(Q, Dst, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueuePrefetch, 1); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemAdvise); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::mem_advise(CGH, Dst, sizeof(int) * N, 1); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, 1); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemAdvise); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::mem_advise(Q, Dst, sizeof(int) * N, 1); + + ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, 1); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { + // Special test for case where host_task need an event after, so a barrier is + // enqueued to create a usable event. + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter( + after_piEnqueueEventsWaitWithBarrier); + + oneapiext::single_task>(Q, []() {}); + + std::chrono::time_point HostTaskTimestamp; + Q.submit([&](handler &CGH) { + CGH.host_task( + [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); + }).wait(); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, 1); + ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); +} + +} // namespace diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index d9f18d9008f0d..1f7470f2c625d 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1004,7 +1004,8 @@ inline pi_result mock_piEnqueueKernelLaunch( const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1013,7 +1014,8 @@ inline pi_result mock_piextEnqueueCooperativeKernelLaunch( const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1021,14 +1023,16 @@ inline pi_result mock_piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } inline pi_result mock_piEnqueueEventsWaitWithBarrier( pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1037,7 +1041,8 @@ mock_piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1048,7 +1053,8 @@ inline pi_result mock_piEnqueueMemBufferReadRect( size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1057,7 +1063,8 @@ mock_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1068,7 +1075,8 @@ inline pi_result mock_piEnqueueMemBufferWriteRect( size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1078,7 +1086,8 @@ mock_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1089,7 +1098,8 @@ inline pi_result mock_piEnqueueMemBufferCopyRect( size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1100,7 +1110,8 @@ inline pi_result mock_piEnqueueMemBufferFill(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1109,7 +1120,8 @@ inline pi_result mock_piEnqueueMemImageRead( pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1120,7 +1132,8 @@ mock_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1130,7 +1143,8 @@ mock_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1140,7 +1154,8 @@ mock_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1151,7 +1166,8 @@ inline pi_result mock_piEnqueueMemBufferMap(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); auto parentDummyHandle = reinterpret_cast(buffer); *ret_map = (void *)(parentDummyHandle->MData); @@ -1163,7 +1179,8 @@ inline pi_result mock_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1228,7 +1245,8 @@ inline pi_result mock_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1238,7 +1256,8 @@ inline pi_result mock_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1248,7 +1267,8 @@ inline pi_result mock_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1256,7 +1276,8 @@ inline pi_result mock_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1299,7 +1320,8 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableWrite( pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1307,7 +1329,8 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableRead( pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1507,7 +1530,8 @@ inline pi_result mock_piextEnqueueReadHostPipe( pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1515,7 +1539,8 @@ inline pi_result mock_piextEnqueueWriteHostPipe( pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index ab37a667fc90a..3f97ffb003adc 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -19,10 +19,21 @@ using namespace sycl; using ::testing::An; +class MockQueueImpl : public sycl::detail::queue_impl { +public: + MockQueueImpl(const sycl::detail::DeviceImplPtr &Device, + const sycl::async_handler &AsyncHandler, + const sycl::property_list &PropList) + : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} + using sycl::detail::queue_impl::finalizeHandler; +}; + // Define type with the only methods called by finalizeHandler class LimitedHandler { public: - LimitedHandler(sycl::detail::CG::CGTYPE CGType) : MCGType(CGType) {} + LimitedHandler(sycl::detail::CG::CGTYPE CGType, + std::shared_ptr Queue) + : MCGType(CGType), MQueue(Queue) {} virtual ~LimitedHandler() {} virtual void depends_on(const sycl::detail::EventImplPtr &) {} @@ -38,14 +49,16 @@ class LimitedHandler { sycl::detail::CG::CGTYPE getType() { return MCGType; } sycl::detail::CG::CGTYPE MCGType; + std::shared_ptr MQueue; }; // Needed to use EXPECT_CALL to verify depends_on that originally appends lst // event as dependency to the new CG class LimitedHandlerSimulation : public LimitedHandler { public: - LimitedHandlerSimulation(sycl::detail::CG::CGTYPE CGType) - : LimitedHandler(CGType) {} + LimitedHandlerSimulation(sycl::detail::CG::CGTYPE CGType, + std::shared_ptr Queue) + : LimitedHandler(CGType, Queue) {} MOCK_METHOD1(depends_on, void(const sycl::detail::EventImplPtr &)); MOCK_METHOD1(depends_on, void(event Event)); @@ -53,15 +66,6 @@ class LimitedHandlerSimulation : public LimitedHandler { void(const std::vector &Events)); }; -class MockQueueImpl : public sycl::detail::queue_impl { -public: - MockQueueImpl(const sycl::detail::DeviceImplPtr &Device, - const sycl::async_handler &AsyncHandler, - const sycl::property_list &PropList) - : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} - using sycl::detail::queue_impl::finalizeHandler; -}; - // Only check events dependency in queue_impl::finalizeHandler TEST_F(SchedulerTest, InOrderQueueSyncCheck) { sycl::unittest::PiMock Mock; @@ -76,13 +80,15 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { // previous task, this is needed to properly sync blocking & blocked tasks. sycl::event Event; { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask}; + LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(0); Queue->finalizeHandler(MockCGH, Event); } { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask}; + LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(1); Queue->finalizeHandler(MockCGH, Event); From 8347467c4ce416b39a1c413352140ef48d8ad8c9 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 19 Jun 2024 07:08:34 -0700 Subject: [PATCH 3/8] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/include/sycl/handler.hpp | 2 +- sycl/include/sycl/queue.hpp | 1 - sycl/source/detail/handler_impl.hpp | 2 +- sycl/source/detail/queue_impl.cpp | 4 ++-- sycl/source/detail/queue_impl.hpp | 11 ++++------- sycl/source/detail/scheduler/graph_builder.cpp | 3 +-- sycl/source/handler.cpp | 10 ++++------ sycl/source/queue.cpp | 4 ++-- sycl/unittests/Extensions/DiscardEvent.cpp | 6 +++--- 9 files changed, 18 insertions(+), 25 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 63ba1599b1794..9e328d1740ca5 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -501,7 +501,7 @@ class __SYCL_EXPORT handler { /// is null if no secondary queue is associated with the submission. /// \param IsHost indicates if this handler is created for SYCL host device. /// \param CallerNeedsEvent indicates if the event resulting from this handler -/// is needed by the caller. + /// is needed by the caller. handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost, diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 235b764acbc52..04b6969fe2b12 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -2711,7 +2711,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { void submit_without_event_impl(std::function CGH, const detail::code_location &CodeLoc); - /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 90a083d0b44d3..f0df55d5e069b 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -35,7 +35,7 @@ class handler_impl { bool EventNeeded) : MSubmissionPrimaryQueue(std::move(SubmissionPrimaryQueue)), MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)), - MEventNeeded(EventNeeded){}; + MEventNeeded(EventNeeded) {}; handler_impl() = default; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index ce40b4a01745e..9ba1c2c64e1d7 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -656,8 +656,8 @@ bool queue_impl::ext_oneapi_empty() const { // which may happend despite the queue not being a discard event queue. if (MDefaultGraphDeps.LastEventPtr->isDiscarded()) return MDefaultGraphDeps.LastEventPtr - ->get_info() == - info::event_command_status::complete; + ->get_info() == + info::event_command_status::complete; } // Check the status of the backend queue if this is not a host queue. diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ae04551929ac2..ce38232ec0895 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -365,9 +365,7 @@ class queue_impl { bool is_host() const { return MHostQueue; } /// \return true if the discard event property was set at time of creation. - bool hasDiscardEventsProperty() const { - return MDiscardEvents; - } + bool hasDiscardEventsProperty() const { return MDiscardEvents; } /// \return true if this queue allows for discarded events. bool supportsDiscardingPiEvents() const { @@ -877,16 +875,15 @@ class queue_impl { /// same as Self. /// \param SecondaryQueue is a pointer to the secondary queue. This may be the /// same as Self. - /// \param CallerNeedsEvent is a boolean indicating whether the event is required - /// by the user after the call. + /// \param CallerNeedsEvent is a boolean indicating whether the event is + /// required by the user after the call. /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. event submit_impl(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, - bool CallerNeedsEvent, - const detail::code_location &Loc, + bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess); /// Helper function for submitting a memory operation with a handler. diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 60ca41fd21931..31a732ae5ba35 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -943,8 +943,7 @@ static void combineAccessModesOfReqs(std::vector &Reqs) { Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - std::vector &ToEnqueue, - bool EventNeeded, + std::vector &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { std::vector &Reqs = CommandGroup->getRequirements(); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index ff3000972d1f9..1ce5f7b97940e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,7 +80,7 @@ void *getValueFromDynamicParameter( } // namespace detail - /// TODO: Unused. Remove with ABI break. +/// TODO: Unused. Remove with ABI break. handler::handler(std::shared_ptr Queue, bool IsHost) : handler(Queue, IsHost, /*CallerNeedsEvent=*/true) {} @@ -89,8 +89,8 @@ handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost) - : handler(Queue, PrimaryQueue, SecondaryQueue, IsHost, - /*CallerNeedsEvent=*/true) {} + : handler(Queue, PrimaryQueue, SecondaryQueue, IsHost, + /*CallerNeedsEvent=*/true) {} handler::handler(std::shared_ptr Queue, bool IsHost, bool CallerNeedsEvent) @@ -1830,8 +1830,6 @@ void handler::registerDynamicParameter( MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex); } -bool handler::eventNeeded() const { - return MImpl->MEventNeeded; -} +bool handler::eventNeeded() const { return MImpl->MEventNeeded; } } // namespace _V1 } // namespace sycl diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 6f5aace35703d..9648431a5a429 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -242,7 +242,7 @@ event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) { !impl->MIsProfilingEnabled) { event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); // If the last event was discarded, fall back to enqueuing a barrier. - if(!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) + if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) return InOrderLastEvent; } @@ -269,7 +269,7 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) { event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); // If the last event was discarded, fall back to enqueuing a barrier. - if(!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) + if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) return InOrderLastEvent; } diff --git a/sycl/unittests/Extensions/DiscardEvent.cpp b/sycl/unittests/Extensions/DiscardEvent.cpp index 01131e9e8e7a5..5f8e9545b747e 100644 --- a/sycl/unittests/Extensions/DiscardEvent.cpp +++ b/sycl/unittests/Extensions/DiscardEvent.cpp @@ -35,9 +35,9 @@ inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; thread_local std::chrono::time_point timestamp_piEnqueueEventsWaitWithBarrier; -inline pi_result after_piEnqueueEventsWaitWithBarrier( - pi_queue, pi_uint32, - const pi_event *, pi_event *) { +inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, + const pi_event *, + pi_event *) { ++counter_piEnqueueEventsWaitWithBarrier; timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); return PI_SUCCESS; From 215c5a41452304a9e8770fddd9d7fb29dfb64dee Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 19 Jun 2024 07:12:00 -0700 Subject: [PATCH 4/8] Fix unittests Signed-off-by: Larsen, Steffen --- sycl/unittests/Extensions/DiscardEvent.cpp | 4 +- .../Extensions/EnqueueFunctionsEvents.cpp | 51 ++++++++++--------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/sycl/unittests/Extensions/DiscardEvent.cpp b/sycl/unittests/Extensions/DiscardEvent.cpp index 5f8e9545b747e..dc729c74084e0 100644 --- a/sycl/unittests/Extensions/DiscardEvent.cpp +++ b/sycl/unittests/Extensions/DiscardEvent.cpp @@ -75,8 +75,8 @@ TEST_F(DiscardEventTests, BarrierBeforeHostTask) { [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); }).wait(); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); - ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, size_t{1}); ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); } diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index abc9ca6f3c9b4..842e3cf271216 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -112,6 +112,9 @@ class EnqueueFunctionsEventsTests : public ::testing::Test { void SetUp() override { counter_piEnqueueKernelLaunch = 0; counter_piextUSMEnqueueMemcpy = 0; + counter_piextUSMEnqueueMemset = 0; + counter_piextUSMEnqueuePrefetch = 0; + counter_piextUSMEnqueueMemAdvise = 0; counter_piEnqueueEventsWaitWithBarrier = 0; } @@ -127,7 +130,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { oneapiext::single_task>(CGH, []() {}); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { @@ -136,7 +139,7 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { oneapiext::single_task>(Q, []() {}); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { @@ -154,7 +157,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { oneapiext::submit(Q, [&](handler &CGH) { oneapiext::single_task(CGH, Kernel); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { @@ -172,7 +175,7 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { oneapiext::single_task(Q, Kernel); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { @@ -183,7 +186,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { oneapiext::parallel_for>(CGH, range<1>{32}, [](item<1>) {}); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { @@ -192,7 +195,7 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { oneapiext::parallel_for>(Q, range<1>{32}, [](item<1>) {}); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { @@ -211,7 +214,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { oneapiext::parallel_for(CGH, range<1>{32}, Kernel); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { @@ -229,7 +232,7 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { oneapiext::parallel_for(Q, range<1>{32}, Kernel); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { @@ -241,7 +244,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { CGH, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { @@ -251,7 +254,7 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { oneapiext::nd_launch>(Q, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { @@ -270,7 +273,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { oneapiext::nd_launch(CGH, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { @@ -288,7 +291,7 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { @@ -303,7 +306,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { oneapiext::memcpy(CGH, Src, Dst, sizeof(int) * N); }); - ASSERT_EQ(counter_piextUSMEnqueueMemcpy, 1); + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); free(Src, Q); free(Dst, Q); @@ -319,7 +322,7 @@ TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) { oneapiext::memcpy(Q, Src, Dst, sizeof(int) * N); - ASSERT_EQ(counter_piextUSMEnqueueMemcpy, 1); + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); free(Src, Q); free(Dst, Q); @@ -336,7 +339,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) { oneapiext::submit(Q, [&](handler &CGH) { oneapiext::copy(CGH, Dst, Src, N); }); - ASSERT_EQ(counter_piextUSMEnqueueMemcpy, 1); + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); free(Src, Q); free(Dst, Q); @@ -352,7 +355,7 @@ TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { oneapiext::memcpy(Q, Dst, Src, N); - ASSERT_EQ(counter_piextUSMEnqueueMemcpy, 1); + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); free(Src, Q); free(Dst, Q); @@ -369,7 +372,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { oneapiext::memset(CGH, Dst, int{1}, sizeof(int) * N); }); - ASSERT_EQ(counter_piextUSMEnqueueMemset, 1); + ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); free(Dst, Q); } @@ -383,7 +386,7 @@ TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { oneapiext::memset(Q, Dst, 1, sizeof(int) * N); - ASSERT_EQ(counter_piextUSMEnqueueMemset, 1); + ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); free(Dst, Q); } @@ -398,7 +401,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) { oneapiext::submit( Q, [&](handler &CGH) { oneapiext::prefetch(CGH, Dst, sizeof(int) * N); }); - ASSERT_EQ(counter_piextUSMEnqueuePrefetch, 1); + ASSERT_EQ(counter_piextUSMEnqueuePrefetch, size_t{1}); free(Dst, Q); } @@ -412,7 +415,7 @@ TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) { oneapiext::prefetch(Q, Dst, sizeof(int) * N); - ASSERT_EQ(counter_piextUSMEnqueuePrefetch, 1); + ASSERT_EQ(counter_piextUSMEnqueuePrefetch, size_t{1}); free(Dst, Q); } @@ -428,7 +431,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { oneapiext::mem_advise(CGH, Dst, sizeof(int) * N, 1); }); - ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, 1); + ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, size_t{1}); free(Dst, Q); } @@ -442,7 +445,7 @@ TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { oneapiext::mem_advise(Q, Dst, sizeof(int) * N, 1); - ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, 1); + ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, size_t{1}); free(Dst, Q); } @@ -463,8 +466,8 @@ TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); }).wait(); - ASSERT_EQ(counter_piEnqueueKernelLaunch, 1); - ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, 1); + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, size_t{1}); ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); } From b6a0ab2cfea144102c8b2570c785f71c69436324 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 19 Jun 2024 22:06:43 -0700 Subject: [PATCH 5/8] Fix symbols Signed-off-by: Larsen, Steffen --- sycl/include/sycl/handler.hpp | 2 +- sycl/test/abi/sycl_symbols_windows.dump | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 9e328d1740ca5..9251c1b485123 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -527,7 +527,7 @@ class __SYCL_EXPORT handler { void setType(detail::CG::CGTYPE Type) { MCGType = Type; } - detail::CG::CGTYPE getType() const { return MCGType; } + detail::CG::CGTYPE getType() { return MCGType; } void throwIfActionIsCreated() { if (detail::CG::None != getType()) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index e8610211e8572..250a51e06c208 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -597,7 +597,9 @@ ??0half@host_half_impl@detail@_V1@sycl@@QEAA@G@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N1@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N1@Z ??0host_selector@_V1@sycl@@QEAA@$$QEAV012@@Z ??0host_selector@_V1@sycl@@QEAA@AEBV012@@Z ??0host_selector@_V1@sycl@@QEAA@XZ @@ -4075,6 +4077,7 @@ ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEAVqueue@67@@Z ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@@Z ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXXZ +?eventNeeded@handler@_V1@sycl@@AEBA_NXZ ?ext_codeplay_supports_fusion@queue@_V1@sycl@@QEBA_NXZ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z @@ -4509,11 +4512,13 @@ ?markBeingUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?markBufferAsInternal@detail@_V1@sycl@@YAXAEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z ?markNoLongerBeingUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ +?mem_advise@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAX_KHAEBUcode_location@detail@45@@Z ?mem_advise@handler@_V1@sycl@@QEAAXPEBX_KH@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHV423@AEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KW4_pi_mem_advice@@AEBUcode_location@detail@23@@Z +?memcpy@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAXPEBX_KAEBUcode_location@detail@45@@Z ?memcpy@handler@_V1@sycl@@QEAAXPEAXPEBX_K@Z ?memcpy@queue@_V1@sycl@@QEAA?AVevent@23@PEAXPEBX_KAEBUcode_location@detail@23@@Z ?memcpy@queue@_V1@sycl@@QEAA?AVevent@23@PEAXPEBX_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z @@ -4524,6 +4529,7 @@ ?memcpyToDeviceGlobal@handler@_V1@sycl@@AEAAXPEBX0_N_K2@Z ?memcpyToDeviceGlobal@queue@_V1@sycl@@AEAA?AVevent@23@PEAXPEBX_N_K3AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z ?memcpyToHostOnlyDeviceGlobal@handler@_V1@sycl@@AEAAXPEBX0_K_N11@Z +?memset@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAXH_KAEBUcode_location@detail@45@@Z ?memset@handler@_V1@sycl@@QEAAXPEAXH_K@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBUcode_location@detail@23@@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z @@ -4632,6 +4638,7 @@ ?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z +?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z ?supportsUSMFill2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemcpy2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemset2D@handler@_V1@sycl@@AEAA_NXZ From e45063b9fd9b2177923c4e3db008be25e36dbc2c Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 24 Jun 2024 21:48:12 -0700 Subject: [PATCH 6/8] Fix check for discard queue in empty Signed-off-by: Larsen, Steffen --- sycl/source/detail/queue_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 9ba1c2c64e1d7..53151a5edbacb 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -654,7 +654,7 @@ bool queue_impl::ext_oneapi_empty() const { // Otherwise, check if the last event is finished. // Note that we fall back to the backend query if the event was discarded, // which may happend despite the queue not being a discard event queue. - if (MDefaultGraphDeps.LastEventPtr->isDiscarded()) + if (!MDefaultGraphDeps.LastEventPtr->isDiscarded()) return MDefaultGraphDeps.LastEventPtr ->get_info() == info::event_command_status::complete; From b070d526a3f584be9d4bb77de72ede09106979d9 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 28 Jun 2024 00:43:59 -0700 Subject: [PATCH 7/8] Remove unused APIs Signed-off-by: Larsen, Steffen --- sycl/include/sycl/handler.hpp | 20 -------------------- sycl/source/handler.cpp | 12 ------------ sycl/test/abi/sycl_symbols_linux.dump | 4 ---- 3 files changed, 36 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 9251c1b485123..ff714fc34468d 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -461,26 +461,6 @@ template bool range_size_fits_in_size_t(const range &r) { /// \ingroup sycl_api class __SYCL_EXPORT handler { private: - /// Constructs SYCL handler from queue. - /// - /// \param Queue is a SYCL queue. - /// \param IsHost indicates if this handler is created for SYCL host device. - /// TODO: Unused. Remove with ABI break. - handler(std::shared_ptr Queue, bool IsHost); - - /// Constructs SYCL handler from the associated queue and the submission's - /// primary and secondary queue. - /// - /// \param Queue is a SYCL queue. This is equal to either PrimaryQueue or - /// SecondaryQueue. - /// \param PrimaryQueue is the primary SYCL queue of the submission. - /// \param SecondaryQueue is the secondary SYCL queue of the submission. This - /// is null if no secondary queue is associated with the submission. - /// \param IsHost indicates if this handler is created for SYCL host device. - /// TODO: Unused. Remove with ABI break. - handler(std::shared_ptr Queue, - std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, bool IsHost); /// Constructs SYCL handler from queue. /// diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1ce5f7b97940e..c07a6a0831416 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,18 +80,6 @@ void *getValueFromDynamicParameter( } // namespace detail -/// TODO: Unused. Remove with ABI break. -handler::handler(std::shared_ptr Queue, bool IsHost) - : handler(Queue, IsHost, /*CallerNeedsEvent=*/true) {} - -/// TODO: Unused. Remove with ABI break. -handler::handler(std::shared_ptr Queue, - std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, - bool IsHost) - : handler(Queue, PrimaryQueue, SecondaryQueue, IsHost, - /*CallerNeedsEvent=*/true) {} - handler::handler(std::shared_ptr Queue, bool IsHost, bool CallerNeedsEvent) : handler(Queue, Queue, nullptr, IsHost, CallerNeedsEvent) {} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 5e80ccce7e57f..6e94720705694 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3640,14 +3640,10 @@ _ZN4sycl3_V17handler6memsetEPvim _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE -_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb -_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE -_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb -_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17samplerC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V17samplerC1EP11_cl_samplerRKNS0_7contextE From 47fc2749f74d5bb57da80bebd45876bde07ce2be Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 28 Jun 2024 01:48:26 -0700 Subject: [PATCH 8/8] Revert "Remove unused APIs" This reverts commit b070d526a3f584be9d4bb77de72ede09106979d9. --- sycl/include/sycl/handler.hpp | 20 ++++++++++++++++++++ sycl/source/handler.cpp | 12 ++++++++++++ sycl/test/abi/sycl_symbols_linux.dump | 4 ++++ 3 files changed, 36 insertions(+) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index ff714fc34468d..9251c1b485123 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -461,6 +461,26 @@ template bool range_size_fits_in_size_t(const range &r) { /// \ingroup sycl_api class __SYCL_EXPORT handler { private: + /// Constructs SYCL handler from queue. + /// + /// \param Queue is a SYCL queue. + /// \param IsHost indicates if this handler is created for SYCL host device. + /// TODO: Unused. Remove with ABI break. + handler(std::shared_ptr Queue, bool IsHost); + + /// Constructs SYCL handler from the associated queue and the submission's + /// primary and secondary queue. + /// + /// \param Queue is a SYCL queue. This is equal to either PrimaryQueue or + /// SecondaryQueue. + /// \param PrimaryQueue is the primary SYCL queue of the submission. + /// \param SecondaryQueue is the secondary SYCL queue of the submission. This + /// is null if no secondary queue is associated with the submission. + /// \param IsHost indicates if this handler is created for SYCL host device. + /// TODO: Unused. Remove with ABI break. + handler(std::shared_ptr Queue, + std::shared_ptr PrimaryQueue, + std::shared_ptr SecondaryQueue, bool IsHost); /// Constructs SYCL handler from queue. /// diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c07a6a0831416..1ce5f7b97940e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,6 +80,18 @@ void *getValueFromDynamicParameter( } // namespace detail +/// TODO: Unused. Remove with ABI break. +handler::handler(std::shared_ptr Queue, bool IsHost) + : handler(Queue, IsHost, /*CallerNeedsEvent=*/true) {} + +/// TODO: Unused. Remove with ABI break. +handler::handler(std::shared_ptr Queue, + std::shared_ptr PrimaryQueue, + std::shared_ptr SecondaryQueue, + bool IsHost) + : handler(Queue, PrimaryQueue, SecondaryQueue, IsHost, + /*CallerNeedsEvent=*/true) {} + handler::handler(std::shared_ptr Queue, bool IsHost, bool CallerNeedsEvent) : handler(Queue, Queue, nullptr, IsHost, CallerNeedsEvent) {} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6e94720705694..5e80ccce7e57f 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3640,10 +3640,14 @@ _ZN4sycl3_V17handler6memsetEPvim _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE +_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb +_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE +_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb +_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17samplerC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V17samplerC1EP11_cl_samplerRKNS0_7contextE