diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d484e10b5e5f5..b2db2f5cfb53a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -74,7 +74,10 @@ event queue_impl::memset(const std::shared_ptr &Self, event ResEvent = prepareUSMEvent(Self, NativeEvent); // Track only if we won't be able to handle it with piQueueFinish. - if (!MSupportOOO) + // FIXME these events are stored for Level Zero as a workaround; remove + // once piEventRelease no longer calls wait on the event in the plugin. + if (!MSupportOOO || + getPlugin().getBackend() == backend::ext_oneapi_level_zero) addSharedEvent(ResEvent); return MDiscardEvents ? createDiscardedEvent() : ResEvent; } @@ -96,7 +99,10 @@ event queue_impl::memcpy(const std::shared_ptr &Self, event ResEvent = prepareUSMEvent(Self, NativeEvent); // Track only if we won't be able to handle it with piQueueFinish. - if (!MSupportOOO) + // FIXME these events are stored for Level Zero as a workaround; remove + // once piEventRelease no longer calls wait on the event in the plugin. + if (!MSupportOOO || + getPlugin().getBackend() == backend::ext_oneapi_level_zero) addSharedEvent(ResEvent); return MDiscardEvents ? createDiscardedEvent() : ResEvent; } @@ -119,7 +125,10 @@ event queue_impl::mem_advise(const std::shared_ptr &Self, event ResEvent = prepareUSMEvent(Self, NativeEvent); // Track only if we won't be able to handle it with piQueueFinish. - if (!MSupportOOO) + // FIXME these events are stored for Level Zero as a workaround; remove + // once piEventRelease no longer calls wait on the event in the plugin. + if (!MSupportOOO || + getPlugin().getBackend() == backend::ext_oneapi_level_zero) addSharedEvent(ResEvent); return MDiscardEvents ? 
createDiscardedEvent() : ResEvent; } @@ -132,17 +141,13 @@ void queue_impl::addEvent(const event &Event) { // if there is no command on the event, we cannot track it with MEventsWeak // as that will leave it with no owner. Track in MEventsShared only if we're // unable to call piQueueFinish during wait. - if (is_host() || !MSupportOOO) + // FIXME these events are stored for Level Zero as a workaround; + // remove once piEventRelease no longer calls wait on the event in the + // plugin. + if (is_host() || !MSupportOOO || + getPlugin().getBackend() == backend::ext_oneapi_level_zero) addSharedEvent(Event); - } - // As long as the queue supports piQueueFinish we only need to store events - // with command nodes in the following cases: - // 1. Unenqueued commands, since they aren't covered by piQueueFinish. - // 2. Kernels with streams, since they are not supported by post enqueue - // cleanup. - // 3. Host tasks, for both reasons. - else if (is_host() || !MSupportOOO || EImpl->getHandleRef() == nullptr || - EImpl->needsCleanupAfterWait()) { + } else { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); @@ -153,7 +158,10 @@ void queue_impl::addEvent(const event &Event) { /// but some events have no other owner. In this case, /// addSharedEvent will have the queue track the events via a shared pointer. void queue_impl::addSharedEvent(const event &Event) { - assert(is_host() || !MSupportOOO); + // FIXME The assertion should be corrected once the Level Zero workaround is + // removed. + assert(is_host() || !MSupportOOO || + getPlugin().getBackend() == backend::ext_oneapi_level_zero); std::lock_guard Lock(MMutex); // Events stored in MEventsShared are not released anywhere else aside from // calls to queue::wait/wait_and_throw, which a user application might not @@ -286,31 +294,50 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { // directly. 
Otherwise, only wait for unenqueued or host task events, starting // from the latest submitted task in order to minimize total amount of calls, // then handle the rest with piQueueFinish. - const bool SupportsPiFinish = !is_host() && MSupportOOO; - for (auto EventImplWeakPtrIt = WeakEvents.rbegin(); - EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) { - if (std::shared_ptr EventImplSharedPtr = - EventImplWeakPtrIt->lock()) { - // A nullptr PI event indicates that piQueueFinish will not cover it, - // either because it's a host task event or an unenqueued one. - if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef()) { - EventImplSharedPtr->wait(EventImplSharedPtr); - } - } - } - if (SupportsPiFinish) { - const detail::plugin &Plugin = getPlugin(); - Plugin.call(getHandleRef()); + // TODO the new workflow has worse performance with Level Zero, keep the old + // behavior until this is addressed + if (!is_host() && + getPlugin().getBackend() == backend::ext_oneapi_level_zero) { for (std::weak_ptr &EventImplWeakPtr : WeakEvents) if (std::shared_ptr EventImplSharedPtr = EventImplWeakPtr.lock()) - if (EventImplSharedPtr->needsCleanupAfterWait()) - EventImplSharedPtr->cleanupCommand(EventImplSharedPtr); - assert(SharedEvents.empty() && "Queues that support calling piQueueFinish " - "shouldn't have shared events"); - } else { + EventImplSharedPtr->wait(EventImplSharedPtr); for (event &Event : SharedEvents) Event.wait(); + } else { + bool SupportsPiFinish = !is_host() && MSupportOOO; + for (auto EventImplWeakPtrIt = WeakEvents.rbegin(); + EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) { + if (std::shared_ptr EventImplSharedPtr = + EventImplWeakPtrIt->lock()) { + // A nullptr PI event indicates that piQueueFinish will not cover it, + // either because it's a host task event or an unenqueued one. 
+ if (!SupportsPiFinish || + nullptr == EventImplSharedPtr->getHandleRef()) { + EventImplSharedPtr->wait(EventImplSharedPtr); + } + } + } + if (SupportsPiFinish) { + const detail::plugin &Plugin = getPlugin(); + Plugin.call(getHandleRef()); + for (std::weak_ptr &EventImplWeakPtr : WeakEvents) + if (std::shared_ptr EventImplSharedPtr = + EventImplWeakPtr.lock()) + EventImplSharedPtr->cleanupCommand(EventImplSharedPtr); + // FIXME these events are stored for Level Zero as a workaround; + // remove once piEventRelease no longer calls wait on the event in the + // plugin. + if (Plugin.getBackend() == backend::ext_oneapi_level_zero) { + SharedEvents.clear(); + } + assert(SharedEvents.empty() && + "Queues that support calling piQueueFinish " + "shouldn't have shared events"); + } else { + for (event &Event : SharedEvents) + Event.wait(); + } } #ifdef XPTI_ENABLE_INSTRUMENTATION instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);