From 89f1a3fcfcf7918f76730fec75c7137befb40980 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 19 Nov 2024 00:14:46 -0800 Subject: [PATCH] [SYCL] Optimize OOO profiling tag when profiling is enabled In the case where profiling tags are enqueued onto a queue that already has profiling enabled, the timestamp enqueue is redundant, as the event resulting from the barrier would be sufficient. This commit short-circuits this case to simply return the event of a barrier submission. Signed-off-by: Larsen, Steffen --- .../sycl/ext/oneapi/experimental/profiling_tag.hpp | 8 ++++++++ sycl/unittests/Extensions/ProfilingTag.cpp | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp b/sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp index dec91f77a7c7c..e21ce21b2c70e 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp @@ -22,6 +22,14 @@ inline event submit_profiling_tag(queue &Queue, const sycl::detail::code_location &CodeLoc = sycl::detail::code_location::current()) { if (Queue.get_device().has(aspect::ext_oneapi_queue_profiling_tag)) { + // If the queue is out-of-order and profiling is enabled, the implementation + // can save some operations by just using the required barrier event + // directly. + if (!Queue.is_in_order() && + Queue.has_property<sycl::property::queue::enable_profiling>()) + return Queue.ext_oneapi_submit_barrier(); + + // Otherwise, we use the internal implementation of the profiling tag. 
return Queue.submit( [=](handler &CGH) { sycl::detail::HandlerAccess::internalProfilingTagImpl(CGH); diff --git a/sycl/unittests/Extensions/ProfilingTag.cpp b/sycl/unittests/Extensions/ProfilingTag.cpp index 7b18b9ba00e4e..bfdf36652f6d4 100644 --- a/sycl/unittests/Extensions/ProfilingTag.cpp +++ b/sycl/unittests/Extensions/ProfilingTag.cpp @@ -92,6 +92,8 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) { "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", &after_urEventGetProfilingInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, @@ -101,8 +103,11 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) { ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag)); + // As an optimization, the implementation will use a single barrier when + // submitting a profiling tag on an out-of-order queue with profiling enabled. sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); - ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{0}, counter_urEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrier); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value());