From b6f2f83611d9ca1c555da5f4df2026926bcefe05 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 28 May 2025 12:46:11 -0700 Subject: [PATCH] [NFC][SYCL] More `queue_impl` passing by raw ptr/ref Same as https://github.com/intel/llvm/pull/18712, part of a bigger refactoring around internal RT APIs passing raw references instead of `std::shared_ptr<*_impl>`, similar to what has been implemented for `device_impl` earlier. --- sycl/source/detail/helpers.cpp | 8 ++++---- sycl/source/detail/helpers.hpp | 2 +- sycl/source/detail/jit_compiler.cpp | 10 +++++----- sycl/source/detail/jit_compiler.hpp | 3 +-- sycl/source/detail/scheduler/commands.cpp | 19 +++++++++---------- sycl/source/detail/scheduler/scheduler.cpp | 2 +- sycl/source/detail/scheduler/scheduler.hpp | 2 +- sycl/source/handler.cpp | 2 +- 8 files changed, 23 insertions(+), 25 deletions(-) diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 9ba94e3805ca7..095eb6db55238 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -38,9 +38,9 @@ markBufferAsInternal(const std::shared_ptr &BufImpl) { } std::tuple -retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, +retrieveKernelBinary(queue_impl &Queue, const char *KernelName, CGExecKernel *KernelCG) { - device_impl &Dev = Queue->getDeviceImpl(); + device_impl &Dev = Queue.getDeviceImpl(); bool isNvidia = Dev.getBackend() == backend::ext_oneapi_cuda; bool isHIP = Dev.getBackend() == backend::ext_oneapi_hip; if (isNvidia || isHIP) { @@ -59,7 +59,7 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, if (DeviceImage == DeviceImages.end()) { return {nullptr, nullptr}; } - auto ContextImpl = Queue->getContextImplPtr(); + auto ContextImpl = Queue.getContextImplPtr(); ur_program_handle_t Program = detail::ProgramManager::getInstance().createURProgram( **DeviceImage, ContextImpl, {createSyclObjFromImpl(Dev)}); @@ -80,7 +80,7 @@ retrieveKernelBinary(const QueueImplPtr 
&Queue, const char *KernelName, DeviceImage = SyclKernelImpl->getDeviceImage()->get_bin_image_ref(); Program = SyclKernelImpl->getDeviceImage()->get_ur_program_ref(); } else { - auto ContextImpl = Queue->getContextImplPtr(); + auto ContextImpl = Queue.getContextImplPtr(); DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage( KernelName, ContextImpl, &Dev); Program = detail::ProgramManager::getInstance().createURProgram( diff --git a/sycl/source/detail/helpers.hpp b/sycl/source/detail/helpers.hpp index 57287959b7575..dd93ac60b39e5 100644 --- a/sycl/source/detail/helpers.hpp +++ b/sycl/source/detail/helpers.hpp @@ -27,7 +27,7 @@ void waitEvents(std::vector DepEvents); #endif std::tuple -retrieveKernelBinary(const QueueImplPtr &, const char *KernelName, +retrieveKernelBinary(queue_impl &Queue, const char *KernelName, CGExecKernel *CGKernel = nullptr); } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index ffce162ecbab4..189bd3d145309 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -124,8 +124,8 @@ translateBinaryImageFormat(ur::DeviceBinaryType Type) { } } -static ::jit_compiler::BinaryFormat getTargetFormat(const QueueImplPtr &Queue) { - auto Backend = Queue->getDeviceImpl().getBackend(); +static ::jit_compiler::BinaryFormat getTargetFormat(queue_impl &Queue) { + auto Backend = Queue.getDeviceImpl().getBackend(); switch (Backend) { case backend::ext_oneapi_level_zero: case backend::opencl: @@ -143,7 +143,7 @@ static ::jit_compiler::BinaryFormat getTargetFormat(const QueueImplPtr &Queue) { #endif // _WIN32 ur_kernel_handle_t jit_compiler::materializeSpecConstants( - const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage, + queue_impl &Queue, const RTDeviceBinaryImage *BinImage, KernelNameStrRefT KernelName, const std::vector &SpecConstBlob) { #ifndef _WIN32 @@ -220,8 +220,8 @@ ur_kernel_handle_t 
jit_compiler::materializeSpecConstants( } RTDeviceBinaryImage MaterializedRTDevBinImage{&MaterializedRawDeviceImage}; - const auto &Context = Queue->get_context(); - const auto &Device = Queue->get_device(); + const auto &Context = Queue.get_context(); + const auto &Device = Queue.get_device(); auto NewKernel = PM.getOrCreateMaterializedKernel( MaterializedRTDevBinImage, Context, Device, KernelName, SpecConstBlob); diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index 22778fde819a7..0f0a3f3f5738c 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -32,13 +32,12 @@ using JITEnvVar = DynArray; namespace sycl { inline namespace _V1 { namespace detail { -using QueueImplPtr = std::shared_ptr; class jit_compiler { public: ur_kernel_handle_t - materializeSpecConstants(const QueueImplPtr &Queue, + materializeSpecConstants(queue_impl &Queue, const RTDeviceBinaryImage *BinImage, KernelNameStrRefT KernelName, const std::vector &SpecConstBlob); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 739242128f0a2..5baa1078d9ebc 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2383,7 +2383,7 @@ void SetArgBasedOnType( } static ur_result_t SetKernelParamsAndLaunch( - const QueueImplPtr &Queue, std::vector &Args, + queue_impl &Queue, std::vector &Args, const std::shared_ptr &DeviceImageImpl, ur_kernel_handle_t Kernel, NDRDescT &NDRDesc, std::vector &RawEvents, detail::event_impl *OutEventImpl, @@ -2395,8 +2395,7 @@ static ur_result_t SetKernelParamsAndLaunch( int KernelNumArgs = 0, detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr, bool KernelHasSpecialCaptures = true) { - assert(Queue && "Kernel submissions should have an associated queue"); - const AdapterPtr &Adapter = Queue->getAdapter(); + const AdapterPtr &Adapter = Queue.getAdapter(); if (SYCLConfig::get()) { std::vector 
Empty; @@ -2434,7 +2433,7 @@ static ur_result_t SetKernelParamsAndLaunch( auto setFunc = [&Adapter, Kernel, &DeviceImageImpl, &getMemAllocationFunc, &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { SetArgBasedOnType(Adapter, Kernel, DeviceImageImpl, getMemAllocationFunc, - Queue->getContextImplPtr(), Arg, NextTrueIndex); + Queue.getContextImplPtr(), Arg, NextTrueIndex); }; applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } @@ -2450,7 +2449,7 @@ static ur_result_t SetKernelParamsAndLaunch( Kernel, ImplicitLocalArg.value(), WorkGroupMemorySize, nullptr); } - adjustNDRangePerKernel(NDRDesc, Kernel, Queue->getDeviceImpl()); + adjustNDRangePerKernel(NDRDesc, Kernel, Queue.getDeviceImpl()); // Remember this information before the range dimensions are reversed const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); @@ -2464,7 +2463,7 @@ static ur_result_t SetKernelParamsAndLaunch( LocalSize = &NDRDesc.LocalSize[0]; else { Adapter->call( - Kernel, Queue->getDeviceImpl().getHandleRef(), + Kernel, Queue.getDeviceImpl().getHandleRef(), UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize), RequiredWGSize, /* pPropSizeRet = */ nullptr); @@ -2504,7 +2503,7 @@ static ur_result_t SetKernelParamsAndLaunch( ur_event_handle_t UREvent = nullptr; ur_result_t Error = Adapter->call_nocheck( - Queue->getHandleRef(), Kernel, NDRDesc.Dims, + Queue.getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], LocalSize, property_list.size(), property_list.data(), RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], @@ -2523,7 +2522,7 @@ static ur_result_t SetKernelParamsAndLaunch( Args...); } return Adapter->call_nocheck(Args...); - }(Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], + }(Queue.getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], LocalSize, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], OutEventImpl ? 
&UREvent : nullptr); @@ -2773,7 +2772,7 @@ void enqueueImpKernel( } Error = SetKernelParamsAndLaunch( - Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList, + *Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList, OutEventImpl, EliminatedArgMask, getMemAllocationFunc, KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize, BinImage, KernelName, KernelFuncPtr, KernelNumArgs, @@ -3279,7 +3278,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { const RTDeviceBinaryImage *BinImage = nullptr; if (detail::SYCLConfig::get()) { std::tie(BinImage, std::ignore) = - retrieveKernelBinary(MQueue, KernelName.data()); + retrieveKernelBinary(*MQueue, KernelName.data()); assert(BinImage && "Failed to obtain a binary image."); } enqueueImpKernel( diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 9a05b9740b2d9..025faba6064ee 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -615,7 +615,7 @@ void Scheduler::cleanupAuxiliaryResources(BlockingT Blocking) { } ur_kernel_handle_t Scheduler::completeSpecConstMaterialization( - [[maybe_unused]] const QueueImplPtr &Queue, + [[maybe_unused]] queue_impl &Queue, [[maybe_unused]] const RTDeviceBinaryImage *BinImage, [[maybe_unused]] KernelNameStrRefT KernelName, [[maybe_unused]] std::vector &SpecConstBlob) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index a8f7235ac0d05..69448ca817937 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -453,7 +453,7 @@ class Scheduler { void deferMemObjRelease(const std::shared_ptr &MemObj); ur_kernel_handle_t completeSpecConstMaterialization( - const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage, + queue_impl &Queue, const RTDeviceBinaryImage *BinImage, KernelNameStrRefT KernelName, std::vector &SpecConstBlob); void releaseResources(BlockingT 
Blocking = BlockingT::BLOCKING); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5361f19d83a52..4fb580e1ca12b 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -582,7 +582,7 @@ event handler::finalize() { const detail::RTDeviceBinaryImage *BinImage = nullptr; if (detail::SYCLConfig::get()) { std::tie(BinImage, std::ignore) = - detail::retrieveKernelBinary(MQueue, MKernelName.data()); + detail::retrieveKernelBinary(*MQueue, MKernelName.data()); assert(BinImage && "Failed to obtain a binary image."); } enqueueImpKernel(