diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index 526f160c6596b..379a8d1d211f2 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -19,14 +19,10 @@ DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) Name(Info.Name.data()) #endif { - init(Name.data()); -} - -void DeviceKernelInfo::init(KernelNameStrRefT KernelName) { - auto &PM = detail::ProgramManager::getInstance(); - MUsesAssert = PM.kernelUsesAssert(KernelName); - MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // Non-legacy implementation either fills out the data during image + // registration after this constructor is called, or uses default values + // if this instance of DeviceKernelInfo corresponds to an interop kernel. MInitialized.store(true); #endif } @@ -36,9 +32,19 @@ void DeviceKernelInfo::initIfEmpty(const CompileTimeKernelInfoTy &Info) { if (MInitialized.load()) return; - CompileTimeKernelInfoTy::operator=(Info); - Name = Info.Name.data(); - init(Name.data()); + // If this function is called, then this is a default initialized + // device kernel info created from older headers and stored in global handler. + // In that case, fetch the proper instance from program manager and copy its + // values. + auto &PM = detail::ProgramManager::getInstance(); + DeviceKernelInfo &PMDeviceKernelInfo = + PM.getDeviceKernelInfo(KernelNameStrRefT(Info.Name)); + + PMDeviceKernelInfo.CompileTimeKernelInfoTy::operator=(Info); + PMDeviceKernelInfo.Name = Info.Name.data(); + + MUsesAssert = PMDeviceKernelInfo.MUsesAssert; + MImplicitLocalArgPos = PMDeviceKernelInfo.MImplicitLocalArgPos; } #endif @@ -78,18 +84,25 @@ FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() { assertInitialized(); return MFastKernelSubcache; } -bool DeviceKernelInfo::usesAssert() { +bool DeviceKernelInfo::usesAssert() const { assertInitialized(); return MUsesAssert; } -const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() { +const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() const { assertInitialized(); return MImplicitLocalArgPos; } +void DeviceKernelInfo::setUsesAssert() { MUsesAssert = true; } + +void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) { + assert(!MImplicitLocalArgPos.has_value() || MImplicitLocalArgPos == Pos); + MImplicitLocalArgPos = Pos; +} + bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; } -void DeviceKernelInfo::assertInitialized() { +void DeviceKernelInfo::assertInitialized() const { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES assert(MInitialized.load() && "Data needs to be initialized before use"); #endif diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 0ea4ff2d051e6..a5d6c2527a6bb 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -108,11 +108,14 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info); FastKernelSubcacheT &getKernelSubcache(); - bool usesAssert(); - const std::optional &getImplicitLocalArgPos(); + bool usesAssert() const; + const std::optional &getImplicitLocalArgPos() const; + + void setUsesAssert(); + void setImplicitLocalArgPos(int Pos); private: - void assertInitialized(); + void assertInitialized() const; bool isCompileTimeInfoSet() const; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/detail/get_device_kernel_info.cpp b/sycl/source/detail/get_device_kernel_info.cpp index 084eeeb60d714..536ce75b82143 100644 --- a/sycl/source/detail/get_device_kernel_info.cpp +++ b/sycl/source/detail/get_device_kernel_info.cpp @@ -22,7 +22,7 @@ KernelNameBasedCacheT *createKernelNameBasedCache() { #endif DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { - return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info); + return ProgramManager::getInstance().getDeviceKernelInfo(Info); } } // namespace detail diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index dfed6881a3ef8..a72013e174023 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -31,8 +31,8 @@ kernel_impl::kernel_impl(Managed &&Kernel, MCreatedFromSource(true), MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this() : nullptr), - MIsInterop(true), MKernelArgMaskPtr{ArgMask}, - MInteropDeviceKernelInfo(createCompileTimeKernelInfo(getName())) { + MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true), + MDeviceKernelInfo(createCompileTimeKernelInfo(getName())) { ur_context_handle_t UrContext = nullptr; // Using the adapter from the passed ContextImpl getAdapter().call( @@ -59,9 +59,11 @@ kernel_impl::kernel_impl(Managed &&Kernel, MKernelBundleImpl(KernelBundleImpl.shared_from_this()), MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex}, - MInteropDeviceKernelInfo(MIsInterop - ? createCompileTimeKernelInfo(getName()) - : createCompileTimeKernelInfo()) { + MOwnsDeviceKernelInfo(MDeviceImageImpl->getOriginMask() & + ~ImageOriginSYCLOffline), + MDeviceKernelInfo(MOwnsDeviceKernelInfo + ? createCompileTimeKernelInfo(getName()) + : createCompileTimeKernelInfo()) { // Enable USM indirect access for interop and non-sycl-jit source kernels. // sycl-jit kernels will enable this if needed through the regular kernel // path. diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 7d83d4ecf68cc..8219b5b86ba3e 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -240,9 +240,9 @@ class kernel_impl { std::string_view getName() const; DeviceKernelInfo &getDeviceKernelInfo() { - return MIsInterop - ? MInteropDeviceKernelInfo - : ProgramManager::getInstance().getOrCreateDeviceKernelInfo( + return MOwnsDeviceKernelInfo + ? MDeviceKernelInfo + : ProgramManager::getInstance().getDeviceKernelInfo( KernelNameStrT(getName())); } @@ -259,9 +259,11 @@ class kernel_impl { std::mutex *MCacheMutex = nullptr; mutable std::string MName; - // It is used for the interop kernels only. + // Used for images that aren't obtained with standard SYCL offline + // compilation. // For regular kernel we get DeviceKernelInfo from the ProgramManager. - DeviceKernelInfo MInteropDeviceKernelInfo; + bool MOwnsDeviceKernelInfo = false; + DeviceKernelInfo MDeviceKernelInfo; bool isBuiltInKernel(device_impl &Device) const; void checkIfValidForNumArgsInfoQuery() const; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 07ed72c0df423..e73c4b3c70f3e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1797,8 +1797,11 @@ void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) { const RTDeviceBinaryImage::PropertyRange &AssertUsedRange = Img.getAssertUsed(); if (AssertUsedRange.isAvailable()) - for (const auto &Prop : AssertUsedRange) - m_KernelUsesAssert.insert(Prop->Name); + for (const auto &Prop : AssertUsedRange) { + auto It = m_DeviceKernelInfoMap.find(Prop->Name); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setUsesAssert(); + } } void ProgramManager::cacheKernelImplicitLocalArg( @@ -1807,36 +1810,27 @@ void ProgramManager::cacheKernelImplicitLocalArg( Img.getImplicitLocalArg(); if (ImplicitLocalArgRange.isAvailable()) for (auto Prop : ImplicitLocalArgRange) { - m_KernelImplicitLocalArgPos[Prop->Name] = - DeviceBinaryProperty(Prop).asUint32(); + auto It = m_DeviceKernelInfoMap.find(Prop->Name); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32()); } } -std::optional -ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { - auto it = m_KernelImplicitLocalArgPos.find(KernelName); - if (it != m_KernelImplicitLocalArgPos.end()) - return it->second; - return {}; -} - -DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo( - const CompileTimeKernelInfoTy &Info) { +DeviceKernelInfo & +ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); - auto [Iter, Inserted] = - m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name.data()}, Info); - if (!Inserted) - Iter->second.setCompileTimeInfoIfNeeded(Info); - return Iter->second; + auto It = m_DeviceKernelInfoMap.find(KernelNameStrT{Info.Name.data()}); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setCompileTimeInfoIfNeeded(Info); + return It->second; } DeviceKernelInfo & -ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) { +ProgramManager::getDeviceKernelInfo(KernelNameStrRefT KernelName) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); - CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(KernelName)}; - auto Result = - m_DeviceKernelInfoMap.try_emplace(KernelName, DefaultCompileTimeInfo); - return Result.first->second; + auto It = m_DeviceKernelInfoMap.find(KernelName); + assert(It != m_DeviceKernelInfoMap.end()); + return It->second; } static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, @@ -2039,6 +2033,10 @@ void ProgramManager::addImage(sycl_device_binary RawImg, m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); KernelIDs->push_back(It->second); + CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)}; + m_DeviceKernelInfoMap.try_emplace(KernelNameStrT(name), + DefaultCompileTimeInfo); + // Keep track of image to kernel name reference count for cleanup. m_KernelNameRefCount[name]++; } @@ -2232,8 +2230,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { if (--RefCount == 0) { // TODO aggregate all these maps into a single one since their entries // share lifetime. - m_KernelUsesAssert.erase(Name); - m_KernelImplicitLocalArgPos.erase(Name); m_DeviceKernelInfoMap.erase(Name); m_KernelNameRefCount.erase(RefCountIt); if (Name2IDIt != m_KernelName2KernelIDs.end()) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index b9d0dc700f77c..c7fcd73b8ae43 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -365,19 +365,13 @@ class ProgramManager { ProgramManager(); ~ProgramManager() = default; - template - bool kernelUsesAssert(const NameT &KernelName) const { - return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); - } - SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } - std::optional - kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; + void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img); + void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - DeviceKernelInfo & - getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); - DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName); + DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + DeviceKernelInfo &getDeviceKernelInfo(KernelNameStrRefT KernelName); std::set getRawDeviceImages(const std::vector &KernelIDs); @@ -406,12 +400,6 @@ class ProgramManager { /// Dumps image to current directory void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const; - /// Add info on kernels using assert into cache - void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img); - - /// Add info on kernels using local arg into cache - void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - std::set collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev); @@ -518,14 +506,6 @@ class ProgramManager { bool m_UseSpvFile = false; RTDeviceBinaryImageUPtr m_SpvFileImage; - // std::less<> is a transparent comparator that enabled comparison between - // different types without temporary key_type object creation. This includes - // standard overloads, such as comparison between std::string and - // std::string_view or just char*. - using KernelUsesAssertSet = std::set>; - KernelUsesAssertSet m_KernelUsesAssert; - std::unordered_map m_KernelImplicitLocalArgPos; - // Map for storing device kernel information. Runtime lookup should be avoided // by caching the pointers when possible. std::unordered_map m_DeviceKernelInfoMap; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 26477c99be62c..f776983b69080 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -567,7 +567,7 @@ event handler::finalize() { // Fetch the device kernel info pointer if it hasn't been set (e.g. // in kernel bundle or free function cases). impl->MKernelData.setDeviceKernelInfoPtr( - &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( + &detail::ProgramManager::getInstance().getDeviceKernelInfo( toKernelNameStrT(MKernelName))); } assert(impl->MKernelData.getKernelName() == MKernelName); @@ -974,7 +974,7 @@ void handler::extractArgsAndReqs() { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES if (impl->MKernelData.getDeviceKernelInfoPtr() == nullptr) { impl->MKernelData.setDeviceKernelInfoPtr( - &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( + &detail::ProgramManager::getInstance().getDeviceKernelInfo( detail::toKernelNameStrT(MKernel->getName()))); } #endif @@ -2249,8 +2249,7 @@ void handler::setKernelNameBasedCachePtr( HandlerInfo.IsESIMD = impl->MKernelIsESIMD; HandlerInfo.HasSpecialCaptures = impl->MKernelHasSpecialCaptures; impl->MKernelData.setDeviceKernelInfoPtr( - &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( - HandlerInfo)); + &detail::ProgramManager::getInstance().getDeviceKernelInfo(HandlerInfo)); } void handler::setKernelInfo( diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 1bcbfa7676255..40025f19abddf 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -73,13 +73,6 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_EliminatedKernelArgMasks; } - KernelUsesAssertSet &getKernelUsesAssert() { return m_KernelUsesAssert; } - - std::unordered_map & - getKernelImplicitLocalArgPos() { - return m_KernelImplicitLocalArgPos; - } - std::unordered_map> & getHostPipes() { @@ -311,11 +304,6 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM, "Kernel name reference count " + CommentPostfix); EXPECT_EQ(PM.getEliminatedKernelArgMask().size(), ExpectedImgCount) << "Eliminated kernel arg mask " + CommentPostfix; - checkContainer(PM.getKernelUsesAssert(), ExpectedEntryCount, - generateRefNames(ImgIds, "Kernel"), - "KernelUsesAssert " + CommentPostfix); - EXPECT_EQ(PM.getKernelImplicitLocalArgPos().size(), ExpectedEntryCount) - << "Kernel implicit local arg pos " + CommentPostfix; if (!MultipleImgsPerEntryTestCase) { // FIXME expected to fail for now, device globals cleanup seems to be @@ -365,10 +353,6 @@ TEST(ImageRemoval, BaseContainers) { generateRefName("B", "HostPipe").c_str()); PM.addOrInitHostPipeEntry(PipeC::get_host_ptr(), generateRefName("C", "HostPipe").c_str()); - std::vector KernelNames = - generateRefNames({"A", "B", "C"}, "Kernel"); - for (const std::string &Name : KernelNames) - PM.getOrCreateDeviceKernelInfo(Name); checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(), {"A", "B", "C"}, "check failed before removal"); @@ -392,8 +376,6 @@ TEST(ImageRemoval, MultipleImagesPerEntry) { convertAndAddImages(PM, ImagesToRemoveSameEntries, NativeImagesForRemoval, TestBinaries); - std::string KernelName = generateRefName("A", "Kernel"); - PM.getOrCreateDeviceKernelInfo(KernelName); checkAllInvolvedContainers( PM, ImagesToRemoveSameEntries.size() + ImagesToKeepSameEntries.size(), /*ExpectedEntryCount*/ 1, {"A"}, "check failed before removal",