Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 26 additions & 13 deletions sycl/source/detail/device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,10 @@ DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info)
Name(Info.Name.data())
#endif
{
init(Name.data());
}

void DeviceKernelInfo::init(KernelNameStrRefT KernelName) {
auto &PM = detail::ProgramManager::getInstance();
MUsesAssert = PM.kernelUsesAssert(KernelName);
MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// Non-legacy implementation either fills out the data during image
// registration after this constructor is called, or uses default values
// if this instance of DeviceKernelInfo corresponds to an interop kernel.
MInitialized.store(true);
#endif
}
Expand All @@ -36,9 +32,19 @@ void DeviceKernelInfo::initIfEmpty(const CompileTimeKernelInfoTy &Info) {
if (MInitialized.load())
return;

CompileTimeKernelInfoTy::operator=(Info);
Name = Info.Name.data();
init(Name.data());
// If this function is called, then this is a default-initialized
// device kernel info created from older headers and stored in the global
// handler. In that case, fetch the proper instance from the program manager
// and copy its values.
auto &PM = detail::ProgramManager::getInstance();
DeviceKernelInfo &PMDeviceKernelInfo =
PM.getDeviceKernelInfo(KernelNameStrRefT(Info.Name));

PMDeviceKernelInfo.CompileTimeKernelInfoTy::operator=(Info);
PMDeviceKernelInfo.Name = Info.Name.data();

MUsesAssert = PMDeviceKernelInfo.MUsesAssert;
MImplicitLocalArgPos = PMDeviceKernelInfo.MImplicitLocalArgPos;
}
#endif

Expand Down Expand Up @@ -78,18 +84,25 @@ FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
assertInitialized();
return MFastKernelSubcache;
}
bool DeviceKernelInfo::usesAssert() {
bool DeviceKernelInfo::usesAssert() const {
assertInitialized();
return MUsesAssert;
}
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() const {
assertInitialized();
return MImplicitLocalArgPos;
}

// Marks this kernel as one that uses assert. The flag is only ever raised
// here; this setter never clears it.
void DeviceKernelInfo::setUsesAssert() {
  MUsesAssert = true;
}

// Stores the position of the implicit local argument for this kernel.
//
// \param Pos position to record.
//
// The position may be set more than once, but every call is expected to
// supply the same value: in builds where assert is active, a call whose Pos
// differs from an already stored position aborts. With no stored position,
// value_or(Pos) yields Pos itself and the check trivially passes.
void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) {
  assert(MImplicitLocalArgPos.value_or(Pos) == Pos);
  MImplicitLocalArgPos = Pos;
}

bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }

void DeviceKernelInfo::assertInitialized() {
void DeviceKernelInfo::assertInitialized() const {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
assert(MInitialized.load() && "Data needs to be initialized before use");
#endif
Expand Down
9 changes: 6 additions & 3 deletions sycl/source/detail/device_kernel_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,14 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);

FastKernelSubcacheT &getKernelSubcache();
bool usesAssert();
const std::optional<int> &getImplicitLocalArgPos();
bool usesAssert() const;
const std::optional<int> &getImplicitLocalArgPos() const;

void setUsesAssert();
void setImplicitLocalArgPos(int Pos);

private:
void assertInitialized();
void assertInitialized() const;
bool isCompileTimeInfoSet() const;

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/get_device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ KernelNameBasedCacheT *createKernelNameBasedCache() {
#endif

DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info);
return ProgramManager::getInstance().getDeviceKernelInfo(Info);
}

} // namespace detail
Expand Down
12 changes: 7 additions & 5 deletions sycl/source/detail/kernel_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
MCreatedFromSource(true),
MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this()
: nullptr),
MIsInterop(true), MKernelArgMaskPtr{ArgMask},
MInteropDeviceKernelInfo(createCompileTimeKernelInfo(getName())) {
MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true),
MDeviceKernelInfo(createCompileTimeKernelInfo(getName())) {
ur_context_handle_t UrContext = nullptr;
// Using the adapter from the passed ContextImpl
getAdapter().call<UrApiKind::urKernelGetInfo>(
Expand All @@ -59,9 +59,11 @@ kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
MKernelBundleImpl(KernelBundleImpl.shared_from_this()),
MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop),
MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex},
MInteropDeviceKernelInfo(MIsInterop
? createCompileTimeKernelInfo(getName())
: createCompileTimeKernelInfo()) {
MOwnsDeviceKernelInfo(MDeviceImageImpl->getOriginMask() &
~ImageOriginSYCLOffline),
MDeviceKernelInfo(MOwnsDeviceKernelInfo
? createCompileTimeKernelInfo(getName())
: createCompileTimeKernelInfo()) {
// Enable USM indirect access for interop and non-sycl-jit source kernels.
// sycl-jit kernels will enable this if needed through the regular kernel
// path.
Expand Down
12 changes: 7 additions & 5 deletions sycl/source/detail/kernel_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,9 @@ class kernel_impl {
std::string_view getName() const;

DeviceKernelInfo &getDeviceKernelInfo() {
return MIsInterop
? MInteropDeviceKernelInfo
: ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
return MOwnsDeviceKernelInfo
? MDeviceKernelInfo
: ProgramManager::getInstance().getDeviceKernelInfo(
KernelNameStrT(getName()));
}

Expand All @@ -259,9 +259,11 @@ class kernel_impl {
std::mutex *MCacheMutex = nullptr;
mutable std::string MName;

// It is used for the interop kernels only.
// Used for images that aren't obtained with standard SYCL offline
// compilation.
// For regular kernel we get DeviceKernelInfo from the ProgramManager.
DeviceKernelInfo MInteropDeviceKernelInfo;
bool MOwnsDeviceKernelInfo = false;
DeviceKernelInfo MDeviceKernelInfo;

bool isBuiltInKernel(device_impl &Device) const;
void checkIfValidForNumArgsInfoQuery() const;
Expand Down
48 changes: 22 additions & 26 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1797,8 +1797,11 @@ void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) {
const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
Img.getAssertUsed();
if (AssertUsedRange.isAvailable())
for (const auto &Prop : AssertUsedRange)
m_KernelUsesAssert.insert(Prop->Name);
for (const auto &Prop : AssertUsedRange) {
auto It = m_DeviceKernelInfoMap.find(Prop->Name);
assert(It != m_DeviceKernelInfoMap.end());
It->second.setUsesAssert();
}
}

void ProgramManager::cacheKernelImplicitLocalArg(
Expand All @@ -1807,36 +1810,27 @@ void ProgramManager::cacheKernelImplicitLocalArg(
Img.getImplicitLocalArg();
if (ImplicitLocalArgRange.isAvailable())
for (auto Prop : ImplicitLocalArgRange) {
m_KernelImplicitLocalArgPos[Prop->Name] =
DeviceBinaryProperty(Prop).asUint32();
auto It = m_DeviceKernelInfoMap.find(Prop->Name);
assert(It != m_DeviceKernelInfoMap.end());
It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32());
}
}

std::optional<int>
ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
if (it != m_KernelImplicitLocalArgPos.end())
return it->second;
return {};
}

DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
const CompileTimeKernelInfoTy &Info) {
DeviceKernelInfo &
ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
auto [Iter, Inserted] =
m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name.data()}, Info);
if (!Inserted)
Iter->second.setCompileTimeInfoIfNeeded(Info);
return Iter->second;
auto It = m_DeviceKernelInfoMap.find(KernelNameStrT{Info.Name.data()});
assert(It != m_DeviceKernelInfoMap.end());
It->second.setCompileTimeInfoIfNeeded(Info);
return It->second;
}

DeviceKernelInfo &
ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) {
ProgramManager::getDeviceKernelInfo(KernelNameStrRefT KernelName) {
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(KernelName)};
auto Result =
m_DeviceKernelInfoMap.try_emplace(KernelName, DefaultCompileTimeInfo);
return Result.first->second;
auto It = m_DeviceKernelInfoMap.find(KernelName);
assert(It != m_DeviceKernelInfoMap.end());
return It->second;
}

static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg,
Expand Down Expand Up @@ -2039,6 +2033,10 @@ void ProgramManager::addImage(sycl_device_binary RawImg,
m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
KernelIDs->push_back(It->second);

CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)};
m_DeviceKernelInfoMap.try_emplace(KernelNameStrT(name),
DefaultCompileTimeInfo);

// Keep track of image to kernel name reference count for cleanup.
m_KernelNameRefCount[name]++;
}
Expand Down Expand Up @@ -2232,8 +2230,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) {
if (--RefCount == 0) {
// TODO aggregate all these maps into a single one since their entries
// share lifetime.
m_KernelUsesAssert.erase(Name);
m_KernelImplicitLocalArgPos.erase(Name);
m_DeviceKernelInfoMap.erase(Name);
m_KernelNameRefCount.erase(RefCountIt);
if (Name2IDIt != m_KernelName2KernelIDs.end())
Expand Down
28 changes: 4 additions & 24 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,19 +365,13 @@ class ProgramManager {
ProgramManager();
~ProgramManager() = default;

template <typename NameT>
bool kernelUsesAssert(const NameT &KernelName) const {
return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
}

SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }

std::optional<int>
kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const;
void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img);
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);

DeviceKernelInfo &
getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName);
DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
DeviceKernelInfo &getDeviceKernelInfo(KernelNameStrRefT KernelName);

std::set<const RTDeviceBinaryImage *>
getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
Expand Down Expand Up @@ -406,12 +400,6 @@ class ProgramManager {
/// Dumps image to current directory
void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;

/// Add info on kernels using assert into cache
void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img);

/// Add info on kernels using local arg into cache
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);

std::set<const RTDeviceBinaryImage *>
collectDependentDeviceImagesForVirtualFunctions(
const RTDeviceBinaryImage &Img, const device_impl &Dev);
Expand Down Expand Up @@ -518,14 +506,6 @@ class ProgramManager {
bool m_UseSpvFile = false;
RTDeviceBinaryImageUPtr m_SpvFileImage;

// std::less<> is a transparent comparator that enabled comparison between
// different types without temporary key_type object creation. This includes
// standard overloads, such as comparison between std::string and
// std::string_view or just char*.
using KernelUsesAssertSet = std::set<KernelNameStrT, std::less<>>;
KernelUsesAssertSet m_KernelUsesAssert;
std::unordered_map<KernelNameStrT, int> m_KernelImplicitLocalArgPos;

// Map for storing device kernel information. Runtime lookup should be avoided
// by caching the pointers when possible.
std::unordered_map<KernelNameStrT, DeviceKernelInfo> m_DeviceKernelInfoMap;
Expand Down
7 changes: 3 additions & 4 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ event handler::finalize() {
// Fetch the device kernel info pointer if it hasn't been set (e.g.
// in kernel bundle or free function cases).
impl->MKernelData.setDeviceKernelInfoPtr(
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
toKernelNameStrT(MKernelName)));
}
assert(impl->MKernelData.getKernelName() == MKernelName);
Expand Down Expand Up @@ -974,7 +974,7 @@ void handler::extractArgsAndReqs() {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
if (impl->MKernelData.getDeviceKernelInfoPtr() == nullptr) {
impl->MKernelData.setDeviceKernelInfoPtr(
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
detail::toKernelNameStrT(MKernel->getName())));
}
#endif
Expand Down Expand Up @@ -2249,8 +2249,7 @@ void handler::setKernelNameBasedCachePtr(
HandlerInfo.IsESIMD = impl->MKernelIsESIMD;
HandlerInfo.HasSpecialCaptures = impl->MKernelHasSpecialCaptures;
impl->MKernelData.setDeviceKernelInfoPtr(
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
HandlerInfo));
&detail::ProgramManager::getInstance().getDeviceKernelInfo(HandlerInfo));
}

void handler::setKernelInfo(
Expand Down
18 changes: 0 additions & 18 deletions sycl/unittests/program_manager/Cleanup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,6 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager {
return m_EliminatedKernelArgMasks;
}

KernelUsesAssertSet &getKernelUsesAssert() { return m_KernelUsesAssert; }

std::unordered_map<sycl::detail::KernelNameStrT, int> &
getKernelImplicitLocalArgPos() {
return m_KernelImplicitLocalArgPos;
}

std::unordered_map<std::string,
std::unique_ptr<sycl::detail::HostPipeMapEntry>> &
getHostPipes() {
Expand Down Expand Up @@ -311,11 +304,6 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM,
"Kernel name reference count " + CommentPostfix);
EXPECT_EQ(PM.getEliminatedKernelArgMask().size(), ExpectedImgCount)
<< "Eliminated kernel arg mask " + CommentPostfix;
checkContainer(PM.getKernelUsesAssert(), ExpectedEntryCount,
generateRefNames(ImgIds, "Kernel"),
"KernelUsesAssert " + CommentPostfix);
EXPECT_EQ(PM.getKernelImplicitLocalArgPos().size(), ExpectedEntryCount)
<< "Kernel implicit local arg pos " + CommentPostfix;

if (!MultipleImgsPerEntryTestCase) {
// FIXME expected to fail for now, device globals cleanup seems to be
Expand Down Expand Up @@ -365,10 +353,6 @@ TEST(ImageRemoval, BaseContainers) {
generateRefName("B", "HostPipe").c_str());
PM.addOrInitHostPipeEntry(PipeC::get_host_ptr(),
generateRefName("C", "HostPipe").c_str());
std::vector<std::string> KernelNames =
generateRefNames({"A", "B", "C"}, "Kernel");
for (const std::string &Name : KernelNames)
PM.getOrCreateDeviceKernelInfo(Name);

checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(),
{"A", "B", "C"}, "check failed before removal");
Expand All @@ -392,8 +376,6 @@ TEST(ImageRemoval, MultipleImagesPerEntry) {
convertAndAddImages(PM, ImagesToRemoveSameEntries, NativeImagesForRemoval,
TestBinaries);

std::string KernelName = generateRefName("A", "Kernel");
PM.getOrCreateDeviceKernelInfo(KernelName);
checkAllInvolvedContainers(
PM, ImagesToRemoveSameEntries.size() + ImagesToKeepSameEntries.size(),
/*ExpectedEntryCount*/ 1, {"A"}, "check failed before removal",
Expand Down
Loading