Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 136 additions & 62 deletions sycl/include/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ runKernelWithArg(KernelType KernelName, ArgType Arg) {
KernelName(Arg);
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// The pure virtual class aimed to store lambda/functors of any type.
class HostKernelBase {
public:
Expand All @@ -160,74 +161,147 @@ class HostKernelBase {
// NOTE: InstatiateKernelOnHost() should not be called.
virtual void InstantiateKernelOnHost() = 0;
};
#endif

// Class which stores specific lambda object.
template <class KernelType, class KernelArgType, int Dims>
class HostKernel : public HostKernelBase {
using IDBuilder = sycl::detail::Builder;
KernelType MKernel;
// Allowing accessing MKernel from 'ResetHostKernelHelper' method of
// 'sycl::handler'
friend class sycl::handler;
class HostKernel
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
: public HostKernelBase
#endif
{
// SYCL kernels must be device-copyable, so simply storing bytes is enough for
// RT purposes. However, accessor/stream don't seem to be
// `std::trivially_copyable`, so we still do placement new/manual destructor
// invocation as some e2e tests would fail otherwise.
std::unique_ptr<char[]> KernelBytes;
void (*KernelDeleter)(void *) = nullptr;
// NOTE: This is *NOT* for debugger only. Host-side optimizations affect
// device code linking, see
// `test-e2e/SeparateCompile/sycl-external-within-staticlib.cpp`.
void (*InstantiateOnHostHelper)(void *) = nullptr;

public:
HostKernel(KernelType Kernel) : MKernel(Kernel) {}

char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }

~HostKernel() = default;

// This function is needed for host-side compilation to keep kernels
// instantitated. This is important for debuggers to be able to associate
// kernel code instructions with source code lines.
// NOTE: InstatiateKernelOnHost() should not be called.
void InstantiateKernelOnHost() override {
if constexpr (std::is_same_v<KernelArgType, void>) {
runKernelWithoutArg(MKernel);
} else if constexpr (std::is_same_v<KernelArgType, sycl::id<Dims>>) {
sycl::id ID = InitializedVal<Dims, id>::template get<0>();
runKernelWithArg<const KernelArgType &>(MKernel, ID);
} else if constexpr (std::is_same_v<KernelArgType, item<Dims, true>> ||
std::is_same_v<KernelArgType, item<Dims, false>>) {
constexpr bool HasOffset =
std::is_same_v<KernelArgType, item<Dims, true>>;
if constexpr (!HasOffset) {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
template <class KernelType, class KernelArgType, int Dims>
struct InstantiateKernelOnHostHelper {
static void foo(void *ptr) {
auto &MKernel = *static_cast<KernelType *>(ptr);
using IDBuilder = sycl::detail::Builder;
if constexpr (std::is_same_v<KernelArgType, void>) {
runKernelWithoutArg(MKernel);
} else if constexpr (std::is_same_v<KernelArgType, sycl::id<Dims>>) {
sycl::id ID = InitializedVal<Dims, id>::template get<0>();
runKernelWithArg<const KernelArgType &>(MKernel, ID);
} else if constexpr (std::is_same_v<KernelArgType, item<Dims, true>> ||
std::is_same_v<KernelArgType, item<Dims, false>>) {
constexpr bool HasOffset =
std::is_same_v<KernelArgType, item<Dims, true>>;
if constexpr (!HasOffset) {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
} else {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
}
} else if constexpr (std::is_same_v<KernelArgType, nd_item<Dims>>) {
sycl::range<Dims> Range =
InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
sycl::group<Dims> Group =
IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
sycl::item<Dims, true> GlobalItem =
IDBuilder::createItem<Dims, true>(Range, ID, ID);
sycl::item<Dims, false> LocalItem =
IDBuilder::createItem<Dims, false>(Range, ID);
KernelArgType NDItem =
IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
runKernelWithArg<const KernelArgType>(MKernel, NDItem);
} else if constexpr (std::is_same_v<KernelArgType, sycl::group<Dims>>) {
sycl::range<Dims> Range =
InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
KernelArgType Group =
IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
runKernelWithArg<KernelArgType>(MKernel, Group);
} else {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
// Assume that anything else can be default-constructed. If not, this
// should fail to compile and the implementor should implement a generic
// case for the new argument type.
runKernelWithArg<KernelArgType>(MKernel, KernelArgType{});
}
} else if constexpr (std::is_same_v<KernelArgType, nd_item<Dims>>) {
sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
sycl::group<Dims> Group =
IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
sycl::item<Dims, true> GlobalItem =
IDBuilder::createItem<Dims, true>(Range, ID, ID);
sycl::item<Dims, false> LocalItem =
IDBuilder::createItem<Dims, false>(Range, ID);
KernelArgType NDItem =
IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
runKernelWithArg<const KernelArgType>(MKernel, NDItem);
} else if constexpr (std::is_same_v<KernelArgType, sycl::group<Dims>>) {
sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
KernelArgType Group =
IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
runKernelWithArg<KernelArgType>(MKernel, Group);
} else {
// Assume that anything else can be default-constructed. If not, this
// should fail to compile and the implementor should implement a generic
// case for the new argument type.
runKernelWithArg<KernelArgType>(MKernel, KernelArgType{});
}
};

template <typename KernelType> struct Deleter {
static void execute(void *p) {
static_cast<KernelType *>(p)->~KernelType();
}
};

public:
HostKernel() = default;
HostKernel(HostKernel &&Other)
: KernelBytes(std::move(Other.KernelBytes)),
KernelDeleter(Other.KernelDeleter) {
Other.KernelDeleter = nullptr;
}
HostKernel &operator=(HostKernel &&Other) {
if (KernelDeleter)
KernelDeleter(KernelBytes.get());
KernelBytes = std::move(Other.KernelBytes);
KernelDeleter = Other.KernelDeleter;
Other.KernelDeleter = nullptr;
return *this;
}

// Can't specify explicit template parameters when invoking a ctor, so has to
// be a static member function.
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
template <class KernelType, class KernelArgType, int Dims>
static HostKernel create(KernelType Kernel) {
HostKernel Tmp;
Tmp.KernelBytes.reset(
new (std::align_val_t(alignof(KernelType))) char[sizeof(Kernel)]);
// Note, `device_copyable` isn't the same as `std::is_trivially_copyable`,
// so `memcpy` wouldn't be enough.
new (Tmp.KernelBytes.get()) KernelType(Kernel);
Tmp.KernelDeleter = &Deleter<KernelType>::execute;
Tmp.InstantiateOnHostHelper = &InstantiateKernelOnHostHelper<KernelType, KernelArgType, Dims>::foo;
return Tmp;
}
#else
template <class KernelType, class KernelArgType, int Dims>
static std::unique_ptr<HostKernelBase> create(KernelType Kernel) {
auto Unique = std::make_unique<HostKernel>();
Unique->KernelBytes.reset(
new (std::align_val_t(alignof(KernelType))) char[sizeof(Kernel)]);
// Note, `device_copyable` isn't the same as `std::is_trivially_copyable`,
// so `memcpy` wouldn't be enough.
new (Unique->KernelBytes.get()) KernelType(Kernel);
Unique->KernelDeleter = &Deleter<KernelType>::execute;
Unique->InstantiateOnHostHelper = &InstantiateKernelOnHostHelper<KernelType, KernelArgType, Dims>::foo;
return Unique;
}
#endif

#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
char *getPtr() { return KernelBytes.get(); }
~HostKernel() {
if (KernelDeleter)
KernelDeleter(KernelBytes.get());
}
#else
// Non-preview needs `override`s.
char *getPtr() override { return KernelBytes.get(); }
~HostKernel() override {
if (KernelDeleter)
KernelDeleter(KernelBytes.get());
}

void InstantiateKernelOnHost() override {}
#endif
};

} // namespace detail
Expand Down
19 changes: 14 additions & 5 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -721,8 +721,8 @@ class __SYCL_EXPORT handler {
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
LambdaArgType>::value;

MHostKernel = std::make_unique<
detail::HostKernel<KernelType, LambdaArgType, Dims>>(KernelFunc);
MHostKernel =
detail::HostKernel::create<KernelType, LambdaArgType, Dims>(KernelFunc);

constexpr bool KernelHasName =
detail::getKernelName<KernelName>() != nullptr &&
Expand Down Expand Up @@ -753,9 +753,14 @@ class __SYCL_EXPORT handler {
if (KernelHasName) {
// TODO support ESIMD in no-integration-header case too.
clearArgs();
extractArgsAndReqsFromLambda(MHostKernel->getPtr(),
detail::getKernelParamDescs<KernelName>(),
detail::isKernelESIMD<KernelName>());
extractArgsAndReqsFromLambda(
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
MHostKernel.getPtr(),
#else
MHostKernel->getPtr(),
#endif
detail::getKernelParamDescs<KernelName>(),
detail::isKernelESIMD<KernelName>());
MKernelName = detail::getKernelName<KernelName>();
} else {
// In case w/o the integration header it is necessary to process
Expand Down Expand Up @@ -3434,7 +3439,11 @@ class __SYCL_EXPORT handler {
/// Pattern that is used to fill memory object in case command type is fill.
std::vector<unsigned char> MPattern;
/// Storage for a lambda or function object.
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
detail::HostKernel MHostKernel;
#else
std::unique_ptr<detail::HostKernelBase> MHostKernel;
#endif

detail::code_location MCodeLoc = {};
bool MIsFinalized = false;
Expand Down
20 changes: 17 additions & 3 deletions sycl/source/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#pragma once

#include <sycl/accessor.hpp> // for AccessorImplHost, AccessorImplPtr
#include <sycl/detail/cg_types.hpp> // for ArgDesc, HostTask, HostKernelBase
#include <sycl/detail/cg_types.hpp> // for ArgDesc, HostTask, HostKernel
#include <sycl/detail/common.hpp> // for code_location
#include <sycl/detail/helpers.hpp> // for context_impl
#include <sycl/detail/ur.hpp> // for ur_rect_region_t, ur_rect_offset_t
Expand Down Expand Up @@ -251,7 +251,11 @@ class CGExecKernel : public CG {
public:
/// Stores ND-range description.
NDRDescT MNDRDesc;
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
std::shared_ptr<HostKernel> MHostKernel;
#else
std::shared_ptr<HostKernelBase> MHostKernel;
#endif
std::shared_ptr<detail::kernel_impl> MSyclKernel;
std::shared_ptr<detail::kernel_bundle_impl> MKernelBundle;
std::vector<ArgDesc> MArgs;
Expand All @@ -266,7 +270,12 @@ class CGExecKernel : public CG {
bool MKernelUsesClusterLaunch = false;
size_t MKernelWorkGroupMemorySize = 0;

CGExecKernel(NDRDescT NDRDesc, std::shared_ptr<HostKernelBase> HKernel,
CGExecKernel(NDRDescT NDRDesc,
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
HostKernel &&HKernel,
#else
std::shared_ptr<HostKernelBase> HKernel,
#endif
std::shared_ptr<detail::kernel_impl> SyclKernel,
std::shared_ptr<detail::kernel_bundle_impl> KernelBundle,
CG::StorageInitHelper CGData, std::vector<ArgDesc> Args,
Expand All @@ -277,7 +286,12 @@ class CGExecKernel : public CG {
bool KernelIsCooperative, bool MKernelUsesClusterLaunch,
size_t KernelWorkGroupMemorySize, detail::code_location loc = {})
: CG(Type, std::move(CGData), std::move(loc)),
MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)),
MNDRDesc(std::move(NDRDesc)),
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
MHostKernel(std::make_shared<detail::HostKernel>(std::move(HKernel))),
#else
MHostKernel(std::move(HKernel)),
#endif
MSyclKernel(std::move(SyclKernel)),
MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)),
MKernelName(std::move(KernelName)), MStreams(std::move(Streams)),
Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/jit_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1049,9 +1049,9 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,

std::unique_ptr<detail::CG> FusedCG;
FusedCG.reset(new detail::CGExecKernel(
NDRDesc, nullptr, nullptr, std::move(KernelBundleImplPtr),
std::move(CGData), std::move(FusedArgs), FusedOrCachedKernelName, {}, {},
CGType::Kernel, KernelCacheConfig, false /* KernelIsCooperative */,
NDRDesc, {}, nullptr, std::move(KernelBundleImplPtr), std::move(CGData),
std::move(FusedArgs), FusedOrCachedKernelName, {}, {}, CGType::Kernel,
KernelCacheConfig, false /* KernelIsCooperative */,
false /* KernelUsesClusterLaunch*/, 0 /* KernelWorkGroupMemorySize */));
return FusedCG;
}
Expand Down
8 changes: 3 additions & 5 deletions sycl/unittests/scheduler/SchedulerTestUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,7 @@ class MockHandler : public sycl::handler {
std::vector<std::shared_ptr<sycl::detail::stream_impl>> &getStreamStorage() {
return MStreamStorage;
}
std::unique_ptr<sycl::detail::HostKernelBase> &getHostKernel() {
return MHostKernel;
}
auto &getHostKernel() { return MHostKernel; }
std::vector<std::vector<char>> &getArgsStorage() {
return impl->CGData.MArgsStorage;
}
Expand Down Expand Up @@ -264,8 +262,8 @@ class MockHandler : public sycl::handler {
template <typename KernelType, typename ArgType, int Dims,
typename KernelName>
void setHostKernel(KernelType Kernel) {
static_cast<sycl::handler *>(this)->MHostKernel.reset(
new sycl::detail::HostKernel<KernelType, ArgType, Dims>(Kernel));
static_cast<sycl::handler *>(this)->MHostKernel =
sycl::detail::HostKernel::create<KernelType, ArgType, Dims>(Kernel);
}

template <int Dims> void setNDRangeDesc(sycl::nd_range<Dims> Range) {
Expand Down
Loading