diff --git a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h index e622db50dd364..1f6ecb54a0cce 100644 --- a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h +++ b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h @@ -130,6 +130,7 @@ class ModuleDesc { EntryPointGroup EntryPoints; bool IsTopLevel = false; mutable std::optional Reqs; + bool IsDummyImage = false; public: struct Properties { @@ -225,6 +226,9 @@ class ModuleDesc { void saveSplitInformationAsMetadata(); + ModuleDesc makeDummy() const; + bool isDummyImage() { return IsDummyImage; } + #ifndef NDEBUG void verifyESIMDProperty() const; void dump() const; diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index 904424f93dae6..ade3955600564 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -816,6 +816,15 @@ void ModuleDesc::saveSplitInformationAsMetadata() { SpecConstantsPass::SPEC_CONST_DEFAULT_VAL_MODULE_MD_STRING); } +ModuleDesc ModuleDesc::makeDummy() const { + ModuleDesc MD(CloneModule(getModule())); + MD.EntryPoints = EntryPoints; + MD.IsTopLevel = IsTopLevel; + MD.Reqs = Reqs; + MD.IsDummyImage = true; + return MD; +} + void EntryPointGroup::saveNames(std::vector &Dest) const { Dest.reserve(Dest.size() + Functions.size()); std::transform(Functions.begin(), Functions.end(), diff --git a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp index 126a03bdf03bf..b8cbf4debec85 100644 --- a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp @@ -665,6 +665,24 @@ void processDeclaredVirtualFunctionSets( StringMap> &VirtualFunctionSets) { if (!F->hasFnAttribute("calls-indirectly")) return; + + // "Construction" kernels which reference vtables but do not actually + // perform any virtual calls have the calls-indirectly attribute + // attached by SYCLVirtualFunctionAnalysis pass. We do not want to + // attach sycl_used_aspects metadata to such kernels. + bool hasVirtualCall = false; + for (const Instruction &I : instructions(F)) { + const auto *CI = dyn_cast(&I); + if (!CI) + continue; + if (CI->isIndirectCall() && CI->hasFnAttr("virtual-call")) { + hasVirtualCall = true; + break; + } + } + if (!hasVirtualCall) + return; + Attribute CallsIndirectlyAttr = F->getFnAttribute("calls-indirectly"); SmallVector DeclaredVirtualFunctionSetNames; CallsIndirectlyAttr.getValueAsString().split(DeclaredVirtualFunctionSetNames, diff --git a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-1.ll b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-1.ll index 709ca33eae3b0..5d7df6548282a 100644 --- a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-1.ll +++ b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-1.ll @@ -6,8 +6,9 @@ define spir_func void @vfn() #0 { ret void } -; CHECK: @foo() #1 !sycl_used_aspects ![[#aspects]] -define spir_kernel void @foo() #1 { +; CHECK: @foo({{.*}}) #1 !sycl_used_aspects ![[#aspects]] +define spir_kernel void @foo(ptr %f) #1 { + call void %f() #2 ret void } @@ -15,6 +16,7 @@ define spir_kernel void @foo() #1 { attributes #0 = { "indirectly-callable"="_ZTSv" } attributes #1 = { "calls-indirectly"="_ZTSv" } +attributes #2 = { "virtual-call" } !sycl_aspects = !{!0} !0 = !{!"fp64", i32 6} diff --git a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-2.ll b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-2.ll index ae600413378f1..6cdc09d9c74fb 100644 --- a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-2.ll +++ b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-2.ll @@ -15,8 +15,9 @@ define spir_func void @vfnBar() #1 { ret void } -; CHECK: @kernel() #2 !sycl_used_aspects ![[#aspectsKernel:]] -define spir_kernel void @kernel() #2 { +; CHECK: @kernel({{.*}}) #2 !sycl_used_aspects ![[#aspectsKernel:]] +define spir_kernel void @kernel(ptr %f) #2 { + call void %f() #3 ret void } @@ -27,6 +28,7 @@ define spir_kernel void @kernel() #2 { attributes #0 = { "indirectly-callable"="setFoo" } attributes #1 = { "indirectly-callable"="setBar" } attributes #2 = { "calls-indirectly"="setFoo,setBar" } +attributes #3 = { "virtual-call" } !sycl_aspects = !{!0} !0 = !{!"fp64", i32 6} diff --git a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-3.ll b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-3.ll index ada0f533ced56..eb3e4e4b9c566 100644 --- a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-3.ll +++ b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/VirtualFunctions/virtual-functions-3.ll @@ -25,13 +25,15 @@ define spir_func void @subBar() { ret void } -; CHECK: @kernelA() #2 !sycl_used_aspects ![[#aspectsFoo]] -define spir_kernel void @kernelA() #2 { +; CHECK: @kernelA({{.*}}) #2 !sycl_used_aspects ![[#aspectsFoo]] +define spir_kernel void @kernelA(ptr %f) #2 { + call void %f() #4 ret void } -; CHECK: @kernelB() #3 !sycl_used_aspects ![[#aspectsBar]] -define spir_kernel void @kernelB() #3 { +; CHECK: @kernelB({{.*}}) #3 !sycl_used_aspects ![[#aspectsBar]] +define spir_kernel void @kernelB(ptr %f) #3 { + call void %f() #4 ret void } @@ -42,6 +44,7 @@ attributes #0 = { "indirectly-callable"="setFoo" } attributes #1 = { "indirectly-callable"="setBar" } attributes #2 = { "calls-indirectly"="setFoo" } attributes #3 = { "calls-indirectly"="setBar" } +attributes #4 = { "virtual-call" } !sycl_aspects = !{!0} !0 = !{!"fp64", i32 6} diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll b/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll new file mode 100644 index 0000000000000..c2f7a3f81f85b --- /dev/null +++ b/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll @@ -0,0 +1,37 @@ +; RUN: sycl-post-link -split=auto -properties -S < %s -o %t.table +; RUN: FileCheck %s --input-file=%t.table --check-prefix=CHECK-TABLE +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix=CHECK-FP64-SPLIT +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix=CHECK-FP64-DUMMY +; RUN: FileCheck %s --input-file=%t_1.prop --check-prefix=CHECK-FP64-DUMMY-PROPS +; RUN: FileCheck %s --input-file=%t_2.ll --check-prefix=CHECK-FP32-SPLIT + +; CHECK-TABLE: _0.prop +; CHECK-TABLE-NEXT: _1.prop +; CHECK-TABLE-NEXT: _2.prop + +; CHECK-FP64-SPLIT: define spir_func void @bar() +; CHECK-FP32-SPLIT: define spir_func void @foo() + +; CHECK-FP64-DUMMY: define spir_func void @bar() +; CHECK-FP64-DUMMY-NEXT: entry: +; CHECK-FP64-DUMMY-NEXT: ret void + +; CHECK-FP64-DUMMY-PROPS: dummy-image=1 + +define spir_func void @foo() #1 { + %x = alloca float + ret void +} + +define spir_func void @bar() #1 !sycl_used_aspects !1 { + %x = alloca double + %d = load double, ptr %x + %res = fadd double %d, %d + ret void +} + +attributes #1 = { "sycl-module-id"="v.cpp" "indirectly-callable"="setA" } + +!sycl_aspects = !{!0} +!0 = !{!"fp64", i32 6} +!1 = !{i32 6} diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index e84faf464c42e..0d012546a4fe7 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -59,6 +59,7 @@ #include "llvm/Transforms/Scalar/DCE.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include @@ -296,11 +297,35 @@ void saveModuleIR(Module &M, StringRef OutFilename) { MPM.run(M, MAM); } -std::string saveModuleIR(Module &M, int I, StringRef Suff) { - DUMP_ENTRY_POINTS(M, EmitOnlyKernelsAsEntryPoints, "saving IR"); +std::unique_ptr makeDummyImageIR(const Module &M) { + auto MCopy = CloneModule(M); + for (Function &F : MCopy->functions()) { + if (!F.hasFnAttribute("indirectly-callable")) + continue; + + F.erase(F.begin(), F.end()); + BasicBlock *newBB = BasicBlock::Create(F.getContext(), "entry", &F); + IRBuilder<> builder(newBB); + if (F.getReturnType()->isVoidTy()) + builder.CreateRetVoid(); + else + builder.CreateRet(UndefValue::get(F.getReturnType())); + } + return MCopy; +} + +std::string saveModuleIR(module_split::ModuleDesc &MD, int I, StringRef Suff) { + std::unique_ptr Storage; + Module *M = &MD.getModule(); + if (MD.isDummyImage()) { + Storage = makeDummyImageIR(MD.getModule()); + M = Storage.get(); + } + + DUMP_ENTRY_POINTS(*M, EmitOnlyKernelsAsEntryPoints, "saving IR"); StringRef FileExt = (OutputAssembly) ? ".ll" : ".bc"; std::string OutFilename = makeResultFileName(FileExt, I, Suff); - saveModuleIR(M, OutFilename); + saveModuleIR(*M, OutFilename); return OutFilename; } @@ -318,6 +343,9 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, NewSuff += Target; } + if (MD.isDummyImage()) + PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, "dummy-image", 1); + std::error_code EC; std::string SCFile = makeResultFileName(".prop", I, NewSuff); raw_fd_ostream SCOut(SCFile, EC); @@ -425,7 +453,7 @@ void saveModule(std::vector> &OutTables, BaseTriple.Ir = IRFilename.str(); } else { MD.cleanup(); - BaseTriple.Ir = saveModuleIR(MD.getModule(), I, Suffix); + BaseTriple.Ir = saveModuleIR(MD, I, Suffix); } if (DoSymGen) { // save the names of the entry points - the symbol table @@ -741,6 +769,20 @@ bool isTargetCompatibleWithModule(const std::string &Target, return true; } +bool hasVirtualFunctionsAndOptionalKernelFeatures(const Module &M) { + bool hasVirtualFunctions = false; + bool hasOptionalKernelFeatures = false; + for (const Function &F : M.functions()) { + if (F.hasFnAttribute("indirectly-callable")) + hasVirtualFunctions = true; + if (F.getMetadata("sycl_used_aspects")) + hasOptionalKernelFeatures = true; + if (hasVirtualFunctions && hasOptionalKernelFeatures) + break; + } + return hasVirtualFunctions && hasOptionalKernelFeatures; +} + std::vector> processInputModule(std::unique_ptr M) { // Construct the resulting table which will accumulate all the outputs. @@ -895,6 +937,21 @@ processInputModule(std::unique_ptr M) { ++ID; } + + // For kernels with virtual functions and optional kernel features, generate + // a dummy image to avoid link errors. A dummy image for a set of virtual + // functions is a module with the same set of virtual functions, but with + // those function bodies replaced with just a return. + bool dummyEmitted = false; + for (module_split::ModuleDesc &IrMD : MMs) { + if ((dummyEmitted = hasVirtualFunctionsAndOptionalKernelFeatures( + IrMD.getModule()))) { + auto DummyImage = IrMD.makeDummy(); + saveModule(Tables, DummyImage, ID, OutIRFileName); + } + } + if (dummyEmitted) + ++ID; } return Tables; } diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 633a4269e1e78..a08c052f237fe 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -20,6 +20,35 @@ namespace sycl { inline namespace _V1 { namespace detail { +void printAspects(std::ostream &Out, ByteArray BA) { + BA.dropBytes(8); + Out << "["; + for (int i = 0; !BA.empty(); ++i) { + auto Aspect = BA.consume(); + switch (Aspect) { +#define __SYCL_ASPECT(ASPECT, ID) \ + case sycl::aspect::ASPECT: \ + Out << #ASPECT; \ + break; +#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) \ + case sycl::aspect::ASPECT: \ + Out << #ASPECT; \ + break; +#include +#include +#undef __SYCL_ASPECT +#undef __SYCL_ASPECT_DEPRECATED + default: + Out << "unknown (" << static_cast(Aspect) << ")"; + break; + } + if (i != 0) + Out << ", "; + } + Out << "]"; + return; +} + std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) { switch (P.Prop->Type) { case SYCL_PROPERTY_TYPE_UINT32: @@ -42,6 +71,20 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) { Out << P.asUint32(); break; case SYCL_PROPERTY_TYPE_BYTE_ARRAY: { + // Special case for aspects, print out the aspect names + if (P.Prop->Name == std::string_view("aspects")) { + printAspects(Out, P.asByteArray()); + break; + } + + // Special case for these properties, print out the value as a string + if (P.Prop->Name == std::string_view("virtual-functions-set") || + P.Prop->Name == std::string_view("uses-virtual-functions-set")) { + Out << P.asStringView(); + break; + } + + // Otherwise, print out the byte array as hex ByteArray BA = P.asByteArray(); std::ios_base::fmtflags FlagsBackup = Out.flags(); Out << std::hex; @@ -52,7 +95,7 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) { break; } case SYCL_PROPERTY_TYPE_STRING: - Out << P.asCString(); + Out << P.asStringView(); break; default: assert(false && "Unsupported property"); @@ -77,14 +120,14 @@ ByteArray DeviceBinaryProperty::asByteArray() const { return {Data, Prop->ValSize}; } -const char *DeviceBinaryProperty::asCString() const { +std::string_view DeviceBinaryProperty::asStringView() const { assert((Prop->Type == SYCL_PROPERTY_TYPE_STRING || Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY) && "property type mismatch"); assert(Prop->ValSize > 0 && "property size mismatch"); // Byte array stores its size in first 8 bytes size_t Shift = Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY ? 8 : 0; - return ur::cast(Prop->ValAddr) + Shift; + return {ur::cast(Prop->ValAddr) + Shift, Prop->ValSize - Shift}; } void RTDeviceBinaryImage::PropertyRange::init(sycl_device_binary Bin, diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 9aab698ef2749..546f88d2216d4 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -73,7 +73,7 @@ class DeviceBinaryProperty { uint32_t asUint32() const; ByteArray asByteArray() const; - const char *asCString() const; + std::string_view asStringView() const; protected: friend std::ostream &operator<<(std::ostream &Out, diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 035ca965ce2e5..ef0357118c0f6 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -692,10 +692,11 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( std::set HandledSets; std::queue WorkList; for (const sycl_device_binary_property &VFProp : Img.getVirtualFunctions()) { - std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + std::string_view StrValue = DeviceBinaryProperty(VFProp).asStringView(); // Device image passed to this function is expected to contain SYCL kernels // and therefore it may only use virtual function sets, but cannot provide - // them. We expect to see just a single property here + // them. Additionally, it cannot be a dummy image. + // We expect to see just a single property here assert(std::string(VFProp->Name) == "uses-virtual-functions-set" && "Unexpected virtual function property"); for (const auto &SetName : detail::split_string(StrValue, ',')) { @@ -716,23 +717,41 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // virtual-functions-set properties, but their handling is the same: we // just grab all sets they reference and add them for consideration if // we haven't done so already. + bool isDummyImage = false; for (const sycl_device_binary_property &VFProp : BinImage->getVirtualFunctions()) { - std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + if (VFProp->Name == std::string_view("dummy-image")) { + isDummyImage = true; + continue; + } + std::string_view StrValue = DeviceBinaryProperty(VFProp).asStringView(); for (const auto &SetName : detail::split_string(StrValue, ',')) { if (HandledSets.insert(SetName).second) WorkList.push(SetName); } } - // TODO: Complete this part about handling of incompatible device images. // If device image uses the same virtual function set, then we only // link it if it is compatible. // However, if device image provides virtual function set and it is // incompatible, then we should link its "dummy" version to avoid link // errors about unresolved external symbols. - if (doesDevSupportDeviceRequirements(Dev, *BinImage)) + // Note: we only link when exactly one of + // doesDevSupportDeviceRequirements(Dev, *BinImage) and + // isDummyImage is true. We don't want to link every dummy image, + // otherwise we could run into linking errors defining the same symbol + // multiple times. For every image providing virtual functions that has + // a dummy image, the dummy image will have the same device requirements + // as the original image. So when the dummy image does support the + // device requirements, we know that the corresponding image providing + // actual definitions will be linked and not the dummy. And vice versa: + // when the dummy image does not support the device requirements, we + // know the corresponding image providing virtual functions was not + // linked and we must link the dummy image. + if (doesDevSupportDeviceRequirements(Dev, *BinImage) + isDummyImage == + 1) { DeviceImagesToLink.insert(BinImage); + } } } @@ -1825,6 +1844,9 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { else Img = std::make_unique(RawImg); + if (std::getenv("SYCL_PRINT_IMAGES")) + Img->print(); + static uint32_t SequenceID = 0; // Fill the kernel argument mask map @@ -1850,7 +1872,9 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { // Record mapping between virtual function sets and device images for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { - std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + if (VFProp->Name == std::string_view("dummy-image")) + continue; + std::string_view StrValue = DeviceBinaryProperty(VFProp).asStringView(); for (const auto &SetName : detail::split_string(StrValue, ',')) m_VFSet2BinImage[SetName].insert(Img.get()); } diff --git a/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp b/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp new file mode 100644 index 0000000000000..79a302dde4bf2 --- /dev/null +++ b/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp @@ -0,0 +1,77 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +#include +#include + +namespace syclext = sycl::ext::oneapi::experimental; + +struct set_fp64; + +struct Base { + bool fooCalled = false; + SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(syclext::indirectly_callable) + virtual void foo() { fooCalled = true; } + + bool barCalled = false; + SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(syclext::indirectly_callable_in) + virtual void bar() { + // this virtual function uses double + volatile double d = 3.14; + barCalled = true; + } +}; + +class Constructor; +class Use; +class UseFP64; + +int main() { + // Selected device may not support 'fp64' aspect + sycl::queue Q; + + Base *Obj = sycl::malloc_shared(1, Q); + + Q.single_task([=]() { + // Even though at LLVM IR level this kernel does reference 'Base::foo' + // and 'Base::bar' through global variable containing `vtable` for `Base`, + // we do not consider the kernel to be using `fp64` optional feature. + new (Obj) Base; + }).wait(); + + Q.single_task(syclext::properties{syclext::assume_indirect_calls}, + [=]() { + // This kernel is not considered to be using any optional + // features, because virtual functions in default set do + // not use any. + Obj->foo(); + }) + .wait(); + + if (Q.get_device().has(sycl::aspect::fp64)) { + Q.single_task( + syclext::properties{syclext::assume_indirect_calls_to}, + [=]() { + // This kernel is considered to be using 'fp64' optional feature, + // because there is a virtual function in 'set_fp64' which uses + // double. + Obj->bar(); + }) + .wait(); + } + + int nfails = 0; + if (!Obj->fooCalled) { + std::cerr << "Error: 'foo' was not called\n"; + ++nfails; + } + if (Q.get_device().has(sycl::aspect::fp64) && !Obj->barCalled) { + std::cerr << "Error: 'bar' was not called\n"; + ++nfails; + } + if (!Q.get_device().has(sycl::aspect::fp64) && Obj->barCalled) { + std::cerr << "Error: 'bar' was called, but should not have been\n"; + ++nfails; + } + + return 0; +} \ No newline at end of file diff --git a/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp b/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp index 61e0c5f5f32a5..3ebc7d6ce7fd8 100644 --- a/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp +++ b/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp @@ -18,6 +18,7 @@ class KernelD; class KernelE; class KernelF; class KernelG; +class KernelH; } // namespace VirtualFunctionsTest @@ -39,6 +40,7 @@ KERNEL_INFO(KernelD) KERNEL_INFO(KernelE) KERNEL_INFO(KernelF) KERNEL_INFO(KernelG) +KERNEL_INFO(KernelH) #undef KERNEL_INFO @@ -48,9 +50,13 @@ KERNEL_INFO(KernelG) static sycl::unittest::MockDeviceImage generateImage(std::initializer_list KernelNames, - const std::string &VFSets, bool UsesVFSets, unsigned char Magic) { + const std::string &VFSets, bool UsesVFSets, unsigned char Magic, + bool IsDummyImage = false, + std::vector Aspects = {}) { sycl::unittest::MockPropertySet PropSet; - std::vector Props; + + // Construct virtual function properties + std::vector VFProps; uint64_t PropSize = VFSets.size(); std::vector Storage(/* bytes for size */ 8 + PropSize + /* null terminator */ 1); @@ -64,9 +70,22 @@ generateImage(std::initializer_list KernelNames, sycl::unittest::MockProperty Prop(PropName, Storage, SYCL_PROPERTY_TYPE_BYTE_ARRAY); - Props.push_back(Prop); - PropSet.insert(__SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS, std::move(Props)); + VFProps.push_back(Prop); + if (IsDummyImage) + VFProps.emplace_back("dummy-image", std::vector(4), + SYCL_PROPERTY_TYPE_UINT32); + + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS, + std::move(VFProps)); + + // Construct device requirement properties + std::vector DeviceRequirmentsProps; + DeviceRequirmentsProps.emplace_back(sycl::unittest::makeAspectsProp(Aspects)); + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS, + std::move(DeviceRequirmentsProps)); + + // Assemble final device image std::vector Bin{Magic}; std::vector Entries = @@ -99,6 +118,9 @@ static constexpr unsigned PROGRAM_E0 = 37; static constexpr unsigned PROGRAM_F = 41; static constexpr unsigned PROGRAM_F0 = 47; static constexpr unsigned PROGRAM_F1 = 53; +static constexpr unsigned PROGRAM_H = 59; +static constexpr unsigned PROGRAM_H0 = 61; +static constexpr unsigned PROGRAM_H0d = 67; // Device images with no entires are ignored by SYCL RT during registration. // Therefore, we have to provide some kernel names to make the test work, even @@ -128,10 +150,16 @@ static sycl::unittest::MockDeviceImage Imgs[] = { generateImage({"KernelF"}, "set-f", /* uses vf set */ true, PROGRAM_F), generateImage({"DummyKernel7"}, "set-f", /* provides vf set */ false, PROGRAM_F0), - generateImage({"KernelG"}, "set-f", /* uses vf set */ true, PROGRAM_F1)}; + generateImage({"KernelG"}, "set-f", /* uses vf set */ true, PROGRAM_F1), + generateImage({"KernelH"}, "set-h", /* uses vf set */ true, PROGRAM_H, + false, {}), + generateImage({"DummyKernel7"}, "set-h", /* provides vf set */ false, + PROGRAM_H0, /* isDummy */ false, {sycl::aspect::fp64}), + generateImage({"DummyKernel7d"}, "set-h", /* provides vf set */ false, + PROGRAM_H0d, /* isDummy */ true, {sycl::aspect::fp64})}; // Registers mock devices images in the SYCL RT -static sycl::unittest::MockDeviceImageArray<15> ImgArray{Imgs}; +static sycl::unittest::MockDeviceImageArray ImgArray{Imgs}; TEST(VirtualFunctions, SingleKernelUsesSingleVFSet) { sycl::unittest::UrMock<> Mock; @@ -262,4 +290,101 @@ TEST(VirtualFunctions, TwoKernelsShareTheSameSet) { PROGRAM_F * PROGRAM_F0 * PROGRAM_F1); } +struct MockDeviceData { + std::string Extensions; + ur_device_handle_t getHandle() { + return reinterpret_cast(this); + } + static MockDeviceData *fromHandle(ur_device_handle_t handle) { + return reinterpret_cast(handle); + } +}; + +MockDeviceData MockDevices[] = { + {"cl_khr_fp64"}, + {""}, +}; + +static ur_result_t redefinedDeviceGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) { + **params.ppNumDevices = static_cast(std::size(MockDevices)); + return UR_RESULT_SUCCESS; + } + + if (*params.pphDevices) { + assert(*params.pNumEntries <= std::size(MockDevices)); + for (uint32_t i = 0; i < *params.pNumEntries; ++i) { + (*params.pphDevices)[i] = MockDevices[i].getHandle(); + } + } + + return UR_RESULT_SUCCESS; +} + +static ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto *params = reinterpret_cast(pParams); + if (*params->ppropName == UR_DEVICE_INFO_EXTENSIONS) { + const std::string &Extensions = + MockDeviceData::fromHandle(*params->phDevice)->Extensions; + if (*params->ppPropValue) { + assert(*params->ppropSize >= Extensions.size() + 1); + std::memcpy(*params->ppPropValue, Extensions.data(), + Extensions.size() + 1); + } + if (*params->ppPropSizeRet && + **params->ppPropSizeRet < Extensions.size() + 1) + **params->ppPropSizeRet = Extensions.size() + 1; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_SUCCESS; +} + +TEST(VirtualFunctions, DummyImages) { + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); + mock::getCallbacks().set_after_callback("urDeviceGet", &redefinedDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + + sycl::platform Plt = sycl::platform(); + sycl::queue Q(sycl::aspect_selector({sycl::aspect::fp64})); + EXPECT_TRUE(Q.get_device().has(sycl::aspect::fp64)); + + CapturedLinkingData.clear(); + + // KernelF uses set "set-h" that is also used by KernelG + Q.single_task([=]() {}); + // When we submit this kernel, we expect that two programs were created (one + // for KernelH, another providing "set-h" + EXPECT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 2u); + // Both programs should be linked together. + EXPECT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); + // The module providing set-h is set up to use fp64, + // and since the device support fp64, we link the + // non-dummy version that provides set-h. + EXPECT_TRUE( + CapturedLinkingData.LinkedProgramsContains({PROGRAM_H, PROGRAM_H0})); + EXPECT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, + PROGRAM_H * PROGRAM_H0); + + CapturedLinkingData.clear(); + + EXPECT_EQ(Plt.get_devices().size(), 2u); + sycl::queue Q2(sycl::aspect_selector({}, {sycl::aspect::fp64})); + + // We now repeat what we did launching KernelH but on another + // device that does not support fp64. + Q2.single_task([=]() {}); + EXPECT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 2u); + EXPECT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); + + // However, this time, we expect the dummy image to be linked + // as the device does not support fp64. + EXPECT_TRUE( + CapturedLinkingData.LinkedProgramsContains({PROGRAM_H, PROGRAM_H0d})); + EXPECT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, + PROGRAM_H * PROGRAM_H0d); +} + // TODO: Add test cases for kernel_bundle usage diff --git a/sycl/unittests/helpers/MockDeviceImage.hpp b/sycl/unittests/helpers/MockDeviceImage.hpp index fea80d6b08c3e..0c2d375c3e810 100644 --- a/sycl/unittests/helpers/MockDeviceImage.hpp +++ b/sycl/unittests/helpers/MockDeviceImage.hpp @@ -558,7 +558,7 @@ inline MockProperty makeAspectsProp(const std::vector &Aspects) { uint64_t ValDataSize = ValData.size(); std::uninitialized_copy(&ValDataSize, &ValDataSize + sizeof(uint64_t), ValData.data()); - auto *AspectsPtr = reinterpret_cast(&Aspects[0]); + auto *AspectsPtr = reinterpret_cast(Aspects.data()); std::uninitialized_copy(AspectsPtr, AspectsPtr + Aspects.size(), ValData.data() + BYTES_FOR_SIZE); return {"aspects", ValData, SYCL_PROPERTY_TYPE_BYTE_ARRAY};