diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 66511838bdf7f..463181fd7ca5d 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -580,7 +580,7 @@ class kernel_bundle_impl "kernel_bundle state does not match the state of the SYCLBIN file."); std::vector BestImages = - SYCLBIN->getBestCompatibleImages(Devs); + SYCLBIN->getBestCompatibleImages(Devs, State); MDeviceImages.reserve(BestImages.size()); for (const detail::RTDeviceBinaryImage *Image : BestImages) MDeviceImages.emplace_back(device_image_impl::create( diff --git a/sycl/source/detail/syclbin.cpp b/sycl/source/detail/syclbin.cpp index d1be8d4df6848..90513f253a2cb 100644 --- a/sycl/source/detail/syclbin.cpp +++ b/sycl/source/detail/syclbin.cpp @@ -272,21 +272,36 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize) : SYCLBINContentCopy{ContentCopy(SYCLBINContent, SYCLBINSize)}, SYCLBINContentCopySize{SYCLBINSize}, ParsedSYCLBIN(SYCLBIN{SYCLBINContentCopy.get(), SYCLBINSize}) { - size_t NumJITBinaries = 0, NumNativeBinaries = 0; - for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) { - NumJITBinaries += AM.IRModules.size(); - NumNativeBinaries += AM.NativeDeviceCodeImages.size(); - } - DeviceBinaries.reserve(NumJITBinaries + NumNativeBinaries); - JITDeviceBinaryImages.reserve(NumJITBinaries); - NativeDeviceBinaryImages.reserve(NumNativeBinaries); + AbstractModuleDescriptors = std::unique_ptr( + new AbstractModuleDesc[ParsedSYCLBIN.AbstractModules.size()]); + + size_t NumBinaries = 0; + for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) + NumBinaries += AM.IRModules.size() + AM.NativeDeviceCodeImages.size(); + DeviceBinaries.reserve(NumBinaries); + BinaryImages = std::unique_ptr( + new RTDeviceBinaryImage[NumBinaries]); + + RTDeviceBinaryImage *CurrentBinaryImagesStart = BinaryImages.get(); + for (size_t I = 0; I < getNumAbstractModules(); ++I) { + SYCLBIN::AbstractModule &AM = ParsedSYCLBIN.AbstractModules[I]; + AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I]; + + // Set up the abstract module descriptor. + AMDesc.NumJITBinaries = AM.IRModules.size(); + AMDesc.NumNativeBinaries = AM.NativeDeviceCodeImages.size(); + AMDesc.JITBinaries = CurrentBinaryImagesStart; + AMDesc.NativeBinaries = CurrentBinaryImagesStart + AMDesc.NumJITBinaries; + CurrentBinaryImagesStart += + AMDesc.NumJITBinaries + AM.NativeDeviceCodeImages.size(); - for (SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) { // Construct properties from SYCLBIN metadata. std::vector<_sycl_device_binary_property_set_struct> &BinPropertySets = convertAbstractModuleProperties(AM); - for (SYCLBIN::IRModule &IRM : AM.IRModules) { + for (size_t J = 0; J < AM.IRModules.size(); ++J) { + SYCLBIN::IRModule &IRM = AM.IRModules[J]; + sycl_device_binary_struct &DeviceBinary = DeviceBinaries.emplace_back(); DeviceBinary.Version = SYCL_DEVICE_BINARY_VERSION; DeviceBinary.Kind = 4; @@ -309,11 +324,12 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize) DeviceBinary.PropertySetsEnd = BinPropertySets.data() + BinPropertySets.size(); // Create an image from it. - JITDeviceBinaryImages.emplace_back(&DeviceBinary); + AMDesc.JITBinaries[J] = RTDeviceBinaryImage{&DeviceBinary}; } - for (const SYCLBIN::NativeDeviceCodeImage &NDCI : - AM.NativeDeviceCodeImages) { + for (size_t J = 0; J < AM.NativeDeviceCodeImages.size(); ++J) { + const SYCLBIN::NativeDeviceCodeImage &NDCI = AM.NativeDeviceCodeImages[J]; + assert(NDCI.Metadata != nullptr); PropertySet &NDCIMetadataProps = (*NDCI.Metadata) [PropertySetRegistry::SYCLBIN_NATIVE_DEVICE_CODE_IMAGE_METADATA]; @@ -346,7 +362,7 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize) DeviceBinary.PropertySetsEnd = BinPropertySets.data() + BinPropertySets.size(); // Create an image from it. - NativeDeviceBinaryImages.emplace_back(&DeviceBinary); + AMDesc.NativeBinaries[J] = RTDeviceBinaryImage{&DeviceBinary}; } } } @@ -394,33 +410,44 @@ SYCLBINBinaries::convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM) { } std::vector -SYCLBINBinaries::getBestCompatibleImages(device_impl &Dev) { - auto SelectCompatibleImages = - [&](const std::vector &Imgs) { - std::vector CompatImgs; - for (const RTDeviceBinaryImage &Img : Imgs) - if (doesDevSupportDeviceRequirements(Dev, Img) && - doesImageTargetMatchDevice(Img, Dev)) - CompatImgs.push_back(&Img); - return CompatImgs; - }; - - // Try with native images first. - std::vector NativeImgs = - SelectCompatibleImages(NativeDeviceBinaryImages); - if (!NativeImgs.empty()) - return NativeImgs; - - // If there were no native images, pick JIT images. - return SelectCompatibleImages(JITDeviceBinaryImages); +SYCLBINBinaries::getBestCompatibleImages(device_impl &Dev, bundle_state State) { + auto GetCompatibleImage = [&](const RTDeviceBinaryImage *Imgs, + size_t NumImgs) { + const RTDeviceBinaryImage *CompatImagePtr = + std::find_if(Imgs, Imgs + NumImgs, [&](const RTDeviceBinaryImage &Img) { + return doesDevSupportDeviceRequirements(Dev, Img) && + doesImageTargetMatchDevice(Img, Dev); + }); + return (CompatImagePtr != Imgs + NumImgs) ? CompatImagePtr : nullptr; + }; + + std::vector Images; + for (size_t I = 0; I < getNumAbstractModules(); ++I) { + const AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I]; + // If the target state is executable, try with native images first. + if (State == bundle_state::executable) { + if (const RTDeviceBinaryImage *CompatImagePtr = GetCompatibleImage( + AMDesc.NativeBinaries, AMDesc.NumNativeBinaries)) { + Images.push_back(CompatImagePtr); + continue; + } + } + + // Otherwise, select the first compatible JIT binary. + if (const RTDeviceBinaryImage *CompatImagePtr = + GetCompatibleImage(AMDesc.JITBinaries, AMDesc.NumJITBinaries)) + Images.push_back(CompatImagePtr); + } + return Images; } std::vector -SYCLBINBinaries::getBestCompatibleImages(devices_range Devs) { +SYCLBINBinaries::getBestCompatibleImages(devices_range Devs, + bundle_state State) { std::set Images; for (device_impl &Dev : Devs) { std::vector BestImagesForDev = - getBestCompatibleImages(Dev); + getBestCompatibleImages(Dev, State); Images.insert(BestImagesForDev.cbegin(), BestImagesForDev.cend()); } return {Images.cbegin(), Images.cend()}; diff --git a/sycl/source/detail/syclbin.hpp b/sycl/source/detail/syclbin.hpp index 4f56eb594ac0c..02b9ec8348c99 100644 --- a/sycl/source/detail/syclbin.hpp +++ b/sycl/source/detail/syclbin.hpp @@ -124,9 +124,9 @@ struct SYCLBINBinaries { ~SYCLBINBinaries() = default; std::vector - getBestCompatibleImages(device_impl &Dev); + getBestCompatibleImages(device_impl &Dev, bundle_state State); std::vector - getBestCompatibleImages(devices_range Dev); + getBestCompatibleImages(devices_range Dev, bundle_state State); uint8_t getState() const { PropertySet &GlobalMetadata = @@ -143,6 +143,10 @@ struct SYCLBINBinaries { std::vector<_sycl_device_binary_property_set_struct> & convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM); + size_t getNumAbstractModules() const { + return ParsedSYCLBIN.AbstractModules.size(); + } + std::unique_ptr SYCLBINContentCopy = nullptr; size_t SYCLBINContentCopySize = 0; @@ -156,8 +160,16 @@ struct SYCLBINBinaries { BinaryPropertySets; std::vector DeviceBinaries; - std::vector JITDeviceBinaryImages; - std::vector NativeDeviceBinaryImages; + + struct AbstractModuleDesc { + size_t NumJITBinaries; + size_t NumNativeBinaries; + RTDeviceBinaryImage *JITBinaries; + RTDeviceBinaryImage *NativeBinaries; + }; + + std::unique_ptr AbstractModuleDescriptors; + std::unique_ptr BinaryImages; }; } // namespace detail diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp index 8f131061c60a5..18364e6b05ace 100644 --- a/sycl/test-e2e/SYCLBIN/basic_input.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -8,6 +8,10 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce input-state SYCLBIN files. + // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input // -- state. diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp index efa3accb63332..39d5d45bb30ad 100644 --- a/sycl/test-e2e/SYCLBIN/basic_object.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -8,12 +8,13 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce object-state SYCLBIN files. + // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object // -- state. -// UNSUPPORTED: hip -// UNSUPPORTED-INTENDED: HIP backend does not implement linking. - // RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dae_input.cpp b/sycl/test-e2e/SYCLBIN/dae_input.cpp index b096f4febcf6e..5ede0f660383c 100644 --- a/sycl/test-e2e/SYCLBIN/dae_input.cpp +++ b/sycl/test-e2e/SYCLBIN/dae_input.cpp @@ -8,6 +8,10 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce input-state SYCLBIN files. + // -- Test for using a kernel from a SYCLBIN with a dead argument. // RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dae_object.cpp b/sycl/test-e2e/SYCLBIN/dae_object.cpp index 0fb9a6e7c4d4f..01ff96df8ff7b 100644 --- a/sycl/test-e2e/SYCLBIN/dae_object.cpp +++ b/sycl/test-e2e/SYCLBIN/dae_object.cpp @@ -8,10 +8,11 @@ // REQUIRES: aspect-usm_device_allocations -// -- Test for using a kernel from a SYCLBIN with a dead argument. +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce object-state SYCLBIN files. -// UNSUPPORTED: hip -// UNSUPPORTED-INTENDED: HIP backend does not implement linking. +// -- Test for using a kernel from a SYCLBIN with a dead argument. // RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/SYCLBIN/dg_input.cpp b/sycl/test-e2e/SYCLBIN/dg_input.cpp index 9a53470cb9594..0b9bdc8267859 100644 --- a/sycl/test-e2e/SYCLBIN/dg_input.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_input.cpp @@ -8,14 +8,15 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce input-state SYCLBIN files. + // -- Test for using device globals in SYCLBIN. // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/19533 - // RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dg_object.cpp b/sycl/test-e2e/SYCLBIN/dg_object.cpp index 5dc3d83ed0986..b13e26e56a0fd 100644 --- a/sycl/test-e2e/SYCLBIN/dg_object.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_object.cpp @@ -8,17 +8,15 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce object-state SYCLBIN files. + // -- Test for using device globals in SYCLBIN. // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 -// UNSUPPORTED: hip -// UNSUPPORTED-INTENDED: HIP backend does not implement linking. - -// XFAIL: cuda -// XFAIL-TRACKER: CMPLRLLVM-68859 - // RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp index 696e020c76255..143d14ad775af 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -8,6 +8,10 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce input-state SYCLBIN files. + // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp index e91ae90228cf1..c6d2d07f68b37 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -9,12 +9,13 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce object-state SYCLBIN files. + // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. -// UNSUPPORTED: hip -// UNSUPPORTED-INTENDED: HIP backend does not implement linking. - // RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{run} %t.out %t.syclbin