From 06ca0c264a27d2d2abdb048291311df03f143055 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 25 Sep 2025 05:59:39 -0700 Subject: [PATCH] [SYCL][SYCLBIN] Select best images based on state Previously, the implementation would prioritize native device code binaries over IR modules indiscriminately. However, for input state and object state, native device code images are unsuitable. As such, this commit makes it so that the native device code images are only selected when the target state is executable. As an effect of this change, input and object tests will no longer run for CUDA and HIP. This is appropriate given these targets only currently support generating native device code images, which are meant to be as close to executable state as possible. Note that the driver doesn't currently make such limitations of the generated SYCLBIN files, so producing input and object state binaries with CUDA and HIP targets will result in kernel bundles with no binary images. Signed-off-by: Larsen, Steffen --- sycl/source/detail/kernel_bundle_impl.hpp | 2 +- sycl/source/detail/syclbin.cpp | 97 ++++++++++++------- sycl/source/detail/syclbin.hpp | 20 +++- sycl/test-e2e/SYCLBIN/basic_input.cpp | 4 + sycl/test-e2e/SYCLBIN/basic_object.cpp | 7 +- sycl/test-e2e/SYCLBIN/dae_input.cpp | 4 + sycl/test-e2e/SYCLBIN/dae_object.cpp | 7 +- sycl/test-e2e/SYCLBIN/dg_input.cpp | 7 +- sycl/test-e2e/SYCLBIN/dg_object.cpp | 10 +- .../optional_kernel_features_input.cpp | 4 + .../optional_kernel_features_object.cpp | 7 +- 11 files changed, 111 insertions(+), 58 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 66511838bdf7f..463181fd7ca5d 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -580,7 +580,7 @@ class kernel_bundle_impl "kernel_bundle state does not match the state of the SYCLBIN file."); std::vector BestImages = - SYCLBIN->getBestCompatibleImages(Devs); + SYCLBIN->getBestCompatibleImages(Devs, State); MDeviceImages.reserve(BestImages.size()); for (const detail::RTDeviceBinaryImage *Image : BestImages) MDeviceImages.emplace_back(device_image_impl::create( diff --git a/sycl/source/detail/syclbin.cpp b/sycl/source/detail/syclbin.cpp index d1be8d4df6848..90513f253a2cb 100644 --- a/sycl/source/detail/syclbin.cpp +++ b/sycl/source/detail/syclbin.cpp @@ -272,21 +272,36 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize) : SYCLBINContentCopy{ContentCopy(SYCLBINContent, SYCLBINSize)}, SYCLBINContentCopySize{SYCLBINSize}, ParsedSYCLBIN(SYCLBIN{SYCLBINContentCopy.get(), SYCLBINSize}) { - size_t NumJITBinaries = 0, NumNativeBinaries = 0; - for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) { - NumJITBinaries += AM.IRModules.size(); - NumNativeBinaries += AM.NativeDeviceCodeImages.size(); - } - DeviceBinaries.reserve(NumJITBinaries + NumNativeBinaries); - JITDeviceBinaryImages.reserve(NumJITBinaries); - NativeDeviceBinaryImages.reserve(NumNativeBinaries); + AbstractModuleDescriptors = std::unique_ptr( + new AbstractModuleDesc[ParsedSYCLBIN.AbstractModules.size()]); + + size_t NumBinaries = 0; + for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) + NumBinaries += AM.IRModules.size() + AM.NativeDeviceCodeImages.size(); + DeviceBinaries.reserve(NumBinaries); + BinaryImages = std::unique_ptr( + new RTDeviceBinaryImage[NumBinaries]); + + RTDeviceBinaryImage *CurrentBinaryImagesStart = BinaryImages.get(); + for (size_t I = 0; I < getNumAbstractModules(); ++I) { + SYCLBIN::AbstractModule &AM = ParsedSYCLBIN.AbstractModules[I]; + AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I]; + + // Set up the abstract module descriptor. + AMDesc.NumJITBinaries = AM.IRModules.size(); + AMDesc.NumNativeBinaries = AM.NativeDeviceCodeImages.size(); + AMDesc.JITBinaries = CurrentBinaryImagesStart; + AMDesc.NativeBinaries = CurrentBinaryImagesStart + AMDesc.NumJITBinaries; + CurrentBinaryImagesStart += + AMDesc.NumJITBinaries + AM.NativeDeviceCodeImages.size(); - for (SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) { // Construct properties from SYCLBIN metadata. std::vector<_sycl_device_binary_property_set_struct> &BinPropertySets = convertAbstractModuleProperties(AM); - for (SYCLBIN::IRModule &IRM : AM.IRModules) { + for (size_t J = 0; J < AM.IRModules.size(); ++J) { + SYCLBIN::IRModule &IRM = AM.IRModules[J]; + sycl_device_binary_struct &DeviceBinary = DeviceBinaries.emplace_back(); DeviceBinary.Version = SYCL_DEVICE_BINARY_VERSION; DeviceBinary.Kind = 4; @@ -309,11 +324,12 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize) DeviceBinary.PropertySetsEnd = BinPropertySets.data() + BinPropertySets.size(); // Create an image from it. - JITDeviceBinaryImages.emplace_back(&DeviceBinary); + AMDesc.JITBinaries[J] = RTDeviceBinaryImage{&DeviceBinary}; } - for (const SYCLBIN::NativeDeviceCodeImage &NDCI : - AM.NativeDeviceCodeImages) { + for (size_t J = 0; J < AM.NativeDeviceCodeImages.size(); ++J) { + const SYCLBIN::NativeDeviceCodeImage &NDCI = AM.NativeDeviceCodeImages[J]; + assert(NDCI.Metadata != nullptr); PropertySet &NDCIMetadataProps = (*NDCI.Metadata) [PropertySetRegistry::SYCLBIN_NATIVE_DEVICE_CODE_IMAGE_METADATA]; @@ -346,7 +362,7 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize) DeviceBinary.PropertySetsEnd = BinPropertySets.data() + BinPropertySets.size(); // Create an image from it. - NativeDeviceBinaryImages.emplace_back(&DeviceBinary); + AMDesc.NativeBinaries[J] = RTDeviceBinaryImage{&DeviceBinary}; } } } @@ -394,33 +410,44 @@ SYCLBINBinaries::convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM) { } std::vector -SYCLBINBinaries::getBestCompatibleImages(device_impl &Dev) { - auto SelectCompatibleImages = - [&](const std::vector &Imgs) { - std::vector CompatImgs; - for (const RTDeviceBinaryImage &Img : Imgs) - if (doesDevSupportDeviceRequirements(Dev, Img) && - doesImageTargetMatchDevice(Img, Dev)) - CompatImgs.push_back(&Img); - return CompatImgs; - }; - - // Try with native images first. - std::vector NativeImgs = - SelectCompatibleImages(NativeDeviceBinaryImages); - if (!NativeImgs.empty()) - return NativeImgs; - - // If there were no native images, pick JIT images. - return SelectCompatibleImages(JITDeviceBinaryImages); +SYCLBINBinaries::getBestCompatibleImages(device_impl &Dev, bundle_state State) { + auto GetCompatibleImage = [&](const RTDeviceBinaryImage *Imgs, + size_t NumImgs) { + const RTDeviceBinaryImage *CompatImagePtr = + std::find_if(Imgs, Imgs + NumImgs, [&](const RTDeviceBinaryImage &Img) { + return doesDevSupportDeviceRequirements(Dev, Img) && + doesImageTargetMatchDevice(Img, Dev); + }); + return (CompatImagePtr != Imgs + NumImgs) ? CompatImagePtr : nullptr; + }; + + std::vector Images; + for (size_t I = 0; I < getNumAbstractModules(); ++I) { + const AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I]; + // If the target state is executable, try with native images first. + if (State == bundle_state::executable) { + if (const RTDeviceBinaryImage *CompatImagePtr = GetCompatibleImage( + AMDesc.NativeBinaries, AMDesc.NumNativeBinaries)) { + Images.push_back(CompatImagePtr); + continue; + } + } + + // Otherwise, select the first compatible JIT binary. + if (const RTDeviceBinaryImage *CompatImagePtr = + GetCompatibleImage(AMDesc.JITBinaries, AMDesc.NumJITBinaries)) + Images.push_back(CompatImagePtr); + } + return Images; } std::vector -SYCLBINBinaries::getBestCompatibleImages(devices_range Devs) { +SYCLBINBinaries::getBestCompatibleImages(devices_range Devs, + bundle_state State) { std::set Images; for (device_impl &Dev : Devs) { std::vector BestImagesForDev = - getBestCompatibleImages(Dev); + getBestCompatibleImages(Dev, State); Images.insert(BestImagesForDev.cbegin(), BestImagesForDev.cend()); } return {Images.cbegin(), Images.cend()}; diff --git a/sycl/source/detail/syclbin.hpp b/sycl/source/detail/syclbin.hpp index 4f56eb594ac0c..02b9ec8348c99 100644 --- a/sycl/source/detail/syclbin.hpp +++ b/sycl/source/detail/syclbin.hpp @@ -124,9 +124,9 @@ struct SYCLBINBinaries { ~SYCLBINBinaries() = default; std::vector - getBestCompatibleImages(device_impl &Dev); + getBestCompatibleImages(device_impl &Dev, bundle_state State); std::vector - getBestCompatibleImages(devices_range Dev); + getBestCompatibleImages(devices_range Dev, bundle_state State); uint8_t getState() const { PropertySet &GlobalMetadata = @@ -143,6 +143,10 @@ struct SYCLBINBinaries { std::vector<_sycl_device_binary_property_set_struct> & convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM); + size_t getNumAbstractModules() const { + return ParsedSYCLBIN.AbstractModules.size(); + } + std::unique_ptr SYCLBINContentCopy = nullptr; size_t SYCLBINContentCopySize = 0; @@ -156,8 +160,16 @@ struct SYCLBINBinaries { BinaryPropertySets; std::vector DeviceBinaries; - std::vector JITDeviceBinaryImages; - std::vector NativeDeviceBinaryImages; + + struct AbstractModuleDesc { + size_t NumJITBinaries; + size_t NumNativeBinaries; + RTDeviceBinaryImage *JITBinaries; + RTDeviceBinaryImage *NativeBinaries; + }; + + std::unique_ptr AbstractModuleDescriptors; + std::unique_ptr BinaryImages; }; } // namespace detail diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp index 8f131061c60a5..18364e6b05ace 100644 --- a/sycl/test-e2e/SYCLBIN/basic_input.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -8,6 +8,10 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce input-state SYCLBIN files. + // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input // -- state. diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp index efa3accb63332..39d5d45bb30ad 100644 --- a/sycl/test-e2e/SYCLBIN/basic_object.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -8,12 +8,13 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce object-state SYCLBIN files. + // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object // -- state. -// UNSUPPORTED: hip -// UNSUPPORTED-INTENDED: HIP backend does not implement linking. - // RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dae_input.cpp b/sycl/test-e2e/SYCLBIN/dae_input.cpp index b096f4febcf6e..5ede0f660383c 100644 --- a/sycl/test-e2e/SYCLBIN/dae_input.cpp +++ b/sycl/test-e2e/SYCLBIN/dae_input.cpp @@ -8,6 +8,10 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce input-state SYCLBIN files. + // -- Test for using a kernel from a SYCLBIN with a dead argument. // RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dae_object.cpp b/sycl/test-e2e/SYCLBIN/dae_object.cpp index 0fb9a6e7c4d4f..01ff96df8ff7b 100644 --- a/sycl/test-e2e/SYCLBIN/dae_object.cpp +++ b/sycl/test-e2e/SYCLBIN/dae_object.cpp @@ -8,10 +8,11 @@ // REQUIRES: aspect-usm_device_allocations -// -- Test for using a kernel from a SYCLBIN with a dead argument. +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce object-state SYCLBIN files. -// UNSUPPORTED: hip -// UNSUPPORTED-INTENDED: HIP backend does not implement linking. +// -- Test for using a kernel from a SYCLBIN with a dead argument. // RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/SYCLBIN/dg_input.cpp b/sycl/test-e2e/SYCLBIN/dg_input.cpp index 9a53470cb9594..0b9bdc8267859 100644 --- a/sycl/test-e2e/SYCLBIN/dg_input.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_input.cpp @@ -8,14 +8,15 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce input-state SYCLBIN files. + // -- Test for using device globals in SYCLBIN. // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/19533 - // RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dg_object.cpp b/sycl/test-e2e/SYCLBIN/dg_object.cpp index 5dc3d83ed0986..b13e26e56a0fd 100644 --- a/sycl/test-e2e/SYCLBIN/dg_object.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_object.cpp @@ -8,17 +8,15 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce object-state SYCLBIN files. + // -- Test for using device globals in SYCLBIN. // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 -// UNSUPPORTED: hip -// UNSUPPORTED-INTENDED: HIP backend does not implement linking. - -// XFAIL: cuda -// XFAIL-TRACKER: CMPLRLLVM-68859 - // RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp index 696e020c76255..143d14ad775af 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -8,6 +8,10 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce input-state SYCLBIN files. + // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp index e91ae90228cf1..c6d2d07f68b37 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -9,12 +9,13 @@ // REQUIRES: aspect-usm_device_allocations +// UNSUPPORTED: cuda, hip +// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device +// binaries and can therefore not produce object-state SYCLBIN files. + // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. -// UNSUPPORTED: hip -// UNSUPPORTED-INTENDED: HIP backend does not implement linking. - // RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{run} %t.out %t.syclbin