Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ class kernel_bundle_impl
"kernel_bundle state does not match the state of the SYCLBIN file.");

std::vector<const detail::RTDeviceBinaryImage *> BestImages =
SYCLBIN->getBestCompatibleImages(Devs);
SYCLBIN->getBestCompatibleImages(Devs, State);
MDeviceImages.reserve(BestImages.size());
for (const detail::RTDeviceBinaryImage *Image : BestImages)
MDeviceImages.emplace_back(device_image_impl::create(
Expand Down
97 changes: 62 additions & 35 deletions sycl/source/detail/syclbin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,21 +272,36 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize)
: SYCLBINContentCopy{ContentCopy(SYCLBINContent, SYCLBINSize)},
SYCLBINContentCopySize{SYCLBINSize},
ParsedSYCLBIN(SYCLBIN{SYCLBINContentCopy.get(), SYCLBINSize}) {
size_t NumJITBinaries = 0, NumNativeBinaries = 0;
for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) {
NumJITBinaries += AM.IRModules.size();
NumNativeBinaries += AM.NativeDeviceCodeImages.size();
}
DeviceBinaries.reserve(NumJITBinaries + NumNativeBinaries);
JITDeviceBinaryImages.reserve(NumJITBinaries);
NativeDeviceBinaryImages.reserve(NumNativeBinaries);
AbstractModuleDescriptors = std::unique_ptr<AbstractModuleDesc[]>(
new AbstractModuleDesc[ParsedSYCLBIN.AbstractModules.size()]);

size_t NumBinaries = 0;
for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules)
NumBinaries += AM.IRModules.size() + AM.NativeDeviceCodeImages.size();
DeviceBinaries.reserve(NumBinaries);
BinaryImages = std::unique_ptr<RTDeviceBinaryImage[]>(
new RTDeviceBinaryImage[NumBinaries]);

RTDeviceBinaryImage *CurrentBinaryImagesStart = BinaryImages.get();
for (size_t I = 0; I < getNumAbstractModules(); ++I) {
SYCLBIN::AbstractModule &AM = ParsedSYCLBIN.AbstractModules[I];
AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I];

// Set up the abstract module descriptor.
AMDesc.NumJITBinaries = AM.IRModules.size();
AMDesc.NumNativeBinaries = AM.NativeDeviceCodeImages.size();
AMDesc.JITBinaries = CurrentBinaryImagesStart;
AMDesc.NativeBinaries = CurrentBinaryImagesStart + AMDesc.NumJITBinaries;
CurrentBinaryImagesStart +=
AMDesc.NumJITBinaries + AM.NativeDeviceCodeImages.size();

for (SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) {
// Construct properties from SYCLBIN metadata.
std::vector<_sycl_device_binary_property_set_struct> &BinPropertySets =
convertAbstractModuleProperties(AM);

for (SYCLBIN::IRModule &IRM : AM.IRModules) {
for (size_t J = 0; J < AM.IRModules.size(); ++J) {
SYCLBIN::IRModule &IRM = AM.IRModules[J];

sycl_device_binary_struct &DeviceBinary = DeviceBinaries.emplace_back();
DeviceBinary.Version = SYCL_DEVICE_BINARY_VERSION;
DeviceBinary.Kind = 4;
Expand All @@ -309,11 +324,12 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize)
DeviceBinary.PropertySetsEnd =
BinPropertySets.data() + BinPropertySets.size();
// Create an image from it.
JITDeviceBinaryImages.emplace_back(&DeviceBinary);
AMDesc.JITBinaries[J] = RTDeviceBinaryImage{&DeviceBinary};
}

for (const SYCLBIN::NativeDeviceCodeImage &NDCI :
AM.NativeDeviceCodeImages) {
for (size_t J = 0; J < AM.NativeDeviceCodeImages.size(); ++J) {
const SYCLBIN::NativeDeviceCodeImage &NDCI = AM.NativeDeviceCodeImages[J];

assert(NDCI.Metadata != nullptr);
PropertySet &NDCIMetadataProps = (*NDCI.Metadata)
[PropertySetRegistry::SYCLBIN_NATIVE_DEVICE_CODE_IMAGE_METADATA];
Expand Down Expand Up @@ -346,7 +362,7 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize)
DeviceBinary.PropertySetsEnd =
BinPropertySets.data() + BinPropertySets.size();
// Create an image from it.
NativeDeviceBinaryImages.emplace_back(&DeviceBinary);
AMDesc.NativeBinaries[J] = RTDeviceBinaryImage{&DeviceBinary};
}
}
}
Expand Down Expand Up @@ -394,33 +410,44 @@ SYCLBINBinaries::convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM) {
}

std::vector<const RTDeviceBinaryImage *>
SYCLBINBinaries::getBestCompatibleImages(device_impl &Dev) {
auto SelectCompatibleImages =
[&](const std::vector<RTDeviceBinaryImage> &Imgs) {
std::vector<const RTDeviceBinaryImage *> CompatImgs;
for (const RTDeviceBinaryImage &Img : Imgs)
if (doesDevSupportDeviceRequirements(Dev, Img) &&
doesImageTargetMatchDevice(Img, Dev))
CompatImgs.push_back(&Img);
return CompatImgs;
};

// Try with native images first.
std::vector<const RTDeviceBinaryImage *> NativeImgs =
SelectCompatibleImages(NativeDeviceBinaryImages);
if (!NativeImgs.empty())
return NativeImgs;

// If there were no native images, pick JIT images.
return SelectCompatibleImages(JITDeviceBinaryImages);
// Selects at most one image per abstract module for device Dev.
//
// For each abstract module, if State is bundle_state::executable the first
// compatible native binary is chosen when one exists; otherwise (or for any
// other state) the first compatible JIT binary is chosen. A module with no
// compatible image contributes nothing to the result.
SYCLBINBinaries::getBestCompatibleImages(device_impl &Dev, bundle_state State) {
// Returns a pointer to the first image in [Imgs, Imgs + NumImgs) that Dev
// supports and whose target matches Dev, or nullptr if there is none.
auto GetCompatibleImage = [&](const RTDeviceBinaryImage *Imgs,
size_t NumImgs) {
const RTDeviceBinaryImage *CompatImagePtr =
std::find_if(Imgs, Imgs + NumImgs, [&](const RTDeviceBinaryImage &Img) {
return doesDevSupportDeviceRequirements(Dev, Img) &&
doesImageTargetMatchDevice(Img, Dev);
});
// find_if returns the end pointer on failure; map that to nullptr so the
// callers below can test the result directly in an if-condition.
return (CompatImagePtr != Imgs + NumImgs) ? CompatImagePtr : nullptr;
};

std::vector<const RTDeviceBinaryImage *> Images;
for (size_t I = 0; I < getNumAbstractModules(); ++I) {
const AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I];
// If the target state is executable, try with native images first.
if (State == bundle_state::executable) {
if (const RTDeviceBinaryImage *CompatImagePtr = GetCompatibleImage(
AMDesc.NativeBinaries, AMDesc.NumNativeBinaries)) {
Images.push_back(CompatImagePtr);
// A native image was found for this module; skip the JIT fallback.
continue;
}
}

// Otherwise, select the first compatible JIT binary.
if (const RTDeviceBinaryImage *CompatImagePtr =
GetCompatibleImage(AMDesc.JITBinaries, AMDesc.NumJITBinaries))
Images.push_back(CompatImagePtr);
}
return Images;
}

std::vector<const RTDeviceBinaryImage *>
SYCLBINBinaries::getBestCompatibleImages(devices_range Devs) {
SYCLBINBinaries::getBestCompatibleImages(devices_range Devs,
bundle_state State) {
std::set<const RTDeviceBinaryImage *> Images;
for (device_impl &Dev : Devs) {
std::vector<const RTDeviceBinaryImage *> BestImagesForDev =
getBestCompatibleImages(Dev);
getBestCompatibleImages(Dev, State);
Images.insert(BestImagesForDev.cbegin(), BestImagesForDev.cend());
}
return {Images.cbegin(), Images.cend()};
Expand Down
20 changes: 16 additions & 4 deletions sycl/source/detail/syclbin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ struct SYCLBINBinaries {
~SYCLBINBinaries() = default;

std::vector<const RTDeviceBinaryImage *>
getBestCompatibleImages(device_impl &Dev);
getBestCompatibleImages(device_impl &Dev, bundle_state State);
std::vector<const RTDeviceBinaryImage *>
getBestCompatibleImages(devices_range Dev);
getBestCompatibleImages(devices_range Dev, bundle_state State);

uint8_t getState() const {
PropertySet &GlobalMetadata =
Expand All @@ -143,6 +143,10 @@ struct SYCLBINBinaries {
std::vector<_sycl_device_binary_property_set_struct> &
convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM);

/// Returns the number of abstract modules held by the parsed SYCLBIN.
size_t getNumAbstractModules() const {
  const auto &Modules = ParsedSYCLBIN.AbstractModules;
  return Modules.size();
}

std::unique_ptr<char[]> SYCLBINContentCopy = nullptr;
size_t SYCLBINContentCopySize = 0;

Expand All @@ -156,8 +160,16 @@ struct SYCLBINBinaries {
BinaryPropertySets;

std::vector<sycl_device_binary_struct> DeviceBinaries;
std::vector<RTDeviceBinaryImage> JITDeviceBinaryImages;
std::vector<RTDeviceBinaryImage> NativeDeviceBinaryImages;

/// Describes the binary images that belong to one abstract module.
///
/// The pointers are non-owning views into the BinaryImages array owned by
/// SYCLBINBinaries. The constructor lays out a module's JIT images first,
/// immediately followed by its native images.
///
/// Members are default-initialized so that a descriptor created by
/// `new AbstractModuleDesc[N]` is an empty, safely readable range until the
/// constructor fills it in.
struct AbstractModuleDesc {
  /// Number of images reachable through JITBinaries.
  size_t NumJITBinaries = 0;
  /// Number of images reachable through NativeBinaries.
  size_t NumNativeBinaries = 0;
  /// First JIT image of this module, or nullptr while unset.
  RTDeviceBinaryImage *JITBinaries = nullptr;
  /// First native image of this module, or nullptr while unset.
  RTDeviceBinaryImage *NativeBinaries = nullptr;
};

std::unique_ptr<AbstractModuleDesc[]> AbstractModuleDescriptors;
std::unique_ptr<RTDeviceBinaryImage[]> BinaryImages;
};

} // namespace detail
Expand Down
4 changes: 4 additions & 0 deletions sycl/test-e2e/SYCLBIN/basic_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

// REQUIRES: aspect-usm_device_allocations

// UNSUPPORTED: cuda, hip
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
// binaries and can therefore not produce input-state SYCLBIN files.

// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input
// -- state.

Expand Down
7 changes: 4 additions & 3 deletions sycl/test-e2e/SYCLBIN/basic_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

// REQUIRES: aspect-usm_device_allocations

// UNSUPPORTED: cuda, hip
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
// binaries and can therefore not produce object-state SYCLBIN files.

// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object
// -- state.

// UNSUPPORTED: hip
// UNSUPPORTED-INTENDED: HIP backend does not implement linking.

// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/basic_kernel.cpp -o %t.syclbin
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out %t.syclbin
Expand Down
4 changes: 4 additions & 0 deletions sycl/test-e2e/SYCLBIN/dae_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

// REQUIRES: aspect-usm_device_allocations

// UNSUPPORTED: cuda, hip
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
// binaries and can therefore not produce input-state SYCLBIN files.

// -- Test for using a kernel from a SYCLBIN with a dead argument.

// RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin
Expand Down
7 changes: 4 additions & 3 deletions sycl/test-e2e/SYCLBIN/dae_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@

// REQUIRES: aspect-usm_device_allocations

// -- Test for using a kernel from a SYCLBIN with a dead argument.
// UNSUPPORTED: cuda, hip
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
// binaries and can therefore not produce object-state SYCLBIN files.

// UNSUPPORTED: hip
// UNSUPPORTED-INTENDED: HIP backend does not implement linking.
// -- Test for using a kernel from a SYCLBIN with a dead argument.

// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin
// RUN: %{build} -o %t.out
Expand Down
7 changes: 4 additions & 3 deletions sycl/test-e2e/SYCLBIN/dg_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@

// REQUIRES: aspect-usm_device_allocations

// UNSUPPORTED: cuda, hip
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
// binaries and can therefore not produce input-state SYCLBIN files.

// -- Test for using device globals in SYCLBIN.

// UNSUPPORTED: opencl && gpu
// UNSUPPORTED-TRACKER: GSD-4287

// UNSUPPORTED: cuda
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/19533

// RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out %t.syclbin
Expand Down
10 changes: 4 additions & 6 deletions sycl/test-e2e/SYCLBIN/dg_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,15 @@

// REQUIRES: aspect-usm_device_allocations

// UNSUPPORTED: cuda, hip
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
// binaries and can therefore not produce object-state SYCLBIN files.

// -- Test for using device globals in SYCLBIN.

// UNSUPPORTED: opencl && gpu
// UNSUPPORTED-TRACKER: GSD-4287

// UNSUPPORTED: hip
// UNSUPPORTED-INTENDED: HIP backend does not implement linking.

// XFAIL: cuda
// XFAIL-TRACKER: CMPLRLLVM-68859

// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out %t.syclbin
Expand Down
4 changes: 4 additions & 0 deletions sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

// REQUIRES: aspect-usm_device_allocations

// UNSUPPORTED: cuda, hip
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
// binaries and can therefore not produce input-state SYCLBIN files.

// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing
// the use of optional kernel features.

Expand Down
7 changes: 4 additions & 3 deletions sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@

// REQUIRES: aspect-usm_device_allocations

// UNSUPPORTED: cuda, hip
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
// binaries and can therefore not produce object-state SYCLBIN files.

// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing
// the use of optional kernel features.

// UNSUPPORTED: hip
// UNSUPPORTED-INTENDED: HIP backend does not implement linking.

// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/optional_kernel_features.cpp -o %t.syclbin
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out %t.syclbin
Expand Down