Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sycl/source/detail/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
#define __SYCL_DEVICE_BINARY_TARGET_NVPTX64 "nvptx64"
#define __SYCL_DEVICE_BINARY_TARGET_AMDGCN "amdgcn"
#define __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU "native_cpu"
// JIT compilation targets for CUDA & HIP devices.
#define __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64 "llvm_nvptx64"
#define __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN "llvm_amdgcn"

/// Device binary image property set names recognized by the SYCL runtime.
/// Name must be consistent with
Expand Down
100 changes: 65 additions & 35 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1341,7 +1341,7 @@ void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image,

const char *getArchName(const device &Device) {
namespace syclex = sycl::ext::oneapi::experimental;
auto Arch = Device.get_info<syclex::info::device::architecture>();
auto Arch = getSyclObjImpl(Device)->getDeviceArch();
switch (Arch) {
#define __SYCL_ARCHITECTURE(ARCH, VAL) \
case syclex::architecture::ARCH: \
Expand Down Expand Up @@ -1369,45 +1369,14 @@ RTDeviceBinaryImage *getBinImageFromMultiMap(

// Here, we aim to select all the device images from the
// [ItBegin, ItEnd) range that are AOT compiled for Device
// (checked using info::device::architecture) or JIT compiled.
// (checked using info::device::architecture) or JIT compiled.
// This selection will then be passed to urDeviceSelectBinary
// for final selection.
std::string_view ArchName = getArchName(Device);
std::vector<RTDeviceBinaryImage *> DeviceFilteredImgs;
DeviceFilteredImgs.reserve(std::distance(ItBegin, ItEnd));
for (auto It = ItBegin; It != ItEnd; ++It) {
auto PropRange = It->second->getDeviceRequirements();
auto PropIt =
std::find_if(PropRange.begin(), PropRange.end(), [&](const auto &Prop) {
return Prop->Name == std::string_view("compile_target");
});
auto AddImg = [&]() { DeviceFilteredImgs.push_back(It->second); };

// Device image has no compile_target property, so it is JIT compiled.
if (PropIt == PropRange.end()) {
AddImg();
continue;
}

// Device image has the compile_target property, so it is AOT compiled for
// some device, check if that architecture is Device's architecture.
auto CompileTargetByteArray = DeviceBinaryProperty(*PropIt).asByteArray();
CompileTargetByteArray.dropBytes(8);
std::string_view CompileTarget(
reinterpret_cast<const char *>(&CompileTargetByteArray[0]),
CompileTargetByteArray.size());
// Note: there are no explicit targets for CPUs, so on x86_64,
// intel_cpu_spr, and intel_cpu_gnr, we use a spir64_x86_64
// compile target image.
// TODO: When dedicated targets for CPU are added, (i.e.
// -fsycl-targets=intel_cpu_spr etc.) remove this special
// handling of CPU targets.
if ((ArchName == CompileTarget) ||
(CompileTarget == "spir64_x86_64" &&
(ArchName == "x86_64" || ArchName == "intel_cpu_spr" ||
ArchName == "intel_cpu_gnr"))) {
AddImg();
}
if (doesImageTargetMatchDevice(*It->second, Device))
DeviceFilteredImgs.push_back(It->second);
}

if (DeviceFilteredImgs.empty())
Expand Down Expand Up @@ -3405,6 +3374,67 @@ checkDevSupportDeviceRequirements(const device &Dev,
return {};
}

bool doesImageTargetMatchDevice(const RTDeviceBinaryImage &Img,
const device &Dev) {
auto PropRange = Img.getDeviceRequirements();
auto PropIt =
std::find_if(PropRange.begin(), PropRange.end(), [&](const auto &Prop) {
return Prop->Name == std::string_view("compile_target");
});
// Device image has no compile_target property, check target.
if (PropIt == PropRange.end()) {
sycl::backend BE = Dev.get_backend();
const char *Target = Img.getRawData().DeviceTargetSpec;
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) {
return (BE == sycl::backend::opencl ||
BE == sycl::backend::ext_oneapi_level_zero);
}
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) {
return Dev.is_cpu();
}
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) {
return Dev.is_gpu() && (BE == sycl::backend::opencl ||
BE == sycl::backend::ext_oneapi_level_zero);
}
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) {
return Dev.is_accelerator();
}
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0 ||
strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64) == 0) {
return BE == sycl::backend::ext_oneapi_cuda;
}
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0 ||
strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN) == 0) {
return BE == sycl::backend::ext_oneapi_hip;
}
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0) {
return BE == sycl::backend::ext_oneapi_native_cpu;
}
assert(false && "Unexpected image target");
return false;
}

// Device image has the compile_target property, so it is AOT compiled for
// some device, check if that architecture is Device's architecture.
auto CompileTargetByteArray = DeviceBinaryProperty(*PropIt).asByteArray();
// Drop 8 bytes describing the size of the byte array.
CompileTargetByteArray.dropBytes(8);
std::string_view CompileTarget(
reinterpret_cast<const char *>(&CompileTargetByteArray[0]),
CompileTargetByteArray.size());
std::string_view ArchName = getArchName(Dev);
// Note: there are no explicit targets for CPUs, so on x86_64,
// intel_cpu_spr, and intel_cpu_gnr, we use a spir64_x86_64
// compile target image.
// TODO: When dedicated targets for CPU are added, (i.e.
// -fsycl-targets=intel_cpu_spr etc.) remove this special
// handling of CPU targets.
return ((ArchName == CompileTarget) ||
(CompileTarget == "spir64_x86_64" &&
(ArchName == "x86_64" || ArchName == "intel_cpu_spr" ||
ArchName == "intel_cpu_gnr")));
}

} // namespace detail
} // namespace _V1
} // namespace sycl
Expand Down
3 changes: 3 additions & 0 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ checkDevSupportDeviceRequirements(const device &Dev,
const RTDeviceBinaryImage &BinImages,
const NDRDescT &NDRDesc = {});

bool doesImageTargetMatchDevice(const RTDeviceBinaryImage &Img,
const device &Dev);

// This value must be the same as in libdevice/device_itt.h.
// See sycl/doc/design/ITTAnnotations.md for more info.
static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
Expand Down
31 changes: 1 addition & 30 deletions sycl/source/kernel_bundle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,35 +307,6 @@ std::vector<kernel_id> get_kernel_ids() {
bool is_compatible(const std::vector<kernel_id> &KernelIDs, const device &Dev) {
if (KernelIDs.empty())
return true;
// TODO: also need to check that the architecture specified by the
// "-fsycl-targets" flag matches the device when we are able to get the
// device's arch.
auto doesImageTargetMatchDevice = [](const device &Dev,
const detail::RTDeviceBinaryImage &Img) {
const char *Target = Img.getRawData().DeviceTargetSpec;
auto BE = Dev.get_backend();
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) {
return (BE == sycl::backend::opencl ||
BE == sycl::backend::ext_oneapi_level_zero);
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) ==
0) {
return Dev.is_cpu();
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) {
return Dev.is_gpu() && (BE == sycl::backend::opencl ||
BE == sycl::backend::ext_oneapi_level_zero);
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) {
return Dev.is_accelerator();
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0) {
return BE == sycl::backend::ext_oneapi_cuda;
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0) {
return BE == sycl::backend::ext_oneapi_hip;
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0) {
return BE == sycl::backend::ext_oneapi_native_cpu;
}

return false;
};

// One kernel may be contained in several binary images depending on the
// number of targets. This kernel is compatible with the device if there is
// at least one image (containing this kernel) whose aspects are supported by
Expand All @@ -347,7 +318,7 @@ bool is_compatible(const std::vector<kernel_id> &KernelIDs, const device &Dev) {
if (std::none_of(BinImages.begin(), BinImages.end(),
[&](const detail::RTDeviceBinaryImage *Img) {
return doesDevSupportDeviceRequirements(Dev, *Img) &&
doesImageTargetMatchDevice(Dev, *Img);
doesImageTargetMatchDevice(*Img, Dev);
}))
return false;
}
Expand Down
12 changes: 11 additions & 1 deletion sycl/unittests/program_manager/CompileTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,14 @@ class NDRangeKernel;
class RangeKernel;
class NoDeviceKernel;
class JITFallbackKernel;
class SKLOnlyKernel;

MOCK_INTEGRATION_HEADER(SingleTaskKernel)
MOCK_INTEGRATION_HEADER(NDRangeKernel)
MOCK_INTEGRATION_HEADER(RangeKernel)
MOCK_INTEGRATION_HEADER(NoDeviceKernel)
MOCK_INTEGRATION_HEADER(JITFallbackKernel)
MOCK_INTEGRATION_HEADER(SKLOnlyKernel)

static sycl::unittest::MockDeviceImage Img[] = {
sycl::unittest::generateDefaultImage({"SingleTaskKernel"}),
Expand Down Expand Up @@ -93,7 +95,8 @@ static sycl::unittest::MockDeviceImage Img[] = {
sycl::unittest::generateDefaultImage({"JITFallbackKernel"}),
sycl::unittest::generateImageWithCompileTarget("JITFallbackKernel",
"intel_gpu_bdw"),
};
sycl::unittest::generateImageWithCompileTarget("SKLOnlyKernel",
"intel_gpu_skl")};

static sycl::unittest::MockDeviceImageArray<std::size(Img)> ImgArray{Img};

Expand Down Expand Up @@ -336,3 +339,10 @@ TEST_F(CompileTargetTest, JITFallbackKernel) {
ASSERT_EQ(createWithILLog.size(), 1U);
EXPECT_EQ(createWithILLog.back(), "JITFallbackKernel");
}

TEST_F(CompileTargetTest, IsCompatible) {
device Skl{archSelector(syclex::architecture::intel_gpu_skl)};
EXPECT_TRUE(sycl::is_compatible<SKLOnlyKernel>(Skl));
device Pvc{archSelector(syclex::architecture::intel_gpu_pvc)};
EXPECT_FALSE(sycl::is_compatible<SKLOnlyKernel>(Pvc));
}
Loading