diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 827ee61ef8110..35f313ceec3f5 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -31,6 +31,9 @@ #define __SYCL_DEVICE_BINARY_TARGET_NVPTX64 "nvptx64" #define __SYCL_DEVICE_BINARY_TARGET_AMDGCN "amdgcn" #define __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU "native_cpu" +// JIT compilation targets for CUDA & HIP devices. +#define __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64 "llvm_nvptx64" +#define __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN "llvm_amdgcn" /// Device binary image property set names recognized by the SYCL runtime. /// Name must be consistent with diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index d51bdb13de37f..6c04644639847 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1341,7 +1341,7 @@ void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image, const char *getArchName(const device &Device) { namespace syclex = sycl::ext::oneapi::experimental; - auto Arch = Device.get_info(); + auto Arch = getSyclObjImpl(Device)->getDeviceArch(); switch (Arch) { #define __SYCL_ARCHITECTURE(ARCH, VAL) \ case syclex::architecture::ARCH: \ @@ -1369,45 +1369,14 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( // Here, we aim to select all the device images from the // [ItBegin, ItEnd) range that are AOT compiled for Device - // (checked using info::device::architecture) or JIT compiled. + // (checked using info::device::architecture) or JIT compiled. // This selection will then be passed to urDeviceSelectBinary // for final selection. - std::string_view ArchName = getArchName(Device); std::vector DeviceFilteredImgs; DeviceFilteredImgs.reserve(std::distance(ItBegin, ItEnd)); for (auto It = ItBegin; It != ItEnd; ++It) { - auto PropRange = It->second->getDeviceRequirements(); - auto PropIt = - std::find_if(PropRange.begin(), PropRange.end(), [&](const auto &Prop) { - return Prop->Name == std::string_view("compile_target"); - }); - auto AddImg = [&]() { DeviceFilteredImgs.push_back(It->second); }; - - // Device image has no compile_target property, so it is JIT compiled. - if (PropIt == PropRange.end()) { - AddImg(); - continue; - } - - // Device image has the compile_target property, so it is AOT compiled for - // some device, check if that architecture is Device's architecture. - auto CompileTargetByteArray = DeviceBinaryProperty(*PropIt).asByteArray(); - CompileTargetByteArray.dropBytes(8); - std::string_view CompileTarget( - reinterpret_cast(&CompileTargetByteArray[0]), - CompileTargetByteArray.size()); - // Note: there are no explicit targets for CPUs, so on x86_64, - // intel_cpu_spr, and intel_cpu_gnr, we use a spir64_x86_64 - // compile target image. - // TODO: When dedicated targets for CPU are added, (i.e. - // -fsycl-targets=intel_cpu_spr etc.) remove this special - // handling of CPU targets. - if ((ArchName == CompileTarget) || - (CompileTarget == "spir64_x86_64" && - (ArchName == "x86_64" || ArchName == "intel_cpu_spr" || - ArchName == "intel_cpu_gnr"))) { - AddImg(); - } + if (doesImageTargetMatchDevice(*It->second, Device)) + DeviceFilteredImgs.push_back(It->second); } if (DeviceFilteredImgs.empty()) @@ -3405,6 +3374,67 @@ checkDevSupportDeviceRequirements(const device &Dev, return {}; } +bool doesImageTargetMatchDevice(const RTDeviceBinaryImage &Img, + const device &Dev) { + auto PropRange = Img.getDeviceRequirements(); + auto PropIt = + std::find_if(PropRange.begin(), PropRange.end(), [&](const auto &Prop) { + return Prop->Name == std::string_view("compile_target"); + }); + // Device image has no compile_target property, check target. + if (PropIt == PropRange.end()) { + sycl::backend BE = Dev.get_backend(); + const char *Target = Img.getRawData().DeviceTargetSpec; + if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) { + return (BE == sycl::backend::opencl || + BE == sycl::backend::ext_oneapi_level_zero); + } + if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) { + return Dev.is_cpu(); + } + if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) { + return Dev.is_gpu() && (BE == sycl::backend::opencl || + BE == sycl::backend::ext_oneapi_level_zero); + } + if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) { + return Dev.is_accelerator(); + } + if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0 || + strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64) == 0) { + return BE == sycl::backend::ext_oneapi_cuda; + } + if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0 || + strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN) == 0) { + return BE == sycl::backend::ext_oneapi_hip; + } + if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0) { + return BE == sycl::backend::ext_oneapi_native_cpu; + } + assert(false && "Unexpected image target"); + return false; + } + + // Device image has the compile_target property, so it is AOT compiled for + // some device, check if that architecture is Device's architecture. + auto CompileTargetByteArray = DeviceBinaryProperty(*PropIt).asByteArray(); + // Drop 8 bytes describing the size of the byte array. + CompileTargetByteArray.dropBytes(8); + std::string_view CompileTarget( + reinterpret_cast(&CompileTargetByteArray[0]), + CompileTargetByteArray.size()); + std::string_view ArchName = getArchName(Dev); + // Note: there are no explicit targets for CPUs, so on x86_64, + // intel_cpu_spr, and intel_cpu_gnr, we use a spir64_x86_64 + // compile target image. + // TODO: When dedicated targets for CPU are added, (i.e. + // -fsycl-targets=intel_cpu_spr etc.) remove this special + // handling of CPU targets. + return ((ArchName == CompileTarget) || + (CompileTarget == "spir64_x86_64" && + (ArchName == "x86_64" || ArchName == "intel_cpu_spr" || + ArchName == "intel_cpu_gnr"))); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 8ab12229fc6c5..0586a41a83540 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -58,6 +58,9 @@ checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &BinImages, const NDRDescT &NDRDesc = {}); +bool doesImageTargetMatchDevice(const RTDeviceBinaryImage &Img, + const device &Dev); + // This value must be the same as in libdevice/device_itt.h. // See sycl/doc/design/ITTAnnotations.md for more info. static constexpr uint32_t inline ITTSpecConstId = 0xFF747469; diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 5e81084c9aaaa..a5bf9245c6d82 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -307,35 +307,6 @@ std::vector get_kernel_ids() { bool is_compatible(const std::vector &KernelIDs, const device &Dev) { if (KernelIDs.empty()) return true; - // TODO: also need to check that the architecture specified by the - // "-fsycl-targets" flag matches the device when we are able to get the - // device's arch. - auto doesImageTargetMatchDevice = [](const device &Dev, - const detail::RTDeviceBinaryImage &Img) { - const char *Target = Img.getRawData().DeviceTargetSpec; - auto BE = Dev.get_backend(); - if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) { - return (BE == sycl::backend::opencl || - BE == sycl::backend::ext_oneapi_level_zero); - } else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == - 0) { - return Dev.is_cpu(); - } else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) { - return Dev.is_gpu() && (BE == sycl::backend::opencl || - BE == sycl::backend::ext_oneapi_level_zero); - } else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) { - return Dev.is_accelerator(); - } else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0) { - return BE == sycl::backend::ext_oneapi_cuda; - } else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0) { - return BE == sycl::backend::ext_oneapi_hip; - } else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0) { - return BE == sycl::backend::ext_oneapi_native_cpu; - } - - return false; - }; - // One kernel may be contained in several binary images depending on the // number of targets. This kernel is compatible with the device if there is // at least one image (containing this kernel) whose aspects are supported by @@ -347,7 +318,7 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { if (std::none_of(BinImages.begin(), BinImages.end(), [&](const detail::RTDeviceBinaryImage *Img) { return doesDevSupportDeviceRequirements(Dev, *Img) && - doesImageTargetMatchDevice(Dev, *Img); + doesImageTargetMatchDevice(*Img, Dev); })) return false; } diff --git a/sycl/unittests/program_manager/CompileTarget.cpp b/sycl/unittests/program_manager/CompileTarget.cpp index 4d0bfaae055c1..f39e4d717c59e 100644 --- a/sycl/unittests/program_manager/CompileTarget.cpp +++ b/sycl/unittests/program_manager/CompileTarget.cpp @@ -59,12 +59,14 @@ class NDRangeKernel; class RangeKernel; class NoDeviceKernel; class JITFallbackKernel; +class SKLOnlyKernel; MOCK_INTEGRATION_HEADER(SingleTaskKernel) MOCK_INTEGRATION_HEADER(NDRangeKernel) MOCK_INTEGRATION_HEADER(RangeKernel) MOCK_INTEGRATION_HEADER(NoDeviceKernel) MOCK_INTEGRATION_HEADER(JITFallbackKernel) +MOCK_INTEGRATION_HEADER(SKLOnlyKernel) static sycl::unittest::MockDeviceImage Img[] = { sycl::unittest::generateDefaultImage({"SingleTaskKernel"}), @@ -93,7 +95,8 @@ static sycl::unittest::MockDeviceImage Img[] = { sycl::unittest::generateDefaultImage({"JITFallbackKernel"}), sycl::unittest::generateImageWithCompileTarget("JITFallbackKernel", "intel_gpu_bdw"), -}; + sycl::unittest::generateImageWithCompileTarget("SKLOnlyKernel", + "intel_gpu_skl")}; static sycl::unittest::MockDeviceImageArray ImgArray{Img}; @@ -336,3 +339,10 @@ TEST_F(CompileTargetTest, JITFallbackKernel) { ASSERT_EQ(createWithILLog.size(), 1U); EXPECT_EQ(createWithILLog.back(), "JITFallbackKernel"); } + +TEST_F(CompileTargetTest, IsCompatible) { + device Skl{archSelector(syclex::architecture::intel_gpu_skl)}; + EXPECT_TRUE(sycl::is_compatible(Skl)); + device Pvc{archSelector(syclex::architecture::intel_gpu_pvc)}; + EXPECT_FALSE(sycl::is_compatible(Pvc)); +}