diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e192f5c701d8a..82d0644b2a23e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2719,7 +2719,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; } if (GenerateIntrinsics && - !(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) { + !(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() || + getTarget().getTriple().isAMDGCN()))) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIceil: case Builtin::BIceilf: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f8f2ff94e2a2c..263f41171a359 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5533,7 +5533,7 @@ class OffloadingActionBuilder final { // AOT compilation. bool SYCLDeviceLibLinked = false; Action *NativeCPULib = nullptr; - if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) { + if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) { bool UseJitLink = IsSPIR && Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, @@ -5848,10 +5848,9 @@ class OffloadingActionBuilder final { ++NumOfDeviceLibLinked; Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(), Args.MakeArgString(LibName)); - if (TC->getTriple().isNVPTX() || - (TC->getTriple().isSPIR() && - TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_fpga)) { + if (TC->getTriple().isSPIR() && + TC->getTriple().getSubArch() == + llvm::Triple::SPIRSubArch_fpga) { auto *SYCLDeviceLibsInputAction = C.MakeAction(*InputArg, types::TY_Object); auto *SYCLDeviceLibsUnbundleAction = diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 37589f00c4140..d6e1d96502a4e 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -165,9 +165,9 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const 
Compilation &C, // spir64 target is actually JIT compilation, so we defer selection of // bfloat16 libraries to runtime. For AOT we need libraries, but skip - // for Nvidia. - NeedLibs = - Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX(); + // for Nvidia and AMD. + NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch && + !Triple.isNVPTX() && !Triple.isAMDGCN(); UseNative = false; if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && C.hasOffloadToolChain()) { @@ -212,6 +212,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, SmallVector LibraryList; const llvm::opt::ArgList &Args = C.getArgs(); + // For NVPTX and AMDGCN we only use one single bitcode library and ignore + // manually specified SYCL device libraries. + bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN(); + struct DeviceLibOptInfo { StringRef DeviceLibName; StringRef DeviceLibOption; @@ -233,10 +237,13 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) NoDeviceLibs = true; + bool PrintUnusedLibWarning = false; for (StringRef Val : A->getValues()) { if (Val == "all") { for (const auto &K : DeviceLibLinkInfo.keys()) - DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal"); + DeviceLibLinkInfo[K] = (!IgnoreSingleLibs && !NoDeviceLibs) || + (K == "internal" && NoDeviceLibs); + PrintUnusedLibWarning = false; break; } auto LinkInfoIter = DeviceLibLinkInfo.find(Val); @@ -247,10 +254,24 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; } - DeviceLibLinkInfo[Val] = true && !NoDeviceLibs; + DeviceLibLinkInfo[Val] = !NoDeviceLibs && !IgnoreSingleLibs; + PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs; } + if (PrintUnusedLibWarning) + 
C.getDriver().Diag(diag::warn_ignored_clang_option) + << A->getSpelling() << A->getAsString(Args); } } + + if (TargetTriple.isNVPTX() && !NoDeviceLibs) + LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc")); + + if (TargetTriple.isAMDGCN() && !NoDeviceLibs) + LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc")); + + if (IgnoreSingleLibs && !NoDeviceLibs) + return LibraryList; + using SYCLDeviceLibsList = SmallVector; const SYCLDeviceLibsList SYCLDeviceWrapperLibs = { @@ -304,10 +325,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver(); StringRef LibSuffix = ".bc"; - if (TargetTriple.isNVPTX() || - (TargetTriple.isSPIR() && - TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)) - // For NVidia or FPGA, we are unbundling objects. + if (TargetTriple.isSPIR() && + TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) + // For FPGA, we are unbundling objects. LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o"; if (IsNewOffload) // For new offload model, we use packaged .bc files. 
@@ -323,7 +343,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, }; addLibraries(SYCLDeviceWrapperLibs); - if (IsSpirvAOT || TargetTriple.isNVPTX()) + if (IsSpirvAOT) addLibraries(SYCLDeviceFallbackLibs); bool NativeBfloatLibs; @@ -551,7 +571,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand( this->getToolChain().getTriple().getSubArch() == llvm::Triple::SPIRSubArch_fpga; StringRef LibPostfix = ".bc"; - if (IsNVPTX || IsFPGA) { + if (IsFPGA) { LibPostfix = ".o"; if (HostTC->getTriple().isWindowsMSVCEnvironment() && C.getDriver().IsCLMode()) diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp index af94dada263d1..5c282449dc851 100644 --- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp +++ b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp @@ -5,8 +5,10 @@ // intrinsics. This allows the driver to link in the libdevice definitions for // cosf etc. later in the driver flow. -// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s #include "Inputs/sycl.hpp" diff --git a/clang/test/Driver/sycl-offload-amdgcn.cpp b/clang/test/Driver/sycl-offload-amdgcn.cpp index 389cdc641119c..cf46fc5b696d5 100644 --- a/clang/test/Driver/sycl-offload-amdgcn.cpp +++ b/clang/test/Driver/sycl-offload-amdgcn.cpp @@ -37,17 +37,19 @@ // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 10: 
sycl-post-link, {9}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 11: file-table-tform, {10}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object -// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl) +// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object +// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl) /// Check that we only unbundle an archive once. 
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \ diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index db5e024363b02..324c5aa9cdd0e 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -53,28 +53,22 @@ // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.o{{.*}}", object -// CHK-PHASES-NO-CC: 11: clang-offload-unbundler, {10}, object -// CHK-PHASES-NO-CC: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object -// CHK-PHASES-NO-CC: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o{{.*}}", object -// CHK-PHASES-NO-CC: 14: clang-offload-unbundler, {13}, object -// CHK-PHASES-NO-CC: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object -// CHK-PHASES-NO-CC: 16: input, "{{.*}}libsycl-itt-stubs.o{{.*}}", object -// CHK-PHASES-NO-CC: 17: clang-offload-unbundler, {16}, object -// CHK-PHASES-NO-CC: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object -// CHK-PHASES-NO-CC: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 22: sycl-post-link, {21}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 23: file-table-tform, {22}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 24: backend, {23}, assembler, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 25: assembler, {24}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 29: 
clang-offload-wrapper, {28}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {29}, object -// CHK-PHASES-NO-CC: 31: linker, {8, 30}, image, (host-sycl) +// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object +// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl) // /// Check phases specifying a compute capability. 
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \ @@ -97,28 +91,22 @@ // CHK-PHASES: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35) -// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.o", object -// CHK-PHASES: 11: clang-offload-unbundler, {10}, object -// CHK-PHASES: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object -// CHK-PHASES: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object -// CHK-PHASES: 14: clang-offload-unbundler, {13}, object -// CHK-PHASES: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object -// CHK-PHASES: 16: input, "{{.*}}libsycl-itt-stubs.o", object -// CHK-PHASES: 17: clang-offload-unbundler, {16}, object -// CHK-PHASES: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object -// CHK-PHASES: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) -// CHK-PHASES: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) -// CHK-PHASES: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_35) - // CHK-PHASES: 22: sycl-post-link, {21}, ir, (device-sycl, sm_35) -// CHK-PHASES: 23: file-table-tform, {22}, ir, (device-sycl, sm_35) -// CHK-PHASES: 24: backend, {23}, assembler, (device-sycl, sm_35) -// CHK-PHASES: 25: assembler, {24}, object, (device-sycl, sm_35) -// CHK-PHASES: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_35) -// CHK-PHASES: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_35) -// CHK-PHASES: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {29}, object -// CHK-PHASES: 31: linker, {8, 30}, image, (host-sycl) +// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 12: 
input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35) +// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35) +// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35) +// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35) +// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35) +// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35) +// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object +// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl) /// Check calling preprocessor only // RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \ diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index f05492d777977..dff0f66ba25b9 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -233,6 +233,7 @@ if( ENABLE_RUNTIME_SUBNORMAL ) foreach( file subnormal_use_default subnormal_disable ) link_bc( TARGET ${file} + RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR} INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll ) install( FILES $ ARCHIVE @@ -405,7 +406,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Enable SPIR-V builtin function declarations, so they don't # have to be explicity declared in the soruce. 
list( APPEND flags -Xclang -fdeclare-spirv-builtins) - set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" ) file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} ) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 2a843dd67fb8c..4711b9eb3e3b8 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -86,12 +86,15 @@ endfunction() # Custom target to create # * INPUT ... # List of bytecode files to link together +# * RSP_DIR +# Directory where a response file should be placed +# (Only needed for WIN32 or CYGWIN) # * DEPENDENCIES ... # List of extra dependencies to inject function(link_bc) cmake_parse_arguments(ARG "" - "TARGET" + "TARGET;RSP_DIR" "INPUTS;DEPENDENCIES" ${ARGN} ) @@ -100,7 +103,7 @@ function(link_bc) if( WIN32 OR CYGWIN ) # Create a response file in case the number of inputs exceeds command-line # character limits on certain platforms. - file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE ) + file( TO_CMAKE_PATH ${ARG_RSP_DIR}/${ARG_TARGET}.rsp RSP_FILE ) # Turn it into a space-separate list of input files list( JOIN ARG_INPUTS " " RSP_INPUT ) file( WRITE ${RSP_FILE} ${RSP_INPUT} ) @@ -216,6 +219,50 @@ function(add_libclc_alias alias target) endfunction(add_libclc_alias alias target) +# Runs opt and prepare-builtins on a bitcode file specified by lib_tgt +# +# ARGUMENTS: +# * LIB_TGT string +# Target name that becomes dependent on the out file named LIB_TGT.bc +# * IN_FILE string +# Target name of the input bytecode file +# * OUT_DIR string +# Name of the directory where the output should be placed +# * DEPENDENCIES ... 
+# List of extra dependencies to inject +function(process_bc out_file) + cmake_parse_arguments(ARG + "" + "LIB_TGT;IN_FILE;OUT_DIR" + "OPT_FLAGS;DEPENDENCIES" + ${ARGN}) + add_custom_command( OUTPUT ${ARG_LIB_TGT}.bc + COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${ARG_LIB_TGT}.bc + ${ARG_IN_FILE} + DEPENDS ${opt_target} ${ARG_IN_FILE} ${ARG_DEPENDENCIES} + ) + add_custom_target( ${ARG_LIB_TGT} + ALL DEPENDS ${ARG_LIB_TGT}.bc + ) + set_target_properties( ${ARG_LIB_TGT} + PROPERTIES TARGET_FILE ${ARG_LIB_TGT}.bc + ) + + set( builtins_opt_lib $ ) + + # Add prepare target + add_custom_command( OUTPUT ${ARG_OUT_DIR}/${out_file} + COMMAND ${prepare_builtins_exe} -o ${ARG_OUT_DIR}/${out_file} + ${builtins_opt_lib} + DEPENDS ${builtins_opt_lib} ${ARG_LIB_TGT} ${prepare_builtins_target} ) + add_custom_target( prepare-${out_file} ALL + DEPENDS ${ARG_OUT_DIR}/${out_file} + ) + set_target_properties( prepare-${out_file} + PROPERTIES TARGET_FILE ${ARG_OUT_DIR}/${out_file} + ) +endfunction() + # add_libclc_builtin_set(arch_suffix # TRIPLE string # Triple used to compile @@ -291,44 +338,28 @@ macro(add_libclc_builtin_set arch_suffix) link_bc( TARGET ${builtins_link_lib_tgt} INPUTS ${bytecode_files} + RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR} DEPENDENCIES ${builtins_comp_lib_tgt} ) set( builtins_link_lib $ ) - set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) + add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + DEPENDS ${builtins_link_lib} prepare_builtins ) - # Add opt target - add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc - COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc - ${builtins_link_lib} - DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt} - ) - add_custom_target( ${builtins_opt_lib_tgt} - ALL DEPENDS ${builtins_opt_lib_tgt}.bc - ) - set_target_properties( ${builtins_opt_lib_tgt} - PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc - ) + set( 
builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) - set( builtins_opt_lib $ ) - - # Add prepare target - set( obj_suffix ${arch_suffix}.bc ) - add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} - COMMAND ${prepare_builtins_exe} -o ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ${builtins_opt_lib} - DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} ) - add_custom_target( prepare-${obj_suffix} ALL - DEPENDS ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ) - set_target_properties( prepare-${obj_suffix} - PROPERTIES TARGET_FILE ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ) + process_bc(${arch_suffix}.bc + LIB_TGT ${builtins_opt_lib_tgt} + IN_FILE ${builtins_link_lib} + OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + OPT_FLAGS ${ARG_OPT_FLAGS} + DEPENDENCIES ${builtins_link_lib_tgt}) # Add dependency to top-level pseudo target to ease making other # targets dependent on libclc. + set( obj_suffix ${arch_suffix}.bc ) add_dependencies(${ARG_PARENT_TARGET} prepare-${obj_suffix}) set( builtins_lib $ ) diff --git a/libdevice/CMakeLists.txt b/libdevice/CMakeLists.txt index b6078f9a44cf8..564391547677f 100644 --- a/libdevice/CMakeLists.txt +++ b/libdevice/CMakeLists.txt @@ -1,6 +1,8 @@ # Utility project providing various functionalities for SPIR-V devices # without native support of these functionalities. 
+include(${CMAKE_CURRENT_SOURCE_DIR}/../libclc/cmake/modules/AddLibclc.cmake) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index c1aac6d017eff..5314c2bc7ad33 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -1,26 +1,31 @@ set(obj_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") -set(obj_new_offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") +set(obj-new-offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") if (MSVC) - set(lib-suffix obj) - set(new-offload-lib-suffix new.obj) + set(obj-suffix obj) + set(obj-new-offload-suffix new.obj) set(spv_binary_dir "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") set(install_dest_spv bin) - set(devicelib_host_static sycl-devicelib-host.lib) - set(devicelib_host_static_new_offload sycl-devicelib-host.new.lib) + set(devicelib_host_static_obj sycl-devicelib-host.lib) + set(devicelib_host_static_obj-new-offload sycl-devicelib-host.new.lib) else() - set(lib-suffix o) - set(new-offload-lib-suffix new.o) + set(obj-suffix o) + set(obj-new-offload-suffix new.o) set(spv_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") set(install_dest_spv lib${LLVM_LIBDIR_SUFFIX}) - set(devicelib_host_static libsycl-devicelib-host.a) - set(devicelib_host_static_new_offload libsycl-devicelib-host.new.a) + set(devicelib_host_static_obj libsycl-devicelib-host.a) + set(devicelib_host_static_obj-new-offload libsycl-devicelib-host.new.a) endif() +set(spv-suffix spv) +set(bc-suffix bc) set(bc_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") -set(install_dest_lib lib${LLVM_LIBDIR_SUFFIX}) +set(install_dest_obj lib${LLVM_LIBDIR_SUFFIX}) +set(install_dest_obj-new-offload lib${LLVM_LIBDIR_SUFFIX}) set(install_dest_bc lib${LLVM_LIBDIR_SUFFIX}) set(clang $) set(llvm-ar $) +set(llvm-link $) +set(llvm-opt $) string(CONCAT sycl_targets_opt "-fsycl-targets=" @@ -46,112 
+51,149 @@ if (NOT SYCL_LIBDEVICE_GCC_TOOLCHAIN STREQUAL "") list(APPEND compile_opts "--gcc-toolchain=${SYCL_LIBDEVICE_GCC_TOOLCHAIN}") endif() -if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") - list(APPEND compile_opts - "-fno-sycl-libspirv" - "-fno-bundle-offload-arch" - "-nocudalib" - "--cuda-gpu-arch=sm_50") -endif() - if (WIN32) list(APPEND compile_opts -D_ALLOW_RUNTIME_LIBRARY_MISMATCH) list(APPEND compile_opts -D_ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH) endif() -add_custom_target(libsycldevice-obj) -add_custom_target(libsycldevice-obj-new-offload) -add_custom_target(libsycldevice-spv) -add_custom_target(libsycldevice-bc) - -add_custom_target(libsycldevice DEPENDS - libsycldevice-obj - libsycldevice-bc - libsycldevice-obj-new-offload - libsycldevice-spv) - -function(add_devicelib_obj obj_filename) - cmake_parse_arguments(OBJ "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-obj-file ${obj_binary_dir}/${obj_filename}.${lib-suffix}) - add_custom_command(OUTPUT ${devicelib-obj-file} - COMMAND ${clang} -fsycl -c - ${compile_opts} ${sycl_targets_opt} ${OBJ_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${OBJ_SRC} - -o ${devicelib-obj-file} - MAIN_DEPENDENCY ${OBJ_SRC} - DEPENDS ${OBJ_DEP} - VERBATIM) - set(devicelib-obj-target ${obj_filename}-obj) - add_custom_target(${devicelib-obj-target} DEPENDS ${devicelib-obj-file}) - add_dependencies(libsycldevice-obj ${devicelib-obj-target}) - install(FILES ${devicelib-obj-file} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) - - set(devicelib-obj-file-new-offload ${obj_new_offload_binary_dir}/${obj_filename}.${new-offload-lib-suffix}) - add_custom_command(OUTPUT ${devicelib-obj-file-new-offload} - COMMAND ${clang} -fsycl -c --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} ${OBJ_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${OBJ_SRC} - -o ${devicelib-obj-file-new-offload} - MAIN_DEPENDENCY ${OBJ_SRC} - DEPENDS ${OBJ_DEP} - VERBATIM) - 
set(devicelib-obj-target-new-offload ${obj_filename}-new-offload-obj) - add_custom_target(${devicelib-obj-target-new-offload} DEPENDS ${devicelib-obj-file-new-offload}) - add_dependencies(libsycldevice-obj ${devicelib-obj-target-new-offload}) - install(FILES ${devicelib-obj-file-new-offload} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) -endfunction() +add_custom_target(libsycldevice) + +set(filetypes obj obj-new-offload spv bc) + +foreach(filetype IN LISTS filetypes) + add_custom_target(libsycldevice-${filetype}) + add_dependencies(libsycldevice libsycldevice-${filetype}) +endforeach() -function(add_devicelib_spv spv_filename) - cmake_parse_arguments(SPV "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-spv-file ${spv_binary_dir}/${spv_filename}.spv) - add_custom_command(OUTPUT ${devicelib-spv-file} - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} ${SPV_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${SPV_SRC} - -o ${devicelib-spv-file} - MAIN_DEPENDENCY ${SPV_SRC} - DEPENDS ${SPV_DEP} - VERBATIM) - set(devicelib-spv-target ${spv_filename}-spv) - add_custom_target(${devicelib-spv-target} DEPENDS ${devicelib-spv-file}) - add_dependencies(libsycldevice-spv ${devicelib-spv-target}) - install(FILES ${devicelib-spv-file} - DESTINATION ${install_dest_spv} - COMPONENT libsycldevice) +# For NVPTX and AMDGCN each device library is compiled into a single bitcode +# file and all files created this way are linked into one large bitcode +# library. +# Additional compilation options are needed for compiling each device library. 
+set(devicelib_arch) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND devicelib_arch cuda) + set(compile_opts_cuda "-fsycl-targets=nvptx64-nvidia-cuda" + "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib") + set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false") +endif() +if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND devicelib_arch amd) + set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa" + "-Xsycl-target-backend" "--offload-arch=gfx940") + set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false") +endif() + + +set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv) +set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir) +set(obj-new-offload_device_compile_opts -fsycl -c --offload-new-driver + -foffload-lto=thin ${sycl_targets_opt}) +set(obj_device_compile_opts -fsycl -c ${sycl_targets_opt}) + +# Compiles and installs a single device library. +# +# Arguments: +# * FILETYPE +# Specifies the output file type of the compilation and its respective +# installation directory. +# Adds a new target that the libsycldevice-FILETYPE target will depend on. +# * SRC ... +# Source code files needed for the compilation. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the clang target for compiling. 
+function(compile_lib filename) + cmake_parse_arguments(ARG + "" + "FILETYPE" + "SRC;EXTRA_OPTS;DEPENDENCIES" + ${ARGN}) + + set(devicelib-file + ${${ARG_FILETYPE}_binary_dir}/${filename}.${${ARG_FILETYPE}-suffix}) + + add_custom_command( + OUTPUT ${devicelib-file} + COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS} + ${CMAKE_CURRENT_SOURCE_DIR}/${ARG_SRC} -o ${devicelib-file} + MAIN_DEPENDENCY ${ARG_SRC} + DEPENDS ${ARG_DEPENDENCIES} + VERBATIM + ) + set(devicelib-${ARG_FILETYPE}-target ${filename}-${ARG_FILETYPE}) + add_custom_target(${devicelib-${ARG_FILETYPE}-target} + DEPENDS ${devicelib-file}) + add_dependencies(libsycldevice-${ARG_FILETYPE} + ${devicelib-${ARG_FILETYPE}-target}) + + install( FILES ${devicelib-file} + DESTINATION ${install_dest_${ARG_FILETYPE}} + COMPONENT libsycldevice) endfunction() -function(add_devicelib_bc bc_filename) - cmake_parse_arguments(BC "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-bc-file ${bc_binary_dir}/${bc_filename}.bc) - add_custom_command(OUTPUT ${devicelib-bc-file} - COMMAND ${clang} -fsycl-device-only - -fsycl-device-obj=llvmir ${compile_opts} - ${BC_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${BC_SRC} - -o ${devicelib-bc-file} - MAIN_DEPENDENCY ${BC_SRC} - DEPENDS ${BC_DEP} - VERBATIM) - set(devicelib-bc-target ${bc_filename}-bc) - add_custom_target(${devicelib-bc-target} DEPENDS ${devicelib-bc-file}) - add_dependencies(libsycldevice-bc ${devicelib-bc-target}) - install(FILES ${devicelib-bc-file} - DESTINATION ${install_dest_bc} - COMPONENT libsycldevice) +# Appends a list to a global property. +# +# Arguments: +# * PROPERTY_NAME +# The name of the property to append to. 
+function(append_to_property list) + cmake_parse_arguments(ARG + "" + "PROPERTY_NAME" + "" + ${ARGN}) + get_property(new_property GLOBAL PROPERTY ${ARG_PROPERTY_NAME}) + list(APPEND new_property ${list}) + set_property(GLOBAL PROPERTY ${ARG_PROPERTY_NAME} ${new_property}) endfunction() -function(add_devicelib filename) - cmake_parse_arguments(DL "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - add_devicelib_spv(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) - add_devicelib_bc(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) - add_devicelib_obj(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) +# Creates device libraries for all filetypes. +# Adds bitcode library files additionally for each devicelib_arch target and +# adds the created file to an arch specific global property. +# +# Arguments: +# * SRC ... +# Source code files needed for the compilation. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the clang target for compiling. 
+function(add_devicelibs filename) + cmake_parse_arguments(ARG + "" + "" + "SRC;EXTRA_OPTS;DEPENDENCIES" + ${ARGN}) + + foreach(filetype IN LISTS filetypes) + compile_lib(${filename} + FILETYPE ${filetype} + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + EXTRA_OPTS ${ARG_EXTRA_OPTS} ${${filetype}_device_compile_opts}) + endforeach() + + foreach(arch IN LISTS devicelib_arch) + compile_lib(${filename}--${arch} + FILETYPE bc + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + EXTRA_OPTS ${ARG_EXTRA_OPTS} ${bc_device_compile_opts} + ${compile_opts_${arch}}) + + append_to_property(${bc_binary_dir}/${filename}--${arch}.bc + PROPERTY_NAME BC_DEVICE_LIBS_${arch}) + endforeach() endfunction() +# Set up the dependency lists for the libdevice libraries set(crt_obj_deps wrapper.h device.h spirv_vars.h sycl-compiler) set(complex_obj_deps device_complex.h device.h sycl-compiler) set(cmath_obj_deps device_math.h device.h sycl-compiler) @@ -174,37 +216,91 @@ if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) # Include NativeCPU UR adapter path to enable finding header file with state struct. # libsycl-nativecpu_utils is only needed as BC file by NativeCPU. 
# Todo: add versions for other targets (for cross-compilation) - add_devicelib_bc(libsycl-nativecpu_utils SRC nativecpu_utils.cpp DEP ${itt_obj_deps} EXTRA_ARGS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu) + compile_lib(libsycl-nativecpu_utils + FILETYPE bc + SRC nativecpu_utils.cpp + DEPENDENCIES ${itt_obj_deps} + EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only + -fsycl-device-obj=llvmir) endif() -add_devicelib(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps}) -add_devicelib(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp DEP ${itt_obj_deps}) -add_devicelib(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp DEP ${itt_obj_deps}) - -add_devicelib(libsycl-crt SRC crt_wrapper.cpp DEP ${crt_obj_deps}) -add_devicelib(libsycl-complex SRC complex_wrapper.cpp DEP ${complex_obj_deps}) -add_devicelib(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp DEP ${complex_obj_deps} ) -add_devicelib(libsycl-cmath SRC cmath_wrapper.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp DEP ${cmath_obj_deps} ) -add_devicelib(libsycl-imf SRC imf_wrapper.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEP ${cmath_obj_deps} ) +# Add all device libraries for each filetype except for the Intel math function +# ones. 
+add_devicelibs(libsycl-itt-stubs + SRC itt_stubs.cpp + DEPENDENCIES ${itt_obj_deps}) +add_devicelibs(libsycl-itt-compiler-wrappers + SRC itt_compiler_wrappers.cpp + DEPENDENCIES ${itt_obj_deps}) +add_devicelibs(libsycl-itt-user-wrappers + SRC itt_user_wrappers.cpp + DEPENDENCIES ${itt_obj_deps}) + +add_devicelibs(libsycl-crt + SRC crt_wrapper.cpp + DEPENDENCIES ${crt_obj_deps}) +add_devicelibs(libsycl-complex + SRC complex_wrapper.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-complex-fp64 + SRC complex_wrapper_fp64.cpp + DEPENDENCIES ${complex_obj_deps} ) +add_devicelibs(libsycl-cmath + SRC cmath_wrapper.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-cmath-fp64 + SRC cmath_wrapper_fp64.cpp + DEPENDENCIES ${cmath_obj_deps} ) +add_devicelibs(libsycl-imf + SRC imf_wrapper.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-imf-fp64 + SRC imf_wrapper_fp64.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-imf-bf16 + SRC imf_wrapper_bf16.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-bfloat16 + SRC bfloat16_wrapper.cpp + DEPENDENCIES ${cmath_obj_deps}) if(MSVC) - add_devicelib(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps}) + add_devicelibs(libsycl-msvc-math + SRC msvc_math.cpp + DEPENDENCIES ${cmath_obj_deps}) else() - add_devicelib(libsycl-sanitizer SRC sanitizer_utils.cpp DEP ${sanitizer_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) + add_devicelibs(libsycl-sanitizer + SRC sanitizer_utils.cpp + DEPENDENCIES ${sanitizer_obj_deps} + EXTRA_OPTS -fno-sycl-instrument-device-code) endif() -add_devicelib(libsycl-fallback-cassert SRC fallback-cassert.cpp DEP ${crt_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) -add_devicelib(libsycl-fallback-cstring SRC fallback-cstring.cpp DEP ${crt_obj_deps}) -add_devicelib(libsycl-fallback-complex SRC fallback-complex.cpp DEP ${complex_obj_deps}) -add_devicelib(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp DEP ${complex_obj_deps} ) 
-add_devicelib(libsycl-fallback-cmath SRC fallback-cmath.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp DEP ${bfloat16_obj_deps}) -add_devicelib(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp DEP ${bfloat16_obj_deps}) - +add_devicelibs(libsycl-fallback-cassert + SRC fallback-cassert.cpp + DEPENDENCIES ${crt_obj_deps} + EXTRA_OPTS -fno-sycl-instrument-device-code) +add_devicelibs(libsycl-fallback-cstring + SRC fallback-cstring.cpp + DEPENDENCIES ${crt_obj_deps}) +add_devicelibs(libsycl-fallback-complex + SRC fallback-complex.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-fallback-complex-fp64 + SRC fallback-complex-fp64.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-fallback-cmath + SRC fallback-cmath.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-fallback-cmath-fp64 + SRC fallback-cmath-fp64.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-fallback-bfloat16 + SRC fallback-bfloat16.cpp + DEPENDENCIES ${bfloat16_obj_deps}) +add_devicelibs(libsycl-native-bfloat16 + SRC bfloat16_wrapper.cpp + DEPENDENCIES ${bfloat16_obj_deps}) + +# Create dependency and source lists for Intel math function libraries. file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice) set(imf_fallback_src_dir ${obj_binary_dir}/libdevice) set(imf_src_dir ${CMAKE_CURRENT_SOURCE_DIR}) @@ -217,8 +313,7 @@ set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op. 
imf/imf_inline_fp32.cpp imf/imf_fp32_dl.cpp) set(imf_fallback_fp64_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op.hpp imf_impl_utils.hpp - imf_utils/double_convert.cpp - imf_utils/fp64_round.cpp + imf_utils/double_convert.cpp imf_utils/fp64_round.cpp imf/imf_inline_fp64.cpp imf/imf_fp64_dl.cpp) set(imf_fallback_bf16_deps device.h device_imf.hpp imf_bf16.hpp @@ -275,320 +370,197 @@ if (NOT WIN32) add_imf_host_cxx_flags_compile_flags_if_supported("-fcf-protection=full") endif() -add_custom_command(OUTPUT ${imf_fp32_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=FP32 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_fp32_deps}) - -add_custom_command(OUTPUT ${imf_fp64_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=FP64 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_fp64_deps}) - -add_custom_command(OUTPUT ${imf_bf16_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=BF16 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_bf16_deps}) - -add_custom_target(get_imf_fallback_fp32 DEPENDS ${imf_fp32_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf.spv - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf.bc - DEPENDS 
${imf_fallback_fp32_deps} get_imf_fallback_fp32 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - COMMAND ${clang} -fsycl -c - ${compile_opts} ${sycl_targets_opt} - ${imf_fp32_fallback_src} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - -o ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_fp32_fallback_src} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - -o ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_target(get_imf_fallback_fp64 DEPENDS ${imf_fp64_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - 
VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${compile_opts} ${sycl_targets_opt} - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_target(get_imf_fallback_bf16 DEPENDS ${imf_bf16_fallback_src}) -add_custom_command(OUTPUT 
${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${compile_opts} ${sycl_targets_opt} - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I 
${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_target(imf_fallback_fp32_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf.spv) -add_custom_target(imf_fallback_fp32_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf.bc) -add_custom_target(imf_fallback_fp32_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix}) -add_custom_target(imf_fallback_fp32_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix}) -add_custom_target(imf_fallback_fp32_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_fp32_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_fp32_spv) -add_dependencies(libsycldevice-bc imf_fallback_fp32_bc) -add_dependencies(libsycldevice-obj imf_fallback_fp32_obj) -add_dependencies(libsycldevice-obj imf_fallback_fp32_new_offload_obj) - -add_custom_target(imf_fallback_fp64_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv) -add_custom_target(imf_fallback_fp64_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc) -add_custom_target(imf_fallback_fp64_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix}) -add_custom_target(imf_fallback_fp64_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix}) -add_custom_target(imf_fallback_fp64_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_fp64_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_fp64_spv) -add_dependencies(libsycldevice-bc imf_fallback_fp64_bc) -add_dependencies(libsycldevice-obj imf_fallback_fp64_obj) 
-add_dependencies(libsycldevice-obj imf_fallback_fp64_new_offload_obj) - -add_custom_target(imf_fallback_bf16_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv) -add_custom_target(imf_fallback_bf16_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc) -add_custom_target(imf_fallback_bf16_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix}) -add_custom_target(imf_fallback_bf16_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix}) -add_custom_target(imf_fallback_bf16_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_bf16_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_bf16_spv) -add_dependencies(libsycldevice-bc imf_fallback_bf16_bc) -add_dependencies(libsycldevice-obj imf_fallback_bf16_obj) -add_dependencies(libsycldevice-obj imf_fallback_bf16_new_offload_obj) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - -o ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - -o ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - -o ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - DEPENDS ${imf_obj_deps} - 
VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - -o ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - -o ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - -o ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_target(imf_fp32_host_obj DEPENDS ${obj_binary_dir}/imf-fp32-host.${lib-suffix}) -add_custom_target(imf_fp64_host_obj DEPENDS ${obj_binary_dir}/imf-fp64-host.${lib-suffix}) -add_custom_target(imf_bf16_host_obj DEPENDS ${obj_binary_dir}/imf-bf16-host.${lib-suffix}) - -add_custom_target(imf_fp32_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix}) -add_custom_target(imf_fp64_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix}) -add_custom_target(imf_bf16_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix}) - -add_custom_target(imf_host_obj DEPENDS ${obj_binary_dir}/${devicelib_host_static}) -add_custom_command(OUTPUT ${obj_binary_dir}/${devicelib_host_static} - COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static} - 
${obj_binary_dir}/imf-fp32-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - DEPENDS imf_fp32_host_obj imf_fallback_fp32_host_obj - DEPENDS imf_fp64_host_obj imf_fallback_fp64_host_obj - DEPENDS imf_bf16_host_obj imf_fallback_bf16_host_obj - DEPENDS sycl-compiler - VERBATIM) -add_custom_target(imf_host_new_offload_obj DEPENDS ${obj_binary_dir}/${devicelib_host_static_new_offload}) -add_custom_command(OUTPUT ${obj_binary_dir}/${devicelib_host_static_new_offload} - COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static_new_offload} - ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - DEPENDS imf_fp32_host_new_offload_obj imf_fallback_fp32_host_new_offload_obj - DEPENDS imf_fp64_host_new_offload_obj imf_fallback_fp64_host_new_offload_obj - DEPENDS imf_bf16_host_new_offload_obj imf_fallback_bf16_host_new_offload_obj - DEPENDS sycl-compiler - VERBATIM) -add_dependencies(libsycldevice-obj imf_host_obj) -add_dependencies(libsycldevice-obj imf_host_new_offload_obj) -install(FILES ${spv_binary_dir}/libsycl-fallback-imf.spv - ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - DESTINATION ${install_dest_spv} - COMPONENT libsycldevice) - -install(FILES ${bc_binary_dir}/libsycl-fallback-imf.bc - ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - DESTINATION ${install_dest_bc} - COMPONENT libsycldevice) - 
-install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - ${obj_binary_dir}/${devicelib_host_static} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) - -install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - ${obj_binary_dir}/${devicelib_host_static_new_offload} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) +set(obj-new-offload_host_compile_opts ${imf_host_cxx_flags} --offload-new-driver + -foffload-lto=thin) +set(obj_host_compile_opts ${imf_host_cxx_flags}) + +foreach(datatype IN ITEMS fp32 fp64 bf16) + string(TOUPPER ${datatype} upper_datatype) + + add_custom_command( + OUTPUT ${imf_${datatype}_fallback_src} + COMMAND ${CMAKE_COMMAND} + -D SRC_DIR=${imf_src_dir} + -D DEST_DIR=${imf_fallback_src_dir} + -D IMF_TARGET=${upper_datatype} + -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake + DEPENDS ${imf_fallback_${datatype}_deps}) + + add_custom_target(get_imf_fallback_${datatype} + DEPENDS ${imf_${datatype}_fallback_src}) +endforeach() + +# Adds Intel math functions libraries. +# +# Arguments: +# * SRC ... +# Source code files needed for the compilation. +# * DIR +# The directory where the output file should be located in. +# * FTYPE +# Filetype of the output library file (e.g. 'bc'). +# * DTYPE +# The datatype of the library, which determines the input source +# and dependencies of the compilation command. +# * TGT_NAME +# Name of the new target that depends on the compilation of the library. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# +# Depends on the clang target for compiling. 
+function(add_lib_imf name) + cmake_parse_arguments(ARG + "" + "DIR;FTYPE;DTYPE;TGT_NAME" + "EXTRA_OPTS" + ${ARGN}) + + add_custom_command( + OUTPUT ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix} + COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS} + -I ${CMAKE_CURRENT_SOURCE_DIR}/imf + ${imf_${ARG_DTYPE}_fallback_src} + -o + ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix} + DEPENDS ${imf_fallback_${ARG_DTYPE}_deps} + get_imf_fallback_${ARG_DTYPE} sycl-compiler + VERBATIM) + + add_custom_target(${ARG_TGT_NAME} + DEPENDS ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix}) + + add_dependencies(libsycldevice-${ARG_FTYPE} ${ARG_TGT_NAME}) +endfunction() + +# Add device fallback imf libraries for the SPIRV targets and all filetypes. +foreach(dtype IN ITEMS bf16 fp32 fp64) + foreach(ftype IN LISTS filetypes) + set(libsycl_name libsycl-fallback-imf) + if (NOT (dtype STREQUAL "fp32")) + set(libsycl_name libsycl-fallback-imf-${dtype}) + endif() + set(tgt_name imf_fallback_${dtype}_${ftype}) + + add_lib_imf(${libsycl_name} + DIR ${${ftype}_binary_dir} + FTYPE ${ftype} + DTYPE ${dtype} + EXTRA_OPTS ${${ftype}_device_compile_opts} + TGT_NAME ${tgt_name}) + endforeach() +endforeach() + +# Add device fallback imf libraries for the CUDA and AMD targets. +# The output files are bitcode. +foreach(arch IN LISTS devicelib_arch) + foreach(dtype IN ITEMS bf16 fp32 fp64) + set(tgt_name imf_fallback_${dtype}_bc_${arch}) + + add_lib_imf(libsycl-fallback-imf-${arch}-${dtype} + ARCH ${arch} + DIR ${bc_binary_dir} + FTYPE bc + DTYPE ${dtype} + EXTRA_OPTS ${bc_device_compile_opts} ${compile_opts_${arch}} + TGT_NAME ${tgt_name}) + + append_to_property( + ${bc_binary_dir}/libsycl-fallback-imf-${arch}-${dtype}.${bc-suffix} + PROPERTY_NAME BC_DEVICE_LIBS_${arch}) + endforeach() +endforeach() + +# Create one large bitcode file for the CUDA and AMD targets. +# Use all the files collected in the respective global properties. 
+foreach(arch IN LISTS devicelib_arch) + get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch}) + # Link the bitcode files together. + link_bc(TARGET device_lib_device_${arch} + RSP_DIR ${CMAKE_CURRENT_BINARY_DIR} + INPUTS ${BC_DEVICE_LIBS_${arch}}) + set( builtins_link_lib_${arch} + $<TARGET_PROPERTY:device_lib_device_${arch},TARGET_FILE>) + add_dependencies(libsycldevice-bc device_lib_device_${arch}) + set( builtins_opt_lib_tgt_${arch} builtins_${arch}.opt) + + # Run the optimizer on the resulting bitcode file and call prepare_builtins + # on it, which strips away debug and arch information. + process_bc(devicelib--${arch}.bc + LIB_TGT builtins_${arch}.opt + IN_FILE ${builtins_link_lib_${arch}} + OUT_DIR ${bc_binary_dir} + OPT_FLAGS ${opt_flags_${arch}} + DEPENDENCIES device_lib_device_${arch}) + add_dependencies(libsycldevice-bc prepare-devicelib--${arch}.bc) + set(complete_${arch}_libdev + $<TARGET_PROPERTY:prepare-devicelib--${arch}.bc,TARGET_FILE>) + install( FILES ${complete_${arch}_libdev} + DESTINATION ${install_dest_bc} + COMPONENT libsycldevice) +endforeach() + +# Add host device imf libraries for obj and new offload objects. 
+foreach(dtype IN ITEMS bf16 fp32 fp64) + foreach(ftype IN ITEMS obj obj-new-offload) + set(tgt_name imf_fallback_${dtype}_host_${ftype}) + + add_lib_imf(fallback-imf-${dtype}-host + DIR ${${ftype}_binary_dir} + FTYPE ${ftype} + DTYPE ${dtype} + EXTRA_OPTS ${${ftype}_host_compile_opts} + TGT_NAME ${tgt_name}) + + set(wrapper_name imf_wrapper.cpp) + if (NOT ("${dtype}" STREQUAL "fp32")) + set(wrapper_name imf_wrapper_${dtype}.cpp) + endif() + add_custom_command( + OUTPUT ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix} + COMMAND ${clang} ${${ftype}_host_compile_opts} + ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name} + -o ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix} + MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name} + DEPENDS ${imf_obj_deps} + VERBATIM) + + add_custom_target(imf_${dtype}_host_${ftype} DEPENDS + ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix}) + endforeach() +endforeach() + +foreach(ftype IN ITEMS obj obj-new-offload) + add_custom_target(imf_host_${ftype} + DEPENDS ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}}) + add_custom_command( + OUTPUT ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}} + COMMAND ${llvm-ar} rcs + ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}} + ${${ftype}_binary_dir}/imf-fp32-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/fallback-imf-fp32-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/imf-fp64-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/fallback-imf-fp64-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/imf-bf16-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/fallback-imf-bf16-host.${${ftype}-suffix} + DEPENDS imf_fp32_host_${ftype} imf_fallback_fp32_host_${ftype} + DEPENDS imf_fp64_host_${ftype} imf_fallback_fp64_host_${ftype} + DEPENDS imf_bf16_host_${ftype} imf_fallback_bf16_host_${ftype} + DEPENDS sycl-compiler + VERBATIM) + add_dependencies(libsycldevice-obj imf_host_${ftype}) + + install( FILES ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}} + 
DESTINATION ${install_dest_obj} + COMPONENT libsycldevice) +endforeach() + +foreach(ftype IN LISTS filetypes) + install( + FILES ${${ftype}_binary_dir}/libsycl-fallback-imf.${${ftype}-suffix} + ${${ftype}_binary_dir}/libsycl-fallback-imf-fp64.${${ftype}-suffix} + ${${ftype}_binary_dir}/libsycl-fallback-imf-bf16.${${ftype}-suffix} + DESTINATION ${install_dest_${ftype}} + COMPONENT libsycldevice) +endforeach() + diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index cf40373a90efb..8e0d96c3609b6 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,7 +8,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } @@ -196,4 +197,4 @@ DEVICE_EXTERN_C_INLINE float rintf(float x) { return __nv_rintf(x); } #endif // __NVPTX__ -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index bfc1a122f0f18..e03e9119f2816 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,7 +9,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // All exported functions in math and complex device libraries are weak // reference. 
If users provide their own math or complex functions(with @@ -493,4 +494,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 } } #endif // defined(_WIN32) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index e8160013a66df..8978c32d2d5e4 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ -17,7 +17,8 @@ DeviceGlobal RandNext; #endif -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE void *memcpy(void *dest, const void *src, size_t n) { return __devicelib_memcpy(dest, src, n); @@ -126,4 +127,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line, __spirv_LocalInvocationId_z()); } #endif -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/device.h b/libdevice/device.h index 360af54f9b4c4..2ab565f5a939c 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -15,7 +15,8 @@ #define EXTERN_C #endif // __cplusplus -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #ifdef __SYCL_DEVICE_ONLY__ #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak)) #else // __SYCL_DEVICE_ONLY__ @@ -27,7 +28,7 @@ DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline)) #define DEVICE_EXTERN_C_NOINLINE \ DEVICE_EXTERNAL EXTERN_C __attribute__((noinline)) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #if defined(__SPIR__) || defined(__SPIRV__) || defined(__LIBDEVICE_HOST_IMPL__) #define __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/device_math.h b/libdevice/device_math.h index 01085013dae57..343021bfc5c02 100644 --- 
a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -10,7 +10,8 @@ #define __LIBDEVICE_DEVICE_MATH_H__ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include typedef struct { @@ -361,5 +362,5 @@ float __devicelib_scalbnf(float x, int n); DEVICE_EXTERN_C double __devicelib_scalbn(double x, int exp); -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_DEVICE_MATH_H__ diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 5d3c99d63c556..5f7bcafa6ecc0 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -100,7 +100,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, } #endif // __SPIR__ || __SPIRV__ -#ifdef __NVPTX__ +#if defined(__NVPTX__) || defined(__AMDGCN__) DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file, unsigned __line, const char *__function, @@ -119,4 +119,4 @@ DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File, __assertfail(_Message, _File, _Line, 0, 1); } -#endif +#endif // __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index 49832ef966b5f..064be6640ee28 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -9,7 +9,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -188,4 +189,4 @@ DEVICE_EXTERN_C_INLINE double __devicelib_scalbn(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } -#endif // __SPIR__ || __SPIRV__ 
|| __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 6289126272da4..e7342c09fc909 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,7 +8,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -204,4 +205,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); } DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cstring.cpp b/libdevice/fallback-cstring.cpp index 5d384f00a78cb..e63e83052dfeb 100644 --- a/libdevice/fallback-cstring.cpp +++ b/libdevice/fallback-cstring.cpp @@ -9,7 +9,8 @@ #include "wrapper.h" #include -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src, size_t n) { @@ -202,4 +203,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) { return head_cmp; } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/imf/imf_fp32_dl.cpp b/libdevice/imf/imf_fp32_dl.cpp index f08ee1d305999..eff8c2ac7472d 100644 --- a/libdevice/imf/imf_fp32_dl.cpp +++ b/libdevice/imf/imf_fp32_dl.cpp @@ -11,9 +11,12 @@ /// overhead in these deep learning frameworks. 
//===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE int32_t __devicelib_imf_abs(int32_t x) { return (x >= 0) ? x : -x; } diff --git a/libdevice/imf/imf_fp64_dl.cpp b/libdevice/imf/imf_fp64_dl.cpp index 37fbd906f71eb..d9382bc2ddc21 100644 --- a/libdevice/imf/imf_fp64_dl.cpp +++ b/libdevice/imf/imf_fp64_dl.cpp @@ -11,9 +11,12 @@ /// overhead in these deep learning frameworks. //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) { return __fabs(x); } diff --git a/libdevice/imf/imf_inline_bf16.cpp b/libdevice/imf/imf_inline_bf16.cpp index c7165a1ee0183..96335de774fd0 100644 --- a/libdevice/imf/imf_inline_bf16.cpp +++ b/libdevice/imf/imf_inline_bf16.cpp @@ -5,9 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ + +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE _iml_bf16_internal __devicelib_imf_fmabf16(_iml_bf16_internal a, _iml_bf16_internal b, diff --git a/libdevice/imf/imf_inline_fp32.cpp b/libdevice/imf/imf_inline_fp32.cpp index e71499f8fe057..44061ec40ab45 100644 --- a/libdevice/imf/imf_inline_fp32.cpp +++ b/libdevice/imf/imf_inline_fp32.cpp @@ -5,9 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" + +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16( 
_iml_half_internal a, _iml_half_internal b, _iml_half_internal c) { _iml_half ha(a), hb(b), hc(c); diff --git a/libdevice/imf/imf_inline_fp64.cpp b/libdevice/imf/imf_inline_fp64.cpp index f8d5418513f11..24c016c49344c 100644 --- a/libdevice/imf/imf_inline_fp64.cpp +++ b/libdevice/imf/imf_inline_fp64.cpp @@ -5,9 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" + +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE double __devicelib_imf_fma(double a, double b, double c) { return __fma(a, b, c); diff --git a/libdevice/imf_utils/bfloat16_convert.cpp b/libdevice/imf_utils/bfloat16_convert.cpp index 750cb4e5877b4..1591c90768939 100644 --- a/libdevice/imf_utils/bfloat16_convert.cpp +++ b/libdevice/imf_utils/bfloat16_convert.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ + +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_bfloat162float(_iml_bf16_internal b) { return __bfloat162float(b); diff --git a/libdevice/imf_utils/double_convert.cpp b/libdevice/imf_utils/double_convert.cpp index c4cd6dea07bf1..3c80dfe3ae769 100644 --- a/libdevice/imf_utils/double_convert.cpp +++ b/libdevice/imf_utils/double_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + static inline float __double2float_rd(double x) { #if defined(__LIBDEVICE_HOST_IMPL__) return __double2Tp_host(x, FE_DOWNWARD); diff --git a/libdevice/imf_utils/float_convert.cpp b/libdevice/imf_utils/float_convert.cpp index 85299c0f33823..299ea5c25f96f 100644 --- 
a/libdevice/imf_utils/float_convert.cpp +++ b/libdevice/imf_utils/float_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + static inline int __float2int_rd(float x) { #if defined(__LIBDEVICE_HOST_IMPL__) return __float2Tp_host(x, FE_DOWNWARD); diff --git a/libdevice/imf_utils/fp32_round.cpp b/libdevice/imf_utils/fp32_round.cpp index 32548b1ccf912..973371feca0d9 100644 --- a/libdevice/imf_utils/fp32_round.cpp +++ b/libdevice/imf_utils/fp32_round.cpp @@ -6,10 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "../device.h" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + #include "../device_imf.hpp" #include "../imf_rounding_op.hpp" -#ifdef __LIBDEVICE_IMF_ENABLED__ DEVICE_EXTERN_C_INLINE float __devicelib_imf_fadd_rd(float x, float y) { return __fp_add_sub_entry(x, y, __IML_RTN); diff --git a/libdevice/imf_utils/fp64_round.cpp b/libdevice/imf_utils/fp64_round.cpp index aa4de27a669e1..2f88265a1103a 100644 --- a/libdevice/imf_utils/fp64_round.cpp +++ b/libdevice/imf_utils/fp64_round.cpp @@ -6,10 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "../device.h" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + #include "../device_imf.hpp" #include "../imf_rounding_op.hpp" -#ifdef __LIBDEVICE_IMF_ENABLED__ DEVICE_EXTERN_C_INLINE double __devicelib_imf_dadd_rd(double x, double y) { return __fp_add_sub_entry(x, y, __IML_RTN); diff --git a/libdevice/imf_utils/half_convert.cpp b/libdevice/imf_utils/half_convert.cpp index 3e23d3a46f01e..e16b9ec699f65 100644 --- a/libdevice/imf_utils/half_convert.cpp +++ b/libdevice/imf_utils/half_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef 
__LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_half2float(_iml_half_internal x) { return __half2float(x); diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index fdc850ee42281..06642eec7d267 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_brev(unsigned int x) { unsigned int res = 0; diff --git a/libdevice/imf_utils/simd_emulate.cpp b/libdevice/imf_utils/simd_emulate.cpp index 7369a1598aacb..a8ac73f42ab8b 100644 --- a/libdevice/imf_utils/simd_emulate.cpp +++ b/libdevice/imf_utils/simd_emulate.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + template struct __twice_size; template using __twice_size_t = typename __twice_size::type; template struct __twice_size_tag { diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index 336725cad5f63..be630bccbf579 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "device_imf.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_saturatef(float); diff --git a/libdevice/imf_wrapper_bf16.cpp b/libdevice/imf_wrapper_bf16.cpp index d02903b0a720f..0c72d95bccc63 100644 --- a/libdevice/imf_wrapper_bf16.cpp +++ b/libdevice/imf_wrapper_bf16.cpp @@ -7,10 +7,12 @@ // 
//===----------------------------------------------------------------------===// -#include "imf_bf16.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "imf_bf16.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_bfloat162float(_iml_bf16_internal); diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp index 10cf98e844774..e90979d2bb724 100644 --- a/libdevice/imf_wrapper_fp64.cpp +++ b/libdevice/imf_wrapper_fp64.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "device_imf.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_double2float_rd(double); diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index 4445520bff741..869c343206994 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -11,7 +11,8 @@ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include #include @@ -58,5 +59,5 @@ const size_t_vec __spirv_BuiltInGlobalInvocationId{}; const size_t_vec __spirv_BuiltInLocalInvocationId{}; #endif // !__SPIR__ && !__SPIRV__ -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_SPIRV_VARS_H diff --git a/libdevice/wrapper.h b/libdevice/wrapper.h index bbc0cd4f1ca87..45555785fd2fb 100644 --- a/libdevice/wrapper.h +++ b/libdevice/wrapper.h @@ -11,7 +11,8 @@ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include #include @@ -29,5 +30,5 @@ void __devicelib_assert_fail(const char *expr, const char *file, int32_t line, const char *func, uint64_t gid0, uint64_t gid1, uint64_t gid2, uint64_t lid0, uint64_t lid1, uint64_t lid2); 
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_WRAPPER_H__ diff --git a/sycl/test-e2e/DeviceLib/assert.cpp b/sycl/test-e2e/DeviceLib/assert.cpp index 81bedb8d0350e..c30ef5619a524 100644 --- a/sycl/test-e2e/DeviceLib/assert.cpp +++ b/sycl/test-e2e/DeviceLib/assert.cpp @@ -1,4 +1,4 @@ -// REQUIRES: (cpu || cuda ) && linux +// REQUIRES: (cpu || cuda || hip ) && linux // RUN: %{build} -DSYCL_FALLBACK_ASSERT=1 -o %t.out // (see the other RUN lines below; it is a bit complicated) // diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 54e0c14d08bfa..40749f7cd57a9 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -1,6 +1,5 @@ // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// UNSUPPORTED: hip // RUN: %{build} -fno-builtin %{mathflags} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 94b91255a5f1b..c6ab1c6335e20 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -1,5 +1,4 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: hip // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index 0380234575061..aeda8550294da 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: hip - // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} // RUN: %{build} %{mathflags} -o %t.out diff --git a/sycl/test-e2e/DeviceLib/string_test.cpp b/sycl/test-e2e/DeviceLib/string_test.cpp index c518661303b6e..c189844592520 100644 --- a/sycl/test-e2e/DeviceLib/string_test.cpp +++ 
b/sycl/test-e2e/DeviceLib/string_test.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: hip // RUN: %{build} -Wno-error=deprecated-declarations -Wno-error=pointer-to-int-cast -fno-builtin -o %t.out // RUN: %{run} %t.out //