From 254e98e028cd16f314cdc8aa677b2957e8e9a400 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Fri, 2 Aug 2024 10:31:14 +0100 Subject: [PATCH 01/14] Create one bitcode library for AMD and NVPTX Enable compilation of libdevice for AMD by adding AMDGCN to the macro-guarded code parts in libdevice that enable e.g. standard library math functions. Add a compilation workflow to SYCLLibdevice.cmake for AMD. Create single bitcode libraries for AMD and NVPTX by compiling each libdev file into bitcode first, linking these together and running opt on them. Strip away metadata by reusing prepare_builtins from libclc and making the resulting library architecture independent. Remove NVPTX bundles from the libdev object files and remove any unbundling action spawned by the Clang driver for the SYCL toolchain when compiling for the NVPTX backend. Make the driver link against the single bitcode libraries for AMD and NVPTX for the SYCL toolchain when device library linkage is not excluded. Ensure that the clang tests check for the correctness of the new clang driver actions and check that the driver still links the device code against the itt device libraries when device library linkage has been excluded. Refactor SYCLLibdevice.cmake by creating functions and grouping e.g. the same compilation flags for a filetype together in one variable. Reuse these variables and call functions to remove redundancies. Fix a compilation error of Intel math function libraries for MSVC when targeting AMD. Include "device.h" before including "device_imf.hpp" to avoid the inclusion of a header that failed with a redefinition of symbols error. 
--- clang/lib/Driver/Driver.cpp | 7 +- clang/lib/Driver/ToolChains/SYCL.cpp | 76 +- clang/test/Driver/sycl-offload-amdgcn.cpp | 24 +- clang/test/Driver/sycl-offload-nvptx.cpp | 76 +- libclc/CMakeLists.txt | 2 +- libclc/cmake/modules/AddLibclc.cmake | 82 +- libdevice/CMakeLists.txt | 2 + libdevice/cmake/modules/SYCLLibdevice.cmake | 840 +++++++++----------- libdevice/cmath_wrapper.cpp | 4 +- libdevice/cmath_wrapper_fp64.cpp | 4 +- libdevice/crt_wrapper.cpp | 4 +- libdevice/device.h | 4 +- libdevice/device_math.h | 4 +- libdevice/fallback-cassert.cpp | 5 +- libdevice/fallback-cmath-fp64.cpp | 4 +- libdevice/fallback-cmath.cpp | 4 +- libdevice/fallback-cstring.cpp | 4 +- libdevice/imf/imf_fp32_dl.cpp | 5 +- libdevice/imf/imf_fp64_dl.cpp | 5 +- libdevice/imf/imf_inline_bf16.cpp | 5 +- libdevice/imf/imf_inline_fp32.cpp | 6 +- libdevice/imf/imf_inline_fp64.cpp | 6 +- libdevice/imf_utils/bfloat16_convert.cpp | 5 +- libdevice/imf_utils/double_convert.cpp | 4 +- libdevice/imf_utils/float_convert.cpp | 4 +- libdevice/imf_utils/fp32_round.cpp | 5 +- libdevice/imf_utils/fp64_round.cpp | 5 +- libdevice/imf_utils/half_convert.cpp | 4 +- libdevice/imf_utils/integer_misc.cpp | 5 +- libdevice/imf_utils/simd_emulate.cpp | 5 +- libdevice/imf_wrapper.cpp | 4 +- libdevice/imf_wrapper_bf16.cpp | 4 +- libdevice/imf_wrapper_fp64.cpp | 4 +- libdevice/spirv_vars.h | 4 +- libdevice/wrapper.h | 4 +- 35 files changed, 612 insertions(+), 618 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f8f2ff94e2a2c..b522ac1bd70c9 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5533,7 +5533,7 @@ class OffloadingActionBuilder final { // AOT compilation. 
bool SYCLDeviceLibLinked = false; Action *NativeCPULib = nullptr; - if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) { + if (IsSPIR || IsNVPTX || IsAMDGCN|| IsSYCLNativeCPU) { bool UseJitLink = IsSPIR && Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, @@ -5848,10 +5848,9 @@ class OffloadingActionBuilder final { ++NumOfDeviceLibLinked; Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(), Args.MakeArgString(LibName)); - if (TC->getTriple().isNVPTX() || - (TC->getTriple().isSPIR() && + if (TC->getTriple().isSPIR() && TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_fpga)) { + llvm::Triple::SPIRSubArch_fpga) { auto *SYCLDeviceLibsInputAction = C.MakeAction(*InputArg, types::TY_Object); auto *SYCLDeviceLibsUnbundleAction = diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 37589f00c4140..ee90f8687959a 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -165,9 +165,10 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // spir64 target is actually JIT compilation, so we defer selection of // bfloat16 libraries to runtime. For AOT we need libraries, but skip - // for Nvidia. + // for Nvidia and AMD. NeedLibs = - Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX(); + Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX() + && !Triple.isAMDGCN(); UseNative = false; if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && C.hasOffloadToolChain()) { @@ -212,12 +213,17 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, SmallVector LibraryList; const llvm::opt::ArgList &Args = C.getArgs(); + // For NVPTX and AMDGCN we only use one single bitcode library and ignore + // manually specified SYCL device libraries. 
+ bool ignore_single_libs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN(); + struct DeviceLibOptInfo { StringRef DeviceLibName; StringRef DeviceLibOption; }; bool NoDeviceLibs = false; + // Currently, all SYCL device libraries will be linked by default. Linkage // of "internal" libraries cannot be affected via -fno-sycl-device-lib. llvm::StringMap DeviceLibLinkInfo = { @@ -233,10 +239,12 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) NoDeviceLibs = true; + bool printUnusedLibWarning = false; for (StringRef Val : A->getValues()) { if (Val == "all") { for (const auto &K : DeviceLibLinkInfo.keys()) - DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal"); + DeviceLibLinkInfo[K] = (!ignore_single_libs && !NoDeviceLibs) || (K == "internal" && NoDeviceLibs) ; + printUnusedLibWarning = false; break; } auto LinkInfoIter = DeviceLibLinkInfo.find(Val); @@ -247,10 +255,25 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; } - DeviceLibLinkInfo[Val] = true && !NoDeviceLibs; + DeviceLibLinkInfo[Val] = true && !NoDeviceLibs && !ignore_single_libs; + printUnusedLibWarning = ignore_single_libs && !NoDeviceLibs && true; } + if (printUnusedLibWarning) + C.getDriver().Diag(diag::warn_ignored_clang_option) + << A->getSpelling() << A->getAsString(Args); } } + + if (TargetTriple.isNVPTX() && !NoDeviceLibs) { + LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc")); + } + if (TargetTriple.isAMDGCN() && !NoDeviceLibs) { + LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc")); + } + + if (ignore_single_libs && !NoDeviceLibs) + return LibraryList; + using SYCLDeviceLibsList = SmallVector; const SYCLDeviceLibsList SYCLDeviceWrapperLibs = { @@ -304,10 +327,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple 
&TargetTriple, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver(); StringRef LibSuffix = ".bc"; - if (TargetTriple.isNVPTX() || - (TargetTriple.isSPIR() && + if ((TargetTriple.isSPIR() && TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)) - // For NVidia or FPGA, we are unbundling objects. + // For FPGA, we are unbundling objects. LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o"; if (IsNewOffload) // For new offload model, we use packaged .bc files. @@ -323,7 +345,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, }; addLibraries(SYCLDeviceWrapperLibs); - if (IsSpirvAOT || TargetTriple.isNVPTX()) + if (IsSpirvAOT) addLibraries(SYCLDeviceFallbackLibs); bool NativeBfloatLibs; @@ -482,35 +504,19 @@ void SYCL::populateSYCLDeviceTraitsMacrosArgs( // The list should match pre-built SYCL device library files located in // compiler package. Once we add or remove any SYCL device library files, // the list should be updated accordingly. 
-static llvm::SmallVector SYCLDeviceLibList{ - "bfloat16", - "crt", - "cmath", - "cmath-fp64", - "complex", - "complex-fp64", +static llvm::SmallVector SYCLDeviceLibList { + "bfloat16", "crt", "cmath", "cmath-fp64", "complex", "complex-fp64", #if defined(_WIN32) - "msvc-math", + "msvc-math", #else - "sanitizer", + "sanitizer", #endif - "imf", - "imf-fp64", - "imf-bf16", - "itt-compiler-wrappers", - "itt-stubs", - "itt-user-wrappers", - "fallback-cassert", - "fallback-cstring", - "fallback-cmath", - "fallback-cmath-fp64", - "fallback-complex", - "fallback-complex-fp64", - "fallback-imf", - "fallback-imf-fp64", - "fallback-imf-bf16", - "fallback-bfloat16", - "native-bfloat16"}; + "imf", "imf-fp64", "imf-bf16", "itt-compiler-wrappers", "itt-stubs", + "itt-user-wrappers", "fallback-cassert", "fallback-cstring", + "fallback-cmath", "fallback-cmath-fp64", "fallback-complex", + "fallback-complex-fp64", "fallback-imf", "fallback-imf-fp64", + "fallback-imf-bf16", "fallback-bfloat16", "native-bfloat16" +}; const char *SYCL::Linker::constructLLVMLinkCommand( Compilation &C, const JobAction &JA, const InputInfo &Output, @@ -551,7 +557,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand( this->getToolChain().getTriple().getSubArch() == llvm::Triple::SPIRSubArch_fpga; StringRef LibPostfix = ".bc"; - if (IsNVPTX || IsFPGA) { + if (IsFPGA) { LibPostfix = ".o"; if (HostTC->getTriple().isWindowsMSVCEnvironment() && C.getDriver().IsCLMode()) diff --git a/clang/test/Driver/sycl-offload-amdgcn.cpp b/clang/test/Driver/sycl-offload-amdgcn.cpp index 389cdc641119c..5bbf0f7b77e8d 100644 --- a/clang/test/Driver/sycl-offload-amdgcn.cpp +++ b/clang/test/Driver/sycl-offload-amdgcn.cpp @@ -37,17 +37,19 @@ // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 10: sycl-post-link, {9}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 11: 
file-table-tform, {10}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object -// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl) +// CHK-PHASES-NO-CC: 10: input, "{{.*}}", ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object +// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl) /// Check that we only unbundle an archive once. 
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \ diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index db5e024363b02..324c5aa9cdd0e 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -53,28 +53,22 @@ // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.o{{.*}}", object -// CHK-PHASES-NO-CC: 11: clang-offload-unbundler, {10}, object -// CHK-PHASES-NO-CC: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object -// CHK-PHASES-NO-CC: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o{{.*}}", object -// CHK-PHASES-NO-CC: 14: clang-offload-unbundler, {13}, object -// CHK-PHASES-NO-CC: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object -// CHK-PHASES-NO-CC: 16: input, "{{.*}}libsycl-itt-stubs.o{{.*}}", object -// CHK-PHASES-NO-CC: 17: clang-offload-unbundler, {16}, object -// CHK-PHASES-NO-CC: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object -// CHK-PHASES-NO-CC: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 22: sycl-post-link, {21}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 23: file-table-tform, {22}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 24: backend, {23}, assembler, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 25: assembler, {24}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 29: 
clang-offload-wrapper, {28}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {29}, object -// CHK-PHASES-NO-CC: 31: linker, {8, 30}, image, (host-sycl) +// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object +// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl) // /// Check phases specifying a compute capability. 
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \ @@ -97,28 +91,22 @@ // CHK-PHASES: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35) -// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.o", object -// CHK-PHASES: 11: clang-offload-unbundler, {10}, object -// CHK-PHASES: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object -// CHK-PHASES: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object -// CHK-PHASES: 14: clang-offload-unbundler, {13}, object -// CHK-PHASES: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object -// CHK-PHASES: 16: input, "{{.*}}libsycl-itt-stubs.o", object -// CHK-PHASES: 17: clang-offload-unbundler, {16}, object -// CHK-PHASES: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object -// CHK-PHASES: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) -// CHK-PHASES: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) -// CHK-PHASES: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_35) - // CHK-PHASES: 22: sycl-post-link, {21}, ir, (device-sycl, sm_35) -// CHK-PHASES: 23: file-table-tform, {22}, ir, (device-sycl, sm_35) -// CHK-PHASES: 24: backend, {23}, assembler, (device-sycl, sm_35) -// CHK-PHASES: 25: assembler, {24}, object, (device-sycl, sm_35) -// CHK-PHASES: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_35) -// CHK-PHASES: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_35) -// CHK-PHASES: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {29}, object -// CHK-PHASES: 31: linker, {8, 30}, image, (host-sycl) +// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 12: 
input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35) +// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35) +// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35) +// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35) +// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35) +// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35) +// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object +// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl) /// Check calling preprocessor only // RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \ diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index f05492d777977..dff0f66ba25b9 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -233,6 +233,7 @@ if( ENABLE_RUNTIME_SUBNORMAL ) foreach( file subnormal_use_default subnormal_disable ) link_bc( TARGET ${file} + RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR} INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll ) install( FILES $ ARCHIVE @@ -405,7 +406,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Enable SPIR-V builtin function declarations, so they don't # have to be explicity declared in the soruce. 
list( APPEND flags -Xclang -fdeclare-spirv-builtins) - set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" ) file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} ) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 2a843dd67fb8c..c98504c1204c6 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -86,12 +86,15 @@ endfunction() # Custom target to create # * INPUT ... # List of bytecode files to link together +# * RSP_DIR +# Directory where a response file should be placed +# (Only needed for WIN32 or CYGWIN) # * DEPENDENCIES ... # List of extra dependencies to inject function(link_bc) cmake_parse_arguments(ARG "" - "TARGET" + "TARGET;RSP_DIR" "INPUTS;DEPENDENCIES" ${ARGN} ) @@ -100,7 +103,7 @@ function(link_bc) if( WIN32 OR CYGWIN ) # Create a response file in case the number of inputs exceeds command-line # character limits on certain platforms. - file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE ) + file( TO_CMAKE_PATH ${ARG_RSP_DIR}/${ARG_TARGET}.rsp RSP_FILE ) # Turn it into a space-separate list of input files list( JOIN ARG_INPUTS " " RSP_INPUT ) file( WRITE ${RSP_FILE} ${RSP_INPUT} ) @@ -216,6 +219,44 @@ function(add_libclc_alias alias target) endfunction(add_libclc_alias alias target) +# runs opt and prepare-builtins on a bitcode file specified by lib_tgt +# LIB_TGT string +# Target name that becomes dependent on the out file named LIB_TGT.bc +# IN_FILE string +# Target name of the input bytecode file +# OUT_DIR string +# Name of the directory where the output should be placed +# DEPENDENCIES ... 
+# List of extra dependencies to inject +function(opt_prepare out_file) + cmake_parse_arguments(OPT "" "LIB_TGT;IN_FILE;OUT_DIR" "DEPENDENCIES" ${ARGN}) + add_custom_command( OUTPUT ${OPT_LIB_TGT}.bc + COMMAND libclc::opt ${ARG_OPT_FLAGS} -o ${OPT_LIB_TGT}.bc + ${OPT_IN_FILE} + DEPENDS libclc::opt ${OPT_IN_FILE} ${OPT_DEPENDENCIES} + ) + add_custom_target( ${OPT_LIB_TGT} + ALL DEPENDS ${OPT_LIB_TGT}.bc + ) +set_target_properties( ${OPT_LIB_TGT} + PROPERTIES TARGET_FILE ${OPT_LIB_TGT}.bc + ) + +set( builtins_opt_lib $ ) + + # Add prepare target + add_custom_command( OUTPUT ${OPT_OUT_DIR}/${out_file} + COMMAND prepare_builtins -o ${OPT_OUT_DIR}/${out_file} + ${builtins_opt_lib} + DEPENDS ${builtins_opt_lib} ${OPT_LIB_TGT} prepare_builtins ) + add_custom_target( prepare-${out_file} ALL + DEPENDS ${OPT_OUT_DIR}/${out_file} + ) + set_target_properties( prepare-${out_file} + PROPERTIES TARGET_FILE ${OPT_OUT_DIR}/${out_file} + ) +endfunction() + # add_libclc_builtin_set(arch_suffix # TRIPLE string # Triple used to compile @@ -291,44 +332,25 @@ macro(add_libclc_builtin_set arch_suffix) link_bc( TARGET ${builtins_link_lib_tgt} INPUTS ${bytecode_files} + RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR} DEPENDENCIES ${builtins_comp_lib_tgt} ) set( builtins_link_lib $ ) - set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) - - # Add opt target - add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc - COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc - ${builtins_link_lib} - DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt} - ) - add_custom_target( ${builtins_opt_lib_tgt} - ALL DEPENDS ${builtins_opt_lib_tgt}.bc - ) - set_target_properties( ${builtins_opt_lib_tgt} - PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc - ) + add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + DEPENDS ${builtins_link_lib} prepare_builtins ) - set( builtins_opt_lib $ ) + set( 
builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) - # Add prepare target - set( obj_suffix ${arch_suffix}.bc ) - add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} - COMMAND ${prepare_builtins_exe} -o ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ${builtins_opt_lib} - DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} ) - add_custom_target( prepare-${obj_suffix} ALL - DEPENDS ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ) - set_target_properties( prepare-${obj_suffix} - PROPERTIES TARGET_FILE ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ) + opt_prepare(${arch_suffix}.bc LIB_TGT ${builtins_opt_lib_tgt} IN_FILE + ${builtins_link_lib} OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + DEPENDENCIES ${builtins_link_lib_tgt}) # Add dependency to top-level pseudo target to ease making other # targets dependent on libclc. + set( obj_suffix ${arch_suffix}.bc ) add_dependencies(${ARG_PARENT_TARGET} prepare-${obj_suffix}) set( builtins_lib $ ) diff --git a/libdevice/CMakeLists.txt b/libdevice/CMakeLists.txt index b6078f9a44cf8..564391547677f 100644 --- a/libdevice/CMakeLists.txt +++ b/libdevice/CMakeLists.txt @@ -1,6 +1,8 @@ # Utility project providing various functionalities for SPIR-V devices # without native support of these functionalities. 
+include(${CMAKE_CURRENT_SOURCE_DIR}/../libclc/cmake/modules/AddLibclc.cmake) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index c1aac6d017eff..efa59b52f1708 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -1,26 +1,31 @@ set(obj_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") -set(obj_new_offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") +set(obj-new-offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") if (MSVC) - set(lib-suffix obj) - set(new-offload-lib-suffix new.obj) + set(obj-suffix obj) + set(obj-new-offload-suffix new.obj) set(spv_binary_dir "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") set(install_dest_spv bin) - set(devicelib_host_static sycl-devicelib-host.lib) - set(devicelib_host_static_new_offload sycl-devicelib-host.new.lib) + set(devicelib_host_static_obj sycl-devicelib-host.lib) + set(devicelib_host_static_obj-new-offload sycl-devicelib-host.new.lib) else() - set(lib-suffix o) - set(new-offload-lib-suffix new.o) + set(obj-suffix o) + set(obj-new-offload-suffix new.o) set(spv_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") set(install_dest_spv lib${LLVM_LIBDIR_SUFFIX}) - set(devicelib_host_static libsycl-devicelib-host.a) - set(devicelib_host_static_new_offload libsycl-devicelib-host.new.a) + set(devicelib_host_static_obj libsycl-devicelib-host.a) + set(devicelib_host_static_obj-new-offload libsycl-devicelib-host.new.a) endif() +set(spv-suffix spv) +set(bc-suffix bc) set(bc_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") -set(install_dest_lib lib${LLVM_LIBDIR_SUFFIX}) +set(install_dest_obj lib${LLVM_LIBDIR_SUFFIX}) +set(install_dest_obj-new-offload lib${LLVM_LIBDIR_SUFFIX}) set(install_dest_bc lib${LLVM_LIBDIR_SUFFIX}) set(clang $) set(llvm-ar $) +set(llvm-link $) +set(llvm-opt $) string(CONCAT sycl_targets_opt "-fsycl-targets=" @@ -46,116 
+51,145 @@ if (NOT SYCL_LIBDEVICE_GCC_TOOLCHAIN STREQUAL "") list(APPEND compile_opts "--gcc-toolchain=${SYCL_LIBDEVICE_GCC_TOOLCHAIN}") endif() -if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") - list(APPEND compile_opts - "-fno-sycl-libspirv" - "-fno-bundle-offload-arch" - "-nocudalib" - "--cuda-gpu-arch=sm_50") -endif() - if (WIN32) list(APPEND compile_opts -D_ALLOW_RUNTIME_LIBRARY_MISMATCH) list(APPEND compile_opts -D_ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH) endif() -add_custom_target(libsycldevice-obj) -add_custom_target(libsycldevice-obj-new-offload) -add_custom_target(libsycldevice-spv) -add_custom_target(libsycldevice-bc) - -add_custom_target(libsycldevice DEPENDS - libsycldevice-obj - libsycldevice-bc - libsycldevice-obj-new-offload - libsycldevice-spv) - -function(add_devicelib_obj obj_filename) - cmake_parse_arguments(OBJ "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-obj-file ${obj_binary_dir}/${obj_filename}.${lib-suffix}) - add_custom_command(OUTPUT ${devicelib-obj-file} - COMMAND ${clang} -fsycl -c - ${compile_opts} ${sycl_targets_opt} ${OBJ_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${OBJ_SRC} - -o ${devicelib-obj-file} - MAIN_DEPENDENCY ${OBJ_SRC} - DEPENDS ${OBJ_DEP} - VERBATIM) - set(devicelib-obj-target ${obj_filename}-obj) - add_custom_target(${devicelib-obj-target} DEPENDS ${devicelib-obj-file}) - add_dependencies(libsycldevice-obj ${devicelib-obj-target}) - install(FILES ${devicelib-obj-file} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) - - set(devicelib-obj-file-new-offload ${obj_new_offload_binary_dir}/${obj_filename}.${new-offload-lib-suffix}) - add_custom_command(OUTPUT ${devicelib-obj-file-new-offload} - COMMAND ${clang} -fsycl -c --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} ${OBJ_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${OBJ_SRC} - -o ${devicelib-obj-file-new-offload} - MAIN_DEPENDENCY ${OBJ_SRC} - DEPENDS ${OBJ_DEP} - VERBATIM) - 
set(devicelib-obj-target-new-offload ${obj_filename}-new-offload-obj) - add_custom_target(${devicelib-obj-target-new-offload} DEPENDS ${devicelib-obj-file-new-offload}) - add_dependencies(libsycldevice-obj ${devicelib-obj-target-new-offload}) - install(FILES ${devicelib-obj-file-new-offload} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) -endfunction() +add_custom_target(libsycldevice) + +set(filetypes obj obj-new-offload spv bc) + +foreach(filetype IN LISTS filetypes) + add_custom_target(libsycldevice-${filetype}) + add_dependencies(libsycldevice libsycldevice-${filetype}) +endforeach() + +# For NVPTX and AMDGCN each device library is compiled into a single bitcode +# file and all files created this way are linked into one large bitcode +# library. +# Additional compilation options are needed for compiling each device library. +set(devicelib_arch) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND devicelib_arch cuda) + set(compile_opts_cuda "-fsycl-targets=nvptx64-nvidia-cuda" + "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib") +endif() +if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND devicelib_arch amd) + set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa" + "-Xsycl-target-backend" "--offload-arch=gfx940") +endif() + -function(add_devicelib_spv spv_filename) - cmake_parse_arguments(SPV "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-spv-file ${spv_binary_dir}/${spv_filename}.spv) - add_custom_command(OUTPUT ${devicelib-spv-file} - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} ${SPV_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${SPV_SRC} - -o ${devicelib-spv-file} - MAIN_DEPENDENCY ${SPV_SRC} - DEPENDS ${SPV_DEP} - VERBATIM) - set(devicelib-spv-target ${spv_filename}-spv) - add_custom_target(${devicelib-spv-target} DEPENDS ${devicelib-spv-file}) - add_dependencies(libsycldevice-spv ${devicelib-spv-target}) - install(FILES ${devicelib-spv-file} - DESTINATION 
${install_dest_spv} - COMPONENT libsycldevice) +set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv) +set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir) +set(obj-new-offload_device_compile_opts -fsycl -c --offload-new-driver + -foffload-lto=thin ${sycl_targets_opt}) +set(obj_device_compile_opts -fsycl -c ${sycl_targets_opt}) + +# Compiles and installs a single device library. +# +# Arguments: +# * FILETYPE +# Specifies the output file type of the compilation and its respective +# installation directory. +# Adds a new target that the libsycldevice-FILETYPE target will depend on. +# * SRC ... +# Source code files needed for the compilation. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the clang target for compiling. +function(compile_lib filename) + cmake_parse_arguments(ARG + "" + "FILETYPE" + "SRC;EXTRA_OPTS;DEPENDENCIES" + ${ARGN}) + + set(devicelib-file + ${${ARG_FILETYPE}_binary_dir}/${filename}.${${ARG_FILETYPE}-suffix}) + + add_custom_command( + OUTPUT ${devicelib-file} + COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS} + ${CMAKE_CURRENT_SOURCE_DIR}/${ARG_SRC} -o ${devicelib-file} + MAIN_DEPENDENCY ${ARG_SRC} + DEPENDS ${ARG_DEPENDENCIES} + VERBATIM + ) + set(devicelib-${ARG_FILETYPE}-target ${filename}-${ARG_FILETYPE}) + add_custom_target(${devicelib-${ARG_FILETYPE}-target} + DEPENDS ${devicelib-file}) + add_dependencies(libsycldevice-${ARG_FILETYPE} + ${devicelib-${ARG_FILETYPE}-target}) + + install( FILES ${devicelib-file} + DESTINATION ${install_dest_${ARG_FILETYPE}} + COMPONENT libsycldevice) endfunction() -function(add_devicelib_bc bc_filename) - cmake_parse_arguments(BC "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-bc-file ${bc_binary_dir}/${bc_filename}.bc) - add_custom_command(OUTPUT ${devicelib-bc-file} - COMMAND ${clang} 
-fsycl-device-only - -fsycl-device-obj=llvmir ${compile_opts} - ${BC_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${BC_SRC} - -o ${devicelib-bc-file} - MAIN_DEPENDENCY ${BC_SRC} - DEPENDS ${BC_DEP} - VERBATIM) - set(devicelib-bc-target ${bc_filename}-bc) - add_custom_target(${devicelib-bc-target} DEPENDS ${devicelib-bc-file}) - add_dependencies(libsycldevice-bc ${devicelib-bc-target}) - install(FILES ${devicelib-bc-file} - DESTINATION ${install_dest_bc} - COMPONENT libsycldevice) +# Appends a list to a global property. +# +# Arguments: +# * PROPERTY_NAME +# The name of the property to append to. +function(append_to_property list) + cmake_parse_arguments(ARG + "" + "PROPERTY_NAME" + "" + ${ARGN}) + get_property(new_property GLOBAL PROPERTY ${ARG_PROPERTY_NAME}) + list(APPEND new_property ${list}) + set_property(GLOBAL PROPERTY ${ARG_PROPERTY_NAME} ${new_property}) endfunction() -function(add_devicelib filename) - cmake_parse_arguments(DL "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - add_devicelib_spv(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) - add_devicelib_bc(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) - add_devicelib_obj(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) +# Creates device libraries for each file type. +# Adds bitcode library files additionally for each devicelib_arch target. +# +# Arguments: +# * SRC ... +# Source code files needed for the compilation. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the clang target for compiling.
+function(add_devicelibs filename) + cmake_parse_arguments(ARG + "" + "" + "SRC;EXTRA_OPTS;DEPENDENCIES" + ${ARGN}) + + foreach(filetype IN LISTS filetypes) + compile_lib(${filename} FILETYPE ${filetype} SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} EXTRA_OPTS + ${ARG_EXTRA_OPTS} ${${filetype}_device_compile_opts}) + endforeach() + + foreach(arch IN LISTS devicelib_arch) + compile_lib(${filename}--${arch} FILETYPE bc SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} EXTRA_OPTS ${ARG_EXTRA_OPTS} + ${bc_device_compile_opts} ${compile_opts_${arch}}) + append_to_property(${bc_binary_dir}/${filename}--${arch}.bc + PROPERTY_NAME BC_DEVICE_LIBS_${arch}) + endforeach() endfunction() +# Set up the dependency lists for the libdevice libraries set(crt_obj_deps wrapper.h device.h spirv_vars.h sycl-compiler) set(complex_obj_deps device_complex.h device.h sycl-compiler) set(cmath_obj_deps device_math.h device.h sycl-compiler) -set(imf_obj_deps device_imf.hpp imf_half.hpp imf_bf16.hpp imf_rounding_op.hpp imf_impl_utils.hpp device.h sycl-compiler) +set(imf_obj_deps device_imf.hpp imf_half.hpp imf_bf16.hpp imf_rounding_op.hpp + imf_impl_utils.hpp device.h sycl-compiler) set(itt_obj_deps device_itt.h spirv_vars.h device.h sycl-compiler) set(bfloat16_obj_deps sycl-headers sycl-compiler) if (NOT MSVC) @@ -169,46 +203,77 @@ endif() if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) if (NOT DEFINED NATIVE_CPU_DIR) - message( FATAL_ERROR "Undefined UR variable NATIVE_CPU_DIR. The name may have changed." ) + message( FATAL_ERROR "Undefined UR variable NATIVE_CPU_DIR. + The name may have changed." ) endif() - # Include NativeCPU UR adapter path to enable finding header file with state struct. - # libsycl-nativecpu_utils is only needed as BC file by NativeCPU. + # Include NativeCPU UR adapter path to enable finding header file with + # state struct. libsycl-nativecpu_utils is only needed as BC file by + # NativeCPU. 
# Todo: add versions for other targets (for cross-compilation) - add_devicelib_bc(libsycl-nativecpu_utils SRC nativecpu_utils.cpp DEP ${itt_obj_deps} EXTRA_ARGS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu) + compile_lib(libsycl-nativecpu_utils FILETYPE bc SRC nativecpu_utils.cpp + DEPENDENCIES ${itt_obj_deps} EXTRA_OPTS -I ${NATIVE_CPU_DIR} + -fsycl-targets=native_cpu -fsycl-device-only -fsycl-device-obj=llvmir) endif() -add_devicelib(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps}) -add_devicelib(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp DEP ${itt_obj_deps}) -add_devicelib(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp DEP ${itt_obj_deps}) - -add_devicelib(libsycl-crt SRC crt_wrapper.cpp DEP ${crt_obj_deps}) -add_devicelib(libsycl-complex SRC complex_wrapper.cpp DEP ${complex_obj_deps}) -add_devicelib(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp DEP ${complex_obj_deps} ) -add_devicelib(libsycl-cmath SRC cmath_wrapper.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp DEP ${cmath_obj_deps} ) -add_devicelib(libsycl-imf SRC imf_wrapper.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEP ${cmath_obj_deps} ) +# Add all device libraries for each filetype except for the Intel math function +# ones. 
+add_devicelibs(libsycl-itt-stubs SRC itt_stubs.cpp + DEPENDENCIES ${itt_obj_deps}) +add_devicelibs(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp + DEPENDENCIES ${itt_obj_deps}) +add_devicelibs(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp + DEPENDENCIES ${itt_obj_deps}) + +add_devicelibs(libsycl-crt SRC crt_wrapper.cpp + DEPENDENCIES ${crt_obj_deps}) +add_devicelibs(libsycl-complex SRC complex_wrapper.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp + DEPENDENCIES ${complex_obj_deps} ) +add_devicelibs(libsycl-cmath SRC cmath_wrapper.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp + DEPENDENCIES ${cmath_obj_deps} ) +add_devicelibs(libsycl-imf SRC imf_wrapper.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-bfloat16 SRC bfloat16_wrapper.cpp + DEPENDENCIES ${cmath_obj_deps}) if(MSVC) - add_devicelib(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps}) + add_devicelibs(libsycl-msvc-math SRC msvc_math.cpp + DEPENDENCIES ${cmath_obj_deps}) else() - add_devicelib(libsycl-sanitizer SRC sanitizer_utils.cpp DEP ${sanitizer_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) + add_devicelibs(libsycl-sanitizer SRC sanitizer_utils.cpp + DEPENDENCIES ${sanitizer_obj_deps} + EXTRA_OPTS -fno-sycl-instrument-device-code) endif() -add_devicelib(libsycl-fallback-cassert SRC fallback-cassert.cpp DEP ${crt_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) -add_devicelib(libsycl-fallback-cstring SRC fallback-cstring.cpp DEP ${crt_obj_deps}) -add_devicelib(libsycl-fallback-complex SRC fallback-complex.cpp DEP ${complex_obj_deps}) -add_devicelib(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp DEP ${complex_obj_deps} ) 
-add_devicelib(libsycl-fallback-cmath SRC fallback-cmath.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp DEP ${bfloat16_obj_deps}) -add_devicelib(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp DEP ${bfloat16_obj_deps}) - +add_devicelibs(libsycl-fallback-cassert SRC fallback-cassert.cpp + DEPENDENCIES ${crt_obj_deps} EXTRA_OPTS -fno-sycl-instrument-device-code) +add_devicelibs(libsycl-fallback-cstring SRC fallback-cstring.cpp + DEPENDENCIES ${crt_obj_deps}) +add_devicelibs(libsycl-fallback-complex SRC fallback-complex.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-fallback-cmath SRC fallback-cmath.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp + DEPENDENCIES ${bfloat16_obj_deps}) +add_devicelibs(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp + DEPENDENCIES ${bfloat16_obj_deps}) + +# Create dependency and source lists for Intel math function libraries. file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice) set(imf_fallback_src_dir ${obj_binary_dir}/libdevice) set(imf_src_dir ${CMAKE_CURRENT_SOURCE_DIR}) -set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op.hpp imf_impl_utils.hpp +set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp + imf_rounding_op.hpp imf_impl_utils.hpp imf_utils/integer_misc.cpp imf_utils/float_convert.cpp imf_utils/half_convert.cpp @@ -216,9 +281,9 @@ set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op. 
imf_utils/fp32_round.cpp imf/imf_inline_fp32.cpp imf/imf_fp32_dl.cpp) -set(imf_fallback_fp64_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op.hpp imf_impl_utils.hpp - imf_utils/double_convert.cpp - imf_utils/fp64_round.cpp +set(imf_fallback_fp64_deps device.h device_imf.hpp imf_half.hpp + imf_rounding_op.hpp imf_impl_utils.hpp + imf_utils/double_convert.cpp imf_utils/fp64_round.cpp imf/imf_inline_fp64.cpp imf/imf_fp64_dl.cpp) set(imf_fallback_bf16_deps device.h device_imf.hpp imf_bf16.hpp @@ -265,7 +330,8 @@ macro(add_imf_host_cxx_flags_compile_flags_if_supported) foreach(flag ${ARGN}) mangle_name("${flag}" flagname) check_cxx_compiler_flag("${flag}" "CXX_SUPPORTS_${flagname}_FLAG") - add_imf_host_cxx_flags_compile_flags_if(CXX_SUPPORTS_${flagname}_FLAG ${flag}) + add_imf_host_cxx_flags_compile_flags_if(CXX_SUPPORTS_${flagname}_FLAG + ${flag}) endforeach() endmacro() @@ -275,320 +341,184 @@ if (NOT WIN32) add_imf_host_cxx_flags_compile_flags_if_supported("-fcf-protection=full") endif() -add_custom_command(OUTPUT ${imf_fp32_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=FP32 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_fp32_deps}) - -add_custom_command(OUTPUT ${imf_fp64_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=FP64 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_fp64_deps}) - -add_custom_command(OUTPUT ${imf_bf16_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=BF16 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_bf16_deps}) - -add_custom_target(get_imf_fallback_fp32 DEPENDS ${imf_fp32_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf.spv - COMMAND ${clang} 
-fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf.spv - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf.bc - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - COMMAND ${clang} -fsycl -c - ${compile_opts} ${sycl_targets_opt} - ${imf_fp32_fallback_src} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - -o ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_fp32_fallback_src} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - -o ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o 
${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_target(get_imf_fallback_fp64 DEPENDS ${imf_fp64_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${compile_opts} ${sycl_targets_opt} - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - DEPENDS 
${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_target(get_imf_fallback_bf16 DEPENDS ${imf_bf16_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${compile_opts} ${sycl_targets_opt} - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - DEPENDS 
${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_target(imf_fallback_fp32_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf.spv) -add_custom_target(imf_fallback_fp32_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf.bc) -add_custom_target(imf_fallback_fp32_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix}) -add_custom_target(imf_fallback_fp32_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix}) -add_custom_target(imf_fallback_fp32_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_fp32_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_fp32_spv) -add_dependencies(libsycldevice-bc imf_fallback_fp32_bc) -add_dependencies(libsycldevice-obj imf_fallback_fp32_obj) -add_dependencies(libsycldevice-obj imf_fallback_fp32_new_offload_obj) - -add_custom_target(imf_fallback_fp64_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv) -add_custom_target(imf_fallback_fp64_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc) -add_custom_target(imf_fallback_fp64_obj DEPENDS 
${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix}) -add_custom_target(imf_fallback_fp64_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix}) -add_custom_target(imf_fallback_fp64_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_fp64_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_fp64_spv) -add_dependencies(libsycldevice-bc imf_fallback_fp64_bc) -add_dependencies(libsycldevice-obj imf_fallback_fp64_obj) -add_dependencies(libsycldevice-obj imf_fallback_fp64_new_offload_obj) - -add_custom_target(imf_fallback_bf16_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv) -add_custom_target(imf_fallback_bf16_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc) -add_custom_target(imf_fallback_bf16_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix}) -add_custom_target(imf_fallback_bf16_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix}) -add_custom_target(imf_fallback_bf16_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_bf16_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_bf16_spv) -add_dependencies(libsycldevice-bc imf_fallback_bf16_bc) -add_dependencies(libsycldevice-obj imf_fallback_bf16_obj) -add_dependencies(libsycldevice-obj imf_fallback_bf16_new_offload_obj) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - -o ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT 
${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - -o ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - -o ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - -o ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - -o ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - -o ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_target(imf_fp32_host_obj DEPENDS ${obj_binary_dir}/imf-fp32-host.${lib-suffix}) -add_custom_target(imf_fp64_host_obj DEPENDS ${obj_binary_dir}/imf-fp64-host.${lib-suffix}) -add_custom_target(imf_bf16_host_obj DEPENDS 
${obj_binary_dir}/imf-bf16-host.${lib-suffix}) - -add_custom_target(imf_fp32_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix}) -add_custom_target(imf_fp64_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix}) -add_custom_target(imf_bf16_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix}) - -add_custom_target(imf_host_obj DEPENDS ${obj_binary_dir}/${devicelib_host_static}) -add_custom_command(OUTPUT ${obj_binary_dir}/${devicelib_host_static} - COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static} - ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - DEPENDS imf_fp32_host_obj imf_fallback_fp32_host_obj - DEPENDS imf_fp64_host_obj imf_fallback_fp64_host_obj - DEPENDS imf_bf16_host_obj imf_fallback_bf16_host_obj - DEPENDS sycl-compiler - VERBATIM) -add_custom_target(imf_host_new_offload_obj DEPENDS ${obj_binary_dir}/${devicelib_host_static_new_offload}) -add_custom_command(OUTPUT ${obj_binary_dir}/${devicelib_host_static_new_offload} - COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static_new_offload} - ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - DEPENDS imf_fp32_host_new_offload_obj imf_fallback_fp32_host_new_offload_obj - DEPENDS imf_fp64_host_new_offload_obj imf_fallback_fp64_host_new_offload_obj - DEPENDS imf_bf16_host_new_offload_obj 
imf_fallback_bf16_host_new_offload_obj - DEPENDS sycl-compiler - VERBATIM) -add_dependencies(libsycldevice-obj imf_host_obj) -add_dependencies(libsycldevice-obj imf_host_new_offload_obj) -install(FILES ${spv_binary_dir}/libsycl-fallback-imf.spv - ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - DESTINATION ${install_dest_spv} - COMPONENT libsycldevice) - -install(FILES ${bc_binary_dir}/libsycl-fallback-imf.bc - ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - DESTINATION ${install_dest_bc} - COMPONENT libsycldevice) - -install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - ${obj_binary_dir}/${devicelib_host_static} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) - -install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - ${obj_binary_dir}/${devicelib_host_static_new_offload} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) +set(obj-new-offload_host_compile_opts ${imf_host_cxx_flags} --offload-new-driver + -foffload-lto=thin) +set(obj_host_compile_opts ${imf_host_cxx_flags}) + +foreach(datatype IN ITEMS fp32 fp64 bf16) + string(TOUPPER ${datatype} upper_datatype) + + add_custom_command( + OUTPUT ${imf_${datatype}_fallback_src} + COMMAND ${CMAKE_COMMAND} + -D SRC_DIR=${imf_src_dir} + -D DEST_DIR=${imf_fallback_src_dir} + -D IMF_TARGET=${upper_datatype} + -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake + DEPENDS ${imf_fallback_${datatype}_deps}) + + add_custom_target(get_imf_fallback_${datatype} + DEPENDS ${imf_${datatype}_fallback_src}) +endforeach() + +# Adds Intel math functions libraries. +# +# Arguments: +# * SRC ... 
+# Source code files needed for the compilation. +# * DIR +# The directory where the output file should be located in. +# * FTYPE +# Filetype of the output library file (e.g. 'bc'). +# * DTYPE +# The datatype of the library, which determines the input source +# and dependencies of the compilation command. +# * TGT_NAME +# Name of the new target that depends on the compilation of the library. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# +# Depends on the clang target for compiling. +function(add_lib_imf name) + cmake_parse_arguments(ARG + "" + "DIR;FTYPE;DTYPE;TGT_NAME" + "EXTRA_OPTS" + ${ARGN}) + + add_custom_command( + OUTPUT ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix} + COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS} + -I ${CMAKE_CURRENT_SOURCE_DIR}/imf + ${imf_${ARG_DTYPE}_fallback_src} + -o + ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix} + DEPENDS ${imf_fallback_${ARG_DTYPE}_deps} + get_imf_fallback_${ARG_DTYPE} sycl-compiler + VERBATIM) + + add_custom_target(${ARG_TGT_NAME} + DEPENDS ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix}) + + add_dependencies(libsycldevice-${ARG_FTYPE} ${ARG_TGT_NAME}) +endfunction() + +# Add device fallback imf libraries for the SPIRV targets and all filetypes. +foreach(dtype IN ITEMS bf16 fp32 fp64) + foreach(ftype IN LISTS filetypes) + set(libsycl_name libsycl-fallback-imf) + if (NOT (dtype STREQUAL "fp32")) + set(libsycl_name libsycl-fallback-imf-${dtype}) + endif() + set(tgt_name imf_fallback_${dtype}_${ftype}) + + add_lib_imf(${libsycl_name} DIR ${${ftype}_binary_dir} FTYPE + ${ftype} DTYPE ${dtype} EXTRA_OPTS ${${ftype}_device_compile_opts} + TGT_NAME ${tgt_name}) + endforeach() +endforeach() + +# Add device fallback imf libraries for the CUDA and AMD targets. +# The output files are bitcode. 
+foreach(arch IN LISTS devicelib_arch)
+  foreach(dtype IN ITEMS bf16 fp32 fp64)
+    set(tgt_name imf_fallback_${dtype}_bc_${arch})
+
+    add_lib_imf(libsycl-fallback-imf-${arch}-${dtype} ARCH ${arch} DIR
+      ${bc_binary_dir} FTYPE bc DTYPE ${dtype}
+      EXTRA_OPTS ${bc_device_compile_opts} ${compile_opts_${arch}}
+      TGT_NAME ${tgt_name})
+    # Record the output under the property that the link loop below reads.
+    append_to_property(
+      ${bc_binary_dir}/libsycl-fallback-imf-${arch}-${dtype}.${bc-suffix}
+      PROPERTY_NAME BC_DEVICE_LIBS_${arch})
+  endforeach()
+endforeach()
+
+# Create one large bitcode file for the CUDA and AMD targets.
+# Use all the files collected in the respective global properties.
+foreach(arch IN LISTS devicelib_arch)
+  get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch})
+  # Link the bitcode files together.
+  link_bc(TARGET device_lib_device_${arch}
+    RSP_DIR ${CMAKE_CURRENT_BINARY_DIR}
+    INPUTS ${BC_DEVICE_LIBS_${arch}})
+  set( builtins_link_lib_${arch}
+    $<TARGET_PROPERTY:device_lib_device_${arch},TARGET_FILE>)
+  add_dependencies(libsycldevice-bc device_lib_device_${arch})
+  set( builtins_opt_lib_tgt_${arch} builtins_${arch}.opt)
+  # NOTE(review): TARGET_FILE generator expressions reconstructed - confirm.
+  # Run the optimizer on the resulting bitcode file and call prepare_builtins
+  # on it, which strips away debug and arch information.
+  opt_prepare(devicelib--${arch}.bc LIB_TGT builtins_${arch}.opt IN_FILE
+    ${builtins_link_lib_${arch}} OUT_DIR ${bc_binary_dir})
+  add_dependencies(libsycldevice-bc prepare-devicelib--${arch}.bc)
+  set(complete_${arch}_libdev
+    $<TARGET_PROPERTY:prepare-devicelib--${arch}.bc,TARGET_FILE>)
+  install( FILES ${complete_${arch}_libdev}
+    DESTINATION ${install_dest_bc}
+    COMPONENT libsycldevice)
+endforeach()
+
+# Add host device imf libraries for obj and new offload objects.
+foreach(dtype IN ITEMS bf16 fp32 fp64)
+  foreach(ftype IN ITEMS obj obj-new-offload)
+    set(tgt_name imf_fallback_${dtype}_host_${ftype})
+
+    add_lib_imf(fallback-imf-${dtype}-host DIR ${${ftype}_binary_dir}
+      FTYPE ${ftype} DTYPE ${dtype} EXTRA_OPTS ${${ftype}_host_compile_opts}
+      TGT_NAME ${tgt_name})
+
+    set(wrapper_name imf_wrapper.cpp)
+    if (NOT ("${dtype}" STREQUAL "fp32"))
+      set(wrapper_name imf_wrapper_${dtype}.cpp)
+    endif()
+    add_custom_command(
+      OUTPUT ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix}
+      COMMAND ${clang} ${${ftype}_host_compile_opts}
+      ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name}
+      -o ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix}
+      MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name}
+      DEPENDS ${imf_obj_deps}
+      VERBATIM)
+    # The target must name exactly the OUTPUT path of the command above.
+    add_custom_target(imf_${dtype}_host_${ftype} DEPENDS
+      ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix})
+  endforeach()
+endforeach()
+
+foreach(ftype IN ITEMS obj obj-new-offload)
+  add_custom_target(imf_host_${ftype}
+    DEPENDS ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}})
+  add_custom_command(
+    OUTPUT ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}}
+    COMMAND ${llvm-ar} rcs
+    ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}}
+    ${${ftype}_binary_dir}/imf-fp32-host.${${ftype}-suffix}
+    ${${ftype}_binary_dir}/fallback-imf-fp32-host.${${ftype}-suffix}
+    ${${ftype}_binary_dir}/imf-fp64-host.${${ftype}-suffix}
+    ${${ftype}_binary_dir}/fallback-imf-fp64-host.${${ftype}-suffix}
+    ${${ftype}_binary_dir}/imf-bf16-host.${${ftype}-suffix}
+    ${${ftype}_binary_dir}/fallback-imf-bf16-host.${${ftype}-suffix}
+    DEPENDS imf_fp32_host_${ftype} imf_fallback_fp32_host_${ftype}
+    DEPENDS imf_fp64_host_${ftype} imf_fallback_fp64_host_${ftype}
+    DEPENDS imf_bf16_host_${ftype} imf_fallback_bf16_host_${ftype}
+    DEPENDS sycl-compiler
+    VERBATIM)
+  add_dependencies(libsycldevice-obj imf_host_${ftype})
+  # Install the archive from the directory the rule above writes it to.
+  install( FILES ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}} +
DESTINATION ${install_dest_obj} + COMPONENT libsycldevice) +endforeach() + +foreach(ftype IN LISTS filetypes) + install( + FILES ${${ftype}_binary_dir}/libsycl-fallback-imf.${${ftype}-suffix} + ${${ftype}_binary_dir}/libsycl-fallback-imf-fp64.${${ftype}-suffix} + ${${ftype}_binary_dir}/libsycl-fallback-imf-bf16.${${ftype}-suffix} + DESTINATION ${install_dest_${ftype}} + COMPONENT libsycldevice) +endforeach() + diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index cf40373a90efb..0ad643032593a 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,7 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } @@ -196,4 +196,4 @@ DEVICE_EXTERN_C_INLINE float rintf(float x) { return __nv_rintf(x); } #endif // __NVPTX__ -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index bfc1a122f0f18..9366fed6eae72 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,7 +9,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) // All exported functions in math and complex device libraries are weak // reference. 
If users provide their own math or complex functions(with @@ -493,4 +493,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 } } #endif // defined(_WIN32) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index e8160013a66df..3b4c4e3558750 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ -17,7 +17,7 @@ DeviceGlobal RandNext; #endif -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE void *memcpy(void *dest, const void *src, size_t n) { return __devicelib_memcpy(dest, src, n); @@ -126,4 +126,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line, __spirv_LocalInvocationId_z()); } #endif -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/device.h b/libdevice/device.h index 360af54f9b4c4..734b1f4a36869 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -15,7 +15,7 @@ #define EXTERN_C #endif // __cplusplus -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) #ifdef __SYCL_DEVICE_ONLY__ #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak)) #else // __SYCL_DEVICE_ONLY__ @@ -27,7 +27,7 @@ DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline)) #define DEVICE_EXTERN_C_NOINLINE \ DEVICE_EXTERNAL EXTERN_C __attribute__((noinline)) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #if defined(__SPIR__) || defined(__SPIRV__) || defined(__LIBDEVICE_HOST_IMPL__) #define __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/device_math.h b/libdevice/device_math.h index 01085013dae57..af480c99b3bae 100644 --- 
a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -10,7 +10,7 @@ #define __LIBDEVICE_DEVICE_MATH_H__ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) #include typedef struct { @@ -361,5 +361,5 @@ float __devicelib_scalbnf(float x, int n); DEVICE_EXTERN_C double __devicelib_scalbn(double x, int exp); -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_DEVICE_MATH_H__ diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 5d3c99d63c556..493955346c900 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -100,7 +100,8 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, } #endif // __SPIR__ || __SPIRV__ -#ifdef __NVPTX__ + +#if defined(__NVPTX__) || defined(__AMDGCN__) DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file, unsigned __line, const char *__function, @@ -119,4 +120,4 @@ DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File, __assertfail(_Message, _File, _Line, 0, 1); } -#endif +#endif // __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index 49832ef966b5f..83518b7ad512f 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -9,7 +9,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -188,4 +188,4 @@ DEVICE_EXTERN_C_INLINE double __devicelib_scalbn(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } -#endif // __SPIR__ || __SPIRV__ || 
__NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 6289126272da4..112fcb34a4d32 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,7 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -204,4 +204,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); } DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cstring.cpp b/libdevice/fallback-cstring.cpp index 5d384f00a78cb..f73811b9b8752 100644 --- a/libdevice/fallback-cstring.cpp +++ b/libdevice/fallback-cstring.cpp @@ -9,7 +9,7 @@ #include "wrapper.h" #include -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src, size_t n) { @@ -202,4 +202,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) { return head_cmp; } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/imf/imf_fp32_dl.cpp b/libdevice/imf/imf_fp32_dl.cpp index f08ee1d305999..eff8c2ac7472d 100644 --- a/libdevice/imf/imf_fp32_dl.cpp +++ b/libdevice/imf/imf_fp32_dl.cpp @@ -11,9 +11,12 @@ /// overhead in these deep learning frameworks. 
//===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE int32_t __devicelib_imf_abs(int32_t x) { return (x >= 0) ? x : -x; } diff --git a/libdevice/imf/imf_fp64_dl.cpp b/libdevice/imf/imf_fp64_dl.cpp index 37fbd906f71eb..d9382bc2ddc21 100644 --- a/libdevice/imf/imf_fp64_dl.cpp +++ b/libdevice/imf/imf_fp64_dl.cpp @@ -11,9 +11,12 @@ /// overhead in these deep learning frameworks. //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) { return __fabs(x); } diff --git a/libdevice/imf/imf_inline_bf16.cpp b/libdevice/imf/imf_inline_bf16.cpp index c7165a1ee0183..96335de774fd0 100644 --- a/libdevice/imf/imf_inline_bf16.cpp +++ b/libdevice/imf/imf_inline_bf16.cpp @@ -5,9 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ + +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE _iml_bf16_internal __devicelib_imf_fmabf16(_iml_bf16_internal a, _iml_bf16_internal b, diff --git a/libdevice/imf/imf_inline_fp32.cpp b/libdevice/imf/imf_inline_fp32.cpp index e71499f8fe057..44061ec40ab45 100644 --- a/libdevice/imf/imf_inline_fp32.cpp +++ b/libdevice/imf/imf_inline_fp32.cpp @@ -5,9 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" + +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16( 
_iml_half_internal a, _iml_half_internal b, _iml_half_internal c) { _iml_half ha(a), hb(b), hc(c); diff --git a/libdevice/imf/imf_inline_fp64.cpp b/libdevice/imf/imf_inline_fp64.cpp index f8d5418513f11..24c016c49344c 100644 --- a/libdevice/imf/imf_inline_fp64.cpp +++ b/libdevice/imf/imf_inline_fp64.cpp @@ -5,9 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" + +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE double __devicelib_imf_fma(double a, double b, double c) { return __fma(a, b, c); diff --git a/libdevice/imf_utils/bfloat16_convert.cpp b/libdevice/imf_utils/bfloat16_convert.cpp index 750cb4e5877b4..1591c90768939 100644 --- a/libdevice/imf_utils/bfloat16_convert.cpp +++ b/libdevice/imf_utils/bfloat16_convert.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ + +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_bfloat162float(_iml_bf16_internal b) { return __bfloat162float(b); diff --git a/libdevice/imf_utils/double_convert.cpp b/libdevice/imf_utils/double_convert.cpp index c4cd6dea07bf1..3c80dfe3ae769 100644 --- a/libdevice/imf_utils/double_convert.cpp +++ b/libdevice/imf_utils/double_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + static inline float __double2float_rd(double x) { #if defined(__LIBDEVICE_HOST_IMPL__) return __double2Tp_host(x, FE_DOWNWARD); diff --git a/libdevice/imf_utils/float_convert.cpp b/libdevice/imf_utils/float_convert.cpp index 85299c0f33823..299ea5c25f96f 100644 --- 
a/libdevice/imf_utils/float_convert.cpp +++ b/libdevice/imf_utils/float_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + static inline int __float2int_rd(float x) { #if defined(__LIBDEVICE_HOST_IMPL__) return __float2Tp_host(x, FE_DOWNWARD); diff --git a/libdevice/imf_utils/fp32_round.cpp b/libdevice/imf_utils/fp32_round.cpp index 32548b1ccf912..973371feca0d9 100644 --- a/libdevice/imf_utils/fp32_round.cpp +++ b/libdevice/imf_utils/fp32_round.cpp @@ -6,10 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "../device.h" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + #include "../device_imf.hpp" #include "../imf_rounding_op.hpp" -#ifdef __LIBDEVICE_IMF_ENABLED__ DEVICE_EXTERN_C_INLINE float __devicelib_imf_fadd_rd(float x, float y) { return __fp_add_sub_entry(x, y, __IML_RTN); diff --git a/libdevice/imf_utils/fp64_round.cpp b/libdevice/imf_utils/fp64_round.cpp index aa4de27a669e1..2f88265a1103a 100644 --- a/libdevice/imf_utils/fp64_round.cpp +++ b/libdevice/imf_utils/fp64_round.cpp @@ -6,10 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "../device.h" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + #include "../device_imf.hpp" #include "../imf_rounding_op.hpp" -#ifdef __LIBDEVICE_IMF_ENABLED__ DEVICE_EXTERN_C_INLINE double __devicelib_imf_dadd_rd(double x, double y) { return __fp_add_sub_entry(x, y, __IML_RTN); diff --git a/libdevice/imf_utils/half_convert.cpp b/libdevice/imf_utils/half_convert.cpp index 3e23d3a46f01e..e16b9ec699f65 100644 --- a/libdevice/imf_utils/half_convert.cpp +++ b/libdevice/imf_utils/half_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef 
__LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_half2float(_iml_half_internal x) { return __half2float(x); diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index fdc850ee42281..06642eec7d267 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_brev(unsigned int x) { unsigned int res = 0; diff --git a/libdevice/imf_utils/simd_emulate.cpp b/libdevice/imf_utils/simd_emulate.cpp index 7369a1598aacb..a8ac73f42ab8b 100644 --- a/libdevice/imf_utils/simd_emulate.cpp +++ b/libdevice/imf_utils/simd_emulate.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + template struct __twice_size; template using __twice_size_t = typename __twice_size::type; template struct __twice_size_tag { diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index 336725cad5f63..be630bccbf579 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "device_imf.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_saturatef(float); diff --git a/libdevice/imf_wrapper_bf16.cpp b/libdevice/imf_wrapper_bf16.cpp index d02903b0a720f..0c72d95bccc63 100644 --- a/libdevice/imf_wrapper_bf16.cpp +++ b/libdevice/imf_wrapper_bf16.cpp @@ -7,10 +7,12 @@ // 
//===----------------------------------------------------------------------===// -#include "imf_bf16.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "imf_bf16.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_bfloat162float(_iml_bf16_internal); diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp index 10cf98e844774..e90979d2bb724 100644 --- a/libdevice/imf_wrapper_fp64.cpp +++ b/libdevice/imf_wrapper_fp64.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "device_imf.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_double2float_rd(double); diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index 4445520bff741..65b96565752db 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -11,7 +11,7 @@ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) #include #include @@ -58,5 +58,5 @@ const size_t_vec __spirv_BuiltInGlobalInvocationId{}; const size_t_vec __spirv_BuiltInLocalInvocationId{}; #endif // !__SPIR__ && !__SPIRV__ -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_SPIRV_VARS_H diff --git a/libdevice/wrapper.h b/libdevice/wrapper.h index bbc0cd4f1ca87..482a02aa71bab 100644 --- a/libdevice/wrapper.h +++ b/libdevice/wrapper.h @@ -11,7 +11,7 @@ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) #include #include @@ -29,5 +29,5 @@ void __devicelib_assert_fail(const char *expr, const char *file, int32_t line, const char *func, uint64_t gid0, uint64_t gid1, uint64_t gid2, uint64_t lid0, uint64_t lid1, uint64_t lid2); -#endif // 
__SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_WRAPPER_H__ From 075cf2ebfd249746082806f717ecbc91ec0d1026 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Fri, 2 Aug 2024 14:38:51 +0100 Subject: [PATCH 02/14] Reduce diff --- clang/lib/Driver/ToolChains/SYCL.cpp | 37 +++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index ee90f8687959a..bdeaf9b6fc3d8 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -223,7 +223,6 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, }; bool NoDeviceLibs = false; - // Currently, all SYCL device libraries will be linked by default. Linkage // of "internal" libraries cannot be affected via -fno-sycl-device-lib. llvm::StringMap DeviceLibLinkInfo = { @@ -504,19 +503,35 @@ void SYCL::populateSYCLDeviceTraitsMacrosArgs( // The list should match pre-built SYCL device library files located in // compiler package. Once we add or remove any SYCL device library files, // the list should be updated accordingly. 
-static llvm::SmallVector SYCLDeviceLibList { - "bfloat16", "crt", "cmath", "cmath-fp64", "complex", "complex-fp64", +static llvm::SmallVector SYCLDeviceLibList{ + "bfloat16", + "crt", + "cmath", + "cmath-fp64", + "complex", + "complex-fp64", #if defined(_WIN32) - "msvc-math", + "msvc-math", #else - "sanitizer", + "sanitizer", #endif - "imf", "imf-fp64", "imf-bf16", "itt-compiler-wrappers", "itt-stubs", - "itt-user-wrappers", "fallback-cassert", "fallback-cstring", - "fallback-cmath", "fallback-cmath-fp64", "fallback-complex", - "fallback-complex-fp64", "fallback-imf", "fallback-imf-fp64", - "fallback-imf-bf16", "fallback-bfloat16", "native-bfloat16" -}; + "imf", + "imf-fp64", + "imf-bf16", + "itt-compiler-wrappers", + "itt-stubs", + "itt-user-wrappers", + "fallback-cassert", + "fallback-cstring", + "fallback-cmath", + "fallback-cmath-fp64", + "fallback-complex", + "fallback-complex-fp64", + "fallback-imf", + "fallback-imf-fp64", + "fallback-imf-bf16", + "fallback-bfloat16", + "native-bfloat16"}; const char *SYCL::Linker::constructLLVMLinkCommand( Compilation &C, const JobAction &JA, const InputInfo &Output, From a8f2cb88acf08896ce4fd808386900844871dcf1 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Fri, 2 Aug 2024 15:07:33 +0100 Subject: [PATCH 03/14] Enable devicelib tests for HIP --- sycl/test-e2e/DeviceLib/assert.cpp | 2 +- sycl/test-e2e/DeviceLib/string_test.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/test-e2e/DeviceLib/assert.cpp b/sycl/test-e2e/DeviceLib/assert.cpp index 81bedb8d0350e..c30ef5619a524 100644 --- a/sycl/test-e2e/DeviceLib/assert.cpp +++ b/sycl/test-e2e/DeviceLib/assert.cpp @@ -1,4 +1,4 @@ -// REQUIRES: (cpu || cuda ) && linux +// REQUIRES: (cpu || cuda || hip ) && linux // RUN: %{build} -DSYCL_FALLBACK_ASSERT=1 -o %t.out // (see the other RUN lines below; it is a bit complicated) // diff --git a/sycl/test-e2e/DeviceLib/string_test.cpp b/sycl/test-e2e/DeviceLib/string_test.cpp index 
c518661303b6e..c189844592520 100644 --- a/sycl/test-e2e/DeviceLib/string_test.cpp +++ b/sycl/test-e2e/DeviceLib/string_test.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: hip // RUN: %{build} -Wno-error=deprecated-declarations -Wno-error=pointer-to-int-cast -fno-builtin -o %t.out // RUN: %{run} %t.out // From 52d9de98c3f90737263fb8ca5b97b1d70c7a62d2 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Fri, 2 Aug 2024 19:58:44 +0100 Subject: [PATCH 04/14] Fix intrinsic selection for AMD Do not select builtin LLVM intrinsics for AMDGCN by default. Previously, these intrinsics were selected by default and prevented linkage of standard library math functions provided by libdevice. --- clang/lib/CodeGen/CGBuiltin.cpp | 3 ++- sycl/test-e2e/DeviceLib/cmath_test.cpp | 1 - sycl/test-e2e/DeviceLib/math_fp64_test.cpp | 1 - sycl/test-e2e/DeviceLib/math_test.cpp | 2 -- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e192f5c701d8a..82d0644b2a23e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2719,7 +2719,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; } if (GenerateIntrinsics && - !(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) { + !(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() || + getTarget().getTriple().isAMDGCN()))) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIceil: case Builtin::BIceilf: diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 54e0c14d08bfa..40749f7cd57a9 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -1,6 +1,5 @@ // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// UNSUPPORTED: hip // RUN: %{build} -fno-builtin %{mathflags} -o %t.out // RUN: %{run} %t.out diff 
--git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 94b91255a5f1b..c6ab1c6335e20 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -1,5 +1,4 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: hip // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index 0380234575061..aeda8550294da 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: hip - // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} // RUN: %{build} %{mathflags} -o %t.out From 39ac1d62cd36d1eaa0f96a207a6b18f713bf23b4 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Mon, 5 Aug 2024 09:59:53 +0100 Subject: [PATCH 05/14] Add amdgcn-amd-amdhsa triple to libdevice test Test whether SYCL Codegen does not emit llvm intrinsics for amdgcn-amd-amdhsa and libdevice functions can be correctly linked later on. --- clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp index af94dada263d1..5c282449dc851 100644 --- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp +++ b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp @@ -5,8 +5,10 @@ // intrinsics. This allows the driver to link in the libdevice definitions for // cosf etc. later in the driver flow. 
-// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s #include "Inputs/sycl.hpp" From 84497b06841e9ff5749caee409883fa03330c6dd Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Mon, 5 Aug 2024 13:59:40 +0100 Subject: [PATCH 06/14] Refactor with clang-format --- clang/lib/Driver/Driver.cpp | 6 +++--- clang/lib/Driver/ToolChains/SYCL.cpp | 10 +++++----- libdevice/cmath_wrapper.cpp | 3 ++- libdevice/cmath_wrapper_fp64.cpp | 3 ++- libdevice/crt_wrapper.cpp | 3 ++- libdevice/device.h | 3 ++- libdevice/device_math.h | 3 ++- libdevice/fallback-cassert.cpp | 1 - libdevice/fallback-cmath-fp64.cpp | 3 ++- libdevice/fallback-cmath.cpp | 3 ++- libdevice/fallback-cstring.cpp | 3 ++- libdevice/spirv_vars.h | 3 ++- libdevice/wrapper.h | 3 ++- 13 files changed, 28 insertions(+), 19 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index b522ac1bd70c9..263f41171a359 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5533,7 +5533,7 @@ class OffloadingActionBuilder final { // AOT compilation. 
bool SYCLDeviceLibLinked = false; Action *NativeCPULib = nullptr; - if (IsSPIR || IsNVPTX || IsAMDGCN|| IsSYCLNativeCPU) { + if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) { bool UseJitLink = IsSPIR && Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, @@ -5849,8 +5849,8 @@ class OffloadingActionBuilder final { Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(), Args.MakeArgString(LibName)); if (TC->getTriple().isSPIR() && - TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_fpga) { + TC->getTriple().getSubArch() == + llvm::Triple::SPIRSubArch_fpga) { auto *SYCLDeviceLibsInputAction = C.MakeAction(*InputArg, types::TY_Object); auto *SYCLDeviceLibsUnbundleAction = diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index bdeaf9b6fc3d8..e88bf8b94e6df 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -166,9 +166,8 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // spir64 target is actually JIT compilation, so we defer selection of // bfloat16 libraries to runtime. For AOT we need libraries, but skip // for Nvidia and AMD. 
- NeedLibs = - Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX() - && !Triple.isAMDGCN(); + NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch && + !Triple.isNVPTX() && !Triple.isAMDGCN(); UseNative = false; if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && C.hasOffloadToolChain()) { @@ -242,7 +241,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, for (StringRef Val : A->getValues()) { if (Val == "all") { for (const auto &K : DeviceLibLinkInfo.keys()) - DeviceLibLinkInfo[K] = (!ignore_single_libs && !NoDeviceLibs) || (K == "internal" && NoDeviceLibs) ; + DeviceLibLinkInfo[K] = (!ignore_single_libs && !NoDeviceLibs) || + (K == "internal" && NoDeviceLibs); printUnusedLibWarning = false; break; } @@ -259,7 +259,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, } if (printUnusedLibWarning) C.getDriver().Diag(diag::warn_ignored_clang_option) - << A->getSpelling() << A->getAsString(Args); + << A->getSpelling() << A->getAsString(Args); } } diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index 0ad643032593a..8e0d96c3609b6 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,7 +8,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 9366fed6eae72..e03e9119f2816 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,7 +9,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // All exported functions in 
math and complex device libraries are weak // reference. If users provide their own math or complex functions(with diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index 3b4c4e3558750..8978c32d2d5e4 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ -17,7 +17,8 @@ DeviceGlobal RandNext; #endif -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE void *memcpy(void *dest, const void *src, size_t n) { return __devicelib_memcpy(dest, src, n); diff --git a/libdevice/device.h b/libdevice/device.h index 734b1f4a36869..2ab565f5a939c 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -15,7 +15,8 @@ #define EXTERN_C #endif // __cplusplus -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #ifdef __SYCL_DEVICE_ONLY__ #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak)) #else // __SYCL_DEVICE_ONLY__ diff --git a/libdevice/device_math.h b/libdevice/device_math.h index af480c99b3bae..343021bfc5c02 100644 --- a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -10,7 +10,8 @@ #define __LIBDEVICE_DEVICE_MATH_H__ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include typedef struct { diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 493955346c900..5f7bcafa6ecc0 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -100,7 +100,6 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, } #endif // __SPIR__ || __SPIRV__ - #if defined(__NVPTX__) || defined(__AMDGCN__) DEVICE_EXTERN_C void 
__assertfail(const char *__message, const char *__file, diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index 83518b7ad512f..064be6640ee28 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -9,7 +9,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 112fcb34a4d32..e7342c09fc909 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,7 +8,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add diff --git a/libdevice/fallback-cstring.cpp b/libdevice/fallback-cstring.cpp index f73811b9b8752..e63e83052dfeb 100644 --- a/libdevice/fallback-cstring.cpp +++ b/libdevice/fallback-cstring.cpp @@ -9,7 +9,8 @@ #include "wrapper.h" #include -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src, size_t n) { diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index 65b96565752db..869c343206994 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -11,7 +11,8 @@ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || 
defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include #include diff --git a/libdevice/wrapper.h b/libdevice/wrapper.h index 482a02aa71bab..45555785fd2fb 100644 --- a/libdevice/wrapper.h +++ b/libdevice/wrapper.h @@ -11,7 +11,8 @@ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include #include From bdcb90a69ae54728165830fede4294c95ec42bff Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Mon, 5 Aug 2024 14:16:46 +0100 Subject: [PATCH 07/14] Refactor by applying LLVM style consistently --- clang/lib/Driver/ToolChains/SYCL.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index e88bf8b94e6df..ef36f8c041c50 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -214,7 +214,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, // For NVPTX and AMDGCN we only use one single bitcode library and ignore // manually specified SYCL device libraries. 
- bool ignore_single_libs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN(); + bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN(); struct DeviceLibOptInfo { StringRef DeviceLibName; @@ -237,13 +237,13 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) NoDeviceLibs = true; - bool printUnusedLibWarning = false; + bool PrintUnusedLibWarning = false; for (StringRef Val : A->getValues()) { if (Val == "all") { for (const auto &K : DeviceLibLinkInfo.keys()) - DeviceLibLinkInfo[K] = (!ignore_single_libs && !NoDeviceLibs) || + DeviceLibLinkInfo[K] = (!IgnoreSingleLibs && !NoDeviceLibs) || (K == "internal" && NoDeviceLibs); - printUnusedLibWarning = false; + PrintUnusedLibWarning = false; break; } auto LinkInfoIter = DeviceLibLinkInfo.find(Val); @@ -254,23 +254,22 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; } - DeviceLibLinkInfo[Val] = true && !NoDeviceLibs && !ignore_single_libs; - printUnusedLibWarning = ignore_single_libs && !NoDeviceLibs && true; + DeviceLibLinkInfo[Val] = !NoDeviceLibs && !IgnoreSingleLibs; + PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs && true; } - if (printUnusedLibWarning) + if (PrintUnusedLibWarning) C.getDriver().Diag(diag::warn_ignored_clang_option) << A->getSpelling() << A->getAsString(Args); } } - if (TargetTriple.isNVPTX() && !NoDeviceLibs) { + if (TargetTriple.isNVPTX() && !NoDeviceLibs) LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc")); - } - if (TargetTriple.isAMDGCN() && !NoDeviceLibs) { + + if (TargetTriple.isAMDGCN() && !NoDeviceLibs) LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc")); - } - if (ignore_single_libs && !NoDeviceLibs) + if (IgnoreSingleLibs && !NoDeviceLibs) return LibraryList; using SYCLDeviceLibsList = SmallVector; @@ -326,8 +325,8 @@ 
SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver(); StringRef LibSuffix = ".bc"; - if ((TargetTriple.isSPIR() && - TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)) + if (TargetTriple.isSPIR() && + TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) // For FPGA, we are unbundling objects. LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o"; if (IsNewOffload) From 58bc0a866ecd3f5e4041dd6fa078f9d006102e25 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Mon, 5 Aug 2024 14:27:35 +0100 Subject: [PATCH 08/14] Refactor with clang-format --- clang/lib/Driver/ToolChains/SYCL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index ef36f8c041c50..5cfdb82ec6621 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -326,7 +326,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver(); StringRef LibSuffix = ".bc"; if (TargetTriple.isSPIR() && - TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) + TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) // For FPGA, we are unbundling objects. LibSuffix = IsWindowsMSVCEnv ? 
".obj" : ".o"; if (IsNewOffload) From 5905a226d1dd18d018c4971ab19745f635d5b7d1 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Tue, 6 Aug 2024 15:07:08 +0100 Subject: [PATCH 09/14] Refactor CMake and remove redundant bools --- clang/lib/Driver/ToolChains/SYCL.cpp | 2 +- libclc/cmake/modules/AddLibclc.cmake | 54 ++++---- libdevice/cmake/modules/SYCLLibdevice.cmake | 143 ++++++++++++-------- 3 files changed, 122 insertions(+), 77 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 5cfdb82ec6621..d6e1d96502a4e 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -255,7 +255,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, << A->getSpelling() << Val; } DeviceLibLinkInfo[Val] = !NoDeviceLibs && !IgnoreSingleLibs; - PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs && true; + PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs; } if (PrintUnusedLibWarning) C.getDriver().Diag(diag::warn_ignored_clang_option) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index c98504c1204c6..3b3ef0a2946f2 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -219,41 +219,47 @@ function(add_libclc_alias alias target) endfunction(add_libclc_alias alias target) -# runs opt and prepare-builtins on a bitcode file specified by lib_tgt -# LIB_TGT string +# Runs opt and prepare-builtins on a bitcode file specified by lib_tgt +# +# ARGUMENTS: +# * LIB_TGT string # Target name that becomes dependent on the out file named LIB_TGT.bc -# IN_FILE string +# * IN_FILE string # Target name of the input bytecode file -# OUT_DIR string +# * OUT_DIR string # Name of the directory where the output should be placed -# DEPENDENCIES ... +# * DEPENDENCIES ... 
# List of extra dependencies to inject -function(opt_prepare out_file) - cmake_parse_arguments(OPT "" "LIB_TGT;IN_FILE;OUT_DIR" "DEPENDENCIES" ${ARGN}) - add_custom_command( OUTPUT ${OPT_LIB_TGT}.bc - COMMAND libclc::opt ${ARG_OPT_FLAGS} -o ${OPT_LIB_TGT}.bc - ${OPT_IN_FILE} - DEPENDS libclc::opt ${OPT_IN_FILE} ${OPT_DEPENDENCIES} +function(process_bc out_file) + cmake_parse_arguments(ARG + "" + "LIB_TGT;IN_FILE;OUT_DIR" + "DEPENDENCIES" + ${ARGN}) + add_custom_command( OUTPUT ${ARG_LIB_TGT}.bc + COMMAND libclc::opt -o ${ARG_LIB_TGT}.bc + ${ARG_IN_FILE} + DEPENDS libclc::opt ${ARG_IN_FILE} ${ARG_DEPENDENCIES} ) - add_custom_target( ${OPT_LIB_TGT} - ALL DEPENDS ${OPT_LIB_TGT}.bc +add_custom_target( ${ARG_LIB_TGT} + ALL DEPENDS ${ARG_LIB_TGT}.bc ) -set_target_properties( ${OPT_LIB_TGT} - PROPERTIES TARGET_FILE ${OPT_LIB_TGT}.bc +set_target_properties( ${ARG_LIB_TGT} + PROPERTIES TARGET_FILE ${ARG_LIB_TGT}.bc ) -set( builtins_opt_lib $ ) +set( builtins_opt_lib $ ) # Add prepare target - add_custom_command( OUTPUT ${OPT_OUT_DIR}/${out_file} - COMMAND prepare_builtins -o ${OPT_OUT_DIR}/${out_file} + add_custom_command( OUTPUT ${ARG_OUT_DIR}/${out_file} + COMMAND prepare_builtins -o ${ARG_OUT_DIR}/${out_file} ${builtins_opt_lib} - DEPENDS ${builtins_opt_lib} ${OPT_LIB_TGT} prepare_builtins ) + DEPENDS ${builtins_opt_lib} ${ARG_LIB_TGT} prepare_builtins ) add_custom_target( prepare-${out_file} ALL - DEPENDS ${OPT_OUT_DIR}/${out_file} + DEPENDS ${ARG_OUT_DIR}/${out_file} ) set_target_properties( prepare-${out_file} - PROPERTIES TARGET_FILE ${OPT_OUT_DIR}/${out_file} + PROPERTIES TARGET_FILE ${ARG_OUT_DIR}/${out_file} ) endfunction() @@ -344,8 +350,10 @@ macro(add_libclc_builtin_set arch_suffix) set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) - opt_prepare(${arch_suffix}.bc LIB_TGT ${builtins_opt_lib_tgt} IN_FILE - ${builtins_link_lib} OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + process_bc(${arch_suffix}.bc + LIB_TGT ${builtins_opt_lib_tgt} + IN_FILE 
${builtins_link_lib} + OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} DEPENDS ${builtins_link_lib_tgt}) # Add dependency to top-level pseudo target to ease making other diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index efa59b52f1708..c68fad6161b77 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -170,15 +170,21 @@ function(add_devicelibs filename) ${ARGN}) foreach(filetype IN LISTS filetypes) - compile_lib(${filename} FILETYPE ${filetype} SRC ${ARG_SRC} - DEPENDENCIES ${ARG_DEPENDENCIES} EXTRA_OPTS - ${ARG_EXTRA_OPTS} ${${filetype}_device_compile_opts}) + compile_lib(${filename} + FILETYPE ${filetype} + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + EXTRA_OPTS ${ARG_EXTRA_OPTS} ${${filetype}_device_compile_opts}) endforeach() foreach(arch IN LISTS devicelib_arch) - compile_lib(${filename}--${arch} FILETYPE bc SRC ${ARG_SRC} - DEPENDENCIES ${ARG_DEPENDENCIES} EXTRA_OPTS ${ARG_EXTRA_OPTS} - ${bc_device_compile_opts} ${compile_opts_${arch}}) + compile_lib(${filename}--${arch} + FILETYPE bc + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + EXTRA_OPTS ${ARG_EXTRA_OPTS} ${bc_device_compile_opts} + ${compile_opts_${arch}}) + append_to_property(${bc_binary_dir}/${filename}--${arch}.bc PROPERTY_NAME BC_DEVICE_LIBS_${arch}) endforeach() @@ -188,8 +194,7 @@ endfunction() set(crt_obj_deps wrapper.h device.h spirv_vars.h sycl-compiler) set(complex_obj_deps device_complex.h device.h sycl-compiler) set(cmath_obj_deps device_math.h device.h sycl-compiler) -set(imf_obj_deps device_imf.hpp imf_half.hpp imf_bf16.hpp imf_rounding_op.hpp - imf_impl_utils.hpp device.h sycl-compiler) +set(imf_obj_deps device_imf.hpp imf_half.hpp imf_bf16.hpp imf_rounding_op.hpp imf_impl_utils.hpp device.h sycl-compiler) set(itt_obj_deps device_itt.h spirv_vars.h device.h sycl-compiler) set(bfloat16_obj_deps sycl-headers sycl-compiler) if (NOT MSVC) @@ -203,77 +208,100 @@ endif() 
if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) if (NOT DEFINED NATIVE_CPU_DIR) - message( FATAL_ERROR "Undefined UR variable NATIVE_CPU_DIR. - The name may have changed." ) + message( FATAL_ERROR "Undefined UR variable NATIVE_CPU_DIR. The name may have changed." ) endif() - # Include NativeCPU UR adapter path to enable finding header file with - # state struct. libsycl-nativecpu_utils is only needed as BC file by - # NativeCPU. + # Include NativeCPU UR adapter path to enable finding header file with state struct. + # libsycl-nativecpu_utils is only needed as BC file by NativeCPU. # Todo: add versions for other targets (for cross-compilation) - compile_lib(libsycl-nativecpu_utils FILETYPE bc SRC nativecpu_utils.cpp - DEPENDENCIES ${itt_obj_deps} EXTRA_OPTS -I ${NATIVE_CPU_DIR} - -fsycl-targets=native_cpu -fsycl-device-only -fsycl-device-obj=llvmir) + compile_lib(libsycl-nativecpu_utils + FILETYPE bc + SRC nativecpu_utils.cpp + DEPENDENCIES ${itt_obj_deps} + EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only + -fsycl-device-obj=llvmir) endif() # Add all device libraries for each filetype except for the Intel math function # ones. 
-add_devicelibs(libsycl-itt-stubs SRC itt_stubs.cpp +add_devicelibs(libsycl-itt-stubs + SRC itt_stubs.cpp DEPENDENCIES ${itt_obj_deps}) -add_devicelibs(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp +add_devicelibs(libsycl-itt-compiler-wrappers + SRC itt_compiler_wrappers.cpp DEPENDENCIES ${itt_obj_deps}) -add_devicelibs(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp +add_devicelibs(libsycl-itt-user-wrappers + SRC itt_user_wrappers.cpp DEPENDENCIES ${itt_obj_deps}) -add_devicelibs(libsycl-crt SRC crt_wrapper.cpp +add_devicelibs(libsycl-crt + SRC crt_wrapper.cpp DEPENDENCIES ${crt_obj_deps}) -add_devicelibs(libsycl-complex SRC complex_wrapper.cpp +add_devicelibs(libsycl-complex + SRC complex_wrapper.cpp DEPENDENCIES ${complex_obj_deps}) -add_devicelibs(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp +add_devicelibs(libsycl-complex-fp64 + SRC complex_wrapper_fp64.cpp DEPENDENCIES ${complex_obj_deps} ) -add_devicelibs(libsycl-cmath SRC cmath_wrapper.cpp +add_devicelibs(libsycl-cmath + SRC cmath_wrapper.cpp DEPENDENCIES ${cmath_obj_deps}) -add_devicelibs(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp +add_devicelibs(libsycl-cmath-fp64 + SRC cmath_wrapper_fp64.cpp DEPENDENCIES ${cmath_obj_deps} ) -add_devicelibs(libsycl-imf SRC imf_wrapper.cpp +add_devicelibs(libsycl-imf + SRC imf_wrapper.cpp DEPENDENCIES ${imf_obj_deps}) -add_devicelibs(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp +add_devicelibs(libsycl-imf-fp64 + SRC imf_wrapper_fp64.cpp DEPENDENCIES ${imf_obj_deps}) -add_devicelibs(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp +add_devicelibs(libsycl-imf-bf16 + SRC imf_wrapper_bf16.cpp DEPENDENCIES ${imf_obj_deps}) -add_devicelibs(libsycl-bfloat16 SRC bfloat16_wrapper.cpp +add_devicelibs(libsycl-bfloat16 + SRC bfloat16_wrapper.cpp DEPENDENCIES ${cmath_obj_deps}) if(MSVC) - add_devicelibs(libsycl-msvc-math SRC msvc_math.cpp + add_devicelibs(libsycl-msvc-math + SRC msvc_math.cpp DEPENDENCIES ${cmath_obj_deps}) else() - add_devicelibs(libsycl-sanitizer SRC 
sanitizer_utils.cpp + add_devicelibs(libsycl-sanitizer + SRC sanitizer_utils.cpp DEPENDENCIES ${sanitizer_obj_deps} EXTRA_OPTS -fno-sycl-instrument-device-code) endif() -add_devicelibs(libsycl-fallback-cassert SRC fallback-cassert.cpp - DEPENDENCIES ${crt_obj_deps} EXTRA_OPTS -fno-sycl-instrument-device-code) -add_devicelibs(libsycl-fallback-cstring SRC fallback-cstring.cpp +add_devicelibs(libsycl-fallback-cassert + SRC fallback-cassert.cpp + DEPENDENCIES ${crt_obj_deps} + EXTRA_OPTS -fno-sycl-instrument-device-code) +add_devicelibs(libsycl-fallback-cstring + SRC fallback-cstring.cpp DEPENDENCIES ${crt_obj_deps}) -add_devicelibs(libsycl-fallback-complex SRC fallback-complex.cpp +add_devicelibs(libsycl-fallback-complex + SRC fallback-complex.cpp DEPENDENCIES ${complex_obj_deps}) -add_devicelibs(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp +add_devicelibs(libsycl-fallback-complex-fp64 + SRC fallback-complex-fp64.cpp DEPENDENCIES ${complex_obj_deps}) -add_devicelibs(libsycl-fallback-cmath SRC fallback-cmath.cpp +add_devicelibs(libsycl-fallback-cmath + SRC fallback-cmath.cpp DEPENDENCIES ${cmath_obj_deps}) -add_devicelibs(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp +add_devicelibs(libsycl-fallback-cmath-fp64 + SRC fallback-cmath-fp64.cpp DEPENDENCIES ${cmath_obj_deps}) -add_devicelibs(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp +add_devicelibs(libsycl-fallback-bfloat16 + SRC fallback-bfloat16.cpp DEPENDENCIES ${bfloat16_obj_deps}) -add_devicelibs(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp +add_devicelibs(libsycl-native-bfloat16 + SRC bfloat16_wrapper.cpp DEPENDENCIES ${bfloat16_obj_deps}) # Create dependency and source lists for Intel math function libraries. 
file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice) set(imf_fallback_src_dir ${obj_binary_dir}/libdevice) set(imf_src_dir ${CMAKE_CURRENT_SOURCE_DIR}) -set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp - imf_rounding_op.hpp imf_impl_utils.hpp +set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op.hpp imf_impl_utils.hpp imf_utils/integer_misc.cpp imf_utils/float_convert.cpp imf_utils/half_convert.cpp @@ -281,8 +309,7 @@ set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp imf_utils/fp32_round.cpp imf/imf_inline_fp32.cpp imf/imf_fp32_dl.cpp) -set(imf_fallback_fp64_deps device.h device_imf.hpp imf_half.hpp - imf_rounding_op.hpp imf_impl_utils.hpp +set(imf_fallback_fp64_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op.hpp imf_impl_utils.hpp imf_utils/double_convert.cpp imf_utils/fp64_round.cpp imf/imf_inline_fp64.cpp imf/imf_fp64_dl.cpp) @@ -330,8 +357,7 @@ macro(add_imf_host_cxx_flags_compile_flags_if_supported) foreach(flag ${ARGN}) mangle_name("${flag}" flagname) check_cxx_compiler_flag("${flag}" "CXX_SUPPORTS_${flagname}_FLAG") - add_imf_host_cxx_flags_compile_flags_if(CXX_SUPPORTS_${flagname}_FLAG - ${flag}) + add_imf_host_cxx_flags_compile_flags_if(CXX_SUPPORTS_${flagname}_FLAG ${flag}) endforeach() endmacro() @@ -413,8 +439,11 @@ foreach(dtype IN ITEMS bf16 fp32 fp64) endif() set(tgt_name imf_fallback_${dtype}_${ftype}) - add_lib_imf(${libsycl_name} DIR ${${ftype}_binary_dir} FTYPE - ${ftype} DTYPE ${dtype} EXTRA_OPTS ${${ftype}_device_compile_opts} + add_lib_imf(${libsycl_name} + DIR ${${ftype}_binary_dir} + FTYPE ${ftype} + DTYPE ${dtype} + EXTRA_OPTS ${${ftype}_device_compile_opts} TGT_NAME ${tgt_name}) endforeach() endforeach() @@ -425,8 +454,11 @@ foreach(arch IN LISTS devicelib_arch) foreach(dtype IN ITEMS bf16 fp32 fp64) set(tgt_name imf_fallback_${dtype}_bc_${arch}) - add_lib_imf(libsycl-fallback-imf-${arch}-${dtype} ARCH ${arch} DIR - ${bc_binary_dir} FTYPE bc DTYPE ${dtype} + 
add_lib_imf(libsycl-fallback-imf-${arch}-${dtype} + ARCH ${arch} + DIR ${bc_binary_dir} + FTYPE bc + DTYPE ${dtype} EXTRA_OPTS ${bc_device_compile_opts} ${compile_opts_${arch}} TGT_NAME ${tgt_name}) @@ -451,8 +483,10 @@ foreach(arch IN LISTS devicelib_arch) # Run the optimizer on the resulting bitcode file and call prepare_builtins # on it, which strips away debug and arch information. - opt_prepare(devicelib--${arch}.bc LIB_TGT builtins_${arch}.opt IN_FILE - ${builtins_link_lib_${arch}} OUT_DIR ${bc_binary_dir}) + process_bc(devicelib--${arch}.bc + LIB_TGT builtins_${arch}.opt + IN_FILE ${builtins_link_lib_${arch}} + OUT_DIR ${bc_binary_dir}) add_dependencies(libsycldevice-bc prepare-devicelib--${arch}.bc) set(complete_${arch}_libdev $) @@ -466,8 +500,11 @@ foreach(dtype IN ITEMS bf16 fp32 fp64) foreach(ftype IN ITEMS obj obj-new-offload) set(tgt_name imf_fallback_${dtype}_host_${ftype}) - add_lib_imf(fallback-imf-${dtype}-host DIR ${${ftype}_binary_dir} - FTYPE ${ftype} DTYPE ${dtype} EXTRA_OPTS ${${ftype}_host_compile_opts} + add_lib_imf(fallback-imf-${dtype}-host + DIR ${${ftype}_binary_dir} + FTYPE ${ftype} + DTYPE ${dtype} + EXTRA_OPTS ${${ftype}_host_compile_opts} TGT_NAME ${tgt_name}) set(wrapper_name imf_wrapper.cpp) From 5a29d6adcc2fa7ce3dbaef2c40c9d86b89a437c8 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Tue, 6 Aug 2024 15:14:19 +0100 Subject: [PATCH 10/14] Fix indentation of CMake file --- libclc/cmake/modules/AddLibclc.cmake | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 3b3ef0a2946f2..2212da1d25a41 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -241,14 +241,14 @@ function(process_bc out_file) ${ARG_IN_FILE} DEPENDS libclc::opt ${ARG_IN_FILE} ${ARG_DEPENDENCIES} ) -add_custom_target( ${ARG_LIB_TGT} - ALL DEPENDS ${ARG_LIB_TGT}.bc - ) -set_target_properties( ${ARG_LIB_TGT} - 
PROPERTIES TARGET_FILE ${ARG_LIB_TGT}.bc - ) + add_custom_target( ${ARG_LIB_TGT} + ALL DEPENDS ${ARG_LIB_TGT}.bc + ) + set_target_properties( ${ARG_LIB_TGT} + PROPERTIES TARGET_FILE ${ARG_LIB_TGT}.bc + ) -set( builtins_opt_lib $ ) + set( builtins_opt_lib $ ) # Add prepare target add_custom_command( OUTPUT ${ARG_OUT_DIR}/${out_file} From 1ebfe17ef84c8c047432c1d3d27b09fdf2231660 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Mon, 12 Aug 2024 15:23:09 +0100 Subject: [PATCH 11/14] Fix tool names --- libclc/cmake/modules/AddLibclc.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 2212da1d25a41..8aaacd7f7d842 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -237,9 +237,9 @@ function(process_bc out_file) "DEPENDENCIES" ${ARGN}) add_custom_command( OUTPUT ${ARG_LIB_TGT}.bc - COMMAND libclc::opt -o ${ARG_LIB_TGT}.bc + COMMAND ${opt_exe} -o ${ARG_LIB_TGT}.bc ${ARG_IN_FILE} - DEPENDS libclc::opt ${ARG_IN_FILE} ${ARG_DEPENDENCIES} + DEPENDS ${opt_target} ${ARG_IN_FILE} ${ARG_DEPENDENCIES} ) add_custom_target( ${ARG_LIB_TGT} ALL DEPENDS ${ARG_LIB_TGT}.bc @@ -252,9 +252,9 @@ function(process_bc out_file) # Add prepare target add_custom_command( OUTPUT ${ARG_OUT_DIR}/${out_file} - COMMAND prepare_builtins -o ${ARG_OUT_DIR}/${out_file} + COMMAND ${prepare_builtins_exe} -o ${ARG_OUT_DIR}/${out_file} ${builtins_opt_lib} - DEPENDS ${builtins_opt_lib} ${ARG_LIB_TGT} prepare_builtins ) + DEPENDS ${builtins_opt_lib} ${ARG_LIB_TGT} ${prepare_builtins_target} ) add_custom_target( prepare-${out_file} ALL DEPENDS ${ARG_OUT_DIR}/${out_file} ) From 8f01fa6deb93e6de5a05f2c2ced253628f118451 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Mon, 12 Aug 2024 17:14:21 +0100 Subject: [PATCH 12/14] Add opt flags to libclc and libdev compilation Add the optimizer flags again that were previously removed. 
--- libclc/cmake/modules/AddLibclc.cmake | 5 +++-- libdevice/cmake/modules/SYCLLibdevice.cmake | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 8aaacd7f7d842..a7871c9d86684 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -234,10 +234,10 @@ function(process_bc out_file) cmake_parse_arguments(ARG "" "LIB_TGT;IN_FILE;OUT_DIR" - "DEPENDENCIES" + "OPT_FLAGS;DEPENDENCIES" ${ARGN}) add_custom_command( OUTPUT ${ARG_LIB_TGT}.bc - COMMAND ${opt_exe} -o ${ARG_LIB_TGT}.bc + COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${ARG_LIB_TGT}.bc ${ARG_IN_FILE} DEPENDS ${opt_target} ${ARG_IN_FILE} ${ARG_DEPENDENCIES} ) @@ -354,6 +354,7 @@ macro(add_libclc_builtin_set arch_suffix) LIB_TGT ${builtins_opt_lib_tgt} IN_FILE ${builtins_link_lib} OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + OPT_FLAGS ${ARG_OPT_FLAGS} DEPENDS ${builtins_link_lib_tgt}) # Add dependency to top-level pseudo target to ease making other diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index c68fad6161b77..4631d6f5908ec 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -74,11 +74,13 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) list(APPEND devicelib_arch cuda) set(compile_opts_cuda "-fsycl-targets=nvptx64-nvidia-cuda" "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib") + set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false") endif() if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) list(APPEND devicelib_arch amd) set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa" "-Xsycl-target-backend" "--offload-arch=gfx940") + set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false") endif() @@ -486,7 +488,8 @@ foreach(arch IN LISTS devicelib_arch) process_bc(devicelib--${arch}.bc LIB_TGT builtins_${arch}.opt IN_FILE ${builtins_link_lib_${arch}} - 
OUT_DIR ${bc_binary_dir}) + OUT_DIR ${bc_binary_dir} + OPT_FLAGS ${opt_flags_${arch}}) add_dependencies(libsycldevice-bc prepare-devicelib--${arch}.bc) set(complete_${arch}_libdev $) From ffe12ad28d61bfe19dbb424f316e0b04c6272089 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Tue, 13 Aug 2024 10:49:05 +0100 Subject: [PATCH 13/14] Fix dependency error --- libclc/cmake/modules/AddLibclc.cmake | 2 +- libdevice/cmake/modules/SYCLLibdevice.cmake | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index a7871c9d86684..4711b9eb3e3b8 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -355,7 +355,7 @@ macro(add_libclc_builtin_set arch_suffix) IN_FILE ${builtins_link_lib} OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} OPT_FLAGS ${ARG_OPT_FLAGS} - DEPENDS ${builtins_link_lib_tgt}) + DEPENDENCIES ${builtins_link_lib_tgt}) # Add dependency to top-level pseudo target to ease making other # targets dependent on libclc. 
diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 4631d6f5908ec..1afc2cf2919e9 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -489,7 +489,8 @@ foreach(arch IN LISTS devicelib_arch) LIB_TGT builtins_${arch}.opt IN_FILE ${builtins_link_lib_${arch}} OUT_DIR ${bc_binary_dir} - OPT_FLAGS ${opt_flags_${arch}}) + OPT_FLAGS ${opt_flags_${arch}} + DEPENDENCIES device_lib_device_${arch}) add_dependencies(libsycldevice-bc prepare-devicelib--${arch}.bc) set(complete_${arch}_libdev $) From c3fba0b6058faab150c7d8814afc1a3f872ec967 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Tue, 13 Aug 2024 11:50:12 +0100 Subject: [PATCH 14/14] Make comments and checks in tests more specific --- clang/test/Driver/sycl-offload-amdgcn.cpp | 2 +- libdevice/cmake/modules/SYCLLibdevice.cmake | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/test/Driver/sycl-offload-amdgcn.cpp b/clang/test/Driver/sycl-offload-amdgcn.cpp index 5bbf0f7b77e8d..cf46fc5b696d5 100644 --- a/clang/test/Driver/sycl-offload-amdgcn.cpp +++ b/clang/test/Driver/sycl-offload-amdgcn.cpp @@ -37,7 +37,7 @@ // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 10: input, "{{.*}}", ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906) // CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906) // CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906) // CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 1afc2cf2919e9..5314c2bc7ad33 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ 
b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -151,8 +151,9 @@ function(append_to_property list) set_property(GLOBAL PROPERTY ${ARG_PROPERTY_NAME} ${new_property}) endfunction() -# Creates device libaries for each file type. -# Adds bitcode library files additionally for each devicelib_arch target. +# Creates device libraries for all filetypes. +# Adds bitcode library files additionally for each devicelib_arch target and +# adds the created file to an arch-specific global property. # # Arguments: # * SRC ...