From e2f86318cf7b4d93cc4c8c334cb72176ee710a7c Mon Sep 17 00:00:00 2001 From: "Sabianin, Maksim" Date: Wed, 27 May 2026 07:19:59 -0700 Subject: [PATCH 1/2] [SYCL][NewOffloadModel] Refactor SYCL pipeline in clang-linker-wrapper Extract SYCL offloading steps into named functions within the sycl namespace: runSYCLOffloadingPipeline, postLinkProcessModule, compileAndBundle and compileSYCLDevice. SYCL modifications of linkDevice function has been extracted to the dedicated function - compileSYCLDevice. Revert linkDevice function to its upstream state. Side effect of this revert: hipspv tests get resolved. Related tracker: CMPLRLLVM-73247 Functions containerizeRawImage and writeOffloadFile are moved so that their location corresponds to the upstream version. --- .../Driver/hipspv-link-static-library.hip | 1 - clang/test/Driver/hipspv-toolchain.hip | 1 - .../ClangLinkerWrapper.cpp | 395 +++++++++++------- 3 files changed, 242 insertions(+), 155 deletions(-) diff --git a/clang/test/Driver/hipspv-link-static-library.hip b/clang/test/Driver/hipspv-link-static-library.hip index a67aa2a7cfe7c..eb114ada49020 100644 --- a/clang/test/Driver/hipspv-link-static-library.hip +++ b/clang/test/Driver/hipspv-link-static-library.hip @@ -3,7 +3,6 @@ // REQUIRES: x86-registered-target // REQUIRES: spirv-registered-target // UNSUPPORTED: system-windows -// XFAIL: * // Create a dummy archive to test SDL linking // RUN: rm -rf %t && mkdir %t diff --git a/clang/test/Driver/hipspv-toolchain.hip b/clang/test/Driver/hipspv-toolchain.hip index 6b5cb5bd6e0cd..d2a7e9a3aeb3a 100644 --- a/clang/test/Driver/hipspv-toolchain.hip +++ b/clang/test/Driver/hipspv-toolchain.hip @@ -1,6 +1,5 @@ // REQUIRES: spirv-registered-target // UNSUPPORTED: system-windows, system-cygwin -// XFAIL: * // RUN: %clang -### -target x86_64-linux-gnu --offload=spirv64 \ // RUN: --no-offload-new-driver --hip-path=%S/Inputs/hipspv -nohipwrapperinc %s \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 2a9950d2154be..88bb6103b2f2d 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -18,6 +18,7 @@ #include "clang/Basic/TargetID.h" #include "clang/Basic/Version.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CodeGen/CommandFlags.h" @@ -288,42 +289,6 @@ Expected createOutputFile(const Twine &Prefix, StringRef Extension) { return TempFiles.back(); } -Error containerizeRawImage(std::unique_ptr &Img, OffloadKind Kind, - const ArgList &Args) { - llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - if (Kind == OFK_OpenMP && Triple.isSPIRV() && - Triple.getVendor() == llvm::Triple::Intel) - return offloading::intel::containerizeOpenMPSPIRVImage(Img, Triple); - return Error::success(); -} - -// TODO: Remove HasSYCLOffloadKind dependence when aligning with community code. -Expected writeOffloadFile(const OffloadFile &File, - bool HasSYCLOffloadKind = false) { - const OffloadBinary &Binary = *File.getBinary(); - - StringRef Prefix = - sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier()); - - StringRef BinArch = (Binary.getArch() == "*") ? "any" : Binary.getArch(); - auto TempFileOrErr = createOutputFile( - Prefix + "-" + Binary.getTriple() + "-" + BinArch, - HasSYCLOffloadKind ? getImageKindName(Binary.getImageKind()) : "o"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - - Expected> OutputOrErr = - FileOutputBuffer::create(*TempFileOrErr, Binary.getImage().size()); - if (!OutputOrErr) - return OutputOrErr.takeError(); - std::unique_ptr Output = std::move(*OutputOrErr); - llvm::copy(Binary.getImage(), Output->getBufferStart()); - if (Error E = Output->commit()) - return std::move(E); - - return *TempFileOrErr; -} - /// Execute the command \p ExecutablePath with the arguments \p Args. Error executeCommands(StringRef ExecutablePath, ArrayRef Args) { if (Verbose || DryRun) @@ -1845,22 +1810,22 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args, } } // namespace generic -Expected linkDevice(ArrayRef InputFiles, - const ArgList &Args, bool IsSYCLKind = false, - StringRef SYCLBackendOptions = StringRef()) { +// This part is located here because it uses a functionality from the +// generic::clang +namespace sycl { + +/// For NVPTX, AMDGCN, NativeCPU invokes clang backend. +/// For spir[v]{32,64} performs SPIRV translation (JIT case) + possible AOT +/// compilation (Intel CPU/GPU). +Expected +compileSYCLDevice(ArrayRef InputFiles, const ArgList &Args, + StringRef SYCLBackendOptions = StringRef()) { const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); switch (Triple.getArch()) { case Triple::nvptx: case Triple::nvptx64: case Triple::amdgcn: - case Triple::x86: - case Triple::x86_64: - case Triple::aarch64: - case Triple::aarch64_be: - case Triple::ppc64: - case Triple::ppc64le: - case Triple::systemz: - return generic::clang(InputFiles, Args, IsSYCLKind); + return generic::clang(InputFiles, Args, /*IsSYCLKind*/ true); case Triple::spirv32: case Triple::spirv64: case Triple::spir: @@ -1872,37 +1837,216 @@ Expected linkDevice(ArrayRef InputFiles, inconvertibleErrorCode(), "For SPIR targets, Linking is supported only for JIT compilations " "and AOT compilations for Intel CPUs/GPUs"); - if (IsSYCLKind) { - auto SPVFile = sycl::runLLVMToSPIRVTranslation(InputFiles[0], Args); - if (!SPVFile) - return SPVFile.takeError(); - // TODO(NOM6): Add AOT support for other targets - bool NeedAOTCompile = - (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen || - Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64); - auto AOTFile = (NeedAOTCompile) ? sycl::runAOTCompile(*SPVFile, Args, - SYCLBackendOptions) - : *SPVFile; - if (!AOTFile) - return AOTFile.takeError(); - return NeedAOTCompile ? *AOTFile : *SPVFile; - } - // Return empty file - return StringRef(""); + Expected SPVFile = + sycl::runLLVMToSPIRVTranslation(InputFiles[0], Args); + if (!SPVFile) + return SPVFile.takeError(); + // TODO(NOM6): Add AOT support for other targets + bool NeedAOTCompile = + (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen || + Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64); + Expected AOTFile = + (NeedAOTCompile) + ? sycl::runAOTCompile(*SPVFile, Args, SYCLBackendOptions) + : *SPVFile; + if (!AOTFile) + return AOTFile.takeError(); + return NeedAOTCompile ? *AOTFile : *SPVFile; } - case Triple::loongarch64: - return generic::clang(InputFiles, Args, IsSYCLKind); case Triple::native_cpu: - if (IsSYCLKind) - return generic::clang(InputFiles, Args, IsSYCLKind); + return generic::clang(InputFiles, Args, /*IsSYCLKind*/ true); + default: return createStringError(Triple.getArchName() + - " linking is not supported other than for SYCL"); + " linking is not supported"); + } +} + +/// Function invokes device compilation and bundling for NVPTX and AMDGCN cases. +Expected compileDeviceAndBundle(StringRef ModuleFilePath, + const ArgList &LinkerArgs, + const llvm::Triple &Triple, + StringRef AdditionalCompileOptions) { + SmallVector Files = {ModuleFilePath}; + Expected OutputOrErr = + compileSYCLDevice(Files, LinkerArgs, AdditionalCompileOptions); + if (!OutputOrErr) + return OutputOrErr.takeError(); + + if (Triple.isNVPTX() || Triple.isAMDGCN()) { + StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ); + if (Arch.empty()) + Arch = "native"; + + return sycl::bundleDeviceModule(*OutputOrErr, LinkerArgs, Triple, Arch); + } + + return OutputOrErr; +} + +/// Function runs SYCL offloading processing for the linked module. +/// Processing consists of steps: +/// * Invoke sycl-post-link (module split, spec-const lowering, esimd lowering). +/// * Handle -fsycl-embed-ir case (early wrap + compile + transfer output to the +/// given +/// \p WrappedOutputCallback). +/// * Handle NaticeCPU case (early transfer output to the given \p +/// WrappedOutputCallback). +/// * Set Compile/Link options to the output Modules. +/// * Invokes device backend compilation + bundling. +/// +/// \returns The list of the processed Modules. +Expected> postLinkProcessModule( + StringRef ModuleFilePath, const ArgList &LinkerArgs, + const std::pair &CompileLinkOptions, + function_ref WrappedOutputCallback) { + SmallVector CompileArgsSplit; + StringRef(CompileLinkOptions.first).split(CompileArgsSplit, ' '); + bool IsDevicePassedWithSyclTargetBackend = + std::find(CompileArgsSplit.begin(), CompileArgsSplit.end(), "-device") != + CompileArgsSplit.end(); + + SmallVector InputFilesSYCL = {ModuleFilePath}; + Expected> SplitModulesOrErr = + UseSYCLPostLinkTool + ? sycl::runSYCLPostLinkTool(InputFilesSYCL, LinkerArgs, + IsDevicePassedWithSyclTargetBackend) + : sycl::runSYCLPostLinkLibrary(InputFilesSYCL, LinkerArgs, + *SYCLModuleSplitMode); + if (!SplitModulesOrErr) + return SplitModulesOrErr.takeError(); + + std::vector &SplitModules = *SplitModulesOrErr; + const llvm::Triple Triple(LinkerArgs.getLastArgValue(OPT_triple_EQ)); + if ((Triple.isNVPTX() || Triple.isAMDGCN()) && + LinkerArgs.hasArg(OPT_sycl_embed_ir)) { + // When compiling for Nvidia/AMD devices and the user requested the + // IR to be embedded in the application (via option), run the output + // of sycl-post-link (filetable referencing LLVM Bitcode + symbols) + // through the offload wrapper and link the resulting object to the + // application. + Expected OutputFile = sycl::runWrapperAndCompile( + SplitModules, LinkerArgs, /* IsEmbeddedIR */ true); + if (!OutputFile) + return OutputFile.takeError(); + + WrappedOutputCallback(*OutputFile); + } + + // TODO: Take into account Arch values considered as JIT: "native", + // "spir64", "spir", "spirv32" and "spirv64" for SPIR targets. + // For now we only consider NoSubArch target as JIT. + bool IsJIT = + Triple.isSPIROrSPIRV() && Triple.getSubArch() == llvm::Triple::NoSubArch; + if (IsJIT) + std::for_each(SplitModules.begin(), SplitModules.end(), + [&CompileLinkOptions](module_split::SplitModule &M) { + M.CompileOptions = CompileLinkOptions.first; + M.LinkOptions = CompileLinkOptions.second; + }); + + for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { + Expected OutputOrErr = + compileDeviceAndBundle(SplitModules[I].ModuleFilePath, LinkerArgs, + Triple, CompileLinkOptions.first); + if (!OutputOrErr) + return OutputOrErr.takeError(); + + SplitModules[I].ModuleFilePath = *OutputOrErr; + if (Triple.isNativeCPU()) { + // Add to WrappedOutput directly rather than combining this with + // the usual pipeline because WrappedOutput holds references and + // SplitModules[I].ModuleFilePath will go out of scope too soon. + WrappedOutputCallback(*OutputOrErr); + } + } + + return std::move(SplitModules); +} + +/// Run SYCL offloading pipeline for the given \p InputModules. +/// Here steps consists of: +/// * Link all input Modules. +/// * PostLink process of the linked Module. +/// +/// \p WrappedOutputCallback allows to save output modules early for +/// -fsycl-embed-ir case. +/// +/// \returns A list of split modules. +Expected> runSYCLOffloadingPipeline( + ArrayRef InputModules, const ArgList &LinkerArgs, + const std::pair &CompileLinkOptions, + function_ref WrappedOutputCallback) { + Expected LinkedModuleOrErr = + sycl::linkDevice(InputModules, LinkerArgs); + if (!LinkedModuleOrErr) + return LinkedModuleOrErr.takeError(); + + return postLinkProcessModule(*LinkedModuleOrErr, LinkerArgs, + CompileLinkOptions, WrappedOutputCallback); +} + +} // namespace sycl + +Expected linkDevice(ArrayRef InputFiles, + const ArgList &Args, + uint16_t ActiveOffloadKindMask) { + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + switch (Triple.getArch()) { + case Triple::nvptx: + case Triple::nvptx64: + case Triple::amdgcn: + case Triple::x86: + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::spirv64: + case Triple::systemz: + case Triple::loongarch64: + return generic::clang(InputFiles, Args, ActiveOffloadKindMask); default: return createStringError(Triple.getArchName() + " linking is not supported"); } } +Error containerizeRawImage(std::unique_ptr &Img, OffloadKind Kind, + const ArgList &Args) { + llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + if (Kind == OFK_OpenMP && Triple.isSPIRV() && + Triple.getVendor() == llvm::Triple::Intel) + return offloading::intel::containerizeOpenMPSPIRVImage(Img, Triple); + return Error::success(); +} + +// TODO: Remove HasSYCLOffloadKind dependence when aligning with community code. +Expected writeOffloadFile(const OffloadFile &File, + bool HasSYCLOffloadKind = false) { + const OffloadBinary &Binary = *File.getBinary(); + + StringRef Prefix = + sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier()); + + StringRef BinArch = (Binary.getArch() == "*") ? "any" : Binary.getArch(); + auto TempFileOrErr = createOutputFile( + Prefix + "-" + Binary.getTriple() + "-" + BinArch, + HasSYCLOffloadKind ? getImageKindName(Binary.getImageKind()) : "o"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + Expected> OutputOrErr = + FileOutputBuffer::create(*TempFileOrErr, Binary.getImage().size()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr Output = std::move(*OutputOrErr); + llvm::copy(Binary.getImage(), Output->getBufferStart()); + if (Error E = Output->commit()) + return std::move(E); + + return *TempFileOrErr; +} + // Compile the module to an object file using the appropriate target machine for // the host triple. Expected compileModule(Module &M, OffloadKind Kind) { @@ -2294,26 +2438,35 @@ linkAndWrapDeviceFiles(ArrayRef> LinkerInputFiles, HasNonSYCLOffloadKinds = true; } + auto AppendImageToWrapperOutput = [&WrappedOutput, + &ImageMtx](StringRef ImagePath) { + std::scoped_lock Guard(ImageMtx); + WrappedOutput.push_back(ImagePath); + }; + if (HasSYCLOffloadKind) { Expected> CompileLinkOptionsOrErr = extractSYCLCompileLinkOptions(Input); if (!CompileLinkOptionsOrErr) return CompileLinkOptionsOrErr.takeError(); + std::pair &CompileLinkOptions = + *CompileLinkOptionsOrErr; + // Append device compiler and linker options passed via // -device-compiler= and -device-linker= to clang-linker-warpper, // together with options extracted from the image. StringRef DeviceCompilerArgs = LinkerArgs.getLastArgValue(OPT_compiler_arg_EQ); if (!DeviceCompilerArgs.empty()) { - CompileLinkOptionsOrErr->first += " "; - CompileLinkOptionsOrErr->first += DeviceCompilerArgs; + CompileLinkOptions.first += " "; + CompileLinkOptions.first += DeviceCompilerArgs; } StringRef DeviceLinkerArgs = LinkerArgs.getLastArgValue(OPT_linker_arg_EQ); if (!DeviceLinkerArgs.empty()) { - CompileLinkOptionsOrErr->second += " "; - CompileLinkOptionsOrErr->second += DeviceLinkerArgs; + CompileLinkOptions.second += " "; + CompileLinkOptions.second += DeviceLinkerArgs; } SmallVector InputFiles; @@ -2324,101 +2477,36 @@ linkAndWrapDeviceFiles(ArrayRef> LinkerInputFiles, return FileNameOrErr.takeError(); InputFiles.emplace_back(*FileNameOrErr); } - // Link the input device files using the device linker for SYCL - // offload. - auto TmpOutputOrErr = sycl::linkDevice(InputFiles, LinkerArgs); - if (!TmpOutputOrErr) - return TmpOutputOrErr.takeError(); - SmallVector InputFilesSYCL; - InputFilesSYCL.emplace_back(*TmpOutputOrErr); - - SmallVector Args; - StringRef(CompileLinkOptionsOrErr->first).split(Args, ' '); - bool IsDevicePassedWithSyclTargetBackend = - std::find(Args.begin(), Args.end(), "-device") != Args.end(); - auto SplitModulesOrErr = - UseSYCLPostLinkTool - ? sycl::runSYCLPostLinkTool(InputFilesSYCL, LinkerArgs, - IsDevicePassedWithSyclTargetBackend) - : sycl::runSYCLPostLinkLibrary(InputFilesSYCL, LinkerArgs, - *SYCLModuleSplitMode); - if (!SplitModulesOrErr) - return SplitModulesOrErr.takeError(); - - auto &SplitModules = *SplitModulesOrErr; - const llvm::Triple Triple(LinkerArgs.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ); - if (Arch.empty()) - Arch = "native"; - // TODO: Take into account Arch values considered as JIT: "native", - // "spir64", "spir", "spirv32" and "spirv64" for SPIR targets. - // For now we only consider NoSubArch target as JIT. - bool IsJIT = Triple.isSPIROrSPIRV() && - Triple.getSubArch() == llvm::Triple::NoSubArch; - if ((Triple.isNVPTX() || Triple.isAMDGCN()) && - LinkerArgs.hasArg(OPT_sycl_embed_ir)) { - // When compiling for Nvidia/AMD devices and the user requested the - // IR to be embedded in the application (via option), run the output - // of sycl-post-link (filetable referencing LLVM Bitcode + symbols) - // through the offload wrapper and link the resulting object to the - // application. - auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs, - /* IsEmbeddedIR */ true); - if (!OutputFile) - return OutputFile.takeError(); - WrappedOutput.push_back(*OutputFile); - } - for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { - SmallVector Files = {SplitModules[I].ModuleFilePath}; - auto ClangOutputOrErr = - linkDevice(Files, LinkerArgs, true /* IsSYCLKind */, - CompileLinkOptionsOrErr->first); - if (!ClangOutputOrErr) - return ClangOutputOrErr.takeError(); - if (Triple.isNVPTX() || Triple.isAMDGCN()) { - auto BundledFileOrErr = sycl::bundleDeviceModule( - *ClangOutputOrErr, LinkerArgs, Triple, Arch); - if (!BundledFileOrErr) - return BundledFileOrErr.takeError(); - SplitModules[I].ModuleFilePath = *BundledFileOrErr; - } else { - SplitModules[I].ModuleFilePath = *ClangOutputOrErr; - if (IsJIT) { - SplitModules[I].CompileOptions = CompileLinkOptionsOrErr->first; - SplitModules[I].LinkOptions = CompileLinkOptionsOrErr->second; - } - - if (Triple.isNativeCPU()) { - // Add to WrappedOutput directly rather than combining this with the - // below because WrappedOutput holds references and - // SplitModules[I].ModuleFilePath will go out of scope too soon. - std::scoped_lock Guard(ImageMtx); - WrappedOutput.push_back(*ClangOutputOrErr); - } - } - } + Expected> ModulesOrErr = + sycl::runSYCLOffloadingPipeline(InputFiles, LinkerArgs, + CompileLinkOptions, + AppendImageToWrapperOutput); + if (!ModulesOrErr) + return ModulesOrErr.takeError(); + + std::vector &Modules = *ModulesOrErr; if (OutputSYCLBIN) { SYCLBIN::SYCLBINModuleDesc MD; MD.ArchString = LinkerArgs.getLastArgValue(OPT_arch_EQ); MD.TargetTriple = llvm::Triple{LinkerArgs.getLastArgValue(OPT_triple_EQ)}; - MD.SplitModules = std::move(SplitModules); + MD.SplitModules = std::move(Modules); std::scoped_lock Guard(SYCLBINModulesMtx); SYCLBINModules.emplace_back(std::move(MD)); } else { // TODO(NOM7): Remove this call and use community flow for bundle/wrap - auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs); + Expected OutputFile = + sycl::runWrapperAndCompile(Modules, LinkerArgs); if (!OutputFile) return OutputFile.takeError(); // SYCL offload kind images are all ready to be sent to host linker. // TODO: Currently, device code wrapping for SYCL offload happens in a // separate path inside 'linkDevice' call seen above. - // This will eventually be refactored to use the 'common' wrapping logic - // that is used for other offload kinds. - std::scoped_lock Guard(ImageMtx); - WrappedOutput.push_back(*OutputFile); + // This will eventually be refactored to use the 'common' wrapping + // logic that is used for other offload kinds. + AppendImageToWrapperOutput(*OutputFile); } } if (HasNonSYCLOffloadKinds) { @@ -2432,7 +2520,8 @@ linkAndWrapDeviceFiles(ArrayRef> LinkerInputFiles, } // Link the remaining device files using the device linker. - auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); + auto OutputOrErr = + linkDevice(InputFiles, LinkerArgs, ActiveOffloadKindMask); if (!OutputOrErr) return OutputOrErr.takeError(); From 45b6d835f9d79e591055495c7d488845ea4bfaef Mon Sep 17 00:00:00 2001 From: "Sabianin, Maksim" Date: Fri, 29 May 2026 08:38:03 -0700 Subject: [PATCH 2/2] fix typo and retrigger CI --- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 88bb6103b2f2d..18189b90a5011 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1889,7 +1889,7 @@ Expected compileDeviceAndBundle(StringRef ModuleFilePath, /// * Handle -fsycl-embed-ir case (early wrap + compile + transfer output to the /// given /// \p WrappedOutputCallback). -/// * Handle NaticeCPU case (early transfer output to the given \p +/// * Handle NativeCPU case (early transfer output to the given \p /// WrappedOutputCallback). /// * Set Compile/Link options to the output Modules. /// * Invokes device backend compilation + bundling.