diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2368fb64d4e59..f0a3e3ca02099 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -11066,7 +11066,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, ArgStringList CmdArgs; // Pass the CUDA path to the linker wrapper tool. - for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) { + for (Action::OffloadKind Kind : + {Action::OFK_Cuda, Action::OFK_OpenMP, Action::OFK_SYCL}) { auto TCRange = C.getOffloadToolChains(Kind); for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) { const ToolChain *TC = I.second; diff --git a/clang/test/Driver/linker-wrapper-sycl-win.cpp b/clang/test/Driver/linker-wrapper-sycl-win.cpp index d3b923720fe86..c7c67a74195d9 100644 --- a/clang/test/Driver/linker-wrapper-sycl-win.cpp +++ b/clang/test/Driver/linker-wrapper-sycl-win.cpp @@ -90,10 +90,11 @@ // CHK-CMDS-AOT-NV-NEXT: "{{.*}}llvm-link.exe" -only-needed [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc --suppress-warnings // CHK-CMDS-AOT-NV-NEXT: "{{.*}}sycl-post-link.exe"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc // CHK-CMDS-AOT-NV-NEXT: "{{.*}}clang.exe"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}} -// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: {{.*}}, output: [[WRAPPEROUT:.*]].bc +// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]] +// CHK-CMDS-AOT-NV-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]] +// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]].bc // CHK-CMDS-AOT-NV-NEXT: "{{.*}}llc.exe" -filetype=obj -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc // CHK-CMDS-AOT-NV-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o - /// Check for list of commands for standalone clang-linker-wrapper run for sycl (AOT for AMD) // ------- // Generate .o file as linker wrapper input. @@ -107,6 +108,7 @@ // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llvm-link.exe" [[FIRSTLLVMLINKIN:.*]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc --suppress-warnings // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}sycl-post-link.exe"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[FIRSTLLVMLINKOUT]].bc // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang.exe"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}} -// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: {{.*}}, output: [[WRAPPEROUT:.*]].bc +// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang-offload-bundler.exe"{{.*}} -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]] +// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT:.*]].bc // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llc.exe" -filetype=obj -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o diff --git a/clang/test/Driver/linker-wrapper-sycl.cpp b/clang/test/Driver/linker-wrapper-sycl.cpp index 37c00fde275c7..631cc4a4b8b99 100644 --- a/clang/test/Driver/linker-wrapper-sycl.cpp +++ b/clang/test/Driver/linker-wrapper-sycl.cpp @@ -108,7 +108,9 @@ // CHK-CMDS-AOT-NV-NEXT: "{{.*}}llvm-link" -only-needed [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc --suppress-warnings // CHK-CMDS-AOT-NV-NEXT: "{{.*}}sycl-post-link"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc // CHK-CMDS-AOT-NV-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}} -// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]] +// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]] +// CHK-CMDS-AOT-NV-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]] +// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]] // CHK-CMDS-AOT-NV-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT:.*]] [[WRAPPEROUT]] // CHK-CMDS-AOT-NV-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o @@ -125,7 +127,8 @@ // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llvm-link" [[FIRSTLLVMLINKIN]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc --suppress-warnings // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}sycl-post-link"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[FIRSTLLVMLINKOUT]].bc // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}} -// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]] +// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang-offload-bundler"{{.*}} -targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx803 -input=/dev/null -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]] +// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT:.*]] // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT:.*]] [[WRAPPEROUT]] // CHK-CMDS-AOT-AMD-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o @@ -150,7 +153,9 @@ // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: {{.*}}.bc, output: [[WRAPPEROUT1:.*]] // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT1:.*]] [[WRAPPEROUT1]] // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}} -// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]] +// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]] +// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]] +// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]] // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT2:.*]] [[WRAPPEROUT]] // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o @@ -169,6 +174,7 @@ // CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: {{.*}}.bc, output: [[WRAPPEROUT1:.*]] // CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT1:.*]] [[WRAPPEROUT1]] // CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}} -// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT2:.*]] +// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}clang-offload-bundler"{{.*}} -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]] +// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT2:.*]] // CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT2:.*]] [[WRAPPEROUT2]] // CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index 819dbb738913c..4d77874fdca0e 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -188,3 +188,10 @@ // RUN: -Xsycl-target-backend=spir64_gen "-device pvc,bdw" %s 2>&1 \ // RUN: | FileCheck -check-prefix COMMA_FILE %s // COMMA_FILE: clang-offload-packager{{.*}} "--image=file={{.*}}pvc@bdw{{.*}},triple=spir64_gen-unknown-unknown,arch=pvc,bdw,kind=sycl" + +/// Verify that --cuda-path is passed to clang-linker-wrapper for SYCL offload +// RUN: %clangxx -fsycl -### -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s \ +// RUN: --offload-new-driver 2>&1 \ +// RUN: | FileCheck -check-prefix NVPTX_CUDA_PATH %s +// NVPTX_CUDA_PATH: clang-linker-wrapper{{.*}} "--cuda-path={{.*}}Inputs/CUDA_80/usr/local/cuda" diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index c372157048c7e..b0cfc68ee0f8d 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -14,6 +14,7 @@ // //===---------------------------------------------------------------------===// +#include "clang/Basic/Cuda.h" #include "clang/Basic/Version.h" #include "llvm/ADT/MapVector.h" #include "llvm/BinaryFormat/Magic.h" @@ -409,6 +410,46 @@ fatbinary(ArrayRef> InputFiles, return *TempFileOrErr; } + +// ptxas binary +Expected ptxas(StringRef InputFile, const ArgList &Args, + StringRef Arch) { + llvm::TimeTraceScope TimeScope("NVPTX ptxas"); + // NVPTX uses the ptxas program to process assembly files. + Expected PtxasPath = + findProgram("ptxas", {CudaBinaryPath + "/bin"}); + if (!PtxasPath) + return PtxasPath.takeError(); + + llvm::Triple Triple( + Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); + + // Create a new file to write the output to. + auto TempFileOrErr = + createOutputFile(sys::path::filename(ExecutableName), "cubin"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + SmallVector CmdArgs; + CmdArgs.push_back(*PtxasPath); + CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32"); + // Pass -v to ptxas if it was passed to the driver. + if (Args.hasArg(OPT_verbose)) + CmdArgs.push_back("-v"); + StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); + if (Args.hasArg(OPT_debug)) + CmdArgs.push_back("-g"); + else + CmdArgs.push_back(Args.MakeArgString("-" + OptLevel)); + CmdArgs.push_back("--gpu-name"); + CmdArgs.push_back(Arch); + CmdArgs.push_back("--output-file"); + CmdArgs.push_back(*TempFileOrErr); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*PtxasPath, CmdArgs)) + return std::move(Err); + return *TempFileOrErr; +} } // namespace nvptx namespace amdgcn { @@ -1240,7 +1281,8 @@ static Expected linkDevice(ArrayRef InputFiles, } // namespace sycl namespace generic { -Expected clang(ArrayRef InputFiles, const ArgList &Args) { +Expected clang(ArrayRef InputFiles, const ArgList &Args, + bool IsSYCLKind = false) { llvm::TimeTraceScope TimeScope("Clang"); // Use `clang` to invoke the appropriate device tools. Expected ClangPath = @@ -1276,6 +1318,8 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { if (!Triple.isNVPTX()) CmdArgs.push_back("-Wl,--no-undefined"); + if (IsSYCLKind && Triple.isNVPTX()) + CmdArgs.push_back("-S"); for (StringRef InputFile : InputFiles) CmdArgs.push_back(InputFile); @@ -1369,7 +1413,7 @@ Expected linkDevice(ArrayRef InputFiles, case Triple::ppc64: case Triple::ppc64le: case Triple::systemz: - return generic::clang(InputFiles, Args); + return generic::clang(InputFiles, Args, IsSYCLKind); case Triple::spirv32: case Triple::spirv64: case Triple::spir: @@ -2078,14 +2122,40 @@ Expected> linkAndWrapDeviceFiles( return OutputFile.takeError(); WrappedOutput.push_back(*OutputFile); } - for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { SmallVector Files = {SplitModules[I].ModuleFilePath}; - auto LinkedFileFinalOrErr = + StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ); + if (Arch.empty()) + Arch = "native"; + SmallVector, 4> BundlerInputFiles; + auto ClangOutputOrErr = linkDevice(Files, LinkerArgs, true /* IsSYCLKind */); - if (!LinkedFileFinalOrErr) - return LinkedFileFinalOrErr.takeError(); - SplitModules[I].ModuleFilePath = *LinkedFileFinalOrErr; + if (!ClangOutputOrErr) + return ClangOutputOrErr.takeError(); + if (Triple.isNVPTX()) { + auto VirtualArch = StringRef(clang::CudaArchToVirtualArchString( + clang::StringToCudaArch(Arch))); + auto PtxasOutputOrErr = + nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch); + if (!PtxasOutputOrErr) + return PtxasOutputOrErr.takeError(); + BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch); + BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch); + auto BundledFileOrErr = + nvptx::fatbinary(BundlerInputFiles, LinkerArgs); + if (!BundledFileOrErr) + return BundledFileOrErr.takeError(); + SplitModules[I].ModuleFilePath = *BundledFileOrErr; + } else if (Triple.isAMDGCN()) { + BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch); + auto BundledFileOrErr = + amdgcn::fatbinary(BundlerInputFiles, LinkerArgs); + if (!BundledFileOrErr) + return BundledFileOrErr.takeError(); + SplitModules[I].ModuleFilePath = *BundledFileOrErr; + } else { + SplitModules[I].ModuleFilePath = *ClangOutputOrErr; + } } // TODO(NOM7): Remove this call and use community flow for bundle/wrap auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs); diff --git a/sycl/test-e2e/NewOffloadDriver/Inputs/aot.cpp b/sycl/test-e2e/NewOffloadDriver/Inputs/aot.cpp new file mode 100644 index 0000000000000..39aed079cd935 --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/Inputs/aot.cpp @@ -0,0 +1,71 @@ +//==--- aot.cpp - Simple vector addition (AOT compilation example) --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include + +#include +#include + +constexpr sycl::access::mode sycl_read = sycl::access::mode::read; +constexpr sycl::access::mode sycl_write = sycl::access::mode::write; + +template class Vadd; + +template +void vadd(const std::array &A, const std::array &B, + std::array &C) { + sycl::queue Queue([](sycl::exception_list ExceptionList) { + for (std::exception_ptr ExceptionPtr : ExceptionList) { + try { + std::rethrow_exception(ExceptionPtr); + } catch (sycl::exception &E) { + std::cerr << E.what(); + } catch (...) { + std::cerr << "Unknown async exception was caught." << std::endl; + } + } + }); + + sycl::range<1> numOfItems{N}; + sycl::buffer bufA(A.data(), numOfItems); + sycl::buffer bufB(B.data(), numOfItems); + sycl::buffer bufC(C.data(), numOfItems); + + Queue.submit([&](sycl::handler &cgh) { + sycl::accessor accA{bufA, cgh, sycl::read_only}; + sycl::accessor accB{bufB, cgh, sycl::read_only}; + sycl::accessor accC{bufC, cgh, sycl::write_only}; + + cgh.parallel_for>(numOfItems, [=](sycl::id<1> wiID) { + accC[wiID] = accA[wiID] + accB[wiID]; + }); + }); + + Queue.wait_and_throw(); +} + +int main() { + const size_t array_size = 4; + std::array A = {{1, 2, 3, 4}}, B = {{1, 2, 3, 4}}, C; + std::array D = {{1.f, 2.f, 3.f, 4.f}}, + E = {{1.f, 2.f, 3.f, 4.f}}, F; + vadd(A, B, C); + vadd(D, E, F); + for (unsigned int i = 0; i < array_size; i++) { + if (C[i] != A[i] + B[i]) { + std::cout << "Incorrect result (element " << i << " is " << C[i] << "!\n"; + return 1; + } + if (F[i] != D[i] + E[i]) { + std::cout << "Incorrect result (element " << i << " is " << F[i] << "!\n"; + return 1; + } + } + std::cout << "Correct result!\n"; + return 0; +} diff --git a/sycl/test-e2e/NewOffloadDriver/Inputs/split-per-source-second-file.cpp b/sycl/test-e2e/NewOffloadDriver/Inputs/split-per-source-second-file.cpp new file mode 100644 index 0000000000000..d5ee857ec4df3 --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/Inputs/split-per-source-second-file.cpp @@ -0,0 +1,23 @@ +#include "split-per-source.h" + +void runKernelsFromFile2() { + sycl::queue Q; + int Data = 0; + { + sycl::buffer Buf(&Data, sycl::range<1>(1)); + auto KernelID1 = sycl::get_kernel_id(); + auto KB = sycl::get_kernel_bundle( + Q.get_context(), {KernelID1}); + auto Krn = KB.get_kernel(KernelID1); + + std::vector KernelIDStorage = KB.get_kernel_ids(); + assert(KernelIDStorage.size() == 1); + assert(KernelIDStorage[0] == KernelID1); + + Q.submit([&](sycl::handler &Cgh) { + auto Acc = Buf.get_access(Cgh); + Cgh.single_task(Krn, [=]() { Acc[0] = 3; }); + }); + } + assert(Data == 3); +} diff --git a/sycl/test-e2e/NewOffloadDriver/Inputs/split-per-source.h b/sycl/test-e2e/NewOffloadDriver/Inputs/split-per-source.h new file mode 100644 index 0000000000000..60f8884d61dbf --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/Inputs/split-per-source.h @@ -0,0 +1,7 @@ +#include + +class File1Kern1; +class File1Kern2; +class File2Kern1; + +void runKernelsFromFile2(); diff --git a/sycl/test-e2e/NewOffloadDriver/aot-cpu.cpp b/sycl/test-e2e/NewOffloadDriver/aot-cpu.cpp new file mode 100644 index 0000000000000..9ee6bf8d70122 --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/aot-cpu.cpp @@ -0,0 +1,6 @@ +// REQUIRES: opencl-aot, cpu + +// Test with `--offload-new-driver` +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source -fsycl-targets=spir64_x86_64 -I %S/Inputs -o %t.out %S/split-per-source-main.cpp %S/Inputs/split-per-source-second-file.cpp \ +// RUN: -fsycl-dead-args-optimization --offload-new-driver +// RUN: %{run} %t.out diff --git a/sycl/test-e2e/NewOffloadDriver/aot-gpu.cpp b/sycl/test-e2e/NewOffloadDriver/aot-gpu.cpp new file mode 100644 index 0000000000000..3532c97cc634e --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/aot-gpu.cpp @@ -0,0 +1,13 @@ +// REQUIRES: ocloc, gpu +// UNSUPPORTED: cuda || hip +// CUDA does neither support device code splitting nor SPIR. +// Test with `--offload-new-driver` +// +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source \ +// RUN: -fsycl-targets=spir64_gen \ +// RUN: -Xsycl-target-backend=spir64_gen \ +// RUN: "-device tgllp" -I %S/Inputs -o %t.out \ +// RUN: %S/split-per-source-main.cpp \ +// RUN: %S/Inputs/split-per-source-second-file.cpp \ +// RUN: -fsycl-dead-args-optimization --offload-new-driver +// RUN: %{run} %t.out diff --git a/sycl/test-e2e/NewOffloadDriver/cpu.cpp b/sycl/test-e2e/NewOffloadDriver/cpu.cpp new file mode 100644 index 0000000000000..ef93d69f8693b --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/cpu.cpp @@ -0,0 +1,20 @@ +//==--- cpu.cpp - AOT compilation for cpu devices using opencl-aot --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +// REQUIRES: opencl-aot, cpu + +// Test with `--offload-new-driver` +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -o %t.out +// RUN: %{run} %t.out + +// Test that opencl-aot can handle multiple build options. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--bo=-g" -Xsycl-target-backend "--bo=-cl-opt-disable" -o %t2.out + +// Test that opencl-aot can handle march option. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--march=avx512" +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--march=wsm" diff --git a/sycl/test-e2e/NewOffloadDriver/multisource.cpp b/sycl/test-e2e/NewOffloadDriver/multisource.cpp new file mode 100644 index 0000000000000..ec0f3ad1b4569 --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/multisource.cpp @@ -0,0 +1,86 @@ +//==--------------- multisource.cpp ----------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Separate kernel sources and host code sources +// Test with `--offload-new-driver` +// RUN: %{build} --offload-new-driver -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL +// RUN: %{build} --offload-new-driver -c -o %t.main.o -DMAIN_APP +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t.kernel.o %t.main.o -o %t.fat +// RUN: %{run} %t.fat + +// Multiple sources with kernel code +// Test with `--offload-new-driver` +// RUN: %{build} --offload-new-driver -c -o %t.init.o -DINIT_KERNEL +// RUN: %{build} --offload-new-driver -c -o %t.calc.o -DCALC_KERNEL +// RUN: %{build} --offload-new-driver -c -o %t.main.o -DMAIN_APP +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t.init.o %t.calc.o %t.main.o -o %t.fat +// RUN: %{run} %t.fat + +#include + +#include + +using namespace sycl; + +#ifdef MAIN_APP +void init_buf(queue &q, buffer &b, range<1> &r, int i); +#elif INIT_KERNEL +void init_buf(queue &q, buffer &b, range<1> &r, int i) { + q.submit([&](handler &cgh) { + auto B = b.get_access(cgh); + cgh.parallel_for(r, [=](id<1> index) { B[index] = i; }); + }); +} +#endif + +#ifdef MAIN_APP +void calc_buf(queue &q, buffer &a, buffer &b, buffer &c, + range<1> &r); +#elif CALC_KERNEL +void calc_buf(queue &q, buffer &a, buffer &b, buffer &c, + range<1> &r) { + q.submit([&](handler &cgh) { + auto A = a.get_access(cgh); + auto B = b.get_access(cgh); + auto C = c.get_access(cgh); + cgh.parallel_for( + r, [=](id<1> index) { C[index] = A[index] - B[index]; }); + }); +} +#endif + +#ifdef MAIN_APP +const size_t N = 100; +int main() { + { + queue q; + + range<1> r(N); + buffer a(r); + buffer b(r); + buffer c(r); + + init_buf(q, a, r, 2); + init_buf(q, b, r, 1); + + calc_buf(q, a, b, c, r); + + auto C = c.get_host_access(); + for (size_t i = 0; i < N; i++) { + if (C[i] != 1) { + std::cout << "Wrong value " << C[i] << " for element " << i + << std::endl; + return -1; + } + } + } + + std::cout << "Done!" << std::endl; + return 0; +} +#endif diff --git a/sycl/test-e2e/NewOffloadDriver/spirv_device_obj_smoke.cpp b/sycl/test-e2e/NewOffloadDriver/spirv_device_obj_smoke.cpp new file mode 100644 index 0000000000000..63351c8cab3c6 --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/spirv_device_obj_smoke.cpp @@ -0,0 +1,38 @@ +// UNSUPPORTED: cuda || hip +// Test with `--offload-new-driver` +// RUN: %clangxx -fsycl -fsycl-device-obj=spirv --offload-new-driver -c -o %t.o %s +// RUN: %clangxx -fsycl --offload-new-driver -o %t.out %t.o +// RUN: %{run} %t.out + +// This test verifies SPIR-V based fat objects. + +#include + +int main() { + sycl::buffer Buffer(4); + + sycl::queue Queue; + + sycl::range<1> NumOfWorkItems{Buffer.size()}; + + Queue.submit([&](sycl::handler &cgh) { + sycl::accessor Accessor{Buffer, cgh, sycl::write_only}; + cgh.parallel_for(NumOfWorkItems, [=](sycl::id<1> WIid) { + Accessor[WIid] = WIid.get(0); + }); + }); + + sycl::host_accessor HostAccessor{Buffer, sycl::read_only}; + + bool MismatchFound = false; + for (size_t I = 0; I < Buffer.size(); ++I) { + if (HostAccessor[I] != I) { + std::cout << "The result is incorrect for element: " << I + << " , expected: " << I << " , got: " << HostAccessor[I] + << std::endl; + MismatchFound = true; + } + } + + return MismatchFound; +} diff --git a/sycl/test-e2e/NewOffloadDriver/split-per-source-main.cpp b/sycl/test-e2e/NewOffloadDriver/split-per-source-main.cpp new file mode 100644 index 0000000000000..1996e93ad3382 --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/split-per-source-main.cpp @@ -0,0 +1,53 @@ +// RUN: %{build} -fsycl-device-code-split=per_source -I %S/Inputs -o %t.out %S/Inputs/split-per-source-second-file.cpp \ +// RUN: --offload-new-driver -fsycl-dead-args-optimization +// RUN: %{run} %t.out +// +// XFAIL: hip_nvidia + +#include "Inputs/split-per-source.h" + +int main() { + sycl::queue Q; + int Data = 0; + + auto KernelID = sycl::get_kernel_id(); + auto KB = sycl::get_kernel_bundle( + Q.get_context(), {KernelID}); + assert(KB.has_kernel(KernelID)); + auto Krn1 = KB.get_kernel(KernelID); + + auto KernelID2 = sycl::get_kernel_id(); + assert(KB.has_kernel(KernelID2)); + auto Krn2 = KB.get_kernel(KernelID2); + + std::vector KernelIDStorage = KB.get_kernel_ids(); + assert(KernelIDStorage.size() == 2); + assert(std::any_of( + KernelIDStorage.begin(), KernelIDStorage.end(), + [&KernelID](const sycl::kernel_id &id) { return id == KernelID; })); + assert(std::any_of( + KernelIDStorage.begin(), KernelIDStorage.end(), + [&KernelID2](const sycl::kernel_id &id) { return id == KernelID2; })); + + { + sycl::buffer Buf(&Data, sycl::range<1>(1)); + Q.submit([&](sycl::handler &Cgh) { + auto Acc = Buf.get_access(Cgh); + Cgh.single_task(Krn1, [=]() { Acc[0] = 1; }); + }); + } + assert(Data == 1); + + { + sycl::buffer Buf(&Data, sycl::range<1>(1)); + Q.submit([&](sycl::handler &Cgh) { + auto Acc = Buf.get_access(Cgh); + Cgh.single_task(Krn2, [=]() { Acc[0] = 2; }); + }); + } + assert(Data == 2); + + runKernelsFromFile2(); + + return 0; +} diff --git a/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp b/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp new file mode 100644 index 0000000000000..38268ad5efb6f --- /dev/null +++ b/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp @@ -0,0 +1,53 @@ +// Test with `--offload-new-driver` +// RUN: %{build} -DSOURCE1 --offload-new-driver -c -o %t1.o +// RUN: %{build} -DSOURCE2 --offload-new-driver -c -o %t2.o +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t1.o %t2.o -o %t.exe +// RUN: %{run} %t.exe + +#ifdef SOURCE1 +#include +#include + +using accT = sycl::accessor; +constexpr int value = 42; + +template +[[sycl::device_has(aspect)]] SYCL_EXTERNAL void func(const accT &acc); + +int main() { + sycl::queue q; + int data = 0; + sycl::buffer buf{&data, {1}}; + if (q.get_device().has(sycl::aspect::cpu)) { + q.submit([&](sycl::handler &cgh) { + accT acc{buf, cgh}; + cgh.single_task([=] { func(acc); }); + }).wait_and_throw(); + } else if (q.get_device().has(sycl::aspect::gpu)) { + q.submit([&](sycl::handler &cgh) { + accT acc{buf, cgh}; + cgh.single_task([=] { func(acc); }); + }).wait_and_throw(); + } + std::cout << "OK" << std::endl; +} + +#endif // SOURCE1 + +#ifdef SOURCE2 +#include + +constexpr int value = 42; + +using accT = sycl::accessor; + +template +[[sycl::device_has(aspect)]] SYCL_EXTERNAL void func(const accT &acc); +template <> SYCL_EXTERNAL void func(const accT &acc) { + acc[0] = value; +} +template <> SYCL_EXTERNAL void func(const accT &acc) { + acc[0] = value; +} + +#endif // SOURCE2