diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f90ba124e5a09..7d331a1fb2982 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -11486,6 +11486,12 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_fsycl_embed_ir)) CmdArgs.push_back(Args.MakeArgString("-sycl-embed-ir")); + if (Args.hasFlag(options::OPT_fsycl_allow_device_image_dependencies, + options::OPT_fno_sycl_allow_device_image_dependencies, + false)) + CmdArgs.push_back( + Args.MakeArgString("-sycl-allow-device-image-dependencies")); + // Formulate and add any offload-wrapper and AOT specific options. These // are additional options passed in via -Xsycl-target-linker and // -Xsycl-target-backend. diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index fbadad2863360..b6732b3e9312e 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -195,3 +195,19 @@ // RUN: --offload-new-driver 2>&1 \ // RUN: | FileCheck -check-prefix NVPTX_CUDA_PATH %s // NVPTX_CUDA_PATH: clang-linker-wrapper{{.*}} "--cuda-path={{.*}}Inputs/CUDA_80/usr/local/cuda" + +/// Check for -sycl-allow-device-image-dependencies transmission to clang-linker-wrapper tool +// RUN: %clangxx -fsycl -### --offload-new-driver \ +// RUN: -fsycl-allow-device-image-dependencies %s 2>&1 \ +// RUN: | FileCheck -check-prefix CHECK_DYNAMIC_LINKING %s +// CHECK_DYNAMIC_LINKING: clang-linker-wrapper{{.*}} "-sycl-allow-device-image-dependencies" + +/// Check that -sycl-allow-device-image-dependencies is not passed to clang-linker-wrapper tool +// RUN: %clangxx -fsycl -### --offload-new-driver \ +// RUN: -fno-sycl-allow-device-image-dependencies %s 2>&1 \ +// RUN: | FileCheck -check-prefix CHECK_NO_DYNAMIC_LINKING %s + +/// Check that -sycl-allow-device-image-dependencies is not passed to clang-linker-wrapper tool +// RUN: %clangxx -fsycl -### --offload-new-driver %s 2>&1 \ +// RUN: | FileCheck -check-prefix CHECK_NO_DYNAMIC_LINKING %s +// CHECK_NO_DYNAMIC_LINKING-NOT: clang-linker-wrapper{{.*}} "-sycl-allow-device-image-dependencies" diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index ec883c1091196..1655f58169836 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -676,6 +676,7 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args, if ((!Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs, OPT_sycl_remove_unused_external_funcs, false) && !SYCLNativeCPU) && + !Args.hasArg(OPT_sycl_allow_device_image_dependencies) && !Triple.isNVPTX() && !Triple.isAMDGPU()) PostLinkArgs.push_back("-emit-only-kernels-as-entry-points"); diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 60a13b23ba306..1885fb430b2bd 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -243,3 +243,8 @@ Flags<[WrapperOnlyOption]>, HelpText<"Embed LLVM IR for runtime kernel fusion"> def sycl_dump_device_code_EQ : Joined<["--", "-"], "sycl-dump-device-code=">, Flags<[WrapperOnlyOption]>, HelpText<"Path to the folder where the tool dumps SPIR-V device code. Other formats aren't dumped.">; + +// Options to enable/disable device dynamic linking. +def sycl_allow_device_image_dependencies : Flag<["--", "-"], "sycl-allow-device-image-dependencies">, + Flags<[WrapperOnlyOption, HelpHidden]>, + HelpText<"Allow dependencies between device code images">; diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/a.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/a.cpp new file mode 100644 index 0000000000000..34d1e3bb488f8 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/a.cpp @@ -0,0 +1,13 @@ +#define A_EXPORT +#include "a.hpp" +#include "b.hpp" +#include + +A_DECLSPEC SYCL_EXTERNAL int levelA(int val) { +#ifndef __SYCL_DEVICE_ONLY__ + std::cerr << "Host symbol used" << std::endl; + val ^= 0x1234; +#endif + val = levelB(val); + return val |= (0xA << 0); +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/a.hpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/a.hpp new file mode 100644 index 0000000000000..ca9320c0e1fdd --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/a.hpp @@ -0,0 +1,13 @@ +#include + +#if defined(MAKE_DLL) +#ifdef A_EXPORT +#define A_DECLSPEC __declspec(dllexport) +#else +#define A_DECLSPEC __declspec(dllimport) +#endif +#else +#define A_DECLSPEC +#endif + +A_DECLSPEC SYCL_EXTERNAL int levelA(int val); diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/b.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/b.cpp new file mode 100644 index 0000000000000..5dddf5b5311d6 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/b.cpp @@ -0,0 +1,13 @@ +#define B_EXPORT +#include "b.hpp" +#include "c.hpp" +#include + +B_DECLSPEC SYCL_EXTERNAL int levelB(int val) { +#ifndef __SYCL_DEVICE_ONLY__ + std::cerr << "Host symbol used" << std::endl; + val ^= 0x2345; +#endif + val = levelC(val); + return val |= (0xB << 4); +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/b.hpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/b.hpp new file mode 100644 index 0000000000000..019f1ccd19616 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/b.hpp @@ -0,0 +1,13 @@ +#include + +#if defined(MAKE_DLL) +#ifdef B_EXPORT +#define B_DECLSPEC __declspec(dllexport) +#else +#define B_DECLSPEC __declspec(dllimport) +#endif +#else +#define B_DECLSPEC +#endif + +B_DECLSPEC SYCL_EXTERNAL int levelB(int val); diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/c.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/c.cpp new file mode 100644 index 0000000000000..247be679882e4 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/c.cpp @@ -0,0 +1,13 @@ +#define C_EXPORT +#include "c.hpp" +#include "d.hpp" +#include + +C_DECLSPEC SYCL_EXTERNAL int levelC(int val) { +#ifndef __SYCL_DEVICE_ONLY__ + std::cerr << "Host symbol used" << std::endl; + val ^= 0x3456; +#endif + val = levelD(val); + return val |= (0xC << 8); +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/c.hpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/c.hpp new file mode 100644 index 0000000000000..bc189ca5cf175 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/c.hpp @@ -0,0 +1,13 @@ +#include + +#if defined(MAKE_DLL) +#ifdef C_EXPORT +#define C_DECLSPEC __declspec(dllexport) +#else +#define C_DECLSPEC __declspec(dllimport) +#endif +#else +#define C_DECLSPEC +#endif + +C_DECLSPEC SYCL_EXTERNAL int levelC(int val); diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/d.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/d.cpp new file mode 100644 index 0000000000000..ca3dc79e5218e --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/d.cpp @@ -0,0 +1,11 @@ +#define D_EXPORT +#include "d.hpp" +#include + +D_DECLSPEC SYCL_EXTERNAL int levelD(int val) { +#ifndef __SYCL_DEVICE_ONLY__ + std::cerr << "Host symbol used" << std::endl; + val ^= 0x4567; +#endif + return val |= (0xD << 12); +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/d.hpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/d.hpp new file mode 100644 index 0000000000000..ae865c0ad3a11 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/d.hpp @@ -0,0 +1,13 @@ +#include + +#if defined(MAKE_DLL) +#ifdef D_EXPORT +#define D_DECLSPEC __declspec(dllexport) +#else +#define D_DECLSPEC __declspec(dllimport) +#endif +#else +#define D_DECLSPEC +#endif + +D_DECLSPEC SYCL_EXTERNAL int levelD(int val); diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/wrapper.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/wrapper.cpp new file mode 100644 index 0000000000000..f2a0859b17477 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/wrapper.cpp @@ -0,0 +1,26 @@ +#include "a.hpp" +#include +#include +#define EXPORT +#include "wrapper.hpp" + +using namespace sycl; + +class ExeKernel; + +int wrapper() { + int val = 0; + { + buffer buf(&val, range<1>(1)); + queue q; + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() { acc[0] = levelA(acc[0]); }); + }); + } + + std::cout << "val=" << std::hex << val << "\n"; + if (val != 0xDCBA) + return (1); + return (0); +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/wrapper.hpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/wrapper.hpp new file mode 100644 index 0000000000000..8c5d4d8a5c123 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/Inputs/wrapper.hpp @@ -0,0 +1,8 @@ +#if defined(_WIN32) +#ifdef EXPORT +__declspec(dllexport) +#else +__declspec(dllimport) +#endif +#endif + int wrapper(); diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp new file mode 100644 index 0000000000000..ff75d442cb892 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp @@ -0,0 +1,45 @@ +// Test -fsycl-allow-device-image-dependencies with dynamic libraries. + +// UNSUPPORTED: cuda || hip +// UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. + +// DEFINE: %{dynamic_lib_options} = -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs %if windows %{-DMAKE_DLL %} +// DEFINE: %{dynamic_lib_suffix} = %if windows %{dll%} %else %{so%} + +// RUN: %clangxx --offload-new-driver %{dynamic_lib_options} %S/Inputs/d.cpp -o %T/libdevice_d.%{dynamic_lib_suffix} +// RUN: %clangxx --offload-new-driver %{dynamic_lib_options} %S/Inputs/c.cpp %if windows %{%T/libdevice_d.lib%} -o %T/libdevice_c.%{dynamic_lib_suffix} +// RUN: %clangxx --offload-new-driver %{dynamic_lib_options} %S/Inputs/b.cpp %if windows %{%T/libdevice_c.lib%} -o %T/libdevice_b.%{dynamic_lib_suffix} +// RUN: %clangxx --offload-new-driver %{dynamic_lib_options} %S/Inputs/a.cpp %if windows %{%T/libdevice_b.lib%} -o %T/libdevice_a.%{dynamic_lib_suffix} + +// RUN: %{build} --offload-new-driver -fsycl-allow-device-image-dependencies -I %S/Inputs -o %t.out \ +// RUN: %if windows \ +// RUN: %{%T/libdevice_a.lib%} \ +// RUN: %else \ +// RUN: %{-L%T -ldevice_a -ldevice_b -ldevice_c -ldevice_d -Wl,-rpath=%T%} + +// RUN: %{run} %t.out + +#include "a.hpp" +#include +#include + +using namespace sycl; + +class ExeKernel; + +int main() { + int val = 0; + { + buffer buf(&val, range<1>(1)); + queue q; + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() { acc[0] = levelA(acc[0]); }); + }); + } + + std::cout << "val=" << std::hex << val << "\n"; + if (val != 0xDCBA) + return (1); + return (0); +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp new file mode 100644 index 0000000000000..74758a837cd46 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp @@ -0,0 +1,286 @@ +// Ensure -fsycl-allow-device-dependencies can work with free function kernels. + +// REQUIRES: aspect-usm_shared_allocations +// RUN: %{build} -o %t.out --offload-new-driver -fsycl-allow-device-image-dependencies +// RUN: %{run} %t.out + +// The name mangling for free function kernels currently does not work with PTX. +// UNSUPPORTED: cuda +// UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. + +// XFAIL: hip_amd +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15742 + +#include +#include +#include +#include + +using namespace sycl; + +void printUSM(int *usmPtr, int size) { + std::cout << "usmPtr[] = {"; + for (int i = 0; i < size; i++) { + std::cout << usmPtr[i] << ", "; + } + std::cout << "}\n"; +} + +bool checkUSM(int *usmPtr, int size, int *Result) { + bool Pass = true; + for (int i = 0; i < size; i++) { + if (usmPtr[i] != Result[i]) { + Pass = false; + break; + } + } + if (Pass) + return true; + + std::cout << "Expected = {"; + for (int i = 0; i < size; i++) { + std::cout << Result[i] << ", "; + } + std::cout << "}\n"; + std::cout << "Result = {"; + for (int i = 0; i < size; i++) { + std::cout << usmPtr[i] << ", "; + } + std::cout << "}\n"; + return false; +} + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::single_task_kernel)) void ff_0(int *ptr, + int start, + int end) { + for (int i = start; i <= end; i++) + ptr[i] = start + end; +} + +bool test_0(queue Queue) { + constexpr int Range = 10; + int *usmPtr = malloc_shared(Range, Queue); + int start = 3; + int end = 5; + int Result[Range] = {0, 0, 0, 8, 8, 8, 0, 0, 0, 0}; + range<1> R1{Range}; + + memset(usmPtr, 0, Range * sizeof(int)); + Queue.submit([&](handler &Handler) { + Handler.single_task([=]() { + for (int i = start; i <= end; i++) + usmPtr[i] = start + end; + }); + }); + Queue.wait(); + bool PassA = checkUSM(usmPtr, Range, Result); + // TODO: Avoid printing anything if test passes to reduce I/O. + std::cout << "Test 0a: " << (PassA ? "PASS" : "FAIL") << std::endl; + + bool PassB = false; + // TODO: Avoid using __SYCL_DEVICE_ONLY__ or give rationale with a comment +#ifndef __SYCL_DEVICE_ONLY__ + kernel_bundle Bundle = + get_kernel_bundle(Queue.get_context()); + kernel_id Kernel_id = ext::oneapi::experimental::get_kernel_id(); + kernel Kernel = Bundle.get_kernel(Kernel_id); + memset(usmPtr, 0, Range * sizeof(int)); + Queue.submit([&](handler &Handler) { + Handler.set_arg(0, usmPtr); + Handler.set_arg(1, start); + Handler.set_arg(2, end); + Handler.single_task(Kernel); + }); + Queue.wait(); + PassB = checkUSM(usmPtr, Range, Result); + // TODO: Avoid printing anything if test passes to reduce I/O. + std::cout << "Test 0b: " << (PassB ? "PASS" : "FAIL") << std::endl; + + free(usmPtr, Queue); +#endif + return PassA && PassB; +} + +// Overloaded free function definition. +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void ff_1(int *ptr, int start, int end) { + nd_item<1> Item = ext::oneapi::this_work_item::get_nd_item<1>(); + id<1> GId = Item.get_global_id(); + ptr[GId.get(0)] = GId.get(0) + start + end; +} + +bool test_1(queue Queue) { + constexpr int Range = 10; + int *usmPtr = malloc_shared(Range, Queue); + int start = 3; + int Result[Range] = {13, 14, 15, 16, 17, 18, 19, 20, 21, 22}; + nd_range<1> R1{{Range}, {1}}; + + memset(usmPtr, 0, Range * sizeof(int)); + Queue.submit([&](handler &Handler) { + Handler.parallel_for(R1, [=](nd_item<1> Item) { + id<1> GId = Item.get_global_id(); + usmPtr[GId.get(0)] = GId.get(0) + start + Range; + }); + }); + Queue.wait(); + bool PassA = checkUSM(usmPtr, Range, Result); + // TODO: Avoid printing anything if test passes to reduce I/O. + std::cout << "Test 1a: " << (PassA ? "PASS" : "FAIL") << std::endl; + + bool PassB = false; + // TODO: Avoid using __SYCL_DEVICE_ONLY__ or give rationale with a comment +#ifndef __SYCL_DEVICE_ONLY__ + kernel_bundle Bundle = + get_kernel_bundle(Queue.get_context()); + kernel_id Kernel_id = ext::oneapi::experimental::get_kernel_id<( + void (*)(int *, int, int))ff_1>(); + kernel Kernel = Bundle.get_kernel(Kernel_id); + memset(usmPtr, 0, Range * sizeof(int)); + Queue.submit([&](handler &Handler) { + Handler.set_arg(0, usmPtr); + Handler.set_arg(1, start); + Handler.set_arg(2, Range); + Handler.parallel_for(R1, Kernel); + }); + Queue.wait(); + PassB = checkUSM(usmPtr, Range, Result); + // TODO: Avoid printing anything if test passes to reduce I/O. + std::cout << "Test 1b: " << (PassB ? "PASS" : "FAIL") << std::endl; + + free(usmPtr, Queue); +#endif + return PassA && PassB; +} + +// Overloaded free function definition. +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<2>)) +void ff_1(int *ptr, int start) { + int(&ptr2D)[4][4] = *reinterpret_cast(ptr); + nd_item<2> Item = ext::oneapi::this_work_item::get_nd_item<2>(); + id<2> GId = Item.get_global_id(); + id<2> LId = Item.get_local_id(); + ptr2D[GId.get(0)][GId.get(1)] = LId.get(0) + LId.get(1) + start; +} + +bool test_2(queue Queue) { + constexpr int Range = 16; + int *usmPtr = malloc_shared(Range, Queue); + int value = 55; + int Result[Range] = {55, 56, 55, 56, 56, 57, 56, 57, + 55, 56, 55, 56, 56, 57, 56, 57}; + nd_range<2> R2{range<2>{4, 4}, range<2>{2, 2}}; + + memset(usmPtr, 0, Range * sizeof(int)); + Queue.submit([&](handler &Handler) { + Handler.parallel_for(R2, [=](nd_item<2> Item) { + int(&ptr2D)[4][4] = *reinterpret_cast(usmPtr); + id<2> GId = Item.get_global_id(); + id<2> LId = Item.get_local_id(); + ptr2D[GId.get(0)][GId.get(1)] = LId.get(0) + LId.get(1) + value; + }); + }); + Queue.wait(); + bool PassA = checkUSM(usmPtr, Range, Result); + // TODO: Avoid printing anything if test passes to reduce I/O. + std::cout << "Test 2a: " << (PassA ? "PASS" : "FAIL") << std::endl; + + bool PassB = false; + // TODO: Avoid using __SYCL_DEVICE_ONLY__ or give rationale with a comment +#ifndef __SYCL_DEVICE_ONLY__ + kernel_bundle Bundle = + get_kernel_bundle(Queue.get_context()); + kernel_id Kernel_id = + ext::oneapi::experimental::get_kernel_id<(void (*)(int *, int))ff_1>(); + kernel Kernel = Bundle.get_kernel(Kernel_id); + memset(usmPtr, 0, Range * sizeof(int)); + Queue.submit([&](handler &Handler) { + Handler.set_arg(0, usmPtr); + Handler.set_arg(1, value); + Handler.parallel_for(R2, Kernel); + }); + Queue.wait(); + PassB = checkUSM(usmPtr, Range, Result); + // TODO: Avoid printing anything if test passes to reduce I/O. + std::cout << "Test 2b: " << (PassB ? "PASS" : "FAIL") << std::endl; + + free(usmPtr, Queue); +#endif + return PassA && PassB; +} + +// Templated free function definition. +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<2>)) +void ff_3(T *ptr, T start) { + int(&ptr2D)[4][4] = *reinterpret_cast(ptr); + nd_item<2> Item = ext::oneapi::this_work_item::get_nd_item<2>(); + id<2> GId = Item.get_global_id(); + id<2> LId = Item.get_local_id(); + ptr2D[GId.get(0)][GId.get(1)] = LId.get(0) + LId.get(1) + start; +} + +// Explicit instantiation with "int*". +template void ff_3(int *ptr, int start); + +bool test_3(queue Queue) { + constexpr int Range = 16; + int *usmPtr = malloc_shared(Range, Queue); + int value = 55; + int Result[Range] = {55, 56, 55, 56, 56, 57, 56, 57, + 55, 56, 55, 56, 56, 57, 56, 57}; + nd_range<2> R2{range<2>{4, 4}, range<2>{2, 2}}; + + memset(usmPtr, 0, Range * sizeof(int)); + Queue.submit([&](handler &Handler) { + Handler.parallel_for(R2, [=](nd_item<2> Item) { + int(&ptr2D)[4][4] = *reinterpret_cast(usmPtr); + id<2> GId = Item.get_global_id(); + id<2> LId = Item.get_local_id(); + ptr2D[GId.get(0)][GId.get(1)] = LId.get(0) + LId.get(1) + value; + }); + }); + Queue.wait(); + bool PassA = checkUSM(usmPtr, Range, Result); + // TODO: Avoid printing anything if test passes to reduce I/O. + std::cout << "Test 3a: " << (PassA ? "PASS" : "FAIL") << std::endl; + + bool PassB = false; + // TODO: Avoid using __SYCL_DEVICE_ONLY__ or give rationale with a comment +#ifndef __SYCL_DEVICE_ONLY__ + kernel_bundle Bundle = + get_kernel_bundle(Queue.get_context()); + kernel_id Kernel_id = ext::oneapi::experimental::get_kernel_id<( + void (*)(int *, int))ff_3>(); + kernel Kernel = Bundle.get_kernel(Kernel_id); + memset(usmPtr, 0, Range * sizeof(int)); + Queue.submit([&](handler &Handler) { + Handler.set_arg(0, usmPtr); + Handler.set_arg(1, value); + Handler.parallel_for(R2, Kernel); + }); + Queue.wait(); + PassB = checkUSM(usmPtr, Range, Result); + // TODO: Avoid printing anything if test passes to reduce I/O. + std::cout << "Test 3b: " << (PassB ? "PASS" : "FAIL") << std::endl; + + free(usmPtr, Queue); +#endif + return PassA && PassB; +} + +int main() { + queue Queue; + + bool Pass = true; + Pass &= test_0(Queue); + Pass &= test_1(Queue); + Pass &= test_2(Queue); + Pass &= test_3(Queue); + + return Pass ? 0 : 1; +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp new file mode 100644 index 0000000000000..949063d62b0c1 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp @@ -0,0 +1,28 @@ +// REQUIRES: aspect-fp64 +// UNSUPPORTED: hip || cuda +// UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. + +// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} + +// RUN: %{build} --offload-new-driver -fsycl-allow-device-image-dependencies -fsycl-device-lib-jit-link %{mathflags} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +using namespace sycl; + +// Check that device lib dependencies are resolved with +// -fsycl-allow-device-image-dependencies. +// TODO this test will become redundant once +// -fsycl-allow-device-image-dependencies is enabled by default. +int main() { + range<1> Range{1}; + queue q; + buffer buffer1(Range); + q.submit([&](sycl::handler &cgh) { + auto Acc = buffer1.get_access(cgh); + cgh.single_task([=]() { Acc[0] = std::acosh(1.0); }); + }); + return 0; +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp new file mode 100644 index 0000000000000..9e71e88a35422 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp @@ -0,0 +1,36 @@ +// Test -fsycl-allow-device-image-dependencies with objects. + +// UNSUPPORTED: cuda || hip +// UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. + +// RUN: %clangxx --offload-new-driver -fsycl %S/Inputs/a.cpp -I %S/Inputs -c -o %t_a.o +// RUN: %clangxx --offload-new-driver -fsycl %S/Inputs/b.cpp -I %S/Inputs -c -o %t_b.o +// RUN: %clangxx --offload-new-driver -fsycl %S/Inputs/c.cpp -I %S/Inputs -c -o %t_c.o +// RUN: %clangxx --offload-new-driver -fsycl %S/Inputs/d.cpp -I %S/Inputs -c -o %t_d.o +// RUN: %{build} --offload-new-driver -fsycl-allow-device-image-dependencies %t_a.o %t_b.o %t_c.o %t_d.o -I %S/Inputs -o %t.out +// RUN: %{run} %t.out + +#include "a.hpp" +#include +#include + +using namespace sycl; + +class ExeKernel; + +int main() { + int val = 0; + { + buffer buf(&val, range<1>(1)); + queue q; + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() { acc[0] = levelA(acc[0]); }); + }); + } + + std::cout << "val=" << std::hex << val << "\n"; + if (val != 0xDCBA) + return (1); + return (0); +} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp new file mode 100644 index 0000000000000..cde5f7cdbaad9 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp @@ -0,0 +1,25 @@ +// Test -fsycl-allow-device-image-dependencies with a single dynamic library on +// Windows and Linux. + +// UNSUPPORTED: cuda || hip +// UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. + +// RUN: %clangxx --offload-new-driver -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs \ +// RUN: %S/Inputs/a.cpp \ +// RUN: %S/Inputs/b.cpp \ +// RUN: %S/Inputs/c.cpp \ +// RUN: %S/Inputs/d.cpp \ +// RUN: %S/Inputs/wrapper.cpp \ +// RUN: -o %if windows %{%T/device_single.dll%} %else %{%T/libdevice_single.so%} + +// RUN: %{build} --offload-new-driver -I%S/Inputs -o %t.out \ +// RUN: %if windows \ +// RUN: %{%T/device_single.lib%} \ +// RUN: %else \ +// RUN: %{-L%T -ldevice_single -Wl,-rpath=%T%} + +// RUN: %{run} %t.out + +#include "wrapper.hpp" + +int main() { return (wrapper()); }