diff --git a/sycl/doc/SYCLEnvironmentVariables.md b/sycl/doc/SYCLEnvironmentVariables.md index 53198ec2eb525..700b0fac6fc6b 100644 --- a/sycl/doc/SYCLEnvironmentVariables.md +++ b/sycl/doc/SYCLEnvironmentVariables.md @@ -18,6 +18,7 @@ subject to change. Do not rely on these variables in production code. | SYCL_DUMP_IMAGES | Any(*) | Dump device image binaries to file. Control has no effect if SYCL_USE_KERNEL_SPV is set. | | SYCL_PRINT_EXECUTION_GRAPH | Described [below](#sycl_print_execution_graph-options) | Print execution graph to DOT text file. | | SYCL_THROW_ON_BLOCK | Any(*) | Throw an exception on attempt to wait for a blocked command. | +| SYCL_DEVICELIB_INHIBIT_NATIVE | String of device library extensions (separated by a whitespace) | Do not rely on device native support for devicelib extensions listed in this option. | `(*) Note: Any means this environment variable is effective when set to any non-null value.` diff --git a/sycl/doc/extensions/C-CXX-StandardLibrary/C-CXX-StandardLibrary.rst b/sycl/doc/extensions/C-CXX-StandardLibrary/C-CXX-StandardLibrary.rst new file mode 100644 index 0000000000000..fff1d75cf735b --- /dev/null +++ b/sycl/doc/extensions/C-CXX-StandardLibrary/C-CXX-StandardLibrary.rst @@ -0,0 +1,162 @@ +C and C++ Standard libraries support +=================================== + +This extension enables a set of functions from the C and C++ standard +libraries, and allows to use them in SYCL device code. + +Function declarations are taken from the standard headers (e.g. from + or ), and the corresponding header has to be +explicitly included in user code. + +Implementation requires a special device library to be linked with a +SYCL program. The library should match the C or C++ standard library +used to compile the program: + +For example, on Linux with GNU glibc: +.. code: + clang++ -fsycl -c main.cpp -o main.o + clang++ -fsycl main.o $(SYCL_INSTALL)/lib/libsycl-glibc.o -o a.out + +or, in case of Windows: +.. code: + clang++ -fsycl -c main.cpp -o main.obj + clang++ -fsycl main.obj %SYCL_INSTALL%/lib/libsycl-msvc.o -o a.exe + +List of supported functions from C standard library: + - assert macro (from or ) + +NOTE: only the GNU glibc and Microsoft C libraries are currently +supported. + +Example of usage +================ + +.. code: c++ + #include + #include + + template + void simple_vadd(const std::array& VA, const std::array& VB, + std::array& VC) { + // ... + cl::sycl::range<1> numOfItems{N}; + cl::sycl::buffer bufferA(VA.data(), numOfItems); + cl::sycl::buffer bufferB(VB.data(), numOfItems); + cl::sycl::buffer bufferC(VC.data(), numOfItems); + + deviceQueue.submit([&](cl::sycl::handler& cgh) { + auto accessorA = bufferA.template get_access(cgh); + auto accessorB = bufferB.template get_access(cgh); + auto accessorC = bufferC.template get_access(cgh); + + cgh.parallel_for>(numOfItems, + [=](cl::sycl::id<1> wiID) { + accessorC[wiID] = accessorA[wiID] + accessorB[wiID]; + assert(accessorC[wiID] > 0 && "Invalid value"); + }); + }); + deviceQueue.wait_and_throw(); + } + +Frontend +======== + +Once the system header is included, the corresponding functions can be +used in SYCL device code. This results in a handful of unresolved +functions in LLVM IR after clang: + +.. code: + ; Function Attrs: noreturn nounwind + declare dso_local spir_func void @__assert_fail(i8 addrspace(4)*, i8 addrspace(4)*, i32, i8 addrspace(4)*) + + [...] + cond.false: + call spir_func void @__assert_fail([...]) + unreachable + +The C and C++ specifications do not define names and signatures of the +functions from libc implementation that are used for a particular +function. For example, the `assert` macro: + + - in Glibc and musl libraries it expands to `__assert_fail` + - in MSVC library it expands to `_wassert` + - in newlib library it expands to `__assert_func` + +This makes it difficult to handle all possible cases in device +compilers. In order to facilitate porting to new platforms, and to +avoid imposing a lot of boilerplate code in *every* device compiler, +wrapper libraries are provided with the SYCL compiler that "lower" +libc implementation-specific functions into a stable set of functions, +that can be later handled by a device compiler. + +.. code: + clang++ -fsycl -c main.cpp -o main.o + clang++ -fsycl main.o $(SYCL_INSTALL)/lib/libsycl-glibc.o -o a.out + +This `libsycl-glibc.o` is one of these wrapper libraries: it provides +definitions for glibc specific library function, and these definitions +call the corresponding functions from `__devicelib_*` set of +functions. + +For example, `__assert_fail` from IR above gets transformed into: +.. code: + ; Function Attrs: noreturn nounwind + declare dso_local spir_func void @__devicelib_assert_fail(i8 addrspace(4)*, i8 addrspace(4)*, i32, i8 addrspace(4)*) + + ; Function Attrs: noreturn nounwind + define dso_local spir_func void @__assert_fail(i8 addrspace(4)*, i8 addrspace(4)*, i32, i8 addrspace(4)*) { + call spir_func void @__devicelib_assert_fail([...]) + } + + [...] + cond.false: + call spir_func void @__assert_fail([...]) + unreachable + +A single wrapper object provides function wrappers for *all* supported +library functions. Every supported C library implementation (MSVC or +glibc) has its own wrapper library object: + + - libsycl-glibc.o + - libsycl-msvc.o + +SPIR-V +====== + +Standard library functions are represented as external (import) +functions in SPIR-V: + +.. code: + 8 Decorate 67 LinkageAttributes "__devicelib_assert_fail" Import + ... + 2 Label 846 + 8 FunctionCall 63 864 67 855 857 863 859 + 1 Unreachable + +Device compiler +=============== + +Device compiler is free to implement these `__devicelib_*` functions. +In order to indicate support for a particular set of functions, +underlying runtime have to support the corresponding OpenCL (PI) +extension. See ``DeviceLibExtensions.rst`` for a list of supported +functions and corresponding extensions. + +Fallback implementation +======================= + +If a device compiler does not indicate "native" support for a +particular function, a fallback library is linked at JIT time by the +SYCL Runtime. This library is distributed with the SYCL Runtime and +resides in the same directory as the `libsycl.so` or `sycl.dll`. + +A fallback library is implemented as a device-agnostic SPIR-V program, +and it is supposed to work for any device that supports SPIR-V. + +Every set of functions is implemented in a separate fallback +library. For example, a fallback for `cl_intel_devicelib_cassert` +extension is provided as `libsycl-fallback-cassert.spv` + +NOTE that AOT compilation is not yet supported. Driver will have to +check for extension support and link the corresponding SPIR-V fallback +implementation, but this is not implemented yet. diff --git a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst new file mode 100644 index 0000000000000..ef896f2f55adc --- /dev/null +++ b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst @@ -0,0 +1,34 @@ +Device library extensions +=================================== + +Device compiler that indicates support for a particular extension is +supposed to support *all* the corresponding functions. + +cl_intel_devicelib_cassert +========================== + +.. code: + void __devicelib_assert_fail(__generic const char *expr, + __generic const char *file, + int32_t line, + __generic const char *func, + size_t gid0, size_t gid1, size_t gid2, + size_t lid0, size_t lid1, size_t lid2); +Semantic: +the function is called when an assertion expression `expr` is false, +and it indicates that a program does not execute as expected. +The function should print a message containing the information +provided in the arguments. In addition to that, the function is free +to terminate the current kernel invocation. + +Arguments: + + - `expr` is a string representation of the assertion condition + - `file` and `line` are the source code location of the assertion + - `func` (optional, may be NULL) name of the function containing the assertion + - `gidX` current work-item global id + - `lidX` current work-item local id + +Example of a message: +.. code: + foo.cpp:42: void foo(int): global id: [0,0,0], local id: [0,0,0] Assertion `buf[wiID] == 0 && "Invalid value"` failed. diff --git a/sycl/include/CL/sycl/detail/context_impl.hpp b/sycl/include/CL/sycl/detail/context_impl.hpp index 2a31c8e90f9c2..8b0fbbe6f2b08 100644 --- a/sycl/include/CL/sycl/detail/context_impl.hpp +++ b/sycl/include/CL/sycl/detail/context_impl.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -103,6 +104,12 @@ class context_impl { /// @return an instance of raw plug-in context handle. const RT::PiContext &getHandleRef() const; + /// Unlike `get_info', this function returns a + /// reference. + const vector_class &getDevices() const { + return MDevices; + } + /// Gets cached programs. /// /// @return a map of cached programs. @@ -122,6 +129,23 @@ class context_impl { /// /// @return a pointer to USM dispatcher. std::shared_ptr getUSMDispatch() const; + + /// In contrast to user programs, which are compiled from user code, library + /// programs come from the SYCL runtime. They are identified by the + /// corresponding extension: + /// + /// cl_intel_devicelib_assert -> # + /// cl_intel_devicelib_complex -> # + /// etc. + /// + /// See `doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst' for + /// more details. + /// + /// @returns a map with device library programs. + std::map &getCachedLibPrograms() { + return MCachedLibPrograms; + } + private: async_handler MAsyncHandler; vector_class MDevices; @@ -132,6 +156,7 @@ class context_impl { std::map MCachedPrograms; std::map> MCachedKernels; std::shared_ptr MUSMDispatch; + std::map MCachedLibPrograms; }; } // namespace detail diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 93e55e5f8b61b..e2974430d7a7e 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -91,7 +91,8 @@ typedef enum { PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, PI_DEVICE_VERSION = CL_DEVICE_VERSION, - PI_DEVICE_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE + PI_DEVICE_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, + PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS } _pi_device_info; // TODO: populate diff --git a/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp b/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp index 3ebf0fda48088..1774c90cbe195 100644 --- a/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp +++ b/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp @@ -41,6 +41,10 @@ using DeviceImage = pi_device_binary_struct; // be attempted to de-allocate. struct ImageDeleter; +enum DeviceLibExt { + cl_intel_devicelib_assert = 0 +}; + // Provides single loading and building OpenCL programs with unique contexts // that is necessary for no interoperability cases with lambda. class ProgramManager { @@ -70,8 +74,13 @@ class ProgramManager { DeviceImage &getDeviceImage(OSModuleHandle M, KernelSetId KSId, const context &Context); - void build(RT::PiProgram Program, const string_class &Options, - std::vector Devices); + using ProgramPtr = unique_ptr_class, + decltype(&::piProgramRelease)>; + ProgramPtr build(ProgramPtr Program, RT::PiContext Context, + const string_class &Options, + const std::vector &Devices, + std::map &CachedLibPrograms, + bool LinkDeviceLibs = false); /// Provides a new kernel set id for grouping kernel names together KernelSetId getNextKernelSetId() const; /// Returns the kernel set associated with the kernel, handles some special diff --git a/sycl/include/CL/sycl/device.hpp b/sycl/include/CL/sycl/device.hpp index fb14f19ecc772..eac87b89880fa 100644 --- a/sycl/include/CL/sycl/device.hpp +++ b/sycl/include/CL/sycl/device.hpp @@ -157,6 +157,12 @@ class device { template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); + + template + friend + typename std::add_pointer::type + detail::getRawSyclObjImpl(const T &SyclObject); + template friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); }; diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 7ecd155a3849b..819162e873ad0 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -137,3 +137,4 @@ install(TARGETS ${SYCL_RT_LIBS} ARCHIVE DESTINATION "lib" COMPONENT sycl LIBRARY DESTINATION "lib" COMPONENT sycl RUNTIME DESTINATION "bin" COMPONENT sycl) +add_subdirectory(detail/devicelib) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 457161d1c3608..0b2d26edfc542 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -90,6 +90,10 @@ context_impl::~context_impl() { PI_CALL(piKernelRelease)(KernIt.second); PI_CALL(piProgramRelease)(ToBeDeleted); } + for (auto LibProg : MCachedLibPrograms) { + assert(LibProg.second && "Null program must not be kept in the cache"); + PI_CALL(piProgramRelease)(LibProg.second); + } } const async_handler &context_impl::get_async_handler() const { diff --git a/sycl/source/detail/devicelib/CMakeLists.txt b/sycl/source/detail/devicelib/CMakeLists.txt new file mode 100644 index 0000000000000..273110a783496 --- /dev/null +++ b/sycl/source/detail/devicelib/CMakeLists.txt @@ -0,0 +1,66 @@ +# Place device libraries near the libsycl.so library in a build +# directory +if (WIN32) + set(binary_dir "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") +else() + set(binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") +endif() + +set(clang $) + +set(compile_opts + # suppress an error about SYCL_EXTERNAL + -Wno-sycl-strict + # for CL/__spirv/spirv_vars.hpp + -I${sycl_inc_dir}) + +if (WIN32) + set(devicelib-obj-file ${binary_dir}/libsycl-msvc.o) + add_custom_command(OUTPUT ${devicelib-obj-file} + COMMAND ${clang} -fsycl -c + ${compile_opts} + ${CMAKE_CURRENT_SOURCE_DIR}/msvc_wrapper.cpp + -o ${devicelib-obj-file} + MAIN_DEPENDENCY msvc_wrapper.cpp + DEPENDS wrapper.h clang + VERBATIM) +else() + set(devicelib-obj-file ${binary_dir}/libsycl-glibc.o) + add_custom_command(OUTPUT ${devicelib-obj-file} + COMMAND ${clang} -fsycl -c + ${compile_opts} + ${CMAKE_CURRENT_SOURCE_DIR}/glibc_wrapper.cpp + -o ${devicelib-obj-file} + MAIN_DEPENDENCY glibc_wrapper.cpp + DEPENDS wrapper.h clang + VERBATIM) +endif() + +add_custom_command(OUTPUT ${binary_dir}/libsycl-fallback-cassert.spv + COMMAND ${clang} -S -fsycl-device-only -fno-sycl-use-bitcode + ${compile_opts} + ${CMAKE_CURRENT_SOURCE_DIR}/fallback-cassert.cpp + -o ${binary_dir}/libsycl-fallback-cassert.spv + MAIN_DEPENDENCY fallback-cassert.cpp + DEPENDS wrapper.h clang llvm-spirv + VERBATIM) + +add_custom_target(devicelib-obj DEPENDS ${devicelib-obj-file}) +add_custom_target(devicelib-spv DEPENDS ${binary_dir}/libsycl-fallback-cassert.spv) +add_dependencies(sycl devicelib-obj devicelib-spv) +if (MSVC) + add_dependencies(sycld devicelib-obj devicelib-spv) +endif() + +# Place device libraries near the libsycl.so library in an install +# directory as well +if (WIN32) + set(install_dest bin) +else() + set(install_dest lib) +endif() + +install(FILES ${devicelib-obj-file} + ${binary_dir}/libsycl-fallback-cassert.spv + DESTINATION ${install_dest} + COMPONENT sycl) diff --git a/sycl/source/detail/devicelib/fallback-cassert.cpp b/sycl/source/detail/devicelib/fallback-cassert.cpp new file mode 100644 index 0000000000000..d8e9ddd0a079d --- /dev/null +++ b/sycl/source/detail/devicelib/fallback-cassert.cpp @@ -0,0 +1,42 @@ +//==--- fallback-cassert.cpp - device agnostic implementation of C assert --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "wrapper.h" + +// __attribute((format(...))) enables compiler checks for a format string. +int __spirv_ocl_printf(const __attribute__((opencl_constant)) char *fmt, ...) + __attribute__((format(printf, 1, 2))); + +static const __attribute__((opencl_constant)) char assert_fmt[] = + "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " + "Assertion `%s` failed.\n"; + +SYCL_EXTERNAL +extern "C" void __devicelib_assert_fail( + const char *expr, const char *file, + int32_t line, const char *func, + uint64_t gid0, uint64_t gid1, uint64_t gid2, + uint64_t lid0, uint64_t lid1, uint64_t lid2) { + // intX_t types are used instead of `int' and `long' because the format string + // is defined in terms of *device* types (OpenCL types): %d matches a 32 bit + // integer, %lu matches a 64 bit unsigned integer. Host `int' and + // `long' types may be different, so we cannot use them. + __spirv_ocl_printf( + assert_fmt, + file, (int32_t)line, + // WORKAROUND: IGC does not handle this well + // (func) ? func : "", + func, + gid0, gid1, gid2, + lid0, lid1, lid2, + expr); + + // FIXME: call SPIR-V unreachable instead + // volatile int *die = (int *)0x0; + // *die = 0xdead; +} diff --git a/sycl/source/detail/devicelib/glibc_wrapper.cpp b/sycl/source/detail/devicelib/glibc_wrapper.cpp new file mode 100644 index 0000000000000..403a90cdda378 --- /dev/null +++ b/sycl/source/detail/devicelib/glibc_wrapper.cpp @@ -0,0 +1,26 @@ +//==--- glibc_wrapper.cpp - wrappers for Glibc internal functions ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef __SYCL_DEVICE_ONLY__ +#include "wrapper.h" + +#include // for __spirv_BuiltInGlobalInvocationId, + // __spirv_BuiltInLocalInvocationId + +extern "C" SYCL_EXTERNAL +void __assert_fail(const char *expr, const char *file, + unsigned int line, const char *func) { + __devicelib_assert_fail(expr, file, line, func, + __spirv_BuiltInGlobalInvocationId.x, + __spirv_BuiltInGlobalInvocationId.y, + __spirv_BuiltInGlobalInvocationId.z, + __spirv_BuiltInLocalInvocationId.x, + __spirv_BuiltInLocalInvocationId.y, + __spirv_BuiltInLocalInvocationId.z); +} +#endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/source/detail/devicelib/msvc_wrapper.cpp b/sycl/source/detail/devicelib/msvc_wrapper.cpp new file mode 100644 index 0000000000000..21b430c3ad81e --- /dev/null +++ b/sycl/source/detail/devicelib/msvc_wrapper.cpp @@ -0,0 +1,45 @@ +//==--- msvc_wrapper.cpp - wrappers for Microsoft C library functions ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef __SYCL_DEVICE_ONLY__ +#include "wrapper.h" + +#include // for __spirv_BuiltInGlobalInvocationId, + // __spirv_BuiltInLocalInvocationId + +// Truncates a wide (16 or 32 bit) string (wstr) into an ASCII string (str). +// Any non-ASCII characters are replaced by question mark '?'. +static void __truncate_wchar_char_str(const wchar_t *wstr, char *str, + size_t str_size) { + str_size -= 1; // reserve for NULL terminator + while (str_size > 0 && *wstr != L'\0') { + wchar_t w = *wstr++; + *str++ = (w > 0 && w < 127) ? (char)w : '?'; + str_size--; + } + *str = '\0'; +} + +extern "C" SYCL_EXTERNAL +void _wassert(const wchar_t *wexpr, const wchar_t *wfile, unsigned line) { + // Paths and expressions that are longer than 256 characters are going to be + // truncated. + char file[256]; + __truncate_wchar_char_str(wfile, file, sizeof(file)); + char expr[256]; + __truncate_wchar_char_str(wexpr, expr, sizeof(expr)); + + __devicelib_assert_fail(expr, file, line, /*func=*/nullptr, + __spirv_BuiltInGlobalInvocationId.x, + __spirv_BuiltInGlobalInvocationId.y, + __spirv_BuiltInGlobalInvocationId.z, + __spirv_BuiltInLocalInvocationId.x, + __spirv_BuiltInLocalInvocationId.y, + __spirv_BuiltInLocalInvocationId.z); +} +#endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/source/detail/devicelib/wrapper.h b/sycl/source/detail/devicelib/wrapper.h new file mode 100644 index 0000000000000..b541cb9880c6c --- /dev/null +++ b/sycl/source/detail/devicelib/wrapper.h @@ -0,0 +1,22 @@ +//==--- wrapper.h - declarations for devicelib functions -----*- C++ -*-----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __SYCL_WRAPPERS_H__ +#define __SYCL_WRAPPERS_H__ + +#include +#include + +SYCL_EXTERNAL +extern "C" void __devicelib_assert_fail( + const char *expr, const char *file, + int32_t line, const char *func, + uint64_t gid0, uint64_t gid1, uint64_t gid2, + uint64_t lid0, uint64_t lid1, uint64_t lid2); + +#endif // __SYCL_WRAPPERS_H__ diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index ba9fb59bbed9a..4b890e68a2c0c 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -176,15 +177,27 @@ ProgramManager::getBuiltPIProgram(OSModuleHandle M, const context &Context, const DeviceImage &Img = getDeviceImage(M, KSId, Context); RT::PiProgram Prg = createPIProgram(Img, Context); - using PiProgramT = remove_pointer_t; - unique_ptr_class ProgramManaged( + ProgramPtr ProgramManaged( Prg, RT::PluginInformation.PiFunctionTable.piProgramRelease); - build(ProgramManaged.get(), Img.BuildOptions, {}); - RT::PiProgram Program = ProgramManaged.release(); - CachedPrograms[KSId] = Program; - - return Program; + // Link a fallback implementation of device libraries if they are not + // supported by a device compiler. + // Pre-compiled programs are supposed to be already linked. + const bool LinkDeviceLibs = getFormat(Img) == PI_DEVICE_BINARY_TYPE_SPIRV; + + context_impl *ContextImpl = getRawSyclObjImpl(Context); + RT::PiContext PiContext = ContextImpl->getHandleRef(); + const std::vector &Devices = ContextImpl->getDevices(); + std::vector PiDevices(Devices.size()); + std::transform( + Devices.begin(), Devices.end(), PiDevices.begin(), + [](const device Dev) { return getRawSyclObjImpl(Dev)->getHandleRef(); }); + + ProgramPtr BuiltProgram = + build(std::move(ProgramManaged), PiContext, Img.BuildOptions, PiDevices, + ContextImpl->getCachedLibPrograms(), LinkDeviceLibs); + CachedPrograms[KSId] = BuiltProgram.get(); + return BuiltProgram.release(); } RT::PiKernel ProgramManager::getOrCreateKernel(OSModuleHandle M, @@ -240,6 +253,83 @@ string_class ProgramManager::getProgramBuildLog(const RT::PiProgram &Program) { return Log; } +static bool loadDeviceLib(const RT::PiContext &Context, const char *Name, + RT::PiProgram &Prog) { + std::string LibSyclDir = OSUtil::getCurrentDSODir(); + std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, + std::ifstream::in | std::ifstream::binary); + if (!File.good()) { + return false; + } + + File.seekg(0, std::ios::end); + size_t FileSize = File.tellg(); + File.seekg(0, std::ios::beg); + std::vector FileContent(FileSize); + File.read(&FileContent[0], FileSize); + File.close(); + + Prog = + createSpirvProgram(Context, (unsigned char *)&FileContent[0], FileSize); + return Prog != nullptr; +} + +static const char* getDeviceLibFilename(DeviceLibExt Extension) { + switch (Extension) { + case cl_intel_devicelib_assert: + return "libsycl-fallback-cassert.spv"; + } + throw compile_program_error("Unhandled (new?) device library extension"); +} + +static const char* getDeviceLibExtensionStr(DeviceLibExt Extension) { + switch (Extension) { + case cl_intel_devicelib_assert: + return "cl_intel_devicelib_assert"; + } + throw compile_program_error("Unhandled (new?) device library extension"); +} + +static RT::PiProgram +loadDeviceLibFallback( + const RT::PiContext &Context, + DeviceLibExt Extension, + const std::vector &Devices, + std::map &CachedLibPrograms) { + + const char *LibFileName = getDeviceLibFilename(Extension); + std::map::iterator LibProgIt; + bool NotExists = false; + std::tie(LibProgIt, NotExists) = + CachedLibPrograms.insert({Extension, nullptr}); + RT::PiProgram &LibProg = LibProgIt->second; + + if (!NotExists) { + return LibProg; + } + + if (!loadDeviceLib(Context, LibFileName, LibProg)) { + CachedLibPrograms.erase(LibProgIt); + throw compile_program_error(std::string("Failed to load ") + LibFileName); + } + + RT::PiResult Error = PI_CALL_NOCHECK(piProgramCompile)( + LibProg, + // Assume that Devices contains all devices from Context. + Devices.size(), Devices.data(), + // Do not use compile options for library programs: it is not clear + // if user options (image options) are supposed to be applied to + // library program as well, and what actually happens to a SPIR-V + // program if we apply them. + "", 0, nullptr, nullptr, nullptr, nullptr); + if (Error != PI_SUCCESS) { + CachedLibPrograms.erase(LibProgIt); + throw compile_program_error(ProgramManager::getProgramBuildLog(LibProg)); + } + + return LibProg; +} + struct ImageDeleter { void operator()(DeviceImage *I) { delete[] I->BinaryStart; @@ -332,12 +422,62 @@ DeviceImage &ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, return *Img; } -void ProgramManager::build(RT::PiProgram Program, const string_class &Options, - std::vector Devices) { +static std::vector getDeviceLibPrograms( + const RT::PiContext Context, + const std::vector &Devices, + std::map &CachedLibPrograms) { + + std::vector Programs; + + // TODO: SYCL compiler should generate a list of required extensions for a + // particular program in order to allow us do a more fine-grained check here. + // Require *all* possible devicelib extensions for now. + std::pair RequiredDeviceLibExt[] = { + {cl_intel_devicelib_assert, /* is fallback loaded? */ false} + }; + + // Load a fallback library for an extension if at least one device does not + // support it. + for (RT::PiDevice Dev : Devices) { + std::string DevExtList = + get_device_info::get(Dev); + for (auto &Pair : RequiredDeviceLibExt) { + DeviceLibExt Ext = Pair.first; + bool &FallbackIsLoaded = Pair.second; + + if (FallbackIsLoaded) { + continue; + } + + const char* ExtStr = getDeviceLibExtensionStr(Ext); + + bool InhibitNativeImpl = false; + if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { + InhibitNativeImpl = strstr(Env, ExtStr) != nullptr; + } + + bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr); + + if (!DeviceSupports || InhibitNativeImpl) { + Programs.push_back( + loadDeviceLibFallback(Context, Ext, Devices, CachedLibPrograms)); + FallbackIsLoaded = true; + } + } + } + return Programs; +} + +ProgramManager::ProgramPtr +ProgramManager::build(ProgramPtr Program, RT::PiContext Context, + const string_class &Options, + const std::vector &Devices, + std::map &CachedLibPrograms, + bool LinkDeviceLibs) { if (DbgProgMgr > 0) { - std::cerr << ">>> ProgramManager::build(" << Program << ", " << Options - << ", ... " << Devices.size() << ")\n"; + std::cerr << ">>> ProgramManager::build(" << Program.get() << ", " + << Options << ", ... " << Devices.size() << ")\n"; } const char *Opts = std::getenv("SYCL_PROGRAM_BUILD_OPTIONS"); @@ -351,11 +491,42 @@ void ProgramManager::build(RT::PiProgram Program, const string_class &Options, if (!Opts) Opts = Options.c_str(); - if (PI_CALL_NOCHECK(piProgramBuild)(Program, Devices.size(), Devices.data(), - Opts, nullptr, nullptr) == PI_SUCCESS) - return; - throw compile_program_error(getProgramBuildLog(Program)); + std::vector LinkPrograms; + if (LinkDeviceLibs) { + LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms); + } + + if (LinkPrograms.empty()) { + RT::PiResult Error = PI_CALL_NOCHECK(piProgramBuild)( + Program.get(), Devices.size(), Devices.data(), Opts, nullptr, nullptr); + if (Error != PI_SUCCESS) + throw compile_program_error(getProgramBuildLog(Program.get())); + return Program; + } + + // Include the main program and compile/link everything together + PI_CALL(piProgramCompile)(Program.get(), Devices.size(), Devices.data(), Opts, + 0, nullptr, nullptr, nullptr, nullptr); + LinkPrograms.push_back(Program.get()); + + RT::PiProgram LinkedProg = nullptr; + RT::PiResult Error = PI_CALL_NOCHECK(piProgramLink)( + Context, Devices.size(), Devices.data(), Opts, LinkPrograms.size(), + LinkPrograms.data(), nullptr, nullptr, &LinkedProg); + + // Link program call returns a new program object if all parameters are valid, + // or NULL otherwise. Release the original (user) program. + Program.reset(LinkedProg); + if (Error != PI_SUCCESS) { + if (LinkedProg) { + // A non-trivial error occurred during linkage: get a build log, release + // an incomplete (but valid) LinkedProg, and throw. + throw compile_program_error(getProgramBuildLog(LinkedProg)); + } + pi::checkPiResult(Error); + } + return Program; } void ProgramManager::addImages(pi_device_binaries DeviceBinary) { diff --git a/sycl/test/devicelib/assert-windows.cpp b/sycl/test/devicelib/assert-windows.cpp new file mode 100644 index 0000000000000..c65a8bd6a3f45 --- /dev/null +++ b/sycl/test/devicelib/assert-windows.cpp @@ -0,0 +1,73 @@ +// REQUIRES: cpu,windows +// +// FIXME: OpenCL CPU backend compiler crashes on a call to _wassert. +// Disable the test until the fix reaches SYCL test infrastructure. +// XFAIL: * +// +// RUN: %clangxx -fsycl -c %s -o %t.o +// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/../bin/libsycl-msvc.o -o %t.out +// +// MSVC implementation of assert does not call an unreachable built-in, so the +// program doesn't terminate when fallback is used. +// +// FIXME: SPIR-V Unreachable should be called from the fallback +// explicitly. Since the test is going to crash, we'll have to follow a similar +// approach as on Linux - call the test in a subprocess. +// +// RUN: env SYCL_PI_TRACE=1 SYCL_DEVICELIB_INHIBIT_NATIVE=1 SYCL_DEVICE_TYPE=CPU %t.out >%t.stdout.fallback 2>%t.stderr.fallback +// RUN: FileCheck %s --check-prefix=CHECK-MESSAGE --input-file %t.stdout.fallback +// CHECK-MESSAGE: {{.*}}assert-windows.cpp:{{[0-9]+}}: (null): global id: [{{[0-3]}},0,0], local id: [{{[0-3]}},0,0] Assertion `accessorC[wiID] == 0 && "Invalid value"` failed. +// +// RUN: FileCheck %s --input-file %t.stdout.fallback --check-prefix=CHECK-FALLBACK +// CHECK-FALLBACK: ---> piProgramLink + +#include +#include +#include + +using namespace cl::sycl; + +constexpr auto sycl_read = cl::sycl::access::mode::read; +constexpr auto sycl_write = cl::sycl::access::mode::write; + +template +void simple_vadd(const std::array &VA, const std::array &VB, + std::array &VC) { + queue deviceQueue([](cl::sycl::exception_list ExceptionList) { + for (cl::sycl::exception_ptr_class ExceptionPtr : ExceptionList) { + try { + std::rethrow_exception(ExceptionPtr); + } catch (cl::sycl::exception &E) { + std::cerr << E.what() << std::endl; + } catch (...) { + std::cerr << "Unknown async exception was caught." << std::endl; + } + } + }); + + cl::sycl::range<1> numOfItems{N}; + cl::sycl::buffer bufferA(VA.data(), numOfItems); + cl::sycl::buffer bufferB(VB.data(), numOfItems); + cl::sycl::buffer bufferC(VC.data(), numOfItems); + + deviceQueue.submit([&](cl::sycl::handler &cgh) { + auto accessorA = bufferA.template get_access(cgh); + auto accessorB = bufferB.template get_access(cgh); + auto accessorC = bufferC.template get_access(cgh); + + cgh.parallel_for(numOfItems, [=](cl::sycl::id<1> wiID) { + accessorC[wiID] = accessorA[wiID] + accessorB[wiID]; + assert(accessorC[wiID] == 0 && "Invalid value"); + }); + }); + deviceQueue.wait_and_throw(); +} + +int main() { + std::array A = {1, 2, 3}; + std::array B = {1, 2, 3}; + std::array C = {0, 0, 0}; + + simple_vadd(A, B, C); + return EXIT_SUCCESS; +} diff --git a/sycl/test/devicelib/assert.cpp b/sycl/test/devicelib/assert.cpp new file mode 100644 index 0000000000000..d0f18fe8cb544 --- /dev/null +++ b/sycl/test/devicelib/assert.cpp @@ -0,0 +1,200 @@ +// REQUIRES: cpu,linux +// RUN: %clangxx -fsycl -c %s -o %t.o +// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-glibc.o -o %t.out +// (see the other RUN lines below; it is a bit complicated) +// +// assert() call in device code guarantees nothing: on some devices it behaves +// in the usual way and terminates the program. On other devices it can print an +// error message and *continue* execution. Less capable devices can even ignore +// an assert! +// +// This makes testing an assert() a bit difficult task, and we have to rely on +// the implementation details to make sure that both "native" and "fallback" +// implementations work as expected. +// +// This test works only on Intel OpenCL CPU implementation, which is known to +// behave as follows: +// +// Fallback mode (aka the best we can do by following the OpenCL spec): +// 1. Assertion condition is printed to *stdout* by the OpenCL printf(). +// 2. Process (both host and device) is terminated by a SIGSEGV. +// +// Native mode (same behavior as libc assert on CPU): +// 1. Assertion condition is printed to *stderr*. +// 2. Process (both host and device) is terminated by a SIGABRT. +// +// Other devices are "covered" by the assert-dummy.cpp test, which doesn't +// verify anything except a successful compilation for a device. +// +// FIXME: assert-dummy.cpp is not implemented yet, so other devices are not +// covered. +// +// How the test works: +// ------------------- +// +// 1. First we verify that a call sequence in SYCL Runtime is correct: +// +// - in the fallback mode we have to link an additional library that +// provides a generic implementation of assert(). +// +// - in the native mode we don't link anything, and call clBuildProgram for +// a user program alone. +// +// 2. Then we test that there is actually a difference between the two +// modes. Since the CPU device is the only device that supports this +// extension natively, we catch the difference between the fallback and the +// native modes: SIGSEGV should occur in the fallback mode, SIGABRT in the +// native mode. +// +// In order to check the signal we fork() and let the child die. Then we +// verify how it was terminated. EXPECTED_SIGNAL environment variable +// controls the expected result. +// +// 3. We also test that a message is printed to the corresponding fd: stdout +// for the fallback mode and stderr for the native mode. In the fallback +// mode the test process dies right after a call to the OpenCL printf(), so +// the message can still be buffered by stdio. We turn the bufferization +// off explicitly. +// +// SYCL_DEVICELIB_INHIBIT_NATIVE=1 environment variable is used to force a mode +// in SYCL Runtime, so it doesn't look into a device extensions list and always +// link the fallback library. +// +// NOTE that Intel OpenCL CPU Vectorizer crashes when an `unreachable' +// instruction is found in IR. Workaround it for now using +// CL_CONFIG_USE_VECTORIZER=False environment variable. +// +// We also skip the native test entirely (see SKIP_IF_NO_EXT), since the assert +// extension is a new feature and may not be supported by the runtime used with +// SYCL. +// +// Overall this sounds stable enough. What could possibly go wrong? +// +// RUN: env SYCL_PI_TRACE=1 CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGABRT SKIP_IF_NO_EXT=1 %t.out 2>%t.stderr.native >%t.stdout.native +// RUN: FileCheck %s --input-file %t.stdout.native --check-prefixes=CHECK-NATIVE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED +// RUN: FileCheck %s --input-file %t.stderr.native --check-prefixes=CHECK-MESSAGE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED +// +// RUN: env SYCL_PI_TRACE=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.out 2>%t.stderr.fallback >%t.stdout.fallback +// RUN: FileCheck %s --input-file %t.stdout.fallback --check-prefixes=CHECK-FALLBACK,CHECK-MESSAGE +// +// CHECK-NATIVE: ---> piProgramBuild +// CHECK-FALLBACK: ---> piProgramLink +// +// Skip the test if the CPU RT doesn't support the extension yet: +// CHECK-NOTSUPPORTED: Device has no support for cl_intel_devicelib_assert +// +// Anyway, the same message has to be printed for both the fallback and the +// native modes (fallback prints to stdout, while native prints to stderr; we +// already handled this difference in the RUN lines): +// +// CHECK-MESSAGE: {{.*}}assert.cpp:{{[0-9]+}}: auto simple_vadd(const std::array &, const std::array &, std::array &)::(anonymous class)::operator()(cl::sycl::handler &)::(anonymous class)::operator()(cl::sycl::id<1>) const: global id: [{{[0-3]}},0,0], local id: [{{[0-3]}},0,0] Assertion `accessorC[wiID] == 0 && "Invalid value"` failed. +// +// Note that the work-item that hits the assert first may vary, since the order +// of execution is undefined. We catch only the first one (whatever id it is). + +#include +#include +#include +#include +#include +#include +#include + +using namespace cl::sycl; + +constexpr auto sycl_read = cl::sycl::access::mode::read; +constexpr auto sycl_write = cl::sycl::access::mode::write; + +const int EXIT_SKIP_TEST = 42; + +template +void simple_vadd(const std::array &VA, const std::array &VB, + std::array &VC) { + queue deviceQueue([](cl::sycl::exception_list ExceptionList) { + for (cl::sycl::exception_ptr_class ExceptionPtr : ExceptionList) { + try { + std::rethrow_exception(ExceptionPtr); + } catch (cl::sycl::exception &E) { + std::cerr << E.what() << std::endl; + } catch (...) { + std::cerr << "Unknown async exception was caught." << std::endl; + } + } + }); + device dev = deviceQueue.get_device(); + bool unsupported = true; + for (auto &ext : dev.get_info()) { + if (ext == "cl_intel_devicelib_assert") { + unsupported = false; + } + } + if (unsupported && getenv("SKIP_IF_NO_EXT")) { + fprintf(stderr, "Device has no support for cl_intel_devicelib_assert, " + "skipping the test\n"); + exit(EXIT_SKIP_TEST); + } + + cl::sycl::range<1> numOfItems{N}; + cl::sycl::buffer bufferA(VA.data(), numOfItems); + cl::sycl::buffer bufferB(VB.data(), numOfItems); + cl::sycl::buffer bufferC(VC.data(), numOfItems); + + deviceQueue.submit([&](cl::sycl::handler &cgh) { + auto accessorA = bufferA.template get_access(cgh); + auto accessorB = bufferB.template get_access(cgh); + auto accessorC = bufferC.template get_access(cgh); + + cgh.parallel_for(numOfItems, [=](cl::sycl::id<1> wiID) { + accessorC[wiID] = accessorA[wiID] + accessorB[wiID]; + assert(accessorC[wiID] == 0 && "Invalid value"); + }); + }); + deviceQueue.wait_and_throw(); +} + +int main() { + int child = fork(); + if (child) { + int status = 0; + waitpid(child, &status, 0); + if (WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SKIP_TEST) { + return 0; + } + if (!WIFSIGNALED(status)) { + fprintf(stderr, "error: process did not terminate by a signal\n"); + return 1; + } + int sig = WTERMSIG(status); + int expected = 0; + if (const char *env = getenv("EXPECTED_SIGNAL")) { + if (0 == strcmp(env, "SIGABRT")) { + expected = SIGABRT; + } else if (0 == strcmp(env, "SIGSEGV")) { + expected = SIGSEGV; + } + if (!expected) { + fprintf(stderr, "EXPECTED_SIGNAL should be set to either \"SIGABRT\", " + "or \"SIGSEGV\"!\n"); + return 1; + } + } + if (sig != expected) { + fprintf(stderr, "error: expected signal %d, got %d\n", expected, sig); + return 1; + } + return 0; + } + + // Turn the bufferization off to not loose the assert message if it is written + // to stdout. + if (setvbuf(stdout, NULL, _IONBF, 0)) { + perror("failed to turn off bufferization on stdout"); + return 1; + } + + std::array A = {1, 2, 3}; + std::array B = {1, 2, 3}; + std::array C = {0, 0, 0}; + + simple_vadd(A, B, C); +} diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index fdf57998eabc5..1d2d32e38df96 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -35,6 +35,7 @@ config.test_exec_root = os.path.join(config.sycl_obj_root, 'test') if platform.system() == "Linux": + config.available_features.add('linux') # Propagate 'LD_LIBRARY_PATH' through the environment. if 'LD_LIBRARY_PATH' in os.environ: config.environment['LD_LIBRARY_PATH'] = os.path.pathsep.join((config.environment['LD_LIBRARY_PATH'], config.llvm_build_libs_dir))