From 7d1e969ba6dad2475e342068ccda2679379c3dd4 Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Tue, 2 Apr 2019 16:07:45 +0300 Subject: [PATCH 01/11] [SYCL] Fix return type of relational built-in functions. Added a wrapper of boolean vector. Used the wrapper in the implementation of relational built-in functions. Fixed isnormal for scalar double for the host side. Fixed select for genfloatd and genfloath. Signed-off-by: Alexey Voronov --- sycl/include/CL/sycl/builtins.hpp | 179 +++++++-------- sycl/include/CL/sycl/detail/boolean.hpp | 144 ++++++++++++ sycl/include/CL/sycl/detail/builtins.hpp | 74 +------ .../CL/sycl/detail/generic_type_traits.hpp | 189 +++++++++++++++- sycl/source/detail/builtins.cpp | 154 ++++++------- sycl/test/basic_tests/boolean.cpp | 150 +++++++++++++ sycl/test/built-ins/scalar_relational.cpp | 206 ++++++++++++------ sycl/test/built-ins/vector_relational.cpp | 172 +++++++++++---- 8 files changed, 905 insertions(+), 363 deletions(-) create mode 100644 sycl/include/CL/sycl/detail/boolean.hpp create mode 100644 sycl/test/basic_tests/boolean.cpp diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index aa86d8d137320..87259b42ae126 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -1221,7 +1222,6 @@ fast_normalize(T p) __NOEXC { } /* --------------- 4.13.7 Relational functions. 
Device version --------------*/ -// // int isequal (half x, half y) // shortn isequal (halfn x, halfn y) // igeninteger32bit isequal (genfloatf x, genfloatf y) @@ -1229,10 +1229,9 @@ fast_normalize(T p) __NOEXC { // longn isequal (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isequal(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpFOrdEqual< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isequal(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdEqual>(x, y)); } // int isnotequal (half x, half y) @@ -1242,10 +1241,9 @@ typename detail::float_point_to_sign_integral::type isequal(T x, // longn isnotequal (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isnotequal(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpFUnordNotEqual< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isnotequal(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFUnordNotEqual>(x, y)); } // int isgreater (half x, half y) @@ -1255,10 +1253,9 @@ typename detail::float_point_to_sign_integral::type isnotequal(T x, // longn isgreater (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isgreater(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpFOrdGreaterThan< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isgreater(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdGreaterThan>(x, y)); } // int isgreaterequal (half x, half y) @@ -1268,10 +1265,9 @@ typename detail::float_point_to_sign_integral::type isgreater(T x, // longn isgreaterequal (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type -isgreaterequal(T x, T y) __NOEXC { - return 
__sycl_std::__invoke_OpFOrdGreaterThanEqual< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isgreaterequal(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdGreaterThanEqual>(x, y)); } // int isless (half x, half y) @@ -1281,11 +1277,11 @@ isgreaterequal(T x, T y) __NOEXC { // longn isless (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isless(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpFOrdLessThan< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isless(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdLessThan>(x, y)); } + // int islessequal (half x, half y) // shortn islessequal (halfn x, halfn y) // igeninteger32bit islessequal (genfloatf x, genfloatf y) @@ -1293,10 +1289,9 @@ typename detail::float_point_to_sign_integral::type isless(T x, // longn islessequal (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type -islessequal(T x, T y) __NOEXC { - return __sycl_std::__invoke_OpFOrdLessThanEqual< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t islessequal(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdLessThanEqual>(x, y)); } // int islessgreater (half x, half y) @@ -1306,11 +1301,11 @@ islessequal(T x, T y) __NOEXC { // longn islessgreater (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type -islessgreater(T x, T y) __NOEXC { - return __sycl_std::__invoke_OpLessOrGreater< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t islessgreater(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpLessOrGreater>(x, y)); } + // int isfinite (half x) // shortn isfinite (halfn x) // igeninteger32bit isfinite 
(genfloatf x) @@ -1318,10 +1313,9 @@ islessgreater(T x, T y) __NOEXC { // longn isfinite (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isfinite(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(isfinite, T) - return __sycl_std::__invoke_OpIsFinite< - typename detail::float_point_to_sign_integral::type>(x); +detail::common_rel_ret_t isfinite(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpIsFinite>(x)); } // int isinf (half x) @@ -1331,10 +1325,9 @@ typename detail::float_point_to_sign_integral::type isfinite(T x) __NOEXC { // longn isinf (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isinf(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(isinf, T) - return __sycl_std::__invoke_OpIsInf< - typename detail::float_point_to_sign_integral::type>(x); +detail::common_rel_ret_t isinf(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpIsInf>(x)); } // int isnan (half x) @@ -1344,11 +1337,11 @@ typename detail::float_point_to_sign_integral::type isinf(T x) __NOEXC { // longn isnan (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isnan(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(isnan, T) - return __sycl_std::__invoke_OpIsNan< - typename detail::float_point_to_sign_integral::type>(x); +detail::common_rel_ret_t isnan(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpIsNan>(x)); } + // int isnormal (half x) // shortn isnormal (halfn x) // igeninteger32bit isnormal (genfloatf x) @@ -1356,10 +1349,9 @@ typename detail::float_point_to_sign_integral::type isnan(T x) __NOEXC { // longn isnormal (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isnormal(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(isnormal, T) - return __sycl_std::__invoke_OpIsNormal< - typename detail::float_point_to_sign_integral::type>(x); 
+detail::common_rel_ret_t isnormal(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpIsNormal>(x)); } // int isordered (half x) @@ -1369,10 +1361,9 @@ typename detail::float_point_to_sign_integral::type isnormal(T x) __NOEXC { // longn isordered (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isordered(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpOrdered< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isordered(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpOrdered>(x, y)); } // int isunordered (half x, half y) @@ -1382,10 +1373,9 @@ typename detail::float_point_to_sign_integral::type isordered(T x, // longn isunordered (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type -isunordered(T x, T y) __NOEXC { - return __sycl_std::__invoke_OpUnordered< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isunordered(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpUnordered>(x, y)); } // int signbit (half x) @@ -1395,26 +1385,45 @@ isunordered(T x, T y) __NOEXC { // longn signbit (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type signbit(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(signbit, T) - return __sycl_std::__invoke_OpSignBitSet< - typename detail::float_point_to_sign_integral::type>(x); +detail::common_rel_ret_t signbit(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpSignBitSet>(x)); } -// int any (igeninteger x) -template ::value, T>::type> -cl::sycl::cl_int any(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(signbit, T) - return __sycl_std::__invoke_OpAny(x); +// int any (sigeninteger x) +template +typename std::enable_if::value, + cl::sycl::cl_int>::type +any(T x) __NOEXC { + return 
detail::Boolean<1>(cl::sycl::cl_int(detail::msbIsSet(x))); } -// int all (igeninteger x) -template ::value, T>::type> -cl::sycl::cl_int all(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(all, T) - return __sycl_std::__invoke_OpAll(x); +// int any (vigeninteger x) +template +typename std::enable_if::value, + cl::sycl::cl_int>::type +any(T x) __NOEXC { + return detail::rel_sign_bit_test_ret_t( + __sycl_std::__invoke_OpAny>( + detail::rel_sign_bit_test_arg_t(x))); +} + +// int all (sigeninteger x) +template +typename std::enable_if::value, + cl::sycl::cl_int>::type +all(T x) __NOEXC { + return detail::Boolean<1>(cl::sycl::cl_int(detail::msbIsSet(x))); +} + +// int all (vigeninteger x) +template +typename std::enable_if::value, + cl::sycl::cl_int>::type +all(T x) __NOEXC { + return detail::rel_sign_bit_test_ret_t( + __sycl_std::__invoke_OpAll>( + detail::rel_sign_bit_test_arg_t(x))); } // gentype bitselect (gentype a, gentype b, gentype c) @@ -1430,8 +1439,7 @@ typename std::enable_if::value && detail::is_igeninteger::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // geninteger select (geninteger a, geninteger b, ugeninteger c) @@ -1440,8 +1448,7 @@ typename std::enable_if::value && detail::is_ugeninteger::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatf select (genfloatf a, genfloatf b, genint c) @@ -1449,8 +1456,7 @@ template typename std::enable_if< detail::is_genfloatf::value && detail::is_genint::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatf select 
(genfloatf a, genfloatf b, ugenint c) @@ -1458,8 +1464,7 @@ template typename std::enable_if< detail::is_genfloatf::value && detail::is_ugenint::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, igeninteger64 c) @@ -1468,8 +1473,7 @@ typename std::enable_if::value && detail::is_igeninteger64bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, ugeninteger64 c) @@ -1478,29 +1482,26 @@ typename std::enable_if::value && detail::is_ugeninteger64bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } #ifndef __HALF_NO_ENABLED -// genfloath select (genfloath a, genfloath b, igeninteger64 c) +// genfloath select (genfloath a, genfloath b, igeninteger16 c) template typename std::enable_if::value && - detail::is_igeninteger64bit::value, + detail::is_igeninteger16bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } -// genfloath select (genfloath a, genfloath b, ugeninteger64 c) +// genfloath select (genfloath a, genfloath b, ugeninteger16 c) template typename std::enable_if::value && - detail::is_ugeninteger64bit::value, + detail::is_ugeninteger16bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return 
__sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } #endif diff --git a/sycl/include/CL/sycl/detail/boolean.hpp b/sycl/include/CL/sycl/detail/boolean.hpp new file mode 100644 index 0000000000000..295f7358d48fa --- /dev/null +++ b/sycl/include/CL/sycl/detail/boolean.hpp @@ -0,0 +1,144 @@ +//==----------- boolean.hpp - SYCL boolean type ----------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +template struct Assigner { + template static void assign(R &r, const T x) { + Assigner::assign(r, x); + r.template swizzle() = x.value[Num]; + } + + template + static void init(R &r, const T x) { + Assigner::template init(r, x); + ET v = x.template swizzle(); + r.value[Num] = msbIsSet(v); + } +}; + +template <> struct Assigner<0> { + template static void assign(R &r, const T x) { + r.template swizzle<0>() = x.value[0]; + } + template + static void init(R &r, const T x) { + ET v = x.template swizzle<0>(); + r.value[0] = msbIsSet(v); + } +}; + +template struct alignas(N == 3 ? 4 : N) Boolean { + static_assert(((N == 2) || (N == 3) || (N == 4) || (N == 8) || (N == 16)), + "Invalid size"); + + using element_type = bool; + +#ifdef __SYCL_DEVICE_ONLY__ + using DataType = + element_type __attribute__((ext_vector_type(N == 3 ? 4 : N))); + using vector_t = DataType; +#else + using DataType = element_type[N == 3 ? 
4 : N]; +#endif + + Boolean() : value{false} {} + + Boolean(std::initializer_list l) { + for (size_t I = 0; I < N; ++I) { + value[I] = *(l.begin() + I); + } + } + + Boolean(const Boolean &rhs) { + for (size_t I = 0; I < N; ++I) { + value[I] = rhs.value[I]; + } + } + +#ifdef __SYCL_DEVICE_ONLY__ + // TODO change this to the vectors assignment when the assignment will be + // fixed on Intel GPU NEO OpenCL runtime + Boolean(const vector_t rhs) { + for (size_t I = 0; I < N; ++I) { + value[I] = rhs[I]; + } + } +#endif + + template Boolean(const T rhs) { + static_assert(is_vgeninteger::value, "Invalid constructor"); + Assigner::template init, T, typename T::element_type>( + *this, rhs); + } + +#ifdef __SYCL_DEVICE_ONLY__ + operator vector_t() const { return value; } +#endif + + template operator T() const { + static_assert(is_vgeninteger::value, "Invalid conversion"); + T r; + Assigner::assign(r, *this); + return r * -1; + } + +private: + template friend struct Assigner; + DataType value; +}; + +template <> struct alignas(1) Boolean<1> { + + using element_type = bool; + +#ifdef __SYCL_DEVICE_ONLY__ + using DataType = element_type; + using vector_t = DataType; +#else + using DataType = element_type; +#endif + + Boolean() : value(false) {} + + Boolean(const Boolean &rhs) : value(rhs.value) {} + +#ifdef __SYCL_DEVICE_ONLY__ + Boolean(const vector_t rhs) : value(rhs) {} +#endif + + template Boolean(T val) : value(val) { + static_assert(is_sgeninteger::value, "Invalid constructor"); + } + +#ifdef __SYCL_DEVICE_ONLY__ + operator vector_t() const { return value; } +#endif + + template operator T() const { + static_assert(is_sgeninteger::value, "Invalid conversion"); + return value; + } + +private: + DataType value; +}; + +} // namespace detail +} // namespace sycl +} // namespace cl \ No newline at end of file diff --git a/sycl/include/CL/sycl/detail/builtins.hpp b/sycl/include/CL/sycl/detail/builtins.hpp index 69223620110e4..ce873a59b1ba3 100644 --- 
a/sycl/include/CL/sycl/detail/builtins.hpp +++ b/sycl/include/CL/sycl/detail/builtins.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include // TODO Delete this include after solving the problems in the test @@ -18,79 +19,6 @@ // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ -namespace cl { -namespace sycl { -namespace detail { - -// Try to get pointer_t, otherwise T -template class TryToGetPointerT { - static T check(...); - template static typename A::pointer_t check(const A &); - -public: - using type = decltype(check(T())); - static constexpr bool value = !std::is_same::value; -}; - -// Try to get element_type, otherwise T -template class TryToGetElementType { - static T check(...); - template static typename A::element_type check(const A &); - -public: - using type = decltype(check(T())); - static constexpr bool value = !std::is_same::value; -}; - -// Try to get vector_t, otherwise T -template class TryToGetVectorT { - static T check(...); - template static typename A::vector_t check(const A &); - -public: - using type = decltype(check(T())); - static constexpr bool value = !std::is_same::value; -}; - -// Try to get pointer_t (if pointer_t indicates on the type with vector_t -// creates a pointer type on vector_t), otherwise T -template class TryToGetPointerVecT { - static T check(...); - template - static typename PtrValueType< - typename TryToGetVectorT::type>::type, - A::address_space>::type * - check(const A &); - -public: - using type = decltype(check(T())); -}; - -template ::value, std::true_type>::type> -typename TryToGetPointerVecT::type TryToGetPointer(T &t) { - // TODO find the better way to get the pointer to underlying data from vec - // class - return reinterpret_cast::type>(t.get()); -} - -template ::value, std::false_type>::type> -T TryToGetPointer(T &t) { - return t; -} - -// Converts T to OpenCL friendly -template -using ConvertToOpenCLType = std::conditional< - TryToGetVectorT::value, typename 
TryToGetVectorT::type, - typename std::conditional::value, - typename TryToGetPointerVecT::type, T>::type>; - -} // namespace detail -} // namespace sycl -} // namespace cl - #define MAKE_CALL_ARG1(call) \ template \ ALWAYS_INLINE \ diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index 6b51f4cb454be..a795726db8275 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -367,11 +367,15 @@ template using is_gentype = std::integral_constant::value || is_geninteger::value>; +// forward declarations +template class TryToGetElementType; + // genintegerNbit All types within geninteger whose base type are N bits in // size, where N = 8, 16, 32, 64 template using is_igenintegerNbit = typename std::integral_constant< - bool, is_igeninteger::value || (sizeof(typename T::element_type) == N)>; + bool, is_igeninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // igeninteger8bit All types within igeninteger whose base type are 8 bits in // size @@ -393,7 +397,8 @@ template using is_igeninteger64bit = is_igenintegerNbit; // size, where N = 8, 16, 32, 64. template using is_ugenintegerNbit = typename std::integral_constant< - bool, is_ugeninteger::value || (sizeof(typename T::element_type) == N)>; + bool, is_ugeninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // ugeninteger8bit All types within ugeninteger whose base type are 8 bits in // size @@ -415,7 +420,8 @@ template using is_ugeninteger64bit = is_ugenintegerNbit; // size, where N = 8, 16, 32, 64. 
template using is_genintegerNbit = typename std::integral_constant< - bool, is_geninteger::value || (sizeof(typename T::element_type) == N)>; + bool, is_geninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // geninteger8bit All types within geninteger whose base type are 8 bits in size template using is_geninteger8bit = is_genintegerNbit; @@ -707,6 +713,183 @@ template <> struct make_upper { using type = cl::sycl::cl_ulong; }; +// Try to get pointer_t, otherwise T +template class TryToGetPointerT { + static T check(...); + template static typename A::pointer_t check(const A &); + +public: + using type = decltype(check(T())); + static constexpr bool value = !std::is_same::value; +}; + +// Try to get element_type, otherwise T +template class TryToGetElementType { + static T check(...); + template static typename A::element_type check(const A &); + +public: + using type = decltype(check(T())); + static constexpr bool value = !std::is_same::value; +}; + +// Try to get vector_t, otherwise T +template class TryToGetVectorT { + static T check(...); + template static typename A::vector_t check(const A &); + +public: + using type = decltype(check(T())); + static constexpr bool value = !std::is_same::value; +}; + +// Try to get pointer_t (if pointer_t indicates on the type with vector_t +// creates a pointer type on vector_t), otherwise T +template class TryToGetPointerVecT { + static T check(...); + template + static typename PtrValueType< + typename TryToGetVectorT::type>::type, + A::address_space>::type * + check(const A &); + +public: + using type = decltype(check(T())); +}; + +template ::value, std::true_type>::type> +typename TryToGetPointerVecT::type TryToGetPointer(T &t) { + // TODO find the better way to get the pointer to underlying data from vec + // class + return reinterpret_cast::type>(t.get()); +} + +template ::value, std::false_type>::type> +T TryToGetPointer(T &t) { + return t; +} + +// Converts T to OpenCL friendly +template +using 
ConvertToOpenCLType = std::conditional< + TryToGetVectorT::value, typename TryToGetVectorT::type, + typename std::conditional::value, + typename TryToGetPointerVecT::type, T>::type>; + +// Used for all,any and select relational built-in functions +template inline constexpr T msbMask(T) { + using UT = typename std::make_unsigned::type; + return T(UT(1) << (sizeof(T) * 8 - 1)); +} + +template inline constexpr bool msbIsSet(const T x) { + return (x & msbMask(x)); +} + +template +using common_rel_ret_t = typename detail::float_point_to_sign_integral::type; + +// forward declaration +template struct Boolean; + +// Try to get vector element count or 1 otherwise +template class TryToGetNumElements; + +template +struct TryToGetNumElements< + T, typename std::enable_if::value>::type> { + static constexpr int value = T::get_count(); +}; +template +struct TryToGetNumElements< + T, typename std::enable_if::value>::type> { + static constexpr int value = 1; +}; + +// Used for relational comparison built-in functions +template struct RelationalReturnType { +#ifdef __SYCL_DEVICE_ONLY__ + using type = Boolean::value>; +#else + using type = common_rel_ret_t; +#endif +}; + +// Used for select built-in function +template struct SelectWrapperTypeArgC { +#ifdef __SYCL_DEVICE_ONLY__ + using type = Boolean::value>; +#else + using type = T; +#endif +}; + +template +using select_arg_c_t = typename SelectWrapperTypeArgC::type; + +template using rel_ret_t = typename RelationalReturnType::type; + +// Used for any and all built-in functions +template struct RelationalTestForSignBitType { +#ifdef __SYCL_DEVICE_ONLY__ + using return_type = detail::Boolean<1>; + using argument_type = detail::Boolean::value>; +#else + using return_type = cl::sycl::cl_int; + using argument_type = T; +#endif +}; + +template +using rel_sign_bit_test_ret_t = + typename RelationalTestForSignBitType::return_type; + +template +using rel_sign_bit_test_arg_t = + typename RelationalTestForSignBitType::argument_type; + 
+template struct RelConverter; + +template +struct RelConverter< + T, typename std::enable_if::value>::type> { + static const int N = T::get_count(); +#ifdef __SYCL_DEVICE_ONLY__ + using bool_t = typename Boolean::vector_t; + using ret_t = common_rel_ret_t; +#else + using bool_t = Boolean; + using ret_t = rel_ret_t; +#endif + + static ret_t apply(bool_t value) { +#ifdef __SYCL_DEVICE_ONLY__ + typename ret_t::vector_t result(0); + for (size_t I = 0; I < N; ++I) { + result[I] = 0 - value[I]; + } + return result; +#else + return value; +#endif + } +}; + +template +struct RelConverter< + T, typename std::enable_if::value>::type> { + using R = rel_ret_t; +#ifdef __SYCL_DEVICE_ONLY__ + using value_t = bool; +#else + using value_t = R; +#endif + + static R apply(value_t value) { return value; } +}; + } // namespace detail } // namespace sycl } // namespace cl diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index 566171a712b0e..fa191ffe7ee84 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include + #include #include #include @@ -177,8 +178,6 @@ namespace s = cl::sycl; __MAKE_SR_1V_OR(Fun, Fun, 8, Ret, Arg1) \ __MAKE_SR_1V_OR(Fun, Fun, 16, Ret, Arg1) -#define MSB_MASK(x) (((decltype(x))1) << ((sizeof(x) * 8) - 1)) - #define MAKE_1V_2P(Fun, Ret, Arg1, Arg2) \ __MAKE_1V_2P(Fun, 2, Ret, Arg1, Arg2) \ __MAKE_1V_2P(Fun, 3, Ret, Arg1, Arg2) \ @@ -1833,22 +1832,14 @@ MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_short, s::cl_half) #endif // (OpIsNan) // isnan -cl_int OpIsNan(s::cl_float x) __NOEXC { - return (std::isnan(x)); -} +cl_int OpIsNan(s::cl_float x) __NOEXC { return (std::isnan(x)); } cl_int OpIsNan(s::cl_double x) __NOEXC { return (std::isnan(x)); } -cl_int __vOpIsNan(s::cl_float x) __NOEXC { - return -(std::isnan(x)); -} +cl_int __vOpIsNan(s::cl_float x) __NOEXC { return -(std::isnan(x)); } cl_long 
__vOpIsNan(s::cl_double x) __NOEXC { return -(std::isnan(x)); } #ifndef NO_HALF_ENABLED -cl_int OpIsNan(s::cl_half x) __NOEXC { - return (std::isnan(x)); -} -cl_short __vOpIsNan(s::cl_half x) __NOEXC { - return -(std::isnan(x)); -} +cl_int OpIsNan(s::cl_half x) __NOEXC { return (std::isnan(x)); } +cl_short __vOpIsNan(s::cl_half x) __NOEXC { return -(std::isnan(x)); } #endif MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_long, s::cl_double) @@ -1857,21 +1848,13 @@ MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_short, s::cl_half) #endif // (OpIsNormal) // isnormal -cl_int OpIsNormal(s::cl_float x) __NOEXC { - return (std::isnormal(x)); -} -cl_int OpIsNornmal(s::cl_double x) __NOEXC { return (std::isnormal(x)); } -cl_int __vOpIsNormal(s::cl_float x) __NOEXC { - return -(std::isnormal(x)); -} -cl_long __vOpIsNornmal(s::cl_double x) __NOEXC { return -(std::isnormal(x)); } +cl_int OpIsNormal(s::cl_float x) __NOEXC { return (std::isnormal(x)); } +cl_int OpIsNormal(s::cl_double x) __NOEXC { return (std::isnormal(x)); } +cl_int __vOpIsNormal(s::cl_float x) __NOEXC { return -(std::isnormal(x)); } +cl_long __vOpIsNormal(s::cl_double x) __NOEXC { return -(std::isnormal(x)); } #ifndef NO_HALF_ENABLED -cl_int OpIsNormal(s::cl_half x) __NOEXC { - return (std::isnormal(x)); -} -cl_short __vOpIsNormal(s::cl_half x) __NOEXC { - return -(std::isnormal(x)); -} +cl_int OpIsNormal(s::cl_half x) __NOEXC { return (std::isnormal(x)); } +cl_short __vOpIsNormal(s::cl_half x) __NOEXC { return -(std::isnormal(x)); } #endif MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_long, s::cl_double) @@ -1881,16 +1864,16 @@ MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_short, s::cl_half) // (OpOrdered) // isordered cl_int OpOrdered(s::cl_float x, s::cl_float y) __NOEXC { - return !(std::isunordered(x,y)); + return !(std::isunordered(x, y)); } cl_int OpOrdered(s::cl_double x, s::cl_double y) __NOEXC { 
- return !(std::isunordered(x,y)); + return !(std::isunordered(x, y)); } cl_int __vOpOrdered(s::cl_float x, s::cl_float y) __NOEXC { - return -(!(std::isunordered(x,y))); + return -(!(std::isunordered(x, y))); } cl_long __vOpOrdered(s::cl_double x, s::cl_double y) __NOEXC { - return -(!(std::isunordered(x,y))); + return -(!(std::isunordered(x, y))); } #ifndef NO_HALF_ENABLED cl_int OpOrdered(s::cl_half x, s::cl_half y) __NOEXC { @@ -1908,16 +1891,16 @@ MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_short, s::cl_half, s::cl_half) // (OpUnordered) // isunordered cl_int OpUnordered(s::cl_float x, s::cl_float y) __NOEXC { - return std::isunordered(x,y); + return std::isunordered(x, y); } cl_int OpUnordered(s::cl_double x, s::cl_double y) __NOEXC { - return std::isunordered(x,y); + return std::isunordered(x, y); } cl_int __vOpUnordered(s::cl_float x, s::cl_float y) __NOEXC { - return -(std::isunordered(x,y)); + return -(std::isunordered(x, y)); } cl_long __vOpUnordered(s::cl_double x, s::cl_double y) __NOEXC { - return -(std::isunordered(x,y)); + return -(std::isunordered(x, y)); } #ifndef NO_HALF_ENABLED cl_int OpUnordered(s::cl_half x, s::cl_half y) __NOEXC { @@ -1952,15 +1935,7 @@ MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_short, s::cl_half) #endif // (OpAny) // any -template cl_int __OpAny(T x) { - return ((x & MSB_MASK(x)) == MSB_MASK(x)); -} -cl_int OpAny(s::cl_char x) __NOEXC { return __OpAny(x); } -cl_int OpAny(s::cl_short x) __NOEXC { return __OpAny(x); } -cl_int OpAny(s::cl_int x) __NOEXC { return __OpAny(x); } -cl_int OpAny(s::cl_long x) __NOEXC { return __OpAny(x); } -cl_int OpAny(s::longlong x) __NOEXC { return __OpAny(x); } - +template cl_int OpAny(T x) { return sycl::detail::msbIsSet(x); } MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_char) MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_short) MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_int) @@ -1968,15 +1943,7 @@ MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_long) MAKE_SR_1V_OR(OpAny, s::cl_int, s::longlong) // (OpAll) // all 
-template cl_int __OpAll(T x) { - return ((x & MSB_MASK(x)) == MSB_MASK(x)); -} -cl_int OpAll(s::cl_char x) __NOEXC { return __OpAll(x); } -cl_int OpAll(s::cl_short x) __NOEXC { return __OpAll(x); } -cl_int OpAll(s::cl_int x) __NOEXC { return __OpAll(x); } -cl_int OpAll(s::cl_long x) __NOEXC { return __OpAll(x); } -cl_int OpAll(s::longlong x) __NOEXC { return __OpAll(x); } - +template cl_int OpAll(T x) { return sycl::detail::msbIsSet(x); } MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_char) MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_short) MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_int) @@ -2057,82 +2024,83 @@ MAKE_SC_1V_2V_3V(bitselect, s::cl_half, s::cl_half, s::cl_half, s::cl_half) // for scalar: result = c ? b : a. // for vector: result[i] = (MSB of c[i] is set)? b[i] : a[i] -template T __OpSelect(T a, T b, T2 c) { +template T2 __OpSelect(T c, T2 b, T2 a) { return (c ? b : a); } -template T __vOpSelect(T a, T b, T2 c) { - return ((c && MSB_MASK(c)) ? b : a); +template T2 __vOpSelect(T c, T2 b, T2 a) { + return sycl::detail::msbIsSet(c) ? 
b : a; } -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_float, - s::cl_float, s::cl_int) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_float, - s::cl_float, s::cl_uint) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_double, - s::cl_double, s::cl_long) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_double, - s::cl_double, s::cl_ulong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_double, - s::cl_double, s::longlong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_double, - s::cl_double, s::ulonglong) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_int, + s::cl_float, s::cl_float) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_uint, + s::cl_float, s::cl_float) + +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_long, + s::cl_double, s::cl_double) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_ulong, + s::cl_double, s::cl_double) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::longlong, + s::cl_double, s::cl_double) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::ulonglong, + s::cl_double, s::cl_double) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_char, s::cl_char, s::cl_char) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_char, - s::cl_char, s::cl_uchar) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_uchar, + s::cl_char, s::cl_char) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_uchar, - s::cl_uchar, s::cl_char) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_char, + s::cl_uchar, s::cl_uchar) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_short, s::cl_short, s::cl_short) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, 
s::cl_short, s::cl_short, - s::cl_short, s::cl_ushort) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_ushort, + s::cl_short, s::cl_short) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_ushort, - s::cl_ushort, s::cl_short) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_short, + s::cl_ushort, s::cl_ushort) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_int, s::cl_int, s::cl_int) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_int, s::cl_int, - s::cl_uint) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_uint, s::cl_int, + s::cl_int) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_uint, - s::cl_uint, s::cl_int) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_int, + s::cl_uint, s::cl_uint) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_long, s::cl_long, s::cl_long) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_long, - s::cl_long, s::cl_ulong) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_ulong, + s::cl_long, s::cl_long) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_ulong, - s::cl_ulong, s::cl_long) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_long, + s::cl_ulong, s::cl_ulong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::longlong, s::longlong, s::longlong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::longlong, - s::longlong, s::ulonglong) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::ulonglong, + s::longlong, s::longlong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, 
s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::ulonglong, - s::ulonglong, s::longlong) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::longlong, + s::ulonglong, s::ulonglong) #ifndef NO_HALF_ENABLED -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_half, - s::cl_half, s::cl_short) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_half, - s::cl_half, s::cl_ushort) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_short, + s::cl_half, s::cl_half) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_ushort, + s::cl_half, s::cl_half) #endif /* --------------- 4.13.3 Native Math functions. Host version ---------------*/ diff --git a/sycl/test/basic_tests/boolean.cpp b/sycl/test/basic_tests/boolean.cpp new file mode 100644 index 0000000000000..439a5410ebf39 --- /dev/null +++ b/sycl/test/basic_tests/boolean.cpp @@ -0,0 +1,150 @@ +// RUN: %clang -std=c++11 -fsycl %s -o %t.out -lstdc++ -lOpenCL +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +#include + +using namespace cl::sycl; +namespace s = cl::sycl; +namespace d = s::detail; + +d::Boolean<3> foo() { + d::Boolean<3> b3{true, false, true}; + return b3; +} + +int main() { + { + s::cl_long4 r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + d::Boolean<4> b4{false, true, false, false}; + AccR[0] = b4; + }); + }); + } + s::cl_long r1 = r.s0(); + s::cl_long r2 = r.s1(); + s::cl_long r3 = r.s2(); + s::cl_long r4 = r.s3(); + + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 + << std::endl; + + assert(r1 == 0); + assert(r2 == -1); + assert(r3 == 0); + assert(r4 == 0); + } + + { + s::cl_short3 r{0}; + { + buffer BufR(&r, 
range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { AccR[0] = foo(); }); + }); + } + s::cl_short r1 = r.s0(); + s::cl_short r2 = r.s1(); + s::cl_short r3 = r.s2(); + + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << std::endl; + + assert(r1 == -1); + assert(r2 == 0); + assert(r3 == -1); + } + + { + s::cl_int r1[6]; + s::cl_int r2[6]; + { + buffer BufR1(r1, range<1>(6)); + buffer BufR2(r2, range<1>(6)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR1 = BufR1.get_access(cgh); + auto AccR2 = BufR2.get_access(cgh); + cgh.single_task([=]() { + AccR1[0] = sizeof(d::Boolean<1>); + AccR1[1] = sizeof(d::Boolean<2>); + AccR1[2] = sizeof(d::Boolean<3>); + AccR1[3] = sizeof(d::Boolean<4>); + AccR1[4] = sizeof(d::Boolean<8>); + AccR1[5] = sizeof(d::Boolean<16>); + + AccR2[0] = alignof(d::Boolean<1>); + AccR2[1] = alignof(d::Boolean<2>); + AccR2[2] = alignof(d::Boolean<3>); + AccR2[3] = alignof(d::Boolean<4>); + AccR2[4] = alignof(d::Boolean<8>); + AccR2[5] = alignof(d::Boolean<16>); + }); + }); + } + + for (size_t I = 0; I < 6; I++) { + std::cout << " r1[" << I << "] " << r1[I]; + } + std::cout << std::endl; + + for (size_t I = 0; I < 6; I++) { + std::cout << " r2[" << I << "] " << r2[I]; + } + std::cout << std::endl; + assert(r1[0] == sizeof(d::Boolean<1>)); + assert(r1[1] == sizeof(d::Boolean<2>)); + assert(r1[2] == sizeof(d::Boolean<3>)); + assert(r1[3] == sizeof(d::Boolean<4>)); + assert(r1[4] == sizeof(d::Boolean<8>)); + assert(r1[5] == sizeof(d::Boolean<16>)); + + assert(r2[0] == alignof(d::Boolean<1>)); + assert(r2[1] == alignof(d::Boolean<2>)); + assert(r2[2] == alignof(d::Boolean<3>)); + assert(r2[3] == alignof(d::Boolean<4>)); + assert(r2[4] == alignof(d::Boolean<8>)); + assert(r2[5] == alignof(d::Boolean<16>)); + } + + { + s::cl_int4 i4 = {1, -2, 0, -3}; + d::Boolean<4> b4(i4); + i4 = b4; + + s::cl_int r1 = i4.s0(); + s::cl_int r2 = i4.s1(); + s::cl_int r3 = 
i4.s2(); + s::cl_int r4 = i4.s3(); + + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 + << std::endl; + assert(r1 == 0); + assert(r2 == -1); + assert(r3 == 0); + assert(r4 == -1); + } + + { + s::cl_int r1 = d::Boolean<1>(s::cl_int{-1}); + s::cl_int r2 = d::Boolean<1>(s::cl_int{0}); + s::cl_int r3 = d::Boolean<1>(s::cl_int{1}); + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << std::endl; + assert(r1 == 1); + assert(r2 == 0); + assert(r3 == 1); + } + + return 0; +} diff --git a/sycl/test/built-ins/scalar_relational.cpp b/sycl/test/built-ins/scalar_relational.cpp index e61b7ef42a2ff..976d29c753388 100644 --- a/sycl/test/built-ins/scalar_relational.cpp +++ b/sycl/test/built-ins/scalar_relational.cpp @@ -27,7 +27,7 @@ int main() { }); }); } - std::cout << "garima isequal r \t" << r << std::endl; + std::cout << "isequal r \t" << r << std::endl; assert(r == 1); } @@ -139,7 +139,7 @@ int main() { assert(r == 0); } - // isfinite-float : host only + // isfinite-float { cl::sycl::cl_int r{1}; { @@ -147,20 +147,15 @@ int main() { queue myQueue; myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 0; -#else - AccR[0] = cl::sycl::isfinite(cl::sycl::cl_float{NAN}); -#endif - }); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::isfinite(cl::sycl::cl_float{NAN}); }); }); } std::cout << "isfinite r \t" << r << std::endl; assert(r == 0); } - // isinf-float : host only + // isinf-float { cl::sycl::cl_int r{0}; { @@ -168,20 +163,15 @@ int main() { queue myQueue; myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::isinf(cl::sycl::cl_float{INFINITY}); -#endif - }); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::isinf(cl::sycl::cl_float{INFINITY}); }); }); } std::cout << "isinf r \t" << r << std::endl; assert(r == 1); } - // isnan-float : host only + // 
isnan-float { cl::sycl::cl_int r{0}; { @@ -189,20 +179,15 @@ int main() { queue myQueue; myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::isnan(cl::sycl::cl_float{NAN}); -#endif - }); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::isnan(cl::sycl::cl_float{NAN}); }); }); } std::cout << "isnan r \t" << r << std::endl; assert(r == 1); } - // isnormal-float : host only + // isnormal-float { cl::sycl::cl_int r{1}; { @@ -211,11 +196,24 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 0; -#else AccR[0] = cl::sycl::isnormal(cl::sycl::cl_float{INFINITY}); -#endif + }); + }); + } + std::cout << "isnormal r \t" << r << std::endl; + assert(r == 0); + } + + // isnormal-double + { + cl::sycl::cl_int r{1}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::isnormal(cl::sycl::cl_double{INFINITY}); }); }); } @@ -259,7 +257,7 @@ int main() { assert(r == 1); } - // signbit-float : host only + // signbit-float { cl::sycl::cl_int r{0}; { @@ -267,20 +265,15 @@ int main() { queue myQueue; myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::signbit(cl::sycl::cl_float{-12.0f}); -#endif - }); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::signbit(cl::sycl::cl_float{-12.0f}); }); }); } std::cout << "signbit r \t" << r << std::endl; assert(r == 1); } - // any-integer : host only + // any-integer { cl::sycl::cl_int r{0}; { @@ -288,20 +281,46 @@ int main() { queue myQueue; myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = 
cl::sycl::any(cl::sycl::cl_int{-12}); -#endif - }); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::any(cl::sycl::cl_int{12}); }); + }); + } + std::cout << "any + r \t" << r << std::endl; + assert(r == 0); + } + // any-integer + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::any(cl::sycl::cl_int{0}); }); + }); + } + std::cout << "any 0 r \t" << r << std::endl; + assert(r == 0); + } + + // any-integer + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::any(cl::sycl::cl_int{-12}); }); }); } - std::cout << "any r \t" << r << std::endl; + std::cout << "any - r \t" << r << std::endl; assert(r == 1); } - // all-integer : host only + // all-integer { cl::sycl::cl_int r{0}; { @@ -309,16 +328,43 @@ int main() { queue myQueue; myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::all(cl::sycl::cl_int{-12}); -#endif - }); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::all(cl::sycl::cl_int{12}); }); + }); + } + std::cout << "all + r \t" << r << std::endl; + assert(r == 0); + } + + // all-integer + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::all(cl::sycl::cl_int{0}); }); + }); + } + std::cout << "all 0 r \t" << r << std::endl; + assert(r == 0); + } + + // all-integer + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::all(cl::sycl::cl_int{-12}); }); }); } - 
std::cout << "all r \t" << r << std::endl; + std::cout << "all - r \t" << r << std::endl; assert(r == 1); } @@ -341,7 +387,7 @@ int main() { assert(r <= 80.5478 && r >= 80.5476); // r = 80.5477 } - // select-float,int : host only + // select-float,int { cl::sycl::cl_float r{0}; { @@ -349,18 +395,52 @@ int main() { queue myQueue; myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 123.123; -#else + cgh.single_task([=]() { AccR[0] = cl::sycl::select(cl::sycl::cl_float{34.34}, cl::sycl::cl_float{123.123}, cl::sycl::cl_int{1}); -#endif }); }); } - std::cout << "select r \t" << r << std::endl; + std::cout << "select + r \t" << r << std::endl; + assert(r <= 123.124 && r >= 123.122); // r = 123.123 + } + + // select-float,int + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::select(cl::sycl::cl_float{34.34}, + cl::sycl::cl_float{123.123}, + cl::sycl::cl_int{0}); + }); + }); + } + std::cout << "select 0 r \t" << r << std::endl; + assert(r <= 34.35 && r >= 34.33); // r = 34.34 + } + + // select-float,int + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::select(cl::sycl::cl_float{34.34}, + cl::sycl::cl_float{123.123}, + cl::sycl::cl_int{-1}); + }); + }); + } + std::cout << "select - r \t" << r << std::endl; assert(r <= 123.124 && r >= 123.122); // r = 123.123 } diff --git a/sycl/test/built-ins/vector_relational.cpp b/sycl/test/built-ins/vector_relational.cpp index 4e0ac2bc37fdc..e2203df2a8021 100644 --- a/sycl/test/built-ins/vector_relational.cpp +++ b/sycl/test/built-ins/vector_relational.cpp @@ -210,7 +210,7 @@ int main() { // other value except Infinity. 
} - // isfinite : host only + // isfinite { cl::sycl::cl_int4 r{0}; { @@ -219,12 +219,8 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{-1, -1, 0, 0}; -#else AccR[0] = cl::sycl::isfinite( cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); -#endif }); }); } @@ -241,7 +237,7 @@ int main() { assert(r4 == 0); } - // isinf : host only + // isinf { cl::sycl::cl_int4 r{0}; { @@ -250,12 +246,8 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{0, 0, 0, -1}; -#else AccR[0] = cl::sycl::isinf(cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); -#endif }); }); } @@ -272,7 +264,7 @@ int main() { assert(r4 == -1); } - // isnan : host only + // isnan { cl::sycl::cl_int4 r{0}; { @@ -281,12 +273,8 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{0, 0, -1, 0}; -#else AccR[0] = cl::sycl::isnan(cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); -#endif }); }); } @@ -303,7 +291,7 @@ int main() { assert(r4 == 0); } - // isnormal : host only + // isnormal { cl::sycl::cl_int4 r{0}; { @@ -312,12 +300,8 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{-1, -1, 0, 0}; -#else AccR[0] = cl::sycl::isnormal( cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); -#endif }); }); } @@ -390,7 +374,7 @@ int main() { assert(r4 == 0); } - // signbit : host only + // signbit { cl::sycl::cl_int4 r{0}; { @@ -399,12 +383,8 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{0, -1, 0, 0}; -#else AccR[0] = cl::sycl::signbit( 
cl::sycl::cl_float4{0.5f, -12.0f, NAN, INFINITY}); -#endif }); }); } @@ -421,7 +401,7 @@ int main() { assert(r4 == 0); } - // any : host only. + // any. // Call to the device function with vector parameters work. Scalars do not. { cl::sycl::cl_int r{0}; @@ -431,11 +411,7 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else AccR[0] = cl::sycl::any(cl::sycl::cl_int4{-12, -12, 0, 1}); -#endif }); }); } @@ -445,7 +421,67 @@ int main() { assert(r1 == 1); } - // all : host only. + // any. + // Call to the device function with vector parameters work. Scalars do not. + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::any(cl::sycl::cl_int4{-12, -12, -12, -12}); + }); + }); + } + cl::sycl::cl_int r1 = r; + + std::cout << "Any - r1 " << r1 << std::endl; + assert(r1 == 1); + } + + // any. + // Call to the device function with vector parameters work. Scalars do not. + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::any(cl::sycl::cl_int4{0, 0, 0, 0}); + }); + }); + } + cl::sycl::cl_int r1 = r; + + std::cout << "Any 0 r1 " << r1 << std::endl; + assert(r1 == 0); + } + + // any. + // Call to the device function with vector parameters work. Scalars do not. + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::any(cl::sycl::cl_int4{12, 12, 12, 12}); + }); + }); + } + cl::sycl::cl_int r1 = r; + + std::cout << "Any + r1 " << r1 << std::endl; + assert(r1 == 0); + } + + // all. // Call to the device function with vector parameters work. 
Scalars do not. { cl::sycl::cl_int r{0}; @@ -455,14 +491,10 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else AccR[0] = cl::sycl::all(cl::sycl::cl_int4{-12, -12, -12, -12}); // Infinity (positive or negative) or Nan are not integers. // Passing them creates inconsistent results between host and device // execution. -#endif }); }); } @@ -472,6 +504,66 @@ int main() { assert(r1 == 1); } + // all. + // Call to the device function with vector parameters work. Scalars do not. + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::all(cl::sycl::cl_int4{-12, -12, -12, -12}); + }); + }); + } + cl::sycl::cl_int r1 = r; + + std::cout << "All - r1 " << r1 << std::endl; + assert(r1 == 1); + } + + // all. + // Call to the device function with vector parameters work. Scalars do not. + { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::all(cl::sycl::cl_int4{0, 0, 0, 0}); + }); + }); + } + cl::sycl::cl_int r1 = r; + + std::cout << "All 0 r1 " << r1 << std::endl; + assert(r1 == 0); + } + + // all. + // Call to the device function with vector parameters work. Scalars do not. 
+ { + cl::sycl::cl_int r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::all(cl::sycl::cl_int4{12, 12, 12, 12}); + }); + }); + } + cl::sycl::cl_int r1 = r; + + std::cout << "All + r1 " << r1 << std::endl; + assert(r1 == 0); + } + // bitselect { cl::sycl::cl_float4 r{0}; @@ -502,7 +594,7 @@ int main() { assert(abs(r4 - 0.0f) < 0.01); } - // select : host only + // select { cl::sycl::cl_float4 r{0}; { @@ -511,16 +603,12 @@ int main() { myQueue.submit([&](handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_float4{112.112f, 112.112f, 112.112f, 112.112f}; -#else AccR[0] = cl::sycl::select( cl::sycl::cl_float4{112.112f, 34.34f, 112.112f, 34.34f}, cl::sycl::cl_float4{34.34f, 112.112f, 34.34f, 112.112f}, - cl::sycl::cl_int4{0, -1, 0, -1}); + cl::sycl::cl_int4{0, -1, 0, 1}); // Using NAN/infinity as an input, which gets // selected by -1, produces a NAN/infinity as expected. -#endif }); }); } @@ -534,7 +622,7 @@ int main() { assert(r1 == 112.112f); assert(r2 == 112.112f); assert(r3 == 112.112f); - assert(r4 == 112.112f); + assert(r4 == 34.34f); } return 0; From 27dcf61cebba45322966e234bc465f204b1efdc8 Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Tue, 2 Apr 2019 20:14:19 +0300 Subject: [PATCH 02/11] [SYCL] Fix the literal inside the host fract builtin function. 
Signed-off-by: Alexey Voronov --- sycl/source/detail/builtins.cpp | 60 +++++++++++++++-------------- sycl/test/built-ins/scalar_math.cpp | 5 +-- sycl/test/built-ins/vector_math.cpp | 4 +- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index fa191ffe7ee84..a1ce863ac8920 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -704,22 +704,42 @@ MAKE_1V_2V(fmod, s::cl_double, s::cl_double, s::cl_double) #ifndef NO_HALF_ENABLED MAKE_1V_2V(fmod, s::cl_half, s::cl_half, s::cl_half) #endif -/* fract - disabled until proper C++11 compatible implementation + +// nextafter +cl_float nextafter(s::cl_float x, s::cl_float y) __NOEXC { + return std::nextafter(x, y); +} +cl_double nextafter(s::cl_double x, s::cl_double y) __NOEXC { + return std::nextafter(x, y); +} +#ifdef __HAFL_ENABLED +cl_half nextafter(s::cl_half x, s::cl_half y) __NOEXC { + return std::nextafter(x, y); +} +#endif +MAKE_1V_2V(nextafter, s::cl_float, s::cl_float, s::cl_float) +MAKE_1V_2V(nextafter, s::cl_double, s::cl_double, s::cl_double) +#ifdef __HAFL_ENABLED +MAKE_1V_2V(nextafter, s::cl_half, s::cl_half, s::cl_half) +#endif + +// fract +template +T __fract(T x, T* iptr) { + T f = std::floor(x); + *(iptr) = f; + return std::fmin(x - f, nextafter(T(1.0), T(0.0))); +} + cl_float fract(s::cl_float x, s::cl_float *iptr) __NOEXC { - decltype(x) f = std::floor(x); - iptr[0] = f; - return std::fmin(x - f, 0x1.fffffep-1f); + return __fract(x, iptr); } cl_double fract(s::cl_double x, s::cl_double *iptr) __NOEXC { - decltype(x) f = std::floor(x); - iptr[0] = f; - return std::fmin(x - f, 0x1.fffffep-1f); + return __fract(x, iptr); } #ifdef __HAFL_ENABLED cl_half fract(s::cl_half x, s::cl_half *iptr) __NOEXC { - decltype(x) f = std::floor(x); - iptr[0] = f; - return std::fmin(x - f, 0x1.fffffep-1f); + return __fract(x, iptr); } #endif MAKE_1V_2P(fract, s::cl_float, s::cl_float, s::cl_float) @@ -727,7 +747,7 
@@ MAKE_1V_2P(fract, s::cl_double, s::cl_double, s::cl_double) #ifdef __HAFL_ENABLED MAKE_1V_2P(fract, s::cl_half, s::cl_half, s::cl_half) #endif -*/ + // frexp cl_float frexp(s::cl_float x, s::cl_int *exp) __NOEXC { return std::frexp(x, exp); @@ -1006,24 +1026,6 @@ MAKE_1V(nan, s::cl_double, s::ulonglong) MAKE_1V(nan, s::cl_half, s::cl_ushort) #endif -// nextafter -cl_float nextafter(s::cl_float x, s::cl_float y) __NOEXC { - return std::nextafter(x, y); -} -cl_double nextafter(s::cl_double x, s::cl_double y) __NOEXC { - return std::nextafter(x, y); -} -#ifdef __HAFL_ENABLED -cl_half nextafter(s::cl_half x, s::cl_half y) __NOEXC { - return std::nextafter(x, y); -} -#endif -MAKE_1V_2V(nextafter, s::cl_float, s::cl_float, s::cl_float) -MAKE_1V_2V(nextafter, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED -MAKE_1V_2V(nextafter, s::cl_half, s::cl_half, s::cl_half) -#endif - // pow cl_float pow(s::cl_float x, s::cl_float y) __NOEXC { return std::pow(x, y); } cl_double pow(s::cl_double x, s::cl_double y) __NOEXC { return std::pow(x, y); } diff --git a/sycl/test/built-ins/scalar_math.cpp b/sycl/test/built-ins/scalar_math.cpp index f2ccea71c483b..640e9e6ef5c8f 100644 --- a/sycl/test/built-ins/scalar_math.cpp +++ b/sycl/test/built-ins/scalar_math.cpp @@ -485,9 +485,8 @@ int main() { assert(r == 2.1f); } - // fract // fract with global memory - /*{ + { cl::sycl::cl_float r{0}; cl::sycl::cl_float i{999}; { @@ -532,7 +531,7 @@ int main() { std::cout << "r " << r << " i " << i << std::endl; assert(r == 0.5f); assert(i == 1.0f); - }*/ + } // nan { diff --git a/sycl/test/built-ins/vector_math.cpp b/sycl/test/built-ins/vector_math.cpp index 3b1c9d5b1220c..6c45b6c7309fb 100644 --- a/sycl/test/built-ins/vector_math.cpp +++ b/sycl/test/built-ins/vector_math.cpp @@ -94,7 +94,7 @@ int main() { } // fract with global memory - /*{ + { cl::sycl::cl_float2 r{0, 0}; cl::sycl::cl_float2 i{0, 0}; { @@ -154,7 +154,7 @@ int main() { assert(r2 == 0.5f); assert(i1 == 1.0f); 
assert(i2 == 2.0f); - }*/ + } return 0; } From 4da72ff8d19b6a0b6029695d8a04f557a37cb6ed Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Tue, 2 Apr 2019 19:14:44 +0300 Subject: [PATCH 03/11] [SYCL] Add the host side geometric built-in functions. Signed-off-by: Alexey Voronov --- sycl/include/CL/sycl/builtins.hpp | 15 -- .../CL/sycl/detail/generic_type_traits.hpp | 19 ++ sycl/source/detail/builtins.cpp | 190 +++++++++++++++++- sycl/test/built-ins/scalar_geometric.cpp | 101 ++++++++++ sycl/test/built-ins/vector_geometric.cpp | 181 +++++++++++++++++ 5 files changed, 480 insertions(+), 26 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 87259b42ae126..b77c57c492eee 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1099,7 +1099,6 @@ dot(T p0, T p1) __NOEXC { template ::value, T>::type> cl::sycl::cl_float distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(distance, T) return __sycl_std::__invoke_distance(p0, p1); } @@ -1107,7 +1106,6 @@ cl::sycl::cl_float distance(T p0, T p1) __NOEXC { template ::value, T>::type> cl::sycl::cl_double distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(distance, T) return __sycl_std::__invoke_distance(p0, p1); } @@ -1116,7 +1114,6 @@ cl::sycl::cl_double distance(T p0, T p1) __NOEXC { template ::value, T>::type> cl::sycl::cl_half distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(distance, T) return __sycl_std::__invoke_distance(p0, p1); } #endif @@ -1125,7 +1122,6 @@ cl::sycl::cl_half distance(T p0, T p1) __NOEXC { template ::value, T>::type> cl::sycl::cl_float length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(length, T) return __sycl_std::__invoke_length(p); } @@ -1133,7 +1129,6 @@ cl::sycl::cl_float length(T p) __NOEXC { template ::value, T>::type> cl::sycl::cl_double length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(length, T) return __sycl_std::__invoke_length(p); } @@ -1142,7 +1137,6 @@ cl::sycl::cl_double length(T 
p) __NOEXC { template ::value, T>::type> cl::sycl::cl_half length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(length, T) return __sycl_std::__invoke_length(p); } #endif @@ -1151,7 +1145,6 @@ cl::sycl::cl_half length(T p) __NOEXC { template typename std::enable_if::value, T>::type normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(normalize, T) return __sycl_std::__invoke_normalize(p); } @@ -1159,7 +1152,6 @@ normalize(T p) __NOEXC { template typename std::enable_if::value, T>::type normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(normalize, T) return __sycl_std::__invoke_normalize(p); } @@ -1168,7 +1160,6 @@ normalize(T p) __NOEXC { template typename std::enable_if::value, T>::type normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(normalize, T) return __sycl_std::__invoke_normalize(p); } #endif @@ -1177,7 +1168,6 @@ normalize(T p) __NOEXC { template ::value, T>::type> cl::sycl::cl_float fast_distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_distance, T) return __sycl_std::__invoke_fast_distance(p0, p1); } @@ -1185,7 +1175,6 @@ cl::sycl::cl_float fast_distance(T p0, T p1) __NOEXC { template ::value, T>::type> cl::sycl::cl_double fast_distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_distance, T) return __sycl_std::__invoke_fast_distance(p0, p1); } @@ -1193,7 +1182,6 @@ cl::sycl::cl_double fast_distance(T p0, T p1) __NOEXC { template ::value, T>::type> cl::sycl::cl_float fast_length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_length, T) return __sycl_std::__invoke_fast_length(p); } @@ -1201,7 +1189,6 @@ cl::sycl::cl_float fast_length(T p) __NOEXC { template ::value, T>::type> cl::sycl::cl_double fast_length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_length, T) return __sycl_std::__invoke_fast_length(p); } @@ -1209,7 +1196,6 @@ cl::sycl::cl_double fast_length(T p) __NOEXC { template typename std::enable_if::value, T>::type fast_normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_normalize, T) return 
__sycl_std::__invoke_fast_normalize(p); } @@ -1217,7 +1203,6 @@ fast_normalize(T p) __NOEXC { template typename std::enable_if::value, T>::type fast_normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_normalize, T) return __sycl_std::__invoke_fast_normalize(p); } diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index a795726db8275..628d10d7cb6b7 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -136,6 +136,25 @@ using is_vgengeohalf = typename is_contained>::type; #endif +// sgengeo: float, double, half +template +using is_sgengeo = + std::integral_constant>::value>; + +// vgengeo: vgengeofloat, vgengeodouble, vgengeohalf +template +using is_vgengeo = std::integral_constant::value || + is_vgengeodouble::value +#ifndef __HALF_NO_ENABLED + || is_vgengeohalf::value +#endif + >; + // gencrossfloat: float3, float4 template using is_gencrossfloat = diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index a1ce863ac8920..34d7100046d04 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -57,6 +57,15 @@ namespace s = cl::sycl; return r; \ } +#define __MAKE_1V_RS(Fun, Call, N, Ret, Arg1) \ + Ret Fun __NOEXC(Arg1##N x) { \ + Ret r = Ret(); \ + using base1_t = typename Arg1##N::element_type; \ + detail::helper().run_1v_rs( \ + r, [](Ret &r, base1_t x) { return cl::__host_std::Call(r, x); }, x); \ + return r; \ + } + #define __MAKE_1V_2V_3V(Fun, Call, N, Ret, Arg1, Arg2, Arg3) \ Ret##N Fun __NOEXC(Arg1##N x, Arg2##N y, Arg3##N z) { \ Ret##N r; \ @@ -185,12 +194,10 @@ namespace s = cl::sycl; __MAKE_1V_2P(Fun, 8, Ret, Arg1, Arg2) \ __MAKE_1V_2P(Fun, 16, Ret, Arg1, Arg2) -#define MAKE_1V_2V_RS(Fun, Call, Ret, Arg1, Arg2) \ +#define MAKE_GEO_1V_2V_RS(Fun, Call, Ret, Arg1, Arg2) \ __MAKE_1V_2V_RS(Fun, Call, 2, Ret, Arg1, Arg2) \ __MAKE_1V_2V_RS(Fun, Call, 3, Ret, Arg1, 
Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 4, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 16, Ret, Arg1, Arg2) + __MAKE_1V_2V_RS(Fun, Call, 4, Ret, Arg1, Arg2) #define MAKE_1V_2V_3P(Fun, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3P(Fun, 2, Ret, Arg1, Arg2, Arg3) \ @@ -227,6 +234,12 @@ template struct helper { op(r, x.template swizzle(), y.template swizzle()); } + template + void run_1v_rs(Res &r, Op op, T1 x) { + helper().run_1v_rs(r, op, x); + op(r, x.template swizzle()); + } + template void run_1v_2p(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2p(r, op, x, y); @@ -283,6 +296,10 @@ template <> struct helper<0> { void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { op(r, x.template swizzle<0>(), y.template swizzle<0>()); } + template + void run_1v_rs(Res &r, Op op, T1 x) { + op(r, x.template swizzle<0>()); + } template void run_1v_2p(Res &r, Op op, T1 x, T2 y) { // TODO avoid creating a temporary variable @@ -1600,12 +1617,149 @@ cl_float OpFMul(s::cl_half p0, s::cl_half p1) { } #endif // OpDot -MAKE_1V_2V_RS(OpDot, __OpFMul, s::cl_float, s::cl_float, s::cl_float) -MAKE_1V_2V_RS(OpDot, __OpFMul, s::cl_double, s::cl_double, s::cl_double) +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul, s::cl_float, s::cl_float, s::cl_float) +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul, s::cl_double, s::cl_double, s::cl_double) +#ifndef NO_HALF_ENABLED +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul, s::cl_half, s::cl_half, s::cl_half) +#endif + +// length +template +typename std::enable_if::value, T>::type +__length(T t) { + return std::sqrt(OpFMul(t, t)); +} + +template +typename std::enable_if::value, + typename T::element_type>::type +__length(T t) { + return std::sqrt(OpDot(t, t)); +} + +cl_float length(s::cl_float p) { return __length(p); } +cl_double length(s::cl_double p) { return __length(p); } +#ifndef NO_HALF_ENABLED +cl_half length(s::cl_half p) { return __length(p); } +#endif +cl_float length(s::cl_float2 p) { return __length(p); } +cl_float length(s::cl_float3 p) { 
return __length(p); } +cl_float length(s::cl_float4 p) { return __length(p); } +cl_double length(s::cl_double2 p) { return __length(p); } +cl_double length(s::cl_double3 p) { return __length(p); } +cl_double length(s::cl_double4 p) { return __length(p); } +#ifndef NO_HALF_ENABLED +cl_half length(s::cl_half2 p) { return __length(p); } +cl_half length(s::cl_half3 p) { return __length(p); } +cl_half length(s::cl_half4 p) { return __length(p); } +#endif + +// distance +cl_float distance(s::cl_float p0, s::cl_float p1) { return length(p0 - p1); } +cl_double distance(s::cl_double p0, s::cl_double p1) { return length(p0 - p1); } +#ifndef NO_HALF_ENABLED +cl_half distance(s::cl_half p0, s::cl_half p1) { return length(p0 - p1); } +#endif +cl_float distance(s::cl_float2 p0, s::cl_float2 p1) { return length(p0 - p1); } +cl_float distance(s::cl_float3 p0, s::cl_float3 p1) { return length(p0 - p1); } +cl_float distance(s::cl_float4 p0, s::cl_float4 p1) { return length(p0 - p1); } +cl_double distance(s::cl_double2 p0, s::cl_double2 p1) { + return length(p0 - p1); +} +cl_double distance(s::cl_double3 p0, s::cl_double3 p1) { + return length(p0 - p1); +} +cl_double distance(s::cl_double4 p0, s::cl_double4 p1) { + return length(p0 - p1); +} #ifndef NO_HALF_ENABLED -MAKE_1V_2V_RS(OpDot, __OpFMul, s::cl_half, s::cl_half, s::cl_half) +cl_half distance(s::cl_half2 p0, s::cl_half2 p1) { return length(p0 - p1); } +cl_half distance(s::cl_half3 p0, s::cl_half3 p1) { return length(p0 - p1); } +cl_half distance(s::cl_half4 p0, s::cl_half4 p1) { return length(p0 - p1); } #endif +// normalize +template +typename std::enable_if::value, T>::type +__normalize(T t) { + T r = length(t); + return t / T(r); +} + +template +typename std::enable_if::value, T>::type +__normalize(T t) { + typename T::element_type r = length(t); + return t / T(r); +} + +s::cl_float normalize(s::cl_float p) { return __normalize(p); } +s::cl_float2 normalize(s::cl_float2 p) { return __normalize(p); } +s::cl_float3 
normalize(s::cl_float3 p) { return __normalize(p); } +s::cl_float4 normalize(s::cl_float4 p) { return __normalize(p); } + +s::cl_double normalize(s::cl_double p) { return __normalize(p); } +s::cl_double2 normalize(s::cl_double2 p) { return __normalize(p); } +s::cl_double3 normalize(s::cl_double3 p) { return __normalize(p); } +s::cl_double4 normalize(s::cl_double4 p) { return __normalize(p); } +#ifndef NO_HALF_ENABLED +s::cl_half normalize(s::cl_half p) { return __normalize(p); } +s::cl_half2 normalize(s::cl_half2 p) { return __normalize(p); } +s::cl_half3 normalize(s::cl_half3 p) { return __normalize(p); } +s::cl_half4 normalize(s::cl_half4 p) { return __normalize(p); } +#endif + +// fast_length +template +typename std::enable_if::value, + typename T::element_type>::type +__fast_length(T t) { + return std::sqrt(OpDot(t, t)); +} +cl_float fast_length(s::cl_float p) { return std::sqrt(OpFMul(p, p)); } +cl_float fast_length(s::cl_float2 p) { return __fast_length(p); } +cl_float fast_length(s::cl_float3 p) { return __fast_length(p); } +cl_float fast_length(s::cl_float4 p) { return __fast_length(p); } + +// fast_normalize +s::cl_int OpAll(s::cl_int2); +s::cl_int OpAll(s::cl_int3); +s::cl_int OpAll(s::cl_int4); +template +typename std::enable_if::value, T>::type +__fast_normalize(T t) { + if (OpAll(t == T(0.0f))) { + return t; + } + typename T::element_type r = std::sqrt(OpDot(t, t)); + return t / T(r); +} + +s::cl_float fast_normalize(s::cl_float p) { + if (p == 0.0f) { + return p; + } + s::cl_float r = std::sqrt(OpFMul(p, p)); + return p / r; +} +s::cl_float2 fast_normalize(s::cl_float2 p) { return __fast_normalize(p); } +s::cl_float3 fast_normalize(s::cl_float3 p) { return __fast_normalize(p); } +s::cl_float4 fast_normalize(s::cl_float4 p) { return __fast_normalize(p); } + +// fast_distance +cl_float fast_distance(s::cl_float p0, s::cl_float p1) { + return fast_length(p0 - p1); +} +cl_float fast_distance(s::cl_float2 p0, s::cl_float2 p1) { + return fast_length(p0 - 
p1); +} +cl_float fast_distance(s::cl_float3 p0, s::cl_float3 p1) { + return fast_length(p0 - p1); +} +cl_float fast_distance(s::cl_float4 p0, s::cl_float4 p1) { + return fast_length(p0 - p1); +} + /* --------------- 4.13.7 Relational functions. Host version --------------*/ // OpFOrdEqual-isequal cl_int OpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { return (x == y); } @@ -2223,17 +2377,31 @@ MAKE_1V(half_tan, s::cl_float, s::cl_float) } // namespace __host_std } // namespace cl +#undef __NOEXC +#undef NO_HALF_ENABLED #undef __MAKE_1V #undef __MAKE_1V_2V +#undef __MAKE_1V_2V_RS +#undef __MAKE_1V_RS +#undef __MAKE_1V_2V_3V #undef __MAKE_1V_2S +#undef __MAKE_SR_1V_AND +#undef __MAKE_SR_1V_OR #undef __MAKE_1V_2P -#undef __MAKE_1V_2V_3V #undef __MAKE_1V_2V_3P #undef MAKE_1V +#undef MAKE_1V_FUNC #undef MAKE_1V_2V +#undef MAKE_1V_2V_FUNC +#undef MAKE_1V_2V_3V +#undef MAKE_1V_2V_3V_FUNC +#undef MAKE_SC_1V_2V_3V +#undef MAKE_SC_FSC_1V_2V_3V_FV +#undef MAKE_SC_3ARG #undef MAKE_1V_2S +#undef MAKE_SR_1V_AND +#undef MAKE_SR_1V_OR +#undef MSB_MASK #undef MAKE_1V_2P -#undef MAKE_1V_2V_3V +#undef MAKE_GEO_1V_2V_RS #undef MAKE_1V_2V_3P - -#undef __NOEXC diff --git a/sycl/test/built-ins/scalar_geometric.cpp b/sycl/test/built-ins/scalar_geometric.cpp index 1aeb4c7421d8d..d885958017934 100644 --- a/sycl/test/built-ins/scalar_geometric.cpp +++ b/sycl/test/built-ins/scalar_geometric.cpp @@ -30,5 +30,106 @@ int main() { assert(r == 0.8f); } + // distance + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::distance(cl::sycl::cl_float{1.f}, + cl::sycl::cl_float{3.f}); + }); + }); + } + std::cout << "r " << r << std::endl; + assert(r == 2.f); + } + + // length + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + 
[=]() { AccR[0] = cl::sycl::length(cl::sycl::cl_float{1.f}); }); + }); + } + std::cout << "r " << r << std::endl; + assert(r == 1.f); + } + // normalize + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = cl::sycl::normalize(cl::sycl::cl_float{2.f}); }); + }); + } + + std::cout << "r " << r << std::endl; + assert(r == 1.f); + } + + // fast_distance + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::fast_distance(cl::sycl::cl_float{1.f}, + cl::sycl::cl_float{3.f}); + }); + }); + } + std::cout << "r " << r << std::endl; + assert(r == 2.f); + } + // fast_length + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::fast_length(cl::sycl::cl_float{2.f}); + }); + }); + } + std::cout << "r " << r << std::endl; + assert(r == 2.f); + } + // fast_normalize + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::fast_normalize(cl::sycl::cl_float{2.f}); + }); + }); + } + + std::cout << "r " << r << std::endl; + assert(r == 1.f); + } + return 0; } \ No newline at end of file diff --git a/sycl/test/built-ins/vector_geometric.cpp b/sycl/test/built-ins/vector_geometric.cpp index cdcdc8f17752f..0f7c5a8b3d499 100644 --- a/sycl/test/built-ins/vector_geometric.cpp +++ b/sycl/test/built-ins/vector_geometric.cpp @@ -8,9 +8,14 @@ #include #include +#include using namespace cl::sycl; +bool isFloatEqualTo(float x, float y, float epsilon = 0.005f){ + return std::fabs(x - y) <= epsilon; +} + int main() { 
// dot { @@ -34,5 +39,181 @@ int main() { assert(r == 16.f); } + // cross + { + cl::sycl::cl_float4 r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::cross( + cl::sycl::cl_float4{ + 2.f, + 3.f, + 4.f, + 0.f, + }, + cl::sycl::cl_float4{ + 5.f, + 6.f, + 7.f, + 0.f, + }); + }); + }); + } + + cl::sycl::cl_float r1 = r.x(); + cl::sycl::cl_float r2 = r.y(); + cl::sycl::cl_float r3 = r.z(); + cl::sycl::cl_float r4 = r.w(); + + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 + << std::endl; + assert(r1 == -3.f); + assert(r2 == 6.f); + assert(r3 == -3.f); + assert(r4 == 0.0f); + } + + // distance + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::distance( + cl::sycl::cl_float2{ + 1.f, + 2.f, + }, + cl::sycl::cl_float2{ + 3.f, + 4.f, + }); + }); + }); + } + std::cout << "r " << r << std::endl; + assert(isFloatEqualTo(r, 2.82843f)); + } + + // length + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::length(cl::sycl::cl_float2{ + 1.f, + 2.f, + }); + }); + }); + } + std::cout << "r " << r << std::endl; + assert(isFloatEqualTo(r, 2.23607f)); + } + // normalize + { + cl::sycl::cl_float2 r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::normalize(cl::sycl::cl_float2{ + 1.f, + 2.f, + }); + }); + }); + } + cl::sycl::cl_float r1 = r.x(); + cl::sycl::cl_float r2 = r.y(); + + std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + assert(isFloatEqualTo(r1, 0.447214f)); + assert(isFloatEqualTo(r2, 
0.894427f)); + } + + // fast_distance + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::fast_distance( + cl::sycl::cl_float2{ + 1.f, + 2.f, + }, + cl::sycl::cl_float2{ + 3.f, + 4.f, + }); + }); + }); + } + std::cout << "r " << r << std::endl; + assert(isFloatEqualTo(r, 2.82843f)); + } + + // fast_length + { + cl::sycl::cl_float r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::fast_length(cl::sycl::cl_float2{ + 1.f, + 2.f, + }); + }); + }); + } + std::cout << "r " << r << std::endl; + assert(isFloatEqualTo(r, 2.23607f)); + } + + // fast_normalize + { + cl::sycl::cl_float2 r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = cl::sycl::fast_normalize(cl::sycl::cl_float2{ + 1.f, + 2.f, + }); + }); + }); + } + cl::sycl::cl_float r1 = r.x(); + cl::sycl::cl_float r2 = r.y(); + + std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + assert(isFloatEqualTo(r1, 0.447144)); + assert(isFloatEqualTo(r2, 0.894287)); + } + return 0; } \ No newline at end of file From ac9345e93c521ff90ab059789e5d11a9d560a95a Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Tue, 2 Apr 2019 19:09:16 +0300 Subject: [PATCH 04/11] [SYCL] Fix max,min integer and clamp common builtin functions. The GPU SPIR-V translator cannot handle these functions when the first argument is a vector type and the second/third argument is a scalar type, so the scalar arguments are now converted to the vector type before the call.
Signed-off-by: Alexey Voronov --- sycl/include/CL/sycl/builtins.hpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index b77c57c492eee..50dc5501133c2 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -795,7 +795,7 @@ typename std::enable_if::value, T>::type clamp(T x, typename T::element_type minval, typename T::element_type maxval) __NOEXC { __NO_SUPPORT_HOST_VERSION(clamp, T) - return __sycl_std::__invoke_s_clamp(x, minval, maxval); + return __sycl_std::__invoke_s_clamp(x, T(minval), T(maxval)); } // geninteger clamp (geninteger x, sgeninteger minval, sgeninteger maxval) @@ -804,7 +804,7 @@ typename std::enable_if::value, T>::type clamp(T x, typename T::element_type minval, typename T::element_type maxval) __NOEXC { __NO_SUPPORT_HOST_VERSION(clamp, T) - return __sycl_std::__invoke_u_clamp(x, minval, maxval); + return __sycl_std::__invoke_u_clamp(x, T(minval), T(maxval)); } // geninteger clz (geninteger x) @@ -847,60 +847,60 @@ mad_sat(T a, T b, T c) __NOEXC { return __sycl_std::__invoke_u_mad_sat(a, b, c); } -// geninteger max (geninteger x, geninteger y) +// igeninteger max (igeninteger x, igeninteger y) template typename std::enable_if::value, T>::type max(T x, T y) __NOEXC { return __sycl_std::__invoke_s_max(x, y); } -// geninteger max (geninteger x, geninteger y) +// ugeninteger max (ugeninteger x, ugeninteger y) template typename std::enable_if::value, T>::type max(T x, T y) __NOEXC { return __sycl_std::__invoke_u_max(x, y); } -// geninteger max (geninteger x, sgeninteger y) +// igeninteger max (vigeninteger x, sigeninteger y) template typename std::enable_if::value, T>::type max(T x, typename T::element_type y) __NOEXC { - return __sycl_std::__invoke_s_max(x, y); + return __sycl_std::__invoke_s_max(x, T(y)); } -// geninteger max (geninteger x, sgeninteger y) +// vugeninteger max (vugeninteger x, 
sugeninteger y) template typename std::enable_if::value, T>::type max(T x, typename T::element_type y) __NOEXC { - return __sycl_std::__invoke_u_max(x, y); + return __sycl_std::__invoke_u_max(x, T(y)); } -// geninteger min (geninteger x, geninteger y) +// igeninteger min (igeninteger x, igeninteger y) template typename std::enable_if::value, T>::type min(T x, T y) __NOEXC { return __sycl_std::__invoke_s_min(x, y); } -// geninteger min (geninteger x, geninteger y) +// ugeninteger min (ugeninteger x, ugeninteger y) template typename std::enable_if::value, T>::type min(T x, T y) __NOEXC { return __sycl_std::__invoke_u_min(x, y); } -// geninteger min (geninteger x, sgeninteger y) +// vigeninteger min (vigeninteger x, sigeninteger y) template typename std::enable_if::value, T>::type min(T x, typename T::element_type y) __NOEXC { - return __sycl_std::__invoke_s_min(x, y); + return __sycl_std::__invoke_s_min(x, T(y)); } -// geninteger min (geninteger x, sgeninteger y) +// vugeninteger min (vugeninteger x, sugeninteger y) template typename std::enable_if::value, T>::type min(T x, typename T::element_type y) __NOEXC { - return __sycl_std::__invoke_u_min(x, y); + return __sycl_std::__invoke_u_min(x, T(y)); } // geninteger mul_hi (geninteger x, geninteger y) From bf837d49c3ff3a7be1e030fc3007d29dcf0bfa93 Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Tue, 2 Apr 2019 20:17:25 +0300 Subject: [PATCH 05/11] [SYCL] Delete cl::sycl::native::exp test. The accuracy of this function is implementation-defined, so its result cannot be checked independently. That the function can be called is already checked by the CTS.
Signed-off-by: Alexey Voronov --- sycl/test/built-ins/scalar_math.cpp | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/sycl/test/built-ins/scalar_math.cpp b/sycl/test/built-ins/scalar_math.cpp index 640e9e6ef5c8f..f1d5b6256c165 100644 --- a/sycl/test/built-ins/scalar_math.cpp +++ b/sycl/test/built-ins/scalar_math.cpp @@ -59,7 +59,6 @@ int main() { assert(r > 0.333f && r < 0.334f); // ~0.33333333333333337 } - // todo // asin { cl::sycl::cl_float r{0}; @@ -548,22 +547,5 @@ int main() { assert(std::isnan(r)); } - // native exp - { - cl::sycl::cl_float r{0}; - { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = cl::sycl::native::exp(cl::sycl::cl_float{1.0f}); - }); - }); - } - std::cout << "r " << r << std::endl; - assert(r > 2.718f && r < 2.719f); // ~2.718281828459045 - } - return 0; } From 907babe3c3bcafacc09353d4c452e3083540211f Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Fri, 5 Apr 2019 16:06:39 +0300 Subject: [PATCH 06/11] [SYCL] Add host side implementation of integer built-in functions Signed-off-by: Alexey Voronov --- sycl/include/CL/sycl/builtins.hpp | 91 +-- sycl/include/CL/sycl/detail/builtins.hpp | 4 +- .../CL/sycl/detail/generic_type_traits.hpp | 177 ++-- sycl/source/detail/builtins.cpp | 755 +++++++++++++++++- sycl/test/built-ins/scalar_integer.cpp | 407 +++++++++- sycl/test/built-ins/vector_integer.cpp | 583 ++++++++++++-- 6 files changed, 1781 insertions(+), 236 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 50dc5501133c2..f200460ec23dc 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -23,25 +23,13 @@ namespace cl { namespace sycl { #ifdef __SYCL_DEVICE_ONLY__ -#define __DEVICE_SIDE 1 namespace __sycl_std = cl::__spirv; #else -#define __DEVICE_SIDE 0 namespace __sycl_std = __host_std; #endif } // namespace sycl } // 
namespace cl -#define __NO_SUPPORT_HOST_VERSION(name, T) \ - static_assert(__DEVICE_SIDE != 0 && \ - /* compile time dependence*/ sizeof(T) > 0, \ - STRINGIFY_LINE(name) " host version is not implemented yet"); - -#define __NO_SUPPORT_DEVICE_VERSION(name, T) \ - static_assert( \ - __DEVICE_SIDE != 1 && /* compile time dependence*/ sizeof(T) > 0, \ - STRINGIFY_LINE(name) " device version is not implemented yet"); - namespace cl { namespace sycl { /* ----------------- 4.13.3 Math functions. ---------------------------------*/ @@ -689,11 +677,9 @@ sign(T x) __NOEXC { /* --------------- 4.13.4 Integer functions. --------------------------------*/ // ugeninteger abs (geninteger x) template -typename std::enable_if::value, - typename detail::make_unsigned::type>::type +typename std::enable_if::value, T>::type abs(T x) __NOEXC { - __NO_SUPPORT_HOST_VERSION(abs, T) - return __sycl_std::__invoke_u_abs::type>(x); + return __sycl_std::__invoke_u_abs(x); } // ugeninteger abs (geninteger x) @@ -701,18 +687,14 @@ template typename std::enable_if::value, typename detail::make_unsigned::type>::type abs(T x) __NOEXC { - __NO_SUPPORT_HOST_VERSION(abs, T) return __sycl_std::__invoke_s_abs::type>(x); } // ugeninteger abs_diff (geninteger x, geninteger y) template -typename std::enable_if::value, - typename detail::make_unsigned::type>::type +typename std::enable_if::value, T>::type abs_diff(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(abs_diff, T) - return __sycl_std::__invoke_u_abs_diff< - typename detail::make_unsigned::type>(x, y); + return __sycl_std::__invoke_u_abs_diff(x, y); } // ugeninteger abs_diff (geninteger x, geninteger y) @@ -720,7 +702,6 @@ template typename std::enable_if::value, typename detail::make_unsigned::type>::type abs_diff(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(abs_diff, T) return __sycl_std::__invoke_s_abs_diff< typename detail::make_unsigned::type>(x, y); } @@ -729,7 +710,6 @@ abs_diff(T x, T y) __NOEXC { template typename std::enable_if::value, 
T>::type add_sat(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(add_sat, T) return __sycl_std::__invoke_s_add_sat(x, y); } @@ -737,7 +717,6 @@ add_sat(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type add_sat(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(add_sat, T) return __sycl_std::__invoke_u_add_sat(x, y); } @@ -745,7 +724,6 @@ add_sat(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type hadd(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(hadd, T) return __sycl_std::__invoke_s_hadd(x, y); } @@ -753,7 +731,6 @@ hadd(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type hadd(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(hadd, T) return __sycl_std::__invoke_u_hadd(x, y); } @@ -761,7 +738,6 @@ hadd(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type rhadd(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(rhadd, T) return __sycl_std::__invoke_s_rhadd(x, y); } @@ -769,7 +745,6 @@ rhadd(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type rhadd(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(rhadd, T) return __sycl_std::__invoke_u_rhadd(x, y); } @@ -777,7 +752,6 @@ rhadd(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type clamp(T x, T minval, T maxval) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clamp, T) return __sycl_std::__invoke_s_clamp(x, minval, maxval); } @@ -785,7 +759,6 @@ clamp(T x, T minval, T maxval) __NOEXC { template typename std::enable_if::value, T>::type clamp(T x, T minval, T maxval) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clamp, T) return __sycl_std::__invoke_u_clamp(x, minval, maxval); } @@ -794,7 +767,6 @@ template typename std::enable_if::value, T>::type clamp(T x, typename T::element_type minval, typename T::element_type maxval) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clamp, T) return __sycl_std::__invoke_s_clamp(x, T(minval), T(maxval)); } @@ -803,7 +775,6 @@ template typename std::enable_if::value, T>::type clamp(T x, typename T::element_type 
minval, typename T::element_type maxval) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clamp, T) return __sycl_std::__invoke_u_clamp(x, T(minval), T(maxval)); } @@ -811,31 +782,27 @@ clamp(T x, typename T::element_type minval, template typename std::enable_if::value, T>::type clz(T x) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clz, T) return __sycl_std::__invoke_clz(x); } // geninteger mad_hi (geninteger a, geninteger b, geninteger c) template typename std::enable_if::value, T>::type -mad_hi(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad_hi, T) - return __sycl_std::__invoke_s_mad_hi(x, y); +mad_hi(T x, T y, T z) __NOEXC { + return __sycl_std::__invoke_s_mad_hi(x, y, z); } // geninteger mad_hi (geninteger a, geninteger b, geninteger c) template typename std::enable_if::value, T>::type -mad_hi(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad_hi, T) - return __sycl_std::__invoke_u_mad_hi(x, y); +mad_hi(T x, T y, T z) __NOEXC { + return __sycl_std::__invoke_u_mad_hi(x, y, z); } // geninteger mad_sat (geninteger a, geninteger b, geninteger c) template typename std::enable_if::value, T>::type mad_sat(T a, T b, T c) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad_sat, T) return __sycl_std::__invoke_s_mad_sat(a, b, c); } @@ -843,7 +810,6 @@ mad_sat(T a, T b, T c) __NOEXC { template typename std::enable_if::value, T>::type mad_sat(T a, T b, T c) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad_sat, T) return __sycl_std::__invoke_u_mad_sat(a, b, c); } @@ -907,7 +873,6 @@ min(T x, typename T::element_type y) __NOEXC { template typename std::enable_if::value, T>::type mul_hi(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mul_hi, T) return __sycl_std::__invoke_s_mul_hi(x, y); } @@ -915,7 +880,6 @@ mul_hi(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type mul_hi(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mul_hi, T) return __sycl_std::__invoke_u_mul_hi(x, y); } @@ -923,7 +887,6 @@ mul_hi(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type rotate(T v, T i) 
__NOEXC { - __NO_SUPPORT_HOST_VERSION(rotate, T) return __sycl_std::__invoke_rotate(v, i); } @@ -931,7 +894,6 @@ rotate(T v, T i) __NOEXC { template typename std::enable_if::value, T>::type sub_sat(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(sub_sat, T) return __sycl_std::__invoke_s_sub_sat(x, y); } @@ -939,15 +901,18 @@ sub_sat(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type sub_sat(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(sub_sat, T) return __sycl_std::__invoke_u_sub_sat(x, y); } +// TODO delete when Intel CPU OpenCL runtime will be fixed +// OpExtInst ... s_upsample -> _Z8upsampleij (now _Z8upsampleii) +#define __invoke_s_upsample __invoke_u_upsample + // ugeninteger16bit upsample (ugeninteger8bit hi, ugeninteger8bit lo) -template ::value, T>::type> -typename detail::make_upper::type upsample(T hi, T lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) +template +typename std::enable_if::value, + typename detail::make_upper::type>::type +upsample(T hi, T lo) __NOEXC { return __sycl_std::__invoke_u_upsample::type>( hi, lo); } @@ -958,7 +923,6 @@ typename std::enable_if::value && detail::is_ugeninteger8bit::value, typename detail::make_upper::type>::type upsample(T hi, T2 lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_s_upsample::type>( hi, lo); } @@ -968,7 +932,6 @@ template typename std::enable_if::value, typename detail::make_upper::type>::type upsample(T hi, T lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_u_upsample::type>( hi, lo); } @@ -979,7 +942,6 @@ typename std::enable_if::value && detail::is_ugeninteger16bit::value, typename detail::make_upper::type>::type upsample(T hi, T2 lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_s_upsample::type>( hi, lo); } @@ -989,7 +951,6 @@ template typename std::enable_if::value, typename detail::make_upper::type>::type upsample(T hi, T lo) __NOEXC { - 
__NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_u_upsample::type>( hi, lo); } @@ -1000,7 +961,6 @@ typename std::enable_if::value && detail::is_ugeninteger32bit::value, typename detail::make_upper::type>::type upsample(T hi, T2 lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_s_upsample::type>( hi, lo); } @@ -1008,26 +968,23 @@ upsample(T hi, T2 lo) __NOEXC { // geninteger popcount (geninteger x) template typename std::enable_if::value, T>::type -popcount(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(popcount, T) - return __sycl_std::__invoke_popcount(x, y); +popcount(T x) __NOEXC { + return __sycl_std::__invoke_popcount(x); } -// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, geninteger32bit -// z) +// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, +// geninteger32bit z) template typename std::enable_if::value, T>::type mad24(T x, T y, T z) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad24, T) return __sycl_std::__invoke_s_mad24(x, y, z); } -// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, geninteger32bit -// z) +// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, +// geninteger32bit z) template typename std::enable_if::value, T>::type mad24(T x, T y, T z) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad24, T) return __sycl_std::__invoke_u_mad24(x, y, z); } @@ -1035,7 +992,6 @@ mad24(T x, T y, T z) __NOEXC { template typename std::enable_if::value, T>::type mul24(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mul24, T) return __sycl_std::__invoke_s_mul24(x, y); } @@ -1043,7 +999,6 @@ mul24(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type mul24(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mul24, T) return __sycl_std::__invoke_u_mul24(x, y); } @@ -1697,6 +1652,4 @@ tan(T x) __NOEXC { #undef __HALF_NO_ENABLED #undef __NOEXC -#undef __NO_SUPPORT_HOST_VERSION -#undef __NO_SUPPORT_DEVICE_VERSION #undef __DEVICE_SIDE diff --git 
a/sycl/include/CL/sycl/detail/builtins.hpp b/sycl/include/CL/sycl/detail/builtins.hpp index ce873a59b1ba3..bbe99b2927ae3 100644 --- a/sycl/include/CL/sycl/detail/builtins.hpp +++ b/sycl/include/CL/sycl/detail/builtins.hpp @@ -192,8 +192,8 @@ MAKE_CALL_ARG2(s_upsample) MAKE_CALL_ARG1(popcount) MAKE_CALL_ARG3(s_mad24) MAKE_CALL_ARG3(u_mad24) -MAKE_CALL_ARG3(s_mul24) -MAKE_CALL_ARG3(u_mul24) +MAKE_CALL_ARG2(s_mul24) +MAKE_CALL_ARG2(u_mul24) /* --------------- 4.13.5 Common functions. ---------------------------------*/ MAKE_CALL_ARG3(fclamp) MAKE_CALL_ARG1(degrees) diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index 628d10d7cb6b7..3ec8e1c4ea21b 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -12,6 +12,7 @@ #include #include +#include // TODO Delete when half type will supported by SYCL Runtime #define __HALF_NO_ENABLED @@ -674,34 +675,35 @@ template <> struct float_point_to_int { using type = cl_int8; }; template <> struct float_point_to_int { using type = cl_int16; }; // Used for abs and abs_diff built-in -template struct make_unsigned; -template <> struct make_unsigned { using type = uchar; }; -template <> struct make_unsigned { using type = uchar2; }; -template <> struct make_unsigned { using type = uchar3; }; -template <> struct make_unsigned { using type = uchar4; }; -template <> struct make_unsigned { using type = uchar8; }; -template <> struct make_unsigned { using type = uchar16; }; - -template <> struct make_unsigned { using type = ushort; }; -template <> struct make_unsigned { using type = ushort2; }; -template <> struct make_unsigned { using type = ushort3; }; -template <> struct make_unsigned { using type = ushort4; }; -template <> struct make_unsigned { using type = ushort8; }; -template <> struct make_unsigned { using type = ushort16; }; - -template <> struct make_unsigned { using type = uint; }; -template 
<> struct make_unsigned { using type = uint2; }; -template <> struct make_unsigned { using type = uint3; }; -template <> struct make_unsigned { using type = uint4; }; -template <> struct make_unsigned { using type = uint8; }; -template <> struct make_unsigned { using type = uint16; }; - -template <> struct make_unsigned { using type = ulong; }; -template <> struct make_unsigned { using type = ulong2; }; -template <> struct make_unsigned { using type = ulong3; }; -template <> struct make_unsigned { using type = ulong4; }; -template <> struct make_unsigned { using type = ulong8; }; -template <> struct make_unsigned { using type = ulong16; }; +template struct make_unsigned { using type = T; }; + +template <> struct make_unsigned { using type = cl_uchar; }; +template <> struct make_unsigned { using type = cl_uchar2; }; +template <> struct make_unsigned { using type = cl_uchar3; }; +template <> struct make_unsigned { using type = cl_uchar4; }; +template <> struct make_unsigned { using type = cl_uchar8; }; +template <> struct make_unsigned { using type = cl_uchar16; }; + +template <> struct make_unsigned { using type = cl_ushort; }; +template <> struct make_unsigned { using type = cl_ushort2; }; +template <> struct make_unsigned { using type = cl_ushort3; }; +template <> struct make_unsigned { using type = cl_ushort4; }; +template <> struct make_unsigned { using type = cl_ushort8; }; +template <> struct make_unsigned { using type = cl_ushort16; }; + +template <> struct make_unsigned { using type = cl_uint; }; +template <> struct make_unsigned { using type = cl_uint2; }; +template <> struct make_unsigned { using type = cl_uint3; }; +template <> struct make_unsigned { using type = cl_uint4; }; +template <> struct make_unsigned { using type = cl_uint8; }; +template <> struct make_unsigned { using type = cl_uint16; }; + +template <> struct make_unsigned { using type = cl_ulong; }; +template <> struct make_unsigned { using type = cl_ulong2; }; +template <> struct 
make_unsigned { using type = cl_ulong3; }; +template <> struct make_unsigned { using type = cl_ulong4; }; +template <> struct make_unsigned { using type = cl_ulong8; }; +template <> struct make_unsigned { using type = cl_ulong16; }; template <> struct make_unsigned { using type = ulonglong; }; template <> struct make_unsigned { using type = ulonglong2; }; @@ -710,27 +712,102 @@ template <> struct make_unsigned { using type = ulonglong4; }; template <> struct make_unsigned { using type = ulonglong8; }; template <> struct make_unsigned { using type = ulonglong16; }; +template struct make_signed { using type = T; }; + +template <> struct make_signed { using type = cl_char; }; +template <> struct make_signed { using type = cl_char2; }; +template <> struct make_signed { using type = cl_char3; }; +template <> struct make_signed { using type = cl_char4; }; +template <> struct make_signed { using type = cl_char8; }; +template <> struct make_signed { using type = cl_char16; }; + +template <> struct make_signed { using type = cl_short; }; +template <> struct make_signed { using type = cl_short2; }; +template <> struct make_signed { using type = cl_short3; }; +template <> struct make_signed { using type = cl_short4; }; +template <> struct make_signed { using type = cl_short8; }; +template <> struct make_signed { using type = cl_short16; }; + +template <> struct make_signed { using type = cl_int; }; +template <> struct make_signed { using type = cl_int2; }; +template <> struct make_signed { using type = cl_int3; }; +template <> struct make_signed { using type = cl_int4; }; +template <> struct make_signed { using type = cl_int8; }; +template <> struct make_signed { using type = cl_int16; }; + +template <> struct make_signed { using type = cl_long; }; +template <> struct make_signed { using type = cl_long2; }; +template <> struct make_signed { using type = cl_long3; }; +template <> struct make_signed { using type = cl_long4; }; +template <> struct make_signed { using type = 
cl_long8; }; +template <> struct make_signed { using type = cl_long16; }; + +template <> struct make_signed { using type = longlong; }; +template <> struct make_signed { using type = longlong2; }; +template <> struct make_signed { using type = longlong3; }; +template <> struct make_signed { using type = longlong4; }; +template <> struct make_signed { using type = longlong8; }; +template <> struct make_signed { using type = longlong16; }; + // Used for upsample built-in // Bases on Table 4.93: Scalar data type aliases supported by SYCL template struct make_upper; -template <> struct make_upper { - using type = cl::sycl::cl_short; -}; -template <> struct make_upper { - using type = cl::sycl::cl_ushort; -}; -template <> struct make_upper { - using type = cl::sycl::cl_int; -}; -template <> struct make_upper { - using type = cl::sycl::cl_uint; -}; -template <> struct make_upper { - using type = cl::sycl::cl_long; -}; -template <> struct make_upper { - using type = cl::sycl::cl_ulong; -}; + +template <> struct make_upper { using type = cl_short; }; +template <> struct make_upper { using type = cl_short2; }; +template <> struct make_upper { using type = cl_short3; }; +template <> struct make_upper { using type = cl_short4; }; +template <> struct make_upper { using type = cl_short8; }; +template <> struct make_upper { using type = cl_short16; }; + +template <> struct make_upper { using type = cl_ushort; }; +template <> struct make_upper { using type = cl_ushort2; }; +template <> struct make_upper { using type = cl_ushort3; }; +template <> struct make_upper { using type = cl_ushort4; }; +template <> struct make_upper { using type = cl_ushort8; }; +template <> struct make_upper { using type = cl_ushort16; }; + +template <> struct make_upper { using type = cl_int; }; +template <> struct make_upper { using type = cl_int2; }; +template <> struct make_upper { using type = cl_int3; }; +template <> struct make_upper { using type = cl_int4; }; +template <> struct make_upper { using 
type = cl_int8; }; +template <> struct make_upper { using type = cl_int16; }; + +template <> struct make_upper { using type = cl_uint; }; +template <> struct make_upper { using type = cl_uint2; }; +template <> struct make_upper { using type = cl_uint3; }; +template <> struct make_upper { using type = cl_uint4; }; +template <> struct make_upper { using type = cl_uint8; }; +template <> struct make_upper { using type = cl_uint16; }; + +template <> struct make_upper { using type = cl_long; }; +template <> struct make_upper { using type = cl_long2; }; +template <> struct make_upper { using type = cl_long3; }; +template <> struct make_upper { using type = cl_long4; }; +template <> struct make_upper { using type = cl_long8; }; +template <> struct make_upper { using type = cl_long16; }; + +template <> struct make_upper { using type = cl_ulong; }; +template <> struct make_upper { using type = cl_ulong2; }; +template <> struct make_upper { using type = cl_ulong3; }; +template <> struct make_upper { using type = cl_ulong4; }; +template <> struct make_upper { using type = cl_ulong8; }; +template <> struct make_upper { using type = cl_ulong16; }; + +template <> struct make_upper { using type = longlong; }; +template <> struct make_upper { using type = longlong2; }; +template <> struct make_upper { using type = longlong3; }; +template <> struct make_upper { using type = longlong4; }; +template <> struct make_upper { using type = longlong8; }; +template <> struct make_upper { using type = longlong16; }; + +template <> struct make_upper { using type = ulonglong; }; +template <> struct make_upper { using type = ulonglong2; }; +template <> struct make_upper { using type = ulonglong3; }; +template <> struct make_upper { using type = ulonglong4; }; +template <> struct make_upper { using type = ulonglong8; }; +template <> struct make_upper { using type = ulonglong16; }; // Try to get pointer_t, otherwise T template class TryToGetPointerT { @@ -909,6 +986,14 @@ struct RelConverter< 
static R apply(value_t value) { return value; } }; +template static constexpr T max_v() { + return std::numeric_limits::max(); +} + +template static constexpr T min_v() { + return std::numeric_limits::min(); +} + } // namespace detail } // namespace sycl } // namespace cl diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index 34d7100046d04..7ac01580162c8 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -22,6 +22,7 @@ #define NO_HALF_ENABLED namespace s = cl::sycl; +namespace d = s::detail; #define __MAKE_1V(Fun, Call, N, Ret, Arg1) \ Ret##N Fun __NOEXC(Arg1##N x) { \ @@ -81,6 +82,19 @@ namespace s = cl::sycl; return r; \ } +#define __MAKE_1V_2S_3S(Fun, N, Ret, Arg1, Arg2, Arg3) \ + Ret##N Fun __NOEXC(Arg1##N x, Arg2 y, Arg3 z) { \ + Ret##N r; \ + using base1_t = typename Arg1##N::element_type; \ + detail::helper().run_1v_2s_3s( \ + r, \ + [](base1_t x, Arg2 y, Arg3 z) { \ + return cl::__host_std::Fun(x, y, z); \ + }, \ + x, y, z); \ + return r; \ + } + #define __MAKE_1V_2S(Fun, N, Ret, Arg1, Arg2) \ Ret##N Fun __NOEXC(Arg1##N x, Arg2 y) { \ Ret##N r; \ @@ -89,6 +103,7 @@ namespace s = cl::sycl; r, [](base1_t x, Arg2 y) { return cl::__host_std::Fun(x, y); }, x, y); \ return r; \ } + #define __MAKE_SR_1V_AND(Fun, Call, N, Ret, Arg1) \ Ret Fun __NOEXC(Arg1##N x) { \ Ret r; \ @@ -97,6 +112,7 @@ namespace s = cl::sycl; r, [](base_t x) { return cl::__host_std::Call(x); }, x); \ return r; \ } + #define __MAKE_SR_1V_OR(Fun, Call, N, Ret, Arg1) \ Ret Fun __NOEXC(Arg1##N x) { \ Ret r; \ @@ -133,6 +149,7 @@ namespace s = cl::sycl; } #define MAKE_1V(Fun, Ret, Arg1) MAKE_1V_FUNC(Fun, Fun, Ret, Arg1) + #define MAKE_1V_FUNC(Fun, Call, Ret, Arg1) \ __MAKE_1V(Fun, Call, 2, Ret, Arg1) \ __MAKE_1V(Fun, Call, 3, Ret, Arg1) \ @@ -142,6 +159,7 @@ namespace s = cl::sycl; #define MAKE_1V_2V(Fun, Ret, Arg1, Arg2) \ MAKE_1V_2V_FUNC(Fun, Fun, Ret, Arg1, Arg2) + #define MAKE_1V_2V_FUNC(Fun, Call, Ret, Arg1, Arg2) \ 
__MAKE_1V_2V(Fun, Call, 2, Ret, Arg1, Arg2) \ __MAKE_1V_2V(Fun, Call, 3, Ret, Arg1, Arg2) \ @@ -151,6 +169,7 @@ namespace s = cl::sycl; #define MAKE_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) + #define MAKE_1V_2V_3V_FUNC(Fun, Call, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3V(Fun, Call, 2, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3V(Fun, Call, 3, Ret, Arg1, Arg2, Arg3) \ @@ -161,11 +180,14 @@ namespace s = cl::sycl; #define MAKE_SC_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) + #define MAKE_SC_FSC_1V_2V_3V_FV(FunSc, FunV, Ret, Arg1, Arg2, Arg3) \ MAKE_SC_3ARG(FunSc, Ret, Arg1, Arg2, Arg3) \ MAKE_1V_2V_3V_FUNC(FunSc, FunV, Ret, Arg1, Arg2, Arg3) + #define MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \ Ret Fun __NOEXC(Arg1 x, Arg2 y, Arg3 z) { return (Ret)__##Fun(x, y, z); } + #define MAKE_1V_2S(Fun, Ret, Arg1, Arg2) \ __MAKE_1V_2S(Fun, 2, Ret, Arg1, Arg2) \ __MAKE_1V_2S(Fun, 3, Ret, Arg1, Arg2) \ @@ -173,6 +195,14 @@ namespace s = cl::sycl; __MAKE_1V_2S(Fun, 8, Ret, Arg1, Arg2) \ __MAKE_1V_2S(Fun, 16, Ret, Arg1, Arg2) +#define MAKE_1V_2S_3S(Fun, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 2, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 3, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 4, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 8, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 16, Ret, Arg1, Arg2, Arg3) + + #define MAKE_SR_1V_AND(Fun, Ret, Arg1) \ __MAKE_SR_1V_AND(Fun, Fun, 2, Ret, Arg1) \ __MAKE_SR_1V_AND(Fun, Fun, 3, Ret, Arg1) \ @@ -216,18 +246,26 @@ template struct helper { helper().run_1v(r, op, x); r.template swizzle() = op(x.template swizzle()); } + template void run_1v_2v(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2v(r, op, x, y); r.template swizzle() = op(x.template swizzle(), y.template swizzle()); } + template void run_1v_2s(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2s(r, op, x, y); r.template swizzle() = 
op(x.template swizzle(), y); } + template + void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z) { + helper().run_1v_2s_3s(r, op, x, y, z); + r.template swizzle() = op(x.template swizzle(), y, z); + } + template void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2v_rs(r, op, x, y); @@ -266,11 +304,13 @@ template struct helper { op(x.template swizzle(), y.template swizzle(), z.template swizzle()); } + template void run_1v_sr_or(Res &r, Op op, T1 x) { helper().run_1v_sr_or(r, op, x); r = (op(x.template swizzle()) || r); } + template void run_1v_sr_and(Res &r, Op op, T1 x) { helper().run_1v_sr_and(r, op, x); @@ -283,23 +323,33 @@ template <> struct helper<0> { void run_1v(Res &r, Op op, T1 x) { r.template swizzle<0>() = op(x.template swizzle<0>()); } + template void run_1v_2v(Res &r, Op op, T1 x, T2 y) { r.template swizzle<0>() = op(x.template swizzle<0>(), y.template swizzle<0>()); } + template void run_1v_2s(Res &r, Op op, T1 x, T2 y) { r.template swizzle<0>() = op(x.template swizzle<0>(), y); } + + template + void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z) { + r.template swizzle<0>() = op(x.template swizzle<0>(), y, z); + } + template void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { op(r, x.template swizzle<0>(), y.template swizzle<0>()); } + template void run_1v_rs(Res &r, Op op, T1 x) { op(r, x.template swizzle<0>()); } + template void run_1v_2p(Res &r, Op op, T1 x, T2 y) { // TODO avoid creating a temporary variable @@ -307,6 +357,7 @@ template <> struct helper<0> { r.template swizzle<0>() = op(x.template swizzle<0>(), &temp); y->template swizzle<0>() = temp; } + template void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { // TODO avoid creating a temporary variable @@ -315,22 +366,26 @@ template <> struct helper<0> { op(x.template swizzle<0>(), y.template swizzle<0>(), &temp); z->template swizzle<0>() = temp; } + template void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { r.template swizzle<0>() = op(x.template swizzle<0>(), y.template 
swizzle<0>(), z.template swizzle<0>()); } + template void run_1v_sr_or(Res &r, Op op, T1 x) { r = op(x.template swizzle<0>()); } + template void run_1v_sr_and(Res &r, Op op, T1 x) { r = op(x.template swizzle<0>()); } }; } // namespace detail + /* ----------------- 4.13.3 Math functions. Host version --------------------*/ // acos cl_float acos(s::cl_float x) __NOEXC { return std::acos(x); } @@ -1305,19 +1360,517 @@ MAKE_1V(trunc, s::cl_half, s::cl_half) #endif /* --------------- 4.13.4 Integer functions. Host version -------------------*/ +// u_abs +cl_uchar u_abs(s::cl_uchar x) __NOEXC { return std::abs(x); } +cl_ushort u_abs(s::cl_ushort x) __NOEXC { return std::abs(x); } +cl_uint u_abs(s::cl_uint x) __NOEXC { return std::abs(x); } +cl_ulong u_abs(s::cl_ulong x) __NOEXC { return std::abs(x); } +s::ulonglong u_abs(s::ulonglong x) __NOEXC { return std::abs(x); } +MAKE_1V(u_abs, s::cl_uchar, s::cl_uchar) +MAKE_1V(u_abs, s::cl_ushort, s::cl_ushort) +MAKE_1V(u_abs, s::cl_uint, s::cl_uint) +MAKE_1V(u_abs, s::cl_ulong, s::cl_ulong) +MAKE_1V(u_abs, s::ulonglong, s::ulonglong) + +// s_abs +cl_uchar s_abs(s::cl_char x) __NOEXC { return std::abs(x); } +cl_ushort s_abs(s::cl_short x) __NOEXC { return std::abs(x); } +cl_uint s_abs(s::cl_int x) __NOEXC { return std::abs(x); } +cl_ulong s_abs(s::cl_long x) __NOEXC { return std::abs(x); } +s::ulonglong s_abs(s::longlong x) __NOEXC { return std::abs(x); } +MAKE_1V(s_abs, s::cl_uchar, s::cl_char) +MAKE_1V(s_abs, s::cl_ushort, s::cl_short) +MAKE_1V(s_abs, s::cl_uint, s::cl_int) +MAKE_1V(s_abs, s::cl_ulong, s::cl_long) +MAKE_1V(s_abs, s::ulonglong, s::longlong) + +// u_abs_diff +cl_uchar u_abs_diff(s::cl_uchar x, s::cl_uchar y) __NOEXC { + return std::abs(x - y); +} +cl_ushort u_abs_diff(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return std::abs(x - y); +} +cl_uint u_abs_diff(s::cl_uint x, s::cl_uint y) __NOEXC { + return std::abs(x - y); +} +cl_ulong u_abs_diff(s::cl_ulong x, s::cl_ulong y) __NOEXC { + return std::abs(x - y); +} 
+s::ulonglong u_abs_diff(s::ulonglong x, s::ulonglong y) __NOEXC { + return std::abs(x - y); +} +MAKE_1V_2V(u_abs_diff, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_abs_diff, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_abs_diff, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_abs_diff, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_abs_diff, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_abs_diff +cl_uchar s_abs_diff(s::cl_char x, s::cl_char y) __NOEXC { + return std::abs(x - y); +} +cl_ushort s_abs_diff(s::cl_short x, s::cl_short y) __NOEXC { + return std::abs(x - y); +} +cl_uint s_abs_diff(s::cl_int x, s::cl_int y) __NOEXC { return std::abs(x - y); } +cl_ulong s_abs_diff(s::cl_long x, s::cl_long y) __NOEXC { + return std::abs(x - y); +} +s::ulonglong s_abs_diff(s::longlong x, s::longlong y) __NOEXC { + return std::abs(x - y); +} +MAKE_1V_2V(s_abs_diff, s::cl_uchar, s::cl_char, s::cl_char) +MAKE_1V_2V(s_abs_diff, s::cl_ushort, s::cl_short, s::cl_short) +MAKE_1V_2V(s_abs_diff, s::cl_uint, s::cl_int, s::cl_int) +MAKE_1V_2V(s_abs_diff, s::cl_ulong, s::cl_long, s::cl_long) +MAKE_1V_2V(s_abs_diff, s::ulonglong, s::longlong, s::longlong) + +// u_add_sat +template T __u_add_sat(T x, T y) { + return (x < (d::max_v() - y) ? 
x + y : d::max_v()); +} + +cl_uchar u_add_sat(s::cl_uchar x, s::cl_uchar y) __NOEXC { + return __u_add_sat(x, y); +} +cl_ushort u_add_sat(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __u_add_sat(x, y); +} +cl_uint u_add_sat(s::cl_uint x, s::cl_uint y) __NOEXC { + return __u_add_sat(x, y); +} +cl_ulong u_add_sat(s::cl_ulong x, s::cl_ulong y) __NOEXC { + return __u_add_sat(x, y); +} +s::ulonglong u_add_sat(s::ulonglong x, s::ulonglong y) __NOEXC { + return __u_add_sat(x, y); +} +MAKE_1V_2V(u_add_sat, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_add_sat, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_add_sat, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_add_sat, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_add_sat, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_add_sat +template T __s_add_sat(T x, T y) { + if (x > 0 && y > 0) + return (x < (d::max_v() - y) ? (x + y) : d::max_v()); + if (x < 0 && y < 0) + return (x > (d::min_v() - y) ? (x + y) : d::min_v()); + return x + y; +} + +cl_char s_add_sat(s::cl_char x, s::cl_char y) __NOEXC { + return __s_add_sat(x, y); +} +cl_short s_add_sat(s::cl_short x, s::cl_short y) __NOEXC { + return __s_add_sat(x, y); +} +cl_int s_add_sat(s::cl_int x, s::cl_int y) __NOEXC { return __s_add_sat(x, y); } +cl_long s_add_sat(s::cl_long x, s::cl_long y) __NOEXC { + return __s_add_sat(x, y); +} +s::longlong s_add_sat(s::longlong x, s::longlong y) __NOEXC { + return __s_add_sat(x, y); +} +MAKE_1V_2V(s_add_sat, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_add_sat, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_add_sat, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_add_sat, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_add_sat, s::longlong, s::longlong, s::longlong) + +// u_hadd +template T __hadd(T x, T y) { + const T one = 1; + return (x >> one) + (y >> one) + ((y & x) & one); +} + +cl_uchar u_hadd(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __hadd(x, y); } +cl_ushort 
u_hadd(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __hadd(x, y); +} +cl_uint u_hadd(s::cl_uint x, s::cl_uint y) __NOEXC { return __hadd(x, y); } +cl_ulong u_hadd(s::cl_ulong x, s::cl_ulong y) __NOEXC { return __hadd(x, y); } +s::ulonglong u_hadd(s::ulonglong x, s::ulonglong y) __NOEXC { + return __hadd(x, y); +} +MAKE_1V_2V(u_hadd, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_hadd, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_hadd, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_hadd, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_hadd, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_hadd +cl_char s_hadd(s::cl_char x, s::cl_char y) __NOEXC { return __hadd(x, y); } +cl_short s_hadd(s::cl_short x, s::cl_short y) __NOEXC { return __hadd(x, y); } +cl_int s_hadd(s::cl_int x, s::cl_int y) __NOEXC { return __hadd(x, y); } +cl_long s_hadd(s::cl_long x, s::cl_long y) __NOEXC { return __hadd(x, y); } +s::longlong s_hadd(s::longlong x, s::longlong y) __NOEXC { + return __hadd(x, y); +} +MAKE_1V_2V(s_hadd, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_hadd, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_hadd, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_hadd, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_hadd, s::longlong, s::longlong, s::longlong) + +// u_rhadd +template T __rhadd(T x, T y) { + const T one = 1; + return (x >> one) + (y >> one) + ((y | x) & one); +} + +cl_uchar u_rhadd(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __rhadd(x, y); } +cl_ushort u_rhadd(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __rhadd(x, y); +} +cl_uint u_rhadd(s::cl_uint x, s::cl_uint y) __NOEXC { return __rhadd(x, y); } +cl_ulong u_rhadd(s::cl_ulong x, s::cl_ulong y) __NOEXC { return __rhadd(x, y); } +s::ulonglong u_rhadd(s::ulonglong x, s::ulonglong y) __NOEXC { + return __rhadd(x, y); +} +MAKE_1V_2V(u_rhadd, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_rhadd, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_rhadd, 
s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_rhadd, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_rhadd, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_rhadd +cl_char s_rhadd(s::cl_char x, s::cl_char y) __NOEXC { return __rhadd(x, y); } +cl_short s_rhadd(s::cl_short x, s::cl_short y) __NOEXC { return __rhadd(x, y); } +cl_int s_rhadd(s::cl_int x, s::cl_int y) __NOEXC { return __rhadd(x, y); } +cl_long s_rhadd(s::cl_long x, s::cl_long y) __NOEXC { return __rhadd(x, y); } +s::longlong s_rhadd(s::longlong x, s::longlong y) __NOEXC { + return __rhadd(x, y); +} +MAKE_1V_2V(s_rhadd, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_rhadd, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_rhadd, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_rhadd, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_rhadd, s::longlong, s::longlong, s::longlong) + +// u_clamp +template T __clamp(T x, T minval, T maxval) { + return std::min(std::max(x, minval), maxval); +} + +cl_uchar u_clamp(s::cl_uchar x, s::cl_uchar minval, + s::cl_uchar maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_ushort u_clamp(s::cl_ushort x, s::cl_ushort minval, + s::cl_ushort maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_uint u_clamp(s::cl_uint x, s::cl_uint minval, s::cl_uint maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_ulong u_clamp(s::cl_ulong x, s::cl_ulong minval, + s::cl_ulong maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +s::ulonglong u_clamp(s::ulonglong x, s::ulonglong minval, + s::ulonglong maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +MAKE_1V_2V_3V(u_clamp, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V_3V(u_clamp, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V_3V(u_clamp, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V_3V(u_clamp, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V_3V(u_clamp, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) 
+MAKE_1V_2S_3S(u_clamp, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2S_3S(u_clamp, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2S_3S(u_clamp, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2S_3S(u_clamp, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2S_3S(u_clamp, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_clamp +cl_char s_clamp(s::cl_char x, s::cl_char minval, s::cl_char maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_short s_clamp(s::cl_short x, s::cl_short minval, + s::cl_short maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_int s_clamp(s::cl_int x, s::cl_int minval, s::cl_int maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_long s_clamp(s::cl_long x, s::cl_long minval, s::cl_long maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +s::longlong s_clamp(s::longlong x, s::longlong minval, + s::longlong maxval) __NOEXC { + return __clamp(x, minval, maxval); +} + +MAKE_1V_2V_3V(s_clamp, s::cl_char, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V_3V(s_clamp, s::cl_short, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V_3V(s_clamp, s::cl_int, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V_3V(s_clamp, s::cl_long, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V_3V(s_clamp, s::longlong, s::longlong, s::longlong, s::longlong) +MAKE_1V_2S_3S(s_clamp, s::cl_char, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2S_3S(s_clamp, s::cl_short, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2S_3S(s_clamp, s::cl_int, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2S_3S(s_clamp, s::cl_long, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2S_3S(s_clamp, s::longlong, s::longlong, s::longlong, s::longlong) + +// clz +template inline constexpr T __clz_impl(T x, T m, T n = 0) { + return (x & m) ? n : __clz_impl(x, T(m >> 1), ++n); +} + +template inline constexpr T __clz(T x) { + using UT = typename std::make_unsigned::type; + return (x == T(0)) ? 
sizeof(T) * 8 : __clz_impl(x, d::msbMask(x)); +} + +cl_uchar clz(s::cl_uchar x) __NOEXC { return __clz(x); } +cl_char clz(s::cl_char x) __NOEXC { return __clz(x); } +cl_ushort clz(s::cl_ushort x) __NOEXC { return __clz(x); } +cl_short clz(s::cl_short x) __NOEXC { return __clz(x); } +cl_uint clz(s::cl_uint x) __NOEXC { return __clz(x); } +cl_int clz(s::cl_int x) __NOEXC { return __clz(x); } +cl_ulong clz(s::cl_ulong x) __NOEXC { return __clz(x); } +cl_long clz(s::cl_long x) __NOEXC { return __clz(x); } +s::ulonglong clz(s::ulonglong x) __NOEXC { return __clz(x); } +s::longlong clz(s::longlong x) __NOEXC { return __clz(x); } +MAKE_1V(clz, s::cl_uchar, s::cl_uchar) +MAKE_1V(clz, s::cl_char, s::cl_char) +MAKE_1V(clz, s::cl_ushort, s::cl_ushort) +MAKE_1V(clz, s::cl_short, s::cl_short) +MAKE_1V(clz, s::cl_uint, s::cl_uint) +MAKE_1V(clz, s::cl_int, s::cl_int) +MAKE_1V(clz, s::cl_ulong, s::cl_ulong) +MAKE_1V(clz, s::cl_long, s::cl_long) +MAKE_1V(clz, s::longlong, s::longlong) +MAKE_1V(clz, s::ulonglong, s::ulonglong) + +// s_mul_hi +template T __mul_hi(T a, T b) { + using UPT = typename d::make_upper::type; + UPT a_s = a; + UPT b_s = b; + UPT mul = a_s * b_s; + return (mul >> (sizeof(T) * 8)); +} + +// T is minimum of 64 bits- long or longlong +template T __long_mul_hi(T a, T b) { + int halfsize = (sizeof(T) * 8) / 2; + T a1 = a >> halfsize; + T a0 = (a << halfsize) >> halfsize; + T b1 = b >> halfsize; + T b0 = (b << halfsize) >> halfsize; + + // a1b1 - for bits - [64-128) + // a1b0 a0b1 for bits - [32-96) + // a0b0 for bits - [0-64) + T a1b1 = a1 * b1; + T a0b1 = a0 * b1; + T a1b0 = a1 * b0; + T a0b0 = a0 * b0; + + // To get the upper 64 bits: + // 64 bits from a1b1, upper 32 bits from [a1b0 + (a0b1 + a0b0>>32 (carry bit + // in 33rd bit))] with carry bit on 64th bit - use of hadd. Add the a1b1 to + // the above 32 bit result. 
+ T result = + a1b1 + (__hadd(a1b0, (a0b1 + (a0b0 >> halfsize))) >> (halfsize - 1)); + return result; +} + +cl_char s_mul_hi(cl_char a, cl_char b) { return __mul_hi(a, b); } +cl_short s_mul_hi(cl_short a, cl_short b) { return __mul_hi(a, b); } +cl_int s_mul_hi(cl_int a, cl_int b) { return __mul_hi(a, b); } +cl_long s_mul_hi(s::cl_long x, s::cl_long y) __NOEXC { + return __long_mul_hi(x, y); +} +s::longlong s_mul_hi(s::longlong x, s::longlong y) __NOEXC { + return __long_mul_hi(x, y); +} + +MAKE_1V_2V(s_mul_hi, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_mul_hi, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_mul_hi, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_mul_hi, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_mul_hi, s::longlong, s::longlong, s::longlong) + +// u_mul_hi +cl_uchar u_mul_hi(cl_uchar a, cl_uchar b) { return __mul_hi(a, b); } +cl_ushort u_mul_hi(cl_ushort a, cl_ushort b) { return __mul_hi(a, b); } +cl_uint u_mul_hi(cl_uint a, cl_uint b) { return __mul_hi(a, b); } +cl_ulong u_mul_hi(s::cl_ulong x, s::cl_ulong y) __NOEXC { + return __long_mul_hi(x, y); +} +s::ulonglong u_mul_hi(s::ulonglong x, s::ulonglong y) __NOEXC { + return __long_mul_hi(x, y); +} + +MAKE_1V_2V(u_mul_hi, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_mul_hi, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_mul_hi, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_mul_hi, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_mul_hi, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_mad_hi +template T __mad_hi(T a, T b, T c) { return __mul_hi(a, b) + c; } +template T __long_mad_hi(T a, T b, T c) { + return __long_mul_hi(a, b) + c; +} + +cl_char s_mad_hi(s::cl_char x, s::cl_char minval, s::cl_char maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_short s_mad_hi(s::cl_short x, s::cl_short minval, + s::cl_short maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_int s_mad_hi(s::cl_int x, s::cl_int minval, s::cl_int maxval) __NOEXC { 
+ return __mad_hi(x, minval, maxval); +} +cl_long s_mad_hi(s::cl_long x, s::cl_long minval, s::cl_long maxval) __NOEXC { + return __long_mad_hi(x, minval, maxval); +} +s::longlong s_mad_hi(s::longlong x, s::longlong minval, + s::longlong maxval) __NOEXC { + return __long_mad_hi(x, minval, maxval); +} + +MAKE_1V_2V_3V(s_mad_hi, s::cl_char, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V_3V(s_mad_hi, s::cl_short, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V_3V(s_mad_hi, s::cl_int, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V_3V(s_mad_hi, s::cl_long, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V_3V(s_mad_hi, s::longlong, s::longlong, s::longlong, s::longlong) + +// u_mad_hi +cl_uchar u_mad_hi(s::cl_uchar x, s::cl_uchar minval, + s::cl_uchar maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_ushort u_mad_hi(s::cl_ushort x, s::cl_ushort minval, + s::cl_ushort maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_uint u_mad_hi(s::cl_uint x, s::cl_uint minval, s::cl_uint maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_ulong u_mad_hi(s::cl_ulong x, s::cl_ulong minval, + s::cl_ulong maxval) __NOEXC { + return __long_mad_hi(x, minval, maxval); +} +s::ulonglong u_mad_hi(s::ulonglong x, s::ulonglong minval, + s::ulonglong maxval) __NOEXC { + return __long_mad_hi(x, minval, maxval); +} +MAKE_1V_2V_3V(u_mad_hi, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V_3V(u_mad_hi, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V_3V(u_mad_hi, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V_3V(u_mad_hi, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V_3V(u_mad_hi, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_mad_sat +template T __s_mad_sat(T a, T b, T c) { + using UPT = typename d::make_upper::type; + UPT mul = UPT(a) * UPT(b); + const UPT max = d::max_v(); + const UPT min = d::min_v(); + mul = std::min(std::max(mul, min), max); + return s_add_sat(T(mul), c); +} + 
+template T __s_long_mad_sat(T a, T b, T c) { + bool neg_prod = (a < 0) ^ (b < 0); + T mulhi = s_mul_hi(a, b); + // check mul_hi. If it is any value != 0. + // if prod is +ve, any value in mulhi means we need to saturate. + // if prod is -ve, any value in mulhi besides -1 means we need to saturate. + if (!neg_prod && mulhi != 0) + return d::max_v(); + else if (neg_prod && mulhi != -1) + return d::max_v(); // essentially some other negative value. + return s_add_sat(T(a * b), c); +} + +cl_char s_mad_sat(s::cl_char a, s::cl_char b, s::cl_char c) __NOEXC { + return __s_mad_sat(a, b, c); +} +cl_short s_mad_sat(s::cl_short a, s::cl_short b, s::cl_short c) __NOEXC { + return __s_mad_sat(a, b, c); +} +cl_int s_mad_sat(s::cl_int a, s::cl_int b, s::cl_int c) __NOEXC { + return __s_mad_sat(a, b, c); +} +cl_long s_mad_sat(s::cl_long a, s::cl_long b, s::cl_long c) __NOEXC { + return __s_long_mad_sat(a, b, c); +} +s::longlong s_mad_sat(s::longlong a, s::longlong b, s::longlong c) __NOEXC { + return __s_long_mad_sat(a, b, c); +} +MAKE_1V_2V_3V(s_mad_sat, s::cl_char, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V_3V(s_mad_sat, s::cl_short, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V_3V(s_mad_sat, s::cl_int, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V_3V(s_mad_sat, s::cl_long, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V_3V(s_mad_sat, s::longlong, s::longlong, s::longlong, s::longlong) + +// u_mad_sat +template T __u_mad_sat(T a, T b, T c) { + using UPT = typename d::make_upper::type; + UPT mul = UPT(a) * UPT(b); + const UPT min = d::min_v(); + const UPT max = d::max_v(); + mul = std::min(std::max(mul, min), max); + return u_add_sat(T(mul), c); +} + +template T __u_long_mad_sat(T a, T b, T c) { + T mulhi = u_mul_hi(a, b); + // check mul_hi. If it is any value != 0. 
+ if (mulhi != 0) + return d::max_v(); + else + return u_add_sat(T(a * b), c); +} + +cl_uchar u_mad_sat(s::cl_uchar a, s::cl_uchar b, s::cl_uchar c) __NOEXC { + return __u_mad_sat(a, b, c); +} +cl_ushort u_mad_sat(s::cl_ushort a, s::cl_ushort b, s::cl_ushort c) __NOEXC { + return __u_mad_sat(a, b, c); +} +cl_uint u_mad_sat(s::cl_uint a, s::cl_uint b, s::cl_uint c) __NOEXC { + return __u_mad_sat(a, b, c); +} +cl_ulong u_mad_sat(s::cl_ulong a, s::cl_ulong b, s::cl_ulong c) __NOEXC { + return __u_long_mad_sat(a, b, c); +} +s::ulonglong u_mad_sat(s::ulonglong a, s::ulonglong b, s::ulonglong c) __NOEXC { + return __u_long_mad_sat(a, b, c); +} +MAKE_1V_2V_3V(u_mad_sat, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V_3V(u_mad_sat, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V_3V(u_mad_sat, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V_3V(u_mad_sat, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V_3V(u_mad_sat, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) + // s_max cl_char s_max(s::cl_char x, s::cl_char y) __NOEXC { return std::max(x, y); } cl_short s_max(s::cl_short x, s::cl_short y) __NOEXC { return std::max(x, y); } cl_int s_max(s::cl_int x, s::cl_int y) __NOEXC { return std::max(x, y); } cl_long s_max(s::cl_long x, s::cl_long y) __NOEXC { return std::max(x, y); } +s::longlong s_max(s::longlong x, s::longlong y) __NOEXC { + return std::max(x, y); +} MAKE_1V_2V(s_max, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2V(s_max, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2V(s_max, s::cl_int, s::cl_int, s::cl_int) MAKE_1V_2V(s_max, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_max, s::longlong, s::longlong, s::longlong) MAKE_1V_2S(s_max, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2S(s_max, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2S(s_max, s::cl_int, s::cl_int, s::cl_int) MAKE_1V_2S(s_max, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2S(s_max, s::longlong, s::longlong, s::longlong) // 
u_max cl_uchar u_max(s::cl_uchar x, s::cl_uchar y) __NOEXC { return std::max(x, y); } @@ -1326,28 +1879,38 @@ cl_ushort u_max(s::cl_ushort x, s::cl_ushort y) __NOEXC { } cl_uint u_max(s::cl_uint x, s::cl_uint y) __NOEXC { return std::max(x, y); } cl_ulong u_max(s::cl_ulong x, s::cl_ulong y) __NOEXC { return std::max(x, y); } +s::ulonglong u_max(s::ulonglong x, s::ulonglong y) __NOEXC { + return std::max(x, y); +} MAKE_1V_2V(u_max, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2V(u_max, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2V(u_max, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_1V_2V(u_max, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_max, s::ulonglong, s::ulonglong, s::ulonglong) MAKE_1V_2S(u_max, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2S(u_max, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2S(u_max, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_1V_2S(u_max, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2S(u_max, s::ulonglong, s::ulonglong, s::ulonglong) // s_min cl_char s_min(s::cl_char x, s::cl_char y) __NOEXC { return std::min(x, y); } cl_short s_min(s::cl_short x, s::cl_short y) __NOEXC { return std::min(x, y); } cl_int s_min(s::cl_int x, s::cl_int y) __NOEXC { return std::min(x, y); } cl_long s_min(s::cl_long x, s::cl_long y) __NOEXC { return std::min(x, y); } +s::longlong s_min(s::longlong x, s::longlong y) __NOEXC { + return std::min(x, y); +} MAKE_1V_2V(s_min, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2V(s_min, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2V(s_min, s::cl_int, s::cl_int, s::cl_int) MAKE_1V_2V(s_min, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_min, s::longlong, s::longlong, s::longlong) MAKE_1V_2S(s_min, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2S(s_min, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2S(s_min, s::cl_int, s::cl_int, s::cl_int) MAKE_1V_2S(s_min, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2S(s_min, s::longlong, s::longlong, s::longlong) // u_min cl_uchar u_min(s::cl_uchar x, s::cl_uchar 
y) __NOEXC { return std::min(x, y); } @@ -1356,14 +1919,205 @@ cl_ushort u_min(s::cl_ushort x, s::cl_ushort y) __NOEXC { } cl_uint u_min(s::cl_uint x, s::cl_uint y) __NOEXC { return std::min(x, y); } cl_ulong u_min(s::cl_ulong x, s::cl_ulong y) __NOEXC { return std::min(x, y); } +s::ulonglong u_min(s::ulonglong x, s::ulonglong y) __NOEXC { + return std::min(x, y); +} MAKE_1V_2V(u_min, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2V(u_min, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2V(u_min, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_1V_2V(u_min, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_min, s::ulonglong, s::ulonglong, s::ulonglong) MAKE_1V_2S(u_min, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2S(u_min, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2S(u_min, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_1V_2S(u_min, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2S(u_min, s::ulonglong, s::ulonglong, s::ulonglong) + +// rotate +template T __rotate(T x, T n) { + using UT = typename std::make_unsigned::type; + return (x << n) | (UT(x) >> ((sizeof(x) * 8) - n)); +} + +cl_uchar rotate(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __rotate(x, y); } +cl_ushort rotate(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __rotate(x, y); +} +cl_uint rotate(s::cl_uint x, s::cl_uint y) __NOEXC { return __rotate(x, y); } +cl_ulong rotate(s::cl_ulong x, s::cl_ulong y) __NOEXC { return __rotate(x, y); } +s::ulonglong rotate(s::ulonglong x, s::ulonglong y) __NOEXC { + return __rotate(x, y); +} +cl_char rotate(s::cl_char x, s::cl_char y) __NOEXC { return __rotate(x, y); } +cl_short rotate(s::cl_short x, s::cl_short y) __NOEXC { return __rotate(x, y); } +cl_int rotate(s::cl_int x, s::cl_int y) __NOEXC { return __rotate(x, y); } +cl_long rotate(s::cl_long x, s::cl_long y) __NOEXC { return __rotate(x, y); } +s::longlong rotate(s::longlong x, s::longlong y) __NOEXC { + return __rotate(x, y); +} +MAKE_1V_2V(rotate, s::cl_uchar, s::cl_uchar, s::cl_uchar) 
+MAKE_1V_2V(rotate, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(rotate, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(rotate, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(rotate, s::ulonglong, s::ulonglong, s::ulonglong) +MAKE_1V_2V(rotate, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(rotate, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(rotate, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(rotate, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(rotate, s::longlong, s::longlong, s::longlong) + +// u_sub_sat +template T __u_sub_sat(T x, T y) { + return (y < (x - d::min_v())) ? (x - y) : d::min_v(); +} + +cl_uchar u_sub_sat(s::cl_uchar x, s::cl_uchar y) __NOEXC { + return __u_sub_sat(x, y); +} +cl_ushort u_sub_sat(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __u_sub_sat(x, y); +} +cl_uint u_sub_sat(s::cl_uint x, s::cl_uint y) __NOEXC { + return __u_sub_sat(x, y); +} +cl_ulong u_sub_sat(s::cl_ulong x, s::cl_ulong y) __NOEXC { + return __u_sub_sat(x, y); +} +s::ulonglong u_sub_sat(s::ulonglong x, s::ulonglong y) __NOEXC { + return __u_sub_sat(x, y); +} +MAKE_1V_2V(u_sub_sat, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_sub_sat, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_sub_sat, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_sub_sat, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_sub_sat, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_sub_sat +template T __s_sub_sat(T x, T y) { + if (y > 0) + return (y < (x - d::min_v()) ? x - y : d::min_v()); + else if (y < 0) + return (y > (x - d::max_v()) ? 
x - y : d::max_v()); + else + return x; +} + +cl_char s_sub_sat(s::cl_char x, s::cl_char y) __NOEXC { + return __s_sub_sat(x, y); +} +cl_short s_sub_sat(s::cl_short x, s::cl_short y) __NOEXC { + return __s_sub_sat(x, y); +} +cl_int s_sub_sat(s::cl_int x, s::cl_int y) __NOEXC { return __s_sub_sat(x, y); } +cl_long s_sub_sat(s::cl_long x, s::cl_long y) __NOEXC { + return __s_sub_sat(x, y); +} +s::longlong s_sub_sat(s::longlong x, s::longlong y) __NOEXC { + return __s_sub_sat(x, y); +} +MAKE_1V_2V(s_sub_sat, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_sub_sat, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_sub_sat, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_sub_sat, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_sub_sat, s::longlong, s::longlong, s::longlong) + +// u_upsample +template +typename d::make_upper::type __upsample(T1 hi, T2 lo) { + using UT = typename d::make_upper::type; + return (UT(hi) << (sizeof(T1) * 8)) | lo; +} + +cl_ushort u_upsample(s::cl_uchar x, s::cl_uchar y) __NOEXC { + return __upsample(x, y); +} +cl_uint u_upsample(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __upsample(x, y); +} +cl_ulong u_upsample(s::cl_uint x, s::cl_uint y) __NOEXC { + return __upsample(x, y); +} +MAKE_1V_2V(u_upsample, s::cl_ushort, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_upsample, s::cl_uint, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_upsample, s::cl_ulong, s::cl_uint, s::cl_uint) + +// TODO delete when Intel CPU OpenCL runtime will be fixed +// OpExtInst ... 
s_upsample -> _Z8upsampleij (now _Z8upsampleii) +#define s_upsample u_upsample + +cl_short s_upsample(s::cl_char x, s::cl_uchar y) __NOEXC { + return __upsample(x, y); +} +cl_int s_upsample(s::cl_short x, s::cl_ushort y) __NOEXC { + return __upsample(x, y); +} +cl_long s_upsample(s::cl_int x, s::cl_uint y) __NOEXC { + return __upsample(x, y); +} +MAKE_1V_2V(u_upsample, s::cl_short, s::cl_char, s::cl_uchar) +MAKE_1V_2V(u_upsample, s::cl_int, s::cl_short, s::cl_ushort) +MAKE_1V_2V(u_upsample, s::cl_long, s::cl_int, s::cl_uint) + +#undef s_upsample + +// popcount +template inline constexpr T __popcount_impl(T x, size_t n = 0) { + return (x == T(0)) ? n : __popcount_impl(x >> 1, ((x & T(1)) ? ++n : n)); +} + +template inline constexpr T __popcount(T x) { + using UT = typename d::make_unsigned::type; + return __popcount_impl(UT(x)); +} + +cl_uchar popcount(s::cl_uchar x) __NOEXC { return __popcount(x); } +cl_ushort popcount(s::cl_ushort x) __NOEXC { return __popcount(x); } +cl_uint popcount(s::cl_uint x) __NOEXC { return __popcount(x); } +cl_ulong popcount(s::cl_ulong x) __NOEXC { return __popcount(x); } +s::ulonglong popcount(s::ulonglong x) __NOEXC { return __popcount(x); } +MAKE_1V(popcount, s::cl_uchar, s::cl_uchar) +MAKE_1V(popcount, s::cl_ushort, s::cl_ushort) +MAKE_1V(popcount, s::cl_uint, s::cl_uint) +MAKE_1V(popcount, s::cl_ulong, s::cl_ulong) +MAKE_1V(popcount, s::ulonglong, s::ulonglong) + +cl_char popcount(s::cl_char x) __NOEXC { return __popcount(x); } +cl_short popcount(s::cl_short x) __NOEXC { return __popcount(x); } +cl_int popcount(s::cl_int x) __NOEXC { return __popcount(x); } +cl_long popcount(s::cl_long x) __NOEXC { return __popcount(x); } +s::longlong popcount(s::longlong x) __NOEXC { return __popcount(x); } +MAKE_1V(popcount, s::cl_char, s::cl_char) +MAKE_1V(popcount, s::cl_short, s::cl_short) +MAKE_1V(popcount, s::cl_int, s::cl_int) +MAKE_1V(popcount, s::cl_long, s::cl_long) +MAKE_1V(popcount, s::longlong, s::longlong) + +// u_mad24 +template T 
__mad24(T x, T y, T z) { return (x * y) + z; } + +cl_uint u_mad24(s::cl_uint x, s::cl_uint y, s::cl_uint z) __NOEXC { + return __mad24(x, y, z); +} +MAKE_1V_2V_3V(u_mad24, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) + +// s_mad24 +cl_int s_mad24(s::cl_int x, s::cl_int y, s::cl_int z) __NOEXC { + return __mad24(x, y, z); +} + +MAKE_1V_2V_3V(s_mad24, s::cl_int, s::cl_int, s::cl_int, s::cl_int) + +// u_mul24 +template T __mul24(T x, T y) { return (x * y); } + +cl_uint u_mul24(s::cl_uint x, s::cl_uint y) __NOEXC { return __mul24(x, y); } + +MAKE_1V_2V(u_mul24, s::cl_uint, s::cl_uint, s::cl_uint) + +// s_mul24 +cl_int s_mul24(s::cl_int x, s::cl_int y) __NOEXC { return __mul24(x, y); } + +MAKE_1V_2V(s_mul24, s::cl_int, s::cl_int, s::cl_int) /* --------------- 4.13.5 Common functions. Host version --------------------*/ // fclamp @@ -2401,7 +3155,6 @@ MAKE_1V(half_tan, s::cl_float, s::cl_float) #undef MAKE_1V_2S #undef MAKE_SR_1V_AND #undef MAKE_SR_1V_OR -#undef MSB_MASK #undef MAKE_1V_2P #undef MAKE_GEO_1V_2V_RS #undef MAKE_1V_2V_3P diff --git a/sycl/test/built-ins/scalar_integer.cpp b/sycl/test/built-ins/scalar_integer.cpp index 76f28ceb52080..18e6a0f93aec6 100644 --- a/sycl/test/built-ins/scalar_integer.cpp +++ b/sycl/test/built-ins/scalar_integer.cpp @@ -9,73 +9,404 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // max { - cl::sycl::cl_int r{0}; + s::cl_int r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_int{5}, cl::sycl::cl_int{2}); - }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = s::max(s::cl_int{5}, s::cl_int{2}); }); }); } - std::cout << "r " << r << std::endl; assert(r == 5); } + // max { - cl::sycl::cl_uint r{0}; + s::cl_uint r{0}; { - buffer 
BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_uint{5}, cl::sycl::cl_uint{2}); - }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = s::max(s::cl_uint{5}, s::cl_uint{2}); }); }); } - std::cout << "r " << r << std::endl; assert(r == 5); } + // min { - cl::sycl::cl_int r{0}; + s::cl_int r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = cl::sycl::min(cl::sycl::cl_int{5}, cl::sycl::cl_int{2}); - }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = s::min(s::cl_int{5}, s::cl_int{2}); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2); } + // min { - cl::sycl::cl_uint r{0}; + s::cl_uint r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = cl::sycl::min(cl::sycl::cl_uint{5}, cl::sycl::cl_uint{2}); - }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = s::min(s::cl_uint{5}, s::cl_uint{2}); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2); } + // abs + { + s::cl_uint r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = s::abs(s::cl_int{-5}); }); + }); + } + assert(r == 5); + } + + // abs_diff + { + s::cl_uint r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + 
myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = s::abs_diff(s::cl_int{-5}, s::cl_int{-1}); }); + }); + } + assert(r == 4); + } + + // add_sat + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::add_sat(s::cl_int{0x7FFFFFFF}, s::cl_int{100}); + }); + }); + } + assert(r == 0x7FFFFFFF); + } + + // hadd + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::hadd(s::cl_int{0x0000007F}, s::cl_int{0x00000020}); + }); + }); + } + assert(r == 0x0000004F); + } + + // rhadd + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::rhadd(s::cl_int{0x0000007F}, s::cl_int{0x00000020}); + }); + }); + } + assert(r == 0x50); + } + + // clamp + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::clamp(s::cl_int{5}, s::cl_int{10}, s::cl_int{30}); + }); + }); + } + assert(r == 10); + } + + // clz + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = s::clz(s::cl_int{0x0FFFFFFF}); }); + }); + } + assert(r == 4); + } + + // mad_hi + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mad_hi(s::cl_int{0x10000000}, s::cl_int{0x00000100}, + s::cl_int{0x00000001}); + 
}); // 2^28 * 2^8 = 2^36 -> 0x10 00000000. + }); + } + assert(r == 0x11); + } + + // mad_sat + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mad_sat(s::cl_int{0x10000000}, s::cl_int{0x00000100}, + s::cl_int{0x00000001}); + }); // 2^31 * 2^8 = 2^39 -> 0x80 00000000 -> reuslt is saturated in the + // product. + }); + } + assert(r == 0x7FFFFFFF); + } + + // mul_hi + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mul_hi(s::cl_int{0x10000000}, s::cl_int{0x00000100}); + }); // 2^28 * 2^8 = 2^36 -> 0x10 00000000. + }); + } + assert(r == 0x10); + } + + // rotate + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::rotate(s::cl_int{0x11100000}, s::cl_int{12}); + }); + }); + } + assert(r == 0x00000111); + } + + // sub_sat + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::sub_sat(s::cl_int{10}, s::cl_int(0x80000000)); + }); // 10 - (-2^31(minimum value)) = saturates on Maximum value + }); + } + assert(r == 0x7FFFFFFF); + } + + // upsample - 1 + { + s::cl_ushort r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_uchar{0x10}, s::cl_uchar{0x10}); + }); + }); + } + assert(r == 0x1010); + } + + // upsample - 2 + { + s::cl_short r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto 
AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_char{0x10}, s::cl_uchar{0x10}); + }); + }); + } + assert(r == 0x1010); + } + + // upsample - 3 + { + s::cl_uint r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_ushort{0x0010}, s::cl_ushort{0x0010}); + }); + }); + } + assert(r == 0x00100010); + } + + // upsample - 4 + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_short{0x0010}, s::cl_ushort{0x0010}); + }); + }); + } + assert(r == 0x00100010); + } + + // upsample - 5 + { + s::cl_ulong r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_uint{0x00000010}, s::cl_uint{0x00000010}); + }); + }); + } + assert(r == 0x0000001000000010); + } + + // upsample - 6 + { + s::cl_long r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_int{0x00000010}, s::cl_uint{0x00000010}); + }); + }); + } + assert(r == 0x0000001000000010); + } + + // popcount + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task( + [=]() { AccR[0] = s::popcount(s::cl_int{0x000000FF}); }); + }); + } + assert(r == 8); + } + + // mad24 + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + 
s::mad24(s::cl_int(0xFFFFFFFF), s::cl_int{20}, s::cl_int{20}); + }); + }); + } + assert(r == 0); + } + + // mul24 + { + s::cl_int r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mul24(s::cl_int(0xFFFFFFFF), s::cl_int{20}); + }); + }); + } + assert(r == -20); + } + return 0; -} \ No newline at end of file +} diff --git a/sycl/test/built-ins/vector_integer.cpp b/sycl/test/built-ins/vector_integer.cpp index c6e4ee9b0247a..dabf8715640e3 100644 --- a/sycl/test/built-ins/vector_integer.cpp +++ b/sycl/test/built-ins/vector_integer.cpp @@ -1,7 +1,7 @@ // RUN: %clang -std=c++11 -fsycl %s -o %t.out -lstdc++ -lOpenCL -lsycl // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUNx: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out #include @@ -9,174 +9,597 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // max { - cl::sycl::cl_int2 r{0}; + s::cl_int2 r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::max(cl::sycl::cl_int2{5, 3}, cl::sycl::cl_int2{2, 7}); + AccR[0] = s::max(s::cl_int2{5, 3}, s::cl_int2{2, 7}); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); assert(r1 == 5); assert(r2 == 7); } // max { - cl::sycl::cl_uint2 r{0}; + s::cl_uint2 r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + 
myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::max(cl::sycl::cl_uint2{5, 3}, cl::sycl::cl_uint2{2, 7}); + AccR[0] = s::max(s::cl_uint2{5, 3}, s::cl_uint2{2, 7}); }); }); } - cl::sycl::cl_uint r1 = r.x(); - cl::sycl::cl_uint r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); assert(r1 == 5); assert(r2 == 7); } // max { - cl::sycl::cl_int2 r{0}; + s::cl_int2 r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_int2{5, 3}, cl::sycl::cl_int{2}); + AccR[0] = s::max(s::cl_int2{5, 3}, s::cl_int{2}); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); assert(r1 == 5); assert(r2 == 3); } // max { - cl::sycl::cl_uint2 r{0}; + s::cl_uint2 r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::max(cl::sycl::cl_uint2{5, 3}, cl::sycl::cl_uint{2}); + AccR[0] = s::max(s::cl_uint2{5, 3}, s::cl_uint{2}); }); }); } - cl::sycl::cl_uint r1 = r.x(); - cl::sycl::cl_uint r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); assert(r1 == 5); assert(r2 == 3); } // min { - cl::sycl::cl_int2 r{0}; + s::cl_int2 r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + 
s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::min(cl::sycl::cl_int2{5, 3}, cl::sycl::cl_int2{2, 7}); + AccR[0] = s::min(s::cl_int2{5, 3}, s::cl_int2{2, 7}); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); assert(r1 == 2); assert(r2 == 3); } // min { - cl::sycl::cl_uint2 r{0}; + s::cl_uint2 r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::min(cl::sycl::cl_uint2{5, 3}, cl::sycl::cl_uint2{2, 7}); + AccR[0] = s::min(s::cl_uint2{5, 3}, s::cl_uint2{2, 7}); }); }); } - cl::sycl::cl_uint r1 = r.x(); - cl::sycl::cl_uint r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); assert(r1 == 2); assert(r2 == 3); } // min { - cl::sycl::cl_int2 r{0}; + s::cl_int2 r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::min(cl::sycl::cl_int2{5, 3}, cl::sycl::cl_int{2}); + AccR[0] = s::min(s::cl_int2{5, 3}, s::cl_int{2}); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); assert(r1 == 2); assert(r2 == 2); } // min { - cl::sycl::cl_uint2 r{0}; + s::cl_uint2 r{0}; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - 
myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::min(cl::sycl::cl_uint2{5, 3}, cl::sycl::cl_uint{2}); + AccR[0] = s::min(s::cl_uint2{5, 3}, s::cl_uint{2}); }); }); } - cl::sycl::cl_uint r1 = r.x(); - cl::sycl::cl_uint r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); assert(r1 == 2); assert(r2 == 2); } + // abs + { + s::cl_uint2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::abs(s::cl_int2{-5, -2}); + }); + }); + } + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); + assert(r1 == 5); + assert(r2 == 2); + } + + // abs_diff + { + s::cl_uint2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::abs_diff(s::cl_int2{-5, -2}, s::cl_int2{-1, -1}); + }); + }); + } + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); + assert(r1 == 4); + assert(r2 == 1); + } + + // add_sat + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::add_sat(s::cl_int2{0x7FFFFFFF, 0x7FFFFFFF}, + s::cl_int2{100, 90}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x7FFFFFFF); + assert(r2 == 0x7FFFFFFF); + } + + // hadd + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::hadd(s::cl_int2{0x0000007F, 0x0000007F}, + s::cl_int2{0x00000020, 
0x00000020}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x0000004F); + assert(r2 == 0x0000004F); + } + + // rhadd + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::rhadd(s::cl_int2{0x0000007F, 0x0000007F}, + s::cl_int2{0x00000020, 0x00000020}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x00000050); + assert(r2 == 0x00000050); + } + + // clamp - 1 + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::clamp(s::cl_int2{5, 5}, s::cl_int2{10, 10}, + s::cl_int2{30, 30}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 10); + assert(r2 == 10); + } + + // clamp - 2 + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::clamp(s::cl_int2{5, 5}, s::cl_int{10}, s::cl_int{30}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 10); + assert(r2 == 10); + } + + // clz + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::clz(s::cl_int2{0x0FFFFFFF, 0x0FFFFFFF}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 4); + assert(r2 == 4); + } + + // mad_hi + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::mad_hi(s::cl_int2{0x10000000, 0x10000000}, + s::cl_int2{0x00000100, 0x00000100}, 
s::cl_int2{1, 1}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x11); + assert(r2 == 0x11); + } + + // mad_sat + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::mad_sat(s::cl_int2{0x10000000, 0x10000000}, + s::cl_int2{0x00000100, 0x00000100}, s::cl_int2{1, 1}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x7FFFFFFF); + assert(r2 == 0x7FFFFFFF); + } + + // mul_hi + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mul_hi(s::cl_int2{0x10000000, 0x10000000}, + s::cl_int2{0x00000100, 0x00000100}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x10); + assert(r2 == 0x10); + } + + // rotate + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::rotate(s::cl_int2{0x11100000, 0x11100000}, s::cl_int2{12, 12}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x00000111); + assert(r2 == 0x00000111); + } + + // sub_sat + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::sub_sat(s::cl_int2{10, 10}, + s::cl_int2{int(0x80000000), int(0x80000000)}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x7FFFFFFF); + assert(r2 == 0x7FFFFFFF); + } + + // upsample - 1 + { + s::cl_ushort2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + 
cgh.single_task([=]() { + AccR[0] = + s::upsample(s::cl_uchar2{0x10, 0x10}, s::cl_uchar2{0x10, 0x10}); + }); + }); + } + s::cl_ushort r1 = r.x(); + s::cl_ushort r2 = r.y(); + assert(r1 == 0x1010); + assert(r2 == 0x1010); + } + + // upsample - 2 + { + s::cl_short2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::upsample(s::cl_char2{0x10, 0x10}, s::cl_uchar2{0x10, 0x10}); + }); + }); + } + s::cl_short r1 = r.x(); + s::cl_short r2 = r.y(); + assert(r1 == 0x1010); + assert(r2 == 0x1010); + } + + // upsample - 3 + { + s::cl_uint2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_ushort2{0x0010, 0x0010}, + s::cl_ushort2{0x0010, 0x0010}); + }); + }); + } + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); + assert(r1 == 0x00100010); + assert(r2 == 0x00100010); + } + + // upsample - 4 + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_short2{0x0010, 0x0010}, + s::cl_ushort2{0x0010, 0x0010}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x00100010); + assert(r2 == 0x00100010); + } + + // upsample - 5 + { + s::cl_ulong2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_uint2{0x00000010, 0x00000010}, + s::cl_uint2{0x00000010, 0x00000010}); + }); + }); + } + s::cl_ulong r1 = r.x(); + s::cl_ulong r2 = r.y(); + assert(r1 == 0x0000001000000010); + assert(r2 == 0x0000001000000010); + } + + // upsample - 6 + { + s::cl_long2 r{0}; + { + s::buffer BufR(&r, 
s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_int2{0x00000010, 0x00000010}, + s::cl_uint2{0x00000010, 0x00000010}); + }); + }); + } + s::cl_long r1 = r.x(); + s::cl_long r2 = r.y(); + assert(r1 == 0x0000001000000010); + assert(r2 == 0x0000001000000010); + } + + // popcount + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::popcount(s::cl_int2{0x000000FF, 0x000000FF}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 8); + assert(r2 == 8); + } + + // mad24 + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mad24(s::cl_int2{0xFFFFFFFF, 0xFFFFFFFF}, + s::cl_int2{20, 20}, s::cl_int2{20, 20}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0); + assert(r2 == 0); + } + + // mul24 + { + s::cl_int2 r{0}; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myqueue; + myqueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::mul24(s::cl_int2{0xFFFFFFFF, 0xFFFFFFFF}, s::cl_int2{20, 20}); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == -20); + assert(r2 == -20); + } + return 0; } From 7bf66469b4b88a8b09ab13b4599de30d615fc816 Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Fri, 5 Apr 2019 16:10:20 +0300 Subject: [PATCH 07/11] [SYCL][NFS] Apply clang format Signed-off-by: Alexey Voronov --- sycl/include/CL/sycl/builtins.hpp | 90 +-- sycl/include/CL/sycl/detail/builtins.hpp | 15 +- .../CL/sycl/detail/generic_type_traits.hpp | 736 ++++++++++++------ sycl/source/detail/builtins.cpp | 161 ++-- 
sycl/test/built-ins/scalar_integer.cpp | 130 ++-- sycl/test/built-ins/vector_integer.cpp | 159 ++-- 6 files changed, 769 insertions(+), 522 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index f200460ec23dc..f30258484ef58 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -260,7 +260,7 @@ fmod(T x, T y) __NOEXC { // genfloat fract (genfloat x, genfloatptr iptr) template typename std::enable_if< - detail::is_genfloat::value && detail::is_genfloatptr::value, T>::type + detail::is_genfloat::value &&detail::is_genfloatptr::value, T>::type fract(T x, T2 iptr) __NOEXC { return __sycl_std::__invoke_fract(x, iptr); } @@ -268,7 +268,7 @@ fract(T x, T2 iptr) __NOEXC { // genfloat frexp (genfloat x, genintptr exp) template typename std::enable_if< - detail::is_genfloat::value && detail::is_genintptr::value, T>::type + detail::is_genfloat::value &&detail::is_genintptr::value, T>::type frexp(T x, T2 exp) __NOEXC { return __sycl_std::__invoke_frexp(x, exp); } @@ -308,7 +308,7 @@ ldexp(T x, int k) __NOEXC { // vgenfloat ldexp (vgenfloat x, genint k) template typename std::enable_if< - detail::is_vgenfloat::value && detail::is_intn::value, T>::type + detail::is_vgenfloat::value &&detail::is_intn::value, T>::type ldexp(T x, T2 k) __NOEXC { return __sycl_std::__invoke_ldexp(x, k); } @@ -323,7 +323,7 @@ lgamma(T x) __NOEXC { // genfloat lgamma_r (genfloat x, genintptr signp) template typename std::enable_if< - detail::is_genfloat::value && detail::is_genintptr::value, T>::type + detail::is_genfloat::value &&detail::is_genintptr::value, T>::type lgamma_r(T x, T2 signp) __NOEXC { return __sycl_std::__invoke_lgamma_r(x, signp); } @@ -387,7 +387,7 @@ minmag(T x, T y) __NOEXC { // genfloat modf (genfloat x, genfloatptr iptr) template typename std::enable_if< - detail::is_genfloat::value && detail::is_genfloatptr::value, T>::type + detail::is_genfloat::value &&detail::is_genfloatptr::value, T>::type 
modf(T x, T2 iptr) __NOEXC { return __sycl_std::__invoke_modf(x, iptr); } @@ -420,7 +420,7 @@ pow(T x, T y) __NOEXC { // genfloat pown (genfloat x, genint y) template typename std::enable_if< - detail::is_genfloat::value && detail::is_genint::value, T>::type + detail::is_genfloat::value &&detail::is_genint::value, T>::type pown(T x, T2 y) __NOEXC { return __sycl_std::__invoke_pown(x, y); } @@ -442,7 +442,7 @@ remainder(T x, T y) __NOEXC { // genfloat remquo (genfloat x, genfloat y, genintptr quo) template typename std::enable_if< - detail::is_genfloat::value && detail::is_genintptr::value, T>::type + detail::is_genfloat::value &&detail::is_genintptr::value, T>::type remquo(T x, T y, T2 quo) __NOEXC { return __sycl_std::__invoke_remquo(x, y, quo); } @@ -457,7 +457,7 @@ rint(T x) __NOEXC { // genfloat rootn (genfloat x, genint y) template typename std::enable_if< - detail::is_genfloat::value && detail::is_genint::value, T>::type + detail::is_genfloat::value &&detail::is_genint::value, T>::type rootn(T x, T2 y) __NOEXC { return __sycl_std::__invoke_rootn(x, y); } @@ -486,7 +486,7 @@ sin(T x) __NOEXC { // genfloat sincos (genfloat x, genfloatptr cosval) template typename std::enable_if< - detail::is_genfloat::value && detail::is_genfloatptr::value, T>::type + detail::is_genfloat::value &&detail::is_genfloatptr::value, T>::type sincos(T x, T2 cosval) __NOEXC { return __sycl_std::__invoke_sincos(x, cosval); } @@ -1171,7 +1171,7 @@ template ::value, T>::type> detail::common_rel_ret_t isequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdEqual>(x, y)); + __sycl_std::__invoke_OpFOrdEqual >(x, y)); } // int isnotequal (half x, half y) @@ -1183,7 +1183,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnotequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFUnordNotEqual>(x, y)); + __sycl_std::__invoke_OpFUnordNotEqual >(x, y)); } // int isgreater (half x, half y) @@ -1195,7 +1195,7 @@ template 
::value, T>::type> detail::common_rel_ret_t isgreater(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdGreaterThan>(x, y)); + __sycl_std::__invoke_OpFOrdGreaterThan >(x, y)); } // int isgreaterequal (half x, half y) @@ -1207,7 +1207,7 @@ template ::value, T>::type> detail::common_rel_ret_t isgreaterequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdGreaterThanEqual>(x, y)); + __sycl_std::__invoke_OpFOrdGreaterThanEqual >(x, y)); } // int isless (half x, half y) @@ -1219,7 +1219,7 @@ template ::value, T>::type> detail::common_rel_ret_t isless(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdLessThan>(x, y)); + __sycl_std::__invoke_OpFOrdLessThan >(x, y)); } // int islessequal (half x, half y) @@ -1231,7 +1231,7 @@ template ::value, T>::type> detail::common_rel_ret_t islessequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdLessThanEqual>(x, y)); + __sycl_std::__invoke_OpFOrdLessThanEqual >(x, y)); } // int islessgreater (half x, half y) @@ -1243,7 +1243,7 @@ template ::value, T>::type> detail::common_rel_ret_t islessgreater(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpLessOrGreater>(x, y)); + __sycl_std::__invoke_OpLessOrGreater >(x, y)); } // int isfinite (half x) @@ -1255,7 +1255,7 @@ template ::value, T>::type> detail::common_rel_ret_t isfinite(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsFinite>(x)); + __sycl_std::__invoke_OpIsFinite >(x)); } // int isinf (half x) @@ -1267,7 +1267,7 @@ template ::value, T>::type> detail::common_rel_ret_t isinf(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsInf>(x)); + __sycl_std::__invoke_OpIsInf >(x)); } // int isnan (half x) @@ -1279,7 +1279,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnan(T x) __NOEXC { return detail::RelConverter::apply( - 
__sycl_std::__invoke_OpIsNan>(x)); + __sycl_std::__invoke_OpIsNan >(x)); } // int isnormal (half x) @@ -1291,7 +1291,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnormal(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsNormal>(x)); + __sycl_std::__invoke_OpIsNormal >(x)); } // int isordered (half x) @@ -1303,7 +1303,7 @@ template ::value, T>::type> detail::common_rel_ret_t isordered(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpOrdered>(x, y)); + __sycl_std::__invoke_OpOrdered >(x, y)); } // int isunordered (half x, half y) @@ -1315,7 +1315,7 @@ template ::value, T>::type> detail::common_rel_ret_t isunordered(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpUnordered>(x, y)); + __sycl_std::__invoke_OpUnordered >(x, y)); } // int signbit (half x) @@ -1327,7 +1327,7 @@ template ::value, T>::type> detail::common_rel_ret_t signbit(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpSignBitSet>(x)); + __sycl_std::__invoke_OpSignBitSet >(x)); } // int any (sigeninteger x) @@ -1344,7 +1344,7 @@ typename std::enable_if::value, cl::sycl::cl_int>::type any(T x) __NOEXC { return detail::rel_sign_bit_test_ret_t( - __sycl_std::__invoke_OpAny>( + __sycl_std::__invoke_OpAny >( detail::rel_sign_bit_test_arg_t(x))); } @@ -1362,7 +1362,7 @@ typename std::enable_if::value, cl::sycl::cl_int>::type all(T x) __NOEXC { return detail::rel_sign_bit_test_ret_t( - __sycl_std::__invoke_OpAll>( + __sycl_std::__invoke_OpAll >( detail::rel_sign_bit_test_arg_t(x))); } @@ -1375,18 +1375,18 @@ bitselect(T a, T b, T c) __NOEXC { // geninteger select (geninteger a, geninteger b, igeninteger c) template -typename std::enable_if::value && - detail::is_igeninteger::value, - T>::type +typename std::enable_if< + detail::is_geninteger::value &&detail::is_igeninteger::value, + T>::type select(T a, T b, T2 c) __NOEXC { return 
__sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // geninteger select (geninteger a, geninteger b, ugeninteger c) template -typename std::enable_if::value && - detail::is_ugeninteger::value, - T>::type +typename std::enable_if< + detail::is_geninteger::value &&detail::is_ugeninteger::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } @@ -1394,7 +1394,7 @@ select(T a, T b, T2 c) __NOEXC { // genfloatf select (genfloatf a, genfloatf b, genint c) template typename std::enable_if< - detail::is_genfloatf::value && detail::is_genint::value, T>::type + detail::is_genfloatf::value &&detail::is_genint::value, T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } @@ -1402,25 +1402,25 @@ select(T a, T b, T2 c) __NOEXC { // genfloatf select (genfloatf a, genfloatf b, ugenint c) template typename std::enable_if< - detail::is_genfloatf::value && detail::is_ugenint::value, T>::type + detail::is_genfloatf::value &&detail::is_ugenint::value, T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, igeninteger64 c) template -typename std::enable_if::value && - detail::is_igeninteger64bit::value, - T>::type +typename std::enable_if< + detail::is_genfloatd::value &&detail::is_igeninteger64bit::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, ugeninteger64 c) template -typename std::enable_if::value && - detail::is_ugeninteger64bit::value, - T>::type +typename std::enable_if< + detail::is_genfloatd::value &&detail::is_ugeninteger64bit::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } @@ -1428,18 +1428,18 @@ select(T a, T b, T2 c) __NOEXC { 
#ifndef __HALF_NO_ENABLED // genfloath select (genfloath a, genfloath b, igeninteger16 c) template -typename std::enable_if::value && - detail::is_igeninteger16bit::value, - T>::type +typename std::enable_if< + detail::is_genfloath::value &&detail::is_igeninteger16bit::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloath select (genfloath a, genfloath b, ugeninteger16 c) template -typename std::enable_if::value && - detail::is_ugeninteger16bit::value, - T>::type +typename std::enable_if< + detail::is_genfloath::value &&detail::is_ugeninteger16bit::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } diff --git a/sycl/include/CL/sycl/detail/builtins.hpp b/sycl/include/CL/sycl/detail/builtins.hpp index bbe99b2927ae3..763cccd8fe4d8 100644 --- a/sycl/include/CL/sycl/detail/builtins.hpp +++ b/sycl/include/CL/sycl/detail/builtins.hpp @@ -21,9 +21,8 @@ #define MAKE_CALL_ARG1(call) \ template \ - ALWAYS_INLINE \ - typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ - T1 t1) __NOEXC { \ + ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ + __invoke_##call(T1 t1) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ extern Ret call(Arg1); \ @@ -32,9 +31,8 @@ #define MAKE_CALL_ARG2(call) \ template \ - ALWAYS_INLINE \ - typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ - T1 t1, T2 t2) __NOEXC { \ + ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ + __invoke_##call(T1 t1, T2 t2) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg2 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ @@ -45,9 +43,8 @@ #define MAKE_CALL_ARG3(call) \ template \ 
- ALWAYS_INLINE \ - typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ - T1 t1, T2 t2, T3 t3) __NOEXC { \ + ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ + __invoke_##call(T1 t1, T2 t2, T3 t3) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg2 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index 3ec8e1c4ea21b..2e25e50133ded 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -35,44 +35,43 @@ struct is_contained : std::conditional::type, typename TL::head>::value, std::true_type, - is_contained>::type {}; + is_contained >::type {}; -template struct is_contained> : std::false_type {}; +template struct is_contained > : std::false_type {}; // floatn: float2, float3, float4, float8, float16 template -using is_floatn = typename is_contained< - T, type_list>::type; +using is_floatn = + typename is_contained >::type; // genfloatf: float, floatn template -using is_genfloatf = - std::integral_constant>::value || - is_floatn::value>; +using is_genfloatf = std::integral_constant< + bool, is_contained >::value || is_floatn::value>; // doublen: double2, double3, double4, double8, double16 template using is_doublen = typename is_contained>::type; + cl_double8, cl_double16> >::type; // genfloatd: double, doublen template -using is_genfloatd = - std::integral_constant>::value || - is_doublen::value>; +using is_genfloatd = std::integral_constant< + bool, + is_contained >::value || is_doublen::value>; #ifndef __HALF_NO_ENABLED // halfn: half2, half3, half4, half8, half16 template using is_halfn = typename is_contained< - T, type_list>::type; + T, type_list >::type; // genfloath: half, halfn template -using is_genfloath = - 
std::integral_constant>::value || - is_halfn::value>; +using is_genfloath = std::integral_constant< + bool, is_contained >::value || is_halfn::value>; #endif // genfloat: genfloatf, genfloatd, genfloath @@ -91,7 +90,7 @@ using is_sgenfloat = typename is_contained>::type; + > >::type; // vgenfloat: floatn, doublen, halfn template @@ -105,36 +104,35 @@ using is_vgenfloat = // gengeofloat: float, float2, float3, float4 template using is_gengeofloat = typename is_contained< - T, type_list>::type; + T, type_list >::type; // gengeodouble: double, double2, double3, double4 template using is_gengeodouble = typename is_contained< - T, type_list>::type; + T, type_list >::type; #ifndef __HALF_NO_ENABLED // gengeohalf: half, half2, half3, half4 template using is_gengeohalf = typename is_contained< - T, type_list>::type; + T, type_list >::type; #endif // gengeofloat: float, float2, float3, float4 template using is_vgengeofloat = - typename is_contained>::type; + typename is_contained >::type; // gengeodouble: double, double2, double3, double4 template -using is_vgengeodouble = - typename is_contained>::type; +using is_vgengeodouble = typename is_contained< + T, type_list >::type; #ifndef __HALF_NO_ENABLED // gengeohalf: half2, half3, half4 template using is_vgengeohalf = - typename is_contained>::type; + typename is_contained >::type; #endif // sgengeo: float, double, half @@ -145,7 +143,7 @@ using is_sgengeo = , cl_half #endif - >>::value>; + > >::value>; // vgengeo: vgengeofloat, vgengeodouble, vgengeohalf template @@ -159,164 +157,159 @@ using is_vgengeo = std::integral_constant::value || // gencrossfloat: float3, float4 template using is_gencrossfloat = - typename is_contained>::type; + typename is_contained >::type; // gencrossdouble: double3, double4 template using is_gencrossdouble = - typename is_contained>::type; + typename is_contained >::type; #ifndef __HALF_NO_ENABLED // gencrosshalf: half3, half4 template using is_gencrosshalf = - typename is_contained>::type; + 
typename is_contained >::type; #endif // gencross: gencrossfloat, gencrossdouble, gencrosshalf template -using is_gencross = - std::integral_constant::value || - is_gencrossdouble::value +using is_gencross = std::integral_constant< + bool, is_gencrossfloat::value || is_gencrossdouble::value #ifndef __HALF_NO_ENABLED - || is_gencrosshalf::value + || is_gencrosshalf::value #endif - >; + >; // charn: char2, char3, char4, char8, char16 template using is_charn = typename is_contained< - T, type_list>::type; + T, type_list >::type; // scharn: schar2, schar3, schar4, schar8, schar16 template -using is_scharn = typename is_contained< - T, type_list>::type; +using is_scharn = + typename is_contained >::type; // ucharn: uchar2, uchar3, uchar4, uchar8, uchar16 template -using is_ucharn = typename is_contained< - T, type_list>::type; +using is_ucharn = + typename is_contained >::type; // igenchar: signed char, scharn template -using is_igenchar = - std::integral_constant>::value || - is_scharn::value>; +using is_igenchar = std::integral_constant< + bool, is_contained >::value || is_scharn::value>; // ugenchar: unsigned char, ucharn template -using is_ugenchar = - std::integral_constant>::value || - is_ucharn::value>; +using is_ugenchar = std::integral_constant< + bool, is_contained >::value || is_ucharn::value>; // genchar: char, charn, igenchar, ugenchar template using is_genchar = std::integral_constant< - bool, is_contained>::value || is_charn::value || + bool, is_contained >::value || is_charn::value || is_igenchar::value || is_ugenchar::value>; // shortn: short2, short3, short4, short8, short16 template -using is_shortn = typename is_contained< - T, type_list>::type; +using is_shortn = + typename is_contained >::type; // genshort: short, shortn template -using is_genshort = - std::integral_constant>::value || - is_shortn::value>; +using is_genshort = std::integral_constant< + bool, is_contained >::value || is_shortn::value>; // ushortn: ushort2, ushort3, ushort4, ushort8, 
ushort16 template using is_ushortn = typename is_contained>::type; + cl_ushort8, cl_ushort16> >::type; // genushort: ushort, ushortn template -using is_ugenshort = - std::integral_constant>::value || - is_ushortn::value>; +using is_ugenshort = std::integral_constant< + bool, + is_contained >::value || is_ushortn::value>; // uintn: uint2, uint3, uint4, uint8, uint16 template using is_uintn = typename is_contained< - T, type_list>::type; + T, type_list >::type; // ugenint: unsigned int, uintn template -using is_ugenint = - std::integral_constant>::value || - is_uintn::value>; +using is_ugenint = std::integral_constant< + bool, is_contained >::value || is_uintn::value>; // intn: int2, int3, int4, int8, int16 template using is_intn = typename is_contained< - T, type_list>::type; + T, type_list >::type; // genint: int, intn template -using is_genint = - std::integral_constant>::value || - is_intn::value>; +using is_genint = std::integral_constant< + bool, is_contained >::value || is_intn::value>; // ulongn: ulong2, ulong3, ulong4, ulong8,ulong16 template -using is_ulongn = typename is_contained< - T, type_list>::type; +using is_ulongn = + typename is_contained >::type; // ugenlong: unsigned long int, ulongn template -using is_ugenlong = - std::integral_constant>::value || - is_ulongn::value>; +using is_ugenlong = std::integral_constant< + bool, is_contained >::value || is_ulongn::value>; // longn: long2, long3, long4, long8, long16 template using is_longn = typename is_contained< - T, type_list>::type; + T, type_list >::type; // genlong: long int, longn template -using is_genlong = - std::integral_constant>::value || - is_longn::value>; +using is_genlong = std::integral_constant< + bool, is_contained >::value || is_longn::value>; // ulonglongn: ulonglong2, ulonglong3, ulonglong4,ulonglong8, ulonglong16 template using is_ulonglongn = typename is_contained>::type; + ulonglong8, ulonglong16> >::type; // ugenlonglong: unsigned long long int, ulonglongn template -using 
is_ugenlonglong = - std::integral_constant>::value || - is_ulonglongn::value>; +using is_ugenlonglong = std::integral_constant< + bool, + is_contained >::value || is_ulonglongn::value>; // longlongn: longlong2, longlong3, longlong4,longlong8, longlong16 template -using is_longlongn = typename is_contained< - T, type_list>::type; +using is_longlongn = + typename is_contained >::type; // genlonglong: long long int, longlongn template using is_genlonglong = - std::integral_constant>::value || + std::integral_constant >::value || is_longlongn::value>; // igenlonginteger: genlong, genlonglong template -using is_igenlonginteger = - std::integral_constant::value || is_genlonglong::value>; +using is_igenlonginteger = std::integral_constant< + bool, is_genlong::value || is_genlonglong::value>; // ugenlonginteger ugenlong, ugenlonglong template -using is_ugenlonginteger = - std::integral_constant::value || is_ugenlonglong::value>; +using is_ugenlonginteger = std::integral_constant< + bool, is_ugenlong::value || is_ugenlonglong::value>; // geninteger: genchar, genshort, ugenshort, genint, ugenint, igenlonginteger, // ugenlonginteger @@ -345,7 +338,7 @@ using is_ugeninteger = std::integral_constant< template using is_sgeninteger = typename is_contained< T, type_list>::type; + cl_uint, cl_long, cl_ulong, longlong, ulonglong> >::type; // vgeninteger: charn, scharn, ucharn, shortn, ushortn, intn, uintn, longn, // ulongn, longlongn, ulonglongn @@ -359,14 +352,15 @@ using is_vgeninteger = std::integral_constant< // sigeninteger: char, signed char, short, int, long int, , long long int template -using is_sigeninteger = typename is_contained< - T, type_list>::type; +using is_sigeninteger = + typename is_contained >::type; // sugeninteger: unsigned char, unsigned short, unsigned int, unsigned long // int, unsigned long long int template using is_sugeninteger = typename is_contained< - T, type_list>::type; + T, type_list >::type; // vigeninteger: charn, scharn, shortn, intn, longn, 
longlongn template @@ -394,8 +388,8 @@ template class TryToGetElementType; // size, where N = 8, 16, 32, 64 template using is_igenintegerNbit = typename std::integral_constant< - bool, is_igeninteger::value && - (sizeof(typename TryToGetElementType::type) == N)>; + bool, is_igeninteger::value &&( + sizeof(typename TryToGetElementType::type) == N)>; // igeninteger8bit All types within igeninteger whose base type are 8 bits in // size @@ -417,8 +411,8 @@ template using is_igeninteger64bit = is_igenintegerNbit; // size, where N = 8, 16, 32, 64. template using is_ugenintegerNbit = typename std::integral_constant< - bool, is_ugeninteger::value && - (sizeof(typename TryToGetElementType::type) == N)>; + bool, is_ugeninteger::value &&( + sizeof(typename TryToGetElementType::type) == N)>; // ugeninteger8bit All types within ugeninteger whose base type are 8 bits in // size @@ -440,8 +434,8 @@ template using is_ugeninteger64bit = is_ugenintegerNbit; // size, where N = 8, 16, 32, 64. template using is_genintegerNbit = typename std::integral_constant< - bool, is_geninteger::value && - (sizeof(typename TryToGetElementType::type) == N)>; + bool, is_geninteger::value &&( + sizeof(typename TryToGetElementType::type) == N)>; // geninteger8bit All types within geninteger whose base type are 8 bits in size template using is_geninteger8bit = is_genintegerNbit; @@ -460,9 +454,9 @@ template using is_geninteger64bit = is_genintegerNbit; template using is_MultiPtrOfGLR = - std::integral_constant>::value || - std::is_same>::value || - std::is_same>::value>; + std::integral_constant >::value || + std::is_same >::value || + std::is_same >::value>; // genintptr All permutations of multi_ptr where dataT is // all types within genint and addressSpace is @@ -653,161 +647,417 @@ template <> struct float_point_to_sign_integral { // Used for ilogb built-in template struct float_point_to_int; -template <> struct float_point_to_int { using type = cl_int; }; -template <> struct float_point_to_int { 
using type = cl_int2; }; -template <> struct float_point_to_int { using type = cl_int3; }; -template <> struct float_point_to_int { using type = cl_int4; }; -template <> struct float_point_to_int { using type = cl_int8; }; -template <> struct float_point_to_int { using type = cl_int16; }; +template <> struct float_point_to_int { + using type = cl_int; +}; +template <> struct float_point_to_int { + using type = cl_int2; +}; +template <> struct float_point_to_int { + using type = cl_int3; +}; +template <> struct float_point_to_int { + using type = cl_int4; +}; +template <> struct float_point_to_int { + using type = cl_int8; +}; +template <> struct float_point_to_int { + using type = cl_int16; +}; #ifndef __HALF_NO_ENABLED -template <> struct float_point_to_int { using type = cl_int; }; -template <> struct float_point_to_int { using type = cl_int2; }; -template <> struct float_point_to_int { using type = cl_int3; }; -template <> struct float_point_to_int { using type = cl_int4; }; -template <> struct float_point_to_int { using type = cl_int8; }; -template <> struct float_point_to_int { using type = cl_int16; }; +template <> struct float_point_to_int { + using type = cl_int; +}; +template <> struct float_point_to_int { + using type = cl_int2; +}; +template <> struct float_point_to_int { + using type = cl_int3; +}; +template <> struct float_point_to_int { + using type = cl_int4; +}; +template <> struct float_point_to_int { + using type = cl_int8; +}; +template <> struct float_point_to_int { + using type = cl_int16; +}; #endif -template <> struct float_point_to_int { using type = cl_int; }; -template <> struct float_point_to_int { using type = cl_int2; }; -template <> struct float_point_to_int { using type = cl_int3; }; -template <> struct float_point_to_int { using type = cl_int4; }; -template <> struct float_point_to_int { using type = cl_int8; }; -template <> struct float_point_to_int { using type = cl_int16; }; +template <> struct float_point_to_int { + using type = 
cl_int; +}; +template <> struct float_point_to_int { + using type = cl_int2; +}; +template <> struct float_point_to_int { + using type = cl_int3; +}; +template <> struct float_point_to_int { + using type = cl_int4; +}; +template <> struct float_point_to_int { + using type = cl_int8; +}; +template <> struct float_point_to_int { + using type = cl_int16; +}; // Used for abs and abs_diff built-in -template struct make_unsigned { using type = T; }; - -template <> struct make_unsigned { using type = cl_uchar; }; -template <> struct make_unsigned { using type = cl_uchar2; }; -template <> struct make_unsigned { using type = cl_uchar3; }; -template <> struct make_unsigned { using type = cl_uchar4; }; -template <> struct make_unsigned { using type = cl_uchar8; }; -template <> struct make_unsigned { using type = cl_uchar16; }; - -template <> struct make_unsigned { using type = cl_ushort; }; -template <> struct make_unsigned { using type = cl_ushort2; }; -template <> struct make_unsigned { using type = cl_ushort3; }; -template <> struct make_unsigned { using type = cl_ushort4; }; -template <> struct make_unsigned { using type = cl_ushort8; }; -template <> struct make_unsigned { using type = cl_ushort16; }; - -template <> struct make_unsigned { using type = cl_uint; }; -template <> struct make_unsigned { using type = cl_uint2; }; -template <> struct make_unsigned { using type = cl_uint3; }; -template <> struct make_unsigned { using type = cl_uint4; }; -template <> struct make_unsigned { using type = cl_uint8; }; -template <> struct make_unsigned { using type = cl_uint16; }; - -template <> struct make_unsigned { using type = cl_ulong; }; -template <> struct make_unsigned { using type = cl_ulong2; }; -template <> struct make_unsigned { using type = cl_ulong3; }; -template <> struct make_unsigned { using type = cl_ulong4; }; -template <> struct make_unsigned { using type = cl_ulong8; }; -template <> struct make_unsigned { using type = cl_ulong16; }; - -template <> struct 
make_unsigned { using type = ulonglong; }; -template <> struct make_unsigned { using type = ulonglong2; }; -template <> struct make_unsigned { using type = ulonglong3; }; -template <> struct make_unsigned { using type = ulonglong4; }; -template <> struct make_unsigned { using type = ulonglong8; }; -template <> struct make_unsigned { using type = ulonglong16; }; - -template struct make_signed { using type = T; }; - -template <> struct make_signed { using type = cl_char; }; -template <> struct make_signed { using type = cl_char2; }; -template <> struct make_signed { using type = cl_char3; }; -template <> struct make_signed { using type = cl_char4; }; -template <> struct make_signed { using type = cl_char8; }; -template <> struct make_signed { using type = cl_char16; }; - -template <> struct make_signed { using type = cl_short; }; -template <> struct make_signed { using type = cl_short2; }; -template <> struct make_signed { using type = cl_short3; }; -template <> struct make_signed { using type = cl_short4; }; -template <> struct make_signed { using type = cl_short8; }; -template <> struct make_signed { using type = cl_short16; }; - -template <> struct make_signed { using type = cl_int; }; -template <> struct make_signed { using type = cl_int2; }; -template <> struct make_signed { using type = cl_int3; }; -template <> struct make_signed { using type = cl_int4; }; -template <> struct make_signed { using type = cl_int8; }; -template <> struct make_signed { using type = cl_int16; }; - -template <> struct make_signed { using type = cl_long; }; -template <> struct make_signed { using type = cl_long2; }; -template <> struct make_signed { using type = cl_long3; }; -template <> struct make_signed { using type = cl_long4; }; -template <> struct make_signed { using type = cl_long8; }; -template <> struct make_signed { using type = cl_long16; }; - -template <> struct make_signed { using type = longlong; }; -template <> struct make_signed { using type = longlong2; }; -template <> 
struct make_signed { using type = longlong3; }; -template <> struct make_signed { using type = longlong4; }; -template <> struct make_signed { using type = longlong8; }; -template <> struct make_signed { using type = longlong16; }; +template struct make_unsigned { + using type = T; +}; + +template <> struct make_unsigned { + using type = cl_uchar; +}; +template <> struct make_unsigned { + using type = cl_uchar2; +}; +template <> struct make_unsigned { + using type = cl_uchar3; +}; +template <> struct make_unsigned { + using type = cl_uchar4; +}; +template <> struct make_unsigned { + using type = cl_uchar8; +}; +template <> struct make_unsigned { + using type = cl_uchar16; +}; + +template <> struct make_unsigned { + using type = cl_ushort; +}; +template <> struct make_unsigned { + using type = cl_ushort2; +}; +template <> struct make_unsigned { + using type = cl_ushort3; +}; +template <> struct make_unsigned { + using type = cl_ushort4; +}; +template <> struct make_unsigned { + using type = cl_ushort8; +}; +template <> struct make_unsigned { + using type = cl_ushort16; +}; + +template <> struct make_unsigned { + using type = cl_uint; +}; +template <> struct make_unsigned { + using type = cl_uint2; +}; +template <> struct make_unsigned { + using type = cl_uint3; +}; +template <> struct make_unsigned { + using type = cl_uint4; +}; +template <> struct make_unsigned { + using type = cl_uint8; +}; +template <> struct make_unsigned { + using type = cl_uint16; +}; + +template <> struct make_unsigned { + using type = cl_ulong; +}; +template <> struct make_unsigned { + using type = cl_ulong2; +}; +template <> struct make_unsigned { + using type = cl_ulong3; +}; +template <> struct make_unsigned { + using type = cl_ulong4; +}; +template <> struct make_unsigned { + using type = cl_ulong8; +}; +template <> struct make_unsigned { + using type = cl_ulong16; +}; + +template <> struct make_unsigned { + using type = ulonglong; +}; +template <> struct make_unsigned { + using type = 
ulonglong2; +}; +template <> struct make_unsigned { + using type = ulonglong3; +}; +template <> struct make_unsigned { + using type = ulonglong4; +}; +template <> struct make_unsigned { + using type = ulonglong8; +}; +template <> struct make_unsigned { + using type = ulonglong16; +}; + +template struct make_signed { + using type = T; +}; + +template <> struct make_signed { + using type = cl_char; +}; +template <> struct make_signed { + using type = cl_char2; +}; +template <> struct make_signed { + using type = cl_char3; +}; +template <> struct make_signed { + using type = cl_char4; +}; +template <> struct make_signed { + using type = cl_char8; +}; +template <> struct make_signed { + using type = cl_char16; +}; + +template <> struct make_signed { + using type = cl_short; +}; +template <> struct make_signed { + using type = cl_short2; +}; +template <> struct make_signed { + using type = cl_short3; +}; +template <> struct make_signed { + using type = cl_short4; +}; +template <> struct make_signed { + using type = cl_short8; +}; +template <> struct make_signed { + using type = cl_short16; +}; + +template <> struct make_signed { + using type = cl_int; +}; +template <> struct make_signed { + using type = cl_int2; +}; +template <> struct make_signed { + using type = cl_int3; +}; +template <> struct make_signed { + using type = cl_int4; +}; +template <> struct make_signed { + using type = cl_int8; +}; +template <> struct make_signed { + using type = cl_int16; +}; + +template <> struct make_signed { + using type = cl_long; +}; +template <> struct make_signed { + using type = cl_long2; +}; +template <> struct make_signed { + using type = cl_long3; +}; +template <> struct make_signed { + using type = cl_long4; +}; +template <> struct make_signed { + using type = cl_long8; +}; +template <> struct make_signed { + using type = cl_long16; +}; + +template <> struct make_signed { + using type = longlong; +}; +template <> struct make_signed { + using type = longlong2; +}; +template 
<> struct make_signed { + using type = longlong3; +}; +template <> struct make_signed { + using type = longlong4; +}; +template <> struct make_signed { + using type = longlong8; +}; +template <> struct make_signed { + using type = longlong16; +}; // Used for upsample built-in // Bases on Table 4.93: Scalar data type aliases supported by SYCL template struct make_upper; -template <> struct make_upper { using type = cl_short; }; -template <> struct make_upper { using type = cl_short2; }; -template <> struct make_upper { using type = cl_short3; }; -template <> struct make_upper { using type = cl_short4; }; -template <> struct make_upper { using type = cl_short8; }; -template <> struct make_upper { using type = cl_short16; }; - -template <> struct make_upper { using type = cl_ushort; }; -template <> struct make_upper { using type = cl_ushort2; }; -template <> struct make_upper { using type = cl_ushort3; }; -template <> struct make_upper { using type = cl_ushort4; }; -template <> struct make_upper { using type = cl_ushort8; }; -template <> struct make_upper { using type = cl_ushort16; }; - -template <> struct make_upper { using type = cl_int; }; -template <> struct make_upper { using type = cl_int2; }; -template <> struct make_upper { using type = cl_int3; }; -template <> struct make_upper { using type = cl_int4; }; -template <> struct make_upper { using type = cl_int8; }; -template <> struct make_upper { using type = cl_int16; }; - -template <> struct make_upper { using type = cl_uint; }; -template <> struct make_upper { using type = cl_uint2; }; -template <> struct make_upper { using type = cl_uint3; }; -template <> struct make_upper { using type = cl_uint4; }; -template <> struct make_upper { using type = cl_uint8; }; -template <> struct make_upper { using type = cl_uint16; }; - -template <> struct make_upper { using type = cl_long; }; -template <> struct make_upper { using type = cl_long2; }; -template <> struct make_upper { using type = cl_long3; }; -template <> 
struct make_upper { using type = cl_long4; }; -template <> struct make_upper { using type = cl_long8; }; -template <> struct make_upper { using type = cl_long16; }; - -template <> struct make_upper { using type = cl_ulong; }; -template <> struct make_upper { using type = cl_ulong2; }; -template <> struct make_upper { using type = cl_ulong3; }; -template <> struct make_upper { using type = cl_ulong4; }; -template <> struct make_upper { using type = cl_ulong8; }; -template <> struct make_upper { using type = cl_ulong16; }; - -template <> struct make_upper { using type = longlong; }; -template <> struct make_upper { using type = longlong2; }; -template <> struct make_upper { using type = longlong3; }; -template <> struct make_upper { using type = longlong4; }; -template <> struct make_upper { using type = longlong8; }; -template <> struct make_upper { using type = longlong16; }; - -template <> struct make_upper { using type = ulonglong; }; -template <> struct make_upper { using type = ulonglong2; }; -template <> struct make_upper { using type = ulonglong3; }; -template <> struct make_upper { using type = ulonglong4; }; -template <> struct make_upper { using type = ulonglong8; }; -template <> struct make_upper { using type = ulonglong16; }; +template <> struct make_upper { + using type = cl_short; +}; +template <> struct make_upper { + using type = cl_short2; +}; +template <> struct make_upper { + using type = cl_short3; +}; +template <> struct make_upper { + using type = cl_short4; +}; +template <> struct make_upper { + using type = cl_short8; +}; +template <> struct make_upper { + using type = cl_short16; +}; + +template <> struct make_upper { + using type = cl_ushort; +}; +template <> struct make_upper { + using type = cl_ushort2; +}; +template <> struct make_upper { + using type = cl_ushort3; +}; +template <> struct make_upper { + using type = cl_ushort4; +}; +template <> struct make_upper { + using type = cl_ushort8; +}; +template <> struct make_upper { + using 
type = cl_ushort16; +}; + +template <> struct make_upper { + using type = cl_int; +}; +template <> struct make_upper { + using type = cl_int2; +}; +template <> struct make_upper { + using type = cl_int3; +}; +template <> struct make_upper { + using type = cl_int4; +}; +template <> struct make_upper { + using type = cl_int8; +}; +template <> struct make_upper { + using type = cl_int16; +}; + +template <> struct make_upper { + using type = cl_uint; +}; +template <> struct make_upper { + using type = cl_uint2; +}; +template <> struct make_upper { + using type = cl_uint3; +}; +template <> struct make_upper { + using type = cl_uint4; +}; +template <> struct make_upper { + using type = cl_uint8; +}; +template <> struct make_upper { + using type = cl_uint16; +}; + +template <> struct make_upper { + using type = cl_long; +}; +template <> struct make_upper { + using type = cl_long2; +}; +template <> struct make_upper { + using type = cl_long3; +}; +template <> struct make_upper { + using type = cl_long4; +}; +template <> struct make_upper { + using type = cl_long8; +}; +template <> struct make_upper { + using type = cl_long16; +}; + +template <> struct make_upper { + using type = cl_ulong; +}; +template <> struct make_upper { + using type = cl_ulong2; +}; +template <> struct make_upper { + using type = cl_ulong3; +}; +template <> struct make_upper { + using type = cl_ulong4; +}; +template <> struct make_upper { + using type = cl_ulong8; +}; +template <> struct make_upper { + using type = cl_ulong16; +}; + +template <> struct make_upper { + using type = longlong; +}; +template <> struct make_upper { + using type = longlong2; +}; +template <> struct make_upper { + using type = longlong3; +}; +template <> struct make_upper { + using type = longlong4; +}; +template <> struct make_upper { + using type = longlong8; +}; +template <> struct make_upper { + using type = longlong16; +}; + +template <> struct make_upper { + using type = ulonglong; +}; +template <> struct make_upper { + 
using type = ulonglong2; +}; +template <> struct make_upper { + using type = ulonglong3; +}; +template <> struct make_upper { + using type = ulonglong4; +}; +template <> struct make_upper { + using type = ulonglong8; +}; +template <> struct make_upper { + using type = ulonglong16; +}; // Try to get pointer_t, otherwise T template class TryToGetPointerT { diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index 7ac01580162c8..3da9ede8074b7 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -44,18 +44,17 @@ namespace d = s::detail; return r; \ } -#define __MAKE_1V_2V_RS(Fun, Call, N, Ret, Arg1, Arg2) \ - Ret Fun __NOEXC(Arg1##N x, Arg2##N y) { \ - Ret r = Ret(); \ - using base1_t = typename Arg1##N::element_type; \ - using base2_t = typename Arg2##N::element_type; \ - detail::helper().run_1v_2v_rs( \ - r, \ - [](Ret &r, base1_t x, base2_t y) { \ - return cl::__host_std::Call(r, x, y); \ - }, \ - x, y); \ - return r; \ +#define __MAKE_1V_2V_RS(Fun, Call, N, Ret, Arg1, Arg2) \ + Ret Fun __NOEXC(Arg1##N x, Arg2##N y) { \ + Ret r = Ret(); \ + using base1_t = typename Arg1##N::element_type; \ + using base2_t = typename Arg2##N::element_type; \ + detail::helper().run_1v_2v_rs(r, \ + [](Ret &r, base1_t x, base2_t y) { \ + return cl::__host_std::Call(r, x, y); \ + }, \ + x, y); \ + return r; \ } #define __MAKE_1V_RS(Fun, Call, N, Ret, Arg1) \ @@ -67,32 +66,30 @@ namespace d = s::detail; return r; \ } -#define __MAKE_1V_2V_3V(Fun, Call, N, Ret, Arg1, Arg2, Arg3) \ - Ret##N Fun __NOEXC(Arg1##N x, Arg2##N y, Arg3##N z) { \ - Ret##N r; \ - using base1_t = typename Arg1##N::element_type; \ - using base2_t = typename Arg2##N::element_type; \ - using base3_t = typename Arg3##N::element_type; \ - detail::helper().run_1v_2v_3v( \ - r, \ - [](base1_t x, base2_t y, base3_t z) { \ - return cl::__host_std::Call(x, y, z); \ - }, \ - x, y, z); \ - return r; \ +#define __MAKE_1V_2V_3V(Fun, Call, N, Ret, Arg1, Arg2, Arg3) \ + 
Ret##N Fun __NOEXC(Arg1##N x, Arg2##N y, Arg3##N z) { \ + Ret##N r; \ + using base1_t = typename Arg1##N::element_type; \ + using base2_t = typename Arg2##N::element_type; \ + using base3_t = typename Arg3##N::element_type; \ + detail::helper().run_1v_2v_3v(r, \ + [](base1_t x, base2_t y, base3_t z) { \ + return cl::__host_std::Call(x, y, z); \ + }, \ + x, y, z); \ + return r; \ } -#define __MAKE_1V_2S_3S(Fun, N, Ret, Arg1, Arg2, Arg3) \ - Ret##N Fun __NOEXC(Arg1##N x, Arg2 y, Arg3 z) { \ - Ret##N r; \ - using base1_t = typename Arg1##N::element_type; \ - detail::helper().run_1v_2s_3s( \ - r, \ - [](base1_t x, Arg2 y, Arg3 z) { \ - return cl::__host_std::Fun(x, y, z); \ - }, \ - x, y, z); \ - return r; \ +#define __MAKE_1V_2S_3S(Fun, N, Ret, Arg1, Arg2, Arg3) \ + Ret##N Fun __NOEXC(Arg1##N x, Arg2 y, Arg3 z) { \ + Ret##N r; \ + using base1_t = typename Arg1##N::element_type; \ + detail::helper().run_1v_2s_3s(r, \ + [](base1_t x, Arg2 y, Arg3 z) { \ + return cl::__host_std::Fun(x, y, z); \ + }, \ + x, y, z); \ + return r; \ } #define __MAKE_1V_2S(Fun, N, Ret, Arg1, Arg2) \ @@ -140,10 +137,8 @@ namespace d = s::detail; using base2_t = typename Arg2##N::element_type; \ using base3_t = typename Arg3##N::element_type; \ detail::helper().run_1v_2v_3p( \ - r, \ - [](base1_t x, base2_t y, base3_t *z) { \ - return cl::__host_std::Fun(x, y, z); \ - }, \ + r, [](base1_t x, base2_t y, \ + base3_t *z) { return cl::__host_std::Fun(x, y, z); }, \ x, y, z); \ return r; \ } @@ -151,90 +146,85 @@ namespace d = s::detail; #define MAKE_1V(Fun, Ret, Arg1) MAKE_1V_FUNC(Fun, Fun, Ret, Arg1) #define MAKE_1V_FUNC(Fun, Call, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 2, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 3, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 4, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 8, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 16, Ret, Arg1) + __MAKE_1V(Fun, Call, 2, Ret, Arg1) __MAKE_1V(Fun, Call, 3, Ret, Arg1) \ + __MAKE_1V(Fun, Call, 4, Ret, Arg1) __MAKE_1V(Fun, Call, 8, Ret, Arg1) \ + __MAKE_1V(Fun, 
Call, 16, Ret, Arg1) #define MAKE_1V_2V(Fun, Ret, Arg1, Arg2) \ MAKE_1V_2V_FUNC(Fun, Fun, Ret, Arg1, Arg2) #define MAKE_1V_2V_FUNC(Fun, Call, Ret, Arg1, Arg2) \ __MAKE_1V_2V(Fun, Call, 2, Ret, Arg1, Arg2) \ - __MAKE_1V_2V(Fun, Call, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2V(Fun, Call, 4, Ret, Arg1, Arg2) \ - __MAKE_1V_2V(Fun, Call, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2V(Fun, Call, 16, Ret, Arg1, Arg2) + __MAKE_1V_2V(Fun, Call, 3, Ret, Arg1, Arg2) \ + __MAKE_1V_2V(Fun, Call, 4, Ret, Arg1, Arg2) \ + __MAKE_1V_2V(Fun, Call, 8, Ret, Arg1, Arg2) \ + __MAKE_1V_2V(Fun, Call, 16, Ret, Arg1, Arg2) #define MAKE_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) #define MAKE_1V_2V_3V_FUNC(Fun, Call, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3V(Fun, Call, 2, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3V(Fun, Call, 3, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3V(Fun, Call, 4, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3V(Fun, Call, 8, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3V(Fun, Call, 16, Ret, Arg1, Arg2, Arg3) + __MAKE_1V_2V_3V(Fun, Call, 3, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3V(Fun, Call, 4, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3V(Fun, Call, 8, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3V(Fun, Call, 16, Ret, Arg1, Arg2, Arg3) #define MAKE_SC_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \ - MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) + MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) #define MAKE_SC_FSC_1V_2V_3V_FV(FunSc, FunV, Ret, Arg1, Arg2, Arg3) \ MAKE_SC_3ARG(FunSc, Ret, Arg1, Arg2, Arg3) \ - MAKE_1V_2V_3V_FUNC(FunSc, FunV, Ret, Arg1, Arg2, Arg3) + MAKE_1V_2V_3V_FUNC(FunSc, FunV, Ret, Arg1, Arg2, Arg3) #define MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \ Ret Fun __NOEXC(Arg1 x, Arg2 y, Arg3 z) { return (Ret)__##Fun(x, y, z); } #define MAKE_1V_2S(Fun, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 2, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 4, Ret, Arg1, Arg2) \ - 
__MAKE_1V_2S(Fun, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 16, Ret, Arg1, Arg2) + __MAKE_1V_2S(Fun, 2, Ret, Arg1, Arg2) __MAKE_1V_2S(Fun, 3, Ret, Arg1, Arg2) \ + __MAKE_1V_2S(Fun, 4, Ret, Arg1, Arg2) \ + __MAKE_1V_2S(Fun, 8, Ret, Arg1, Arg2) \ + __MAKE_1V_2S(Fun, 16, Ret, Arg1, Arg2) #define MAKE_1V_2S_3S(Fun, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2S_3S(Fun, 2, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2S_3S(Fun, 3, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2S_3S(Fun, 4, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2S_3S(Fun, 8, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2S_3S(Fun, 16, Ret, Arg1, Arg2, Arg3) - + __MAKE_1V_2S_3S(Fun, 3, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 4, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 8, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 16, Ret, Arg1, Arg2, Arg3) #define MAKE_SR_1V_AND(Fun, Ret, Arg1) \ __MAKE_SR_1V_AND(Fun, Fun, 2, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 3, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 4, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 8, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 16, Ret, Arg1) + __MAKE_SR_1V_AND(Fun, Fun, 3, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Fun, 4, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Fun, 8, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Fun, 16, Ret, Arg1) #define MAKE_SR_1V_OR(Fun, Ret, Arg1) \ __MAKE_SR_1V_OR(Fun, Fun, 2, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 3, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 4, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 8, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 16, Ret, Arg1) + __MAKE_SR_1V_OR(Fun, Fun, 3, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Fun, 4, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Fun, 8, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Fun, 16, Ret, Arg1) #define MAKE_1V_2P(Fun, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 2, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 4, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 16, Ret, Arg1, Arg2) + __MAKE_1V_2P(Fun, 2, Ret, Arg1, Arg2) __MAKE_1V_2P(Fun, 3, Ret, Arg1, Arg2) 
\ + __MAKE_1V_2P(Fun, 4, Ret, Arg1, Arg2) \ + __MAKE_1V_2P(Fun, 8, Ret, Arg1, Arg2) \ + __MAKE_1V_2P(Fun, 16, Ret, Arg1, Arg2) #define MAKE_GEO_1V_2V_RS(Fun, Call, Ret, Arg1, Arg2) \ __MAKE_1V_2V_RS(Fun, Call, 2, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 4, Ret, Arg1, Arg2) + __MAKE_1V_2V_RS(Fun, Call, 3, Ret, Arg1, Arg2) \ + __MAKE_1V_2V_RS(Fun, Call, 4, Ret, Arg1, Arg2) #define MAKE_1V_2V_3P(Fun, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3P(Fun, 2, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3P(Fun, 3, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3P(Fun, 4, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3P(Fun, 8, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3P(Fun, 16, Ret, Arg1, Arg2, Arg3) + __MAKE_1V_2V_3P(Fun, 3, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3P(Fun, 4, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3P(Fun, 8, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3P(Fun, 16, Ret, Arg1, Arg2, Arg3) namespace cl { namespace __host_std { @@ -796,8 +786,7 @@ MAKE_1V_2V(nextafter, s::cl_half, s::cl_half, s::cl_half) #endif // fract -template -T __fract(T x, T* iptr) { +template T __fract(T x, T *iptr) { T f = std::floor(x); *(iptr) = f; return std::fmin(x - f, nextafter(T(1.0), T(0.0))); diff --git a/sycl/test/built-ins/scalar_integer.cpp b/sycl/test/built-ins/scalar_integer.cpp index 18e6a0f93aec6..c5e1d3cdce5f7 100644 --- a/sycl/test/built-ins/scalar_integer.cpp +++ b/sycl/test/built-ins/scalar_integer.cpp @@ -14,14 +14,15 @@ namespace s = cl::sycl; int main() { // max { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = s::max(s::cl_int{5}, s::cl_int{2}); }); + cgh.single_task([=]() { + AccR[0] = s::max(s::cl_int{ 5 }, s::cl_int{ 2 }); + }); }); } assert(r == 5); @@ -29,14 +30,15 @@ int main() { // max { - s::cl_uint r{0}; + s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); 
s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = s::max(s::cl_uint{5}, s::cl_uint{2}); }); + cgh.single_task([=]() { + AccR[0] = s::max(s::cl_uint{ 5 }, s::cl_uint{ 2 }); + }); }); } assert(r == 5); @@ -44,14 +46,15 @@ int main() { // min { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = s::min(s::cl_int{5}, s::cl_int{2}); }); + cgh.single_task([=]() { + AccR[0] = s::min(s::cl_int{ 5 }, s::cl_int{ 2 }); + }); }); } assert(r == 2); @@ -59,14 +62,15 @@ int main() { // min { - s::cl_uint r{0}; + s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = s::min(s::cl_uint{5}, s::cl_uint{2}); }); + cgh.single_task([=]() { + AccR[0] = s::min(s::cl_uint{ 5 }, s::cl_uint{ 2 }); + }); }); } assert(r == 2); @@ -74,14 +78,15 @@ int main() { // abs { - s::cl_uint r{0}; + s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = s::abs(s::cl_int{-5}); }); + cgh.single_task([=]() { + AccR[0] = s::abs(s::cl_int{ -5 }); + }); }); } assert(r == 5); @@ -89,14 +94,15 @@ int main() { // abs_diff { - s::cl_uint r{0}; + s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = s::abs_diff(s::cl_int{-5}, s::cl_int{-1}); }); + cgh.single_task([=]() { + AccR[0] = s::abs_diff(s::cl_int{ -5 }, s::cl_int{ -1 }); + }); }); } assert(r == 4); @@ -104,14 +110,14 @@ int main() { // add_sat { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; 
myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::add_sat(s::cl_int{0x7FFFFFFF}, s::cl_int{100}); + AccR[0] = s::add_sat(s::cl_int{ 0x7FFFFFFF }, s::cl_int{ 100 }); }); }); } @@ -120,14 +126,14 @@ int main() { // hadd { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::hadd(s::cl_int{0x0000007F}, s::cl_int{0x00000020}); + AccR[0] = s::hadd(s::cl_int{ 0x0000007F }, s::cl_int{ 0x00000020 }); }); }); } @@ -136,14 +142,14 @@ int main() { // rhadd { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::rhadd(s::cl_int{0x0000007F}, s::cl_int{0x00000020}); + AccR[0] = s::rhadd(s::cl_int{ 0x0000007F }, s::cl_int{ 0x00000020 }); }); }); } @@ -152,14 +158,14 @@ int main() { // clamp { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::clamp(s::cl_int{5}, s::cl_int{10}, s::cl_int{30}); + AccR[0] = s::clamp(s::cl_int{ 5 }, s::cl_int{ 10 }, s::cl_int{ 30 }); }); }); } @@ -168,14 +174,15 @@ int main() { // clz { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = s::clz(s::cl_int{0x0FFFFFFF}); }); + cgh.single_task([=]() { + AccR[0] = s::clz(s::cl_int{ 0x0FFFFFFF }); + }); }); } assert(r == 4); @@ -183,15 +190,15 @@ int main() { // mad_hi { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); 
cgh.single_task([=]() { - AccR[0] = s::mad_hi(s::cl_int{0x10000000}, s::cl_int{0x00000100}, - s::cl_int{0x00000001}); + AccR[0] = s::mad_hi(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }, + s::cl_int{ 0x00000001 }); }); // 2^28 * 2^8 = 2^36 -> 0x10 00000000. }); } @@ -200,15 +207,15 @@ int main() { // mad_sat { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mad_sat(s::cl_int{0x10000000}, s::cl_int{0x00000100}, - s::cl_int{0x00000001}); + AccR[0] = s::mad_sat(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }, + s::cl_int{ 0x00000001 }); }); // 2^31 * 2^8 = 2^39 -> 0x80 00000000 -> reuslt is saturated in the // product. }); @@ -218,14 +225,14 @@ int main() { // mul_hi { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mul_hi(s::cl_int{0x10000000}, s::cl_int{0x00000100}); + AccR[0] = s::mul_hi(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }); }); // 2^28 * 2^8 = 2^36 -> 0x10 00000000. 
}); } @@ -234,14 +241,14 @@ int main() { // rotate { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::rotate(s::cl_int{0x11100000}, s::cl_int{12}); + AccR[0] = s::rotate(s::cl_int{ 0x11100000 }, s::cl_int{ 12 }); }); }); } @@ -250,14 +257,14 @@ int main() { // sub_sat { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::sub_sat(s::cl_int{10}, s::cl_int(0x80000000)); + AccR[0] = s::sub_sat(s::cl_int{ 10 }, s::cl_int(0x80000000)); }); // 10 - (-2^31(minimum value)) = saturates on Maximum value }); } @@ -266,14 +273,14 @@ int main() { // upsample - 1 { - s::cl_ushort r{0}; + s::cl_ushort r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_uchar{0x10}, s::cl_uchar{0x10}); + AccR[0] = s::upsample(s::cl_uchar{ 0x10 }, s::cl_uchar{ 0x10 }); }); }); } @@ -282,14 +289,14 @@ int main() { // upsample - 2 { - s::cl_short r{0}; + s::cl_short r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_char{0x10}, s::cl_uchar{0x10}); + AccR[0] = s::upsample(s::cl_char{ 0x10 }, s::cl_uchar{ 0x10 }); }); }); } @@ -298,14 +305,14 @@ int main() { // upsample - 3 { - s::cl_uint r{0}; + s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_ushort{0x0010}, s::cl_ushort{0x0010}); + AccR[0] = s::upsample(s::cl_ushort{ 0x0010 }, s::cl_ushort{ 0x0010 }); }); }); } @@ -314,14 
+321,14 @@ int main() { // upsample - 4 { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_short{0x0010}, s::cl_ushort{0x0010}); + AccR[0] = s::upsample(s::cl_short{ 0x0010 }, s::cl_ushort{ 0x0010 }); }); }); } @@ -330,14 +337,15 @@ int main() { // upsample - 5 { - s::cl_ulong r{0}; + s::cl_ulong r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_uint{0x00000010}, s::cl_uint{0x00000010}); + AccR[0] = + s::upsample(s::cl_uint{ 0x00000010 }, s::cl_uint{ 0x00000010 }); }); }); } @@ -346,14 +354,15 @@ int main() { // upsample - 6 { - s::cl_long r{0}; + s::cl_long r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_int{0x00000010}, s::cl_uint{0x00000010}); + AccR[0] = + s::upsample(s::cl_int{ 0x00000010 }, s::cl_uint{ 0x00000010 }); }); }); } @@ -362,14 +371,15 @@ int main() { // popcount { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = s::popcount(s::cl_int{0x000000FF}); }); + cgh.single_task([=]() { + AccR[0] = s::popcount(s::cl_int{ 0x000000FF }); + }); }); } assert(r == 8); @@ -377,7 +387,7 @@ int main() { // mad24 { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; @@ -385,7 +395,7 @@ int main() { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = - s::mad24(s::cl_int(0xFFFFFFFF), s::cl_int{20}, s::cl_int{20}); + s::mad24(s::cl_int(0xFFFFFFFF), s::cl_int{ 20 }, s::cl_int{ 20 }); }); }); } @@ -394,14 +404,14 @@ 
int main() { // mul24 { - s::cl_int r{0}; + s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mul24(s::cl_int(0xFFFFFFFF), s::cl_int{20}); + AccR[0] = s::mul24(s::cl_int(0xFFFFFFFF), s::cl_int{ 20 }); }); }); } diff --git a/sycl/test/built-ins/vector_integer.cpp b/sycl/test/built-ins/vector_integer.cpp index dabf8715640e3..5d6c721e2bb57 100644 --- a/sycl/test/built-ins/vector_integer.cpp +++ b/sycl/test/built-ins/vector_integer.cpp @@ -14,14 +14,14 @@ namespace s = cl::sycl; int main() { // max { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::cl_int2{5, 3}, s::cl_int2{2, 7}); + AccR[0] = s::max(s::cl_int2{ 5, 3 }, s::cl_int2{ 2, 7 }); }); }); } @@ -33,14 +33,14 @@ int main() { // max { - s::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::cl_uint2{5, 3}, s::cl_uint2{2, 7}); + AccR[0] = s::max(s::cl_uint2{ 5, 3 }, s::cl_uint2{ 2, 7 }); }); }); } @@ -52,14 +52,14 @@ int main() { // max { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::cl_int2{5, 3}, s::cl_int{2}); + AccR[0] = s::max(s::cl_int2{ 5, 3 }, s::cl_int{ 2 }); }); }); } @@ -71,14 +71,14 @@ int main() { // max { - s::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::cl_uint2{5, 3}, s::cl_uint{2}); + AccR[0] = s::max(s::cl_uint2{ 5, 3 
}, s::cl_uint{ 2 }); }); }); } @@ -90,14 +90,14 @@ int main() { // min { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::min(s::cl_int2{5, 3}, s::cl_int2{2, 7}); + AccR[0] = s::min(s::cl_int2{ 5, 3 }, s::cl_int2{ 2, 7 }); }); }); } @@ -109,14 +109,14 @@ int main() { // min { - s::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::min(s::cl_uint2{5, 3}, s::cl_uint2{2, 7}); + AccR[0] = s::min(s::cl_uint2{ 5, 3 }, s::cl_uint2{ 2, 7 }); }); }); } @@ -128,14 +128,14 @@ int main() { // min { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::min(s::cl_int2{5, 3}, s::cl_int{2}); + AccR[0] = s::min(s::cl_int2{ 5, 3 }, s::cl_int{ 2 }); }); }); } @@ -147,14 +147,14 @@ int main() { // min { - s::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::min(s::cl_uint2{5, 3}, s::cl_uint{2}); + AccR[0] = s::min(s::cl_uint2{ 5, 3 }, s::cl_uint{ 2 }); }); }); } @@ -166,14 +166,14 @@ int main() { // abs { - s::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::abs(s::cl_int2{-5, -2}); + AccR[0] = s::abs(s::cl_int2{ -5, -2 }); }); }); } @@ -185,14 +185,14 @@ int main() { // abs_diff { - s::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = 
BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::abs_diff(s::cl_int2{-5, -2}, s::cl_int2{-1, -1}); + AccR[0] = s::abs_diff(s::cl_int2{ -5, -2 }, s::cl_int2{ -1, -1 }); }); }); } @@ -204,15 +204,15 @@ int main() { // add_sat { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::add_sat(s::cl_int2{0x7FFFFFFF, 0x7FFFFFFF}, - s::cl_int2{100, 90}); + AccR[0] = s::add_sat(s::cl_int2{ 0x7FFFFFFF, 0x7FFFFFFF }, + s::cl_int2{ 100, 90 }); }); }); } @@ -224,15 +224,15 @@ int main() { // hadd { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::hadd(s::cl_int2{0x0000007F, 0x0000007F}, - s::cl_int2{0x00000020, 0x00000020}); + AccR[0] = s::hadd(s::cl_int2{ 0x0000007F, 0x0000007F }, + s::cl_int2{ 0x00000020, 0x00000020 }); }); }); } @@ -244,15 +244,15 @@ int main() { // rhadd { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::rhadd(s::cl_int2{0x0000007F, 0x0000007F}, - s::cl_int2{0x00000020, 0x00000020}); + AccR[0] = s::rhadd(s::cl_int2{ 0x0000007F, 0x0000007F }, + s::cl_int2{ 0x00000020, 0x00000020 }); }); }); } @@ -264,15 +264,15 @@ int main() { // clamp - 1 { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::clamp(s::cl_int2{5, 5}, s::cl_int2{10, 10}, - s::cl_int2{30, 30}); + AccR[0] = s::clamp(s::cl_int2{ 5, 5 }, s::cl_int2{ 10, 10 }, + s::cl_int2{ 30, 30 }); }); }); } @@ -284,14 +284,15 @@ int main() { // clamp - 2 { - s::cl_int2 r{0}; + s::cl_int2 
r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::clamp(s::cl_int2{5, 5}, s::cl_int{10}, s::cl_int{30}); + AccR[0] = + s::clamp(s::cl_int2{ 5, 5 }, s::cl_int{ 10 }, s::cl_int{ 30 }); }); }); } @@ -303,14 +304,14 @@ int main() { // clz { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::clz(s::cl_int2{0x0FFFFFFF, 0x0FFFFFFF}); + AccR[0] = s::clz(s::cl_int2{ 0x0FFFFFFF, 0x0FFFFFFF }); }); }); } @@ -322,16 +323,16 @@ int main() { // mad_hi { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::mad_hi(s::cl_int2{0x10000000, 0x10000000}, - s::cl_int2{0x00000100, 0x00000100}, s::cl_int2{1, 1}); + AccR[0] = s::mad_hi(s::cl_int2{ 0x10000000, 0x10000000 }, + s::cl_int2{ 0x00000100, 0x00000100 }, + s::cl_int2{ 1, 1 }); }); }); } @@ -343,16 +344,16 @@ int main() { // mad_sat { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::mad_sat(s::cl_int2{0x10000000, 0x10000000}, - s::cl_int2{0x00000100, 0x00000100}, s::cl_int2{1, 1}); + AccR[0] = s::mad_sat(s::cl_int2{ 0x10000000, 0x10000000 }, + s::cl_int2{ 0x00000100, 0x00000100 }, + s::cl_int2{ 1, 1 }); }); }); } @@ -364,15 +365,15 @@ int main() { // mul_hi { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mul_hi(s::cl_int2{0x10000000, 0x10000000}, - s::cl_int2{0x00000100, 0x00000100}); + 
AccR[0] = s::mul_hi(s::cl_int2{ 0x10000000, 0x10000000 }, + s::cl_int2{ 0x00000100, 0x00000100 }); }); }); } @@ -384,15 +385,15 @@ int main() { // rotate { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::rotate(s::cl_int2{0x11100000, 0x11100000}, s::cl_int2{12, 12}); + AccR[0] = s::rotate(s::cl_int2{ 0x11100000, 0x11100000 }, + s::cl_int2{ 12, 12 }); }); }); } @@ -404,15 +405,15 @@ int main() { // sub_sat { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::sub_sat(s::cl_int2{10, 10}, - s::cl_int2{int(0x80000000), int(0x80000000)}); + AccR[0] = s::sub_sat(s::cl_int2{ 10, 10 }, + s::cl_int2{ int(0x80000000), int(0x80000000) }); }); }); } @@ -424,15 +425,15 @@ int main() { // upsample - 1 { - s::cl_ushort2 r{0}; + s::cl_ushort2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::upsample(s::cl_uchar2{0x10, 0x10}, s::cl_uchar2{0x10, 0x10}); + AccR[0] = s::upsample(s::cl_uchar2{ 0x10, 0x10 }, + s::cl_uchar2{ 0x10, 0x10 }); }); }); } @@ -444,15 +445,15 @@ int main() { // upsample - 2 { - s::cl_short2 r{0}; + s::cl_short2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::upsample(s::cl_char2{0x10, 0x10}, s::cl_uchar2{0x10, 0x10}); + AccR[0] = s::upsample(s::cl_char2{ 0x10, 0x10 }, + s::cl_uchar2{ 0x10, 0x10 }); }); }); } @@ -464,15 +465,15 @@ int main() { // upsample - 3 { - s::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto 
AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_ushort2{0x0010, 0x0010}, - s::cl_ushort2{0x0010, 0x0010}); + AccR[0] = s::upsample(s::cl_ushort2{ 0x0010, 0x0010 }, + s::cl_ushort2{ 0x0010, 0x0010 }); }); }); } @@ -484,15 +485,15 @@ int main() { // upsample - 4 { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_short2{0x0010, 0x0010}, - s::cl_ushort2{0x0010, 0x0010}); + AccR[0] = s::upsample(s::cl_short2{ 0x0010, 0x0010 }, + s::cl_ushort2{ 0x0010, 0x0010 }); }); }); } @@ -504,15 +505,15 @@ int main() { // upsample - 5 { - s::cl_ulong2 r{0}; + s::cl_ulong2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_uint2{0x00000010, 0x00000010}, - s::cl_uint2{0x00000010, 0x00000010}); + AccR[0] = s::upsample(s::cl_uint2{ 0x00000010, 0x00000010 }, + s::cl_uint2{ 0x00000010, 0x00000010 }); }); }); } @@ -524,15 +525,15 @@ int main() { // upsample - 6 { - s::cl_long2 r{0}; + s::cl_long2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_int2{0x00000010, 0x00000010}, - s::cl_uint2{0x00000010, 0x00000010}); + AccR[0] = s::upsample(s::cl_int2{ 0x00000010, 0x00000010 }, + s::cl_uint2{ 0x00000010, 0x00000010 }); }); }); } @@ -544,14 +545,14 @@ int main() { // popcount { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::popcount(s::cl_int2{0x000000FF, 0x000000FF}); + AccR[0] = s::popcount(s::cl_int2{ 0x000000FF, 0x000000FF }); }); }); } @@ -563,15 +564,15 
@@ int main() { // mad24 { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mad24(s::cl_int2{0xFFFFFFFF, 0xFFFFFFFF}, - s::cl_int2{20, 20}, s::cl_int2{20, 20}); + AccR[0] = s::mad24(s::cl_int2{ 0xFFFFFFFF, 0xFFFFFFFF }, + s::cl_int2{ 20, 20 }, s::cl_int2{ 20, 20 }); }); }); } @@ -583,15 +584,15 @@ int main() { // mul24 { - s::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); s::queue myqueue; myqueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::mul24(s::cl_int2{0xFFFFFFFF, 0xFFFFFFFF}, s::cl_int2{20, 20}); + AccR[0] = s::mul24(s::cl_int2{ 0xFFFFFFFF, 0xFFFFFFFF }, + s::cl_int2{ 20, 20 }); }); }); } From f80473c1d9716d0e8c00bf240a5b5e6e7fbc5053 Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Fri, 5 Apr 2019 16:57:23 +0300 Subject: [PATCH 08/11] [SYCL] Fix some half built-in functions. Fix macro __HAFL_ENABLED. 
Signed-off-by: Alexey Voronov --- sycl/source/detail/builtins.cpp | 61 ++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index 3da9ede8074b7..d1446aeff67f6 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -774,14 +774,14 @@ cl_float nextafter(s::cl_float x, s::cl_float y) __NOEXC { cl_double nextafter(s::cl_double x, s::cl_double y) __NOEXC { return std::nextafter(x, y); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half nextafter(s::cl_half x, s::cl_half y) __NOEXC { return std::nextafter(x, y); } #endif MAKE_1V_2V(nextafter, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(nextafter, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2V(nextafter, s::cl_half, s::cl_half, s::cl_half) #endif @@ -798,14 +798,14 @@ cl_float fract(s::cl_float x, s::cl_float *iptr) __NOEXC { cl_double fract(s::cl_double x, s::cl_double *iptr) __NOEXC { return __fract(x, iptr); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half fract(s::cl_half x, s::cl_half *iptr) __NOEXC { return __fract(x, iptr); } #endif MAKE_1V_2P(fract, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2P(fract, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2P(fract, s::cl_half, s::cl_half, s::cl_half) #endif @@ -816,14 +816,14 @@ cl_float frexp(s::cl_float x, s::cl_int *exp) __NOEXC { cl_double frexp(s::cl_double x, s::cl_int *exp) __NOEXC { return std::frexp(x, exp); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half frexp(s::cl_half x, s::cl_int *exp) __NOEXC { return std::frexp(x, exp); } #endif MAKE_1V_2P(frexp, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2P(frexp, s::cl_double, s::cl_double, s::cl_int) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2P(frexp, s::cl_half, s::cl_half, s::cl_int) #endif @@ -888,14 +888,14 @@ 
cl_float lgamma_r(s::cl_float x, s::cl_int *signp) __NOEXC { cl_double lgamma_r(s::cl_double x, s::cl_int *signp) __NOEXC { return ::lgamma_r(x, signp); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half lgamma_r(s::cl_half x, s::cl_int *signp) __NOEXC { return ::lgamma_r(x, signp); } #endif MAKE_1V_2P(lgamma_r, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2P(lgamma_r, s::cl_double, s::cl_double, s::cl_int) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2P(lgamma_r, s::cl_half, s::cl_half, s::cl_int) #endif @@ -1056,14 +1056,17 @@ cl_float modf(s::cl_float x, s::cl_float *iptr) __NOEXC { cl_double modf(s::cl_double x, s::cl_double *iptr) __NOEXC { return std::modf(x, iptr); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half modf(s::cl_half x, s::cl_half *iptr) __NOEXC { - return std::modf(x, iptr); + float t = 0; + float r = std::modf(x, &t); + *iptr = t; + return r; } #endif MAKE_1V_2P(modf, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2P(modf, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2P(modf, s::cl_half, s::cl_half, s::cl_half) #endif @@ -1077,37 +1080,39 @@ cl_double nan(s::cl_ulong nancode) __NOEXC { cl_double nan(s::ulonglong nancode) __NOEXC { return std::numeric_limits::quiet_NaN(); } -#ifdef __HAFL_ENABLED -cl_half nan(s::cl_ushort nancode) __NOEXC { return NAN; } +#ifndef __HALF_NO_ENABLED +cl_half nan(s::cl_ushort nancode) __NOEXC { + return s::cl_half(std::numeric_limits::quiet_NaN()); +} #endif MAKE_1V(nan, s::cl_float, s::cl_uint) MAKE_1V(nan, s::cl_double, s::cl_ulong) MAKE_1V(nan, s::cl_double, s::ulonglong) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V(nan, s::cl_half, s::cl_ushort) #endif // pow cl_float pow(s::cl_float x, s::cl_float y) __NOEXC { return std::pow(x, y); } cl_double pow(s::cl_double x, s::cl_double y) __NOEXC { return std::pow(x, y); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half pow(s::cl_half x, s::cl_half y) __NOEXC 
{ return std::pow(x, y); } #endif MAKE_1V_2V(pow, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(pow, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2V(pow, s::cl_half, s::cl_half, s::cl_half) #endif // pown cl_float pown(s::cl_float x, s::cl_int y) __NOEXC { return std::pow(x, y); } cl_double pown(s::cl_double x, s::cl_int y) __NOEXC { return std::pow(x, y); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half pown(s::cl_half x, s::cl_int y) __NOEXC { return std::pow(x, y); } #endif MAKE_1V_2V(pown, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V(pown, s::cl_double, s::cl_double, s::cl_int) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2V(pown, s::cl_half, s::cl_half, s::cl_int) #endif @@ -1118,14 +1123,14 @@ cl_float powr(s::cl_float x, s::cl_float y) __NOEXC { cl_double powr(s::cl_double x, s::cl_double y) __NOEXC { return (x >= 0 ? std::pow(x, y) : x); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half powr(s::cl_half x, s::cl_half y) __NOEXC { - return (x >= 0 ? std::pow(x, y) : x); + return (x >= s::cl_half(0) ? 
std::pow(x, y) : s::cl_float(x)); } #endif MAKE_1V_2V(powr, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(powr, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2V(powr, s::cl_half, s::cl_half, s::cl_half) #endif @@ -1136,14 +1141,14 @@ cl_float remainder(s::cl_float x, s::cl_float y) __NOEXC { cl_double remainder(s::cl_double x, s::cl_double y) __NOEXC { return std::remainder(x, y); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half remainder(s::cl_half x, s::cl_half y) __NOEXC { return std::remainder(x, y); } #endif MAKE_1V_2V(remainder, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(remainder, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2V(remainder, s::cl_half, s::cl_half, s::cl_half) #endif @@ -1154,14 +1159,14 @@ cl_float remquo(s::cl_float x, s::cl_float y, s::cl_int *quo) __NOEXC { cl_double remquo(s::cl_double x, s::cl_double y, s::cl_int *quo) __NOEXC { return std::remquo(x, y, quo); } -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED cl_half remquo(s::cl_half x, s::cl_half y, s::cl_int *quo) __NOEXC { return std::remquo(x, y, quo); } #endif MAKE_1V_2V_3P(remquo, s::cl_float, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V_3P(remquo, s::cl_double, s::cl_double, s::cl_double, s::cl_int) -#ifdef __HAFL_ENABLED +#ifndef __HALF_NO_ENABLED MAKE_1V_2V_3P(remquo, s::cl_half, s::cl_half, s::cl_half, s::cl_int) #endif @@ -2281,11 +2286,11 @@ cl_double sign(s::cl_double x) __NOEXC { #ifndef NO_HALF_ENABLED cl_half sign(s::cl_half x) __NOEXC { if (std::isnan(x)) { - return 0.0; + return s::cl_half(0.0); } else if (x > 0) { - return 1.0; + return s::cl_half(1.0); } else if (x < 0) { - return -1.0; + return s::cl_half(-1.0); } else /* x is +0.0 or -0.0} */ { return x; } From 93319bf0414c8cde69a1fe988a98fb93412d92c1 Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Fri, 5 Apr 2019 17:05:40 +0300 Subject: [PATCH 09/11] [SYCL][NFC] Refactoring of 
built-in functions' tests. Removed I/O. Replace namespace cl::sycl to s for the tests. Signed-off-by: Alexey Voronov --- sycl/test/built-ins/scalar_common.cpp | 17 +- sycl/test/built-ins/scalar_geometric.cpp | 107 ++--- sycl/test/built-ins/scalar_integer.cpp | 100 ++-- sycl/test/built-ins/scalar_math.cpp | 555 +++++++++++----------- sycl/test/built-ins/scalar_relational.cpp | 381 +++++++-------- sycl/test/built-ins/vector_common.cpp | 40 +- sycl/test/built-ins/vector_geometric.cpp | 175 +++---- sycl/test/built-ins/vector_integer.cpp | 120 ++--- sycl/test/built-ins/vector_math.cpp | 136 +++--- sycl/test/built-ins/vector_relational.cpp | 478 +++++++++---------- 10 files changed, 1001 insertions(+), 1108 deletions(-) diff --git a/sycl/test/built-ins/scalar_common.cpp b/sycl/test/built-ins/scalar_common.cpp index 0096c8610e64d..7a8c8e6697a1c 100644 --- a/sycl/test/built-ins/scalar_common.cpp +++ b/sycl/test/built-ins/scalar_common.cpp @@ -6,27 +6,24 @@ #include -#include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // max { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::max(cl::sycl::cl_float{0.5f}, cl::sycl::cl_float{2.3f}); + AccR[0] = s::max(s::cl_float{ 0.5f }, s::cl_float{ 2.3f }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2.3f); } diff --git a/sycl/test/built-ins/scalar_geometric.cpp b/sycl/test/built-ins/scalar_geometric.cpp index d885958017934..6060e9432c258 100644 --- a/sycl/test/built-ins/scalar_geometric.cpp +++ b/sycl/test/built-ins/scalar_geometric.cpp @@ -6,128 +6,121 @@ #include -#include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // dot { - cl::sycl::cl_float r{0}; + 
s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::dot(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{1.6}); + AccR[0] = s::dot(s::cl_float{ 0.5 }, s::cl_float{ 1.6 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.8f); } // distance { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::distance(cl::sycl::cl_float{1.f}, - cl::sycl::cl_float{3.f}); + AccR[0] = s::distance(s::cl_float{ 1.f }, s::cl_float{ 3.f }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2.f); } // length { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::length(cl::sycl::cl_float{1.f}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::length(s::cl_float{ 1.f }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 1.f); } + // normalize { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::normalize(cl::sycl::cl_float{2.f}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + 
auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::normalize(s::cl_float{ 2.f }); + }); }); } - - std::cout << "r " << r << std::endl; assert(r == 1.f); } // fast_distance { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fast_distance(cl::sycl::cl_float{1.f}, - cl::sycl::cl_float{3.f}); + AccR[0] = s::fast_distance(s::cl_float{ 1.f }, s::cl_float{ 3.f }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2.f); } + // fast_length { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fast_length(cl::sycl::cl_float{2.f}); + AccR[0] = s::fast_length(s::cl_float{ 2.f }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2.f); } + // fast_normalize { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fast_normalize(cl::sycl::cl_float{2.f}); + AccR[0] = s::fast_normalize(s::cl_float{ 2.f }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 1.f); } diff --git a/sycl/test/built-ins/scalar_integer.cpp b/sycl/test/built-ins/scalar_integer.cpp index c5e1d3cdce5f7..fe630e5874770 100644 --- a/sycl/test/built-ins/scalar_integer.cpp 
+++ b/sycl/test/built-ins/scalar_integer.cpp @@ -17,8 +17,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::max(s::cl_int{ 5 }, s::cl_int{ 2 }); @@ -33,8 +33,8 @@ int main() { s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::max(s::cl_uint{ 5 }, s::cl_uint{ 2 }); @@ -49,8 +49,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::min(s::cl_int{ 5 }, s::cl_int{ 2 }); @@ -65,8 +65,8 @@ int main() { s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::min(s::cl_uint{ 5 }, s::cl_uint{ 2 }); @@ -81,8 +81,8 @@ int main() { s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::abs(s::cl_int{ -5 }); @@ -97,8 +97,8 @@ int main() { s::cl_uint r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::abs_diff(s::cl_int{ -5 }, s::cl_int{ -1 }); @@ -113,8 +113,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer 
BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::add_sat(s::cl_int{ 0x7FFFFFFF }, s::cl_int{ 100 }); @@ -129,8 +129,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::hadd(s::cl_int{ 0x0000007F }, s::cl_int{ 0x00000020 }); @@ -145,8 +145,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::rhadd(s::cl_int{ 0x0000007F }, s::cl_int{ 0x00000020 }); @@ -161,8 +161,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::clamp(s::cl_int{ 5 }, s::cl_int{ 10 }, s::cl_int{ 30 }); @@ -177,8 +177,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::clz(s::cl_int{ 0x0FFFFFFF }); @@ -193,8 +193,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mad_hi(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }, @@ -210,8 +210,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, 
s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mad_sat(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }, @@ -228,8 +228,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mul_hi(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }); @@ -244,8 +244,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::rotate(s::cl_int{ 0x11100000 }, s::cl_int{ 12 }); @@ -260,8 +260,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::sub_sat(s::cl_int{ 10 }, s::cl_int(0x80000000)); @@ -276,8 +276,8 @@ int main() { s::cl_ushort r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_uchar{ 0x10 }, s::cl_uchar{ 0x10 }); @@ -292,8 +292,8 @@ int main() { s::cl_short r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_char{ 0x10 }, s::cl_uchar{ 0x10 }); @@ -308,8 +308,8 @@ int main() { s::cl_uint r{ 0 }; { s::buffer 
BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_ushort{ 0x0010 }, s::cl_ushort{ 0x0010 }); @@ -324,8 +324,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_short{ 0x0010 }, s::cl_ushort{ 0x0010 }); @@ -340,8 +340,8 @@ int main() { s::cl_ulong r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = @@ -357,8 +357,8 @@ int main() { s::cl_long r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = @@ -374,8 +374,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::popcount(s::cl_int{ 0x000000FF }); @@ -390,8 +390,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = @@ -407,8 +407,8 @@ int main() { s::cl_int r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = 
BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mul24(s::cl_int(0xFFFFFFFF), s::cl_int{ 20 }); diff --git a/sycl/test/built-ins/scalar_math.cpp b/sycl/test/built-ins/scalar_math.cpp index f1d5b6256c165..1ab181e89698b 100644 --- a/sycl/test/built-ins/scalar_math.cpp +++ b/sycl/test/built-ins/scalar_math.cpp @@ -10,540 +10,547 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // acos { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::acos(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::acos(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 1.047f && r < 1.048f); // ~1.0471975511965979 } + // acosh { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::acosh(cl::sycl::cl_float{2.4}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::acosh(s::cl_float{ 2.4 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 1.522f && r < 1.523f); // ~1.5220793674636532 } + // acospi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::acospi(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + 
cgh.single_task([=]() { + AccR[0] = s::acospi(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.333f && r < 0.334f); // ~0.33333333333333337 } // asin { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::asin(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::asin(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.523f && r < 0.524f); // ~0.5235987755982989 } + // asinh { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::asinh(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::asinh(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.481f && r < 0.482f); // ~0.48121182505960347 } + // asinpi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::asinpi(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::asinpi(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.166f && r < 0.167f); // ~0.16666666666666669 } + // atan { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer 
BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::atan(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::atan(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.463f && r < 0.464f); // ~0.4636476090008061 } + // atan2 { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::atan2(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{0.5}); + AccR[0] = s::atan2(s::cl_float{ 0.5 }, s::cl_float{ 0.5 }); }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.785f && r < 0.786f); // ~0.7853981633974483 } + // atanh { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::atanh(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::atanh(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.549f && r < 0.550f); // ~0.5493061443340549 } + // atanpi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::atanpi(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + 
s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::atanpi(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.147f && r < 0.148f); // ~0.14758361765043326 } // atan2pi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::atan2pi(cl::sycl::cl_float{0.5}, - cl::sycl::cl_float{0.5}); + AccR[0] = s::atan2pi(s::cl_float{ 0.5 }, s::cl_float{ 0.5 }); }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.249f && r < 0.251f); // ~0.25 } + // cbrt { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::cbrt(cl::sycl::cl_float{27.0}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cbrt(s::cl_float{ 27.0 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 3.f); } + // ceil { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::ceil(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::ceil(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 1.f); } + // copysign { - cl::sycl::cl_float r{0}; + s::cl_float 
r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::copysign(cl::sycl::cl_float{1}, - cl::sycl::cl_float{-0.5}); + AccR[0] = s::copysign(s::cl_float{ 1 }, s::cl_float{ -0.5 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == -1.f); } + // cos { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::cos(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cos(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.877f && r < 0.878f); // ~0.8775825618903728 } + // cosh { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::cosh(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cosh(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 1.127f && r < 1.128f); // ~1.1276259652063807 } + // cospi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::cospi(cl::sycl::cl_float{0.1}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + 
myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cospi(s::cl_float{ 0.1 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.951f && r < 0.952f); // ~0.9510565162951535 } + // erfc { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::erfc(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::erfc(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.479f && r < 0.480f); // ~0.4795001221869535 } + // erf { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::erf(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::erf(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.520f && r < 0.521f); // ~0.5204998778130465 } + // exp { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::exp(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::exp(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 1.648f && r < 1.649f); // ~1.6487212707001282 } + // exp2 { - 
cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::exp2(cl::sycl::cl_float{8.0}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::exp2(s::cl_float{ 8.0 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 256.0f); } // exp10 { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::exp10(cl::sycl::cl_float{2}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::exp10(s::cl_float{ 2 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 100.0f); } + // expm1 { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::expm1(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::expm1(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.648f && r < 0.649f); // ~0.6487212707001282 } + // fabs { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::fabs(cl::sycl::cl_float{-0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + 
myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::fabs(s::cl_float{ -0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.5f); } + // fdim { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fdim(cl::sycl::cl_float{1.6}, cl::sycl::cl_float{0.6}); + AccR[0] = s::fdim(s::cl_float{ 1.6 }, s::cl_float{ 0.6 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 1.0f); } + // floor { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::floor(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::floor(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.f); } + // fma { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fma(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{10.0}, - cl::sycl::cl_float{3.0}); + AccR[0] = s::fma(s::cl_float{ 0.5 }, s::cl_float{ 10.0 }, + s::cl_float{ 3.0 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 8.0f); } + // fmax { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, 
range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fmax(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{0.8}); + AccR[0] = s::fmax(s::cl_float{ 0.5 }, s::cl_float{ 0.8 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.8f); } + // fmin { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fmin(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{0.8}); + AccR[0] = s::fmin(s::cl_float{ 0.5 }, s::cl_float{ 0.8 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.5f); } + // fmod { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fmod(cl::sycl::cl_float{5.1}, cl::sycl::cl_float{3.0}); + AccR[0] = s::fmod(s::cl_float{ 5.1 }, s::cl_float{ 3.0 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2.1f); } // fract with global memory { - cl::sycl::cl_float r{0}; - cl::sycl::cl_float i{999}; + s::cl_float r{ 0 }; + s::cl_float i{ 999 }; { - buffer BufR(&r, range<1>(1)); - buffer BufI(&i, range<1>(1), - {property::buffer::use_host_ptr()}); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - auto AccI = BufI.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::buffer 
BufI(&i, s::range<1>(1), + { s::property::buffer::use_host_ptr() }); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + auto AccI = BufI.get_access(cgh); cgh.single_task([=]() { - global_ptr Iptr(AccI); - AccR[0] = cl::sycl::fract(cl::sycl::cl_float{1.5}, Iptr); + s::global_ptr Iptr(AccI); + AccR[0] = s::fract(s::cl_float{ 1.5 }, Iptr); }); }); } - std::cout << "r " << r << " i " << i << std::endl; assert(r == 0.5f); assert(i == 1.0f); } // fract with private memory { - cl::sycl::cl_float r{0}; - cl::sycl::cl_float i{999}; + s::cl_float r{ 0 }; + s::cl_float i{ 999 }; { - buffer BufR(&r, range<1>(1)); - buffer BufI(&i, range<1>(1), - {property::buffer::use_host_ptr()}); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - auto AccI = BufI.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::buffer BufI(&i, s::range<1>(1), + { s::property::buffer::use_host_ptr() }); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + auto AccI = BufI.get_access(cgh); cgh.single_task([=]() { - cl::sycl::cl_float temp(0.0); - private_ptr Iptr(&temp); - AccR[0] = cl::sycl::fract(cl::sycl::cl_float{1.5f}, Iptr); + s::cl_float temp(0.0); + s::private_ptr Iptr(&temp); + AccR[0] = s::fract(s::cl_float{ 1.5f }, Iptr); AccI[0] = *Iptr; }); }); } - std::cout << "r " << r << " i " << i << std::endl; assert(r == 0.5f); assert(i == 1.0f); } // nan { - cl::sycl::cl_double r{0}; + s::cl_double r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { AccR[0] = cl::sycl::nan(1LLU); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { AccR[0] = s::nan(1LLU); }); }); } - std::cout << "r " << r << std::endl; assert(std::isnan(r)); } diff --git 
a/sycl/test/built-ins/scalar_relational.cpp b/sycl/test/built-ins/scalar_relational.cpp index 976d29c753388..11642e544b300 100644 --- a/sycl/test/built-ins/scalar_relational.cpp +++ b/sycl/test/built-ins/scalar_relational.cpp @@ -6,441 +6,412 @@ #include -#include #include -#include // for NAN +#include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // isequal-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isequal(cl::sycl::cl_float{10.5f}, - cl::sycl::cl_float{10.5f}); + AccR[0] = s::isequal(s::cl_float{ 10.5f }, s::cl_float{ 10.5f }); }); }); } - std::cout << "isequal r \t" << r << std::endl; assert(r == 1); } // isnotequal-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isnotequal(cl::sycl::cl_float{0.4f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::isnotequal(s::cl_float{ 0.4f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "isnotequal r \t" << r << std::endl; assert(r == 1); } // isgreater-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isgreater(cl::sycl::cl_float{0.6f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = 
s::isgreater(s::cl_float{ 0.6f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "isgreater r \t" << r << std::endl; assert(r == 1); } // isgreaterequal-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isgreaterequal(cl::sycl::cl_float{0.5f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::isgreaterequal(s::cl_float{ 0.5f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "isgreaterequal r \t" << r << std::endl; assert(r == 1); } // isless-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isless(cl::sycl::cl_float{0.4f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::isless(s::cl_float{ 0.4f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "isless r \t" << r << std::endl; assert(r == 1); } // islessequal-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::islessequal(cl::sycl::cl_float{0.5f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::islessequal(s::cl_float{ 0.5f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "islessequal r \t" << r << std::endl; assert(r == 1); } // islessgreater-float { - cl::sycl::cl_int r{1}; + s::cl_int r{ 1 }; { - buffer BufR(&r, 
range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::islessgreater(cl::sycl::cl_float{0.5f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::islessgreater(s::cl_float{ 0.5f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "islessgreater r \t" << r << std::endl; assert(r == 0); } // isfinite-float { - cl::sycl::cl_int r{1}; + s::cl_int r{ 1 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::isfinite(cl::sycl::cl_float{NAN}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::isfinite(s::cl_float{ NAN }); + }); }); } - std::cout << "isfinite r \t" << r << std::endl; assert(r == 0); } // isinf-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::isinf(cl::sycl::cl_float{INFINITY}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::isinf(s::cl_float{ INFINITY }); + }); }); } - std::cout << "isinf r \t" << r << std::endl; assert(r == 1); } // isnan-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::isnan(cl::sycl::cl_float{NAN}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { 
+ auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::isnan(s::cl_float{ NAN }); + }); }); } - std::cout << "isnan r \t" << r << std::endl; assert(r == 1); } // isnormal-float { - cl::sycl::cl_int r{1}; + s::cl_int r{ 1 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isnormal(cl::sycl::cl_float{INFINITY}); + AccR[0] = s::isnormal(s::cl_float{ INFINITY }); }); }); } - std::cout << "isnormal r \t" << r << std::endl; assert(r == 0); } // isnormal-double { - cl::sycl::cl_int r{1}; + s::cl_int r{ 1 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isnormal(cl::sycl::cl_double{INFINITY}); + AccR[0] = s::isnormal(s::cl_double{ INFINITY }); }); }); } - std::cout << "isnormal r \t" << r << std::endl; assert(r == 0); } // isordered-float { - cl::sycl::cl_int r{1}; + s::cl_int r{ 1 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isordered(cl::sycl::cl_float{4.0f}, - cl::sycl::cl_float{NAN}); + AccR[0] = s::isordered(s::cl_float{ 4.0f }, s::cl_float{ NAN }); }); }); } - std::cout << "isordered r \t" << r << std::endl; assert(r == 0); } // isunordered-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - 
auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isunordered(cl::sycl::cl_float{4.0f}, - cl::sycl::cl_float{NAN}); + AccR[0] = s::isunordered(s::cl_float{ 4.0f }, s::cl_float{ NAN }); }); }); } - std::cout << "isunordered r \t" << r << std::endl; assert(r == 1); } // signbit-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::signbit(cl::sycl::cl_float{-12.0f}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::signbit(s::cl_float{ -12.0f }); + }); }); } - std::cout << "signbit r \t" << r << std::endl; assert(r == 1); } // any-integer { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::any(cl::sycl::cl_int{12}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int{ 12 }); + }); }); } - std::cout << "any + r \t" << r << std::endl; assert(r == 0); } // any-integer { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::any(cl::sycl::cl_int{0}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int{ 0 }); + 
}); }); } - std::cout << "any 0 r \t" << r << std::endl; assert(r == 0); } // any-integer { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::any(cl::sycl::cl_int{-12}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int{ -12 }); + }); }); } - std::cout << "any - r \t" << r << std::endl; assert(r == 1); } // all-integer { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::all(cl::sycl::cl_int{12}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int{ 12 }); + }); }); } - std::cout << "all + r \t" << r << std::endl; assert(r == 0); } // all-integer { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::all(cl::sycl::cl_int{0}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int{ 0 }); + }); }); } - std::cout << "all 0 r \t" << r << std::endl; assert(r == 0); } // all-integer { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::all(cl::sycl::cl_int{-12}); }); + s::buffer BufR(&r, s::range<1>(1)); 
+ s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int{ -12 }); + }); }); } - std::cout << "all - r \t" << r << std::endl; assert(r == 1); } // bitselect-float { - cl::sycl::cl_float r{0.0f}; + s::cl_float r{ 0.0f }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::bitselect(cl::sycl::cl_float{112.112}, - cl::sycl::cl_float{34.34}, - cl::sycl::cl_float{3.3}); + AccR[0] = s::bitselect(s::cl_float{ 112.112 }, s::cl_float{ 34.34 }, + s::cl_float{ 3.3 }); }); }); } - std::cout << "bitselect r \t" << r << std::endl; assert(r <= 80.5478 && r >= 80.5476); // r = 80.5477 } // select-float,int { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::select(cl::sycl::cl_float{34.34}, - cl::sycl::cl_float{123.123}, - cl::sycl::cl_int{1}); + AccR[0] = s::select(s::cl_float{ 34.34 }, s::cl_float{ 123.123 }, + s::cl_int{ 1 }); }); }); } - std::cout << "select + r \t" << r << std::endl; assert(r <= 123.124 && r >= 123.122); // r = 123.123 } // select-float,int { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::select(cl::sycl::cl_float{34.34}, - 
cl::sycl::cl_float{123.123}, - cl::sycl::cl_int{0}); + AccR[0] = s::select(s::cl_float{ 34.34 }, s::cl_float{ 123.123 }, + s::cl_int{ 0 }); }); }); } - std::cout << "select 0 r \t" << r << std::endl; assert(r <= 34.35 && r >= 34.33); // r = 34.34 } // select-float,int { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::select(cl::sycl::cl_float{34.34}, - cl::sycl::cl_float{123.123}, - cl::sycl::cl_int{-1}); + AccR[0] = s::select(s::cl_float{ 34.34 }, s::cl_float{ 123.123 }, + s::cl_int{ -1 }); }); }); } - std::cout << "select - r \t" << r << std::endl; assert(r <= 123.124 && r >= 123.122); // r = 123.123 } diff --git a/sycl/test/built-ins/vector_common.cpp b/sycl/test/built-ins/vector_common.cpp index dcf877b7772ac..fe8c3527dc07e 100644 --- a/sycl/test/built-ins/vector_common.cpp +++ b/sycl/test/built-ins/vector_common.cpp @@ -6,50 +6,46 @@ #include -#include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // max { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_float2{0.5f, 3.4f}, - cl::sycl::cl_float2{2.3f, 0.4f}); + AccR[0] = + s::max(s::cl_float2{ 0.5f, 3.4f }, s::cl_float2{ 2.3f, 0.4f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 2.3f); assert(r2 == 3.4f); } // max { 
- cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_float2{0.5f, 3.4f}, - cl::sycl::cl_float{3.0f}); + AccR[0] = s::max(s::cl_float2{ 0.5f, 3.4f }, s::cl_float{ 3.0f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 3.0f); assert(r2 == 3.4f); } diff --git a/sycl/test/built-ins/vector_geometric.cpp b/sycl/test/built-ins/vector_geometric.cpp index 0f7c5a8b3d499..4bf353341d2c9 100644 --- a/sycl/test/built-ins/vector_geometric.cpp +++ b/sycl/test/built-ins/vector_geometric.cpp @@ -6,72 +6,52 @@ #include -#include #include #include -using namespace cl::sycl; +namespace s = cl::sycl; -bool isFloatEqualTo(float x, float y, float epsilon = 0.005f){ +bool isFloatEqualTo(float x, float y, float epsilon = 0.005f) { return std::fabs(x - y) <= epsilon; } int main() { // dot { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::dot( - cl::sycl::cl_float2{ - 1.f, - 2.f, - }, - cl::sycl::cl_float2{4.f, 6.f}); + AccR[0] = s::dot(s::cl_float2{ 1.f, 2.f, }, s::cl_float2{ 4.f, 6.f }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 16.f); } // cross { - cl::sycl::cl_float4 r{0}; + s::cl_float4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = 
BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::cross( - cl::sycl::cl_float4{ - 2.f, - 3.f, - 4.f, - 0.f, - }, - cl::sycl::cl_float4{ - 5.f, - 6.f, - 7.f, - 0.f, - }); + AccR[0] = s::cross(s::cl_float4{ 2.f, 3.f, 4.f, 0.f, }, + s::cl_float4{ 5.f, 6.f, 7.f, 0.f, }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float r3 = r.z(); - cl::sycl::cl_float r4 = r.w(); + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float r3 = r.z(); + s::cl_float r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -3.f); assert(r2 == 6.f); assert(r3 == -3.f); @@ -80,137 +60,106 @@ int main() { // distance { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::distance( - cl::sycl::cl_float2{ - 1.f, - 2.f, - }, - cl::sycl::cl_float2{ - 3.f, - 4.f, - }); + AccR[0] = + s::distance(s::cl_float2{ 1.f, 2.f, }, s::cl_float2{ 3.f, 4.f, }); }); }); } - std::cout << "r " << r << std::endl; assert(isFloatEqualTo(r, 2.82843f)); } // length { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::length(cl::sycl::cl_float2{ - 1.f, - 2.f, - }); + AccR[0] = s::length(s::cl_float2{ 1.f, 2.f, }); }); }); } - std::cout << "r " << r << std::endl; 
assert(isFloatEqualTo(r, 2.23607f)); } + // normalize { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::normalize(cl::sycl::cl_float2{ - 1.f, - 2.f, - }); + AccR[0] = s::normalize(s::cl_float2{ 1.f, 2.f, }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; assert(isFloatEqualTo(r1, 0.447214f)); assert(isFloatEqualTo(r2, 0.894427f)); } // fast_distance { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fast_distance( - cl::sycl::cl_float2{ - 1.f, - 2.f, - }, - cl::sycl::cl_float2{ - 3.f, - 4.f, - }); + AccR[0] = s::fast_distance(s::cl_float2{ 1.f, 2.f, }, + s::cl_float2{ 3.f, 4.f, }); }); }); } - std::cout << "r " << r << std::endl; assert(isFloatEqualTo(r, 2.82843f)); } // fast_length { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fast_length(cl::sycl::cl_float2{ - 1.f, - 2.f, - }); + AccR[0] = s::fast_length(s::cl_float2{ 1.f, 2.f, }); }); }); } - std::cout << "r " << r << std::endl; assert(isFloatEqualTo(r, 2.23607f)); } // 
fast_normalize { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fast_normalize(cl::sycl::cl_float2{ - 1.f, - 2.f, - }); + AccR[0] = s::fast_normalize(s::cl_float2{ 1.f, 2.f, }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; assert(isFloatEqualTo(r1, 0.447144)); assert(isFloatEqualTo(r2, 0.894287)); } diff --git a/sycl/test/built-ins/vector_integer.cpp b/sycl/test/built-ins/vector_integer.cpp index 5d6c721e2bb57..507bd29838066 100644 --- a/sycl/test/built-ins/vector_integer.cpp +++ b/sycl/test/built-ins/vector_integer.cpp @@ -17,8 +17,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::max(s::cl_int2{ 5, 3 }, s::cl_int2{ 2, 7 }); @@ -36,8 +36,8 @@ int main() { s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::max(s::cl_uint2{ 5, 3 }, s::cl_uint2{ 2, 7 }); @@ -55,8 +55,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::max(s::cl_int2{ 5, 3 }, s::cl_int{ 2 }); @@ -74,8 +74,8 @@ int main() { s::cl_uint2 r{ 0 }; { 
s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::max(s::cl_uint2{ 5, 3 }, s::cl_uint{ 2 }); @@ -93,8 +93,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::min(s::cl_int2{ 5, 3 }, s::cl_int2{ 2, 7 }); @@ -112,8 +112,8 @@ int main() { s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::min(s::cl_uint2{ 5, 3 }, s::cl_uint2{ 2, 7 }); @@ -131,8 +131,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::min(s::cl_int2{ 5, 3 }, s::cl_int{ 2 }); @@ -150,8 +150,8 @@ int main() { s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::min(s::cl_uint2{ 5, 3 }, s::cl_uint{ 2 }); @@ -169,8 +169,8 @@ int main() { s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::abs(s::cl_int2{ -5, -2 }); @@ -188,8 +188,8 @@ int main() { s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - 
myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::abs_diff(s::cl_int2{ -5, -2 }, s::cl_int2{ -1, -1 }); @@ -207,8 +207,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::add_sat(s::cl_int2{ 0x7FFFFFFF, 0x7FFFFFFF }, @@ -227,8 +227,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::hadd(s::cl_int2{ 0x0000007F, 0x0000007F }, @@ -247,8 +247,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::rhadd(s::cl_int2{ 0x0000007F, 0x0000007F }, @@ -267,8 +267,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::clamp(s::cl_int2{ 5, 5 }, s::cl_int2{ 10, 10 }, @@ -287,8 +287,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = @@ -307,8 +307,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + 
myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::clz(s::cl_int2{ 0x0FFFFFFF, 0x0FFFFFFF }); @@ -326,8 +326,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mad_hi(s::cl_int2{ 0x10000000, 0x10000000 }, @@ -347,8 +347,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mad_sat(s::cl_int2{ 0x10000000, 0x10000000 }, @@ -368,8 +368,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mul_hi(s::cl_int2{ 0x10000000, 0x10000000 }, @@ -388,8 +388,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::rotate(s::cl_int2{ 0x11100000, 0x11100000 }, @@ -408,8 +408,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::sub_sat(s::cl_int2{ 10, 10 }, @@ -428,8 +428,8 @@ int main() { s::cl_ushort2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = 
BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_uchar2{ 0x10, 0x10 }, @@ -448,8 +448,8 @@ int main() { s::cl_short2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_char2{ 0x10, 0x10 }, @@ -468,8 +468,8 @@ int main() { s::cl_uint2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_ushort2{ 0x0010, 0x0010 }, @@ -488,8 +488,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_short2{ 0x0010, 0x0010 }, @@ -508,8 +508,8 @@ int main() { s::cl_ulong2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_uint2{ 0x00000010, 0x00000010 }, @@ -528,8 +528,8 @@ int main() { s::cl_long2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::upsample(s::cl_int2{ 0x00000010, 0x00000010 }, @@ -548,8 +548,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = 
s::popcount(s::cl_int2{ 0x000000FF, 0x000000FF }); @@ -567,8 +567,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mad24(s::cl_int2{ 0xFFFFFFFF, 0xFFFFFFFF }, @@ -587,8 +587,8 @@ int main() { s::cl_int2 r{ 0 }; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myqueue; - myqueue.submit([&](s::handler &cgh) { + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::mul24(s::cl_int2{ 0xFFFFFFFF, 0xFFFFFFFF }, diff --git a/sycl/test/built-ins/vector_math.cpp b/sycl/test/built-ins/vector_math.cpp index 6c45b6c7309fb..b822a931a34dd 100644 --- a/sycl/test/built-ins/vector_math.cpp +++ b/sycl/test/built-ins/vector_math.cpp @@ -9,115 +9,110 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // fmin { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fmin(cl::sycl::cl_float2{0.5f, 3.4f}, - cl::sycl::cl_float2{2.3f, 0.4f}); + AccR[0] = + s::fmin(s::cl_float2{ 0.5f, 3.4f }, s::cl_float2{ 2.3f, 0.4f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 0.5f); assert(r2 == 0.4f); } // fabs { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue 
myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fabs(cl::sycl::cl_float2{-1.0f, 2.0f}); + AccR[0] = s::fabs(s::cl_float2{ -1.0f, 2.0f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 1.0f); assert(r2 == 2.0f); } // floor { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::floor(cl::sycl::cl_float2{1.4f, 2.8f}); + AccR[0] = s::floor(s::cl_float2{ 1.4f, 2.8f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 1.0f); assert(r2 == 2.0f); } // ceil { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::ceil(cl::sycl::cl_float2{1.4f, 2.8f}); + AccR[0] = s::ceil(s::cl_float2{ 1.4f, 2.8f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 2); assert(r2 == 3); } // fract with global memory { - cl::sycl::cl_float2 r{0, 0}; - cl::sycl::cl_float2 i{0, 0}; + s::cl_float2 r{ 0, 0 }; + s::cl_float2 i{ 0, 0 }; { - buffer BufR(&r, range<1>(1)); - buffer 
BufI(&i, range<1>(1)); + s::buffer BufR(&r, s::range<1>(1)); + s::buffer BufI(&i, s::range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - auto AccI = BufI.get_access(cgh); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + auto AccI = BufI.get_access(cgh); cgh.single_task([=]() { - global_ptr Iptr(AccI); - AccR[0] = cl::sycl::fract(cl::sycl::cl_float2{1.5f, 2.5f}, Iptr); + s::global_ptr Iptr(AccI); + AccR[0] = s::fract(s::cl_float2{ 1.5f, 2.5f }, Iptr); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float i1 = i.x(); - cl::sycl::cl_float i2 = i.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << " i1 " << i1 << " i2 " << i2 - << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float i1 = i.x(); + s::cl_float i2 = i.y(); + assert(r1 == 0.5f); assert(r2 == 0.5f); assert(i1 == 1.0f); @@ -126,30 +121,29 @@ int main() { // fract with private memory { - cl::sycl::cl_float2 r{0, 0}; - cl::sycl::cl_float2 i{0, 0}; + s::cl_float2 r{ 0, 0 }; + s::cl_float2 i{ 0, 0 }; { - buffer BufR(&r, range<1>(1)); - buffer BufI(&i, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - auto AccI = BufI.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::buffer BufI(&i, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + auto AccI = BufI.get_access(cgh); cgh.single_task([=]() { - cl::sycl::cl_float2 temp(0.0); - private_ptr Iptr(&temp); - AccR[0] = cl::sycl::fract(cl::sycl::cl_float2{1.5f, 2.5f}, Iptr); + s::cl_float2 temp(0.0); + s::private_ptr Iptr(&temp); + AccR[0] = s::fract(s::cl_float2{ 1.5f, 2.5f }, Iptr); AccI[0] = *Iptr; }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float i1 = i.x(); - cl::sycl::cl_float i2 = i.y(); - std::cout << "r1 " << r1 << " r2 " << 
r2 << " i1 " << i1 << " i2 " << i2 - << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float i1 = i.x(); + s::cl_float i2 = i.y(); + assert(r1 == 0.5f); assert(r2 == 0.5f); assert(i1 == 1.0f); diff --git a/sycl/test/built-ins/vector_relational.cpp b/sycl/test/built-ins/vector_relational.cpp index e2203df2a8021..528189ed0b885 100644 --- a/sycl/test/built-ins/vector_relational.cpp +++ b/sycl/test/built-ins/vector_relational.cpp @@ -10,28 +10,27 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // isequal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::isequal(cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isequal(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -43,23 +42,22 @@ int main() { // isnotequal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isnotequal( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = 
s::isnotequal(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -71,23 +69,22 @@ int main() { // isgreater { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isgreater( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isgreater(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -99,23 +96,22 @@ int main() { // isgreaterequal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isgreaterequal( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isgreaterequal(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - 
cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -127,23 +123,22 @@ int main() { // isless { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::isless(cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isless(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -155,23 +150,22 @@ int main() { // islessequal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::islessequal( - cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::islessequal(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + 
s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -183,23 +177,23 @@ int main() { // islessgreater { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::islessgreater( - cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, INFINITY}); + AccR[0] = + s::islessgreater(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -212,22 +206,21 @@ int main() { // isfinite { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isfinite( - cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); + AccR[0] = s::isfinite(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -239,22 
+232,21 @@ int main() { // isinf { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::isinf(cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); + AccR[0] = s::isinf(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -266,22 +258,21 @@ int main() { // isnan { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::isnan(cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); + AccR[0] = s::isnan(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -293,22 +284,21 @@ int main() { // isnormal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = 
BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isnormal( - cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); + AccR[0] = s::isnormal(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -320,23 +310,22 @@ int main() { // isordered { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isordered( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isordered(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -348,23 +337,22 @@ int main() { // isunordered { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isunordered( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = 
s::isunordered(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -376,22 +364,21 @@ int main() { // signbit { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::signbit( - cl::sycl::cl_float4{0.5f, -12.0f, NAN, INFINITY}); + AccR[0] = s::signbit(s::cl_float4{ 0.5f, -12.0f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); std::cout << "sign r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -404,18 +391,18 @@ int main() { // any. // Call to the device function with vector parameters work. Scalars do not. { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::any(cl::sycl::cl_int4{-12, -12, 0, 1}); + AccR[0] = s::any(s::cl_int4{ -12, -12, 0, 1 }); }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; std::cout << "Any r1 " << r1 << std::endl; assert(r1 == 1); @@ -424,18 +411,18 @@ int main() { // any. 
// Call to the device function with vector parameters work. Scalars do not. { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::any(cl::sycl::cl_int4{-12, -12, -12, -12}); + AccR[0] = s::any(s::cl_int4{ -12, -12, -12, -12 }); }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; std::cout << "Any - r1 " << r1 << std::endl; assert(r1 == 1); @@ -444,18 +431,18 @@ int main() { // any. // Call to the device function with vector parameters work. Scalars do not. { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::any(cl::sycl::cl_int4{0, 0, 0, 0}); + AccR[0] = s::any(s::cl_int4{ 0, 0, 0, 0 }); }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; std::cout << "Any 0 r1 " << r1 << std::endl; assert(r1 == 0); @@ -464,18 +451,18 @@ int main() { // any. // Call to the device function with vector parameters work. Scalars do not. 
{ - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::any(cl::sycl::cl_int4{12, 12, 12, 12}); + AccR[0] = s::any(s::cl_int4{ 12, 12, 12, 12 }); }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; std::cout << "Any + r1 " << r1 << std::endl; assert(r1 == 0); @@ -484,21 +471,21 @@ int main() { // all. // Call to the device function with vector parameters work. Scalars do not. { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::all(cl::sycl::cl_int4{-12, -12, -12, -12}); + AccR[0] = s::all(s::cl_int4{ -12, -12, -12, -12 }); // Infinity (positive or negative) or Nan are not integers. // Passing them creates inconsistent results between host and device // execution. }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; std::cout << "All change r1 " << r1 << std::endl; assert(r1 == 1); @@ -507,18 +494,18 @@ int main() { // all. // Call to the device function with vector parameters work. Scalars do not. 
{ - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::all(cl::sycl::cl_int4{-12, -12, -12, -12}); + AccR[0] = s::all(s::cl_int4{ -12, -12, -12, -12 }); }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; std::cout << "All - r1 " << r1 << std::endl; assert(r1 == 1); @@ -527,18 +514,18 @@ int main() { // all. // Call to the device function with vector parameters work. Scalars do not. { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::all(cl::sycl::cl_int4{0, 0, 0, 0}); + AccR[0] = s::all(s::cl_int4{ 0, 0, 0, 0 }); }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; std::cout << "All 0 r1 " << r1 << std::endl; assert(r1 == 0); @@ -547,18 +534,18 @@ int main() { // all. // Call to the device function with vector parameters work. Scalars do not. 
{ - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::all(cl::sycl::cl_int4{12, 12, 12, 12}); + AccR[0] = s::all(s::cl_int4{ 12, 12, 12, 12 }); }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; std::cout << "All + r1 " << r1 << std::endl; assert(r1 == 0); @@ -566,25 +553,24 @@ int main() { // bitselect { - cl::sycl::cl_float4 r{0}; + s::cl_float4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::bitselect(cl::sycl::cl_float4{112.112, 12.12, 0, 0.0}, - cl::sycl::cl_float4{34.34, 23.23, 1, 0.0}, - cl::sycl::cl_float4{3.3, 6.6, 1, 0.0}); + AccR[0] = s::bitselect(s::cl_float4{ 112.112, 12.12, 0, 0.0 }, + s::cl_float4{ 34.34, 23.23, 1, 0.0 }, + s::cl_float4{ 3.3, 6.6, 1, 0.0 }); }); // Using NAN/INFINITY as any float produced consistent results // between host and device. 
}); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float r3 = r.z(); - cl::sycl::cl_float r4 = r.w(); + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float r3 = r.z(); + s::cl_float r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; @@ -596,26 +582,26 @@ int main() { // select { - cl::sycl::cl_float4 r{0}; + s::cl_float4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::select( - cl::sycl::cl_float4{112.112f, 34.34f, 112.112f, 34.34f}, - cl::sycl::cl_float4{34.34f, 112.112f, 34.34f, 112.112f}, - cl::sycl::cl_int4{0, -1, 0, 1}); + AccR[0] = + s::select(s::cl_float4{ 112.112f, 34.34f, 112.112f, 34.34f }, + s::cl_float4{ 34.34f, 112.112f, 34.34f, 112.112f }, + s::cl_int4{ 0, -1, 0, 1 }); // Using NAN/infinity as an input, which gets // selected by -1, produces a NAN/infinity as expected. }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float r3 = r.z(); - cl::sycl::cl_float r4 = r.w(); + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float r3 = r.z(); + s::cl_float r4 = r.w(); std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 << std::endl; From c136645389c9675e22844962ff04431be8e64fa3 Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Sun, 7 Apr 2019 21:15:32 +0300 Subject: [PATCH 10/11] [SYCL][NFC] Refactoring of the host side built-in functions Removed a code duplication. Moved the common implementation functions to anonymous namespace. 
Signed-off-by: Alexey Voronov --- sycl/include/CL/sycl/builtins.hpp | 2 + .../CL/sycl/detail/generic_type_traits.hpp | 4 + sycl/source/detail/builtins.cpp | 1328 ++++++++--------- sycl/test/built-ins/vector_relational.cpp | 41 - 4 files changed, 644 insertions(+), 731 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index f30258484ef58..5cad3d983540f 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -965,6 +965,8 @@ upsample(T hi, T2 lo) __NOEXC { hi, lo); } +#undef __invoke_s_upsample + // geninteger popcount (geninteger x) template typename std::enable_if::value, T>::type diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index 2e25e50133ded..23e11d8454c80 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -1244,6 +1244,10 @@ template static constexpr T min_v() { return std::numeric_limits::min(); } +template static constexpr T quiet_NaN() { + return std::numeric_limits::quiet_NaN(); +} + } // namespace detail } // namespace sycl } // namespace cl diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index d1446aeff67f6..9e8f5a12b63c2 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -13,7 +13,6 @@ #include #include -#include // TODO Decide whether to mark functions with this attribute. 
#define __NOEXC /*noexcept*/ @@ -194,19 +193,19 @@ namespace d = s::detail; __MAKE_1V_2S_3S(Fun, 8, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2S_3S(Fun, 16, Ret, Arg1, Arg2, Arg3) -#define MAKE_SR_1V_AND(Fun, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 2, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 3, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 4, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 8, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 16, Ret, Arg1) +#define MAKE_SR_1V_AND(Fun, Call, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 2, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 3, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 4, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 8, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 16, Ret, Arg1) -#define MAKE_SR_1V_OR(Fun, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 2, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 3, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 4, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 8, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 16, Ret, Arg1) +#define MAKE_SR_1V_OR(Fun, Call, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 2, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 3, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 4, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 8, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 16, Ret, Arg1) #define MAKE_1V_2P(Fun, Ret, Arg1, Arg2) \ __MAKE_1V_2P(Fun, 2, Ret, Arg1, Arg2) __MAKE_1V_2P(Fun, 3, Ret, Arg1, Arg2) \ @@ -232,44 +231,44 @@ namespace detail { template struct helper { template - void run_1v(Res &r, Op op, T1 x) { + inline void run_1v(Res &r, Op op, T1 x) { helper().run_1v(r, op, x); r.template swizzle() = op(x.template swizzle()); } template - void run_1v_2v(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2v(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2v(r, op, x, y); r.template swizzle() = op(x.template swizzle(), y.template swizzle()); } template - void run_1v_2s(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2s(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2s(r, op, x, y); r.template swizzle() = op(x.template 
swizzle(), y); } template - void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z) { helper().run_1v_2s_3s(r, op, x, y, z); r.template swizzle() = op(x.template swizzle(), y, z); } template - void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2v_rs(r, op, x, y); op(r, x.template swizzle(), y.template swizzle()); } template - void run_1v_rs(Res &r, Op op, T1 x) { + inline void run_1v_rs(Res &r, Op op, T1 x) { helper().run_1v_rs(r, op, x); op(r, x.template swizzle()); } template - void run_1v_2p(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2p(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2p(r, op, x, y); // TODO avoid creating a temporary variable typename std::remove_pointer::type::element_type temp; @@ -278,7 +277,7 @@ template struct helper { } template - void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { helper().run_1v_2v_3p(r, op, x, y, z); // TODO avoid creating a temporary variable typename std::remove_pointer::type::element_type temp; @@ -288,7 +287,7 @@ template struct helper { } template - void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { helper().run_1v_2v_3v(r, op, x, y, z); r.template swizzle() = op(x.template swizzle(), y.template swizzle(), @@ -296,13 +295,13 @@ template struct helper { } template - void run_1v_sr_or(Res &r, Op op, T1 x) { + inline void run_1v_sr_or(Res &r, Op op, T1 x) { helper().run_1v_sr_or(r, op, x); r = (op(x.template swizzle()) || r); } template - void run_1v_sr_and(Res &r, Op op, T1 x) { + inline void run_1v_sr_and(Res &r, Op op, T1 x) { helper().run_1v_sr_and(r, op, x); r = (op(x.template swizzle()) && r); } @@ -310,38 +309,38 @@ template struct helper { template <> struct helper<0> { template - void run_1v(Res &r, Op op, T1 x) { + inline void run_1v(Res &r, Op op, T1 x) { 
r.template swizzle<0>() = op(x.template swizzle<0>()); } template - void run_1v_2v(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2v(Res &r, Op op, T1 x, T2 y) { r.template swizzle<0>() = op(x.template swizzle<0>(), y.template swizzle<0>()); } template - void run_1v_2s(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2s(Res &r, Op op, T1 x, T2 y) { r.template swizzle<0>() = op(x.template swizzle<0>(), y); } template - void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z) { r.template swizzle<0>() = op(x.template swizzle<0>(), y, z); } template - void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { op(r, x.template swizzle<0>(), y.template swizzle<0>()); } template - void run_1v_rs(Res &r, Op op, T1 x) { + inline void run_1v_rs(Res &r, Op op, T1 x) { op(r, x.template swizzle<0>()); } template - void run_1v_2p(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2p(Res &r, Op op, T1 x, T2 y) { // TODO avoid creating a temporary variable typename std::remove_pointer::type::element_type temp; r.template swizzle<0>() = op(x.template swizzle<0>(), &temp); @@ -349,7 +348,7 @@ template <> struct helper<0> { } template - void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { // TODO avoid creating a temporary variable typename std::remove_pointer::type::element_type temp; r.template swizzle<0>() = @@ -358,24 +357,467 @@ template <> struct helper<0> { } template - void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { r.template swizzle<0>() = op(x.template swizzle<0>(), y.template swizzle<0>(), z.template swizzle<0>()); } template - void run_1v_sr_or(Res &r, Op op, T1 x) { + inline void run_1v_sr_or(Res &r, Op op, T1 x) { r = op(x.template swizzle<0>()); } template - void run_1v_sr_and(Res &r, Op op, T1 x) { + inline void run_1v_sr_and(Res &r, Op 
op, T1 x) { r = op(x.template swizzle<0>()); } }; } // namespace detail +s::cl_float OpDot(s::cl_float2, s::cl_float2); +s::cl_float OpDot(s::cl_float3, s::cl_float3); +s::cl_float OpDot(s::cl_float4, s::cl_float4); +s::cl_double OpDot(s::cl_double2, s::cl_double2); +s::cl_double OpDot(s::cl_double3, s::cl_double3); +s::cl_double OpDot(s::cl_double4, s::cl_double4); +#ifndef NO_HALF_ENABLED +s::cl_half OpDot(s::cl_half2, s::cl_half2); +s::cl_half OpDot(s::cl_half3, s::cl_half3); +s::cl_half OpDot(s::cl_half4, s::cl_half4); +#endif + +s::cl_int OpAll(s::cl_int2); +s::cl_int OpAll(s::cl_int3); +s::cl_int OpAll(s::cl_int4); + +namespace { +template inline T __acospi(T x) { return std::acos(x) / M_PI; } + +template inline T __asinpi(T x) { return std::asin(x) / M_PI; } + +template inline T __atanpi(T x) { return std::atan(x) / M_PI; } + +template inline T __atan2pi(T x, T y) { + return std::atan2(x, y) / M_PI; +} + +template inline T __cospi(T x) { return std::cos(M_PI * x); } + +template T inline __fract(T x, T *iptr) { + T f = std::floor(x); + *(iptr) = f; + return std::fmin(x - f, nextafter(T(1.0), T(0.0))); +} + +template inline T __mad(T a, T b, T c) { return (a * b) + c; } + +template inline T __maxmag(T x, T y) { + if (std::fabs(x) > std::fabs(y)) + return x; + if (std::fabs(y) > std::fabs(x)) + return y; + return std::fmax(x, y); +} + +template inline T __minmag(T x, T y) { + if (std::fabs(x) < std::fabs(y)) + return x; + if (std::fabs(y) < std::fabs(x)) + return y; + return std::fmin(x, y); +} + +template inline T __powr(T x, T y) { + return (x >= T(0)) ? 
T(std::pow(x, y)) : x; +} + +template inline T __rootn(T x, s::cl_int y) { + return std::pow(x, T(1.0) / y); +} + +template inline T __rsqrt(T x) { return T(1.0) / std::sqrt(x); } + +template inline T __sincos(T x, T *cosval) { + (*cosval) = std::cos(x); + return std::sin(x); +} + +template inline T __sinpi(T x) { return std::sin(M_PI * x); } + +template inline T __tanpi(T x) { return std::tan(M_PI * x); } + +template inline T __abs_diff(T x, T y) { return std::abs(x - y); } + +template inline T __u_add_sat(T x, T y) { + return (x < (d::max_v() - y) ? x + y : d::max_v()); +} + +template inline T __s_add_sat(T x, T y) { + if (x > 0 && y > 0) + return (x < (d::max_v() - y) ? (x + y) : d::max_v()); + if (x < 0 && y < 0) + return (x > (d::min_v() - y) ? (x + y) : d::min_v()); + return x + y; +} + +template inline T __hadd(T x, T y) { + const T one = 1; + return (x >> one) + (y >> one) + ((y & x) & one); +} + +template inline T __rhadd(T x, T y) { + const T one = 1; + return (x >> one) + (y >> one) + ((y | x) & one); +} + +template inline T __clamp(T x, T minval, T maxval) { + return std::min(std::max(x, minval), maxval); +} + +template inline constexpr T __clz_impl(T x, T m, T n = 0) { + return (x & m) ? n : __clz_impl(x, T(m >> 1), ++n); +} + +template inline constexpr T __clz(T x) { + using UT = typename std::make_unsigned::type; + return (x == T(0)) ? 
sizeof(T) * 8 : __clz_impl(x, d::msbMask(x)); +} + +template T __mul_hi(T a, T b) { + using UPT = typename d::make_upper::type; + UPT a_s = a; + UPT b_s = b; + UPT mul = a_s * b_s; + return (mul >> (sizeof(T) * 8)); +} + +// T is minimum of 64 bits- long or longlong +template inline T __long_mul_hi(T a, T b) { + int halfsize = (sizeof(T) * 8) / 2; + T a1 = a >> halfsize; + T a0 = (a << halfsize) >> halfsize; + T b1 = b >> halfsize; + T b0 = (b << halfsize) >> halfsize; + + // a1b1 - for bits - [64-128) + // a1b0 a0b1 for bits - [32-96) + // a0b0 for bits - [0-64) + T a1b1 = a1 * b1; + T a0b1 = a0 * b1; + T a1b0 = a1 * b0; + T a0b0 = a0 * b0; + + // To get the upper 64 bits: + // 64 bits from a1b1, upper 32 bits from [a1b0 + (a0b1 + a0b0>>32 (carry bit + // in 33rd bit))] with carry bit on 64th bit - use of hadd. Add the a1b1 to + // the above 32 bit result. + T result = + a1b1 + (__hadd(a1b0, (a0b1 + (a0b0 >> halfsize))) >> (halfsize - 1)); + return result; +} + +template inline T __mad_hi(T a, T b, T c) { + return __mul_hi(a, b) + c; +} + +template inline T __long_mad_hi(T a, T b, T c) { + return __long_mul_hi(a, b) + c; +} + +template inline T __s_mad_sat(T a, T b, T c) { + using UPT = typename d::make_upper::type; + UPT mul = UPT(a) * UPT(b); + const UPT max = d::max_v(); + const UPT min = d::min_v(); + mul = std::min(std::max(mul, min), max); + return __s_add_sat(T(mul), c); +} + +template inline T __s_long_mad_sat(T a, T b, T c) { + bool neg_prod = (a < 0) ^ (b < 0); + T mulhi = __long_mul_hi(a, b); + + // check mul_hi. If it is any value != 0. + // if prod is +ve, any value in mulhi means we need to saturate. + // if prod is -ve, any value in mulhi besides -1 means we need to saturate. + if (!neg_prod && mulhi != 0) + return d::max_v(); + if (neg_prod && mulhi != -1) + return d::max_v(); // essentially some other negative value. 
+ return __s_add_sat(T(a * b), c); +} + +template inline T __u_mad_sat(T a, T b, T c) { + using UPT = typename d::make_upper::type; + UPT mul = UPT(a) * UPT(b); + const UPT min = d::min_v(); + const UPT max = d::max_v(); + mul = std::min(std::max(mul, min), max); + return __u_add_sat(T(mul), c); +} + +template inline T __u_long_mad_sat(T a, T b, T c) { + T mulhi = __long_mul_hi(a, b); + // check mul_hi. If it is any value != 0. + if (mulhi != 0) + return d::max_v(); + return __u_add_sat(T(a * b), c); +} + +template inline T __rotate(T x, T n) { + using UT = typename std::make_unsigned::type; + return (x << n) | (UT(x) >> ((sizeof(x) * 8) - n)); +} + +template inline T __u_sub_sat(T x, T y) { + return (y < (x - d::min_v())) ? (x - y) : d::min_v(); +} + +template inline T __s_sub_sat(T x, T y) { + if (y > 0) + return (y < (x - d::min_v()) ? x - y : d::min_v()); + if (y < 0) + return (y > (x - d::max_v()) ? x - y : d::max_v()); + return x; +} + +template +typename d::make_upper::type inline __upsample(T1 hi, T2 lo) { + using UT = typename d::make_upper::type; + return (UT(hi) << (sizeof(T1) * 8)) | lo; +} + +template inline constexpr T __popcount_impl(T x, size_t n = 0) { + return (x == T(0)) ? n : __popcount_impl(x >> 1, ((x & T(1)) ? ++n : n)); +} + +template inline constexpr T __popcount(T x) { + using UT = typename d::make_unsigned::type; + return __popcount_impl(UT(x)); +} + +template inline T __mad24(T x, T y, T z) { return (x * y) + z; } + +template inline T __mul24(T x, T y) { return (x * y); } + +template inline T __fclamp(T x, T minval, T maxval) { + return std::fmin(std::fmax(x, minval), maxval); +} + +template inline T __degrees(T radians) { + return (180 / M_PI) * radians; +} + +template inline T __mix(T x, T y, T a) { return x + (y - x) * a; } + +template inline T __radians(T degrees) { + return (M_PI / 180) * degrees; +} + +template inline T __step(T edge, T x) { + return (x < edge) ? 
0.0 : 1.0; +} + +template inline T __smoothstep(T edge0, T edge1, T x) { + cl_float t; + t = __fclamp((x - edge0) / (edge1 - edge0), T(0), T(1)); + return t * t * (3 - 2 * t); +} + +template inline T __sign(T x) { + if (std::isnan(x)) + return T(0.0); + if (x > 0) + return T(1.0); + if (x < 0) + return T(-1.0); + /* x is +0.0 or -0.0 */ + return x; +} + +template inline T __cross(T p0, T p1) { + T result(0); + result.x() = p0.y() * p1.z() - p0.z() * p1.y(); + result.y() = p0.z() * p1.x() - p0.x() * p1.z(); + result.z() = p0.x() * p1.y() - p0.y() * p1.x(); + return result; +} + +template inline void __OpFMul_impl(T &r, T p0, T p1) { + r += p0 * p1; +} + +template inline T __OpFMul(T p0, T p1) { + T result = 0; + __OpFMul_impl(result, p0, p1); + return result; +} + +template +inline typename std::enable_if::value, T>::type __length(T t) { + return std::sqrt(__OpFMul(t, t)); +} + +template +inline typename std::enable_if::value, + typename T::element_type>::type +__length(T t) { + return std::sqrt(OpDot(t, t)); +} + +template +inline typename std::enable_if::value, T>::type +__normalize(T t) { + T r = __length(t); + return t / T(r); +} + +template +inline typename std::enable_if::value, T>::type +__normalize(T t) { + typename T::element_type r = __length(t); + return t / T(r); +} + +template +inline typename std::enable_if::value, T>::type +__fast_length(T t) { + return std::sqrt(__OpFMul(t, t)); +} + +template +inline typename std::enable_if::value, + typename T::element_type>::type +__fast_length(T t) { + return std::sqrt(OpDot(t, t)); +} + +template +inline typename std::enable_if::value, T>::type +__fast_normalize(T t) { + if (OpAll(t == T(0.0f))) + return t; + typename T::element_type r = std::sqrt(OpDot(t, t)); + return t / T(r); +} + +template inline T __vOpFOrdEqual(T x, T y) { return -(x == y); } + +template inline T __sOpFOrdEqual(T x, T y) { return x == y; } + +template inline T __vOpFUnordNotEqual(T x, T y) { + return -(x != y); +} + +template inline T 
__sOpFUnordNotEqual(T x, T y) { return x != y; } + +template inline T __vOpFOrdGreaterThan(T x, T y) { + return -(x > y); +} + +template inline T __sOpFOrdGreaterThan(T x, T y) { return x > y; } + +template inline T __vOpFOrdGreaterThanEqual(T x, T y) { + return -(x >= y); +} + +template inline T __sOpFOrdGreaterThanEqual(T x, T y) { + return x >= y; +} + +template inline T __vOpFOrdLessThanEqual(T x, T y) { + return -(x <= y); +} + +template inline T __sOpFOrdLessThanEqual(T x, T y) { + return x <= y; +} + +template inline T __vOpLessOrGreater(T x, T y) { + return -((x < y) || (x > y)); +} + +template inline T __sOpLessOrGreater(T x, T y) { + return ((x < y) || (x > y)); +} + +template cl_int inline __OpAny(T x) { return d::msbIsSet(x); } +template cl_int inline __OpAll(T x) { return d::msbIsSet(x); } + +template inline T __vOpOrdered(T x, T y) { + return -(!(std::isunordered(x, y))); +} + +template inline T __sOpOrdered(T x, T y) { + return !(std::isunordered(x, y)); +} + +template inline T __vOpUnordered(T x, T y) { + return -(std::isunordered(x, y)); +} + +template inline T __sOpUnordered(T x, T y) { + return std::isunordered(x, y); +} + +template +inline typename std::enable_if::value, T>::type +__bitselect(T a, T b, T c) { + return (a & ~c) | (b & c); +} + +template union databitset; +// float +template <> union databitset { + static_assert(sizeof(uint32_t) == sizeof(float), + "size of float is not equal to 32 bits."); + float f; + uint32_t i; +}; + +// double +template <> union databitset { + static_assert(sizeof(uint64_t) == sizeof(double), + "size of double is not equal to 64 bits."); + double f; + uint64_t i; +}; + +#ifndef NO_HALF_ENABLED +// Half +template <> union databitset { + static_assert(sizeof(uint16_t) == sizeof(cl_half), + "size of half is not equal to 16 bits."); + cl_half f; + uint16_t i; +}; +#endif + +template +typename std::enable_if::value, T>::type inline __bitselect( + T a, T b, T c) { + databitset ba; + ba.f = a; + databitset bb; + 
bb.f = b; + databitset bc; + bc.f = c; + databitset br; + br.f = 0; + br.i = ((ba.i & ~bc.i) | (bb.i & bc.i)); + return br.f; +} + +template inline T2 __OpSelect(T c, T2 b, T2 a) { + return (c ? b : a); +} + +template inline T2 __vOpSelect(T c, T2 b, T2 a) { + return d::msbIsSet(c) ? b : a; +} +} + /* ----------------- 4.13.3 Math functions. Host version --------------------*/ // acos cl_float acos(s::cl_float x) __NOEXC { return std::acos(x); } @@ -402,10 +844,10 @@ MAKE_1V(acosh, s::cl_half, s::cl_half) #endif // acospi -cl_float acospi(s::cl_float x) __NOEXC { return std::acos(x) / M_PI; } -cl_double acospi(s::cl_double x) __NOEXC { return std::acos(x) / M_PI; } +cl_float acospi(s::cl_float x) __NOEXC { return __acospi(x); } +cl_double acospi(s::cl_double x) __NOEXC { return __acospi(x); } #ifndef NO_HALF_ENABLED -cl_half acospi(s::cl_half x) __NOEXC { return std::acos(x) / M_PI; } +cl_half acospi(s::cl_half x) __NOEXC { return __acospi(x); } #endif MAKE_1V(acospi, s::cl_float, s::cl_float) MAKE_1V(acospi, s::cl_double, s::cl_double) @@ -438,10 +880,10 @@ MAKE_1V(asinh, s::cl_half, s::cl_half) #endif // asinpi -cl_float asinpi(s::cl_float x) __NOEXC { return std::asin(x) / M_PI; } -cl_double asinpi(s::cl_double x) __NOEXC { return std::asin(x) / M_PI; } +cl_float asinpi(s::cl_float x) __NOEXC { return __asinpi(x); } +cl_double asinpi(s::cl_double x) __NOEXC { return __asinpi(x); } #ifndef NO_HALF_ENABLED -cl_half asinpi(s::cl_half x) __NOEXC { return std::asin(x) / M_PI; } +cl_half asinpi(s::cl_half x) __NOEXC { return __asinpi(x); } #endif MAKE_1V(asinpi, s::cl_float, s::cl_float) MAKE_1V(asinpi, s::cl_double, s::cl_double) @@ -490,10 +932,10 @@ MAKE_1V(atanh, s::cl_half, s::cl_half) #endif // atanpi -cl_float atanpi(s::cl_float x) __NOEXC { return std::atan(x) / M_PI; } -cl_double atanpi(s::cl_double x) __NOEXC { return std::atan(x) / M_PI; } +cl_float atanpi(s::cl_float x) __NOEXC { return __atanpi(x); } +cl_double atanpi(s::cl_double x) __NOEXC { return 
__atanpi(x); } #ifndef NO_HALF_ENABLED -cl_half atanpi(s::cl_half x) __NOEXC { return std::atan(x) / M_PI; } +cl_half atanpi(s::cl_half x) __NOEXC { return __atanpi(x); } #endif MAKE_1V(atanpi, s::cl_float, s::cl_float) MAKE_1V(atanpi, s::cl_double, s::cl_double) @@ -503,15 +945,13 @@ MAKE_1V(atanpi, s::cl_half, s::cl_half) // atan2pi cl_float atan2pi(s::cl_float x, s::cl_float y) __NOEXC { - return std::atan2(x, y) / M_PI; + return __atan2pi(x, y); } cl_double atan2pi(s::cl_double x, s::cl_double y) __NOEXC { - return std::atan2(x, y) / M_PI; + return __atan2pi(x, y); } #ifndef NO_HALF_ENABLED -cl_half atan2pi(s::cl_half x, s::cl_half y) __NOEXC { - return std::atan2(x, y) / M_PI; -} +cl_half atan2pi(s::cl_half x, s::cl_half y) __NOEXC { return __atan2pi(x, y); } #endif MAKE_1V_2V(atan2pi, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(atan2pi, s::cl_double, s::cl_double, s::cl_double) @@ -586,10 +1026,10 @@ MAKE_1V(cosh, s::cl_half, s::cl_half) #endif // cospi -cl_float cospi(s::cl_float x) __NOEXC { return std::cos(M_PI * x); } -cl_double cospi(s::cl_double x) __NOEXC { return std::cos(M_PI * x); } +cl_float cospi(s::cl_float x) __NOEXC { return __cospi(x); } +cl_double cospi(s::cl_double x) __NOEXC { return __cospi(x); } #ifndef NO_HALF_ENABLED -cl_half cospi(s::cl_half x) __NOEXC { return std::cos(M_PI * x); } +cl_half cospi(s::cl_half x) __NOEXC { return __cospi(x); } #endif MAKE_1V(cospi, s::cl_float, s::cl_float) MAKE_1V(cospi, s::cl_double, s::cl_double) @@ -786,12 +1226,6 @@ MAKE_1V_2V(nextafter, s::cl_half, s::cl_half, s::cl_half) #endif // fract -template T __fract(T x, T *iptr) { - T f = std::floor(x); - *(iptr) = f; - return std::fmin(x - f, nextafter(T(1.0), T(0.0))); -} - cl_float fract(s::cl_float x, s::cl_float *iptr) __NOEXC { return __fract(x, iptr); } @@ -961,14 +1395,14 @@ MAKE_1V(logb, s::cl_half, s::cl_half) // mad cl_float mad(s::cl_float a, s::cl_float b, s::cl_float c) __NOEXC { - return (a * b) + c; + return __mad(a, b, c); } 
cl_double mad(s::cl_double a, s::cl_double b, s::cl_double c) __NOEXC { - return (a * b) + c; + return __mad(a, b, c); } #ifndef NO_HALF_ENABLED cl_half mad(s::cl_half a, s::cl_half b, s::cl_half c) __NOEXC { - return (a * b) + c; + return __mad(a, b, c); } #endif MAKE_1V_2V_3V(mad, s::cl_float, s::cl_float, s::cl_float, s::cl_float) @@ -978,34 +1412,12 @@ MAKE_1V_2V_3V(mad, s::cl_half, s::cl_half, s::cl_half, s::cl_half) #endif // maxmag -cl_float maxmag(s::cl_float x, s::cl_float y) __NOEXC { - if (std::fabs(x) > std::fabs(y)) { - return x; - } else if (std::fabs(y) > std::fabs(x)) { - return y; - } else { - return std::fmax(x, y); - } -} +cl_float maxmag(s::cl_float x, s::cl_float y) __NOEXC { return __maxmag(x, y); } cl_double maxmag(s::cl_double x, s::cl_double y) __NOEXC { - if (std::fabs(x) > std::fabs(y)) { - return x; - } else if (std::fabs(y) > std::fabs(x)) { - return y; - } else { - return std::fmax(x, y); - } + return __maxmag(x, y); } #ifndef NO_HALF_ENABLED -cl_half maxmag(s::cl_half x, s::cl_half y) __NOEXC { - if (std::fabs(x) > std::fabs(y)) { - return x; - } else if (std::fabs(y) > std::fabs(x)) { - return y; - } else { - return std::fmax(x, y); - } -} +cl_half maxmag(s::cl_half x, s::cl_half y) __NOEXC { return __maxmag(x, y); } #endif MAKE_1V_2V(maxmag, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(maxmag, s::cl_double, s::cl_double, s::cl_double) @@ -1014,34 +1426,12 @@ MAKE_1V_2V(maxmag, s::cl_half, s::cl_half, s::cl_half) #endif // minmag -cl_float minmag(s::cl_float x, s::cl_float y) __NOEXC { - if (std::fabs(x) < std::fabs(y)) { - return x; - } else if (std::fabs(y) < std::fabs(x)) { - return y; - } else { - return std::fmin(x, y); - } -} +cl_float minmag(s::cl_float x, s::cl_float y) __NOEXC { return __minmag(x, y); } cl_double minmag(s::cl_double x, s::cl_double y) __NOEXC { - if (std::fabs(x) < std::fabs(y)) { - return x; - } else if (std::fabs(y) < std::fabs(x)) { - return y; - } else { - return std::fmin(x, y); - } + return 
__minmag(x, y); } #ifndef NO_HALF_ENABLED -cl_half minmag(s::cl_half x, s::cl_half y) __NOEXC { - if (std::fabs(x) < std::fabs(y)) { - return x; - } else if (std::fabs(y) < std::fabs(x)) { - return y; - } else { - return std::fmin(x, y); - } -} +cl_half minmag(s::cl_half x, s::cl_half y) __NOEXC { return __minmag(x, y); } #endif MAKE_1V_2V(minmag, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(minmag, s::cl_double, s::cl_double, s::cl_double) @@ -1058,10 +1448,7 @@ cl_double modf(s::cl_double x, s::cl_double *iptr) __NOEXC { } #ifndef __HALF_NO_ENABLED cl_half modf(s::cl_half x, s::cl_half *iptr) __NOEXC { - float t = 0; - float r = std::modf(x, &t); - *iptr = t; - return r; + return std::modf(x, reinterpret_cast(iptr)); } #endif MAKE_1V_2P(modf, s::cl_float, s::cl_float, s::cl_float) @@ -1071,18 +1458,12 @@ MAKE_1V_2P(modf, s::cl_half, s::cl_half, s::cl_half) #endif // nan -cl_float nan(s::cl_uint nancode) __NOEXC { - return std::numeric_limits::quiet_NaN(); -} -cl_double nan(s::cl_ulong nancode) __NOEXC { - return std::numeric_limits::quiet_NaN(); -} -cl_double nan(s::ulonglong nancode) __NOEXC { - return std::numeric_limits::quiet_NaN(); -} +cl_float nan(s::cl_uint nancode) __NOEXC { return d::quiet_NaN(); } +cl_double nan(s::cl_ulong nancode) __NOEXC { return d::quiet_NaN(); } +cl_double nan(s::ulonglong nancode) __NOEXC { return d::quiet_NaN(); } #ifndef __HALF_NO_ENABLED cl_half nan(s::cl_ushort nancode) __NOEXC { - return s::cl_half(std::numeric_limits::quiet_NaN()); + return s::cl_half(d::quiet_NaN()); } #endif MAKE_1V(nan, s::cl_float, s::cl_uint) @@ -1117,16 +1498,10 @@ MAKE_1V_2V(pown, s::cl_half, s::cl_half, s::cl_int) #endif // powr -cl_float powr(s::cl_float x, s::cl_float y) __NOEXC { - return (x >= 0 ? std::pow(x, y) : x); -} -cl_double powr(s::cl_double x, s::cl_double y) __NOEXC { - return (x >= 0 ? 
std::pow(x, y) : x); -} +cl_float powr(s::cl_float x, s::cl_float y) __NOEXC { return __powr(x, y); } +cl_double powr(s::cl_double x, s::cl_double y) __NOEXC { return __powr(x, y); } #ifndef __HALF_NO_ENABLED -cl_half powr(s::cl_half x, s::cl_half y) __NOEXC { - return (x >= s::cl_half(0) ? std::pow(x, y) : s::cl_float(x)); -} +cl_half powr(s::cl_half x, s::cl_half y) __NOEXC { return __powr(x, y); } #endif MAKE_1V_2V(powr, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(powr, s::cl_double, s::cl_double, s::cl_double) @@ -1183,16 +1558,10 @@ MAKE_1V(rint, s::cl_half, s::cl_half) #endif // rootn -cl_float rootn(s::cl_float x, s::cl_int y) __NOEXC { - return std::pow(x, 1.0 / y); -} -cl_double rootn(s::cl_double x, s::cl_int y) __NOEXC { - return std::pow(x, 1.0 / y); -} +cl_float rootn(s::cl_float x, s::cl_int y) __NOEXC { return __rootn(x, y); } +cl_double rootn(s::cl_double x, s::cl_int y) __NOEXC { return __rootn(x, y); } #ifndef NO_HALF_ENABLED -cl_half rootn(s::cl_half x, s::cl_int y) __NOEXC { - return std::pow(x, 1.0 / y); -} +cl_half rootn(s::cl_half x, s::cl_int y) __NOEXC { return __rootn(x, y); } #endif MAKE_1V_2V(rootn, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V(rootn, s::cl_double, s::cl_double, s::cl_int) @@ -1213,10 +1582,10 @@ MAKE_1V(round, s::cl_half, s::cl_half) #endif // rsqrt -cl_float rsqrt(s::cl_float x) __NOEXC { return 1.0 / std::sqrt(x); } -cl_double rsqrt(s::cl_double x) __NOEXC { return 1.0 / std::sqrt(x); } +cl_float rsqrt(s::cl_float x) __NOEXC { return __rsqrt(x); } +cl_double rsqrt(s::cl_double x) __NOEXC { return __rsqrt(x); } #ifndef NO_HALF_ENABLED -cl_half rsqrt(s::cl_half x) __NOEXC { return 1.0 / std::sqrt(x); } +cl_half rsqrt(s::cl_half x) __NOEXC { return __rsqrt(x); } #endif MAKE_1V(rsqrt, s::cl_float, s::cl_float) MAKE_1V(rsqrt, s::cl_double, s::cl_double) @@ -1238,17 +1607,14 @@ MAKE_1V(sin, s::cl_half, s::cl_half) // sincos cl_float sincos(s::cl_float x, s::cl_float *cosval) __NOEXC { - cosval[0] = std::cos(x); - 
return std::sin(x); + return __sincos(x, cosval); } cl_double sincos(s::cl_double x, s::cl_double *cosval) __NOEXC { - cosval[0] = std::cos(x); - return std::sin(x); + return __sincos(x, cosval); } #ifndef NO_HALF_ENABLED cl_half sincos(s::cl_half x, s::cl_half *cosval) __NOEXC { - cosval[0] = std::cos(x); - return std::sin(x); + return __sincos(x, cosval); } #endif MAKE_1V_2P(sincos, s::cl_float, s::cl_float, s::cl_float) @@ -1270,10 +1636,10 @@ MAKE_1V(sinh, s::cl_half, s::cl_half) #endif // sinpi -cl_float sinpi(s::cl_float x) __NOEXC { return std::sin(M_PI * x); } -cl_double sinpi(s::cl_double x) __NOEXC { return std::sin(M_PI * x); } +cl_float sinpi(s::cl_float x) __NOEXC { return __sinpi(x); } +cl_double sinpi(s::cl_double x) __NOEXC { return __sinpi(x); } #ifndef NO_HALF_ENABLED -cl_half sinpi(s::cl_half x) __NOEXC { return std::sin(M_PI * x); } +cl_half sinpi(s::cl_half x) __NOEXC { return __sinpi(x); } #endif MAKE_1V(sinpi, s::cl_float, s::cl_float) MAKE_1V(sinpi, s::cl_double, s::cl_double) @@ -1318,10 +1684,10 @@ MAKE_1V(tanh, s::cl_half, s::cl_half) #endif // tanpi -cl_float tanpi(s::cl_float x) __NOEXC { return std::tan(M_PI * x); } -cl_double tanpi(s::cl_double x) __NOEXC { return std::tan(M_PI * x); } +cl_float tanpi(s::cl_float x) __NOEXC { return __tanpi(x); } +cl_double tanpi(s::cl_double x) __NOEXC { return __tanpi(x); } #ifndef NO_HALF_ENABLED -cl_half tanpi(s::cl_half x) __NOEXC { return std::tan(M_PI * x); } +cl_half tanpi(s::cl_half x) __NOEXC { return __tanpi(x); } #endif MAKE_1V(tanpi, s::cl_float, s::cl_float) MAKE_1V(tanpi, s::cl_double, s::cl_double) @@ -1355,11 +1721,11 @@ MAKE_1V(trunc, s::cl_half, s::cl_half) /* --------------- 4.13.4 Integer functions. 
Host version -------------------*/ // u_abs -cl_uchar u_abs(s::cl_uchar x) __NOEXC { return std::abs(x); } -cl_ushort u_abs(s::cl_ushort x) __NOEXC { return std::abs(x); } -cl_uint u_abs(s::cl_uint x) __NOEXC { return std::abs(x); } -cl_ulong u_abs(s::cl_ulong x) __NOEXC { return std::abs(x); } -s::ulonglong u_abs(s::ulonglong x) __NOEXC { return std::abs(x); } +cl_uchar u_abs(s::cl_uchar x) __NOEXC { return x; } +cl_ushort u_abs(s::cl_ushort x) __NOEXC { return x; } +cl_uint u_abs(s::cl_uint x) __NOEXC { return x; } +cl_ulong u_abs(s::cl_ulong x) __NOEXC { return x; } +s::ulonglong u_abs(s::ulonglong x) __NOEXC { return x; } MAKE_1V(u_abs, s::cl_uchar, s::cl_uchar) MAKE_1V(u_abs, s::cl_ushort, s::cl_ushort) MAKE_1V(u_abs, s::cl_uint, s::cl_uint) @@ -1379,20 +1745,12 @@ MAKE_1V(s_abs, s::cl_ulong, s::cl_long) MAKE_1V(s_abs, s::ulonglong, s::longlong) // u_abs_diff -cl_uchar u_abs_diff(s::cl_uchar x, s::cl_uchar y) __NOEXC { - return std::abs(x - y); -} -cl_ushort u_abs_diff(s::cl_ushort x, s::cl_ushort y) __NOEXC { - return std::abs(x - y); -} -cl_uint u_abs_diff(s::cl_uint x, s::cl_uint y) __NOEXC { - return std::abs(x - y); -} -cl_ulong u_abs_diff(s::cl_ulong x, s::cl_ulong y) __NOEXC { - return std::abs(x - y); -} +cl_uchar u_abs_diff(s::cl_uchar x, s::cl_uchar y) __NOEXC { return x - y; } +cl_ushort u_abs_diff(s::cl_ushort x, s::cl_ushort y) __NOEXC { return x - y; } +cl_uint u_abs_diff(s::cl_uint x, s::cl_uint y) __NOEXC { return x - y; } +cl_ulong u_abs_diff(s::cl_ulong x, s::cl_ulong y) __NOEXC { return x - y; } s::ulonglong u_abs_diff(s::ulonglong x, s::ulonglong y) __NOEXC { - return std::abs(x - y); + return x - y; } MAKE_1V_2V(u_abs_diff, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2V(u_abs_diff, s::cl_ushort, s::cl_ushort, s::cl_ushort) @@ -1402,17 +1760,19 @@ MAKE_1V_2V(u_abs_diff, s::ulonglong, s::ulonglong, s::ulonglong) // s_abs_diff cl_uchar s_abs_diff(s::cl_char x, s::cl_char y) __NOEXC { - return std::abs(x - y); + return __abs_diff(x, y); } 
cl_ushort s_abs_diff(s::cl_short x, s::cl_short y) __NOEXC { - return std::abs(x - y); + return __abs_diff(x, y); +} +cl_uint s_abs_diff(s::cl_int x, s::cl_int y) __NOEXC { + return __abs_diff(x, y); } -cl_uint s_abs_diff(s::cl_int x, s::cl_int y) __NOEXC { return std::abs(x - y); } cl_ulong s_abs_diff(s::cl_long x, s::cl_long y) __NOEXC { - return std::abs(x - y); + return __abs_diff(x, y); } s::ulonglong s_abs_diff(s::longlong x, s::longlong y) __NOEXC { - return std::abs(x - y); + return __abs_diff(x, y); } MAKE_1V_2V(s_abs_diff, s::cl_uchar, s::cl_char, s::cl_char) MAKE_1V_2V(s_abs_diff, s::cl_ushort, s::cl_short, s::cl_short) @@ -1421,10 +1781,6 @@ MAKE_1V_2V(s_abs_diff, s::cl_ulong, s::cl_long, s::cl_long) MAKE_1V_2V(s_abs_diff, s::ulonglong, s::longlong, s::longlong) // u_add_sat -template T __u_add_sat(T x, T y) { - return (x < (d::max_v() - y) ? x + y : d::max_v()); -} - cl_uchar u_add_sat(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __u_add_sat(x, y); } @@ -1447,14 +1803,6 @@ MAKE_1V_2V(u_add_sat, s::cl_ulong, s::cl_ulong, s::cl_ulong) MAKE_1V_2V(u_add_sat, s::ulonglong, s::ulonglong, s::ulonglong) // s_add_sat -template T __s_add_sat(T x, T y) { - if (x > 0 && y > 0) - return (x < (d::max_v() - y) ? (x + y) : d::max_v()); - if (x < 0 && y < 0) - return (x > (d::min_v() - y) ? 
(x + y) : d::min_v()); - return x + y; -} - cl_char s_add_sat(s::cl_char x, s::cl_char y) __NOEXC { return __s_add_sat(x, y); } @@ -1475,11 +1823,6 @@ MAKE_1V_2V(s_add_sat, s::cl_long, s::cl_long, s::cl_long) MAKE_1V_2V(s_add_sat, s::longlong, s::longlong, s::longlong) // u_hadd -template T __hadd(T x, T y) { - const T one = 1; - return (x >> one) + (y >> one) + ((y & x) & one); -} - cl_uchar u_hadd(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __hadd(x, y); } cl_ushort u_hadd(s::cl_ushort x, s::cl_ushort y) __NOEXC { return __hadd(x, y); @@ -1510,11 +1853,6 @@ MAKE_1V_2V(s_hadd, s::cl_long, s::cl_long, s::cl_long) MAKE_1V_2V(s_hadd, s::longlong, s::longlong, s::longlong) // u_rhadd -template T __rhadd(T x, T y) { - const T one = 1; - return (x >> one) + (y >> one) + ((y | x) & one); -} - cl_uchar u_rhadd(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __rhadd(x, y); } cl_ushort u_rhadd(s::cl_ushort x, s::cl_ushort y) __NOEXC { return __rhadd(x, y); @@ -1545,10 +1883,6 @@ MAKE_1V_2V(s_rhadd, s::cl_long, s::cl_long, s::cl_long) MAKE_1V_2V(s_rhadd, s::longlong, s::longlong, s::longlong) // u_clamp -template T __clamp(T x, T minval, T maxval) { - return std::min(std::max(x, minval), maxval); -} - cl_uchar u_clamp(s::cl_uchar x, s::cl_uchar minval, s::cl_uchar maxval) __NOEXC { return __clamp(x, minval, maxval); @@ -1597,7 +1931,6 @@ s::longlong s_clamp(s::longlong x, s::longlong minval, s::longlong maxval) __NOEXC { return __clamp(x, minval, maxval); } - MAKE_1V_2V_3V(s_clamp, s::cl_char, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2V_3V(s_clamp, s::cl_short, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2V_3V(s_clamp, s::cl_int, s::cl_int, s::cl_int, s::cl_int) @@ -1610,15 +1943,6 @@ MAKE_1V_2S_3S(s_clamp, s::cl_long, s::cl_long, s::cl_long, s::cl_long) MAKE_1V_2S_3S(s_clamp, s::longlong, s::longlong, s::longlong, s::longlong) // clz -template inline constexpr T __clz_impl(T x, T m, T n = 0) { - return (x & m) ? 
n : __clz_impl(x, T(m >> 1), ++n); -} - -template inline constexpr T __clz(T x) { - using UT = typename std::make_unsigned::type; - return (x == T(0)) ? sizeof(T) * 8 : __clz_impl(x, d::msbMask(x)); -} - cl_uchar clz(s::cl_uchar x) __NOEXC { return __clz(x); } cl_char clz(s::cl_char x) __NOEXC { return __clz(x); } cl_ushort clz(s::cl_ushort x) __NOEXC { return __clz(x); } @@ -1638,42 +1962,9 @@ MAKE_1V(clz, s::cl_int, s::cl_int) MAKE_1V(clz, s::cl_ulong, s::cl_ulong) MAKE_1V(clz, s::cl_long, s::cl_long) MAKE_1V(clz, s::longlong, s::longlong) -MAKE_1V(clz, s::ulonglong, s::ulonglong) - -// s_mul_hi -template T __mul_hi(T a, T b) { - using UPT = typename d::make_upper::type; - UPT a_s = a; - UPT b_s = b; - UPT mul = a_s * b_s; - return (mul >> (sizeof(T) * 8)); -} - -// T is minimum of 64 bits- long or longlong -template T __long_mul_hi(T a, T b) { - int halfsize = (sizeof(T) * 8) / 2; - T a1 = a >> halfsize; - T a0 = (a << halfsize) >> halfsize; - T b1 = b >> halfsize; - T b0 = (b << halfsize) >> halfsize; - - // a1b1 - for bits - [64-128) - // a1b0 a0b1 for bits - [32-96) - // a0b0 for bits - [0-64) - T a1b1 = a1 * b1; - T a0b1 = a0 * b1; - T a1b0 = a1 * b0; - T a0b0 = a0 * b0; - - // To get the upper 64 bits: - // 64 bits from a1b1, upper 32 bits from [a1b0 + (a0b1 + a0b0>>32 (carry bit - // in 33rd bit))] with carry bit on 64th bit - use of hadd. Add the a1b1 to - // the above 32 bit result. 
- T result = - a1b1 + (__hadd(a1b0, (a0b1 + (a0b0 >> halfsize))) >> (halfsize - 1)); - return result; -} +MAKE_1V(clz, s::ulonglong, s::ulonglong) +// s_mul_hi cl_char s_mul_hi(cl_char a, cl_char b) { return __mul_hi(a, b); } cl_short s_mul_hi(cl_short a, cl_short b) { return __mul_hi(a, b); } cl_int s_mul_hi(cl_int a, cl_int b) { return __mul_hi(a, b); } @@ -1683,7 +1974,6 @@ cl_long s_mul_hi(s::cl_long x, s::cl_long y) __NOEXC { s::longlong s_mul_hi(s::longlong x, s::longlong y) __NOEXC { return __long_mul_hi(x, y); } - MAKE_1V_2V(s_mul_hi, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2V(s_mul_hi, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2V(s_mul_hi, s::cl_int, s::cl_int, s::cl_int) @@ -1700,7 +1990,6 @@ cl_ulong u_mul_hi(s::cl_ulong x, s::cl_ulong y) __NOEXC { s::ulonglong u_mul_hi(s::ulonglong x, s::ulonglong y) __NOEXC { return __long_mul_hi(x, y); } - MAKE_1V_2V(u_mul_hi, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2V(u_mul_hi, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2V(u_mul_hi, s::cl_uint, s::cl_uint, s::cl_uint) @@ -1708,11 +1997,6 @@ MAKE_1V_2V(u_mul_hi, s::cl_ulong, s::cl_ulong, s::cl_ulong) MAKE_1V_2V(u_mul_hi, s::ulonglong, s::ulonglong, s::ulonglong) // s_mad_hi -template T __mad_hi(T a, T b, T c) { return __mul_hi(a, b) + c; } -template T __long_mad_hi(T a, T b, T c) { - return __long_mul_hi(a, b) + c; -} - cl_char s_mad_hi(s::cl_char x, s::cl_char minval, s::cl_char maxval) __NOEXC { return __mad_hi(x, minval, maxval); } @@ -1730,7 +2014,6 @@ s::longlong s_mad_hi(s::longlong x, s::longlong minval, s::longlong maxval) __NOEXC { return __long_mad_hi(x, minval, maxval); } - MAKE_1V_2V_3V(s_mad_hi, s::cl_char, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2V_3V(s_mad_hi, s::cl_short, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2V_3V(s_mad_hi, s::cl_int, s::cl_int, s::cl_int, s::cl_int) @@ -1764,28 +2047,6 @@ MAKE_1V_2V_3V(u_mad_hi, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) MAKE_1V_2V_3V(u_mad_hi, s::ulonglong, s::ulonglong, 
s::ulonglong, s::ulonglong) // s_mad_sat -template T __s_mad_sat(T a, T b, T c) { - using UPT = typename d::make_upper::type; - UPT mul = UPT(a) * UPT(b); - const UPT max = d::max_v(); - const UPT min = d::min_v(); - mul = std::min(std::max(mul, min), max); - return s_add_sat(T(mul), c); -} - -template T __s_long_mad_sat(T a, T b, T c) { - bool neg_prod = (a < 0) ^ (b < 0); - T mulhi = s_mul_hi(a, b); - // check mul_hi. If it is any value != 0. - // if prod is +ve, any value in mulhi means we need to saturate. - // if prod is -ve, any value in mulhi besides -1 means we need to saturate. - if (!neg_prod && mulhi != 0) - return d::max_v(); - else if (neg_prod && mulhi != -1) - return d::max_v(); // essentially some other negative value. - return s_add_sat(T(a * b), c); -} - cl_char s_mad_sat(s::cl_char a, s::cl_char b, s::cl_char c) __NOEXC { return __s_mad_sat(a, b, c); } @@ -1808,24 +2069,6 @@ MAKE_1V_2V_3V(s_mad_sat, s::cl_long, s::cl_long, s::cl_long, s::cl_long) MAKE_1V_2V_3V(s_mad_sat, s::longlong, s::longlong, s::longlong, s::longlong) // u_mad_sat -template T __u_mad_sat(T a, T b, T c) { - using UPT = typename d::make_upper::type; - UPT mul = UPT(a) * UPT(b); - const UPT min = d::min_v(); - const UPT max = d::max_v(); - mul = std::min(std::max(mul, min), max); - return u_add_sat(T(mul), c); -} - -template T __u_long_mad_sat(T a, T b, T c) { - T mulhi = u_mul_hi(a, b); - // check mul_hi. If it is any value != 0. 
- if (mulhi != 0) - return d::max_v(); - else - return u_add_sat(T(a * b), c); -} - cl_uchar u_mad_sat(s::cl_uchar a, s::cl_uchar b, s::cl_uchar c) __NOEXC { return __u_mad_sat(a, b, c); } @@ -1928,11 +2171,6 @@ MAKE_1V_2S(u_min, s::cl_ulong, s::cl_ulong, s::cl_ulong) MAKE_1V_2S(u_min, s::ulonglong, s::ulonglong, s::ulonglong) // rotate -template T __rotate(T x, T n) { - using UT = typename std::make_unsigned::type; - return (x << n) | (UT(x) >> ((sizeof(x) * 8) - n)); -} - cl_uchar rotate(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __rotate(x, y); } cl_ushort rotate(s::cl_ushort x, s::cl_ushort y) __NOEXC { return __rotate(x, y); @@ -1961,10 +2199,6 @@ MAKE_1V_2V(rotate, s::cl_long, s::cl_long, s::cl_long) MAKE_1V_2V(rotate, s::longlong, s::longlong, s::longlong) // u_sub_sat -template T __u_sub_sat(T x, T y) { - return (y < (x - d::min_v())) ? (x - y) : d::min_v(); -} - cl_uchar u_sub_sat(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __u_sub_sat(x, y); } @@ -1987,15 +2221,6 @@ MAKE_1V_2V(u_sub_sat, s::cl_ulong, s::cl_ulong, s::cl_ulong) MAKE_1V_2V(u_sub_sat, s::ulonglong, s::ulonglong, s::ulonglong) // s_sub_sat -template T __s_sub_sat(T x, T y) { - if (y > 0) - return (y < (x - d::min_v()) ? x - y : d::min_v()); - else if (y < 0) - return (y > (x - d::max_v()) ? 
x - y : d::max_v()); - else - return x; -} - cl_char s_sub_sat(s::cl_char x, s::cl_char y) __NOEXC { return __s_sub_sat(x, y); } @@ -2016,12 +2241,6 @@ MAKE_1V_2V(s_sub_sat, s::cl_long, s::cl_long, s::cl_long) MAKE_1V_2V(s_sub_sat, s::longlong, s::longlong, s::longlong) // u_upsample -template -typename d::make_upper::type __upsample(T1 hi, T2 lo) { - using UT = typename d::make_upper::type; - return (UT(hi) << (sizeof(T1) * 8)) | lo; -} - cl_ushort u_upsample(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __upsample(x, y); } @@ -2048,22 +2267,13 @@ cl_int s_upsample(s::cl_short x, s::cl_ushort y) __NOEXC { cl_long s_upsample(s::cl_int x, s::cl_uint y) __NOEXC { return __upsample(x, y); } -MAKE_1V_2V(u_upsample, s::cl_short, s::cl_char, s::cl_uchar) -MAKE_1V_2V(u_upsample, s::cl_int, s::cl_short, s::cl_ushort) -MAKE_1V_2V(u_upsample, s::cl_long, s::cl_int, s::cl_uint) +MAKE_1V_2V(s_upsample, s::cl_short, s::cl_char, s::cl_uchar) +MAKE_1V_2V(s_upsample, s::cl_int, s::cl_short, s::cl_ushort) +MAKE_1V_2V(s_upsample, s::cl_long, s::cl_int, s::cl_uint) #undef s_upsample // popcount -template inline constexpr T __popcount_impl(T x, size_t n = 0) { - return (x == T(0)) ? n : __popcount_impl(x >> 1, ((x & T(1)) ? 
++n : n)); -} - -template inline constexpr T __popcount(T x) { - using UT = typename d::make_unsigned::type; - return __popcount_impl(UT(x)); -} - cl_uchar popcount(s::cl_uchar x) __NOEXC { return __popcount(x); } cl_ushort popcount(s::cl_ushort x) __NOEXC { return __popcount(x); } cl_uint popcount(s::cl_uint x) __NOEXC { return __popcount(x); } @@ -2087,8 +2297,6 @@ MAKE_1V(popcount, s::cl_long, s::cl_long) MAKE_1V(popcount, s::longlong, s::longlong) // u_mad24 -template T __mad24(T x, T y, T z) { return (x * y) + z; } - cl_uint u_mad24(s::cl_uint x, s::cl_uint y, s::cl_uint z) __NOEXC { return __mad24(x, y, z); } @@ -2098,33 +2306,28 @@ MAKE_1V_2V_3V(u_mad24, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) cl_int s_mad24(s::cl_int x, s::cl_int y, s::cl_int z) __NOEXC { return __mad24(x, y, z); } - MAKE_1V_2V_3V(s_mad24, s::cl_int, s::cl_int, s::cl_int, s::cl_int) // u_mul24 -template T __mul24(T x, T y) { return (x * y); } - cl_uint u_mul24(s::cl_uint x, s::cl_uint y) __NOEXC { return __mul24(x, y); } - MAKE_1V_2V(u_mul24, s::cl_uint, s::cl_uint, s::cl_uint) // s_mul24 cl_int s_mul24(s::cl_int x, s::cl_int y) __NOEXC { return __mul24(x, y); } - MAKE_1V_2V(s_mul24, s::cl_int, s::cl_int, s::cl_int) /* --------------- 4.13.5 Common functions. 
Host version --------------------*/ // fclamp cl_float fclamp(s::cl_float x, s::cl_float minval, s::cl_float maxval) __NOEXC { - return std::fmin(std::fmax(x, minval), maxval); + return __fclamp(x, minval, maxval); } cl_double fclamp(s::cl_double x, s::cl_double minval, s::cl_double maxval) __NOEXC { - return std::fmin(std::fmax(x, minval), maxval); + return __fclamp(x, minval, maxval); } #ifndef NO_HALF_ENABLED cl_half fclamp(s::cl_half x, s::cl_half minval, s::cl_half maxval) __NOEXC { - return std::fmin(std::fmax(x, minval), maxval); + return __fclamp(x, minval, maxval); } #endif MAKE_1V_2V_3V(fclamp, s::cl_float, s::cl_float, s::cl_float, s::cl_float) @@ -2134,12 +2337,10 @@ MAKE_1V_2V_3V(fclamp, s::cl_half, s::cl_half, s::cl_half, s::cl_half) #endif // degrees -cl_float degrees(s::cl_float radians) __NOEXC { return (180 / M_PI) * radians; } -cl_double degrees(s::cl_double radians) __NOEXC { - return (180 / M_PI) * radians; -} +cl_float degrees(s::cl_float radians) __NOEXC { return __degrees(radians); } +cl_double degrees(s::cl_double radians) __NOEXC { return __degrees(radians); } #ifndef NO_HALF_ENABLED -cl_half degrees(s::cl_half radians) __NOEXC { return (180 / M_PI) * radians; } +cl_half degrees(s::cl_half radians) __NOEXC { return __degrees(radians); } #endif MAKE_1V(degrees, s::cl_float, s::cl_float) MAKE_1V(degrees, s::cl_double, s::cl_double) @@ -2185,14 +2386,14 @@ MAKE_1V_2V(fmax_common, s::cl_half, s::cl_half, s::cl_half) // mix cl_float mix(s::cl_float x, s::cl_float y, s::cl_float a) __NOEXC { - return x + (y - x) * a; + return __mix(x, y, a); } cl_double mix(s::cl_double x, s::cl_double y, s::cl_double a) __NOEXC { - return x + (y - x) * a; + return __mix(x, y, a); } #ifndef NO_HALF_ENABLED cl_half mix(s::cl_half x, s::cl_half y, s::cl_half a) __NOEXC { - return x + (y - x) * a; + return __mix(x, y, a); } #endif MAKE_1V_2V_3V(mix, s::cl_float, s::cl_float, s::cl_float, s::cl_float) @@ -2202,12 +2403,10 @@ MAKE_1V_2V_3V(mix, s::cl_half, 
s::cl_half, s::cl_half, s::cl_half) #endif // radians -cl_float radians(s::cl_float degrees) __NOEXC { return (M_PI / 180) * degrees; } -cl_double radians(s::cl_double degrees) __NOEXC { - return (M_PI / 180) * degrees; -} +cl_float radians(s::cl_float degrees) __NOEXC { return __radians(degrees); } +cl_double radians(s::cl_double degrees) __NOEXC { return __radians(degrees); } #ifndef NO_HALF_ENABLED -cl_half radians(s::cl_half degrees) __NOEXC { return (M_PI / 180) * degrees; } +cl_half radians(s::cl_half degrees) __NOEXC { return __radians(degrees); } #endif MAKE_1V(radians, s::cl_float, s::cl_float) MAKE_1V(radians, s::cl_double, s::cl_double) @@ -2217,15 +2416,13 @@ MAKE_1V(radians, s::cl_half, s::cl_half) // step cl_float step(s::cl_float edge, s::cl_float x) __NOEXC { - return (x < edge) ? 0.0 : 1.0; + return __step(edge, x); } cl_double step(s::cl_double edge, s::cl_double x) __NOEXC { - return (x < edge) ? 0.0 : 1.0; + return __step(edge, x); } #ifndef NO_HALF_ENABLED -cl_half step(s::cl_half edge, s::cl_half x) __NOEXC { - return (x < edge) ? 
0.0 : 1.0; -} +cl_half step(s::cl_half edge, s::cl_half x) __NOEXC { return __step(edge, x); } #endif MAKE_1V_2V(step, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(step, s::cl_double, s::cl_double, s::cl_double) @@ -2236,21 +2433,15 @@ MAKE_1V_2V(step, s::cl_half, s::cl_half, s::cl_half) // fma cl_float smoothstep(s::cl_float edge0, s::cl_float edge1, s::cl_float x) __NOEXC { - cl_float t; - t = fclamp((x - edge0) / (edge1 - edge0), 0, 1); - return t * t * (3 - 2 * t); + return __smoothstep(edge0, edge1, x); } cl_double smoothstep(s::cl_double edge0, s::cl_double edge1, s::cl_double x) __NOEXC { - cl_float t; - t = fclamp((x - edge0) / (edge1 - edge0), 0, 1); - return t * t * (3 - 2 * t); + return __smoothstep(edge0, edge1, x); } #ifndef NO_HALF_ENABLED cl_half smoothstep(s::cl_half edge0, s::cl_half edge1, s::cl_half x) __NOEXC { - cl_float t; - t = fclamp((x - edge0) / (edge1 - edge0), 0, 1); - return t * t * (3 - 2 * t); + return __smoothstep(edge0, edge1, x); } #endif MAKE_1V_2V_3V(smoothstep, s::cl_float, s::cl_float, s::cl_float, s::cl_float) @@ -2261,40 +2452,10 @@ MAKE_1V_2V_3V(smoothstep, s::cl_half, s::cl_half, s::cl_half, s::cl_half) #endif // sign -cl_float sign(s::cl_float x) __NOEXC { - if (std::isnan(x)) { - return 0.0; - } else if (x > 0) { - return 1.0; - } else if (x < 0) { - return -1.0; - } else /* x is +0.0 or -0.0} */ { - return x; - } -} -cl_double sign(s::cl_double x) __NOEXC { - if (std::isnan(x)) { - return 0.0; - } else if (x > 0) { - return 1.0; - } else if (x < 0) { - return -1.0; - } else /* x is +0.0 or -0.0} */ { - return x; - } -} +cl_float sign(s::cl_float x) __NOEXC { return __sign(x); } +cl_double sign(s::cl_double x) __NOEXC { return __sign(x); } #ifndef NO_HALF_ENABLED -cl_half sign(s::cl_half x) __NOEXC { - if (std::isnan(x)) { - return s::cl_half(0.0); - } else if (x > 0) { - return s::cl_half(1.0); - } else if (x < 0) { - return s::cl_half(-1.0); - } else /* x is +0.0 or -0.0} */ { - return x; - } -} +cl_half 
sign(s::cl_half x) __NOEXC { return __sign(x); } #endif MAKE_1V(sign, s::cl_float, s::cl_float) MAKE_1V(sign, s::cl_double, s::cl_double) @@ -2304,87 +2465,43 @@ MAKE_1V(sign, s::cl_half, s::cl_half) /* --------------- 4.13.6 Geometric Functions. Host version -----------------*/ // cross -#define MAKE_CROSS(r, p0, p1) \ - r.x() = p0.y() * p1.z() - p0.z() * p1.y(); \ - r.y() = p0.z() * p1.x() - p0.x() * p1.z(); \ - r.z() = p0.x() * p1.y() - p0.y() * p1.x(); - s::cl_float3 cross(s::cl_float3 p0, s::cl_float3 p1) __NOEXC { - s::cl_float3 r; - MAKE_CROSS(r, p0, p1) return r; + return __cross(p0, p1); } s::cl_float4 cross(s::cl_float4 p0, s::cl_float4 p1) __NOEXC { - s::cl_float4 r; - MAKE_CROSS(r, p0, p1) r.w() = 0; - return r; + return __cross(p0, p1); } s::cl_double3 cross(s::cl_double3 p0, s::cl_double3 p1) __NOEXC { - s::cl_double3 r; - MAKE_CROSS(r, p0, p1) return r; + return __cross(p0, p1); } s::cl_double4 cross(s::cl_double4 p0, s::cl_double4 p1) __NOEXC { - s::cl_double4 r; - MAKE_CROSS(r, p0, p1) r.w() = 0; - return r; + return __cross(p0, p1); } #ifndef NO_HALF_ENABLED s::cl_half3 cross(s::cl_half3 p0, s::cl_half3 p1) __NOEXC { - s::cl_half3 r; - MAKE_CROSS(r, p0, p1) return r; + return __cross(p0, p1); } s::cl_half4 cross(s::cl_half4 p0, s::cl_half4 p1) __NOEXC { - s::cl_half4 r; - MAKE_CROSS(r, p0, p1) r.w() = 0; - return r; + return __cross(p0, p1); } #endif -#undef MAKE_CROSS // OpFMul -template -typename std::enable_if::value, void>::type -__OpFMul(T &r, T p0, T p1) { - r += p0 * p1; -} - -cl_float OpFMul(s::cl_float p0, s::cl_float p1) { - s::cl_float r = 0; - __OpFMul(r, p0, p1); - return r; -} -cl_double OpFMul(s::cl_double p0, s::cl_double p1) { - s::cl_double r = 0; - __OpFMul(r, p0, p1); - return r; -} +cl_float OpFMul(s::cl_float p0, s::cl_float p1) { return __OpFMul(p0, p1); } +cl_double OpFMul(s::cl_double p0, s::cl_double p1) { return __OpFMul(p0, p1); } #ifndef NO_HALF_ENABLED -cl_float OpFMul(s::cl_half p0, s::cl_half p1) { - s::cl_half r = 
0; - __OpFMul(r, p0, p1); - return r; -} +cl_float OpFMul(s::cl_half p0, s::cl_half p1) { return __OpFMul(p0, p1); } #endif + // OpDot -MAKE_GEO_1V_2V_RS(OpDot, __OpFMul, s::cl_float, s::cl_float, s::cl_float) -MAKE_GEO_1V_2V_RS(OpDot, __OpFMul, s::cl_double, s::cl_double, s::cl_double) +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_float, s::cl_float, s::cl_float) +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_double, s::cl_double, + s::cl_double) #ifndef NO_HALF_ENABLED -MAKE_GEO_1V_2V_RS(OpDot, __OpFMul, s::cl_half, s::cl_half, s::cl_half) +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_half, s::cl_half, s::cl_half) #endif // length -template -typename std::enable_if::value, T>::type -__length(T t) { - return std::sqrt(OpFMul(t, t)); -} - -template -typename std::enable_if::value, - typename T::element_type>::type -__length(T t) { - return std::sqrt(OpDot(t, t)); -} - cl_float length(s::cl_float p) { return __length(p); } cl_double length(s::cl_double p) { return __length(p); } #ifndef NO_HALF_ENABLED @@ -2404,13 +2521,10 @@ cl_half length(s::cl_half4 p) { return __length(p); } // distance cl_float distance(s::cl_float p0, s::cl_float p1) { return length(p0 - p1); } -cl_double distance(s::cl_double p0, s::cl_double p1) { return length(p0 - p1); } -#ifndef NO_HALF_ENABLED -cl_half distance(s::cl_half p0, s::cl_half p1) { return length(p0 - p1); } -#endif cl_float distance(s::cl_float2 p0, s::cl_float2 p1) { return length(p0 - p1); } cl_float distance(s::cl_float3 p0, s::cl_float3 p1) { return length(p0 - p1); } cl_float distance(s::cl_float4 p0, s::cl_float4 p1) { return length(p0 - p1); } +cl_double distance(s::cl_double p0, s::cl_double p1) { return length(p0 - p1); } cl_double distance(s::cl_double2 p0, s::cl_double2 p1) { return length(p0 - p1); } @@ -2421,31 +2535,17 @@ cl_double distance(s::cl_double4 p0, s::cl_double4 p1) { return length(p0 - p1); } #ifndef NO_HALF_ENABLED +cl_half distance(s::cl_half p0, s::cl_half p1) { return length(p0 - p1); } cl_half 
distance(s::cl_half2 p0, s::cl_half2 p1) { return length(p0 - p1); } cl_half distance(s::cl_half3 p0, s::cl_half3 p1) { return length(p0 - p1); } cl_half distance(s::cl_half4 p0, s::cl_half4 p1) { return length(p0 - p1); } #endif // normalize -template -typename std::enable_if::value, T>::type -__normalize(T t) { - T r = length(t); - return t / T(r); -} - -template -typename std::enable_if::value, T>::type -__normalize(T t) { - typename T::element_type r = length(t); - return t / T(r); -} - s::cl_float normalize(s::cl_float p) { return __normalize(p); } s::cl_float2 normalize(s::cl_float2 p) { return __normalize(p); } s::cl_float3 normalize(s::cl_float3 p) { return __normalize(p); } s::cl_float4 normalize(s::cl_float4 p) { return __normalize(p); } - s::cl_double normalize(s::cl_double p) { return __normalize(p); } s::cl_double2 normalize(s::cl_double2 p) { return __normalize(p); } s::cl_double3 normalize(s::cl_double3 p) { return __normalize(p); } @@ -2458,35 +2558,15 @@ s::cl_half4 normalize(s::cl_half4 p) { return __normalize(p); } #endif // fast_length -template -typename std::enable_if::value, - typename T::element_type>::type -__fast_length(T t) { - return std::sqrt(OpDot(t, t)); -} -cl_float fast_length(s::cl_float p) { return std::sqrt(OpFMul(p, p)); } +cl_float fast_length(s::cl_float p) { return __fast_length(p); } cl_float fast_length(s::cl_float2 p) { return __fast_length(p); } cl_float fast_length(s::cl_float3 p) { return __fast_length(p); } cl_float fast_length(s::cl_float4 p) { return __fast_length(p); } // fast_normalize -s::cl_int OpAll(s::cl_int2); -s::cl_int OpAll(s::cl_int3); -s::cl_int OpAll(s::cl_int4); -template -typename std::enable_if::value, T>::type -__fast_normalize(T t) { - if (OpAll(t == T(0.0f))) { - return t; - } - typename T::element_type r = std::sqrt(OpDot(t, t)); - return t / T(r); -} - s::cl_float fast_normalize(s::cl_float p) { - if (p == 0.0f) { + if (p == 0.0f) return p; - } s::cl_float r = std::sqrt(OpFMul(p, p)); return p / 
r; } @@ -2510,18 +2590,15 @@ cl_float fast_distance(s::cl_float4 p0, s::cl_float4 p1) { /* --------------- 4.13.7 Relational functions. Host version --------------*/ // OpFOrdEqual-isequal -cl_int OpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { return (x == y); } -cl_int OpFOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { return (x == y); } -cl_int __vOpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { - return -(x == y); +cl_int OpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { + return __sOpFOrdEqual(x, y); } -cl_long __vOpFOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { - return -(x == y); +cl_int OpFOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { + return __sOpFOrdEqual(x, y); } #ifndef NO_HALF_ENABLED -cl_int OpFOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { return (x == y); } -cl_short __vOpFOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { - return -(x == y); +cl_int OpFOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { + return __sOpFOrdEqual(x, y); } #endif MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_int, s::cl_float, @@ -2535,21 +2612,14 @@ MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_short, s::cl_half, // OpFUnordNotEqual-isnotequal cl_int OpFUnordNotEqual(s::cl_float x, s::cl_float y) __NOEXC { - return (x != y); + return __sOpFUnordNotEqual(x, y); } cl_int OpFUnordNotEqual(s::cl_double x, s::cl_double y) __NOEXC { - return (x != y); -} -cl_int __vOpFUnordNotEqual(s::cl_float x, s::cl_float y) __NOEXC { - return -(x != y); -} -cl_long __vOpFUnordNotEqual(s::cl_double x, s::cl_double y) __NOEXC { - return -(x != y); + return __sOpFUnordNotEqual(x, y); } #ifndef NO_HALF_ENABLED -cl_int OpFUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { return (x != y); } -cl_short __vOpFUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { - return -(x != y); +cl_int OpFUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { + return __sOpFUnordNotEqual(x, y); } #endif MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_int, s::cl_float, @@ 
-2563,21 +2633,14 @@ MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_short, s::cl_half, // (OpFOrdGreaterThan) // isgreater cl_int OpFOrdGreaterThan(s::cl_float x, s::cl_float y) __NOEXC { - return (x > y); + return __sOpFOrdGreaterThan(x, y); } cl_int OpFOrdGreaterThan(s::cl_double x, s::cl_double y) __NOEXC { - return (x > y); -} -cl_int __vOpFOrdGreaterThan(s::cl_float x, s::cl_float y) __NOEXC { - return -(x > y); -} -cl_long __vOpFOrdGreaterThan(s::cl_double x, s::cl_double y) __NOEXC { - return -(x > y); + return __sOpFOrdGreaterThan(x, y); } #ifndef NO_HALF_ENABLED -cl_int OpFOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { return (x > y); } -cl_short __vOpFOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { - return -(x > y); +cl_int OpFOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { + return __sOpFOrdGreaterThan(x, y); } #endif MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_int, s::cl_float, @@ -2591,23 +2654,14 @@ MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_short, // (OpFOrdGreaterThanEqual) // isgreaterequal cl_int OpFOrdGreaterThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return (x >= y); + return __sOpFOrdGreaterThanEqual(x, y); } cl_int OpFOrdGreaterThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return (x >= y); -} -cl_int __vOpFOrdGreaterThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return -(x >= y); -} -cl_long __vOpFOrdGreaterThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return -(x >= y); + return __sOpFOrdGreaterThanEqual(x, y); } #ifndef NO_HALF_ENABLED cl_int OpFOrdGreaterThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return (x >= y); -} -cl_short __vOpFOrdGreaterThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return -(x >= y); + return __sOpFOrdGreaterThanEqual(x, y); } #endif MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_int, @@ -2647,23 +2701,14 @@ MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_short, s::cl_half, // 
(OpFOrdLessThanEqual) // islessequal cl_int OpFOrdLessThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return (x <= y); + return __sOpFOrdLessThanEqual(x, y); } cl_int OpFOrdLessThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return (x <= y); -} -cl_int __vOpFOrdLessThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return -(x <= y); -} -cl_long __vOpFOrdLessThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return -(x <= y); + return __sOpFOrdLessThanEqual(x, y); } #ifndef NO_HALF_ENABLED cl_int OpFOrdLessThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return (x <= y); -} -cl_short __vOpFOrdLessThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return -(x <= y); + return __sOpFOrdLessThanEqual(x, y); } #endif MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_int, @@ -2677,23 +2722,14 @@ MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_short, // (OpLessOrGreater) // islessgreater cl_int OpLessOrGreater(s::cl_float x, s::cl_float y) __NOEXC { - return ((x < y) || (x > y)); + return __sOpLessOrGreater(x, y); } cl_int OpLessOrGreater(s::cl_double x, s::cl_double y) __NOEXC { - return ((x < y) || (x > y)); -} -cl_int __vOpLessOrGreater(s::cl_float x, s::cl_float y) __NOEXC { - return -((x < y) || (x > y)); -} -cl_long __vOpLessOrGreater(s::cl_double x, s::cl_double y) __NOEXC { - return -((x < y) || (x > y)); + return __sOpLessOrGreater(x, y); } #ifndef NO_HALF_ENABLED cl_int OpLessOrGreater(s::cl_half x, s::cl_half y) __NOEXC { - return ((x < y) || (x > y)); -} -cl_short __vOpLessOrGreater(s::cl_half x, s::cl_half y) __NOEXC { - return -((x < y) || (x > y)); + return __sOpLessOrGreater(x, y); } #endif MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_int, s::cl_float, @@ -2706,12 +2742,12 @@ MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_short, s::cl_half, #endif // (OpIsFinite) // isfinite -cl_int OpIsFinite(s::cl_float x) __NOEXC { return (std::isfinite(x)); } -cl_int OpIsFinite(s::cl_double x) 
__NOEXC { return (std::isfinite(x)); } +cl_int OpIsFinite(s::cl_float x) __NOEXC { return std::isfinite(x); } +cl_int OpIsFinite(s::cl_double x) __NOEXC { return std::isfinite(x); } cl_int __vOpIsFinite(s::cl_float x) __NOEXC { return -(std::isfinite(x)); } cl_long __vOpIsFinite(s::cl_double x) __NOEXC { return -(std::isfinite(x)); } #ifndef NO_HALF_ENABLED -cl_int OpIsFinite(s::cl_half x) __NOEXC { return (std::isfinite(x)); } +cl_int OpIsFinite(s::cl_half x) __NOEXC { return std::isfinite(x); } cl_short __vOpIsFinite(s::cl_half x) __NOEXC { return -(std::isfinite(x)); } #endif MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_int, s::cl_float) @@ -2721,12 +2757,12 @@ MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_short, s::cl_half) #endif // (OpIsInf) // isinf -cl_int OpIsInf(s::cl_float x) __NOEXC { return (std::isinf(x)); } -cl_int OpIsInf(s::cl_double x) __NOEXC { return (std::isinf(x)); } +cl_int OpIsInf(s::cl_float x) __NOEXC { return std::isinf(x); } +cl_int OpIsInf(s::cl_double x) __NOEXC { return std::isinf(x); } cl_int __vOpIsInf(s::cl_float x) __NOEXC { return -(std::isinf(x)); } cl_long __vOpIsInf(s::cl_double x) __NOEXC { return -(std::isinf(x)); } #ifndef NO_HALF_ENABLED -cl_int OpIsInf(s::cl_half x) __NOEXC { return (std::isinf(x)); } +cl_int OpIsInf(s::cl_half x) __NOEXC { return std::isinf(x); } cl_short __vOpIsInf(s::cl_half x) __NOEXC { return -(std::isinf(x)); } #endif MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_int, s::cl_float) @@ -2736,13 +2772,13 @@ MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_short, s::cl_half) #endif // (OpIsNan) // isnan -cl_int OpIsNan(s::cl_float x) __NOEXC { return (std::isnan(x)); } -cl_int OpIsNan(s::cl_double x) __NOEXC { return (std::isnan(x)); } +cl_int OpIsNan(s::cl_float x) __NOEXC { return std::isnan(x); } +cl_int OpIsNan(s::cl_double x) __NOEXC { return std::isnan(x); } cl_int __vOpIsNan(s::cl_float x) __NOEXC { return -(std::isnan(x)); } cl_long __vOpIsNan(s::cl_double x) __NOEXC { return -(std::isnan(x)); } #ifndef 
NO_HALF_ENABLED -cl_int OpIsNan(s::cl_half x) __NOEXC { return (std::isnan(x)); } +cl_int OpIsNan(s::cl_half x) __NOEXC { return std::isnan(x); } cl_short __vOpIsNan(s::cl_half x) __NOEXC { return -(std::isnan(x)); } #endif MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_int, s::cl_float) @@ -2752,12 +2788,12 @@ MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_short, s::cl_half) #endif // (OpIsNormal) // isnormal -cl_int OpIsNormal(s::cl_float x) __NOEXC { return (std::isnormal(x)); } -cl_int OpIsNormal(s::cl_double x) __NOEXC { return (std::isnormal(x)); } +cl_int OpIsNormal(s::cl_float x) __NOEXC { return std::isnormal(x); } +cl_int OpIsNormal(s::cl_double x) __NOEXC { return std::isnormal(x); } cl_int __vOpIsNormal(s::cl_float x) __NOEXC { return -(std::isnormal(x)); } cl_long __vOpIsNormal(s::cl_double x) __NOEXC { return -(std::isnormal(x)); } #ifndef NO_HALF_ENABLED -cl_int OpIsNormal(s::cl_half x) __NOEXC { return (std::isnormal(x)); } +cl_int OpIsNormal(s::cl_half x) __NOEXC { return std::isnormal(x); } cl_short __vOpIsNormal(s::cl_half x) __NOEXC { return -(std::isnormal(x)); } #endif MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_int, s::cl_float) @@ -2768,23 +2804,14 @@ MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_short, s::cl_half) // (OpOrdered) // isordered cl_int OpOrdered(s::cl_float x, s::cl_float y) __NOEXC { - return !(std::isunordered(x, y)); + return __vOpOrdered(x, y); } cl_int OpOrdered(s::cl_double x, s::cl_double y) __NOEXC { - return !(std::isunordered(x, y)); -} -cl_int __vOpOrdered(s::cl_float x, s::cl_float y) __NOEXC { - return -(!(std::isunordered(x, y))); -} -cl_long __vOpOrdered(s::cl_double x, s::cl_double y) __NOEXC { - return -(!(std::isunordered(x, y))); + return __vOpOrdered(x, y); } #ifndef NO_HALF_ENABLED cl_int OpOrdered(s::cl_half x, s::cl_half y) __NOEXC { - return (OpFOrdEqual(x, x) && OpFOrdEqual(y, y)); -} -cl_short __vOpOrdered(s::cl_half x, s::cl_half y) __NOEXC { - return -((OpFOrdEqual(x, x) && OpFOrdEqual(y, y))); + return 
__vOpOrdered(x, y); } #endif MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_int, s::cl_float, s::cl_float) @@ -2795,23 +2822,14 @@ MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_short, s::cl_half, s::cl_half) // (OpUnordered) // isunordered cl_int OpUnordered(s::cl_float x, s::cl_float y) __NOEXC { - return std::isunordered(x, y); + return __sOpUnordered(x, y); } cl_int OpUnordered(s::cl_double x, s::cl_double y) __NOEXC { - return std::isunordered(x, y); -} -cl_int __vOpUnordered(s::cl_float x, s::cl_float y) __NOEXC { - return -(std::isunordered(x, y)); -} -cl_long __vOpUnordered(s::cl_double x, s::cl_double y) __NOEXC { - return -(std::isunordered(x, y)); + return __sOpUnordered(x, y); } #ifndef NO_HALF_ENABLED cl_int OpUnordered(s::cl_half x, s::cl_half y) __NOEXC { - return (OpIsNan(x) || OpIsNan(y)); -} -cl_short __vOpUnordered(s::cl_half x, s::cl_half y) __NOEXC { - return -((OpIsNan(x) || OpIsNan(y))); + return __sOpUnordered(x, y); } #endif MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_int, s::cl_float, @@ -2839,71 +2857,22 @@ MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_short, s::cl_half) #endif // (OpAny) // any -template cl_int OpAny(T x) { return sycl::detail::msbIsSet(x); } -MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_char) -MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_short) -MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_int) -MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_long) -MAKE_SR_1V_OR(OpAny, s::cl_int, s::longlong) - -// (OpAll) // all -template cl_int OpAll(T x) { return sycl::detail::msbIsSet(x); } -MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_char) -MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_short) -MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_int) -MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_long) -MAKE_SR_1V_AND(OpAll, s::cl_int, s::longlong) -// (bitselect) - -template -typename std::enable_if::value, T>::type -__bitselect(T a, T b, T c) { - return ((a & ~c) | (b & c)); -} - -template union databitset; -// float -template <> union databitset { - static_assert(sizeof(uint32_t) 
== sizeof(float), - "size of float is not equal to 32 bits."); - float f; - uint32_t i; -}; - -// double -template <> union databitset { - static_assert(sizeof(uint64_t) == sizeof(double), - "size of double is not equal to 64 bits."); - double f; - uint64_t i; -}; +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_char) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_short) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_int) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_long) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::longlong) -#ifndef NO_HALF_ENABLED -// Half -template <> union databitset { - static_assert(sizeof(uint16_t) == sizeof(cl_half), - "size of half is not equal to 16 bits."); - cl_half f; - uint16_t i; -}; -#endif +// (OpAll) // all -template -typename std::enable_if::value, T>::type -__bitselect(T a, T b, T c) { - databitset ba; - ba.f = a; - databitset bb; - bb.f = b; - databitset bc; - bc.f = c; - databitset br; - br.f = 0; - br.i = ((ba.i & ~bc.i) | (bb.i & bc.i)); - return br.f; -} +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_char) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_short) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_int) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_long) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::longlong) +// (bitselect) // Instantiate functions for the scalar types and vector types. MAKE_SC_1V_2V_3V(bitselect, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_SC_1V_2V_3V(bitselect, s::cl_double, s::cl_double, s::cl_double, @@ -2927,20 +2896,10 @@ MAKE_SC_1V_2V_3V(bitselect, s::cl_half, s::cl_half, s::cl_half, s::cl_half) // (OpSelect) // select // for scalar: result = c ? b : a. // for vector: result[i] = (MSB of c[i] is set)? b[i] : a[i] - -template T2 __OpSelect(T c, T2 b, T2 a) { - return (c ? b : a); -} - -template T2 __vOpSelect(T c, T2 b, T2 a) { - return sycl::detail::msbIsSet(c) ? 
b : a; -} - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_int, s::cl_float, s::cl_float) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_uint, s::cl_float, s::cl_float) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_long, s::cl_double, s::cl_double) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_ulong, @@ -2949,57 +2908,46 @@ MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::longlong, s::cl_double, s::cl_double) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::ulonglong, s::cl_double, s::cl_double) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_char, s::cl_char, s::cl_char) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_uchar, s::cl_char, s::cl_char) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_char, s::cl_uchar, s::cl_uchar) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_short, s::cl_short, s::cl_short) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_ushort, s::cl_short, s::cl_short) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_short, s::cl_ushort, s::cl_ushort) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_int, s::cl_int, s::cl_int) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_uint, s::cl_int, s::cl_int) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_int, s::cl_uint, s::cl_uint) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_long, s::cl_long, s::cl_long) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_ulong, s::cl_long, s::cl_long) - 
MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_long, s::cl_ulong, s::cl_ulong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::longlong, s::longlong, s::longlong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::ulonglong, s::longlong, s::longlong) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::longlong, s::ulonglong, s::ulonglong) - #ifndef NO_HALF_ENABLED MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_short, s::cl_half, s::cl_half) diff --git a/sycl/test/built-ins/vector_relational.cpp b/sycl/test/built-ins/vector_relational.cpp index 528189ed0b885..88a030ae9fcfa 100644 --- a/sycl/test/built-ins/vector_relational.cpp +++ b/sycl/test/built-ins/vector_relational.cpp @@ -6,7 +6,6 @@ #include -#include #include #include @@ -32,8 +31,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == 0); assert(r3 == 0); @@ -59,8 +56,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == -1); @@ -86,8 +81,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == 0); @@ -113,8 +106,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -140,8 +131,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << 
" r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == 0); @@ -167,8 +156,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -195,8 +182,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == 0); @@ -222,8 +207,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -248,8 +231,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == 0); assert(r3 == 0); @@ -274,8 +255,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == 0); assert(r3 == -1); @@ -300,8 +279,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -327,8 +304,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -354,8 +329,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == 0); assert(r3 == -1); @@ -380,8 +353,6 @@ int main() { s::cl_int r3 = r.z(); s::cl_int r4 = r.w(); - std::cout << "sign r1 " << r1 << " r2 " << 
r2 << " r3 " << r3 << " r4 " - << r4 << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == 0); @@ -404,7 +375,6 @@ int main() { } s::cl_int r1 = r; - std::cout << "Any r1 " << r1 << std::endl; assert(r1 == 1); } @@ -424,7 +394,6 @@ int main() { } s::cl_int r1 = r; - std::cout << "Any - r1 " << r1 << std::endl; assert(r1 == 1); } @@ -444,7 +413,6 @@ int main() { } s::cl_int r1 = r; - std::cout << "Any 0 r1 " << r1 << std::endl; assert(r1 == 0); } @@ -464,7 +432,6 @@ int main() { } s::cl_int r1 = r; - std::cout << "Any + r1 " << r1 << std::endl; assert(r1 == 0); } @@ -487,7 +454,6 @@ int main() { } s::cl_int r1 = r; - std::cout << "All change r1 " << r1 << std::endl; assert(r1 == 1); } @@ -507,7 +473,6 @@ int main() { } s::cl_int r1 = r; - std::cout << "All - r1 " << r1 << std::endl; assert(r1 == 1); } @@ -527,7 +492,6 @@ int main() { } s::cl_int r1 = r; - std::cout << "All 0 r1 " << r1 << std::endl; assert(r1 == 0); } @@ -547,7 +511,6 @@ int main() { } s::cl_int r1 = r; - std::cout << "All + r1 " << r1 << std::endl; assert(r1 == 0); } @@ -572,8 +535,6 @@ int main() { s::cl_float r3 = r.z(); s::cl_float r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(abs(r1 - 80.5477f) < 0.0001); assert(abs(r2 - 18.2322f) < 0.0001); assert(abs(r3 - 1.0f) < 0.01); @@ -603,8 +564,6 @@ int main() { s::cl_float r3 = r.z(); s::cl_float r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 112.112f); assert(r2 == 112.112f); assert(r3 == 112.112f); From 4f5b299a857fda5b76eaccb51281b4cb4ab9437a Mon Sep 17 00:00:00 2001 From: Alexey Voronov Date: Tue, 16 Apr 2019 19:29:16 +0300 Subject: [PATCH 11/11] [SYCL] Enable built-in functions overloaded for half type. 
Signed-off-by: Alexey Voronov --- sycl/include/CL/sycl/builtins.hpp | 105 +-- .../CL/sycl/detail/generic_type_traits.hpp | 799 ++++++------------ sycl/source/detail/builtins.cpp | 553 ++---------- 3 files changed, 385 insertions(+), 1072 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 5cad3d983540f..3785e57751fb8 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -17,9 +17,6 @@ // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ -// TODO Remove when half type will supported by SYCL Runtime -#define __HALF_NO_ENABLED - namespace cl { namespace sycl { #ifdef __SYCL_DEVICE_ONLY__ @@ -260,7 +257,7 @@ fmod(T x, T y) __NOEXC { // genfloat fract (genfloat x, genfloatptr iptr) template typename std::enable_if< - detail::is_genfloat::value &&detail::is_genfloatptr::value, T>::type + detail::is_genfloat::value && detail::is_genfloatptr::value, T>::type fract(T x, T2 iptr) __NOEXC { return __sycl_std::__invoke_fract(x, iptr); } @@ -268,7 +265,7 @@ fract(T x, T2 iptr) __NOEXC { // genfloat frexp (genfloat x, genintptr exp) template typename std::enable_if< - detail::is_genfloat::value &&detail::is_genintptr::value, T>::type + detail::is_genfloat::value && detail::is_genintptr::value, T>::type frexp(T x, T2 exp) __NOEXC { return __sycl_std::__invoke_frexp(x, exp); } @@ -308,7 +305,7 @@ ldexp(T x, int k) __NOEXC { // vgenfloat ldexp (vgenfloat x, genint k) template typename std::enable_if< - detail::is_vgenfloat::value &&detail::is_intn::value, T>::type + detail::is_vgenfloat::value && detail::is_intn::value, T>::type ldexp(T x, T2 k) __NOEXC { return __sycl_std::__invoke_ldexp(x, k); } @@ -323,7 +320,7 @@ lgamma(T x) __NOEXC { // genfloat lgamma_r (genfloat x, genintptr signp) template typename std::enable_if< - detail::is_genfloat::value &&detail::is_genintptr::value, T>::type + detail::is_genfloat::value && detail::is_genintptr::value, T>::type 
lgamma_r(T x, T2 signp) __NOEXC { return __sycl_std::__invoke_lgamma_r(x, signp); } @@ -387,7 +384,7 @@ minmag(T x, T y) __NOEXC { // genfloat modf (genfloat x, genfloatptr iptr) template typename std::enable_if< - detail::is_genfloat::value &&detail::is_genfloatptr::value, T>::type + detail::is_genfloat::value && detail::is_genfloatptr::value, T>::type modf(T x, T2 iptr) __NOEXC { return __sycl_std::__invoke_modf(x, iptr); } @@ -420,7 +417,7 @@ pow(T x, T y) __NOEXC { // genfloat pown (genfloat x, genint y) template typename std::enable_if< - detail::is_genfloat::value &&detail::is_genint::value, T>::type + detail::is_genfloat::value && detail::is_genint::value, T>::type pown(T x, T2 y) __NOEXC { return __sycl_std::__invoke_pown(x, y); } @@ -442,7 +439,7 @@ remainder(T x, T y) __NOEXC { // genfloat remquo (genfloat x, genfloat y, genintptr quo) template typename std::enable_if< - detail::is_genfloat::value &&detail::is_genintptr::value, T>::type + detail::is_genfloat::value && detail::is_genintptr::value, T>::type remquo(T x, T y, T2 quo) __NOEXC { return __sycl_std::__invoke_remquo(x, y, quo); } @@ -457,7 +454,7 @@ rint(T x) __NOEXC { // genfloat rootn (genfloat x, genint y) template typename std::enable_if< - detail::is_genfloat::value &&detail::is_genint::value, T>::type + detail::is_genfloat::value && detail::is_genint::value, T>::type rootn(T x, T2 y) __NOEXC { return __sycl_std::__invoke_rootn(x, y); } @@ -486,7 +483,7 @@ sin(T x) __NOEXC { // genfloat sincos (genfloat x, genfloatptr cosval) template typename std::enable_if< - detail::is_genfloat::value &&detail::is_genfloatptr::value, T>::type + detail::is_genfloat::value && detail::is_genfloatptr::value, T>::type sincos(T x, T2 cosval) __NOEXC { return __sycl_std::__invoke_sincos(x, cosval); } @@ -1042,7 +1039,6 @@ dot(T p0, T p1) __NOEXC { return __sycl_std::__invoke_OpDot(p0, p1); } -#ifndef __HALF_NO_ENABLED // half dot (vgengeohalf p0, vgengeohalf p1) template typename std::enable_if::value, @@ -1050,7 
+1046,6 @@ typename std::enable_if::value, dot(T p0, T p1) __NOEXC { return __sycl_std::__invoke_OpDot(p0, p1); } -#endif // float distance (gengeofloat p0, gengeofloat p1) template (p0, p1); } -#ifndef __HALF_NO_ENABLED // half distance (gengeohalf p0, gengeohalf p1) template ::value, T>::type> cl::sycl::cl_half distance(T p0, T p1) __NOEXC { return __sycl_std::__invoke_distance(p0, p1); } -#endif // float length (gengeofloat p) template (p); } -#ifndef __HALF_NO_ENABLED // half length (gengeohalf p) template ::value, T>::type> cl::sycl::cl_half length(T p) __NOEXC { return __sycl_std::__invoke_length(p); } -#endif // gengeofloat normalize (gengeofloat p) template @@ -1112,14 +1103,12 @@ normalize(T p) __NOEXC { return __sycl_std::__invoke_normalize(p); } -#ifndef __HALF_NO_ENABLED // gengeohalf normalize (gengeohalf p) template typename std::enable_if::value, T>::type normalize(T p) __NOEXC { return __sycl_std::__invoke_normalize(p); } -#endif // float fast_distance (gengeofloat p0, gengeofloat p1) template ::value, T>::type> detail::common_rel_ret_t isequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdEqual >(x, y)); + __sycl_std::__invoke_OpFOrdEqual>(x, y)); } // int isnotequal (half x, half y) @@ -1185,7 +1174,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnotequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFUnordNotEqual >(x, y)); + __sycl_std::__invoke_OpFUnordNotEqual>(x, y)); } // int isgreater (half x, half y) @@ -1197,7 +1186,7 @@ template ::value, T>::type> detail::common_rel_ret_t isgreater(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdGreaterThan >(x, y)); + __sycl_std::__invoke_OpFOrdGreaterThan>(x, y)); } // int isgreaterequal (half x, half y) @@ -1209,7 +1198,7 @@ template ::value, T>::type> detail::common_rel_ret_t isgreaterequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - 
__sycl_std::__invoke_OpFOrdGreaterThanEqual >(x, y)); + __sycl_std::__invoke_OpFOrdGreaterThanEqual>(x, y)); } // int isless (half x, half y) @@ -1221,7 +1210,7 @@ template ::value, T>::type> detail::common_rel_ret_t isless(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdLessThan >(x, y)); + __sycl_std::__invoke_OpFOrdLessThan>(x, y)); } // int islessequal (half x, half y) @@ -1233,7 +1222,7 @@ template ::value, T>::type> detail::common_rel_ret_t islessequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdLessThanEqual >(x, y)); + __sycl_std::__invoke_OpFOrdLessThanEqual>(x, y)); } // int islessgreater (half x, half y) @@ -1245,7 +1234,7 @@ template ::value, T>::type> detail::common_rel_ret_t islessgreater(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpLessOrGreater >(x, y)); + __sycl_std::__invoke_OpLessOrGreater>(x, y)); } // int isfinite (half x) @@ -1257,7 +1246,7 @@ template ::value, T>::type> detail::common_rel_ret_t isfinite(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsFinite >(x)); + __sycl_std::__invoke_OpIsFinite>(x)); } // int isinf (half x) @@ -1269,7 +1258,7 @@ template ::value, T>::type> detail::common_rel_ret_t isinf(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsInf >(x)); + __sycl_std::__invoke_OpIsInf>(x)); } // int isnan (half x) @@ -1281,7 +1270,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnan(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsNan >(x)); + __sycl_std::__invoke_OpIsNan>(x)); } // int isnormal (half x) @@ -1293,7 +1282,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnormal(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsNormal >(x)); + __sycl_std::__invoke_OpIsNormal>(x)); } // int isordered (half x) @@ -1305,7 +1294,7 @@ template ::value, T>::type> detail::common_rel_ret_t 
isordered(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpOrdered >(x, y)); + __sycl_std::__invoke_OpOrdered>(x, y)); } // int isunordered (half x, half y) @@ -1317,7 +1306,7 @@ template ::value, T>::type> detail::common_rel_ret_t isunordered(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpUnordered >(x, y)); + __sycl_std::__invoke_OpUnordered>(x, y)); } // int signbit (half x) @@ -1329,7 +1318,7 @@ template ::value, T>::type> detail::common_rel_ret_t signbit(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpSignBitSet >(x)); + __sycl_std::__invoke_OpSignBitSet>(x)); } // int any (sigeninteger x) @@ -1346,7 +1335,7 @@ typename std::enable_if::value, cl::sycl::cl_int>::type any(T x) __NOEXC { return detail::rel_sign_bit_test_ret_t( - __sycl_std::__invoke_OpAny >( + __sycl_std::__invoke_OpAny>( detail::rel_sign_bit_test_arg_t(x))); } @@ -1364,7 +1353,7 @@ typename std::enable_if::value, cl::sycl::cl_int>::type all(T x) __NOEXC { return detail::rel_sign_bit_test_ret_t( - __sycl_std::__invoke_OpAll >( + __sycl_std::__invoke_OpAll>( detail::rel_sign_bit_test_arg_t(x))); } @@ -1377,18 +1366,18 @@ bitselect(T a, T b, T c) __NOEXC { // geninteger select (geninteger a, geninteger b, igeninteger c) template -typename std::enable_if< - detail::is_geninteger::value &&detail::is_igeninteger::value, - T>::type +typename std::enable_if::value && + detail::is_igeninteger::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // geninteger select (geninteger a, geninteger b, ugeninteger c) template -typename std::enable_if< - detail::is_geninteger::value &&detail::is_ugeninteger::value, - T>::type +typename std::enable_if::value && + detail::is_ugeninteger::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } @@ -1396,7 +1385,7 @@ select(T a, T b, T2 c) 
__NOEXC { // genfloatf select (genfloatf a, genfloatf b, genint c) template typename std::enable_if< - detail::is_genfloatf::value &&detail::is_genint::value, T>::type + detail::is_genfloatf::value && detail::is_genint::value, T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } @@ -1404,48 +1393,46 @@ select(T a, T b, T2 c) __NOEXC { // genfloatf select (genfloatf a, genfloatf b, ugenint c) template typename std::enable_if< - detail::is_genfloatf::value &&detail::is_ugenint::value, T>::type + detail::is_genfloatf::value && detail::is_ugenint::value, T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, igeninteger64 c) template -typename std::enable_if< - detail::is_genfloatd::value &&detail::is_igeninteger64bit::value, - T>::type +typename std::enable_if::value && + detail::is_igeninteger64bit::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, ugeninteger64 c) template -typename std::enable_if< - detail::is_genfloatd::value &&detail::is_ugeninteger64bit::value, - T>::type +typename std::enable_if::value && + detail::is_ugeninteger64bit::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } -#ifndef __HALF_NO_ENABLED // genfloath select (genfloath a, genfloath b, igeninteger16 c) template -typename std::enable_if< - detail::is_genfloath::value &&detail::is_igeninteger16bit::value, - T>::type +typename std::enable_if::value && + detail::is_igeninteger16bit::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloath select (genfloath a, genfloath b, ugeninteger16 c) template -typename std::enable_if< - detail::is_genfloath::value 
&&detail::is_ugeninteger16bit::value, - T>::type +typename std::enable_if::value && + detail::is_ugeninteger16bit::value, + T>::type select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } -#endif namespace native { /* ----------------- 4.13.3 Math functions. ---------------------------------*/ @@ -1652,6 +1639,4 @@ tan(T x) __NOEXC { } // namespace sycl } // namespace cl -#undef __HALF_NO_ENABLED #undef __NOEXC -#undef __DEVICE_SIDE diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index 23e11d8454c80..aae3f16413252 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -11,11 +11,8 @@ #include #include -#include #include - -// TODO Delete when half type will supported by SYCL Runtime -#define __HALF_NO_ENABLED +#include namespace cl { namespace sycl { @@ -35,281 +32,260 @@ struct is_contained : std::conditional::type, typename TL::head>::value, std::true_type, - is_contained >::type {}; + is_contained>::type {}; -template struct is_contained > : std::false_type {}; +template struct is_contained> : std::false_type {}; // floatn: float2, float3, float4, float8, float16 template -using is_floatn = - typename is_contained >::type; +using is_floatn = typename is_contained< + T, type_list>::type; // genfloatf: float, floatn template -using is_genfloatf = std::integral_constant< - bool, is_contained >::value || is_floatn::value>; +using is_genfloatf = + std::integral_constant>::value || + is_floatn::value>; // doublen: double2, double3, double4, double8, double16 template using is_doublen = typename is_contained >::type; + cl_double8, cl_double16>>::type; // genfloatd: double, doublen template -using is_genfloatd = std::integral_constant< - bool, - is_contained >::value || is_doublen::value>; +using is_genfloatd = + std::integral_constant>::value || + is_doublen::value>; -#ifndef 
__HALF_NO_ENABLED // halfn: half2, half3, half4, half8, half16 template using is_halfn = typename is_contained< - T, type_list >::type; + T, type_list>::type; // genfloath: half, halfn template -using is_genfloath = std::integral_constant< - bool, is_contained >::value || is_halfn::value>; -#endif +using is_genfloath = + std::integral_constant>::value || + is_halfn::value>; // genfloat: genfloatf, genfloatd, genfloath template using is_genfloat = std::integral_constant::value || - is_genfloatd::value -#ifndef __HALF_NO_ENABLED - || is_genfloath::value -#endif - >; + is_genfloatd::value || + is_genfloath::value>; // sgenfloat: float, double, half template -using is_sgenfloat = typename is_contained >::type; +using is_sgenfloat = + typename is_contained>::type; // vgenfloat: floatn, doublen, halfn template using is_vgenfloat = - std::integral_constant::value || is_doublen::value -#ifndef __HALF_NO_ENABLED - || is_halfn::value -#endif - >; + std::integral_constant::value || is_doublen::value || + is_halfn::value>; // gengeofloat: float, float2, float3, float4 template using is_gengeofloat = typename is_contained< - T, type_list >::type; + T, type_list>::type; // gengeodouble: double, double2, double3, double4 template using is_gengeodouble = typename is_contained< - T, type_list >::type; + T, type_list>::type; -#ifndef __HALF_NO_ENABLED // gengeohalf: half, half2, half3, half4 template using is_gengeohalf = typename is_contained< - T, type_list >::type; -#endif + T, type_list>::type; // gengeofloat: float, float2, float3, float4 template using is_vgengeofloat = - typename is_contained >::type; + typename is_contained>::type; // gengeodouble: double, double2, double3, double4 template -using is_vgengeodouble = typename is_contained< - T, type_list >::type; +using is_vgengeodouble = + typename is_contained>::type; -#ifndef __HALF_NO_ENABLED // gengeohalf: half2, half3, half4 template using is_vgengeohalf = - typename is_contained >::type; -#endif + typename 
is_contained>::type; // sgengeo: float, double, half template -using is_sgengeo = - std::integral_constant >::value>; +using is_sgengeo = std::integral_constant< + bool, is_contained>::value>; // vgengeo: vgengeofloat, vgengeodouble, vgengeohalf template -using is_vgengeo = std::integral_constant::value || - is_vgengeodouble::value -#ifndef __HALF_NO_ENABLED - || is_vgengeohalf::value -#endif - >; +using is_vgengeo = + std::integral_constant::value || + is_vgengeodouble::value || + is_vgengeohalf::value>; // gencrossfloat: float3, float4 template using is_gencrossfloat = - typename is_contained >::type; + typename is_contained>::type; // gencrossdouble: double3, double4 template using is_gencrossdouble = - typename is_contained >::type; + typename is_contained>::type; -#ifndef __HALF_NO_ENABLED // gencrosshalf: half3, half4 template using is_gencrosshalf = - typename is_contained >::type; -#endif + typename is_contained>::type; // gencross: gencrossfloat, gencrossdouble, gencrosshalf template -using is_gencross = std::integral_constant< - bool, is_gencrossfloat::value || is_gencrossdouble::value -#ifndef __HALF_NO_ENABLED - || is_gencrosshalf::value -#endif - >; +using is_gencross = + std::integral_constant::value || + is_gencrossdouble::value || + is_gencrosshalf::value>; // charn: char2, char3, char4, char8, char16 template using is_charn = typename is_contained< - T, type_list >::type; + T, type_list>::type; // scharn: schar2, schar3, schar4, schar8, schar16 template -using is_scharn = - typename is_contained >::type; +using is_scharn = typename is_contained< + T, type_list>::type; // ucharn: uchar2, uchar3, uchar4, uchar8, uchar16 template -using is_ucharn = - typename is_contained >::type; +using is_ucharn = typename is_contained< + T, type_list>::type; // igenchar: signed char, scharn template -using is_igenchar = std::integral_constant< - bool, is_contained >::value || is_scharn::value>; +using is_igenchar = + std::integral_constant>::value || + 
is_scharn::value>; // ugenchar: unsigned char, ucharn template -using is_ugenchar = std::integral_constant< - bool, is_contained >::value || is_ucharn::value>; +using is_ugenchar = + std::integral_constant>::value || + is_ucharn::value>; // genchar: char, charn, igenchar, ugenchar template using is_genchar = std::integral_constant< - bool, is_contained >::value || is_charn::value || + bool, is_contained>::value || is_charn::value || is_igenchar::value || is_ugenchar::value>; // shortn: short2, short3, short4, short8, short16 template -using is_shortn = - typename is_contained >::type; +using is_shortn = typename is_contained< + T, type_list>::type; // genshort: short, shortn template -using is_genshort = std::integral_constant< - bool, is_contained >::value || is_shortn::value>; +using is_genshort = + std::integral_constant>::value || + is_shortn::value>; // ushortn: ushort2, ushort3, ushort4, ushort8, ushort16 template using is_ushortn = typename is_contained >::type; + cl_ushort8, cl_ushort16>>::type; // genushort: ushort, ushortn template -using is_ugenshort = std::integral_constant< - bool, - is_contained >::value || is_ushortn::value>; +using is_ugenshort = + std::integral_constant>::value || + is_ushortn::value>; // uintn: uint2, uint3, uint4, uint8, uint16 template using is_uintn = typename is_contained< - T, type_list >::type; + T, type_list>::type; // ugenint: unsigned int, uintn template -using is_ugenint = std::integral_constant< - bool, is_contained >::value || is_uintn::value>; +using is_ugenint = + std::integral_constant>::value || + is_uintn::value>; // intn: int2, int3, int4, int8, int16 template using is_intn = typename is_contained< - T, type_list >::type; + T, type_list>::type; // genint: int, intn template -using is_genint = std::integral_constant< - bool, is_contained >::value || is_intn::value>; +using is_genint = + std::integral_constant>::value || + is_intn::value>; // ulongn: ulong2, ulong3, ulong4, ulong8,ulong16 template -using is_ulongn 
= - typename is_contained >::type; +using is_ulongn = typename is_contained< + T, type_list>::type; // ugenlong: unsigned long int, ulongn template -using is_ugenlong = std::integral_constant< - bool, is_contained >::value || is_ulongn::value>; +using is_ugenlong = + std::integral_constant>::value || + is_ulongn::value>; // longn: long2, long3, long4, long8, long16 template using is_longn = typename is_contained< - T, type_list >::type; + T, type_list>::type; // genlong: long int, longn template -using is_genlong = std::integral_constant< - bool, is_contained >::value || is_longn::value>; +using is_genlong = + std::integral_constant>::value || + is_longn::value>; // ulonglongn: ulonglong2, ulonglong3, ulonglong4,ulonglong8, ulonglong16 template using is_ulonglongn = typename is_contained >::type; + ulonglong8, ulonglong16>>::type; // ugenlonglong: unsigned long long int, ulonglongn template -using is_ugenlonglong = std::integral_constant< - bool, - is_contained >::value || is_ulonglongn::value>; +using is_ugenlonglong = + std::integral_constant>::value || + is_ulonglongn::value>; // longlongn: longlong2, longlong3, longlong4,longlong8, longlong16 template -using is_longlongn = - typename is_contained >::type; +using is_longlongn = typename is_contained< + T, type_list>::type; // genlonglong: long long int, longlongn template using is_genlonglong = - std::integral_constant >::value || + std::integral_constant>::value || is_longlongn::value>; // igenlonginteger: genlong, genlonglong template -using is_igenlonginteger = std::integral_constant< - bool, is_genlong::value || is_genlonglong::value>; +using is_igenlonginteger = + std::integral_constant::value || is_genlonglong::value>; // ugenlonginteger ugenlong, ugenlonglong template -using is_ugenlonginteger = std::integral_constant< - bool, is_ugenlong::value || is_ugenlonglong::value>; +using is_ugenlonginteger = + std::integral_constant::value || is_ugenlonglong::value>; // geninteger: genchar, genshort, ugenshort, 
genint, ugenint, igenlonginteger, // ugenlonginteger @@ -338,7 +314,7 @@ using is_ugeninteger = std::integral_constant< template using is_sgeninteger = typename is_contained< T, type_list >::type; + cl_uint, cl_long, cl_ulong, longlong, ulonglong>>::type; // vgeninteger: charn, scharn, ucharn, shortn, ushortn, intn, uintn, longn, // ulongn, longlongn, ulonglongn @@ -352,15 +328,14 @@ using is_vgeninteger = std::integral_constant< // sigeninteger: char, signed char, short, int, long int, , long long int template -using is_sigeninteger = - typename is_contained >::type; +using is_sigeninteger = typename is_contained< + T, type_list>::type; // sugeninteger: unsigned char, unsigned short, unsigned int, unsigned long // int, unsigned long long int template using is_sugeninteger = typename is_contained< - T, type_list >::type; + T, type_list>::type; // vigeninteger: charn, scharn, shortn, intn, longn, longlongn template @@ -388,8 +363,8 @@ template class TryToGetElementType; // size, where N = 8, 16, 32, 64 template using is_igenintegerNbit = typename std::integral_constant< - bool, is_igeninteger::value &&( - sizeof(typename TryToGetElementType::type) == N)>; + bool, is_igeninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // igeninteger8bit All types within igeninteger whose base type are 8 bits in // size @@ -411,8 +386,8 @@ template using is_igeninteger64bit = is_igenintegerNbit; // size, where N = 8, 16, 32, 64. template using is_ugenintegerNbit = typename std::integral_constant< - bool, is_ugeninteger::value &&( - sizeof(typename TryToGetElementType::type) == N)>; + bool, is_ugeninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // ugeninteger8bit All types within ugeninteger whose base type are 8 bits in // size @@ -434,8 +409,8 @@ template using is_ugeninteger64bit = is_ugenintegerNbit; // size, where N = 8, 16, 32, 64. 
template using is_genintegerNbit = typename std::integral_constant< - bool, is_geninteger::value &&( - sizeof(typename TryToGetElementType::type) == N)>; + bool, is_geninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // geninteger8bit All types within geninteger whose base type are 8 bits in size template using is_geninteger8bit = is_genintegerNbit; @@ -454,9 +429,9 @@ template using is_geninteger64bit = is_genintegerNbit; template using is_MultiPtrOfGLR = - std::integral_constant >::value || - std::is_same >::value || - std::is_same >::value>; + std::integral_constant>::value || + std::is_same>::value || + std::is_same>::value>; // genintptr All permutations of multi_ptr where dataT is // all types within genint and addressSpace is @@ -483,14 +458,12 @@ using is_genfloatptr = is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || -#ifndef __HALF_NO_ENABLED is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || -#endif is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || @@ -519,7 +492,6 @@ template <> struct unsign_integral_to_float_point { using type = cl_float16; }; -#ifndef __HALF_NO_ENABLED template <> struct unsign_integral_to_float_point { using type = cl_half; }; @@ -538,7 +510,6 @@ template <> struct unsign_integral_to_float_point { template <> struct unsign_integral_to_float_point { using type = cl_half16; }; -#endif template <> struct unsign_integral_to_float_point { using type = cl_double; @@ -605,7 +576,6 @@ template <> struct float_point_to_sign_integral { using type = cl_int16; }; -#ifndef __HALF_NO_ENABLED template <> struct float_point_to_sign_integral { using type = cl_int; }; @@ -624,7 +594,6 @@ template <> struct float_point_to_sign_integral { template <> struct float_point_to_sign_integral { using type = cl_short16; }; -#endif template <> struct 
float_point_to_sign_integral { using type = cl_int; @@ -647,417 +616,161 @@ template <> struct float_point_to_sign_integral { // Used for ilogb built-in template struct float_point_to_int; -template <> struct float_point_to_int { - using type = cl_int; -}; -template <> struct float_point_to_int { - using type = cl_int2; -}; -template <> struct float_point_to_int { - using type = cl_int3; -}; -template <> struct float_point_to_int { - using type = cl_int4; -}; -template <> struct float_point_to_int { - using type = cl_int8; -}; -template <> struct float_point_to_int { - using type = cl_int16; -}; -#ifndef __HALF_NO_ENABLED -template <> struct float_point_to_int { - using type = cl_int; -}; -template <> struct float_point_to_int { - using type = cl_int2; -}; -template <> struct float_point_to_int { - using type = cl_int3; -}; -template <> struct float_point_to_int { - using type = cl_int4; -}; -template <> struct float_point_to_int { - using type = cl_int8; -}; -template <> struct float_point_to_int { - using type = cl_int16; -}; -#endif -template <> struct float_point_to_int { - using type = cl_int; -}; -template <> struct float_point_to_int { - using type = cl_int2; -}; -template <> struct float_point_to_int { - using type = cl_int3; -}; -template <> struct float_point_to_int { - using type = cl_int4; -}; -template <> struct float_point_to_int { - using type = cl_int8; -}; -template <> struct float_point_to_int { - using type = cl_int16; -}; +template <> struct float_point_to_int { using type = cl_int; }; +template <> struct float_point_to_int { using type = cl_int2; }; +template <> struct float_point_to_int { using type = cl_int3; }; +template <> struct float_point_to_int { using type = cl_int4; }; +template <> struct float_point_to_int { using type = cl_int8; }; +template <> struct float_point_to_int { using type = cl_int16; }; + +template <> struct float_point_to_int { using type = cl_int; }; +template <> struct float_point_to_int { using type = cl_int2; }; 
+template <> struct float_point_to_int { using type = cl_int3; }; +template <> struct float_point_to_int { using type = cl_int4; }; +template <> struct float_point_to_int { using type = cl_int8; }; +template <> struct float_point_to_int { using type = cl_int16; }; + +template <> struct float_point_to_int { using type = cl_int; }; +template <> struct float_point_to_int { using type = cl_int2; }; +template <> struct float_point_to_int { using type = cl_int3; }; +template <> struct float_point_to_int { using type = cl_int4; }; +template <> struct float_point_to_int { using type = cl_int8; }; +template <> struct float_point_to_int { using type = cl_int16; }; // Used for abs and abs_diff built-in -template struct make_unsigned { - using type = T; -}; - -template <> struct make_unsigned { - using type = cl_uchar; -}; -template <> struct make_unsigned { - using type = cl_uchar2; -}; -template <> struct make_unsigned { - using type = cl_uchar3; -}; -template <> struct make_unsigned { - using type = cl_uchar4; -}; -template <> struct make_unsigned { - using type = cl_uchar8; -}; -template <> struct make_unsigned { - using type = cl_uchar16; -}; - -template <> struct make_unsigned { - using type = cl_ushort; -}; -template <> struct make_unsigned { - using type = cl_ushort2; -}; -template <> struct make_unsigned { - using type = cl_ushort3; -}; -template <> struct make_unsigned { - using type = cl_ushort4; -}; -template <> struct make_unsigned { - using type = cl_ushort8; -}; -template <> struct make_unsigned { - using type = cl_ushort16; -}; - -template <> struct make_unsigned { - using type = cl_uint; -}; -template <> struct make_unsigned { - using type = cl_uint2; -}; -template <> struct make_unsigned { - using type = cl_uint3; -}; -template <> struct make_unsigned { - using type = cl_uint4; -}; -template <> struct make_unsigned { - using type = cl_uint8; -}; -template <> struct make_unsigned { - using type = cl_uint16; -}; - -template <> struct make_unsigned { - using 
type = cl_ulong; -}; -template <> struct make_unsigned { - using type = cl_ulong2; -}; -template <> struct make_unsigned { - using type = cl_ulong3; -}; -template <> struct make_unsigned { - using type = cl_ulong4; -}; -template <> struct make_unsigned { - using type = cl_ulong8; -}; -template <> struct make_unsigned { - using type = cl_ulong16; -}; - -template <> struct make_unsigned { - using type = ulonglong; -}; -template <> struct make_unsigned { - using type = ulonglong2; -}; -template <> struct make_unsigned { - using type = ulonglong3; -}; -template <> struct make_unsigned { - using type = ulonglong4; -}; -template <> struct make_unsigned { - using type = ulonglong8; -}; -template <> struct make_unsigned { - using type = ulonglong16; -}; - -template struct make_signed { - using type = T; -}; - -template <> struct make_signed { - using type = cl_char; -}; -template <> struct make_signed { - using type = cl_char2; -}; -template <> struct make_signed { - using type = cl_char3; -}; -template <> struct make_signed { - using type = cl_char4; -}; -template <> struct make_signed { - using type = cl_char8; -}; -template <> struct make_signed { - using type = cl_char16; -}; - -template <> struct make_signed { - using type = cl_short; -}; -template <> struct make_signed { - using type = cl_short2; -}; -template <> struct make_signed { - using type = cl_short3; -}; -template <> struct make_signed { - using type = cl_short4; -}; -template <> struct make_signed { - using type = cl_short8; -}; -template <> struct make_signed { - using type = cl_short16; -}; - -template <> struct make_signed { - using type = cl_int; -}; -template <> struct make_signed { - using type = cl_int2; -}; -template <> struct make_signed { - using type = cl_int3; -}; -template <> struct make_signed { - using type = cl_int4; -}; -template <> struct make_signed { - using type = cl_int8; -}; -template <> struct make_signed { - using type = cl_int16; -}; - -template <> struct make_signed { - using type 
= cl_long; -}; -template <> struct make_signed { - using type = cl_long2; -}; -template <> struct make_signed { - using type = cl_long3; -}; -template <> struct make_signed { - using type = cl_long4; -}; -template <> struct make_signed { - using type = cl_long8; -}; -template <> struct make_signed { - using type = cl_long16; -}; - -template <> struct make_signed { - using type = longlong; -}; -template <> struct make_signed { - using type = longlong2; -}; -template <> struct make_signed { - using type = longlong3; -}; -template <> struct make_signed { - using type = longlong4; -}; -template <> struct make_signed { - using type = longlong8; -}; -template <> struct make_signed { - using type = longlong16; -}; +template struct make_unsigned { using type = T; }; + +template <> struct make_unsigned { using type = cl_uchar; }; +template <> struct make_unsigned { using type = cl_uchar2; }; +template <> struct make_unsigned { using type = cl_uchar3; }; +template <> struct make_unsigned { using type = cl_uchar4; }; +template <> struct make_unsigned { using type = cl_uchar8; }; +template <> struct make_unsigned { using type = cl_uchar16; }; + +template <> struct make_unsigned { using type = cl_ushort; }; +template <> struct make_unsigned { using type = cl_ushort2; }; +template <> struct make_unsigned { using type = cl_ushort3; }; +template <> struct make_unsigned { using type = cl_ushort4; }; +template <> struct make_unsigned { using type = cl_ushort8; }; +template <> struct make_unsigned { using type = cl_ushort16; }; + +template <> struct make_unsigned { using type = cl_uint; }; +template <> struct make_unsigned { using type = cl_uint2; }; +template <> struct make_unsigned { using type = cl_uint3; }; +template <> struct make_unsigned { using type = cl_uint4; }; +template <> struct make_unsigned { using type = cl_uint8; }; +template <> struct make_unsigned { using type = cl_uint16; }; + +template <> struct make_unsigned { using type = cl_ulong; }; +template <> struct 
make_unsigned { using type = cl_ulong2; }; +template <> struct make_unsigned { using type = cl_ulong3; }; +template <> struct make_unsigned { using type = cl_ulong4; }; +template <> struct make_unsigned { using type = cl_ulong8; }; +template <> struct make_unsigned { using type = cl_ulong16; }; + +template <> struct make_unsigned { using type = ulonglong; }; +template <> struct make_unsigned { using type = ulonglong2; }; +template <> struct make_unsigned { using type = ulonglong3; }; +template <> struct make_unsigned { using type = ulonglong4; }; +template <> struct make_unsigned { using type = ulonglong8; }; +template <> struct make_unsigned { using type = ulonglong16; }; + +template struct make_signed { using type = T; }; + +template <> struct make_signed { using type = cl_char; }; +template <> struct make_signed { using type = cl_char2; }; +template <> struct make_signed { using type = cl_char3; }; +template <> struct make_signed { using type = cl_char4; }; +template <> struct make_signed { using type = cl_char8; }; +template <> struct make_signed { using type = cl_char16; }; + +template <> struct make_signed { using type = cl_short; }; +template <> struct make_signed { using type = cl_short2; }; +template <> struct make_signed { using type = cl_short3; }; +template <> struct make_signed { using type = cl_short4; }; +template <> struct make_signed { using type = cl_short8; }; +template <> struct make_signed { using type = cl_short16; }; + +template <> struct make_signed { using type = cl_int; }; +template <> struct make_signed { using type = cl_int2; }; +template <> struct make_signed { using type = cl_int3; }; +template <> struct make_signed { using type = cl_int4; }; +template <> struct make_signed { using type = cl_int8; }; +template <> struct make_signed { using type = cl_int16; }; + +template <> struct make_signed { using type = cl_long; }; +template <> struct make_signed { using type = cl_long2; }; +template <> struct make_signed { using type = cl_long3; 
}; +template <> struct make_signed { using type = cl_long4; }; +template <> struct make_signed { using type = cl_long8; }; +template <> struct make_signed { using type = cl_long16; }; + +template <> struct make_signed { using type = longlong; }; +template <> struct make_signed { using type = longlong2; }; +template <> struct make_signed { using type = longlong3; }; +template <> struct make_signed { using type = longlong4; }; +template <> struct make_signed { using type = longlong8; }; +template <> struct make_signed { using type = longlong16; }; // Used for upsample built-in // Bases on Table 4.93: Scalar data type aliases supported by SYCL template struct make_upper; -template <> struct make_upper { - using type = cl_short; -}; -template <> struct make_upper { - using type = cl_short2; -}; -template <> struct make_upper { - using type = cl_short3; -}; -template <> struct make_upper { - using type = cl_short4; -}; -template <> struct make_upper { - using type = cl_short8; -}; -template <> struct make_upper { - using type = cl_short16; -}; - -template <> struct make_upper { - using type = cl_ushort; -}; -template <> struct make_upper { - using type = cl_ushort2; -}; -template <> struct make_upper { - using type = cl_ushort3; -}; -template <> struct make_upper { - using type = cl_ushort4; -}; -template <> struct make_upper { - using type = cl_ushort8; -}; -template <> struct make_upper { - using type = cl_ushort16; -}; - -template <> struct make_upper { - using type = cl_int; -}; -template <> struct make_upper { - using type = cl_int2; -}; -template <> struct make_upper { - using type = cl_int3; -}; -template <> struct make_upper { - using type = cl_int4; -}; -template <> struct make_upper { - using type = cl_int8; -}; -template <> struct make_upper { - using type = cl_int16; -}; - -template <> struct make_upper { - using type = cl_uint; -}; -template <> struct make_upper { - using type = cl_uint2; -}; -template <> struct make_upper { - using type = cl_uint3; -}; 
-template <> struct make_upper { - using type = cl_uint4; -}; -template <> struct make_upper { - using type = cl_uint8; -}; -template <> struct make_upper { - using type = cl_uint16; -}; - -template <> struct make_upper { - using type = cl_long; -}; -template <> struct make_upper { - using type = cl_long2; -}; -template <> struct make_upper { - using type = cl_long3; -}; -template <> struct make_upper { - using type = cl_long4; -}; -template <> struct make_upper { - using type = cl_long8; -}; -template <> struct make_upper { - using type = cl_long16; -}; - -template <> struct make_upper { - using type = cl_ulong; -}; -template <> struct make_upper { - using type = cl_ulong2; -}; -template <> struct make_upper { - using type = cl_ulong3; -}; -template <> struct make_upper { - using type = cl_ulong4; -}; -template <> struct make_upper { - using type = cl_ulong8; -}; -template <> struct make_upper { - using type = cl_ulong16; -}; - -template <> struct make_upper { - using type = longlong; -}; -template <> struct make_upper { - using type = longlong2; -}; -template <> struct make_upper { - using type = longlong3; -}; -template <> struct make_upper { - using type = longlong4; -}; -template <> struct make_upper { - using type = longlong8; -}; -template <> struct make_upper { - using type = longlong16; -}; - -template <> struct make_upper { - using type = ulonglong; -}; -template <> struct make_upper { - using type = ulonglong2; -}; -template <> struct make_upper { - using type = ulonglong3; -}; -template <> struct make_upper { - using type = ulonglong4; -}; -template <> struct make_upper { - using type = ulonglong8; -}; -template <> struct make_upper { - using type = ulonglong16; -}; +template <> struct make_upper { using type = cl_short; }; +template <> struct make_upper { using type = cl_short2; }; +template <> struct make_upper { using type = cl_short3; }; +template <> struct make_upper { using type = cl_short4; }; +template <> struct make_upper { using type = 
cl_short8; }; +template <> struct make_upper { using type = cl_short16; }; + +template <> struct make_upper { using type = cl_ushort; }; +template <> struct make_upper { using type = cl_ushort2; }; +template <> struct make_upper { using type = cl_ushort3; }; +template <> struct make_upper { using type = cl_ushort4; }; +template <> struct make_upper { using type = cl_ushort8; }; +template <> struct make_upper { using type = cl_ushort16; }; + +template <> struct make_upper { using type = cl_int; }; +template <> struct make_upper { using type = cl_int2; }; +template <> struct make_upper { using type = cl_int3; }; +template <> struct make_upper { using type = cl_int4; }; +template <> struct make_upper { using type = cl_int8; }; +template <> struct make_upper { using type = cl_int16; }; + +template <> struct make_upper { using type = cl_uint; }; +template <> struct make_upper { using type = cl_uint2; }; +template <> struct make_upper { using type = cl_uint3; }; +template <> struct make_upper { using type = cl_uint4; }; +template <> struct make_upper { using type = cl_uint8; }; +template <> struct make_upper { using type = cl_uint16; }; + +template <> struct make_upper { using type = cl_long; }; +template <> struct make_upper { using type = cl_long2; }; +template <> struct make_upper { using type = cl_long3; }; +template <> struct make_upper { using type = cl_long4; }; +template <> struct make_upper { using type = cl_long8; }; +template <> struct make_upper { using type = cl_long16; }; + +template <> struct make_upper { using type = cl_ulong; }; +template <> struct make_upper { using type = cl_ulong2; }; +template <> struct make_upper { using type = cl_ulong3; }; +template <> struct make_upper { using type = cl_ulong4; }; +template <> struct make_upper { using type = cl_ulong8; }; +template <> struct make_upper { using type = cl_ulong16; }; + +template <> struct make_upper { using type = longlong; }; +template <> struct make_upper { using type = longlong2; }; +template 
<> struct make_upper { using type = longlong3; }; +template <> struct make_upper { using type = longlong4; }; +template <> struct make_upper { using type = longlong8; }; +template <> struct make_upper { using type = longlong16; }; + +template <> struct make_upper { using type = ulonglong; }; +template <> struct make_upper { using type = ulonglong2; }; +template <> struct make_upper { using type = ulonglong3; }; +template <> struct make_upper { using type = ulonglong4; }; +template <> struct make_upper { using type = ulonglong8; }; +template <> struct make_upper { using type = ulonglong16; }; // Try to get pointer_t, otherwise T template class TryToGetPointerT { @@ -1251,5 +964,3 @@ template static constexpr T quiet_NaN() { } // namespace detail } // namespace sycl } // namespace cl - -#undef __HALF_NO_ENABLED diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index 9e8f5a12b63c2..c454a5965db89 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include - #include #include #include @@ -17,9 +16,6 @@ // TODO Decide whether to mark functions with this attribute. 
#define __NOEXC /*noexcept*/ -// TODO Remove when half type will supported by SYCL Runtime -#define NO_HALF_ENABLED - namespace s = cl::sycl; namespace d = s::detail; @@ -43,17 +39,18 @@ namespace d = s::detail; return r; \ } -#define __MAKE_1V_2V_RS(Fun, Call, N, Ret, Arg1, Arg2) \ - Ret Fun __NOEXC(Arg1##N x, Arg2##N y) { \ - Ret r = Ret(); \ - using base1_t = typename Arg1##N::element_type; \ - using base2_t = typename Arg2##N::element_type; \ - detail::helper().run_1v_2v_rs(r, \ - [](Ret &r, base1_t x, base2_t y) { \ - return cl::__host_std::Call(r, x, y); \ - }, \ - x, y); \ - return r; \ +#define __MAKE_1V_2V_RS(Fun, Call, N, Ret, Arg1, Arg2) \ + Ret Fun __NOEXC(Arg1##N x, Arg2##N y) { \ + Ret r = Ret(); \ + using base1_t = typename Arg1##N::element_type; \ + using base2_t = typename Arg2##N::element_type; \ + detail::helper().run_1v_2v_rs( \ + r, \ + [](Ret &r, base1_t x, base2_t y) { \ + return cl::__host_std::Call(r, x, y); \ + }, \ + x, y); \ + return r; \ } #define __MAKE_1V_RS(Fun, Call, N, Ret, Arg1) \ @@ -65,30 +62,32 @@ namespace d = s::detail; return r; \ } -#define __MAKE_1V_2V_3V(Fun, Call, N, Ret, Arg1, Arg2, Arg3) \ - Ret##N Fun __NOEXC(Arg1##N x, Arg2##N y, Arg3##N z) { \ - Ret##N r; \ - using base1_t = typename Arg1##N::element_type; \ - using base2_t = typename Arg2##N::element_type; \ - using base3_t = typename Arg3##N::element_type; \ - detail::helper().run_1v_2v_3v(r, \ - [](base1_t x, base2_t y, base3_t z) { \ - return cl::__host_std::Call(x, y, z); \ - }, \ - x, y, z); \ - return r; \ +#define __MAKE_1V_2V_3V(Fun, Call, N, Ret, Arg1, Arg2, Arg3) \ + Ret##N Fun __NOEXC(Arg1##N x, Arg2##N y, Arg3##N z) { \ + Ret##N r; \ + using base1_t = typename Arg1##N::element_type; \ + using base2_t = typename Arg2##N::element_type; \ + using base3_t = typename Arg3##N::element_type; \ + detail::helper().run_1v_2v_3v( \ + r, \ + [](base1_t x, base2_t y, base3_t z) { \ + return cl::__host_std::Call(x, y, z); \ + }, \ + x, y, z); \ + return r; \ } 
-#define __MAKE_1V_2S_3S(Fun, N, Ret, Arg1, Arg2, Arg3) \ - Ret##N Fun __NOEXC(Arg1##N x, Arg2 y, Arg3 z) { \ - Ret##N r; \ - using base1_t = typename Arg1##N::element_type; \ - detail::helper().run_1v_2s_3s(r, \ - [](base1_t x, Arg2 y, Arg3 z) { \ - return cl::__host_std::Fun(x, y, z); \ - }, \ - x, y, z); \ - return r; \ +#define __MAKE_1V_2S_3S(Fun, N, Ret, Arg1, Arg2, Arg3) \ + Ret##N Fun __NOEXC(Arg1##N x, Arg2 y, Arg3 z) { \ + Ret##N r; \ + using base1_t = typename Arg1##N::element_type; \ + detail::helper().run_1v_2s_3s( \ + r, \ + [](base1_t x, Arg2 y, Arg3 z) { \ + return cl::__host_std::Fun(x, y, z); \ + }, \ + x, y, z); \ + return r; \ } #define __MAKE_1V_2S(Fun, N, Ret, Arg1, Arg2) \ @@ -136,8 +135,10 @@ namespace d = s::detail; using base2_t = typename Arg2##N::element_type; \ using base3_t = typename Arg3##N::element_type; \ detail::helper().run_1v_2v_3p( \ - r, [](base1_t x, base2_t y, \ - base3_t *z) { return cl::__host_std::Fun(x, y, z); }, \ + r, \ + [](base1_t x, base2_t y, base3_t *z) { \ + return cl::__host_std::Fun(x, y, z); \ + }, \ x, y, z); \ return r; \ } @@ -145,85 +146,85 @@ namespace d = s::detail; #define MAKE_1V(Fun, Ret, Arg1) MAKE_1V_FUNC(Fun, Fun, Ret, Arg1) #define MAKE_1V_FUNC(Fun, Call, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 2, Ret, Arg1) __MAKE_1V(Fun, Call, 3, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 4, Ret, Arg1) __MAKE_1V(Fun, Call, 8, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 16, Ret, Arg1) + __MAKE_1V(Fun, Call, 2, Ret, Arg1) \ + __MAKE_1V(Fun, Call, 3, Ret, Arg1) __MAKE_1V(Fun, Call, 4, Ret, Arg1) \ + __MAKE_1V(Fun, Call, 8, Ret, Arg1) __MAKE_1V(Fun, Call, 16, Ret, Arg1) #define MAKE_1V_2V(Fun, Ret, Arg1, Arg2) \ MAKE_1V_2V_FUNC(Fun, Fun, Ret, Arg1, Arg2) #define MAKE_1V_2V_FUNC(Fun, Call, Ret, Arg1, Arg2) \ __MAKE_1V_2V(Fun, Call, 2, Ret, Arg1, Arg2) \ - __MAKE_1V_2V(Fun, Call, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2V(Fun, Call, 4, Ret, Arg1, Arg2) \ - __MAKE_1V_2V(Fun, Call, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2V(Fun, Call, 16, Ret, Arg1, 
Arg2) + __MAKE_1V_2V(Fun, Call, 3, Ret, Arg1, Arg2) \ + __MAKE_1V_2V(Fun, Call, 4, Ret, Arg1, Arg2) \ + __MAKE_1V_2V(Fun, Call, 8, Ret, Arg1, Arg2) \ + __MAKE_1V_2V(Fun, Call, 16, Ret, Arg1, Arg2) #define MAKE_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) #define MAKE_1V_2V_3V_FUNC(Fun, Call, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3V(Fun, Call, 2, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3V(Fun, Call, 3, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3V(Fun, Call, 4, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3V(Fun, Call, 8, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3V(Fun, Call, 16, Ret, Arg1, Arg2, Arg3) + __MAKE_1V_2V_3V(Fun, Call, 3, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3V(Fun, Call, 4, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3V(Fun, Call, 8, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3V(Fun, Call, 16, Ret, Arg1, Arg2, Arg3) #define MAKE_SC_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \ - MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) + MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) #define MAKE_SC_FSC_1V_2V_3V_FV(FunSc, FunV, Ret, Arg1, Arg2, Arg3) \ MAKE_SC_3ARG(FunSc, Ret, Arg1, Arg2, Arg3) \ - MAKE_1V_2V_3V_FUNC(FunSc, FunV, Ret, Arg1, Arg2, Arg3) + MAKE_1V_2V_3V_FUNC(FunSc, FunV, Ret, Arg1, Arg2, Arg3) #define MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \ Ret Fun __NOEXC(Arg1 x, Arg2 y, Arg3 z) { return (Ret)__##Fun(x, y, z); } #define MAKE_1V_2S(Fun, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 2, Ret, Arg1, Arg2) __MAKE_1V_2S(Fun, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 4, Ret, Arg1, Arg2) \ + __MAKE_1V_2S(Fun, 2, Ret, Arg1, Arg2) \ + __MAKE_1V_2S(Fun, 3, Ret, Arg1, Arg2) __MAKE_1V_2S(Fun, 4, Ret, Arg1, Arg2) \ __MAKE_1V_2S(Fun, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 16, Ret, Arg1, Arg2) + __MAKE_1V_2S(Fun, 16, Ret, Arg1, Arg2) #define MAKE_1V_2S_3S(Fun, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2S_3S(Fun, 2, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2S_3S(Fun, 3, Ret, Arg1, Arg2, Arg3) \ - 
__MAKE_1V_2S_3S(Fun, 4, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2S_3S(Fun, 8, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2S_3S(Fun, 16, Ret, Arg1, Arg2, Arg3) + __MAKE_1V_2S_3S(Fun, 3, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 4, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 8, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 16, Ret, Arg1, Arg2, Arg3) #define MAKE_SR_1V_AND(Fun, Call, Ret, Arg1) \ __MAKE_SR_1V_AND(Fun, Call, 2, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Call, 3, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Call, 4, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Call, 8, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Call, 16, Ret, Arg1) + __MAKE_SR_1V_AND(Fun, Call, 3, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 4, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 8, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 16, Ret, Arg1) #define MAKE_SR_1V_OR(Fun, Call, Ret, Arg1) \ __MAKE_SR_1V_OR(Fun, Call, 2, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Call, 3, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Call, 4, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Call, 8, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Call, 16, Ret, Arg1) + __MAKE_SR_1V_OR(Fun, Call, 3, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 4, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 8, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 16, Ret, Arg1) #define MAKE_1V_2P(Fun, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 2, Ret, Arg1, Arg2) __MAKE_1V_2P(Fun, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 4, Ret, Arg1, Arg2) \ + __MAKE_1V_2P(Fun, 2, Ret, Arg1, Arg2) \ + __MAKE_1V_2P(Fun, 3, Ret, Arg1, Arg2) __MAKE_1V_2P(Fun, 4, Ret, Arg1, Arg2) \ __MAKE_1V_2P(Fun, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 16, Ret, Arg1, Arg2) + __MAKE_1V_2P(Fun, 16, Ret, Arg1, Arg2) #define MAKE_GEO_1V_2V_RS(Fun, Call, Ret, Arg1, Arg2) \ __MAKE_1V_2V_RS(Fun, Call, 2, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 4, Ret, Arg1, Arg2) + __MAKE_1V_2V_RS(Fun, Call, 3, Ret, Arg1, Arg2) \ + __MAKE_1V_2V_RS(Fun, Call, 4, Ret, Arg1, Arg2) #define MAKE_1V_2V_3P(Fun, 
Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3P(Fun, 2, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3P(Fun, 3, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3P(Fun, 4, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3P(Fun, 8, Ret, Arg1, Arg2, Arg3) \ - __MAKE_1V_2V_3P(Fun, 16, Ret, Arg1, Arg2, Arg3) + __MAKE_1V_2V_3P(Fun, 3, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3P(Fun, 4, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3P(Fun, 8, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2V_3P(Fun, 16, Ret, Arg1, Arg2, Arg3) namespace cl { namespace __host_std { @@ -381,11 +382,9 @@ s::cl_float OpDot(s::cl_float4, s::cl_float4); s::cl_double OpDot(s::cl_double2, s::cl_double2); s::cl_double OpDot(s::cl_double3, s::cl_double3); s::cl_double OpDot(s::cl_double4, s::cl_double4); -#ifndef NO_HALF_ENABLED s::cl_half OpDot(s::cl_half2, s::cl_half2); s::cl_half OpDot(s::cl_half3, s::cl_half3); s::cl_half OpDot(s::cl_half4, s::cl_half4); -#endif s::cl_int OpAll(s::cl_int2); s::cl_int OpAll(s::cl_int3); @@ -620,8 +619,9 @@ template inline T __step(T edge, T x) { } template inline T __smoothstep(T edge0, T edge1, T x) { - cl_float t; - t = __fclamp((x - edge0) / (edge1 - edge0), T(0), T(1)); + T t; + T v = (x - edge0) / (edge1 - edge0); + t = __fclamp(v, T(0), T(1)); return t * t * (3 - 2 * t); } @@ -784,15 +784,13 @@ template <> union databitset { uint64_t i; }; -#ifndef NO_HALF_ENABLED -// Half -template <> union databitset { - static_assert(sizeof(uint16_t) == sizeof(cl_half), +// half +template <> union databitset { + static_assert(sizeof(uint16_t) == sizeof(s::cl_half), "size of half is not equal to 16 bits."); - cl_half f; + s::cl_half f; uint16_t i; }; -#endif template typename std::enable_if::value, T>::type inline __bitselect( @@ -816,92 +814,64 @@ template inline T2 __OpSelect(T c, T2 b, T2 a) { template inline T2 __vOpSelect(T c, T2 b, T2 a) { return d::msbIsSet(c) ? b : a; } -} +} // namespace /* ----------------- 4.13.3 Math functions. 
Host version --------------------*/ // acos cl_float acos(s::cl_float x) __NOEXC { return std::acos(x); } cl_double acos(s::cl_double x) __NOEXC { return std::acos(x); } -#ifndef NO_HALF_ENABLED cl_half acos(s::cl_half x) __NOEXC { return std::acos(x); } -#endif MAKE_1V(acos, s::cl_float, s::cl_float) MAKE_1V(acos, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(acos, s::cl_half, s::cl_half) -#endif // acosh cl_float acosh(s::cl_float x) __NOEXC { return std::acosh(x); } cl_double acosh(s::cl_double x) __NOEXC { return std::acosh(x); } -#ifndef NO_HALF_ENABLED cl_half acosh(s::cl_half x) __NOEXC { return std::acosh(x); } -#endif MAKE_1V(acosh, s::cl_float, s::cl_float) MAKE_1V(acosh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(acosh, s::cl_half, s::cl_half) -#endif // acospi cl_float acospi(s::cl_float x) __NOEXC { return __acospi(x); } cl_double acospi(s::cl_double x) __NOEXC { return __acospi(x); } -#ifndef NO_HALF_ENABLED cl_half acospi(s::cl_half x) __NOEXC { return __acospi(x); } -#endif MAKE_1V(acospi, s::cl_float, s::cl_float) MAKE_1V(acospi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(acospi, s::cl_half, s::cl_half) -#endif // asin cl_float asin(s::cl_float x) __NOEXC { return std::asin(x); } cl_double asin(s::cl_double x) __NOEXC { return std::asin(x); } -#ifndef NO_HALF_ENABLED cl_half asin(s::cl_half x) __NOEXC { return std::asin(x); } -#endif MAKE_1V(asin, s::cl_float, s::cl_float) MAKE_1V(asin, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(asin, s::cl_half, s::cl_half) -#endif // asinh cl_float asinh(s::cl_float x) __NOEXC { return std::asinh(x); } cl_double asinh(s::cl_double x) __NOEXC { return std::asinh(x); } -#ifndef NO_HALF_ENABLED cl_half asinh(s::cl_half x) __NOEXC { return std::asinh(x); } -#endif MAKE_1V(asinh, s::cl_float, s::cl_float) MAKE_1V(asinh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(asinh, s::cl_half, s::cl_half) -#endif // asinpi cl_float 
asinpi(s::cl_float x) __NOEXC { return __asinpi(x); } cl_double asinpi(s::cl_double x) __NOEXC { return __asinpi(x); } -#ifndef NO_HALF_ENABLED cl_half asinpi(s::cl_half x) __NOEXC { return __asinpi(x); } -#endif MAKE_1V(asinpi, s::cl_float, s::cl_float) MAKE_1V(asinpi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(asinpi, s::cl_half, s::cl_half) -#endif // atan cl_float atan(s::cl_float x) __NOEXC { return std::atan(x); } cl_double atan(s::cl_double x) __NOEXC { return std::atan(x); } -#ifndef NO_HALF_ENABLED cl_half atan(s::cl_half x) __NOEXC { return std::atan(x); } -#endif MAKE_1V(atan, s::cl_float, s::cl_float) MAKE_1V(atan, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(atan, s::cl_half, s::cl_half) -#endif // atan2 cl_float atan2(s::cl_float x, s::cl_float y) __NOEXC { @@ -910,38 +880,26 @@ cl_float atan2(s::cl_float x, s::cl_float y) __NOEXC { cl_double atan2(s::cl_double x, s::cl_double y) __NOEXC { return std::atan2(x, y); } -#ifndef NO_HALF_ENABLED cl_half atan2(s::cl_half x, s::cl_half y) __NOEXC { return std::atan2(x, y); } -#endif MAKE_1V_2V(atan2, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(atan2, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(atan2, s::cl_half, s::cl_half, s::cl_half) -#endif // atanh cl_float atanh(s::cl_float x) __NOEXC { return std::atanh(x); } cl_double atanh(s::cl_double x) __NOEXC { return std::atanh(x); } -#ifndef NO_HALF_ENABLED cl_half atanh(s::cl_half x) __NOEXC { return std::atanh(x); } -#endif MAKE_1V(atanh, s::cl_float, s::cl_float) MAKE_1V(atanh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(atanh, s::cl_half, s::cl_half) -#endif // atanpi cl_float atanpi(s::cl_float x) __NOEXC { return __atanpi(x); } cl_double atanpi(s::cl_double x) __NOEXC { return __atanpi(x); } -#ifndef NO_HALF_ENABLED cl_half atanpi(s::cl_half x) __NOEXC { return __atanpi(x); } -#endif MAKE_1V(atanpi, s::cl_float, s::cl_float) MAKE_1V(atanpi, s::cl_double, 
s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(atanpi, s::cl_half, s::cl_half) -#endif // atan2pi cl_float atan2pi(s::cl_float x, s::cl_float y) __NOEXC { @@ -950,38 +908,26 @@ cl_float atan2pi(s::cl_float x, s::cl_float y) __NOEXC { cl_double atan2pi(s::cl_double x, s::cl_double y) __NOEXC { return __atan2pi(x, y); } -#ifndef NO_HALF_ENABLED cl_half atan2pi(s::cl_half x, s::cl_half y) __NOEXC { return __atan2pi(x, y); } -#endif MAKE_1V_2V(atan2pi, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(atan2pi, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(atan2pi, s::cl_half, s::cl_half, s::cl_half) -#endif // cbrt cl_float cbrt(s::cl_float x) __NOEXC { return std::cbrt(x); } cl_double cbrt(s::cl_double x) __NOEXC { return std::cbrt(x); } -#ifndef NO_HALF_ENABLED cl_half cbrt(s::cl_half x) __NOEXC { return std::cbrt(x); } -#endif MAKE_1V(cbrt, s::cl_float, s::cl_float) MAKE_1V(cbrt, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(cbrt, s::cl_half, s::cl_half) -#endif // ceil cl_float ceil(s::cl_float x) __NOEXC { return std::ceil(x); } cl_double ceil(s::cl_double x) __NOEXC { return std::ceil(x); } -#ifndef NO_HALF_ENABLED cl_half ceil(s::cl_half x) __NOEXC { return std::ceil(x); } -#endif MAKE_1V(ceil, s::cl_float, s::cl_float) MAKE_1V(ceil, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(ceil, s::cl_half, s::cl_half) -#endif // copysign cl_float copysign(s::cl_float x, s::cl_float y) __NOEXC { @@ -990,162 +936,110 @@ cl_float copysign(s::cl_float x, s::cl_float y) __NOEXC { cl_double copysign(s::cl_double x, s::cl_double y) __NOEXC { return std::copysign(x, y); } -#ifndef NO_HALF_ENABLED cl_half copysign(s::cl_half x, s::cl_half y) __NOEXC { return std::copysign(x, y); } -#endif MAKE_1V_2V(copysign, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(copysign, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(copysign, s::cl_half, s::cl_half, s::cl_half) -#endif // cos cl_float 
cos(s::cl_float x) __NOEXC { return std::cos(x); } cl_double cos(s::cl_double x) __NOEXC { return std::cos(x); } -#ifndef NO_HALF_ENABLED cl_half cos(s::cl_half x) __NOEXC { return std::cos(x); } -#endif MAKE_1V(cos, s::cl_float, s::cl_float) MAKE_1V(cos, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(cos, s::cl_half, s::cl_half) -#endif // cosh cl_float cosh(s::cl_float x) __NOEXC { return std::cosh(x); } cl_double cosh(s::cl_double x) __NOEXC { return std::cosh(x); } -#ifndef NO_HALF_ENABLED cl_half cosh(s::cl_half x) __NOEXC { return std::cosh(x); } -#endif MAKE_1V(cosh, s::cl_float, s::cl_float) MAKE_1V(cosh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(cosh, s::cl_half, s::cl_half) -#endif // cospi cl_float cospi(s::cl_float x) __NOEXC { return __cospi(x); } cl_double cospi(s::cl_double x) __NOEXC { return __cospi(x); } -#ifndef NO_HALF_ENABLED cl_half cospi(s::cl_half x) __NOEXC { return __cospi(x); } -#endif MAKE_1V(cospi, s::cl_float, s::cl_float) MAKE_1V(cospi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(cospi, s::cl_half, s::cl_half) -#endif // erfc cl_float erfc(s::cl_float x) __NOEXC { return std::erfc(x); } cl_double erfc(s::cl_double x) __NOEXC { return std::erfc(x); } -#ifndef NO_HALF_ENABLED cl_half erfc(s::cl_half x) __NOEXC { return std::erfc(x); } -#endif MAKE_1V(erfc, s::cl_float, s::cl_float) MAKE_1V(erfc, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(erfc, s::cl_half, s::cl_half) -#endif // erf cl_float erf(s::cl_float x) __NOEXC { return std::erf(x); } cl_double erf(s::cl_double x) __NOEXC { return std::erf(x); } -#ifndef NO_HALF_ENABLED cl_half erf(s::cl_half x) __NOEXC { return std::erf(x); } -#endif MAKE_1V(erf, s::cl_float, s::cl_float) MAKE_1V(erf, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(erf, s::cl_half, s::cl_half) -#endif // exp cl_float exp(s::cl_float x) __NOEXC { return std::exp(x); } cl_double exp(s::cl_double x) __NOEXC { return std::exp(x); } 
-#ifndef NO_HALF_ENABLED cl_half exp(s::cl_half x) __NOEXC { return std::exp(x); } -#endif MAKE_1V(exp, s::cl_float, s::cl_float) MAKE_1V(exp, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(exp, s::cl_half, s::cl_half) -#endif // exp2 cl_float exp2(s::cl_float x) __NOEXC { return std::exp2(x); } cl_double exp2(s::cl_double x) __NOEXC { return std::exp2(x); } -#ifndef NO_HALF_ENABLED cl_half exp2(s::cl_half x) __NOEXC { return std::exp2(x); } -#endif MAKE_1V(exp2, s::cl_float, s::cl_float) MAKE_1V(exp2, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(exp2, s::cl_half, s::cl_half) -#endif // exp10 cl_float exp10(s::cl_float x) __NOEXC { return std::pow(10, x); } cl_double exp10(s::cl_double x) __NOEXC { return std::pow(10, x); } -#ifndef NO_HALF_ENABLED cl_half exp10(s::cl_half x) __NOEXC { return std::pow(10, x); } -#endif MAKE_1V(exp10, s::cl_float, s::cl_float) MAKE_1V(exp10, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(exp10, s::cl_half, s::cl_half) -#endif // expm1 cl_float expm1(s::cl_float x) __NOEXC { return std::expm1(x); } cl_double expm1(s::cl_double x) __NOEXC { return std::expm1(x); } -#ifndef NO_HALF_ENABLED cl_half expm1(s::cl_half x) __NOEXC { return std::expm1(x); } -#endif MAKE_1V(expm1, s::cl_float, s::cl_float) MAKE_1V(expm1, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(expm1, s::cl_half, s::cl_half) -#endif // fabs cl_float fabs(s::cl_float x) __NOEXC { return std::fabs(x); } cl_double fabs(s::cl_double x) __NOEXC { return std::fabs(x); } -#ifndef NO_HALF_ENABLED cl_half fabs(s::cl_half x) __NOEXC { return std::fabs(x); } -#endif MAKE_1V(fabs, s::cl_float, s::cl_float) MAKE_1V(fabs, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(fabs, s::cl_half, s::cl_half) -#endif // fdim cl_float fdim(s::cl_float x, s::cl_float y) __NOEXC { return std::fdim(x, y); } cl_double fdim(s::cl_double x, s::cl_double y) __NOEXC { return std::fdim(x, y); } -#ifndef NO_HALF_ENABLED cl_half 
fdim(s::cl_half x, s::cl_half y) __NOEXC { return std::fdim(x, y); } -#endif MAKE_1V_2V(fdim, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fdim, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fdim, s::cl_half, s::cl_half, s::cl_half) -#endif // floor cl_float floor(s::cl_float x) __NOEXC { return std::floor(x); } cl_double floor(s::cl_double x) __NOEXC { return std::floor(x); } -#ifndef NO_HALF_ENABLED cl_half floor(s::cl_half x) __NOEXC { return std::floor(x); } -#endif MAKE_1V(floor, s::cl_float, s::cl_float) MAKE_1V(floor, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(floor, s::cl_half, s::cl_half) -#endif // fma cl_float fma(s::cl_float a, s::cl_float b, s::cl_float c) __NOEXC { @@ -1154,58 +1048,42 @@ cl_float fma(s::cl_float a, s::cl_float b, s::cl_float c) __NOEXC { cl_double fma(s::cl_double a, s::cl_double b, s::cl_double c) __NOEXC { return std::fma(a, b, c); } -#ifndef NO_HALF_ENABLED cl_half fma(s::cl_half a, s::cl_half b, s::cl_half c) __NOEXC { return std::fma(a, b, c); } -#endif MAKE_1V_2V_3V(fma, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(fma, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(fma, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // fmax cl_float fmax(s::cl_float x, s::cl_float y) __NOEXC { return std::fmax(x, y); } cl_double fmax(s::cl_double x, s::cl_double y) __NOEXC { return std::fmax(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmax(s::cl_half x, s::cl_half y) __NOEXC { return std::fmax(x, y); } -#endif MAKE_1V_2V(fmax, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmax, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmax, s::cl_half, s::cl_half, s::cl_half) -#endif // fmin cl_float fmin(s::cl_float x, s::cl_float y) __NOEXC { return std::fmin(x, y); } cl_double fmin(s::cl_double x, s::cl_double y) __NOEXC { return std::fmin(x, y); } -#ifndef NO_HALF_ENABLED cl_half 
fmin(s::cl_half x, s::cl_half y) __NOEXC { return std::fmin(x, y); } -#endif MAKE_1V_2V(fmin, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmin, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmin, s::cl_half, s::cl_half, s::cl_half) -#endif // fmod cl_float fmod(s::cl_float x, s::cl_float y) __NOEXC { return std::fmod(x, y); } cl_double fmod(s::cl_double x, s::cl_double y) __NOEXC { return std::fmod(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmod(s::cl_half x, s::cl_half y) __NOEXC { return std::fmod(x, y); } -#endif MAKE_1V_2V(fmod, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmod, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmod, s::cl_half, s::cl_half, s::cl_half) -#endif // nextafter cl_float nextafter(s::cl_float x, s::cl_float y) __NOEXC { @@ -1214,16 +1092,12 @@ cl_float nextafter(s::cl_float x, s::cl_float y) __NOEXC { cl_double nextafter(s::cl_double x, s::cl_double y) __NOEXC { return std::nextafter(x, y); } -#ifndef __HALF_NO_ENABLED cl_half nextafter(s::cl_half x, s::cl_half y) __NOEXC { return std::nextafter(x, y); } -#endif MAKE_1V_2V(nextafter, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(nextafter, s::cl_double, s::cl_double, s::cl_double) -#ifndef __HALF_NO_ENABLED MAKE_1V_2V(nextafter, s::cl_half, s::cl_half, s::cl_half) -#endif // fract cl_float fract(s::cl_float x, s::cl_float *iptr) __NOEXC { @@ -1232,16 +1106,12 @@ cl_float fract(s::cl_float x, s::cl_float *iptr) __NOEXC { cl_double fract(s::cl_double x, s::cl_double *iptr) __NOEXC { return __fract(x, iptr); } -#ifndef __HALF_NO_ENABLED cl_half fract(s::cl_half x, s::cl_half *iptr) __NOEXC { return __fract(x, iptr); } -#endif MAKE_1V_2P(fract, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2P(fract, s::cl_double, s::cl_double, s::cl_double) -#ifndef __HALF_NO_ENABLED MAKE_1V_2P(fract, s::cl_half, s::cl_half, s::cl_half) -#endif // frexp cl_float frexp(s::cl_float x, s::cl_int *exp) __NOEXC { @@ -1250,16 +1120,12 
@@ cl_float frexp(s::cl_float x, s::cl_int *exp) __NOEXC { cl_double frexp(s::cl_double x, s::cl_int *exp) __NOEXC { return std::frexp(x, exp); } -#ifndef __HALF_NO_ENABLED cl_half frexp(s::cl_half x, s::cl_int *exp) __NOEXC { return std::frexp(x, exp); } -#endif MAKE_1V_2P(frexp, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2P(frexp, s::cl_double, s::cl_double, s::cl_int) -#ifndef __HALF_NO_ENABLED MAKE_1V_2P(frexp, s::cl_half, s::cl_half, s::cl_int) -#endif // hypot cl_float hypot(s::cl_float x, s::cl_float y) __NOEXC { @@ -1268,52 +1134,36 @@ cl_float hypot(s::cl_float x, s::cl_float y) __NOEXC { cl_double hypot(s::cl_double x, s::cl_double y) __NOEXC { return std::hypot(x, y); } -#ifndef NO_HALF_ENABLED cl_half hypot(s::cl_half x, s::cl_half y) __NOEXC { return std::hypot(x, y); } -#endif MAKE_1V_2V(hypot, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(hypot, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(hypot, s::cl_half, s::cl_half, s::cl_half) -#endif // ilogb cl_int ilogb(s::cl_float x) __NOEXC { return std::ilogb(x); } cl_int ilogb(s::cl_double x) __NOEXC { return std::ilogb(x); } -#ifndef NO_HALF_ENABLED cl_int ilogb(s::cl_half x) __NOEXC { return std::ilogb(x); } -#endif MAKE_1V(ilogb, s::cl_int, s::cl_float) MAKE_1V(ilogb, s::cl_int, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(ilogb, s::cl_int, s::cl_half) -#endif // ldexp cl_float ldexp(s::cl_float x, s::cl_int k) __NOEXC { return std::ldexp(x, k); } cl_double ldexp(s::cl_double x, s::cl_int k) __NOEXC { return std::ldexp(x, k); } -#ifndef NO_HALF_ENABLED cl_half ldexp(s::cl_half x, s::cl_int k) __NOEXC { return std::ldexp(x, k); } -#endif MAKE_1V_2V(ldexp, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V(ldexp, s::cl_double, s::cl_double, s::cl_int) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(ldexp, s::cl_half, s::cl_half, s::cl_int) -#endif // lgamma cl_float lgamma(s::cl_float x) __NOEXC { return std::lgamma(x); } cl_double lgamma(s::cl_double x) __NOEXC { return 
std::lgamma(x); } -#ifndef NO_HALF_ENABLED cl_half lgamma(s::cl_half x) __NOEXC { return std::lgamma(x); } -#endif MAKE_1V(lgamma, s::cl_float, s::cl_float) MAKE_1V(lgamma, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(lgamma, s::cl_half, s::cl_half) -#endif // lgamma_r cl_float lgamma_r(s::cl_float x, s::cl_int *signp) __NOEXC { @@ -1322,76 +1172,52 @@ cl_float lgamma_r(s::cl_float x, s::cl_int *signp) __NOEXC { cl_double lgamma_r(s::cl_double x, s::cl_int *signp) __NOEXC { return ::lgamma_r(x, signp); } -#ifndef __HALF_NO_ENABLED cl_half lgamma_r(s::cl_half x, s::cl_int *signp) __NOEXC { return ::lgamma_r(x, signp); } -#endif MAKE_1V_2P(lgamma_r, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2P(lgamma_r, s::cl_double, s::cl_double, s::cl_int) -#ifndef __HALF_NO_ENABLED MAKE_1V_2P(lgamma_r, s::cl_half, s::cl_half, s::cl_int) -#endif // log cl_float log(s::cl_float x) __NOEXC { return std::log(x); } cl_double log(s::cl_double x) __NOEXC { return std::log(x); } -#ifndef NO_HALF_ENABLED cl_half log(s::cl_half x) __NOEXC { return std::log(x); } -#endif MAKE_1V(log, s::cl_float, s::cl_float) MAKE_1V(log, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(log, s::cl_half, s::cl_half) -#endif // log2 cl_float log2(s::cl_float x) __NOEXC { return std::log2(x); } cl_double log2(s::cl_double x) __NOEXC { return std::log2(x); } -#ifndef NO_HALF_ENABLED cl_half log2(s::cl_half x) __NOEXC { return std::log2(x); } -#endif MAKE_1V(log2, s::cl_float, s::cl_float) MAKE_1V(log2, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(log2, s::cl_half, s::cl_half) -#endif // log10 cl_float log10(s::cl_float x) __NOEXC { return std::log10(x); } cl_double log10(s::cl_double x) __NOEXC { return std::log10(x); } -#ifndef NO_HALF_ENABLED cl_half log10(s::cl_half x) __NOEXC { return std::log10(x); } -#endif MAKE_1V(log10, s::cl_float, s::cl_float) MAKE_1V(log10, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(log10, s::cl_half, s::cl_half) -#endif 
// log1p cl_float log1p(s::cl_float x) __NOEXC { return std::log1p(x); } cl_double log1p(s::cl_double x) __NOEXC { return std::log1p(x); } -#ifndef NO_HALF_ENABLED cl_half log1p(s::cl_half x) __NOEXC { return std::log1p(x); } -#endif MAKE_1V(log1p, s::cl_float, s::cl_float) MAKE_1V(log1p, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(log1p, s::cl_half, s::cl_half) -#endif // logb cl_float logb(s::cl_float x) __NOEXC { return std::logb(x); } cl_double logb(s::cl_double x) __NOEXC { return std::logb(x); } -#ifndef NO_HALF_ENABLED cl_half logb(s::cl_half x) __NOEXC { return std::logb(x); } -#endif MAKE_1V(logb, s::cl_float, s::cl_float) MAKE_1V(logb, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(logb, s::cl_half, s::cl_half) -#endif // mad cl_float mad(s::cl_float a, s::cl_float b, s::cl_float c) __NOEXC { @@ -1400,44 +1226,32 @@ cl_float mad(s::cl_float a, s::cl_float b, s::cl_float c) __NOEXC { cl_double mad(s::cl_double a, s::cl_double b, s::cl_double c) __NOEXC { return __mad(a, b, c); } -#ifndef NO_HALF_ENABLED cl_half mad(s::cl_half a, s::cl_half b, s::cl_half c) __NOEXC { return __mad(a, b, c); } -#endif MAKE_1V_2V_3V(mad, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(mad, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(mad, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // maxmag cl_float maxmag(s::cl_float x, s::cl_float y) __NOEXC { return __maxmag(x, y); } cl_double maxmag(s::cl_double x, s::cl_double y) __NOEXC { return __maxmag(x, y); } -#ifndef NO_HALF_ENABLED cl_half maxmag(s::cl_half x, s::cl_half y) __NOEXC { return __maxmag(x, y); } -#endif MAKE_1V_2V(maxmag, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(maxmag, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(maxmag, s::cl_half, s::cl_half, s::cl_half) -#endif // minmag cl_float minmag(s::cl_float x, s::cl_float y) __NOEXC { return __minmag(x, y); } cl_double 
minmag(s::cl_double x, s::cl_double y) __NOEXC { return __minmag(x, y); } -#ifndef NO_HALF_ENABLED cl_half minmag(s::cl_half x, s::cl_half y) __NOEXC { return __minmag(x, y); } -#endif MAKE_1V_2V(minmag, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(minmag, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(minmag, s::cl_half, s::cl_half, s::cl_half) -#endif // modf cl_float modf(s::cl_float x, s::cl_float *iptr) __NOEXC { @@ -1446,68 +1260,48 @@ cl_float modf(s::cl_float x, s::cl_float *iptr) __NOEXC { cl_double modf(s::cl_double x, s::cl_double *iptr) __NOEXC { return std::modf(x, iptr); } -#ifndef __HALF_NO_ENABLED cl_half modf(s::cl_half x, s::cl_half *iptr) __NOEXC { return std::modf(x, reinterpret_cast(iptr)); } -#endif MAKE_1V_2P(modf, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2P(modf, s::cl_double, s::cl_double, s::cl_double) -#ifndef __HALF_NO_ENABLED MAKE_1V_2P(modf, s::cl_half, s::cl_half, s::cl_half) -#endif // nan cl_float nan(s::cl_uint nancode) __NOEXC { return d::quiet_NaN(); } cl_double nan(s::cl_ulong nancode) __NOEXC { return d::quiet_NaN(); } cl_double nan(s::ulonglong nancode) __NOEXC { return d::quiet_NaN(); } -#ifndef __HALF_NO_ENABLED cl_half nan(s::cl_ushort nancode) __NOEXC { return s::cl_half(d::quiet_NaN()); } -#endif MAKE_1V(nan, s::cl_float, s::cl_uint) MAKE_1V(nan, s::cl_double, s::cl_ulong) MAKE_1V(nan, s::cl_double, s::ulonglong) -#ifndef __HALF_NO_ENABLED MAKE_1V(nan, s::cl_half, s::cl_ushort) -#endif // pow cl_float pow(s::cl_float x, s::cl_float y) __NOEXC { return std::pow(x, y); } cl_double pow(s::cl_double x, s::cl_double y) __NOEXC { return std::pow(x, y); } -#ifndef __HALF_NO_ENABLED cl_half pow(s::cl_half x, s::cl_half y) __NOEXC { return std::pow(x, y); } -#endif MAKE_1V_2V(pow, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(pow, s::cl_double, s::cl_double, s::cl_double) -#ifndef __HALF_NO_ENABLED MAKE_1V_2V(pow, s::cl_half, s::cl_half, s::cl_half) -#endif // pown cl_float 
pown(s::cl_float x, s::cl_int y) __NOEXC { return std::pow(x, y); } cl_double pown(s::cl_double x, s::cl_int y) __NOEXC { return std::pow(x, y); } -#ifndef __HALF_NO_ENABLED cl_half pown(s::cl_half x, s::cl_int y) __NOEXC { return std::pow(x, y); } -#endif MAKE_1V_2V(pown, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V(pown, s::cl_double, s::cl_double, s::cl_int) -#ifndef __HALF_NO_ENABLED MAKE_1V_2V(pown, s::cl_half, s::cl_half, s::cl_int) -#endif // powr cl_float powr(s::cl_float x, s::cl_float y) __NOEXC { return __powr(x, y); } cl_double powr(s::cl_double x, s::cl_double y) __NOEXC { return __powr(x, y); } -#ifndef __HALF_NO_ENABLED cl_half powr(s::cl_half x, s::cl_half y) __NOEXC { return __powr(x, y); } -#endif MAKE_1V_2V(powr, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(powr, s::cl_double, s::cl_double, s::cl_double) -#ifndef __HALF_NO_ENABLED MAKE_1V_2V(powr, s::cl_half, s::cl_half, s::cl_half) -#endif // remainder cl_float remainder(s::cl_float x, s::cl_float y) __NOEXC { @@ -1516,16 +1310,12 @@ cl_float remainder(s::cl_float x, s::cl_float y) __NOEXC { cl_double remainder(s::cl_double x, s::cl_double y) __NOEXC { return std::remainder(x, y); } -#ifndef __HALF_NO_ENABLED cl_half remainder(s::cl_half x, s::cl_half y) __NOEXC { return std::remainder(x, y); } -#endif MAKE_1V_2V(remainder, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(remainder, s::cl_double, s::cl_double, s::cl_double) -#ifndef __HALF_NO_ENABLED MAKE_1V_2V(remainder, s::cl_half, s::cl_half, s::cl_half) -#endif // remquo cl_float remquo(s::cl_float x, s::cl_float y, s::cl_int *quo) __NOEXC { @@ -1534,76 +1324,52 @@ cl_float remquo(s::cl_float x, s::cl_float y, s::cl_int *quo) __NOEXC { cl_double remquo(s::cl_double x, s::cl_double y, s::cl_int *quo) __NOEXC { return std::remquo(x, y, quo); } -#ifndef __HALF_NO_ENABLED cl_half remquo(s::cl_half x, s::cl_half y, s::cl_int *quo) __NOEXC { return std::remquo(x, y, quo); } -#endif MAKE_1V_2V_3P(remquo, s::cl_float, s::cl_float, 
s::cl_float, s::cl_int) MAKE_1V_2V_3P(remquo, s::cl_double, s::cl_double, s::cl_double, s::cl_int) -#ifndef __HALF_NO_ENABLED MAKE_1V_2V_3P(remquo, s::cl_half, s::cl_half, s::cl_half, s::cl_int) -#endif // rint cl_float rint(s::cl_float x) __NOEXC { return std::rint(x); } cl_double rint(s::cl_double x) __NOEXC { return std::rint(x); } -#ifndef NO_HALF_ENABLED cl_half rint(s::cl_half x) __NOEXC { return std::rint(x); } -#endif MAKE_1V(rint, s::cl_float, s::cl_float) MAKE_1V(rint, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(rint, s::cl_half, s::cl_half) -#endif // rootn cl_float rootn(s::cl_float x, s::cl_int y) __NOEXC { return __rootn(x, y); } cl_double rootn(s::cl_double x, s::cl_int y) __NOEXC { return __rootn(x, y); } -#ifndef NO_HALF_ENABLED cl_half rootn(s::cl_half x, s::cl_int y) __NOEXC { return __rootn(x, y); } -#endif MAKE_1V_2V(rootn, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V(rootn, s::cl_double, s::cl_double, s::cl_int) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(rootn, s::cl_half, s::cl_half, s::cl_int) -#endif // round cl_float round(s::cl_float x) __NOEXC { return std::round(x); } cl_double round(s::cl_double x) __NOEXC { return std::round(x); } -#ifndef NO_HALF_ENABLED cl_half round(s::cl_half x) __NOEXC { return std::round(x); } -#endif MAKE_1V(round, s::cl_float, s::cl_float) MAKE_1V(round, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(round, s::cl_half, s::cl_half) -#endif // rsqrt cl_float rsqrt(s::cl_float x) __NOEXC { return __rsqrt(x); } cl_double rsqrt(s::cl_double x) __NOEXC { return __rsqrt(x); } -#ifndef NO_HALF_ENABLED cl_half rsqrt(s::cl_half x) __NOEXC { return __rsqrt(x); } -#endif MAKE_1V(rsqrt, s::cl_float, s::cl_float) MAKE_1V(rsqrt, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(rsqrt, s::cl_half, s::cl_half) -#endif // sin cl_float sin(s::cl_float x) __NOEXC { return std::sin(x); } cl_double sin(s::cl_double x) __NOEXC { return std::sin(x); } -#ifndef NO_HALF_ENABLED cl_half 
sin(s::cl_half x) __NOEXC { return std::sin(x); } -#endif MAKE_1V(sin, s::cl_float, s::cl_float) MAKE_1V(sin, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sin, s::cl_half, s::cl_half) -#endif // sincos cl_float sincos(s::cl_float x, s::cl_float *cosval) __NOEXC { @@ -1612,112 +1378,76 @@ cl_float sincos(s::cl_float x, s::cl_float *cosval) __NOEXC { cl_double sincos(s::cl_double x, s::cl_double *cosval) __NOEXC { return __sincos(x, cosval); } -#ifndef NO_HALF_ENABLED cl_half sincos(s::cl_half x, s::cl_half *cosval) __NOEXC { return __sincos(x, cosval); } -#endif MAKE_1V_2P(sincos, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2P(sincos, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2P(sincos, s::cl_half, s::cl_half, s::cl_half) -#endif // sinh cl_float sinh(s::cl_float x) __NOEXC { return std::sinh(x); } cl_double sinh(s::cl_double x) __NOEXC { return std::sinh(x); } -#ifndef NO_HALF_ENABLED cl_half sinh(s::cl_half x) __NOEXC { return std::sinh(x); } -#endif MAKE_1V(sinh, s::cl_float, s::cl_float) MAKE_1V(sinh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sinh, s::cl_half, s::cl_half) -#endif // sinpi cl_float sinpi(s::cl_float x) __NOEXC { return __sinpi(x); } cl_double sinpi(s::cl_double x) __NOEXC { return __sinpi(x); } -#ifndef NO_HALF_ENABLED cl_half sinpi(s::cl_half x) __NOEXC { return __sinpi(x); } -#endif MAKE_1V(sinpi, s::cl_float, s::cl_float) MAKE_1V(sinpi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sinpi, s::cl_half, s::cl_half) -#endif // sqrt cl_float sqrt(s::cl_float x) __NOEXC { return std::sqrt(x); } cl_double sqrt(s::cl_double x) __NOEXC { return std::sqrt(x); } -#ifndef NO_HALF_ENABLED cl_half sqrt(s::cl_half x) __NOEXC { return std::sqrt(x); } -#endif MAKE_1V(sqrt, s::cl_float, s::cl_float) MAKE_1V(sqrt, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sqrt, s::cl_half, s::cl_half) -#endif // tan cl_float tan(s::cl_float x) __NOEXC { return std::tan(x); } 
cl_double tan(s::cl_double x) __NOEXC { return std::tan(x); } -#ifndef NO_HALF_ENABLED cl_half tan(s::cl_half x) __NOEXC { return std::tan(x); } -#endif MAKE_1V(tan, s::cl_float, s::cl_float) MAKE_1V(tan, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(tan, s::cl_half, s::cl_half) -#endif // tanh cl_float tanh(s::cl_float x) __NOEXC { return std::tanh(x); } cl_double tanh(s::cl_double x) __NOEXC { return std::tanh(x); } -#ifndef NO_HALF_ENABLED cl_half tanh(s::cl_half x) __NOEXC { return std::tanh(x); } -#endif MAKE_1V(tanh, s::cl_float, s::cl_float) MAKE_1V(tanh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(tanh, s::cl_half, s::cl_half) -#endif // tanpi cl_float tanpi(s::cl_float x) __NOEXC { return __tanpi(x); } cl_double tanpi(s::cl_double x) __NOEXC { return __tanpi(x); } -#ifndef NO_HALF_ENABLED cl_half tanpi(s::cl_half x) __NOEXC { return __tanpi(x); } -#endif MAKE_1V(tanpi, s::cl_float, s::cl_float) MAKE_1V(tanpi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(tanpi, s::cl_half, s::cl_half) -#endif // tgamma cl_float tgamma(s::cl_float x) __NOEXC { return std::tgamma(x); } cl_double tgamma(s::cl_double x) __NOEXC { return std::tgamma(x); } -#ifndef NO_HALF_ENABLED cl_half tgamma(s::cl_half x) __NOEXC { return std::tgamma(x); } -#endif MAKE_1V(tgamma, s::cl_float, s::cl_float) MAKE_1V(tgamma, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(tgamma, s::cl_half, s::cl_half) -#endif // trunc cl_float trunc(s::cl_float x) __NOEXC { return std::trunc(x); } cl_double trunc(s::cl_double x) __NOEXC { return std::trunc(x); } -#ifndef NO_HALF_ENABLED cl_half trunc(s::cl_half x) __NOEXC { return std::trunc(x); } -#endif MAKE_1V(trunc, s::cl_float, s::cl_float) MAKE_1V(trunc, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(trunc, s::cl_half, s::cl_half) -#endif /* --------------- 4.13.4 Integer functions. 
Host version -------------------*/ // u_abs @@ -2325,28 +2055,20 @@ cl_double fclamp(s::cl_double x, s::cl_double minval, s::cl_double maxval) __NOEXC { return __fclamp(x, minval, maxval); } -#ifndef NO_HALF_ENABLED cl_half fclamp(s::cl_half x, s::cl_half minval, s::cl_half maxval) __NOEXC { return __fclamp(x, minval, maxval); } -#endif MAKE_1V_2V_3V(fclamp, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(fclamp, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(fclamp, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // degrees cl_float degrees(s::cl_float radians) __NOEXC { return __degrees(radians); } cl_double degrees(s::cl_double radians) __NOEXC { return __degrees(radians); } -#ifndef NO_HALF_ENABLED cl_half degrees(s::cl_half radians) __NOEXC { return __degrees(radians); } -#endif MAKE_1V(degrees, s::cl_float, s::cl_float) MAKE_1V(degrees, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(degrees, s::cl_half, s::cl_half) -#endif // fmin_common cl_float fmin_common(s::cl_float x, s::cl_float y) __NOEXC { @@ -2355,16 +2077,12 @@ cl_float fmin_common(s::cl_float x, s::cl_float y) __NOEXC { cl_double fmin_common(s::cl_double x, s::cl_double y) __NOEXC { return std::fmin(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmin_common(s::cl_half x, s::cl_half y) __NOEXC { return std::fmin(x, y); } -#endif MAKE_1V_2V(fmin_common, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmin_common, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmin_common, s::cl_half, s::cl_half, s::cl_half) -#endif // fmax_common cl_float fmax_common(s::cl_float x, s::cl_float y) __NOEXC { @@ -2373,16 +2091,12 @@ cl_float fmax_common(s::cl_float x, s::cl_float y) __NOEXC { cl_double fmax_common(s::cl_double x, s::cl_double y) __NOEXC { return std::fmax(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmax_common(s::cl_half x, s::cl_half y) __NOEXC { return std::fmax(x, y); } -#endif 
MAKE_1V_2V(fmax_common, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmax_common, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmax_common, s::cl_half, s::cl_half, s::cl_half) -#endif // mix cl_float mix(s::cl_float x, s::cl_float y, s::cl_float a) __NOEXC { @@ -2391,28 +2105,20 @@ cl_float mix(s::cl_float x, s::cl_float y, s::cl_float a) __NOEXC { cl_double mix(s::cl_double x, s::cl_double y, s::cl_double a) __NOEXC { return __mix(x, y, a); } -#ifndef NO_HALF_ENABLED cl_half mix(s::cl_half x, s::cl_half y, s::cl_half a) __NOEXC { return __mix(x, y, a); } -#endif MAKE_1V_2V_3V(mix, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(mix, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(mix, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // radians cl_float radians(s::cl_float degrees) __NOEXC { return __radians(degrees); } cl_double radians(s::cl_double degrees) __NOEXC { return __radians(degrees); } -#ifndef NO_HALF_ENABLED cl_half radians(s::cl_half degrees) __NOEXC { return __radians(degrees); } -#endif MAKE_1V(radians, s::cl_float, s::cl_float) MAKE_1V(radians, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(radians, s::cl_half, s::cl_half) -#endif // step cl_float step(s::cl_float edge, s::cl_float x) __NOEXC { @@ -2421,14 +2127,10 @@ cl_float step(s::cl_float edge, s::cl_float x) __NOEXC { cl_double step(s::cl_double edge, s::cl_double x) __NOEXC { return __step(edge, x); } -#ifndef NO_HALF_ENABLED cl_half step(s::cl_half edge, s::cl_half x) __NOEXC { return __step(edge, x); } -#endif MAKE_1V_2V(step, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(step, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(step, s::cl_half, s::cl_half, s::cl_half) -#endif // fma cl_float smoothstep(s::cl_float edge0, s::cl_float edge1, @@ -2439,29 +2141,21 @@ cl_double smoothstep(s::cl_double edge0, s::cl_double edge1, s::cl_double 
x) __NOEXC { return __smoothstep(edge0, edge1, x); } -#ifndef NO_HALF_ENABLED cl_half smoothstep(s::cl_half edge0, s::cl_half edge1, s::cl_half x) __NOEXC { return __smoothstep(edge0, edge1, x); } -#endif MAKE_1V_2V_3V(smoothstep, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(smoothstep, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(smoothstep, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // sign cl_float sign(s::cl_float x) __NOEXC { return __sign(x); } cl_double sign(s::cl_double x) __NOEXC { return __sign(x); } -#ifndef NO_HALF_ENABLED cl_half sign(s::cl_half x) __NOEXC { return __sign(x); } -#endif MAKE_1V(sign, s::cl_float, s::cl_float) MAKE_1V(sign, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sign, s::cl_half, s::cl_half) -#endif /* --------------- 4.13.6 Geometric Functions. Host version -----------------*/ // cross @@ -2477,47 +2171,37 @@ s::cl_double3 cross(s::cl_double3 p0, s::cl_double3 p1) __NOEXC { s::cl_double4 cross(s::cl_double4 p0, s::cl_double4 p1) __NOEXC { return __cross(p0, p1); } -#ifndef NO_HALF_ENABLED s::cl_half3 cross(s::cl_half3 p0, s::cl_half3 p1) __NOEXC { return __cross(p0, p1); } s::cl_half4 cross(s::cl_half4 p0, s::cl_half4 p1) __NOEXC { return __cross(p0, p1); } -#endif // OpFMul cl_float OpFMul(s::cl_float p0, s::cl_float p1) { return __OpFMul(p0, p1); } cl_double OpFMul(s::cl_double p0, s::cl_double p1) { return __OpFMul(p0, p1); } -#ifndef NO_HALF_ENABLED cl_float OpFMul(s::cl_half p0, s::cl_half p1) { return __OpFMul(p0, p1); } -#endif // OpDot MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_float, s::cl_float, s::cl_float) MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_half, s::cl_half, s::cl_half) -#endif // length cl_float length(s::cl_float p) { return __length(p); } cl_double length(s::cl_double p) { return __length(p); } 
-#ifndef NO_HALF_ENABLED cl_half length(s::cl_half p) { return __length(p); } -#endif cl_float length(s::cl_float2 p) { return __length(p); } cl_float length(s::cl_float3 p) { return __length(p); } cl_float length(s::cl_float4 p) { return __length(p); } cl_double length(s::cl_double2 p) { return __length(p); } cl_double length(s::cl_double3 p) { return __length(p); } cl_double length(s::cl_double4 p) { return __length(p); } -#ifndef NO_HALF_ENABLED cl_half length(s::cl_half2 p) { return __length(p); } cl_half length(s::cl_half3 p) { return __length(p); } cl_half length(s::cl_half4 p) { return __length(p); } -#endif // distance cl_float distance(s::cl_float p0, s::cl_float p1) { return length(p0 - p1); } @@ -2534,12 +2218,10 @@ cl_double distance(s::cl_double3 p0, s::cl_double3 p1) { cl_double distance(s::cl_double4 p0, s::cl_double4 p1) { return length(p0 - p1); } -#ifndef NO_HALF_ENABLED cl_half distance(s::cl_half p0, s::cl_half p1) { return length(p0 - p1); } cl_half distance(s::cl_half2 p0, s::cl_half2 p1) { return length(p0 - p1); } cl_half distance(s::cl_half3 p0, s::cl_half3 p1) { return length(p0 - p1); } cl_half distance(s::cl_half4 p0, s::cl_half4 p1) { return length(p0 - p1); } -#endif // normalize s::cl_float normalize(s::cl_float p) { return __normalize(p); } @@ -2550,12 +2232,10 @@ s::cl_double normalize(s::cl_double p) { return __normalize(p); } s::cl_double2 normalize(s::cl_double2 p) { return __normalize(p); } s::cl_double3 normalize(s::cl_double3 p) { return __normalize(p); } s::cl_double4 normalize(s::cl_double4 p) { return __normalize(p); } -#ifndef NO_HALF_ENABLED s::cl_half normalize(s::cl_half p) { return __normalize(p); } s::cl_half2 normalize(s::cl_half2 p) { return __normalize(p); } s::cl_half3 normalize(s::cl_half3 p) { return __normalize(p); } s::cl_half4 normalize(s::cl_half4 p) { return __normalize(p); } -#endif // fast_length cl_float fast_length(s::cl_float p) { return __fast_length(p); } @@ -2596,19 +2276,15 @@ cl_int 
OpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { cl_int OpFOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { return __sOpFOrdEqual(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpFOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { return __sOpFOrdEqual(x, y); } -#endif MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_short, s::cl_half, s::cl_half) -#endif // OpFUnordNotEqual-isnotequal cl_int OpFUnordNotEqual(s::cl_float x, s::cl_float y) __NOEXC { @@ -2617,19 +2293,15 @@ cl_int OpFUnordNotEqual(s::cl_float x, s::cl_float y) __NOEXC { cl_int OpFUnordNotEqual(s::cl_double x, s::cl_double y) __NOEXC { return __sOpFUnordNotEqual(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpFUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { return __sOpFUnordNotEqual(x, y); } -#endif MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpFOrdGreaterThan) // isgreater cl_int OpFOrdGreaterThan(s::cl_float x, s::cl_float y) __NOEXC { @@ -2638,19 +2310,15 @@ cl_int OpFOrdGreaterThan(s::cl_float x, s::cl_float y) __NOEXC { cl_int OpFOrdGreaterThan(s::cl_double x, s::cl_double y) __NOEXC { return __sOpFOrdGreaterThan(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpFOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { return __sOpFOrdGreaterThan(x, y); } -#endif MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_short, s::cl_half, 
s::cl_half) -#endif // (OpFOrdGreaterThanEqual) // isgreaterequal cl_int OpFOrdGreaterThanEqual(s::cl_float x, s::cl_float y) __NOEXC { @@ -2659,19 +2327,15 @@ cl_int OpFOrdGreaterThanEqual(s::cl_float x, s::cl_float y) __NOEXC { cl_int OpFOrdGreaterThanEqual(s::cl_double x, s::cl_double y) __NOEXC { return __sOpFOrdGreaterThanEqual(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpFOrdGreaterThanEqual(s::cl_half x, s::cl_half y) __NOEXC { return __sOpFOrdGreaterThanEqual(x, y); } -#endif MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpFOrdLessThan) // isless cl_int OpFOrdLessThan(s::cl_float x, s::cl_float y) __NOEXC { return (x < y); } @@ -2684,20 +2348,16 @@ cl_int __vOpFOrdLessThan(s::cl_float x, s::cl_float y) __NOEXC { cl_long __vOpFOrdLessThan(s::cl_double x, s::cl_double y) __NOEXC { return -(x < y); } -#ifndef NO_HALF_ENABLED cl_int OpFOrdLessThan(s::cl_half x, s::cl_half y) __NOEXC { return (x < y); } cl_short __vOpFOrdLessThan(s::cl_half x, s::cl_half y) __NOEXC { return -(x < y); } -#endif MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpFOrdLessThanEqual) // islessequal cl_int OpFOrdLessThanEqual(s::cl_float x, s::cl_float y) __NOEXC { @@ -2706,19 +2366,15 @@ cl_int OpFOrdLessThanEqual(s::cl_float x, s::cl_float y) __NOEXC { cl_int OpFOrdLessThanEqual(s::cl_double x, s::cl_double y) __NOEXC { return __sOpFOrdLessThanEqual(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpFOrdLessThanEqual(s::cl_half x, s::cl_half 
y) __NOEXC { return __sOpFOrdLessThanEqual(x, y); } -#endif MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpLessOrGreater) // islessgreater cl_int OpLessOrGreater(s::cl_float x, s::cl_float y) __NOEXC { @@ -2727,49 +2383,37 @@ cl_int OpLessOrGreater(s::cl_float x, s::cl_float y) __NOEXC { cl_int OpLessOrGreater(s::cl_double x, s::cl_double y) __NOEXC { return __sOpLessOrGreater(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpLessOrGreater(s::cl_half x, s::cl_half y) __NOEXC { return __sOpLessOrGreater(x, y); } -#endif MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpIsFinite) // isfinite cl_int OpIsFinite(s::cl_float x) __NOEXC { return std::isfinite(x); } cl_int OpIsFinite(s::cl_double x) __NOEXC { return std::isfinite(x); } cl_int __vOpIsFinite(s::cl_float x) __NOEXC { return -(std::isfinite(x)); } cl_long __vOpIsFinite(s::cl_double x) __NOEXC { return -(std::isfinite(x)); } -#ifndef NO_HALF_ENABLED cl_int OpIsFinite(s::cl_half x) __NOEXC { return std::isfinite(x); } cl_short __vOpIsFinite(s::cl_half x) __NOEXC { return -(std::isfinite(x)); } -#endif MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_short, s::cl_half) -#endif // (OpIsInf) // isinf cl_int OpIsInf(s::cl_float x) __NOEXC { return std::isinf(x); } cl_int OpIsInf(s::cl_double x) __NOEXC { return std::isinf(x); } 
cl_int __vOpIsInf(s::cl_float x) __NOEXC { return -(std::isinf(x)); } cl_long __vOpIsInf(s::cl_double x) __NOEXC { return -(std::isinf(x)); } -#ifndef NO_HALF_ENABLED cl_int OpIsInf(s::cl_half x) __NOEXC { return std::isinf(x); } cl_short __vOpIsInf(s::cl_half x) __NOEXC { return -(std::isinf(x)); } -#endif MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_short, s::cl_half) -#endif // (OpIsNan) // isnan cl_int OpIsNan(s::cl_float x) __NOEXC { return std::isnan(x); } @@ -2777,30 +2421,22 @@ cl_int OpIsNan(s::cl_double x) __NOEXC { return std::isnan(x); } cl_int __vOpIsNan(s::cl_float x) __NOEXC { return -(std::isnan(x)); } cl_long __vOpIsNan(s::cl_double x) __NOEXC { return -(std::isnan(x)); } -#ifndef NO_HALF_ENABLED cl_int OpIsNan(s::cl_half x) __NOEXC { return std::isnan(x); } cl_short __vOpIsNan(s::cl_half x) __NOEXC { return -(std::isnan(x)); } -#endif MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_short, s::cl_half) -#endif // (OpIsNormal) // isnormal cl_int OpIsNormal(s::cl_float x) __NOEXC { return std::isnormal(x); } cl_int OpIsNormal(s::cl_double x) __NOEXC { return std::isnormal(x); } cl_int __vOpIsNormal(s::cl_float x) __NOEXC { return -(std::isnormal(x)); } cl_long __vOpIsNormal(s::cl_double x) __NOEXC { return -(std::isnormal(x)); } -#ifndef NO_HALF_ENABLED cl_int OpIsNormal(s::cl_half x) __NOEXC { return std::isnormal(x); } cl_short __vOpIsNormal(s::cl_half x) __NOEXC { return -(std::isnormal(x)); } -#endif MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_short, s::cl_half) -#endif // (OpOrdered) // isordered cl_int OpOrdered(s::cl_float 
x, s::cl_float y) __NOEXC { @@ -2809,16 +2445,12 @@ cl_int OpOrdered(s::cl_float x, s::cl_float y) __NOEXC { cl_int OpOrdered(s::cl_double x, s::cl_double y) __NOEXC { return __vOpOrdered(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpOrdered(s::cl_half x, s::cl_half y) __NOEXC { return __vOpOrdered(x, y); } -#endif MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpUnordered) // isunordered cl_int OpUnordered(s::cl_float x, s::cl_float y) __NOEXC { @@ -2827,37 +2459,28 @@ cl_int OpUnordered(s::cl_float x, s::cl_float y) __NOEXC { cl_int OpUnordered(s::cl_double x, s::cl_double y) __NOEXC { return __sOpUnordered(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpUnordered(s::cl_half x, s::cl_half y) __NOEXC { return __sOpUnordered(x, y); } -#endif MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpSignBitSet) // signbit cl_int OpSignBitSet(s::cl_float x) __NOEXC { return std::signbit(x); } cl_int OpSignBitSet(s::cl_double x) __NOEXC { return std::signbit(x); } cl_int __vOpSignBitSet(s::cl_float x) __NOEXC { return -(std::signbit(x)); } cl_long __vOpSignBitSet(s::cl_double x) __NOEXC { return -(std::signbit(x)); } -#ifndef NO_HALF_ENABLED cl_int OpSignBitSet(s::cl_half x) __NOEXC { return std::signbit(x); } cl_short __vOpSignBitSet(s::cl_half x) __NOEXC { return -(std::signbit(x)); } -#endif MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_short, s::cl_half) 
-#endif // (OpAny) // any - MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_char) MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_short) MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_int) @@ -2865,7 +2488,6 @@ MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_long) MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::longlong) // (OpAll) // all - MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_char) MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_short) MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_int) @@ -2889,9 +2511,7 @@ MAKE_SC_1V_2V_3V(bitselect, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) MAKE_SC_1V_2V_3V(bitselect, s::longlong, s::longlong, s::longlong, s::longlong) MAKE_SC_1V_2V_3V(bitselect, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) -#ifndef NO_HALF_ENABLED MAKE_SC_1V_2V_3V(bitselect, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // (OpSelect) // select // for scalar: result = c ? b : a. @@ -2948,12 +2568,10 @@ MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::longlong, s::ulonglong, s::ulonglong) -#ifndef NO_HALF_ENABLED MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_short, s::cl_half, s::cl_half) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_ushort, s::cl_half, s::cl_half) -#endif /* --------------- 4.13.3 Native Math functions. Host version ---------------*/ // native_cos @@ -3074,7 +2692,6 @@ MAKE_1V(half_tan, s::cl_float, s::cl_float) } // namespace cl #undef __NOEXC -#undef NO_HALF_ENABLED #undef __MAKE_1V #undef __MAKE_1V_2V #undef __MAKE_1V_2V_RS