diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index aa86d8d137320..3785e57751fb8 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -16,31 +17,16 @@ // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ -// TODO Remove when half type will supported by SYCL Runtime -#define __HALF_NO_ENABLED - namespace cl { namespace sycl { #ifdef __SYCL_DEVICE_ONLY__ -#define __DEVICE_SIDE 1 namespace __sycl_std = cl::__spirv; #else -#define __DEVICE_SIDE 0 namespace __sycl_std = __host_std; #endif } // namespace sycl } // namespace cl -#define __NO_SUPPORT_HOST_VERSION(name, T) \ - static_assert(__DEVICE_SIDE != 0 && \ - /* compile time dependence*/ sizeof(T) > 0, \ - STRINGIFY_LINE(name) " host version is not implemented yet"); - -#define __NO_SUPPORT_DEVICE_VERSION(name, T) \ - static_assert( \ - __DEVICE_SIDE != 1 && /* compile time dependence*/ sizeof(T) > 0, \ - STRINGIFY_LINE(name) " device version is not implemented yet"); - namespace cl { namespace sycl { /* ----------------- 4.13.3 Math functions. ---------------------------------*/ @@ -688,11 +674,9 @@ sign(T x) __NOEXC { /* --------------- 4.13.4 Integer functions. 
--------------------------------*/ // ugeninteger abs (geninteger x) template -typename std::enable_if::value, - typename detail::make_unsigned::type>::type +typename std::enable_if::value, T>::type abs(T x) __NOEXC { - __NO_SUPPORT_HOST_VERSION(abs, T) - return __sycl_std::__invoke_u_abs::type>(x); + return __sycl_std::__invoke_u_abs(x); } // ugeninteger abs (geninteger x) @@ -700,18 +684,14 @@ template typename std::enable_if::value, typename detail::make_unsigned::type>::type abs(T x) __NOEXC { - __NO_SUPPORT_HOST_VERSION(abs, T) return __sycl_std::__invoke_s_abs::type>(x); } // ugeninteger abs_diff (geninteger x, geninteger y) template -typename std::enable_if::value, - typename detail::make_unsigned::type>::type +typename std::enable_if::value, T>::type abs_diff(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(abs_diff, T) - return __sycl_std::__invoke_u_abs_diff< - typename detail::make_unsigned::type>(x, y); + return __sycl_std::__invoke_u_abs_diff(x, y); } // ugeninteger abs_diff (geninteger x, geninteger y) @@ -719,7 +699,6 @@ template typename std::enable_if::value, typename detail::make_unsigned::type>::type abs_diff(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(abs_diff, T) return __sycl_std::__invoke_s_abs_diff< typename detail::make_unsigned::type>(x, y); } @@ -728,7 +707,6 @@ abs_diff(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type add_sat(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(add_sat, T) return __sycl_std::__invoke_s_add_sat(x, y); } @@ -736,7 +714,6 @@ add_sat(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type add_sat(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(add_sat, T) return __sycl_std::__invoke_u_add_sat(x, y); } @@ -744,7 +721,6 @@ add_sat(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type hadd(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(hadd, T) return __sycl_std::__invoke_s_hadd(x, y); } @@ -752,7 +728,6 @@ hadd(T x, T y) __NOEXC { template typename 
std::enable_if::value, T>::type hadd(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(hadd, T) return __sycl_std::__invoke_u_hadd(x, y); } @@ -760,7 +735,6 @@ hadd(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type rhadd(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(rhadd, T) return __sycl_std::__invoke_s_rhadd(x, y); } @@ -768,7 +742,6 @@ rhadd(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type rhadd(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(rhadd, T) return __sycl_std::__invoke_u_rhadd(x, y); } @@ -776,7 +749,6 @@ rhadd(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type clamp(T x, T minval, T maxval) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clamp, T) return __sycl_std::__invoke_s_clamp(x, minval, maxval); } @@ -784,7 +756,6 @@ clamp(T x, T minval, T maxval) __NOEXC { template typename std::enable_if::value, T>::type clamp(T x, T minval, T maxval) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clamp, T) return __sycl_std::__invoke_u_clamp(x, minval, maxval); } @@ -793,8 +764,7 @@ template typename std::enable_if::value, T>::type clamp(T x, typename T::element_type minval, typename T::element_type maxval) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clamp, T) - return __sycl_std::__invoke_s_clamp(x, minval, maxval); + return __sycl_std::__invoke_s_clamp(x, T(minval), T(maxval)); } // geninteger clamp (geninteger x, sgeninteger minval, sgeninteger maxval) @@ -802,39 +772,34 @@ template typename std::enable_if::value, T>::type clamp(T x, typename T::element_type minval, typename T::element_type maxval) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clamp, T) - return __sycl_std::__invoke_u_clamp(x, minval, maxval); + return __sycl_std::__invoke_u_clamp(x, T(minval), T(maxval)); } // geninteger clz (geninteger x) template typename std::enable_if::value, T>::type clz(T x) __NOEXC { - __NO_SUPPORT_HOST_VERSION(clz, T) return __sycl_std::__invoke_clz(x); } // geninteger mad_hi (geninteger a, geninteger b, geninteger c) template 
typename std::enable_if::value, T>::type -mad_hi(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad_hi, T) - return __sycl_std::__invoke_s_mad_hi(x, y); +mad_hi(T x, T y, T z) __NOEXC { + return __sycl_std::__invoke_s_mad_hi(x, y, z); } // geninteger mad_hi (geninteger a, geninteger b, geninteger c) template typename std::enable_if::value, T>::type -mad_hi(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad_hi, T) - return __sycl_std::__invoke_u_mad_hi(x, y); +mad_hi(T x, T y, T z) __NOEXC { + return __sycl_std::__invoke_u_mad_hi(x, y, z); } // geninteger mad_sat (geninteger a, geninteger b, geninteger c) template typename std::enable_if::value, T>::type mad_sat(T a, T b, T c) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad_sat, T) return __sycl_std::__invoke_s_mad_sat(a, b, c); } @@ -842,71 +807,69 @@ mad_sat(T a, T b, T c) __NOEXC { template typename std::enable_if::value, T>::type mad_sat(T a, T b, T c) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad_sat, T) return __sycl_std::__invoke_u_mad_sat(a, b, c); } -// geninteger max (geninteger x, geninteger y) +// igeninteger max (igeninteger x, igeninteger y) template typename std::enable_if::value, T>::type max(T x, T y) __NOEXC { return __sycl_std::__invoke_s_max(x, y); } -// geninteger max (geninteger x, geninteger y) +// ugeninteger max (ugeninteger x, ugeninteger y) template typename std::enable_if::value, T>::type max(T x, T y) __NOEXC { return __sycl_std::__invoke_u_max(x, y); } -// geninteger max (geninteger x, sgeninteger y) +// igeninteger max (vigeninteger x, sigeninteger y) template typename std::enable_if::value, T>::type max(T x, typename T::element_type y) __NOEXC { - return __sycl_std::__invoke_s_max(x, y); + return __sycl_std::__invoke_s_max(x, T(y)); } -// geninteger max (geninteger x, sgeninteger y) +// vugeninteger max (vugeninteger x, sugeninteger y) template typename std::enable_if::value, T>::type max(T x, typename T::element_type y) __NOEXC { - return __sycl_std::__invoke_u_max(x, y); + return 
__sycl_std::__invoke_u_max(x, T(y)); } -// geninteger min (geninteger x, geninteger y) +// igeninteger min (igeninteger x, igeninteger y) template typename std::enable_if::value, T>::type min(T x, T y) __NOEXC { return __sycl_std::__invoke_s_min(x, y); } -// geninteger min (geninteger x, geninteger y) +// ugeninteger min (ugeninteger x, ugeninteger y) template typename std::enable_if::value, T>::type min(T x, T y) __NOEXC { return __sycl_std::__invoke_u_min(x, y); } -// geninteger min (geninteger x, sgeninteger y) +// vigeninteger min (vigeninteger x, sigeninteger y) template typename std::enable_if::value, T>::type min(T x, typename T::element_type y) __NOEXC { - return __sycl_std::__invoke_s_min(x, y); + return __sycl_std::__invoke_s_min(x, T(y)); } -// geninteger min (geninteger x, sgeninteger y) +// vugeninteger min (vugeninteger x, sugeninteger y) template typename std::enable_if::value, T>::type min(T x, typename T::element_type y) __NOEXC { - return __sycl_std::__invoke_u_min(x, y); + return __sycl_std::__invoke_u_min(x, T(y)); } // geninteger mul_hi (geninteger x, geninteger y) template typename std::enable_if::value, T>::type mul_hi(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mul_hi, T) return __sycl_std::__invoke_s_mul_hi(x, y); } @@ -914,7 +877,6 @@ mul_hi(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type mul_hi(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mul_hi, T) return __sycl_std::__invoke_u_mul_hi(x, y); } @@ -922,7 +884,6 @@ mul_hi(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type rotate(T v, T i) __NOEXC { - __NO_SUPPORT_HOST_VERSION(rotate, T) return __sycl_std::__invoke_rotate(v, i); } @@ -930,7 +891,6 @@ rotate(T v, T i) __NOEXC { template typename std::enable_if::value, T>::type sub_sat(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(sub_sat, T) return __sycl_std::__invoke_s_sub_sat(x, y); } @@ -938,15 +898,18 @@ sub_sat(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type 
sub_sat(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(sub_sat, T) return __sycl_std::__invoke_u_sub_sat(x, y); } +// TODO delete when Intel CPU OpenCL runtime will be fixed +// OpExtInst ... s_upsample -> _Z8upsampleij (now _Z8upsampleii) +#define __invoke_s_upsample __invoke_u_upsample + // ugeninteger16bit upsample (ugeninteger8bit hi, ugeninteger8bit lo) -template ::value, T>::type> -typename detail::make_upper::type upsample(T hi, T lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) +template +typename std::enable_if::value, + typename detail::make_upper::type>::type +upsample(T hi, T lo) __NOEXC { return __sycl_std::__invoke_u_upsample::type>( hi, lo); } @@ -957,7 +920,6 @@ typename std::enable_if::value && detail::is_ugeninteger8bit::value, typename detail::make_upper::type>::type upsample(T hi, T2 lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_s_upsample::type>( hi, lo); } @@ -967,7 +929,6 @@ template typename std::enable_if::value, typename detail::make_upper::type>::type upsample(T hi, T lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_u_upsample::type>( hi, lo); } @@ -978,7 +939,6 @@ typename std::enable_if::value && detail::is_ugeninteger16bit::value, typename detail::make_upper::type>::type upsample(T hi, T2 lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_s_upsample::type>( hi, lo); } @@ -988,7 +948,6 @@ template typename std::enable_if::value, typename detail::make_upper::type>::type upsample(T hi, T lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_u_upsample::type>( hi, lo); } @@ -999,34 +958,32 @@ typename std::enable_if::value && detail::is_ugeninteger32bit::value, typename detail::make_upper::type>::type upsample(T hi, T2 lo) __NOEXC { - __NO_SUPPORT_HOST_VERSION(upsample, T) return __sycl_std::__invoke_s_upsample::type>( hi, lo); } +#undef __invoke_s_upsample + // geninteger popcount (geninteger x) 
template typename std::enable_if::value, T>::type -popcount(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(popcount, T) - return __sycl_std::__invoke_popcount(x, y); +popcount(T x) __NOEXC { + return __sycl_std::__invoke_popcount(x); } -// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, geninteger32bit -// z) +// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, +// geninteger32bit z) template typename std::enable_if::value, T>::type mad24(T x, T y, T z) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad24, T) return __sycl_std::__invoke_s_mad24(x, y, z); } -// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, geninteger32bit -// z) +// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, +// geninteger32bit z) template typename std::enable_if::value, T>::type mad24(T x, T y, T z) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mad24, T) return __sycl_std::__invoke_u_mad24(x, y, z); } @@ -1034,7 +991,6 @@ mad24(T x, T y, T z) __NOEXC { template typename std::enable_if::value, T>::type mul24(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mul24, T) return __sycl_std::__invoke_s_mul24(x, y); } @@ -1042,7 +998,6 @@ mul24(T x, T y) __NOEXC { template typename std::enable_if::value, T>::type mul24(T x, T y) __NOEXC { - __NO_SUPPORT_HOST_VERSION(mul24, T) return __sycl_std::__invoke_u_mul24(x, y); } @@ -1084,7 +1039,6 @@ dot(T p0, T p1) __NOEXC { return __sycl_std::__invoke_OpDot(p0, p1); } -#ifndef __HALF_NO_ENABLED // half dot (vgengeohalf p0, vgengeohalf p1) template typename std::enable_if::value, @@ -1092,13 +1046,11 @@ typename std::enable_if::value, dot(T p0, T p1) __NOEXC { return __sycl_std::__invoke_OpDot(p0, p1); } -#endif // float distance (gengeofloat p0, gengeofloat p1) template ::value, T>::type> cl::sycl::cl_float distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(distance, T) return __sycl_std::__invoke_distance(p0, p1); } @@ -1106,25 +1058,20 @@ cl::sycl::cl_float distance(T p0, T p1) __NOEXC { template ::value, 
T>::type> cl::sycl::cl_double distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(distance, T) return __sycl_std::__invoke_distance(p0, p1); } -#ifndef __HALF_NO_ENABLED // half distance (gengeohalf p0, gengeohalf p1) template ::value, T>::type> cl::sycl::cl_half distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(distance, T) return __sycl_std::__invoke_distance(p0, p1); } -#endif // float length (gengeofloat p) template ::value, T>::type> cl::sycl::cl_float length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(length, T) return __sycl_std::__invoke_length(p); } @@ -1132,25 +1079,20 @@ cl::sycl::cl_float length(T p) __NOEXC { template ::value, T>::type> cl::sycl::cl_double length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(length, T) return __sycl_std::__invoke_length(p); } -#ifndef __HALF_NO_ENABLED // half length (gengeohalf p) template ::value, T>::type> cl::sycl::cl_half length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(length, T) return __sycl_std::__invoke_length(p); } -#endif // gengeofloat normalize (gengeofloat p) template typename std::enable_if::value, T>::type normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(normalize, T) return __sycl_std::__invoke_normalize(p); } @@ -1158,25 +1100,20 @@ normalize(T p) __NOEXC { template typename std::enable_if::value, T>::type normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(normalize, T) return __sycl_std::__invoke_normalize(p); } -#ifndef __HALF_NO_ENABLED // gengeohalf normalize (gengeohalf p) template typename std::enable_if::value, T>::type normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(normalize, T) return __sycl_std::__invoke_normalize(p); } -#endif // float fast_distance (gengeofloat p0, gengeofloat p1) template ::value, T>::type> cl::sycl::cl_float fast_distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_distance, T) return __sycl_std::__invoke_fast_distance(p0, p1); } @@ -1184,7 +1121,6 @@ cl::sycl::cl_float fast_distance(T p0, T p1) __NOEXC { template ::value, T>::type> 
cl::sycl::cl_double fast_distance(T p0, T p1) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_distance, T) return __sycl_std::__invoke_fast_distance(p0, p1); } @@ -1192,7 +1128,6 @@ cl::sycl::cl_double fast_distance(T p0, T p1) __NOEXC { template ::value, T>::type> cl::sycl::cl_float fast_length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_length, T) return __sycl_std::__invoke_fast_length(p); } @@ -1200,7 +1135,6 @@ cl::sycl::cl_float fast_length(T p) __NOEXC { template ::value, T>::type> cl::sycl::cl_double fast_length(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_length, T) return __sycl_std::__invoke_fast_length(p); } @@ -1208,7 +1142,6 @@ cl::sycl::cl_double fast_length(T p) __NOEXC { template typename std::enable_if::value, T>::type fast_normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_normalize, T) return __sycl_std::__invoke_fast_normalize(p); } @@ -1216,12 +1149,10 @@ fast_normalize(T p) __NOEXC { template typename std::enable_if::value, T>::type fast_normalize(T p) __NOEXC { - __NO_SUPPORT_HOST_VERSION(fast_normalize, T) return __sycl_std::__invoke_fast_normalize(p); } /* --------------- 4.13.7 Relational functions. 
Device version --------------*/ -// // int isequal (half x, half y) // shortn isequal (halfn x, halfn y) // igeninteger32bit isequal (genfloatf x, genfloatf y) @@ -1229,10 +1160,9 @@ fast_normalize(T p) __NOEXC { // longn isequal (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isequal(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpFOrdEqual< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isequal(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdEqual>(x, y)); } // int isnotequal (half x, half y) @@ -1242,10 +1172,9 @@ typename detail::float_point_to_sign_integral::type isequal(T x, // longn isnotequal (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isnotequal(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpFUnordNotEqual< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isnotequal(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFUnordNotEqual>(x, y)); } // int isgreater (half x, half y) @@ -1255,10 +1184,9 @@ typename detail::float_point_to_sign_integral::type isnotequal(T x, // longn isgreater (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isgreater(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpFOrdGreaterThan< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isgreater(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdGreaterThan>(x, y)); } // int isgreaterequal (half x, half y) @@ -1268,10 +1196,9 @@ typename detail::float_point_to_sign_integral::type isgreater(T x, // longn isgreaterequal (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type -isgreaterequal(T x, T y) __NOEXC { - return 
__sycl_std::__invoke_OpFOrdGreaterThanEqual< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isgreaterequal(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdGreaterThanEqual>(x, y)); } // int isless (half x, half y) @@ -1281,11 +1208,11 @@ isgreaterequal(T x, T y) __NOEXC { // longn isless (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isless(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpFOrdLessThan< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isless(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdLessThan>(x, y)); } + // int islessequal (half x, half y) // shortn islessequal (halfn x, halfn y) // igeninteger32bit islessequal (genfloatf x, genfloatf y) @@ -1293,10 +1220,9 @@ typename detail::float_point_to_sign_integral::type isless(T x, // longn islessequal (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type -islessequal(T x, T y) __NOEXC { - return __sycl_std::__invoke_OpFOrdLessThanEqual< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t islessequal(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpFOrdLessThanEqual>(x, y)); } // int islessgreater (half x, half y) @@ -1306,11 +1232,11 @@ islessequal(T x, T y) __NOEXC { // longn islessgreater (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type -islessgreater(T x, T y) __NOEXC { - return __sycl_std::__invoke_OpLessOrGreater< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t islessgreater(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpLessOrGreater>(x, y)); } + // int isfinite (half x) // shortn isfinite (halfn x) // igeninteger32bit isfinite 
(genfloatf x) @@ -1318,10 +1244,9 @@ islessgreater(T x, T y) __NOEXC { // longn isfinite (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isfinite(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(isfinite, T) - return __sycl_std::__invoke_OpIsFinite< - typename detail::float_point_to_sign_integral::type>(x); +detail::common_rel_ret_t isfinite(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpIsFinite>(x)); } // int isinf (half x) @@ -1331,10 +1256,9 @@ typename detail::float_point_to_sign_integral::type isfinite(T x) __NOEXC { // longn isinf (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isinf(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(isinf, T) - return __sycl_std::__invoke_OpIsInf< - typename detail::float_point_to_sign_integral::type>(x); +detail::common_rel_ret_t isinf(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpIsInf>(x)); } // int isnan (half x) @@ -1344,11 +1268,11 @@ typename detail::float_point_to_sign_integral::type isinf(T x) __NOEXC { // longn isnan (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isnan(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(isnan, T) - return __sycl_std::__invoke_OpIsNan< - typename detail::float_point_to_sign_integral::type>(x); +detail::common_rel_ret_t isnan(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpIsNan>(x)); } + // int isnormal (half x) // shortn isnormal (halfn x) // igeninteger32bit isnormal (genfloatf x) @@ -1356,10 +1280,9 @@ typename detail::float_point_to_sign_integral::type isnan(T x) __NOEXC { // longn isnormal (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isnormal(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(isnormal, T) - return __sycl_std::__invoke_OpIsNormal< - typename detail::float_point_to_sign_integral::type>(x); 
+detail::common_rel_ret_t isnormal(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpIsNormal>(x)); } // int isordered (half x) @@ -1369,10 +1292,9 @@ typename detail::float_point_to_sign_integral::type isnormal(T x) __NOEXC { // longn isordered (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type isordered(T x, - T y) __NOEXC { - return __sycl_std::__invoke_OpOrdered< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isordered(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpOrdered>(x, y)); } // int isunordered (half x, half y) @@ -1382,10 +1304,9 @@ typename detail::float_point_to_sign_integral::type isordered(T x, // longn isunordered (doublen x, doublen y) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type -isunordered(T x, T y) __NOEXC { - return __sycl_std::__invoke_OpUnordered< - typename detail::float_point_to_sign_integral::type>(x, y); +detail::common_rel_ret_t isunordered(T x, T y) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpUnordered>(x, y)); } // int signbit (half x) @@ -1395,26 +1316,45 @@ isunordered(T x, T y) __NOEXC { // longn signbit (doublen x) template ::value, T>::type> -typename detail::float_point_to_sign_integral::type signbit(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(signbit, T) - return __sycl_std::__invoke_OpSignBitSet< - typename detail::float_point_to_sign_integral::type>(x); +detail::common_rel_ret_t signbit(T x) __NOEXC { + return detail::RelConverter::apply( + __sycl_std::__invoke_OpSignBitSet>(x)); } -// int any (igeninteger x) -template ::value, T>::type> -cl::sycl::cl_int any(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(signbit, T) - return __sycl_std::__invoke_OpAny(x); +// int any (sigeninteger x) +template +typename std::enable_if::value, + cl::sycl::cl_int>::type +any(T x) __NOEXC { + return 
detail::Boolean<1>(cl::sycl::cl_int(detail::msbIsSet(x))); } -// int all (igeninteger x) -template ::value, T>::type> -cl::sycl::cl_int all(T x) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(all, T) - return __sycl_std::__invoke_OpAll(x); +// int any (vigeninteger x) +template +typename std::enable_if::value, + cl::sycl::cl_int>::type +any(T x) __NOEXC { + return detail::rel_sign_bit_test_ret_t( + __sycl_std::__invoke_OpAny>( + detail::rel_sign_bit_test_arg_t(x))); +} + +// int all (sigeninteger x) +template +typename std::enable_if::value, + cl::sycl::cl_int>::type +all(T x) __NOEXC { + return detail::Boolean<1>(cl::sycl::cl_int(detail::msbIsSet(x))); +} + +// int all (vigeninteger x) +template +typename std::enable_if::value, + cl::sycl::cl_int>::type +all(T x) __NOEXC { + return detail::rel_sign_bit_test_ret_t( + __sycl_std::__invoke_OpAll>( + detail::rel_sign_bit_test_arg_t(x))); } // gentype bitselect (gentype a, gentype b, gentype c) @@ -1430,8 +1370,7 @@ typename std::enable_if::value && detail::is_igeninteger::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // geninteger select (geninteger a, geninteger b, ugeninteger c) @@ -1440,8 +1379,7 @@ typename std::enable_if::value && detail::is_ugeninteger::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatf select (genfloatf a, genfloatf b, genint c) @@ -1449,8 +1387,7 @@ template typename std::enable_if< detail::is_genfloatf::value && detail::is_genint::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatf select 
(genfloatf a, genfloatf b, ugenint c) @@ -1458,8 +1395,7 @@ template typename std::enable_if< detail::is_genfloatf::value && detail::is_ugenint::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, igeninteger64 c) @@ -1468,8 +1404,7 @@ typename std::enable_if::value && detail::is_igeninteger64bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, ugeninteger64 c) @@ -1478,31 +1413,26 @@ typename std::enable_if::value && detail::is_ugeninteger64bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } -#ifndef __HALF_NO_ENABLED -// genfloath select (genfloath a, genfloath b, igeninteger64 c) +// genfloath select (genfloath a, genfloath b, igeninteger16 c) template typename std::enable_if::value && - detail::is_igeninteger64bit::value, + detail::is_igeninteger16bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } -// genfloath select (genfloath a, genfloath b, ugeninteger64 c) +// genfloath select (genfloath a, genfloath b, ugeninteger16 c) template typename std::enable_if::value && - detail::is_ugeninteger64bit::value, + detail::is_ugeninteger16bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - __NO_SUPPORT_DEVICE_VERSION(select, T) - return __sycl_std::__invoke_OpSelect(a, b, c); + return 
__sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); } -#endif namespace native { /* ----------------- 4.13.3 Math functions. ---------------------------------*/ @@ -1709,8 +1639,4 @@ tan(T x) __NOEXC { } // namespace sycl } // namespace cl -#undef __HALF_NO_ENABLED #undef __NOEXC -#undef __NO_SUPPORT_HOST_VERSION -#undef __NO_SUPPORT_DEVICE_VERSION -#undef __DEVICE_SIDE diff --git a/sycl/include/CL/sycl/detail/boolean.hpp b/sycl/include/CL/sycl/detail/boolean.hpp new file mode 100644 index 0000000000000..295f7358d48fa --- /dev/null +++ b/sycl/include/CL/sycl/detail/boolean.hpp @@ -0,0 +1,144 @@ +//==----------- boolean.hpp - SYCL boolean type ----------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +template struct Assigner { + template static void assign(R &r, const T x) { + Assigner::assign(r, x); + r.template swizzle() = x.value[Num]; + } + + template + static void init(R &r, const T x) { + Assigner::template init(r, x); + ET v = x.template swizzle(); + r.value[Num] = msbIsSet(v); + } +}; + +template <> struct Assigner<0> { + template static void assign(R &r, const T x) { + r.template swizzle<0>() = x.value[0]; + } + template + static void init(R &r, const T x) { + ET v = x.template swizzle<0>(); + r.value[0] = msbIsSet(v); + } +}; + +template struct alignas(N == 3 ? 4 : N) Boolean { + static_assert(((N == 2) || (N == 3) || (N == 4) || (N == 8) || (N == 16)), + "Invalid size"); + + using element_type = bool; + +#ifdef __SYCL_DEVICE_ONLY__ + using DataType = + element_type __attribute__((ext_vector_type(N == 3 ? 
4 : N))); + using vector_t = DataType; +#else + using DataType = element_type[N == 3 ? 4 : N]; +#endif + + Boolean() : value{false} {} + + Boolean(std::initializer_list l) { + for (size_t I = 0; I < N; ++I) { + value[I] = *(l.begin() + I); + } + } + + Boolean(const Boolean &rhs) { + for (size_t I = 0; I < N; ++I) { + value[I] = rhs.value[I]; + } + } + +#ifdef __SYCL_DEVICE_ONLY__ + // TODO change this to the vectors assignment when the assignment will be + // fixed on Intel GPU NEO OpenCL runtime + Boolean(const vector_t rhs) { + for (size_t I = 0; I < N; ++I) { + value[I] = rhs[I]; + } + } +#endif + + template Boolean(const T rhs) { + static_assert(is_vgeninteger::value, "Invalid constructor"); + Assigner::template init, T, typename T::element_type>( + *this, rhs); + } + +#ifdef __SYCL_DEVICE_ONLY__ + operator vector_t() const { return value; } +#endif + + template operator T() const { + static_assert(is_vgeninteger::value, "Invalid conversion"); + T r; + Assigner::assign(r, *this); + return r * -1; + } + +private: + template friend struct Assigner; + DataType value; +}; + +template <> struct alignas(1) Boolean<1> { + + using element_type = bool; + +#ifdef __SYCL_DEVICE_ONLY__ + using DataType = element_type; + using vector_t = DataType; +#else + using DataType = element_type; +#endif + + Boolean() : value(false) {} + + Boolean(const Boolean &rhs) : value(rhs.value) {} + +#ifdef __SYCL_DEVICE_ONLY__ + Boolean(const vector_t rhs) : value(rhs) {} +#endif + + template Boolean(T val) : value(val) { + static_assert(is_sgeninteger::value, "Invalid constructor"); + } + +#ifdef __SYCL_DEVICE_ONLY__ + operator vector_t() const { return value; } +#endif + + template operator T() const { + static_assert(is_sgeninteger::value, "Invalid conversion"); + return value; + } + +private: + DataType value; +}; + +} // namespace detail +} // namespace sycl +} // namespace cl \ No newline at end of file diff --git a/sycl/include/CL/sycl/detail/builtins.hpp 
b/sycl/include/CL/sycl/detail/builtins.hpp index 69223620110e4..763cccd8fe4d8 100644 --- a/sycl/include/CL/sycl/detail/builtins.hpp +++ b/sycl/include/CL/sycl/detail/builtins.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include // TODO Delete this include after solving the problems in the test @@ -18,84 +19,10 @@ // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ -namespace cl { -namespace sycl { -namespace detail { - -// Try to get pointer_t, otherwise T -template class TryToGetPointerT { - static T check(...); - template static typename A::pointer_t check(const A &); - -public: - using type = decltype(check(T())); - static constexpr bool value = !std::is_same::value; -}; - -// Try to get element_type, otherwise T -template class TryToGetElementType { - static T check(...); - template static typename A::element_type check(const A &); - -public: - using type = decltype(check(T())); - static constexpr bool value = !std::is_same::value; -}; - -// Try to get vector_t, otherwise T -template class TryToGetVectorT { - static T check(...); - template static typename A::vector_t check(const A &); - -public: - using type = decltype(check(T())); - static constexpr bool value = !std::is_same::value; -}; - -// Try to get pointer_t (if pointer_t indicates on the type with vector_t -// creates a pointer type on vector_t), otherwise T -template class TryToGetPointerVecT { - static T check(...); - template - static typename PtrValueType< - typename TryToGetVectorT::type>::type, - A::address_space>::type * - check(const A &); - -public: - using type = decltype(check(T())); -}; - -template ::value, std::true_type>::type> -typename TryToGetPointerVecT::type TryToGetPointer(T &t) { - // TODO find the better way to get the pointer to underlying data from vec - // class - return reinterpret_cast::type>(t.get()); -} - -template ::value, std::false_type>::type> -T TryToGetPointer(T &t) { - return t; -} - -// Converts T to OpenCL friendly 
-template -using ConvertToOpenCLType = std::conditional< - TryToGetVectorT::value, typename TryToGetVectorT::type, - typename std::conditional::value, - typename TryToGetPointerVecT::type, T>::type>; - -} // namespace detail -} // namespace sycl -} // namespace cl - #define MAKE_CALL_ARG1(call) \ template \ - ALWAYS_INLINE \ - typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ - T1 t1) __NOEXC { \ + ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ + __invoke_##call(T1 t1) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ extern Ret call(Arg1); \ @@ -104,9 +31,8 @@ using ConvertToOpenCLType = std::conditional< #define MAKE_CALL_ARG2(call) \ template \ - ALWAYS_INLINE \ - typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ - T1 t1, T2 t2) __NOEXC { \ + ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ + __invoke_##call(T1 t1, T2 t2) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg2 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ @@ -117,9 +43,8 @@ using ConvertToOpenCLType = std::conditional< #define MAKE_CALL_ARG3(call) \ template \ - ALWAYS_INLINE \ - typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ - T1 t1, T2 t2, T3 t3) __NOEXC { \ + ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ + __invoke_##call(T1 t1, T2 t2, T3 t3) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg2 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ @@ -264,8 +189,8 @@ MAKE_CALL_ARG2(s_upsample) MAKE_CALL_ARG1(popcount) MAKE_CALL_ARG3(s_mad24) MAKE_CALL_ARG3(u_mad24) -MAKE_CALL_ARG3(s_mul24) -MAKE_CALL_ARG3(u_mul24) +MAKE_CALL_ARG2(s_mul24) 
+MAKE_CALL_ARG2(u_mul24) /* --------------- 4.13.5 Common functions. ---------------------------------*/ MAKE_CALL_ARG3(fclamp) MAKE_CALL_ARG1(degrees) diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index 6b51f4cb454be..aae3f16413252 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -11,11 +11,9 @@ #include #include +#include #include -// TODO Delete when half type will supported by SYCL Runtime -#define __HALF_NO_ENABLED - namespace cl { namespace sycl { namespace detail { @@ -61,7 +59,6 @@ using is_genfloatd = std::integral_constant>::value || is_doublen::value>; -#ifndef __HALF_NO_ENABLED // halfn: half2, half3, half4, half8, half16 template using is_halfn = typename is_contained< @@ -72,34 +69,23 @@ template using is_genfloath = std::integral_constant>::value || is_halfn::value>; -#endif // genfloat: genfloatf, genfloatd, genfloath template using is_genfloat = std::integral_constant::value || - is_genfloatd::value -#ifndef __HALF_NO_ENABLED - || is_genfloath::value -#endif - >; + is_genfloatd::value || + is_genfloath::value>; // sgenfloat: float, double, half template -using is_sgenfloat = typename is_contained>::type; +using is_sgenfloat = + typename is_contained>::type; // vgenfloat: floatn, doublen, halfn template using is_vgenfloat = - std::integral_constant::value || is_doublen::value -#ifndef __HALF_NO_ENABLED - || is_halfn::value -#endif - >; + std::integral_constant::value || is_doublen::value || + is_halfn::value>; // gengeofloat: float, float2, float3, float4 template @@ -111,12 +97,10 @@ template using is_gengeodouble = typename is_contained< T, type_list>::type; -#ifndef __HALF_NO_ENABLED // gengeohalf: half, half2, half3, half4 template using is_gengeohalf = typename is_contained< T, type_list>::type; -#endif // gengeofloat: float, float2, float3, float4 template @@ -129,12 +113,22 @@ using 
is_vgengeodouble = typename is_contained>::type; -#ifndef __HALF_NO_ENABLED // gengeohalf: half2, half3, half4 template using is_vgengeohalf = typename is_contained>::type; -#endif + +// sgengeo: float, double, half +template +using is_sgengeo = std::integral_constant< + bool, is_contained>::value>; + +// vgengeo: vgengeofloat, vgengeodouble, vgengeohalf +template +using is_vgengeo = + std::integral_constant::value || + is_vgengeodouble::value || + is_vgengeohalf::value>; // gencrossfloat: float3, float4 template @@ -146,22 +140,17 @@ template using is_gencrossdouble = typename is_contained>::type; -#ifndef __HALF_NO_ENABLED // gencrosshalf: half3, half4 template using is_gencrosshalf = typename is_contained>::type; -#endif // gencross: gencrossfloat, gencrossdouble, gencrosshalf template using is_gencross = std::integral_constant::value || - is_gencrossdouble::value -#ifndef __HALF_NO_ENABLED - || is_gencrosshalf::value -#endif - >; + is_gencrossdouble::value || + is_gencrosshalf::value>; // charn: char2, char3, char4, char8, char16 template @@ -367,11 +356,15 @@ template using is_gentype = std::integral_constant::value || is_geninteger::value>; +// forward declarations +template class TryToGetElementType; + // genintegerNbit All types within geninteger whose base type are N bits in // size, where N = 8, 16, 32, 64 template using is_igenintegerNbit = typename std::integral_constant< - bool, is_igeninteger::value || (sizeof(typename T::element_type) == N)>; + bool, is_igeninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // igeninteger8bit All types within igeninteger whose base type are 8 bits in // size @@ -393,7 +386,8 @@ template using is_igeninteger64bit = is_igenintegerNbit; // size, where N = 8, 16, 32, 64. 
template using is_ugenintegerNbit = typename std::integral_constant< - bool, is_ugeninteger::value || (sizeof(typename T::element_type) == N)>; + bool, is_ugeninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // ugeninteger8bit All types within ugeninteger whose base type are 8 bits in // size @@ -415,7 +409,8 @@ template using is_ugeninteger64bit = is_ugenintegerNbit; // size, where N = 8, 16, 32, 64. template using is_genintegerNbit = typename std::integral_constant< - bool, is_geninteger::value || (sizeof(typename T::element_type) == N)>; + bool, is_geninteger::value && + (sizeof(typename TryToGetElementType::type) == N)>; // geninteger8bit All types within geninteger whose base type are 8 bits in size template using is_geninteger8bit = is_genintegerNbit; @@ -463,14 +458,12 @@ using is_genfloatptr = is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || -#ifndef __HALF_NO_ENABLED is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || -#endif is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || is_MultiPtrOfGLR::value || @@ -499,7 +492,6 @@ template <> struct unsign_integral_to_float_point { using type = cl_float16; }; -#ifndef __HALF_NO_ENABLED template <> struct unsign_integral_to_float_point { using type = cl_half; }; @@ -518,7 +510,6 @@ template <> struct unsign_integral_to_float_point { template <> struct unsign_integral_to_float_point { using type = cl_half16; }; -#endif template <> struct unsign_integral_to_float_point { using type = cl_double; @@ -585,7 +576,6 @@ template <> struct float_point_to_sign_integral { using type = cl_int16; }; -#ifndef __HALF_NO_ENABLED template <> struct float_point_to_sign_integral { using type = cl_int; }; @@ -604,7 +594,6 @@ template <> struct float_point_to_sign_integral { template <> struct float_point_to_sign_integral { using type = cl_short16; }; -#endif template <> 
struct float_point_to_sign_integral { using type = cl_int; @@ -633,14 +622,14 @@ template <> struct float_point_to_int { using type = cl_int3; }; template <> struct float_point_to_int { using type = cl_int4; }; template <> struct float_point_to_int { using type = cl_int8; }; template <> struct float_point_to_int { using type = cl_int16; }; -#ifndef __HALF_NO_ENABLED + template <> struct float_point_to_int { using type = cl_int; }; template <> struct float_point_to_int { using type = cl_int2; }; template <> struct float_point_to_int { using type = cl_int3; }; template <> struct float_point_to_int { using type = cl_int4; }; template <> struct float_point_to_int { using type = cl_int8; }; template <> struct float_point_to_int { using type = cl_int16; }; -#endif + template <> struct float_point_to_int { using type = cl_int; }; template <> struct float_point_to_int { using type = cl_int2; }; template <> struct float_point_to_int { using type = cl_int3; }; @@ -649,34 +638,35 @@ template <> struct float_point_to_int { using type = cl_int8; }; template <> struct float_point_to_int { using type = cl_int16; }; // Used for abs and abs_diff built-in -template struct make_unsigned; -template <> struct make_unsigned { using type = uchar; }; -template <> struct make_unsigned { using type = uchar2; }; -template <> struct make_unsigned { using type = uchar3; }; -template <> struct make_unsigned { using type = uchar4; }; -template <> struct make_unsigned { using type = uchar8; }; -template <> struct make_unsigned { using type = uchar16; }; - -template <> struct make_unsigned { using type = ushort; }; -template <> struct make_unsigned { using type = ushort2; }; -template <> struct make_unsigned { using type = ushort3; }; -template <> struct make_unsigned { using type = ushort4; }; -template <> struct make_unsigned { using type = ushort8; }; -template <> struct make_unsigned { using type = ushort16; }; - -template <> struct make_unsigned { using type = uint; }; -template <> struct 
make_unsigned { using type = uint2; }; -template <> struct make_unsigned { using type = uint3; }; -template <> struct make_unsigned { using type = uint4; }; -template <> struct make_unsigned { using type = uint8; }; -template <> struct make_unsigned { using type = uint16; }; - -template <> struct make_unsigned { using type = ulong; }; -template <> struct make_unsigned { using type = ulong2; }; -template <> struct make_unsigned { using type = ulong3; }; -template <> struct make_unsigned { using type = ulong4; }; -template <> struct make_unsigned { using type = ulong8; }; -template <> struct make_unsigned { using type = ulong16; }; +template struct make_unsigned { using type = T; }; + +template <> struct make_unsigned { using type = cl_uchar; }; +template <> struct make_unsigned { using type = cl_uchar2; }; +template <> struct make_unsigned { using type = cl_uchar3; }; +template <> struct make_unsigned { using type = cl_uchar4; }; +template <> struct make_unsigned { using type = cl_uchar8; }; +template <> struct make_unsigned { using type = cl_uchar16; }; + +template <> struct make_unsigned { using type = cl_ushort; }; +template <> struct make_unsigned { using type = cl_ushort2; }; +template <> struct make_unsigned { using type = cl_ushort3; }; +template <> struct make_unsigned { using type = cl_ushort4; }; +template <> struct make_unsigned { using type = cl_ushort8; }; +template <> struct make_unsigned { using type = cl_ushort16; }; + +template <> struct make_unsigned { using type = cl_uint; }; +template <> struct make_unsigned { using type = cl_uint2; }; +template <> struct make_unsigned { using type = cl_uint3; }; +template <> struct make_unsigned { using type = cl_uint4; }; +template <> struct make_unsigned { using type = cl_uint8; }; +template <> struct make_unsigned { using type = cl_uint16; }; + +template <> struct make_unsigned { using type = cl_ulong; }; +template <> struct make_unsigned { using type = cl_ulong2; }; +template <> struct make_unsigned { using 
type = cl_ulong3; }; +template <> struct make_unsigned { using type = cl_ulong4; }; +template <> struct make_unsigned { using type = cl_ulong8; }; +template <> struct make_unsigned { using type = cl_ulong16; }; template <> struct make_unsigned { using type = ulonglong; }; template <> struct make_unsigned { using type = ulonglong2; }; @@ -685,30 +675,292 @@ template <> struct make_unsigned { using type = ulonglong4; }; template <> struct make_unsigned { using type = ulonglong8; }; template <> struct make_unsigned { using type = ulonglong16; }; +template struct make_signed { using type = T; }; + +template <> struct make_signed { using type = cl_char; }; +template <> struct make_signed { using type = cl_char2; }; +template <> struct make_signed { using type = cl_char3; }; +template <> struct make_signed { using type = cl_char4; }; +template <> struct make_signed { using type = cl_char8; }; +template <> struct make_signed { using type = cl_char16; }; + +template <> struct make_signed { using type = cl_short; }; +template <> struct make_signed { using type = cl_short2; }; +template <> struct make_signed { using type = cl_short3; }; +template <> struct make_signed { using type = cl_short4; }; +template <> struct make_signed { using type = cl_short8; }; +template <> struct make_signed { using type = cl_short16; }; + +template <> struct make_signed { using type = cl_int; }; +template <> struct make_signed { using type = cl_int2; }; +template <> struct make_signed { using type = cl_int3; }; +template <> struct make_signed { using type = cl_int4; }; +template <> struct make_signed { using type = cl_int8; }; +template <> struct make_signed { using type = cl_int16; }; + +template <> struct make_signed { using type = cl_long; }; +template <> struct make_signed { using type = cl_long2; }; +template <> struct make_signed { using type = cl_long3; }; +template <> struct make_signed { using type = cl_long4; }; +template <> struct make_signed { using type = cl_long8; }; +template <> 
struct make_signed { using type = cl_long16; }; + +template <> struct make_signed { using type = longlong; }; +template <> struct make_signed { using type = longlong2; }; +template <> struct make_signed { using type = longlong3; }; +template <> struct make_signed { using type = longlong4; }; +template <> struct make_signed { using type = longlong8; }; +template <> struct make_signed { using type = longlong16; }; + // Used for upsample built-in // Bases on Table 4.93: Scalar data type aliases supported by SYCL template struct make_upper; -template <> struct make_upper { - using type = cl::sycl::cl_short; + +template <> struct make_upper { using type = cl_short; }; +template <> struct make_upper { using type = cl_short2; }; +template <> struct make_upper { using type = cl_short3; }; +template <> struct make_upper { using type = cl_short4; }; +template <> struct make_upper { using type = cl_short8; }; +template <> struct make_upper { using type = cl_short16; }; + +template <> struct make_upper { using type = cl_ushort; }; +template <> struct make_upper { using type = cl_ushort2; }; +template <> struct make_upper { using type = cl_ushort3; }; +template <> struct make_upper { using type = cl_ushort4; }; +template <> struct make_upper { using type = cl_ushort8; }; +template <> struct make_upper { using type = cl_ushort16; }; + +template <> struct make_upper { using type = cl_int; }; +template <> struct make_upper { using type = cl_int2; }; +template <> struct make_upper { using type = cl_int3; }; +template <> struct make_upper { using type = cl_int4; }; +template <> struct make_upper { using type = cl_int8; }; +template <> struct make_upper { using type = cl_int16; }; + +template <> struct make_upper { using type = cl_uint; }; +template <> struct make_upper { using type = cl_uint2; }; +template <> struct make_upper { using type = cl_uint3; }; +template <> struct make_upper { using type = cl_uint4; }; +template <> struct make_upper { using type = cl_uint8; }; +template <> 
struct make_upper { using type = cl_uint16; }; + +template <> struct make_upper { using type = cl_long; }; +template <> struct make_upper { using type = cl_long2; }; +template <> struct make_upper { using type = cl_long3; }; +template <> struct make_upper { using type = cl_long4; }; +template <> struct make_upper { using type = cl_long8; }; +template <> struct make_upper { using type = cl_long16; }; + +template <> struct make_upper { using type = cl_ulong; }; +template <> struct make_upper { using type = cl_ulong2; }; +template <> struct make_upper { using type = cl_ulong3; }; +template <> struct make_upper { using type = cl_ulong4; }; +template <> struct make_upper { using type = cl_ulong8; }; +template <> struct make_upper { using type = cl_ulong16; }; + +template <> struct make_upper { using type = longlong; }; +template <> struct make_upper { using type = longlong2; }; +template <> struct make_upper { using type = longlong3; }; +template <> struct make_upper { using type = longlong4; }; +template <> struct make_upper { using type = longlong8; }; +template <> struct make_upper { using type = longlong16; }; + +template <> struct make_upper { using type = ulonglong; }; +template <> struct make_upper { using type = ulonglong2; }; +template <> struct make_upper { using type = ulonglong3; }; +template <> struct make_upper { using type = ulonglong4; }; +template <> struct make_upper { using type = ulonglong8; }; +template <> struct make_upper { using type = ulonglong16; }; + +// Try to get pointer_t, otherwise T +template class TryToGetPointerT { + static T check(...); + template static typename A::pointer_t check(const A &); + +public: + using type = decltype(check(T())); + static constexpr bool value = !std::is_same::value; +}; + +// Try to get element_type, otherwise T +template class TryToGetElementType { + static T check(...); + template static typename A::element_type check(const A &); + +public: + using type = decltype(check(T())); + static constexpr bool value 
= !std::is_same::value; +}; + +// Try to get vector_t, otherwise T +template class TryToGetVectorT { + static T check(...); + template static typename A::vector_t check(const A &); + +public: + using type = decltype(check(T())); + static constexpr bool value = !std::is_same::value; +}; + +// Try to get pointer_t (if pointer_t indicates on the type with vector_t +// creates a pointer type on vector_t), otherwise T +template class TryToGetPointerVecT { + static T check(...); + template + static typename PtrValueType< + typename TryToGetVectorT::type>::type, + A::address_space>::type * + check(const A &); + +public: + using type = decltype(check(T())); +}; + +template ::value, std::true_type>::type> +typename TryToGetPointerVecT::type TryToGetPointer(T &t) { + // TODO find the better way to get the pointer to underlying data from vec + // class + return reinterpret_cast::type>(t.get()); +} + +template ::value, std::false_type>::type> +T TryToGetPointer(T &t) { + return t; +} + +// Converts T to OpenCL friendly +template +using ConvertToOpenCLType = std::conditional< + TryToGetVectorT::value, typename TryToGetVectorT::type, + typename std::conditional::value, + typename TryToGetPointerVecT::type, T>::type>; + +// Used for all,any and select relational built-in functions +template inline constexpr T msbMask(T) { + using UT = typename std::make_unsigned::type; + return T(UT(1) << (sizeof(T) * 8 - 1)); +} + +template inline constexpr bool msbIsSet(const T x) { + return (x & msbMask(x)); +} + +template +using common_rel_ret_t = typename detail::float_point_to_sign_integral::type; + +// forward declaration +template struct Boolean; + +// Try to get vector element count or 1 otherwise +template class TryToGetNumElements; + +template +struct TryToGetNumElements< + T, typename std::enable_if::value>::type> { + static constexpr int value = T::get_count(); }; -template <> struct make_upper { - using type = cl::sycl::cl_ushort; +template +struct TryToGetNumElements< + T, typename 
std::enable_if::value>::type> { + static constexpr int value = 1; }; -template <> struct make_upper { - using type = cl::sycl::cl_int; + +// Used for relational comparison built-in functions +template struct RelationalReturnType { +#ifdef __SYCL_DEVICE_ONLY__ + using type = Boolean::value>; +#else + using type = common_rel_ret_t; +#endif }; -template <> struct make_upper { - using type = cl::sycl::cl_uint; + +// Used for select built-in function +template struct SelectWrapperTypeArgC { +#ifdef __SYCL_DEVICE_ONLY__ + using type = Boolean::value>; +#else + using type = T; +#endif }; -template <> struct make_upper { - using type = cl::sycl::cl_long; + +template +using select_arg_c_t = typename SelectWrapperTypeArgC::type; + +template using rel_ret_t = typename RelationalReturnType::type; + +// Used for any and all built-in functions +template struct RelationalTestForSignBitType { +#ifdef __SYCL_DEVICE_ONLY__ + using return_type = detail::Boolean<1>; + using argument_type = detail::Boolean::value>; +#else + using return_type = cl::sycl::cl_int; + using argument_type = T; +#endif }; -template <> struct make_upper { - using type = cl::sycl::cl_ulong; + +template +using rel_sign_bit_test_ret_t = + typename RelationalTestForSignBitType::return_type; + +template +using rel_sign_bit_test_arg_t = + typename RelationalTestForSignBitType::argument_type; + +template struct RelConverter; + +template +struct RelConverter< + T, typename std::enable_if::value>::type> { + static const int N = T::get_count(); +#ifdef __SYCL_DEVICE_ONLY__ + using bool_t = typename Boolean::vector_t; + using ret_t = common_rel_ret_t; +#else + using bool_t = Boolean; + using ret_t = rel_ret_t; +#endif + + static ret_t apply(bool_t value) { +#ifdef __SYCL_DEVICE_ONLY__ + typename ret_t::vector_t result(0); + for (size_t I = 0; I < N; ++I) { + result[I] = 0 - value[I]; + } + return result; +#else + return value; +#endif + } +}; + +template +struct RelConverter< + T, typename std::enable_if::value>::type> { 
+ using R = rel_ret_t; +#ifdef __SYCL_DEVICE_ONLY__ + using value_t = bool; +#else + using value_t = R; +#endif + + static R apply(value_t value) { return value; } }; +template static constexpr T max_v() { + return std::numeric_limits::max(); +} + +template static constexpr T min_v() { + return std::numeric_limits::min(); +} + +template static constexpr T quiet_NaN() { + return std::numeric_limits::quiet_NaN(); +} + } // namespace detail } // namespace sycl } // namespace cl - -#undef __HALF_NO_ENABLED diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index 566171a712b0e..c454a5965db89 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -12,15 +12,12 @@ #include #include -#include // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ -// TODO Remove when half type will supported by SYCL Runtime -#define NO_HALF_ENABLED - namespace s = cl::sycl; +namespace d = s::detail; #define __MAKE_1V(Fun, Call, N, Ret, Arg1) \ Ret##N Fun __NOEXC(Arg1##N x) { \ @@ -56,6 +53,15 @@ namespace s = cl::sycl; return r; \ } +#define __MAKE_1V_RS(Fun, Call, N, Ret, Arg1) \ + Ret Fun __NOEXC(Arg1##N x) { \ + Ret r = Ret(); \ + using base1_t = typename Arg1##N::element_type; \ + detail::helper().run_1v_rs( \ + r, [](Ret &r, base1_t x) { return cl::__host_std::Call(r, x); }, x); \ + return r; \ + } + #define __MAKE_1V_2V_3V(Fun, Call, N, Ret, Arg1, Arg2, Arg3) \ Ret##N Fun __NOEXC(Arg1##N x, Arg2##N y, Arg3##N z) { \ Ret##N r; \ @@ -71,6 +77,19 @@ namespace s = cl::sycl; return r; \ } +#define __MAKE_1V_2S_3S(Fun, N, Ret, Arg1, Arg2, Arg3) \ + Ret##N Fun __NOEXC(Arg1##N x, Arg2 y, Arg3 z) { \ + Ret##N r; \ + using base1_t = typename Arg1##N::element_type; \ + detail::helper().run_1v_2s_3s( \ + r, \ + [](base1_t x, Arg2 y, Arg3 z) { \ + return cl::__host_std::Fun(x, y, z); \ + }, \ + x, y, z); \ + return r; \ + } + #define __MAKE_1V_2S(Fun, N, Ret, Arg1, Arg2) \ Ret##N Fun __NOEXC(Arg1##N x, 
Arg2 y) { \ Ret##N r; \ @@ -79,6 +98,7 @@ namespace s = cl::sycl; r, [](base1_t x, Arg2 y) { return cl::__host_std::Fun(x, y); }, x, y); \ return r; \ } + #define __MAKE_SR_1V_AND(Fun, Call, N, Ret, Arg1) \ Ret Fun __NOEXC(Arg1##N x) { \ Ret r; \ @@ -87,6 +107,7 @@ namespace s = cl::sycl; r, [](base_t x) { return cl::__host_std::Call(x); }, x); \ return r; \ } + #define __MAKE_SR_1V_OR(Fun, Call, N, Ret, Arg1) \ Ret Fun __NOEXC(Arg1##N x) { \ Ret r; \ @@ -123,15 +144,15 @@ namespace s = cl::sycl; } #define MAKE_1V(Fun, Ret, Arg1) MAKE_1V_FUNC(Fun, Fun, Ret, Arg1) + #define MAKE_1V_FUNC(Fun, Call, Ret, Arg1) \ __MAKE_1V(Fun, Call, 2, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 3, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 4, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 8, Ret, Arg1) \ - __MAKE_1V(Fun, Call, 16, Ret, Arg1) + __MAKE_1V(Fun, Call, 3, Ret, Arg1) __MAKE_1V(Fun, Call, 4, Ret, Arg1) \ + __MAKE_1V(Fun, Call, 8, Ret, Arg1) __MAKE_1V(Fun, Call, 16, Ret, Arg1) #define MAKE_1V_2V(Fun, Ret, Arg1, Arg2) \ MAKE_1V_2V_FUNC(Fun, Fun, Ret, Arg1, Arg2) + #define MAKE_1V_2V_FUNC(Fun, Call, Ret, Arg1, Arg2) \ __MAKE_1V_2V(Fun, Call, 2, Ret, Arg1, Arg2) \ __MAKE_1V_2V(Fun, Call, 3, Ret, Arg1, Arg2) \ @@ -141,6 +162,7 @@ namespace s = cl::sycl; #define MAKE_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) + #define MAKE_1V_2V_3V_FUNC(Fun, Call, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3V(Fun, Call, 2, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3V(Fun, Call, 3, Ret, Arg1, Arg2, Arg3) \ @@ -151,47 +173,51 @@ namespace s = cl::sycl; #define MAKE_SC_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \ MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3) + #define MAKE_SC_FSC_1V_2V_3V_FV(FunSc, FunV, Ret, Arg1, Arg2, Arg3) \ MAKE_SC_3ARG(FunSc, Ret, Arg1, Arg2, Arg3) \ MAKE_1V_2V_3V_FUNC(FunSc, FunV, Ret, Arg1, Arg2, Arg3) + #define MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \ Ret Fun __NOEXC(Arg1 x, Arg2 y, Arg3 z) { return (Ret)__##Fun(x, 
y, z); } + #define MAKE_1V_2S(Fun, Ret, Arg1, Arg2) \ __MAKE_1V_2S(Fun, 2, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 4, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2S(Fun, 16, Ret, Arg1, Arg2) - -#define MAKE_SR_1V_AND(Fun, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 2, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 3, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 4, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 8, Ret, Arg1) \ - __MAKE_SR_1V_AND(Fun, Fun, 16, Ret, Arg1) - -#define MAKE_SR_1V_OR(Fun, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 2, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 3, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 4, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 8, Ret, Arg1) \ - __MAKE_SR_1V_OR(Fun, Fun, 16, Ret, Arg1) - -#define MSB_MASK(x) (((decltype(x))1) << ((sizeof(x) * 8) - 1)) + __MAKE_1V_2S(Fun, 3, Ret, Arg1, Arg2) __MAKE_1V_2S(Fun, 4, Ret, Arg1, Arg2) \ + __MAKE_1V_2S(Fun, 8, Ret, Arg1, Arg2) \ + __MAKE_1V_2S(Fun, 16, Ret, Arg1, Arg2) + +#define MAKE_1V_2S_3S(Fun, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 2, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 3, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 4, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 8, Ret, Arg1, Arg2, Arg3) \ + __MAKE_1V_2S_3S(Fun, 16, Ret, Arg1, Arg2, Arg3) + +#define MAKE_SR_1V_AND(Fun, Call, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 2, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 3, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 4, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 8, Ret, Arg1) \ + __MAKE_SR_1V_AND(Fun, Call, 16, Ret, Arg1) + +#define MAKE_SR_1V_OR(Fun, Call, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 2, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 3, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 4, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 8, Ret, Arg1) \ + __MAKE_SR_1V_OR(Fun, Call, 16, Ret, Arg1) #define MAKE_1V_2P(Fun, Ret, Arg1, Arg2) \ __MAKE_1V_2P(Fun, 2, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 3, Ret, Arg1, 
Arg2) \ - __MAKE_1V_2P(Fun, 4, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2P(Fun, 16, Ret, Arg1, Arg2) + __MAKE_1V_2P(Fun, 3, Ret, Arg1, Arg2) __MAKE_1V_2P(Fun, 4, Ret, Arg1, Arg2) \ + __MAKE_1V_2P(Fun, 8, Ret, Arg1, Arg2) \ + __MAKE_1V_2P(Fun, 16, Ret, Arg1, Arg2) -#define MAKE_1V_2V_RS(Fun, Call, Ret, Arg1, Arg2) \ +#define MAKE_GEO_1V_2V_RS(Fun, Call, Ret, Arg1, Arg2) \ __MAKE_1V_2V_RS(Fun, Call, 2, Ret, Arg1, Arg2) \ __MAKE_1V_2V_RS(Fun, Call, 3, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 4, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 8, Ret, Arg1, Arg2) \ - __MAKE_1V_2V_RS(Fun, Call, 16, Ret, Arg1, Arg2) + __MAKE_1V_2V_RS(Fun, Call, 4, Ret, Arg1, Arg2) #define MAKE_1V_2V_3P(Fun, Ret, Arg1, Arg2, Arg3) \ __MAKE_1V_2V_3P(Fun, 2, Ret, Arg1, Arg2, Arg3) \ @@ -206,30 +232,44 @@ namespace detail { template struct helper { template - void run_1v(Res &r, Op op, T1 x) { + inline void run_1v(Res &r, Op op, T1 x) { helper().run_1v(r, op, x); r.template swizzle() = op(x.template swizzle()); } + template - void run_1v_2v(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2v(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2v(r, op, x, y); r.template swizzle() = op(x.template swizzle(), y.template swizzle()); } + template - void run_1v_2s(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2s(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2s(r, op, x, y); r.template swizzle() = op(x.template swizzle(), y); } + template + inline void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z) { + helper().run_1v_2s_3s(r, op, x, y, z); + r.template swizzle() = op(x.template swizzle(), y, z); + } + template - void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2v_rs(r, op, x, y); op(r, x.template swizzle(), y.template swizzle()); } + template + inline void run_1v_rs(Res &r, Op op, T1 x) { + helper().run_1v_rs(r, op, x); + op(r, x.template swizzle()); + } + template - void run_1v_2p(Res &r, Op 
op, T1 x, T2 y) { + inline void run_1v_2p(Res &r, Op op, T1 x, T2 y) { helper().run_1v_2p(r, op, x, y); // TODO avoid creating a temporary variable typename std::remove_pointer::type::element_type temp; @@ -238,7 +278,7 @@ template struct helper { } template - void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { helper().run_1v_2v_3p(r, op, x, y, z); // TODO avoid creating a temporary variable typename std::remove_pointer::type::element_type temp; @@ -248,19 +288,21 @@ template struct helper { } template - void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { helper().run_1v_2v_3v(r, op, x, y, z); r.template swizzle() = op(x.template swizzle(), y.template swizzle(), z.template swizzle()); } + template - void run_1v_sr_or(Res &r, Op op, T1 x) { + inline void run_1v_sr_or(Res &r, Op op, T1 x) { helper().run_1v_sr_or(r, op, x); r = (op(x.template swizzle()) || r); } + template - void run_1v_sr_and(Res &r, Op op, T1 x) { + inline void run_1v_sr_and(Res &r, Op op, T1 x) { helper().run_1v_sr_and(r, op, x); r = (op(x.template swizzle()) && r); } @@ -268,137 +310,568 @@ template struct helper { template <> struct helper<0> { template - void run_1v(Res &r, Op op, T1 x) { + inline void run_1v(Res &r, Op op, T1 x) { r.template swizzle<0>() = op(x.template swizzle<0>()); } + template - void run_1v_2v(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2v(Res &r, Op op, T1 x, T2 y) { r.template swizzle<0>() = op(x.template swizzle<0>(), y.template swizzle<0>()); } + template - void run_1v_2s(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2s(Res &r, Op op, T1 x, T2 y) { r.template swizzle<0>() = op(x.template swizzle<0>(), y); } + + template + inline void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z) { + r.template swizzle<0>() = op(x.template swizzle<0>(), y, z); + } + template - void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2v_rs(Res 
&r, Op op, T1 x, T2 y) { op(r, x.template swizzle<0>(), y.template swizzle<0>()); } + + template + inline void run_1v_rs(Res &r, Op op, T1 x) { + op(r, x.template swizzle<0>()); + } + template - void run_1v_2p(Res &r, Op op, T1 x, T2 y) { + inline void run_1v_2p(Res &r, Op op, T1 x, T2 y) { // TODO avoid creating a temporary variable typename std::remove_pointer::type::element_type temp; r.template swizzle<0>() = op(x.template swizzle<0>(), &temp); y->template swizzle<0>() = temp; } + template - void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z) { // TODO avoid creating a temporary variable typename std::remove_pointer::type::element_type temp; r.template swizzle<0>() = op(x.template swizzle<0>(), y.template swizzle<0>(), &temp); z->template swizzle<0>() = temp; } + template - void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { + inline void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z) { r.template swizzle<0>() = op(x.template swizzle<0>(), y.template swizzle<0>(), z.template swizzle<0>()); } + template - void run_1v_sr_or(Res &r, Op op, T1 x) { + inline void run_1v_sr_or(Res &r, Op op, T1 x) { r = op(x.template swizzle<0>()); } + template - void run_1v_sr_and(Res &r, Op op, T1 x) { + inline void run_1v_sr_and(Res &r, Op op, T1 x) { r = op(x.template swizzle<0>()); } }; } // namespace detail + +s::cl_float OpDot(s::cl_float2, s::cl_float2); +s::cl_float OpDot(s::cl_float3, s::cl_float3); +s::cl_float OpDot(s::cl_float4, s::cl_float4); +s::cl_double OpDot(s::cl_double2, s::cl_double2); +s::cl_double OpDot(s::cl_double3, s::cl_double3); +s::cl_double OpDot(s::cl_double4, s::cl_double4); +s::cl_half OpDot(s::cl_half2, s::cl_half2); +s::cl_half OpDot(s::cl_half3, s::cl_half3); +s::cl_half OpDot(s::cl_half4, s::cl_half4); + +s::cl_int OpAll(s::cl_int2); +s::cl_int OpAll(s::cl_int3); +s::cl_int OpAll(s::cl_int4); + +namespace { +template inline T __acospi(T x) { return std::acos(x) / M_PI; } + +template 
inline T __asinpi(T x) { return std::asin(x) / M_PI; } + +template inline T __atanpi(T x) { return std::atan(x) / M_PI; } + +template inline T __atan2pi(T x, T y) { + return std::atan2(x, y) / M_PI; +} + +template inline T __cospi(T x) { return std::cos(M_PI * x); } + +template T inline __fract(T x, T *iptr) { + T f = std::floor(x); + *(iptr) = f; + return std::fmin(x - f, nextafter(T(1.0), T(0.0))); +} + +template inline T __mad(T a, T b, T c) { return (a * b) + c; } + +template inline T __maxmag(T x, T y) { + if (std::fabs(x) > std::fabs(y)) + return x; + if (std::fabs(y) > std::fabs(x)) + return y; + return std::fmax(x, y); +} + +template inline T __minmag(T x, T y) { + if (std::fabs(x) < std::fabs(y)) + return x; + if (std::fabs(y) < std::fabs(x)) + return y; + return std::fmin(x, y); +} + +template inline T __powr(T x, T y) { + return (x >= T(0)) ? T(std::pow(x, y)) : x; +} + +template inline T __rootn(T x, s::cl_int y) { + return std::pow(x, T(1.0) / y); +} + +template inline T __rsqrt(T x) { return T(1.0) / std::sqrt(x); } + +template inline T __sincos(T x, T *cosval) { + (*cosval) = std::cos(x); + return std::sin(x); +} + +template inline T __sinpi(T x) { return std::sin(M_PI * x); } + +template inline T __tanpi(T x) { return std::tan(M_PI * x); } + +template inline T __abs_diff(T x, T y) { return std::abs(x - y); } + +template inline T __u_add_sat(T x, T y) { + return (x < (d::max_v() - y) ? x + y : d::max_v()); +} + +template inline T __s_add_sat(T x, T y) { + if (x > 0 && y > 0) + return (x < (d::max_v() - y) ? (x + y) : d::max_v()); + if (x < 0 && y < 0) + return (x > (d::min_v() - y) ? 
(x + y) : d::min_v()); + return x + y; +} + +template inline T __hadd(T x, T y) { + const T one = 1; + return (x >> one) + (y >> one) + ((y & x) & one); +} + +template inline T __rhadd(T x, T y) { + const T one = 1; + return (x >> one) + (y >> one) + ((y | x) & one); +} + +template inline T __clamp(T x, T minval, T maxval) { + return std::min(std::max(x, minval), maxval); +} + +template inline constexpr T __clz_impl(T x, T m, T n = 0) { + return (x & m) ? n : __clz_impl(x, T(m >> 1), ++n); +} + +template inline constexpr T __clz(T x) { + using UT = typename std::make_unsigned::type; + return (x == T(0)) ? sizeof(T) * 8 : __clz_impl(x, d::msbMask(x)); +} + +template T __mul_hi(T a, T b) { + using UPT = typename d::make_upper::type; + UPT a_s = a; + UPT b_s = b; + UPT mul = a_s * b_s; + return (mul >> (sizeof(T) * 8)); +} + +// T is minimum of 64 bits- long or longlong +template inline T __long_mul_hi(T a, T b) { + int halfsize = (sizeof(T) * 8) / 2; + T a1 = a >> halfsize; + T a0 = (a << halfsize) >> halfsize; + T b1 = b >> halfsize; + T b0 = (b << halfsize) >> halfsize; + + // a1b1 - for bits - [64-128) + // a1b0 a0b1 for bits - [32-96) + // a0b0 for bits - [0-64) + T a1b1 = a1 * b1; + T a0b1 = a0 * b1; + T a1b0 = a1 * b0; + T a0b0 = a0 * b0; + + // To get the upper 64 bits: + // 64 bits from a1b1, upper 32 bits from [a1b0 + (a0b1 + a0b0>>32 (carry bit + // in 33rd bit))] with carry bit on 64th bit - use of hadd. Add the a1b1 to + // the above 32 bit result. 
+ T result = + a1b1 + (__hadd(a1b0, (a0b1 + (a0b0 >> halfsize))) >> (halfsize - 1)); + return result; +} + +template inline T __mad_hi(T a, T b, T c) { + return __mul_hi(a, b) + c; +} + +template inline T __long_mad_hi(T a, T b, T c) { + return __long_mul_hi(a, b) + c; +} + +template inline T __s_mad_sat(T a, T b, T c) { + using UPT = typename d::make_upper::type; + UPT mul = UPT(a) * UPT(b); + const UPT max = d::max_v(); + const UPT min = d::min_v(); + mul = std::min(std::max(mul, min), max); + return __s_add_sat(T(mul), c); +} + +template inline T __s_long_mad_sat(T a, T b, T c) { + bool neg_prod = (a < 0) ^ (b < 0); + T mulhi = __long_mul_hi(a, b); + + // check mul_hi. If it is any value != 0. + // if prod is +ve, any value in mulhi means we need to saturate. + // if prod is -ve, any value in mulhi besides -1 means we need to saturate. + if (!neg_prod && mulhi != 0) + return d::max_v(); + if (neg_prod && mulhi != -1) + return d::max_v(); // essentially some other negative value. + return __s_add_sat(T(a * b), c); +} + +template inline T __u_mad_sat(T a, T b, T c) { + using UPT = typename d::make_upper::type; + UPT mul = UPT(a) * UPT(b); + const UPT min = d::min_v(); + const UPT max = d::max_v(); + mul = std::min(std::max(mul, min), max); + return __u_add_sat(T(mul), c); +} + +template inline T __u_long_mad_sat(T a, T b, T c) { + T mulhi = __long_mul_hi(a, b); + // check mul_hi. If it is any value != 0. + if (mulhi != 0) + return d::max_v(); + return __u_add_sat(T(a * b), c); +} + +template inline T __rotate(T x, T n) { + using UT = typename std::make_unsigned::type; + return (x << n) | (UT(x) >> ((sizeof(x) * 8) - n)); +} + +template inline T __u_sub_sat(T x, T y) { + return (y < (x - d::min_v())) ? (x - y) : d::min_v(); +} + +template inline T __s_sub_sat(T x, T y) { + if (y > 0) + return (y < (x - d::min_v()) ? x - y : d::min_v()); + if (y < 0) + return (y > (x - d::max_v()) ? 
x - y : d::max_v()); + return x; +} + +template +typename d::make_upper::type inline __upsample(T1 hi, T2 lo) { + using UT = typename d::make_upper::type; + return (UT(hi) << (sizeof(T1) * 8)) | lo; +} + +template inline constexpr T __popcount_impl(T x, size_t n = 0) { + return (x == T(0)) ? n : __popcount_impl(x >> 1, ((x & T(1)) ? ++n : n)); +} + +template inline constexpr T __popcount(T x) { + using UT = typename d::make_unsigned::type; + return __popcount_impl(UT(x)); +} + +template inline T __mad24(T x, T y, T z) { return (x * y) + z; } + +template inline T __mul24(T x, T y) { return (x * y); } + +template inline T __fclamp(T x, T minval, T maxval) { + return std::fmin(std::fmax(x, minval), maxval); +} + +template inline T __degrees(T radians) { + return (180 / M_PI) * radians; +} + +template inline T __mix(T x, T y, T a) { return x + (y - x) * a; } + +template inline T __radians(T degrees) { + return (M_PI / 180) * degrees; +} + +template inline T __step(T edge, T x) { + return (x < edge) ? 
0.0 : 1.0; +} + +template inline T __smoothstep(T edge0, T edge1, T x) { + T t; + T v = (x - edge0) / (edge1 - edge0); + t = __fclamp(v, T(0), T(1)); + return t * t * (3 - 2 * t); +} + +template inline T __sign(T x) { + if (std::isnan(x)) + return T(0.0); + if (x > 0) + return T(1.0); + if (x < 0) + return T(-1.0); + /* x is +0.0 or -0.0 */ + return x; +} + +template inline T __cross(T p0, T p1) { + T result(0); + result.x() = p0.y() * p1.z() - p0.z() * p1.y(); + result.y() = p0.z() * p1.x() - p0.x() * p1.z(); + result.z() = p0.x() * p1.y() - p0.y() * p1.x(); + return result; +} + +template inline void __OpFMul_impl(T &r, T p0, T p1) { + r += p0 * p1; +} + +template inline T __OpFMul(T p0, T p1) { + T result = 0; + __OpFMul_impl(result, p0, p1); + return result; +} + +template +inline typename std::enable_if::value, T>::type __length(T t) { + return std::sqrt(__OpFMul(t, t)); +} + +template +inline typename std::enable_if::value, + typename T::element_type>::type +__length(T t) { + return std::sqrt(OpDot(t, t)); +} + +template +inline typename std::enable_if::value, T>::type +__normalize(T t) { + T r = __length(t); + return t / T(r); +} + +template +inline typename std::enable_if::value, T>::type +__normalize(T t) { + typename T::element_type r = __length(t); + return t / T(r); +} + +template +inline typename std::enable_if::value, T>::type +__fast_length(T t) { + return std::sqrt(__OpFMul(t, t)); +} + +template +inline typename std::enable_if::value, + typename T::element_type>::type +__fast_length(T t) { + return std::sqrt(OpDot(t, t)); +} + +template +inline typename std::enable_if::value, T>::type +__fast_normalize(T t) { + if (OpAll(t == T(0.0f))) + return t; + typename T::element_type r = std::sqrt(OpDot(t, t)); + return t / T(r); +} + +template inline T __vOpFOrdEqual(T x, T y) { return -(x == y); } + +template inline T __sOpFOrdEqual(T x, T y) { return x == y; } + +template inline T __vOpFUnordNotEqual(T x, T y) { + return -(x != y); +} + +template inline T 
__sOpFUnordNotEqual(T x, T y) { return x != y; } + +template inline T __vOpFOrdGreaterThan(T x, T y) { + return -(x > y); +} + +template inline T __sOpFOrdGreaterThan(T x, T y) { return x > y; } + +template inline T __vOpFOrdGreaterThanEqual(T x, T y) { + return -(x >= y); +} + +template inline T __sOpFOrdGreaterThanEqual(T x, T y) { + return x >= y; +} + +template inline T __vOpFOrdLessThanEqual(T x, T y) { + return -(x <= y); +} + +template inline T __sOpFOrdLessThanEqual(T x, T y) { + return x <= y; +} + +template inline T __vOpLessOrGreater(T x, T y) { + return -((x < y) || (x > y)); +} + +template inline T __sOpLessOrGreater(T x, T y) { + return ((x < y) || (x > y)); +} + +template cl_int inline __OpAny(T x) { return d::msbIsSet(x); } +template cl_int inline __OpAll(T x) { return d::msbIsSet(x); } + +template inline T __vOpOrdered(T x, T y) { + return -(!(std::isunordered(x, y))); +} + +template inline T __sOpOrdered(T x, T y) { + return !(std::isunordered(x, y)); +} + +template inline T __vOpUnordered(T x, T y) { + return -(std::isunordered(x, y)); +} + +template inline T __sOpUnordered(T x, T y) { + return std::isunordered(x, y); +} + +template +inline typename std::enable_if::value, T>::type +__bitselect(T a, T b, T c) { + return (a & ~c) | (b & c); +} + +template union databitset; +// float +template <> union databitset { + static_assert(sizeof(uint32_t) == sizeof(float), + "size of float is not equal to 32 bits."); + float f; + uint32_t i; +}; + +// double +template <> union databitset { + static_assert(sizeof(uint64_t) == sizeof(double), + "size of double is not equal to 64 bits."); + double f; + uint64_t i; +}; + +// half +template <> union databitset { + static_assert(sizeof(uint16_t) == sizeof(s::cl_half), + "size of half is not equal to 16 bits."); + s::cl_half f; + uint16_t i; +}; + +template +typename std::enable_if::value, T>::type inline __bitselect( + T a, T b, T c) { + databitset ba; + ba.f = a; + databitset bb; + bb.f = b; + databitset bc; + 
bc.f = c; + databitset br; + br.f = 0; + br.i = ((ba.i & ~bc.i) | (bb.i & bc.i)); + return br.f; +} + +template inline T2 __OpSelect(T c, T2 b, T2 a) { + return (c ? b : a); +} + +template inline T2 __vOpSelect(T c, T2 b, T2 a) { + return d::msbIsSet(c) ? b : a; +} +} // namespace + /* ----------------- 4.13.3 Math functions. Host version --------------------*/ // acos cl_float acos(s::cl_float x) __NOEXC { return std::acos(x); } cl_double acos(s::cl_double x) __NOEXC { return std::acos(x); } -#ifndef NO_HALF_ENABLED cl_half acos(s::cl_half x) __NOEXC { return std::acos(x); } -#endif MAKE_1V(acos, s::cl_float, s::cl_float) MAKE_1V(acos, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(acos, s::cl_half, s::cl_half) -#endif // acosh cl_float acosh(s::cl_float x) __NOEXC { return std::acosh(x); } cl_double acosh(s::cl_double x) __NOEXC { return std::acosh(x); } -#ifndef NO_HALF_ENABLED cl_half acosh(s::cl_half x) __NOEXC { return std::acosh(x); } -#endif MAKE_1V(acosh, s::cl_float, s::cl_float) MAKE_1V(acosh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(acosh, s::cl_half, s::cl_half) -#endif // acospi -cl_float acospi(s::cl_float x) __NOEXC { return std::acos(x) / M_PI; } -cl_double acospi(s::cl_double x) __NOEXC { return std::acos(x) / M_PI; } -#ifndef NO_HALF_ENABLED -cl_half acospi(s::cl_half x) __NOEXC { return std::acos(x) / M_PI; } -#endif +cl_float acospi(s::cl_float x) __NOEXC { return __acospi(x); } +cl_double acospi(s::cl_double x) __NOEXC { return __acospi(x); } +cl_half acospi(s::cl_half x) __NOEXC { return __acospi(x); } MAKE_1V(acospi, s::cl_float, s::cl_float) MAKE_1V(acospi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(acospi, s::cl_half, s::cl_half) -#endif // asin cl_float asin(s::cl_float x) __NOEXC { return std::asin(x); } cl_double asin(s::cl_double x) __NOEXC { return std::asin(x); } -#ifndef NO_HALF_ENABLED cl_half asin(s::cl_half x) __NOEXC { return std::asin(x); } -#endif MAKE_1V(asin, s::cl_float, 
s::cl_float) MAKE_1V(asin, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(asin, s::cl_half, s::cl_half) -#endif // asinh cl_float asinh(s::cl_float x) __NOEXC { return std::asinh(x); } cl_double asinh(s::cl_double x) __NOEXC { return std::asinh(x); } -#ifndef NO_HALF_ENABLED cl_half asinh(s::cl_half x) __NOEXC { return std::asinh(x); } -#endif MAKE_1V(asinh, s::cl_float, s::cl_float) MAKE_1V(asinh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(asinh, s::cl_half, s::cl_half) -#endif // asinpi -cl_float asinpi(s::cl_float x) __NOEXC { return std::asin(x) / M_PI; } -cl_double asinpi(s::cl_double x) __NOEXC { return std::asin(x) / M_PI; } -#ifndef NO_HALF_ENABLED -cl_half asinpi(s::cl_half x) __NOEXC { return std::asin(x) / M_PI; } -#endif +cl_float asinpi(s::cl_float x) __NOEXC { return __asinpi(x); } +cl_double asinpi(s::cl_double x) __NOEXC { return __asinpi(x); } +cl_half asinpi(s::cl_half x) __NOEXC { return __asinpi(x); } MAKE_1V(asinpi, s::cl_float, s::cl_float) MAKE_1V(asinpi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(asinpi, s::cl_half, s::cl_half) -#endif // atan cl_float atan(s::cl_float x) __NOEXC { return std::atan(x); } cl_double atan(s::cl_double x) __NOEXC { return std::atan(x); } -#ifndef NO_HALF_ENABLED cl_half atan(s::cl_half x) __NOEXC { return std::atan(x); } -#endif MAKE_1V(atan, s::cl_float, s::cl_float) MAKE_1V(atan, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(atan, s::cl_half, s::cl_half) -#endif // atan2 cl_float atan2(s::cl_float x, s::cl_float y) __NOEXC { @@ -407,80 +880,54 @@ cl_float atan2(s::cl_float x, s::cl_float y) __NOEXC { cl_double atan2(s::cl_double x, s::cl_double y) __NOEXC { return std::atan2(x, y); } -#ifndef NO_HALF_ENABLED cl_half atan2(s::cl_half x, s::cl_half y) __NOEXC { return std::atan2(x, y); } -#endif MAKE_1V_2V(atan2, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(atan2, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED 
MAKE_1V_2V(atan2, s::cl_half, s::cl_half, s::cl_half) -#endif // atanh cl_float atanh(s::cl_float x) __NOEXC { return std::atanh(x); } cl_double atanh(s::cl_double x) __NOEXC { return std::atanh(x); } -#ifndef NO_HALF_ENABLED cl_half atanh(s::cl_half x) __NOEXC { return std::atanh(x); } -#endif MAKE_1V(atanh, s::cl_float, s::cl_float) MAKE_1V(atanh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(atanh, s::cl_half, s::cl_half) -#endif // atanpi -cl_float atanpi(s::cl_float x) __NOEXC { return std::atan(x) / M_PI; } -cl_double atanpi(s::cl_double x) __NOEXC { return std::atan(x) / M_PI; } -#ifndef NO_HALF_ENABLED -cl_half atanpi(s::cl_half x) __NOEXC { return std::atan(x) / M_PI; } -#endif +cl_float atanpi(s::cl_float x) __NOEXC { return __atanpi(x); } +cl_double atanpi(s::cl_double x) __NOEXC { return __atanpi(x); } +cl_half atanpi(s::cl_half x) __NOEXC { return __atanpi(x); } MAKE_1V(atanpi, s::cl_float, s::cl_float) MAKE_1V(atanpi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(atanpi, s::cl_half, s::cl_half) -#endif // atan2pi cl_float atan2pi(s::cl_float x, s::cl_float y) __NOEXC { - return std::atan2(x, y) / M_PI; + return __atan2pi(x, y); } cl_double atan2pi(s::cl_double x, s::cl_double y) __NOEXC { - return std::atan2(x, y) / M_PI; -} -#ifndef NO_HALF_ENABLED -cl_half atan2pi(s::cl_half x, s::cl_half y) __NOEXC { - return std::atan2(x, y) / M_PI; + return __atan2pi(x, y); } -#endif +cl_half atan2pi(s::cl_half x, s::cl_half y) __NOEXC { return __atan2pi(x, y); } MAKE_1V_2V(atan2pi, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(atan2pi, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(atan2pi, s::cl_half, s::cl_half, s::cl_half) -#endif // cbrt cl_float cbrt(s::cl_float x) __NOEXC { return std::cbrt(x); } cl_double cbrt(s::cl_double x) __NOEXC { return std::cbrt(x); } -#ifndef NO_HALF_ENABLED cl_half cbrt(s::cl_half x) __NOEXC { return std::cbrt(x); } -#endif MAKE_1V(cbrt, s::cl_float, s::cl_float) 
MAKE_1V(cbrt, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(cbrt, s::cl_half, s::cl_half) -#endif // ceil cl_float ceil(s::cl_float x) __NOEXC { return std::ceil(x); } cl_double ceil(s::cl_double x) __NOEXC { return std::ceil(x); } -#ifndef NO_HALF_ENABLED cl_half ceil(s::cl_half x) __NOEXC { return std::ceil(x); } -#endif MAKE_1V(ceil, s::cl_float, s::cl_float) MAKE_1V(ceil, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(ceil, s::cl_half, s::cl_half) -#endif // copysign cl_float copysign(s::cl_float x, s::cl_float y) __NOEXC { @@ -489,162 +936,110 @@ cl_float copysign(s::cl_float x, s::cl_float y) __NOEXC { cl_double copysign(s::cl_double x, s::cl_double y) __NOEXC { return std::copysign(x, y); } -#ifndef NO_HALF_ENABLED cl_half copysign(s::cl_half x, s::cl_half y) __NOEXC { return std::copysign(x, y); } -#endif MAKE_1V_2V(copysign, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(copysign, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(copysign, s::cl_half, s::cl_half, s::cl_half) -#endif // cos cl_float cos(s::cl_float x) __NOEXC { return std::cos(x); } cl_double cos(s::cl_double x) __NOEXC { return std::cos(x); } -#ifndef NO_HALF_ENABLED cl_half cos(s::cl_half x) __NOEXC { return std::cos(x); } -#endif MAKE_1V(cos, s::cl_float, s::cl_float) MAKE_1V(cos, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(cos, s::cl_half, s::cl_half) -#endif // cosh cl_float cosh(s::cl_float x) __NOEXC { return std::cosh(x); } cl_double cosh(s::cl_double x) __NOEXC { return std::cosh(x); } -#ifndef NO_HALF_ENABLED cl_half cosh(s::cl_half x) __NOEXC { return std::cosh(x); } -#endif MAKE_1V(cosh, s::cl_float, s::cl_float) MAKE_1V(cosh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(cosh, s::cl_half, s::cl_half) -#endif // cospi -cl_float cospi(s::cl_float x) __NOEXC { return std::cos(M_PI * x); } -cl_double cospi(s::cl_double x) __NOEXC { return std::cos(M_PI * x); } -#ifndef NO_HALF_ENABLED 
-cl_half cospi(s::cl_half x) __NOEXC { return std::cos(M_PI * x); } -#endif +cl_float cospi(s::cl_float x) __NOEXC { return __cospi(x); } +cl_double cospi(s::cl_double x) __NOEXC { return __cospi(x); } +cl_half cospi(s::cl_half x) __NOEXC { return __cospi(x); } MAKE_1V(cospi, s::cl_float, s::cl_float) MAKE_1V(cospi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(cospi, s::cl_half, s::cl_half) -#endif // erfc cl_float erfc(s::cl_float x) __NOEXC { return std::erfc(x); } cl_double erfc(s::cl_double x) __NOEXC { return std::erfc(x); } -#ifndef NO_HALF_ENABLED cl_half erfc(s::cl_half x) __NOEXC { return std::erfc(x); } -#endif MAKE_1V(erfc, s::cl_float, s::cl_float) MAKE_1V(erfc, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(erfc, s::cl_half, s::cl_half) -#endif // erf cl_float erf(s::cl_float x) __NOEXC { return std::erf(x); } cl_double erf(s::cl_double x) __NOEXC { return std::erf(x); } -#ifndef NO_HALF_ENABLED cl_half erf(s::cl_half x) __NOEXC { return std::erf(x); } -#endif MAKE_1V(erf, s::cl_float, s::cl_float) MAKE_1V(erf, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(erf, s::cl_half, s::cl_half) -#endif // exp cl_float exp(s::cl_float x) __NOEXC { return std::exp(x); } cl_double exp(s::cl_double x) __NOEXC { return std::exp(x); } -#ifndef NO_HALF_ENABLED cl_half exp(s::cl_half x) __NOEXC { return std::exp(x); } -#endif MAKE_1V(exp, s::cl_float, s::cl_float) MAKE_1V(exp, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(exp, s::cl_half, s::cl_half) -#endif // exp2 cl_float exp2(s::cl_float x) __NOEXC { return std::exp2(x); } cl_double exp2(s::cl_double x) __NOEXC { return std::exp2(x); } -#ifndef NO_HALF_ENABLED cl_half exp2(s::cl_half x) __NOEXC { return std::exp2(x); } -#endif MAKE_1V(exp2, s::cl_float, s::cl_float) MAKE_1V(exp2, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(exp2, s::cl_half, s::cl_half) -#endif // exp10 cl_float exp10(s::cl_float x) __NOEXC { return std::pow(10, x); } 
cl_double exp10(s::cl_double x) __NOEXC { return std::pow(10, x); } -#ifndef NO_HALF_ENABLED cl_half exp10(s::cl_half x) __NOEXC { return std::pow(10, x); } -#endif MAKE_1V(exp10, s::cl_float, s::cl_float) MAKE_1V(exp10, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(exp10, s::cl_half, s::cl_half) -#endif // expm1 cl_float expm1(s::cl_float x) __NOEXC { return std::expm1(x); } cl_double expm1(s::cl_double x) __NOEXC { return std::expm1(x); } -#ifndef NO_HALF_ENABLED cl_half expm1(s::cl_half x) __NOEXC { return std::expm1(x); } -#endif MAKE_1V(expm1, s::cl_float, s::cl_float) MAKE_1V(expm1, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(expm1, s::cl_half, s::cl_half) -#endif // fabs cl_float fabs(s::cl_float x) __NOEXC { return std::fabs(x); } cl_double fabs(s::cl_double x) __NOEXC { return std::fabs(x); } -#ifndef NO_HALF_ENABLED cl_half fabs(s::cl_half x) __NOEXC { return std::fabs(x); } -#endif MAKE_1V(fabs, s::cl_float, s::cl_float) MAKE_1V(fabs, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(fabs, s::cl_half, s::cl_half) -#endif // fdim cl_float fdim(s::cl_float x, s::cl_float y) __NOEXC { return std::fdim(x, y); } cl_double fdim(s::cl_double x, s::cl_double y) __NOEXC { return std::fdim(x, y); } -#ifndef NO_HALF_ENABLED cl_half fdim(s::cl_half x, s::cl_half y) __NOEXC { return std::fdim(x, y); } -#endif MAKE_1V_2V(fdim, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fdim, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fdim, s::cl_half, s::cl_half, s::cl_half) -#endif // floor cl_float floor(s::cl_float x) __NOEXC { return std::floor(x); } cl_double floor(s::cl_double x) __NOEXC { return std::floor(x); } -#ifndef NO_HALF_ENABLED cl_half floor(s::cl_half x) __NOEXC { return std::floor(x); } -#endif MAKE_1V(floor, s::cl_float, s::cl_float) MAKE_1V(floor, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(floor, s::cl_half, s::cl_half) -#endif // fma cl_float fma(s::cl_float a, 
s::cl_float b, s::cl_float c) __NOEXC { @@ -653,82 +1048,71 @@ cl_float fma(s::cl_float a, s::cl_float b, s::cl_float c) __NOEXC { cl_double fma(s::cl_double a, s::cl_double b, s::cl_double c) __NOEXC { return std::fma(a, b, c); } -#ifndef NO_HALF_ENABLED cl_half fma(s::cl_half a, s::cl_half b, s::cl_half c) __NOEXC { return std::fma(a, b, c); } -#endif MAKE_1V_2V_3V(fma, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(fma, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(fma, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // fmax cl_float fmax(s::cl_float x, s::cl_float y) __NOEXC { return std::fmax(x, y); } cl_double fmax(s::cl_double x, s::cl_double y) __NOEXC { return std::fmax(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmax(s::cl_half x, s::cl_half y) __NOEXC { return std::fmax(x, y); } -#endif MAKE_1V_2V(fmax, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmax, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmax, s::cl_half, s::cl_half, s::cl_half) -#endif // fmin cl_float fmin(s::cl_float x, s::cl_float y) __NOEXC { return std::fmin(x, y); } cl_double fmin(s::cl_double x, s::cl_double y) __NOEXC { return std::fmin(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmin(s::cl_half x, s::cl_half y) __NOEXC { return std::fmin(x, y); } -#endif MAKE_1V_2V(fmin, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmin, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmin, s::cl_half, s::cl_half, s::cl_half) -#endif // fmod cl_float fmod(s::cl_float x, s::cl_float y) __NOEXC { return std::fmod(x, y); } cl_double fmod(s::cl_double x, s::cl_double y) __NOEXC { return std::fmod(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmod(s::cl_half x, s::cl_half y) __NOEXC { return std::fmod(x, y); } -#endif MAKE_1V_2V(fmod, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmod, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED 
MAKE_1V_2V(fmod, s::cl_half, s::cl_half, s::cl_half) -#endif -/* fract - disabled until proper C++11 compatible implementation + +// nextafter +cl_float nextafter(s::cl_float x, s::cl_float y) __NOEXC { + return std::nextafter(x, y); +} +cl_double nextafter(s::cl_double x, s::cl_double y) __NOEXC { + return std::nextafter(x, y); +} +cl_half nextafter(s::cl_half x, s::cl_half y) __NOEXC { + return std::nextafter(x, y); +} +MAKE_1V_2V(nextafter, s::cl_float, s::cl_float, s::cl_float) +MAKE_1V_2V(nextafter, s::cl_double, s::cl_double, s::cl_double) +MAKE_1V_2V(nextafter, s::cl_half, s::cl_half, s::cl_half) + +// fract cl_float fract(s::cl_float x, s::cl_float *iptr) __NOEXC { - decltype(x) f = std::floor(x); - iptr[0] = f; - return std::fmin(x - f, 0x1.fffffep-1f); + return __fract(x, iptr); } cl_double fract(s::cl_double x, s::cl_double *iptr) __NOEXC { - decltype(x) f = std::floor(x); - iptr[0] = f; - return std::fmin(x - f, 0x1.fffffep-1f); + return __fract(x, iptr); } -#ifdef __HAFL_ENABLED cl_half fract(s::cl_half x, s::cl_half *iptr) __NOEXC { - decltype(x) f = std::floor(x); - iptr[0] = f; - return std::fmin(x - f, 0x1.fffffep-1f); + return __fract(x, iptr); } -#endif MAKE_1V_2P(fract, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2P(fract, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED MAKE_1V_2P(fract, s::cl_half, s::cl_half, s::cl_half) -#endif -*/ + // frexp cl_float frexp(s::cl_float x, s::cl_int *exp) __NOEXC { return std::frexp(x, exp); @@ -736,16 +1120,12 @@ cl_float frexp(s::cl_float x, s::cl_int *exp) __NOEXC { cl_double frexp(s::cl_double x, s::cl_int *exp) __NOEXC { return std::frexp(x, exp); } -#ifdef __HAFL_ENABLED cl_half frexp(s::cl_half x, s::cl_int *exp) __NOEXC { return std::frexp(x, exp); } -#endif MAKE_1V_2P(frexp, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2P(frexp, s::cl_double, s::cl_double, s::cl_int) -#ifdef __HAFL_ENABLED MAKE_1V_2P(frexp, s::cl_half, s::cl_half, s::cl_int) -#endif // hypot cl_float 
hypot(s::cl_float x, s::cl_float y) __NOEXC { @@ -754,52 +1134,36 @@ cl_float hypot(s::cl_float x, s::cl_float y) __NOEXC { cl_double hypot(s::cl_double x, s::cl_double y) __NOEXC { return std::hypot(x, y); } -#ifndef NO_HALF_ENABLED cl_half hypot(s::cl_half x, s::cl_half y) __NOEXC { return std::hypot(x, y); } -#endif MAKE_1V_2V(hypot, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(hypot, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(hypot, s::cl_half, s::cl_half, s::cl_half) -#endif // ilogb cl_int ilogb(s::cl_float x) __NOEXC { return std::ilogb(x); } cl_int ilogb(s::cl_double x) __NOEXC { return std::ilogb(x); } -#ifndef NO_HALF_ENABLED cl_int ilogb(s::cl_half x) __NOEXC { return std::ilogb(x); } -#endif MAKE_1V(ilogb, s::cl_int, s::cl_float) MAKE_1V(ilogb, s::cl_int, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(ilogb, s::cl_int, s::cl_half) -#endif // ldexp cl_float ldexp(s::cl_float x, s::cl_int k) __NOEXC { return std::ldexp(x, k); } cl_double ldexp(s::cl_double x, s::cl_int k) __NOEXC { return std::ldexp(x, k); } -#ifndef NO_HALF_ENABLED cl_half ldexp(s::cl_half x, s::cl_int k) __NOEXC { return std::ldexp(x, k); } -#endif MAKE_1V_2V(ldexp, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V(ldexp, s::cl_double, s::cl_double, s::cl_int) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(ldexp, s::cl_half, s::cl_half, s::cl_int) -#endif // lgamma cl_float lgamma(s::cl_float x) __NOEXC { return std::lgamma(x); } cl_double lgamma(s::cl_double x) __NOEXC { return std::lgamma(x); } -#ifndef NO_HALF_ENABLED cl_half lgamma(s::cl_half x) __NOEXC { return std::lgamma(x); } -#endif MAKE_1V(lgamma, s::cl_float, s::cl_float) MAKE_1V(lgamma, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(lgamma, s::cl_half, s::cl_half) -#endif // lgamma_r cl_float lgamma_r(s::cl_float x, s::cl_int *signp) __NOEXC { @@ -808,166 +1172,86 @@ cl_float lgamma_r(s::cl_float x, s::cl_int *signp) __NOEXC { cl_double lgamma_r(s::cl_double x, s::cl_int *signp) 
__NOEXC { return ::lgamma_r(x, signp); } -#ifdef __HAFL_ENABLED cl_half lgamma_r(s::cl_half x, s::cl_int *signp) __NOEXC { return ::lgamma_r(x, signp); } -#endif MAKE_1V_2P(lgamma_r, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2P(lgamma_r, s::cl_double, s::cl_double, s::cl_int) -#ifdef __HAFL_ENABLED MAKE_1V_2P(lgamma_r, s::cl_half, s::cl_half, s::cl_int) -#endif // log cl_float log(s::cl_float x) __NOEXC { return std::log(x); } cl_double log(s::cl_double x) __NOEXC { return std::log(x); } -#ifndef NO_HALF_ENABLED cl_half log(s::cl_half x) __NOEXC { return std::log(x); } -#endif MAKE_1V(log, s::cl_float, s::cl_float) MAKE_1V(log, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(log, s::cl_half, s::cl_half) -#endif // log2 cl_float log2(s::cl_float x) __NOEXC { return std::log2(x); } cl_double log2(s::cl_double x) __NOEXC { return std::log2(x); } -#ifndef NO_HALF_ENABLED cl_half log2(s::cl_half x) __NOEXC { return std::log2(x); } -#endif MAKE_1V(log2, s::cl_float, s::cl_float) MAKE_1V(log2, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(log2, s::cl_half, s::cl_half) -#endif // log10 cl_float log10(s::cl_float x) __NOEXC { return std::log10(x); } cl_double log10(s::cl_double x) __NOEXC { return std::log10(x); } -#ifndef NO_HALF_ENABLED cl_half log10(s::cl_half x) __NOEXC { return std::log10(x); } -#endif MAKE_1V(log10, s::cl_float, s::cl_float) MAKE_1V(log10, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(log10, s::cl_half, s::cl_half) -#endif // log1p cl_float log1p(s::cl_float x) __NOEXC { return std::log1p(x); } cl_double log1p(s::cl_double x) __NOEXC { return std::log1p(x); } -#ifndef NO_HALF_ENABLED cl_half log1p(s::cl_half x) __NOEXC { return std::log1p(x); } -#endif MAKE_1V(log1p, s::cl_float, s::cl_float) MAKE_1V(log1p, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(log1p, s::cl_half, s::cl_half) -#endif // logb cl_float logb(s::cl_float x) __NOEXC { return std::logb(x); } cl_double logb(s::cl_double x) 
__NOEXC { return std::logb(x); } -#ifndef NO_HALF_ENABLED cl_half logb(s::cl_half x) __NOEXC { return std::logb(x); } -#endif MAKE_1V(logb, s::cl_float, s::cl_float) MAKE_1V(logb, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(logb, s::cl_half, s::cl_half) -#endif // mad cl_float mad(s::cl_float a, s::cl_float b, s::cl_float c) __NOEXC { - return (a * b) + c; + return __mad(a, b, c); } cl_double mad(s::cl_double a, s::cl_double b, s::cl_double c) __NOEXC { - return (a * b) + c; + return __mad(a, b, c); } -#ifndef NO_HALF_ENABLED cl_half mad(s::cl_half a, s::cl_half b, s::cl_half c) __NOEXC { - return (a * b) + c; + return __mad(a, b, c); } -#endif MAKE_1V_2V_3V(mad, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(mad, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(mad, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // maxmag -cl_float maxmag(s::cl_float x, s::cl_float y) __NOEXC { - if (std::fabs(x) > std::fabs(y)) { - return x; - } else if (std::fabs(y) > std::fabs(x)) { - return y; - } else { - return std::fmax(x, y); - } -} +cl_float maxmag(s::cl_float x, s::cl_float y) __NOEXC { return __maxmag(x, y); } cl_double maxmag(s::cl_double x, s::cl_double y) __NOEXC { - if (std::fabs(x) > std::fabs(y)) { - return x; - } else if (std::fabs(y) > std::fabs(x)) { - return y; - } else { - return std::fmax(x, y); - } -} -#ifndef NO_HALF_ENABLED -cl_half maxmag(s::cl_half x, s::cl_half y) __NOEXC { - if (std::fabs(x) > std::fabs(y)) { - return x; - } else if (std::fabs(y) > std::fabs(x)) { - return y; - } else { - return std::fmax(x, y); - } + return __maxmag(x, y); } -#endif +cl_half maxmag(s::cl_half x, s::cl_half y) __NOEXC { return __maxmag(x, y); } MAKE_1V_2V(maxmag, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(maxmag, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(maxmag, s::cl_half, s::cl_half, s::cl_half) -#endif // minmag -cl_float 
minmag(s::cl_float x, s::cl_float y) __NOEXC { - if (std::fabs(x) < std::fabs(y)) { - return x; - } else if (std::fabs(y) < std::fabs(x)) { - return y; - } else { - return std::fmin(x, y); - } -} +cl_float minmag(s::cl_float x, s::cl_float y) __NOEXC { return __minmag(x, y); } cl_double minmag(s::cl_double x, s::cl_double y) __NOEXC { - if (std::fabs(x) < std::fabs(y)) { - return x; - } else if (std::fabs(y) < std::fabs(x)) { - return y; - } else { - return std::fmin(x, y); - } -} -#ifndef NO_HALF_ENABLED -cl_half minmag(s::cl_half x, s::cl_half y) __NOEXC { - if (std::fabs(x) < std::fabs(y)) { - return x; - } else if (std::fabs(y) < std::fabs(x)) { - return y; - } else { - return std::fmin(x, y); - } + return __minmag(x, y); } -#endif +cl_half minmag(s::cl_half x, s::cl_half y) __NOEXC { return __minmag(x, y); } MAKE_1V_2V(minmag, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(minmag, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(minmag, s::cl_half, s::cl_half, s::cl_half) -#endif // modf cl_float modf(s::cl_float x, s::cl_float *iptr) __NOEXC { @@ -976,96 +1260,48 @@ cl_float modf(s::cl_float x, s::cl_float *iptr) __NOEXC { cl_double modf(s::cl_double x, s::cl_double *iptr) __NOEXC { return std::modf(x, iptr); } -#ifdef __HAFL_ENABLED cl_half modf(s::cl_half x, s::cl_half *iptr) __NOEXC { - return std::modf(x, iptr); + return std::modf(x, reinterpret_cast(iptr)); } -#endif MAKE_1V_2P(modf, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2P(modf, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED MAKE_1V_2P(modf, s::cl_half, s::cl_half, s::cl_half) -#endif // nan -cl_float nan(s::cl_uint nancode) __NOEXC { - return std::numeric_limits::quiet_NaN(); -} -cl_double nan(s::cl_ulong nancode) __NOEXC { - return std::numeric_limits::quiet_NaN(); -} -cl_double nan(s::ulonglong nancode) __NOEXC { - return std::numeric_limits::quiet_NaN(); +cl_float nan(s::cl_uint nancode) __NOEXC { return d::quiet_NaN(); } +cl_double 
nan(s::cl_ulong nancode) __NOEXC { return d::quiet_NaN(); } +cl_double nan(s::ulonglong nancode) __NOEXC { return d::quiet_NaN(); } +cl_half nan(s::cl_ushort nancode) __NOEXC { + return s::cl_half(d::quiet_NaN()); } -#ifdef __HAFL_ENABLED -cl_half nan(s::cl_ushort nancode) __NOEXC { return NAN; } -#endif MAKE_1V(nan, s::cl_float, s::cl_uint) MAKE_1V(nan, s::cl_double, s::cl_ulong) MAKE_1V(nan, s::cl_double, s::ulonglong) -#ifdef __HAFL_ENABLED MAKE_1V(nan, s::cl_half, s::cl_ushort) -#endif - -// nextafter -cl_float nextafter(s::cl_float x, s::cl_float y) __NOEXC { - return std::nextafter(x, y); -} -cl_double nextafter(s::cl_double x, s::cl_double y) __NOEXC { - return std::nextafter(x, y); -} -#ifdef __HAFL_ENABLED -cl_half nextafter(s::cl_half x, s::cl_half y) __NOEXC { - return std::nextafter(x, y); -} -#endif -MAKE_1V_2V(nextafter, s::cl_float, s::cl_float, s::cl_float) -MAKE_1V_2V(nextafter, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED -MAKE_1V_2V(nextafter, s::cl_half, s::cl_half, s::cl_half) -#endif // pow cl_float pow(s::cl_float x, s::cl_float y) __NOEXC { return std::pow(x, y); } cl_double pow(s::cl_double x, s::cl_double y) __NOEXC { return std::pow(x, y); } -#ifdef __HAFL_ENABLED cl_half pow(s::cl_half x, s::cl_half y) __NOEXC { return std::pow(x, y); } -#endif MAKE_1V_2V(pow, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(pow, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED MAKE_1V_2V(pow, s::cl_half, s::cl_half, s::cl_half) -#endif // pown cl_float pown(s::cl_float x, s::cl_int y) __NOEXC { return std::pow(x, y); } cl_double pown(s::cl_double x, s::cl_int y) __NOEXC { return std::pow(x, y); } -#ifdef __HAFL_ENABLED cl_half pown(s::cl_half x, s::cl_int y) __NOEXC { return std::pow(x, y); } -#endif MAKE_1V_2V(pown, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V(pown, s::cl_double, s::cl_double, s::cl_int) -#ifdef __HAFL_ENABLED MAKE_1V_2V(pown, s::cl_half, s::cl_half, s::cl_int) -#endif // powr -cl_float 
powr(s::cl_float x, s::cl_float y) __NOEXC { - return (x >= 0 ? std::pow(x, y) : x); -} -cl_double powr(s::cl_double x, s::cl_double y) __NOEXC { - return (x >= 0 ? std::pow(x, y) : x); -} -#ifdef __HAFL_ENABLED -cl_half powr(s::cl_half x, s::cl_half y) __NOEXC { - return (x >= 0 ? std::pow(x, y) : x); -} -#endif +cl_float powr(s::cl_float x, s::cl_float y) __NOEXC { return __powr(x, y); } +cl_double powr(s::cl_double x, s::cl_double y) __NOEXC { return __powr(x, y); } +cl_half powr(s::cl_half x, s::cl_half y) __NOEXC { return __powr(x, y); } MAKE_1V_2V(powr, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(powr, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED MAKE_1V_2V(powr, s::cl_half, s::cl_half, s::cl_half) -#endif // remainder cl_float remainder(s::cl_float x, s::cl_float y) __NOEXC { @@ -1074,16 +1310,12 @@ cl_float remainder(s::cl_float x, s::cl_float y) __NOEXC { cl_double remainder(s::cl_double x, s::cl_double y) __NOEXC { return std::remainder(x, y); } -#ifdef __HAFL_ENABLED cl_half remainder(s::cl_half x, s::cl_half y) __NOEXC { return std::remainder(x, y); } -#endif MAKE_1V_2V(remainder, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(remainder, s::cl_double, s::cl_double, s::cl_double) -#ifdef __HAFL_ENABLED MAKE_1V_2V(remainder, s::cl_half, s::cl_half, s::cl_half) -#endif // remquo cl_float remquo(s::cl_float x, s::cl_float y, s::cl_int *quo) __NOEXC { @@ -1092,214 +1324,520 @@ cl_float remquo(s::cl_float x, s::cl_float y, s::cl_int *quo) __NOEXC { cl_double remquo(s::cl_double x, s::cl_double y, s::cl_int *quo) __NOEXC { return std::remquo(x, y, quo); } -#ifdef __HAFL_ENABLED cl_half remquo(s::cl_half x, s::cl_half y, s::cl_int *quo) __NOEXC { return std::remquo(x, y, quo); } -#endif MAKE_1V_2V_3P(remquo, s::cl_float, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V_3P(remquo, s::cl_double, s::cl_double, s::cl_double, s::cl_int) -#ifdef __HAFL_ENABLED MAKE_1V_2V_3P(remquo, s::cl_half, s::cl_half, s::cl_half, s::cl_int) -#endif // 
rint cl_float rint(s::cl_float x) __NOEXC { return std::rint(x); } cl_double rint(s::cl_double x) __NOEXC { return std::rint(x); } -#ifndef NO_HALF_ENABLED cl_half rint(s::cl_half x) __NOEXC { return std::rint(x); } -#endif MAKE_1V(rint, s::cl_float, s::cl_float) MAKE_1V(rint, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(rint, s::cl_half, s::cl_half) -#endif // rootn -cl_float rootn(s::cl_float x, s::cl_int y) __NOEXC { - return std::pow(x, 1.0 / y); -} -cl_double rootn(s::cl_double x, s::cl_int y) __NOEXC { - return std::pow(x, 1.0 / y); -} -#ifndef NO_HALF_ENABLED -cl_half rootn(s::cl_half x, s::cl_int y) __NOEXC { - return std::pow(x, 1.0 / y); -} -#endif +cl_float rootn(s::cl_float x, s::cl_int y) __NOEXC { return __rootn(x, y); } +cl_double rootn(s::cl_double x, s::cl_int y) __NOEXC { return __rootn(x, y); } +cl_half rootn(s::cl_half x, s::cl_int y) __NOEXC { return __rootn(x, y); } MAKE_1V_2V(rootn, s::cl_float, s::cl_float, s::cl_int) MAKE_1V_2V(rootn, s::cl_double, s::cl_double, s::cl_int) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(rootn, s::cl_half, s::cl_half, s::cl_int) -#endif // round cl_float round(s::cl_float x) __NOEXC { return std::round(x); } cl_double round(s::cl_double x) __NOEXC { return std::round(x); } -#ifndef NO_HALF_ENABLED cl_half round(s::cl_half x) __NOEXC { return std::round(x); } -#endif MAKE_1V(round, s::cl_float, s::cl_float) MAKE_1V(round, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(round, s::cl_half, s::cl_half) -#endif // rsqrt -cl_float rsqrt(s::cl_float x) __NOEXC { return 1.0 / std::sqrt(x); } -cl_double rsqrt(s::cl_double x) __NOEXC { return 1.0 / std::sqrt(x); } -#ifndef NO_HALF_ENABLED -cl_half rsqrt(s::cl_half x) __NOEXC { return 1.0 / std::sqrt(x); } -#endif +cl_float rsqrt(s::cl_float x) __NOEXC { return __rsqrt(x); } +cl_double rsqrt(s::cl_double x) __NOEXC { return __rsqrt(x); } +cl_half rsqrt(s::cl_half x) __NOEXC { return __rsqrt(x); } MAKE_1V(rsqrt, s::cl_float, s::cl_float) MAKE_1V(rsqrt, 
s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(rsqrt, s::cl_half, s::cl_half) -#endif // sin cl_float sin(s::cl_float x) __NOEXC { return std::sin(x); } cl_double sin(s::cl_double x) __NOEXC { return std::sin(x); } -#ifndef NO_HALF_ENABLED cl_half sin(s::cl_half x) __NOEXC { return std::sin(x); } -#endif MAKE_1V(sin, s::cl_float, s::cl_float) MAKE_1V(sin, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sin, s::cl_half, s::cl_half) -#endif // sincos cl_float sincos(s::cl_float x, s::cl_float *cosval) __NOEXC { - cosval[0] = std::cos(x); - return std::sin(x); + return __sincos(x, cosval); } cl_double sincos(s::cl_double x, s::cl_double *cosval) __NOEXC { - cosval[0] = std::cos(x); - return std::sin(x); + return __sincos(x, cosval); } -#ifndef NO_HALF_ENABLED cl_half sincos(s::cl_half x, s::cl_half *cosval) __NOEXC { - cosval[0] = std::cos(x); - return std::sin(x); + return __sincos(x, cosval); } -#endif MAKE_1V_2P(sincos, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2P(sincos, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2P(sincos, s::cl_half, s::cl_half, s::cl_half) -#endif // sinh cl_float sinh(s::cl_float x) __NOEXC { return std::sinh(x); } cl_double sinh(s::cl_double x) __NOEXC { return std::sinh(x); } -#ifndef NO_HALF_ENABLED cl_half sinh(s::cl_half x) __NOEXC { return std::sinh(x); } -#endif MAKE_1V(sinh, s::cl_float, s::cl_float) MAKE_1V(sinh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sinh, s::cl_half, s::cl_half) -#endif // sinpi -cl_float sinpi(s::cl_float x) __NOEXC { return std::sin(M_PI * x); } -cl_double sinpi(s::cl_double x) __NOEXC { return std::sin(M_PI * x); } -#ifndef NO_HALF_ENABLED -cl_half sinpi(s::cl_half x) __NOEXC { return std::sin(M_PI * x); } -#endif +cl_float sinpi(s::cl_float x) __NOEXC { return __sinpi(x); } +cl_double sinpi(s::cl_double x) __NOEXC { return __sinpi(x); } +cl_half sinpi(s::cl_half x) __NOEXC { return __sinpi(x); } MAKE_1V(sinpi, s::cl_float, 
s::cl_float) MAKE_1V(sinpi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sinpi, s::cl_half, s::cl_half) -#endif // sqrt cl_float sqrt(s::cl_float x) __NOEXC { return std::sqrt(x); } cl_double sqrt(s::cl_double x) __NOEXC { return std::sqrt(x); } -#ifndef NO_HALF_ENABLED cl_half sqrt(s::cl_half x) __NOEXC { return std::sqrt(x); } -#endif MAKE_1V(sqrt, s::cl_float, s::cl_float) MAKE_1V(sqrt, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sqrt, s::cl_half, s::cl_half) -#endif // tan cl_float tan(s::cl_float x) __NOEXC { return std::tan(x); } cl_double tan(s::cl_double x) __NOEXC { return std::tan(x); } -#ifndef NO_HALF_ENABLED cl_half tan(s::cl_half x) __NOEXC { return std::tan(x); } -#endif MAKE_1V(tan, s::cl_float, s::cl_float) MAKE_1V(tan, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(tan, s::cl_half, s::cl_half) -#endif // tanh cl_float tanh(s::cl_float x) __NOEXC { return std::tanh(x); } cl_double tanh(s::cl_double x) __NOEXC { return std::tanh(x); } -#ifndef NO_HALF_ENABLED cl_half tanh(s::cl_half x) __NOEXC { return std::tanh(x); } -#endif MAKE_1V(tanh, s::cl_float, s::cl_float) MAKE_1V(tanh, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(tanh, s::cl_half, s::cl_half) -#endif // tanpi -cl_float tanpi(s::cl_float x) __NOEXC { return std::tan(M_PI * x); } -cl_double tanpi(s::cl_double x) __NOEXC { return std::tan(M_PI * x); } -#ifndef NO_HALF_ENABLED -cl_half tanpi(s::cl_half x) __NOEXC { return std::tan(M_PI * x); } -#endif +cl_float tanpi(s::cl_float x) __NOEXC { return __tanpi(x); } +cl_double tanpi(s::cl_double x) __NOEXC { return __tanpi(x); } +cl_half tanpi(s::cl_half x) __NOEXC { return __tanpi(x); } MAKE_1V(tanpi, s::cl_float, s::cl_float) MAKE_1V(tanpi, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(tanpi, s::cl_half, s::cl_half) -#endif // tgamma cl_float tgamma(s::cl_float x) __NOEXC { return std::tgamma(x); } cl_double tgamma(s::cl_double x) __NOEXC { return std::tgamma(x); } 
-#ifndef NO_HALF_ENABLED cl_half tgamma(s::cl_half x) __NOEXC { return std::tgamma(x); } -#endif MAKE_1V(tgamma, s::cl_float, s::cl_float) MAKE_1V(tgamma, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(tgamma, s::cl_half, s::cl_half) -#endif // trunc cl_float trunc(s::cl_float x) __NOEXC { return std::trunc(x); } cl_double trunc(s::cl_double x) __NOEXC { return std::trunc(x); } -#ifndef NO_HALF_ENABLED cl_half trunc(s::cl_half x) __NOEXC { return std::trunc(x); } -#endif MAKE_1V(trunc, s::cl_float, s::cl_float) MAKE_1V(trunc, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(trunc, s::cl_half, s::cl_half) -#endif /* --------------- 4.13.4 Integer functions. Host version -------------------*/ +// u_abs +cl_uchar u_abs(s::cl_uchar x) __NOEXC { return x; } +cl_ushort u_abs(s::cl_ushort x) __NOEXC { return x; } +cl_uint u_abs(s::cl_uint x) __NOEXC { return x; } +cl_ulong u_abs(s::cl_ulong x) __NOEXC { return x; } +s::ulonglong u_abs(s::ulonglong x) __NOEXC { return x; } +MAKE_1V(u_abs, s::cl_uchar, s::cl_uchar) +MAKE_1V(u_abs, s::cl_ushort, s::cl_ushort) +MAKE_1V(u_abs, s::cl_uint, s::cl_uint) +MAKE_1V(u_abs, s::cl_ulong, s::cl_ulong) +MAKE_1V(u_abs, s::ulonglong, s::ulonglong) + +// s_abs +cl_uchar s_abs(s::cl_char x) __NOEXC { return std::abs(x); } +cl_ushort s_abs(s::cl_short x) __NOEXC { return std::abs(x); } +cl_uint s_abs(s::cl_int x) __NOEXC { return std::abs(x); } +cl_ulong s_abs(s::cl_long x) __NOEXC { return std::abs(x); } +s::ulonglong s_abs(s::longlong x) __NOEXC { return std::abs(x); } +MAKE_1V(s_abs, s::cl_uchar, s::cl_char) +MAKE_1V(s_abs, s::cl_ushort, s::cl_short) +MAKE_1V(s_abs, s::cl_uint, s::cl_int) +MAKE_1V(s_abs, s::cl_ulong, s::cl_long) +MAKE_1V(s_abs, s::ulonglong, s::longlong) + +// u_abs_diff: |x - y|, larger minus smaller so unsigned math cannot wrap +cl_uchar u_abs_diff(s::cl_uchar x, s::cl_uchar y) __NOEXC { return x > y ? x - y : y - x; } +cl_ushort u_abs_diff(s::cl_ushort x, s::cl_ushort y) __NOEXC { return x > y ? x - y : y - x; } +cl_uint u_abs_diff(s::cl_uint x, s::cl_uint y) __NOEXC { return x > y ? x - y : y - x; } 
+cl_ulong u_abs_diff(s::cl_ulong x, s::cl_ulong y) __NOEXC { return x > y ? x - y : y - x; } +s::ulonglong u_abs_diff(s::ulonglong x, s::ulonglong y) __NOEXC { + return x > y ? x - y : y - x; +} +MAKE_1V_2V(u_abs_diff, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_abs_diff, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_abs_diff, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_abs_diff, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_abs_diff, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_abs_diff +cl_uchar s_abs_diff(s::cl_char x, s::cl_char y) __NOEXC { + return __abs_diff(x, y); +} +cl_ushort s_abs_diff(s::cl_short x, s::cl_short y) __NOEXC { + return __abs_diff(x, y); +} +cl_uint s_abs_diff(s::cl_int x, s::cl_int y) __NOEXC { + return __abs_diff(x, y); +} +cl_ulong s_abs_diff(s::cl_long x, s::cl_long y) __NOEXC { + return __abs_diff(x, y); +} +s::ulonglong s_abs_diff(s::longlong x, s::longlong y) __NOEXC { + return __abs_diff(x, y); +} +MAKE_1V_2V(s_abs_diff, s::cl_uchar, s::cl_char, s::cl_char) +MAKE_1V_2V(s_abs_diff, s::cl_ushort, s::cl_short, s::cl_short) +MAKE_1V_2V(s_abs_diff, s::cl_uint, s::cl_int, s::cl_int) +MAKE_1V_2V(s_abs_diff, s::cl_ulong, s::cl_long, s::cl_long) +MAKE_1V_2V(s_abs_diff, s::ulonglong, s::longlong, s::longlong) + +// u_add_sat +cl_uchar u_add_sat(s::cl_uchar x, s::cl_uchar y) __NOEXC { + return __u_add_sat(x, y); +} +cl_ushort u_add_sat(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __u_add_sat(x, y); +} +cl_uint u_add_sat(s::cl_uint x, s::cl_uint y) __NOEXC { + return __u_add_sat(x, y); +} +cl_ulong u_add_sat(s::cl_ulong x, s::cl_ulong y) __NOEXC { + return __u_add_sat(x, y); +} +s::ulonglong u_add_sat(s::ulonglong x, s::ulonglong y) __NOEXC { + return __u_add_sat(x, y); +} +MAKE_1V_2V(u_add_sat, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_add_sat, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_add_sat, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_add_sat, s::cl_ulong, s::cl_ulong, s::cl_ulong) 
+MAKE_1V_2V(u_add_sat, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_add_sat +cl_char s_add_sat(s::cl_char x, s::cl_char y) __NOEXC { + return __s_add_sat(x, y); +} +cl_short s_add_sat(s::cl_short x, s::cl_short y) __NOEXC { + return __s_add_sat(x, y); +} +cl_int s_add_sat(s::cl_int x, s::cl_int y) __NOEXC { return __s_add_sat(x, y); } +cl_long s_add_sat(s::cl_long x, s::cl_long y) __NOEXC { + return __s_add_sat(x, y); +} +s::longlong s_add_sat(s::longlong x, s::longlong y) __NOEXC { + return __s_add_sat(x, y); +} +MAKE_1V_2V(s_add_sat, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_add_sat, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_add_sat, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_add_sat, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_add_sat, s::longlong, s::longlong, s::longlong) + +// u_hadd +cl_uchar u_hadd(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __hadd(x, y); } +cl_ushort u_hadd(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __hadd(x, y); +} +cl_uint u_hadd(s::cl_uint x, s::cl_uint y) __NOEXC { return __hadd(x, y); } +cl_ulong u_hadd(s::cl_ulong x, s::cl_ulong y) __NOEXC { return __hadd(x, y); } +s::ulonglong u_hadd(s::ulonglong x, s::ulonglong y) __NOEXC { + return __hadd(x, y); +} +MAKE_1V_2V(u_hadd, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_hadd, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_hadd, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_hadd, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_hadd, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_hadd +cl_char s_hadd(s::cl_char x, s::cl_char y) __NOEXC { return __hadd(x, y); } +cl_short s_hadd(s::cl_short x, s::cl_short y) __NOEXC { return __hadd(x, y); } +cl_int s_hadd(s::cl_int x, s::cl_int y) __NOEXC { return __hadd(x, y); } +cl_long s_hadd(s::cl_long x, s::cl_long y) __NOEXC { return __hadd(x, y); } +s::longlong s_hadd(s::longlong x, s::longlong y) __NOEXC { + return __hadd(x, y); +} +MAKE_1V_2V(s_hadd, s::cl_char, s::cl_char, 
s::cl_char) +MAKE_1V_2V(s_hadd, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_hadd, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_hadd, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_hadd, s::longlong, s::longlong, s::longlong) + +// u_rhadd +cl_uchar u_rhadd(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __rhadd(x, y); } +cl_ushort u_rhadd(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __rhadd(x, y); +} +cl_uint u_rhadd(s::cl_uint x, s::cl_uint y) __NOEXC { return __rhadd(x, y); } +cl_ulong u_rhadd(s::cl_ulong x, s::cl_ulong y) __NOEXC { return __rhadd(x, y); } +s::ulonglong u_rhadd(s::ulonglong x, s::ulonglong y) __NOEXC { + return __rhadd(x, y); +} +MAKE_1V_2V(u_rhadd, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_rhadd, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_rhadd, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_rhadd, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_rhadd, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_rhadd +cl_char s_rhadd(s::cl_char x, s::cl_char y) __NOEXC { return __rhadd(x, y); } +cl_short s_rhadd(s::cl_short x, s::cl_short y) __NOEXC { return __rhadd(x, y); } +cl_int s_rhadd(s::cl_int x, s::cl_int y) __NOEXC { return __rhadd(x, y); } +cl_long s_rhadd(s::cl_long x, s::cl_long y) __NOEXC { return __rhadd(x, y); } +s::longlong s_rhadd(s::longlong x, s::longlong y) __NOEXC { + return __rhadd(x, y); +} +MAKE_1V_2V(s_rhadd, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_rhadd, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_rhadd, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_rhadd, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_rhadd, s::longlong, s::longlong, s::longlong) + +// u_clamp +cl_uchar u_clamp(s::cl_uchar x, s::cl_uchar minval, + s::cl_uchar maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_ushort u_clamp(s::cl_ushort x, s::cl_ushort minval, + s::cl_ushort maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_uint u_clamp(s::cl_uint x, s::cl_uint 
minval, s::cl_uint maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_ulong u_clamp(s::cl_ulong x, s::cl_ulong minval, + s::cl_ulong maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +s::ulonglong u_clamp(s::ulonglong x, s::ulonglong minval, + s::ulonglong maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +MAKE_1V_2V_3V(u_clamp, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V_3V(u_clamp, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V_3V(u_clamp, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V_3V(u_clamp, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V_3V(u_clamp, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) +MAKE_1V_2S_3S(u_clamp, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2S_3S(u_clamp, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2S_3S(u_clamp, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2S_3S(u_clamp, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2S_3S(u_clamp, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_clamp +cl_char s_clamp(s::cl_char x, s::cl_char minval, s::cl_char maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_short s_clamp(s::cl_short x, s::cl_short minval, + s::cl_short maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_int s_clamp(s::cl_int x, s::cl_int minval, s::cl_int maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +cl_long s_clamp(s::cl_long x, s::cl_long minval, s::cl_long maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +s::longlong s_clamp(s::longlong x, s::longlong minval, + s::longlong maxval) __NOEXC { + return __clamp(x, minval, maxval); +} +MAKE_1V_2V_3V(s_clamp, s::cl_char, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V_3V(s_clamp, s::cl_short, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V_3V(s_clamp, s::cl_int, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V_3V(s_clamp, s::cl_long, s::cl_long, s::cl_long, 
s::cl_long) +MAKE_1V_2V_3V(s_clamp, s::longlong, s::longlong, s::longlong, s::longlong) +MAKE_1V_2S_3S(s_clamp, s::cl_char, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2S_3S(s_clamp, s::cl_short, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2S_3S(s_clamp, s::cl_int, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2S_3S(s_clamp, s::cl_long, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2S_3S(s_clamp, s::longlong, s::longlong, s::longlong, s::longlong) + +// clz +cl_uchar clz(s::cl_uchar x) __NOEXC { return __clz(x); } +cl_char clz(s::cl_char x) __NOEXC { return __clz(x); } +cl_ushort clz(s::cl_ushort x) __NOEXC { return __clz(x); } +cl_short clz(s::cl_short x) __NOEXC { return __clz(x); } +cl_uint clz(s::cl_uint x) __NOEXC { return __clz(x); } +cl_int clz(s::cl_int x) __NOEXC { return __clz(x); } +cl_ulong clz(s::cl_ulong x) __NOEXC { return __clz(x); } +cl_long clz(s::cl_long x) __NOEXC { return __clz(x); } +s::ulonglong clz(s::ulonglong x) __NOEXC { return __clz(x); } +s::longlong clz(s::longlong x) __NOEXC { return __clz(x); } +MAKE_1V(clz, s::cl_uchar, s::cl_uchar) +MAKE_1V(clz, s::cl_char, s::cl_char) +MAKE_1V(clz, s::cl_ushort, s::cl_ushort) +MAKE_1V(clz, s::cl_short, s::cl_short) +MAKE_1V(clz, s::cl_uint, s::cl_uint) +MAKE_1V(clz, s::cl_int, s::cl_int) +MAKE_1V(clz, s::cl_ulong, s::cl_ulong) +MAKE_1V(clz, s::cl_long, s::cl_long) +MAKE_1V(clz, s::longlong, s::longlong) +MAKE_1V(clz, s::ulonglong, s::ulonglong) + +// s_mul_hi +cl_char s_mul_hi(cl_char a, cl_char b) { return __mul_hi(a, b); } +cl_short s_mul_hi(cl_short a, cl_short b) { return __mul_hi(a, b); } +cl_int s_mul_hi(cl_int a, cl_int b) { return __mul_hi(a, b); } +cl_long s_mul_hi(s::cl_long x, s::cl_long y) __NOEXC { + return __long_mul_hi(x, y); +} +s::longlong s_mul_hi(s::longlong x, s::longlong y) __NOEXC { + return __long_mul_hi(x, y); +} +MAKE_1V_2V(s_mul_hi, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_mul_hi, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_mul_hi, s::cl_int, s::cl_int, 
s::cl_int) +MAKE_1V_2V(s_mul_hi, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_mul_hi, s::longlong, s::longlong, s::longlong) + +// u_mul_hi +cl_uchar u_mul_hi(cl_uchar a, cl_uchar b) { return __mul_hi(a, b); } +cl_ushort u_mul_hi(cl_ushort a, cl_ushort b) { return __mul_hi(a, b); } +cl_uint u_mul_hi(cl_uint a, cl_uint b) { return __mul_hi(a, b); } +cl_ulong u_mul_hi(s::cl_ulong x, s::cl_ulong y) __NOEXC { + return __long_mul_hi(x, y); +} +s::ulonglong u_mul_hi(s::ulonglong x, s::ulonglong y) __NOEXC { + return __long_mul_hi(x, y); +} +MAKE_1V_2V(u_mul_hi, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_mul_hi, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_mul_hi, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_mul_hi, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_mul_hi, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_mad_hi +cl_char s_mad_hi(s::cl_char x, s::cl_char minval, s::cl_char maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_short s_mad_hi(s::cl_short x, s::cl_short minval, + s::cl_short maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_int s_mad_hi(s::cl_int x, s::cl_int minval, s::cl_int maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_long s_mad_hi(s::cl_long x, s::cl_long minval, s::cl_long maxval) __NOEXC { + return __long_mad_hi(x, minval, maxval); +} +s::longlong s_mad_hi(s::longlong x, s::longlong minval, + s::longlong maxval) __NOEXC { + return __long_mad_hi(x, minval, maxval); +} +MAKE_1V_2V_3V(s_mad_hi, s::cl_char, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V_3V(s_mad_hi, s::cl_short, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V_3V(s_mad_hi, s::cl_int, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V_3V(s_mad_hi, s::cl_long, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V_3V(s_mad_hi, s::longlong, s::longlong, s::longlong, s::longlong) + +// u_mad_hi +cl_uchar u_mad_hi(s::cl_uchar x, s::cl_uchar minval, + s::cl_uchar maxval) __NOEXC { + return __mad_hi(x, minval, 
maxval); +} +cl_ushort u_mad_hi(s::cl_ushort x, s::cl_ushort minval, + s::cl_ushort maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_uint u_mad_hi(s::cl_uint x, s::cl_uint minval, s::cl_uint maxval) __NOEXC { + return __mad_hi(x, minval, maxval); +} +cl_ulong u_mad_hi(s::cl_ulong x, s::cl_ulong minval, + s::cl_ulong maxval) __NOEXC { + return __long_mad_hi(x, minval, maxval); +} +s::ulonglong u_mad_hi(s::ulonglong x, s::ulonglong minval, + s::ulonglong maxval) __NOEXC { + return __long_mad_hi(x, minval, maxval); +} +MAKE_1V_2V_3V(u_mad_hi, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V_3V(u_mad_hi, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V_3V(u_mad_hi, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V_3V(u_mad_hi, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V_3V(u_mad_hi, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_mad_sat +cl_char s_mad_sat(s::cl_char a, s::cl_char b, s::cl_char c) __NOEXC { + return __s_mad_sat(a, b, c); +} +cl_short s_mad_sat(s::cl_short a, s::cl_short b, s::cl_short c) __NOEXC { + return __s_mad_sat(a, b, c); +} +cl_int s_mad_sat(s::cl_int a, s::cl_int b, s::cl_int c) __NOEXC { + return __s_mad_sat(a, b, c); +} +cl_long s_mad_sat(s::cl_long a, s::cl_long b, s::cl_long c) __NOEXC { + return __s_long_mad_sat(a, b, c); +} +s::longlong s_mad_sat(s::longlong a, s::longlong b, s::longlong c) __NOEXC { + return __s_long_mad_sat(a, b, c); +} +MAKE_1V_2V_3V(s_mad_sat, s::cl_char, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V_3V(s_mad_sat, s::cl_short, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V_3V(s_mad_sat, s::cl_int, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V_3V(s_mad_sat, s::cl_long, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V_3V(s_mad_sat, s::longlong, s::longlong, s::longlong, s::longlong) + +// u_mad_sat +cl_uchar u_mad_sat(s::cl_uchar a, s::cl_uchar b, s::cl_uchar c) __NOEXC { + return __u_mad_sat(a, b, c); +} +cl_ushort 
u_mad_sat(s::cl_ushort a, s::cl_ushort b, s::cl_ushort c) __NOEXC { + return __u_mad_sat(a, b, c); +} +cl_uint u_mad_sat(s::cl_uint a, s::cl_uint b, s::cl_uint c) __NOEXC { + return __u_mad_sat(a, b, c); +} +cl_ulong u_mad_sat(s::cl_ulong a, s::cl_ulong b, s::cl_ulong c) __NOEXC { + return __u_long_mad_sat(a, b, c); +} +s::ulonglong u_mad_sat(s::ulonglong a, s::ulonglong b, s::ulonglong c) __NOEXC { + return __u_long_mad_sat(a, b, c); +} +MAKE_1V_2V_3V(u_mad_sat, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V_3V(u_mad_sat, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V_3V(u_mad_sat, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V_3V(u_mad_sat, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V_3V(u_mad_sat, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) + // s_max cl_char s_max(s::cl_char x, s::cl_char y) __NOEXC { return std::max(x, y); } cl_short s_max(s::cl_short x, s::cl_short y) __NOEXC { return std::max(x, y); } cl_int s_max(s::cl_int x, s::cl_int y) __NOEXC { return std::max(x, y); } cl_long s_max(s::cl_long x, s::cl_long y) __NOEXC { return std::max(x, y); } +s::longlong s_max(s::longlong x, s::longlong y) __NOEXC { + return std::max(x, y); +} MAKE_1V_2V(s_max, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2V(s_max, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2V(s_max, s::cl_int, s::cl_int, s::cl_int) MAKE_1V_2V(s_max, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_max, s::longlong, s::longlong, s::longlong) MAKE_1V_2S(s_max, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2S(s_max, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2S(s_max, s::cl_int, s::cl_int, s::cl_int) MAKE_1V_2S(s_max, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2S(s_max, s::longlong, s::longlong, s::longlong) // u_max cl_uchar u_max(s::cl_uchar x, s::cl_uchar y) __NOEXC { return std::max(x, y); } @@ -1308,28 +1846,38 @@ cl_ushort u_max(s::cl_ushort x, s::cl_ushort y) __NOEXC { } cl_uint u_max(s::cl_uint x, 
s::cl_uint y) __NOEXC { return std::max(x, y); } cl_ulong u_max(s::cl_ulong x, s::cl_ulong y) __NOEXC { return std::max(x, y); } +s::ulonglong u_max(s::ulonglong x, s::ulonglong y) __NOEXC { + return std::max(x, y); +} MAKE_1V_2V(u_max, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2V(u_max, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2V(u_max, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_1V_2V(u_max, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_max, s::ulonglong, s::ulonglong, s::ulonglong) MAKE_1V_2S(u_max, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2S(u_max, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2S(u_max, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_1V_2S(u_max, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2S(u_max, s::ulonglong, s::ulonglong, s::ulonglong) // s_min cl_char s_min(s::cl_char x, s::cl_char y) __NOEXC { return std::min(x, y); } cl_short s_min(s::cl_short x, s::cl_short y) __NOEXC { return std::min(x, y); } cl_int s_min(s::cl_int x, s::cl_int y) __NOEXC { return std::min(x, y); } cl_long s_min(s::cl_long x, s::cl_long y) __NOEXC { return std::min(x, y); } +s::longlong s_min(s::longlong x, s::longlong y) __NOEXC { + return std::min(x, y); +} MAKE_1V_2V(s_min, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2V(s_min, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2V(s_min, s::cl_int, s::cl_int, s::cl_int) MAKE_1V_2V(s_min, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_min, s::longlong, s::longlong, s::longlong) MAKE_1V_2S(s_min, s::cl_char, s::cl_char, s::cl_char) MAKE_1V_2S(s_min, s::cl_short, s::cl_short, s::cl_short) MAKE_1V_2S(s_min, s::cl_int, s::cl_int, s::cl_int) MAKE_1V_2S(s_min, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2S(s_min, s::longlong, s::longlong, s::longlong) // u_min cl_uchar u_min(s::cl_uchar x, s::cl_uchar y) __NOEXC { return std::min(x, y); } @@ -1338,48 +1886,189 @@ cl_ushort u_min(s::cl_ushort x, s::cl_ushort y) __NOEXC { } cl_uint u_min(s::cl_uint x, s::cl_uint y) __NOEXC { return std::min(x, y); } 
cl_ulong u_min(s::cl_ulong x, s::cl_ulong y) __NOEXC { return std::min(x, y); } +s::ulonglong u_min(s::ulonglong x, s::ulonglong y) __NOEXC { + return std::min(x, y); +} MAKE_1V_2V(u_min, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2V(u_min, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2V(u_min, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_1V_2V(u_min, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_min, s::ulonglong, s::ulonglong, s::ulonglong) MAKE_1V_2S(u_min, s::cl_uchar, s::cl_uchar, s::cl_uchar) MAKE_1V_2S(u_min, s::cl_ushort, s::cl_ushort, s::cl_ushort) MAKE_1V_2S(u_min, s::cl_uint, s::cl_uint, s::cl_uint) MAKE_1V_2S(u_min, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2S(u_min, s::ulonglong, s::ulonglong, s::ulonglong) + +// rotate +cl_uchar rotate(s::cl_uchar x, s::cl_uchar y) __NOEXC { return __rotate(x, y); } +cl_ushort rotate(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __rotate(x, y); +} +cl_uint rotate(s::cl_uint x, s::cl_uint y) __NOEXC { return __rotate(x, y); } +cl_ulong rotate(s::cl_ulong x, s::cl_ulong y) __NOEXC { return __rotate(x, y); } +s::ulonglong rotate(s::ulonglong x, s::ulonglong y) __NOEXC { + return __rotate(x, y); +} +cl_char rotate(s::cl_char x, s::cl_char y) __NOEXC { return __rotate(x, y); } +cl_short rotate(s::cl_short x, s::cl_short y) __NOEXC { return __rotate(x, y); } +cl_int rotate(s::cl_int x, s::cl_int y) __NOEXC { return __rotate(x, y); } +cl_long rotate(s::cl_long x, s::cl_long y) __NOEXC { return __rotate(x, y); } +s::longlong rotate(s::longlong x, s::longlong y) __NOEXC { + return __rotate(x, y); +} +MAKE_1V_2V(rotate, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(rotate, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(rotate, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(rotate, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(rotate, s::ulonglong, s::ulonglong, s::ulonglong) +MAKE_1V_2V(rotate, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(rotate, s::cl_short, s::cl_short, s::cl_short) 
+MAKE_1V_2V(rotate, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(rotate, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(rotate, s::longlong, s::longlong, s::longlong) + +// u_sub_sat +cl_uchar u_sub_sat(s::cl_uchar x, s::cl_uchar y) __NOEXC { + return __u_sub_sat(x, y); +} +cl_ushort u_sub_sat(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __u_sub_sat(x, y); +} +cl_uint u_sub_sat(s::cl_uint x, s::cl_uint y) __NOEXC { + return __u_sub_sat(x, y); +} +cl_ulong u_sub_sat(s::cl_ulong x, s::cl_ulong y) __NOEXC { + return __u_sub_sat(x, y); +} +s::ulonglong u_sub_sat(s::ulonglong x, s::ulonglong y) __NOEXC { + return __u_sub_sat(x, y); +} +MAKE_1V_2V(u_sub_sat, s::cl_uchar, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_sub_sat, s::cl_ushort, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_sub_sat, s::cl_uint, s::cl_uint, s::cl_uint) +MAKE_1V_2V(u_sub_sat, s::cl_ulong, s::cl_ulong, s::cl_ulong) +MAKE_1V_2V(u_sub_sat, s::ulonglong, s::ulonglong, s::ulonglong) + +// s_sub_sat +cl_char s_sub_sat(s::cl_char x, s::cl_char y) __NOEXC { + return __s_sub_sat(x, y); +} +cl_short s_sub_sat(s::cl_short x, s::cl_short y) __NOEXC { + return __s_sub_sat(x, y); +} +cl_int s_sub_sat(s::cl_int x, s::cl_int y) __NOEXC { return __s_sub_sat(x, y); } +cl_long s_sub_sat(s::cl_long x, s::cl_long y) __NOEXC { + return __s_sub_sat(x, y); +} +s::longlong s_sub_sat(s::longlong x, s::longlong y) __NOEXC { + return __s_sub_sat(x, y); +} +MAKE_1V_2V(s_sub_sat, s::cl_char, s::cl_char, s::cl_char) +MAKE_1V_2V(s_sub_sat, s::cl_short, s::cl_short, s::cl_short) +MAKE_1V_2V(s_sub_sat, s::cl_int, s::cl_int, s::cl_int) +MAKE_1V_2V(s_sub_sat, s::cl_long, s::cl_long, s::cl_long) +MAKE_1V_2V(s_sub_sat, s::longlong, s::longlong, s::longlong) + +// u_upsample +cl_ushort u_upsample(s::cl_uchar x, s::cl_uchar y) __NOEXC { + return __upsample(x, y); +} +cl_uint u_upsample(s::cl_ushort x, s::cl_ushort y) __NOEXC { + return __upsample(x, y); +} +cl_ulong u_upsample(s::cl_uint x, s::cl_uint y) __NOEXC { + return __upsample(x, 
y); +} +MAKE_1V_2V(u_upsample, s::cl_ushort, s::cl_uchar, s::cl_uchar) +MAKE_1V_2V(u_upsample, s::cl_uint, s::cl_ushort, s::cl_ushort) +MAKE_1V_2V(u_upsample, s::cl_ulong, s::cl_uint, s::cl_uint) + +// TODO delete when Intel CPU OpenCL runtime will be fixed +// OpExtInst ... s_upsample -> _Z8upsampleij (now _Z8upsampleii) +#define s_upsample u_upsample + +cl_short s_upsample(s::cl_char x, s::cl_uchar y) __NOEXC { + return __upsample(x, y); +} +cl_int s_upsample(s::cl_short x, s::cl_ushort y) __NOEXC { + return __upsample(x, y); +} +cl_long s_upsample(s::cl_int x, s::cl_uint y) __NOEXC { + return __upsample(x, y); +} +MAKE_1V_2V(s_upsample, s::cl_short, s::cl_char, s::cl_uchar) +MAKE_1V_2V(s_upsample, s::cl_int, s::cl_short, s::cl_ushort) +MAKE_1V_2V(s_upsample, s::cl_long, s::cl_int, s::cl_uint) + +#undef s_upsample + +// popcount +cl_uchar popcount(s::cl_uchar x) __NOEXC { return __popcount(x); } +cl_ushort popcount(s::cl_ushort x) __NOEXC { return __popcount(x); } +cl_uint popcount(s::cl_uint x) __NOEXC { return __popcount(x); } +cl_ulong popcount(s::cl_ulong x) __NOEXC { return __popcount(x); } +s::ulonglong popcount(s::ulonglong x) __NOEXC { return __popcount(x); } +MAKE_1V(popcount, s::cl_uchar, s::cl_uchar) +MAKE_1V(popcount, s::cl_ushort, s::cl_ushort) +MAKE_1V(popcount, s::cl_uint, s::cl_uint) +MAKE_1V(popcount, s::cl_ulong, s::cl_ulong) +MAKE_1V(popcount, s::ulonglong, s::ulonglong) + +cl_char popcount(s::cl_char x) __NOEXC { return __popcount(x); } +cl_short popcount(s::cl_short x) __NOEXC { return __popcount(x); } +cl_int popcount(s::cl_int x) __NOEXC { return __popcount(x); } +cl_long popcount(s::cl_long x) __NOEXC { return __popcount(x); } +s::longlong popcount(s::longlong x) __NOEXC { return __popcount(x); } +MAKE_1V(popcount, s::cl_char, s::cl_char) +MAKE_1V(popcount, s::cl_short, s::cl_short) +MAKE_1V(popcount, s::cl_int, s::cl_int) +MAKE_1V(popcount, s::cl_long, s::cl_long) +MAKE_1V(popcount, s::longlong, s::longlong) + +// u_mad24 +cl_uint 
u_mad24(s::cl_uint x, s::cl_uint y, s::cl_uint z) __NOEXC { + return __mad24(x, y, z); +} +MAKE_1V_2V_3V(u_mad24, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) + +// s_mad24 +cl_int s_mad24(s::cl_int x, s::cl_int y, s::cl_int z) __NOEXC { + return __mad24(x, y, z); +} +MAKE_1V_2V_3V(s_mad24, s::cl_int, s::cl_int, s::cl_int, s::cl_int) + +// u_mul24 +cl_uint u_mul24(s::cl_uint x, s::cl_uint y) __NOEXC { return __mul24(x, y); } +MAKE_1V_2V(u_mul24, s::cl_uint, s::cl_uint, s::cl_uint) + +// s_mul24 +cl_int s_mul24(s::cl_int x, s::cl_int y) __NOEXC { return __mul24(x, y); } +MAKE_1V_2V(s_mul24, s::cl_int, s::cl_int, s::cl_int) /* --------------- 4.13.5 Common functions. Host version --------------------*/ // fclamp cl_float fclamp(s::cl_float x, s::cl_float minval, s::cl_float maxval) __NOEXC { - return std::fmin(std::fmax(x, minval), maxval); + return __fclamp(x, minval, maxval); } cl_double fclamp(s::cl_double x, s::cl_double minval, s::cl_double maxval) __NOEXC { - return std::fmin(std::fmax(x, minval), maxval); + return __fclamp(x, minval, maxval); } -#ifndef NO_HALF_ENABLED cl_half fclamp(s::cl_half x, s::cl_half minval, s::cl_half maxval) __NOEXC { - return std::fmin(std::fmax(x, minval), maxval); + return __fclamp(x, minval, maxval); } -#endif MAKE_1V_2V_3V(fclamp, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(fclamp, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(fclamp, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // degrees -cl_float degrees(s::cl_float radians) __NOEXC { return (180 / M_PI) * radians; } -cl_double degrees(s::cl_double radians) __NOEXC { - return (180 / M_PI) * radians; -} -#ifndef NO_HALF_ENABLED -cl_half degrees(s::cl_half radians) __NOEXC { return (180 / M_PI) * radians; } -#endif +cl_float degrees(s::cl_float radians) __NOEXC { return __degrees(radians); } +cl_double degrees(s::cl_double radians) __NOEXC { return __degrees(radians); } +cl_half 
degrees(s::cl_half radians) __NOEXC { return __degrees(radians); } MAKE_1V(degrees, s::cl_float, s::cl_float) MAKE_1V(degrees, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(degrees, s::cl_half, s::cl_half) -#endif // fmin_common cl_float fmin_common(s::cl_float x, s::cl_float y) __NOEXC { @@ -1388,16 +2077,12 @@ cl_float fmin_common(s::cl_float x, s::cl_float y) __NOEXC { cl_double fmin_common(s::cl_double x, s::cl_double y) __NOEXC { return std::fmin(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmin_common(s::cl_half x, s::cl_half y) __NOEXC { return std::fmin(x, y); } -#endif MAKE_1V_2V(fmin_common, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmin_common, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmin_common, s::cl_half, s::cl_half, s::cl_half) -#endif // fmax_common cl_float fmax_common(s::cl_float x, s::cl_float y) __NOEXC { @@ -1406,315 +2091,251 @@ cl_float fmax_common(s::cl_float x, s::cl_float y) __NOEXC { cl_double fmax_common(s::cl_double x, s::cl_double y) __NOEXC { return std::fmax(x, y); } -#ifndef NO_HALF_ENABLED cl_half fmax_common(s::cl_half x, s::cl_half y) __NOEXC { return std::fmax(x, y); } -#endif MAKE_1V_2V(fmax_common, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(fmax_common, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(fmax_common, s::cl_half, s::cl_half, s::cl_half) -#endif // mix cl_float mix(s::cl_float x, s::cl_float y, s::cl_float a) __NOEXC { - return x + (y - x) * a; + return __mix(x, y, a); } cl_double mix(s::cl_double x, s::cl_double y, s::cl_double a) __NOEXC { - return x + (y - x) * a; + return __mix(x, y, a); } -#ifndef NO_HALF_ENABLED cl_half mix(s::cl_half x, s::cl_half y, s::cl_half a) __NOEXC { - return x + (y - x) * a; + return __mix(x, y, a); } -#endif MAKE_1V_2V_3V(mix, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(mix, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED 
MAKE_1V_2V_3V(mix, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // radians -cl_float radians(s::cl_float degrees) __NOEXC { return (M_PI / 180) * degrees; } -cl_double radians(s::cl_double degrees) __NOEXC { - return (M_PI / 180) * degrees; -} -#ifndef NO_HALF_ENABLED -cl_half radians(s::cl_half degrees) __NOEXC { return (M_PI / 180) * degrees; } -#endif +cl_float radians(s::cl_float degrees) __NOEXC { return __radians(degrees); } +cl_double radians(s::cl_double degrees) __NOEXC { return __radians(degrees); } +cl_half radians(s::cl_half degrees) __NOEXC { return __radians(degrees); } MAKE_1V(radians, s::cl_float, s::cl_float) MAKE_1V(radians, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(radians, s::cl_half, s::cl_half) -#endif // step cl_float step(s::cl_float edge, s::cl_float x) __NOEXC { - return (x < edge) ? 0.0 : 1.0; + return __step(edge, x); } cl_double step(s::cl_double edge, s::cl_double x) __NOEXC { - return (x < edge) ? 0.0 : 1.0; -} -#ifndef NO_HALF_ENABLED -cl_half step(s::cl_half edge, s::cl_half x) __NOEXC { - return (x < edge) ? 
0.0 : 1.0; + return __step(edge, x); } -#endif +cl_half step(s::cl_half edge, s::cl_half x) __NOEXC { return __step(edge, x); } MAKE_1V_2V(step, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V(step, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V(step, s::cl_half, s::cl_half, s::cl_half) -#endif // fma cl_float smoothstep(s::cl_float edge0, s::cl_float edge1, s::cl_float x) __NOEXC { - cl_float t; - t = fclamp((x - edge0) / (edge1 - edge0), 0, 1); - return t * t * (3 - 2 * t); + return __smoothstep(edge0, edge1, x); } cl_double smoothstep(s::cl_double edge0, s::cl_double edge1, s::cl_double x) __NOEXC { - cl_float t; - t = fclamp((x - edge0) / (edge1 - edge0), 0, 1); - return t * t * (3 - 2 * t); + return __smoothstep(edge0, edge1, x); } -#ifndef NO_HALF_ENABLED cl_half smoothstep(s::cl_half edge0, s::cl_half edge1, s::cl_half x) __NOEXC { - cl_float t; - t = fclamp((x - edge0) / (edge1 - edge0), 0, 1); - return t * t * (3 - 2 * t); + return __smoothstep(edge0, edge1, x); } -#endif MAKE_1V_2V_3V(smoothstep, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_1V_2V_3V(smoothstep, s::cl_double, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_3V(smoothstep, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // sign -cl_float sign(s::cl_float x) __NOEXC { - if (std::isnan(x)) { - return 0.0; - } else if (x > 0) { - return 1.0; - } else if (x < 0) { - return -1.0; - } else /* x is +0.0 or -0.0} */ { - return x; - } -} -cl_double sign(s::cl_double x) __NOEXC { - if (std::isnan(x)) { - return 0.0; - } else if (x > 0) { - return 1.0; - } else if (x < 0) { - return -1.0; - } else /* x is +0.0 or -0.0} */ { - return x; - } -} -#ifndef NO_HALF_ENABLED -cl_half sign(s::cl_half x) __NOEXC { - if (std::isnan(x)) { - return 0.0; - } else if (x > 0) { - return 1.0; - } else if (x < 0) { - return -1.0; - } else /* x is +0.0 or -0.0} */ { - return x; - } -} -#endif +cl_float sign(s::cl_float x) __NOEXC { return 
__sign(x); } +cl_double sign(s::cl_double x) __NOEXC { return __sign(x); } +cl_half sign(s::cl_half x) __NOEXC { return __sign(x); } MAKE_1V(sign, s::cl_float, s::cl_float) MAKE_1V(sign, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V(sign, s::cl_half, s::cl_half) -#endif /* --------------- 4.13.6 Geometric Functions. Host version -----------------*/ // cross -#define MAKE_CROSS(r, p0, p1) \ - r.x() = p0.y() * p1.z() - p0.z() * p1.y(); \ - r.y() = p0.z() * p1.x() - p0.x() * p1.z(); \ - r.z() = p0.x() * p1.y() - p0.y() * p1.x(); - s::cl_float3 cross(s::cl_float3 p0, s::cl_float3 p1) __NOEXC { - s::cl_float3 r; - MAKE_CROSS(r, p0, p1) return r; + return __cross(p0, p1); } s::cl_float4 cross(s::cl_float4 p0, s::cl_float4 p1) __NOEXC { - s::cl_float4 r; - MAKE_CROSS(r, p0, p1) r.w() = 0; - return r; + return __cross(p0, p1); } s::cl_double3 cross(s::cl_double3 p0, s::cl_double3 p1) __NOEXC { - s::cl_double3 r; - MAKE_CROSS(r, p0, p1) return r; + return __cross(p0, p1); } s::cl_double4 cross(s::cl_double4 p0, s::cl_double4 p1) __NOEXC { - s::cl_double4 r; - MAKE_CROSS(r, p0, p1) r.w() = 0; - return r; + return __cross(p0, p1); } -#ifndef NO_HALF_ENABLED s::cl_half3 cross(s::cl_half3 p0, s::cl_half3 p1) __NOEXC { - s::cl_half3 r; - MAKE_CROSS(r, p0, p1) return r; + return __cross(p0, p1); } s::cl_half4 cross(s::cl_half4 p0, s::cl_half4 p1) __NOEXC { - s::cl_half4 r; - MAKE_CROSS(r, p0, p1) r.w() = 0; - return r; + return __cross(p0, p1); } -#endif -#undef MAKE_CROSS // OpFMul -template -typename std::enable_if::value, void>::type -__OpFMul(T &r, T p0, T p1) { - r += p0 * p1; -} +cl_float OpFMul(s::cl_float p0, s::cl_float p1) { return __OpFMul(p0, p1); } +cl_double OpFMul(s::cl_double p0, s::cl_double p1) { return __OpFMul(p0, p1); } +cl_float OpFMul(s::cl_half p0, s::cl_half p1) { return __OpFMul(p0, p1); } -cl_float OpFMul(s::cl_float p0, s::cl_float p1) { - s::cl_float r = 0; - __OpFMul(r, p0, p1); - return r; -} -cl_double OpFMul(s::cl_double p0, 
s::cl_double p1) { - s::cl_double r = 0; - __OpFMul(r, p0, p1); - return r; -} -#ifndef NO_HALF_ENABLED -cl_float OpFMul(s::cl_half p0, s::cl_half p1) { - s::cl_half r = 0; - __OpFMul(r, p0, p1); - return r; -} -#endif // OpDot -MAKE_1V_2V_RS(OpDot, __OpFMul, s::cl_float, s::cl_float, s::cl_float) -MAKE_1V_2V_RS(OpDot, __OpFMul, s::cl_double, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED -MAKE_1V_2V_RS(OpDot, __OpFMul, s::cl_half, s::cl_half, s::cl_half) -#endif +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_float, s::cl_float, s::cl_float) +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_double, s::cl_double, + s::cl_double) +MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_half, s::cl_half, s::cl_half) + +// length +cl_float length(s::cl_float p) { return __length(p); } +cl_double length(s::cl_double p) { return __length(p); } +cl_half length(s::cl_half p) { return __length(p); } +cl_float length(s::cl_float2 p) { return __length(p); } +cl_float length(s::cl_float3 p) { return __length(p); } +cl_float length(s::cl_float4 p) { return __length(p); } +cl_double length(s::cl_double2 p) { return __length(p); } +cl_double length(s::cl_double3 p) { return __length(p); } +cl_double length(s::cl_double4 p) { return __length(p); } +cl_half length(s::cl_half2 p) { return __length(p); } +cl_half length(s::cl_half3 p) { return __length(p); } +cl_half length(s::cl_half4 p) { return __length(p); } + +// distance +cl_float distance(s::cl_float p0, s::cl_float p1) { return length(p0 - p1); } +cl_float distance(s::cl_float2 p0, s::cl_float2 p1) { return length(p0 - p1); } +cl_float distance(s::cl_float3 p0, s::cl_float3 p1) { return length(p0 - p1); } +cl_float distance(s::cl_float4 p0, s::cl_float4 p1) { return length(p0 - p1); } +cl_double distance(s::cl_double p0, s::cl_double p1) { return length(p0 - p1); } +cl_double distance(s::cl_double2 p0, s::cl_double2 p1) { + return length(p0 - p1); +} +cl_double distance(s::cl_double3 p0, s::cl_double3 p1) { + return length(p0 - p1); 
+} +cl_double distance(s::cl_double4 p0, s::cl_double4 p1) { + return length(p0 - p1); +} +cl_half distance(s::cl_half p0, s::cl_half p1) { return length(p0 - p1); } +cl_half distance(s::cl_half2 p0, s::cl_half2 p1) { return length(p0 - p1); } +cl_half distance(s::cl_half3 p0, s::cl_half3 p1) { return length(p0 - p1); } +cl_half distance(s::cl_half4 p0, s::cl_half4 p1) { return length(p0 - p1); } + +// normalize +s::cl_float normalize(s::cl_float p) { return __normalize(p); } +s::cl_float2 normalize(s::cl_float2 p) { return __normalize(p); } +s::cl_float3 normalize(s::cl_float3 p) { return __normalize(p); } +s::cl_float4 normalize(s::cl_float4 p) { return __normalize(p); } +s::cl_double normalize(s::cl_double p) { return __normalize(p); } +s::cl_double2 normalize(s::cl_double2 p) { return __normalize(p); } +s::cl_double3 normalize(s::cl_double3 p) { return __normalize(p); } +s::cl_double4 normalize(s::cl_double4 p) { return __normalize(p); } +s::cl_half normalize(s::cl_half p) { return __normalize(p); } +s::cl_half2 normalize(s::cl_half2 p) { return __normalize(p); } +s::cl_half3 normalize(s::cl_half3 p) { return __normalize(p); } +s::cl_half4 normalize(s::cl_half4 p) { return __normalize(p); } + +// fast_length +cl_float fast_length(s::cl_float p) { return __fast_length(p); } +cl_float fast_length(s::cl_float2 p) { return __fast_length(p); } +cl_float fast_length(s::cl_float3 p) { return __fast_length(p); } +cl_float fast_length(s::cl_float4 p) { return __fast_length(p); } + +// fast_normalize +s::cl_float fast_normalize(s::cl_float p) { + if (p == 0.0f) + return p; + s::cl_float r = std::sqrt(OpFMul(p, p)); + return p / r; +} +s::cl_float2 fast_normalize(s::cl_float2 p) { return __fast_normalize(p); } +s::cl_float3 fast_normalize(s::cl_float3 p) { return __fast_normalize(p); } +s::cl_float4 fast_normalize(s::cl_float4 p) { return __fast_normalize(p); } + +// fast_distance +cl_float fast_distance(s::cl_float p0, s::cl_float p1) { + return fast_length(p0 - p1); +} 
+cl_float fast_distance(s::cl_float2 p0, s::cl_float2 p1) { + return fast_length(p0 - p1); +} +cl_float fast_distance(s::cl_float3 p0, s::cl_float3 p1) { + return fast_length(p0 - p1); +} +cl_float fast_distance(s::cl_float4 p0, s::cl_float4 p1) { + return fast_length(p0 - p1); +} /* --------------- 4.13.7 Relational functions. Host version --------------*/ // OpFOrdEqual-isequal -cl_int OpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { return (x == y); } -cl_int OpFOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { return (x == y); } -cl_int __vOpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { - return -(x == y); +cl_int OpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { + return __sOpFOrdEqual(x, y); } -cl_long __vOpFOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { - return -(x == y); +cl_int OpFOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { + return __sOpFOrdEqual(x, y); } -#ifndef NO_HALF_ENABLED -cl_int OpFOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { return (x == y); } -cl_short __vOpFOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { - return -(x == y); +cl_int OpFOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { + return __sOpFOrdEqual(x, y); } -#endif MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_short, s::cl_half, s::cl_half) -#endif // OpFUnordNotEqual-isnotequal cl_int OpFUnordNotEqual(s::cl_float x, s::cl_float y) __NOEXC { - return (x != y); + return __sOpFUnordNotEqual(x, y); } cl_int OpFUnordNotEqual(s::cl_double x, s::cl_double y) __NOEXC { - return (x != y); -} -cl_int __vOpFUnordNotEqual(s::cl_float x, s::cl_float y) __NOEXC { - return -(x != y); + return __sOpFUnordNotEqual(x, y); } -cl_long __vOpFUnordNotEqual(s::cl_double x, s::cl_double y) __NOEXC { - return -(x != y); +cl_int OpFUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { + return 
__sOpFUnordNotEqual(x, y); } -#ifndef NO_HALF_ENABLED -cl_int OpFUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { return (x != y); } -cl_short __vOpFUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { - return -(x != y); -} -#endif MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpFOrdGreaterThan) // isgreater cl_int OpFOrdGreaterThan(s::cl_float x, s::cl_float y) __NOEXC { - return (x > y); + return __sOpFOrdGreaterThan(x, y); } cl_int OpFOrdGreaterThan(s::cl_double x, s::cl_double y) __NOEXC { - return (x > y); -} -cl_int __vOpFOrdGreaterThan(s::cl_float x, s::cl_float y) __NOEXC { - return -(x > y); -} -cl_long __vOpFOrdGreaterThan(s::cl_double x, s::cl_double y) __NOEXC { - return -(x > y); + return __sOpFOrdGreaterThan(x, y); } -#ifndef NO_HALF_ENABLED -cl_int OpFOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { return (x > y); } -cl_short __vOpFOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { - return -(x > y); +cl_int OpFOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { + return __sOpFOrdGreaterThan(x, y); } -#endif MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpFOrdGreaterThanEqual) // isgreaterequal cl_int OpFOrdGreaterThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return (x >= y); + return __sOpFOrdGreaterThanEqual(x, y); } cl_int OpFOrdGreaterThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return (x >= y); -} -cl_int __vOpFOrdGreaterThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return -(x 
>= y); -} -cl_long __vOpFOrdGreaterThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return -(x >= y); + return __sOpFOrdGreaterThanEqual(x, y); } -#ifndef NO_HALF_ENABLED cl_int OpFOrdGreaterThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return (x >= y); + return __sOpFOrdGreaterThanEqual(x, y); } -cl_short __vOpFOrdGreaterThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return -(x >= y); -} -#endif MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpFOrdLessThan) // isless cl_int OpFOrdLessThan(s::cl_float x, s::cl_float y) __NOEXC { return (x < y); } @@ -1727,312 +2348,153 @@ cl_int __vOpFOrdLessThan(s::cl_float x, s::cl_float y) __NOEXC { cl_long __vOpFOrdLessThan(s::cl_double x, s::cl_double y) __NOEXC { return -(x < y); } -#ifndef NO_HALF_ENABLED cl_int OpFOrdLessThan(s::cl_half x, s::cl_half y) __NOEXC { return (x < y); } cl_short __vOpFOrdLessThan(s::cl_half x, s::cl_half y) __NOEXC { return -(x < y); } -#endif MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpFOrdLessThanEqual) // islessequal cl_int OpFOrdLessThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return (x <= y); + return __sOpFOrdLessThanEqual(x, y); } cl_int OpFOrdLessThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return (x <= y); -} -cl_int __vOpFOrdLessThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return -(x <= y); + return __sOpFOrdLessThanEqual(x, y); } -cl_long __vOpFOrdLessThanEqual(s::cl_double x, s::cl_double y) 
__NOEXC { - return -(x <= y); -} -#ifndef NO_HALF_ENABLED cl_int OpFOrdLessThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return (x <= y); -} -cl_short __vOpFOrdLessThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return -(x <= y); + return __sOpFOrdLessThanEqual(x, y); } -#endif MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpLessOrGreater) // islessgreater cl_int OpLessOrGreater(s::cl_float x, s::cl_float y) __NOEXC { - return ((x < y) || (x > y)); + return __sOpLessOrGreater(x, y); } cl_int OpLessOrGreater(s::cl_double x, s::cl_double y) __NOEXC { - return ((x < y) || (x > y)); -} -cl_int __vOpLessOrGreater(s::cl_float x, s::cl_float y) __NOEXC { - return -((x < y) || (x > y)); + return __sOpLessOrGreater(x, y); } -cl_long __vOpLessOrGreater(s::cl_double x, s::cl_double y) __NOEXC { - return -((x < y) || (x > y)); -} -#ifndef NO_HALF_ENABLED cl_int OpLessOrGreater(s::cl_half x, s::cl_half y) __NOEXC { - return ((x < y) || (x > y)); -} -cl_short __vOpLessOrGreater(s::cl_half x, s::cl_half y) __NOEXC { - return -((x < y) || (x > y)); + return __sOpLessOrGreater(x, y); } -#endif MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpIsFinite) // isfinite -cl_int OpIsFinite(s::cl_float x) __NOEXC { return (std::isfinite(x)); } -cl_int OpIsFinite(s::cl_double x) __NOEXC { return (std::isfinite(x)); } +cl_int OpIsFinite(s::cl_float x) __NOEXC { return std::isfinite(x); } +cl_int OpIsFinite(s::cl_double x) __NOEXC { return 
std::isfinite(x); } cl_int __vOpIsFinite(s::cl_float x) __NOEXC { return -(std::isfinite(x)); } cl_long __vOpIsFinite(s::cl_double x) __NOEXC { return -(std::isfinite(x)); } -#ifndef NO_HALF_ENABLED -cl_int OpIsFinite(s::cl_half x) __NOEXC { return (std::isfinite(x)); } +cl_int OpIsFinite(s::cl_half x) __NOEXC { return std::isfinite(x); } cl_short __vOpIsFinite(s::cl_half x) __NOEXC { return -(std::isfinite(x)); } -#endif MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_short, s::cl_half) -#endif // (OpIsInf) // isinf -cl_int OpIsInf(s::cl_float x) __NOEXC { return (std::isinf(x)); } -cl_int OpIsInf(s::cl_double x) __NOEXC { return (std::isinf(x)); } +cl_int OpIsInf(s::cl_float x) __NOEXC { return std::isinf(x); } +cl_int OpIsInf(s::cl_double x) __NOEXC { return std::isinf(x); } cl_int __vOpIsInf(s::cl_float x) __NOEXC { return -(std::isinf(x)); } cl_long __vOpIsInf(s::cl_double x) __NOEXC { return -(std::isinf(x)); } -#ifndef NO_HALF_ENABLED -cl_int OpIsInf(s::cl_half x) __NOEXC { return (std::isinf(x)); } +cl_int OpIsInf(s::cl_half x) __NOEXC { return std::isinf(x); } cl_short __vOpIsInf(s::cl_half x) __NOEXC { return -(std::isinf(x)); } -#endif MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_short, s::cl_half) -#endif // (OpIsNan) // isnan -cl_int OpIsNan(s::cl_float x) __NOEXC { - return (std::isnan(x)); -} -cl_int OpIsNan(s::cl_double x) __NOEXC { return (std::isnan(x)); } -cl_int __vOpIsNan(s::cl_float x) __NOEXC { - return -(std::isnan(x)); -} +cl_int OpIsNan(s::cl_float x) __NOEXC { return std::isnan(x); } +cl_int OpIsNan(s::cl_double x) __NOEXC { return std::isnan(x); } +cl_int __vOpIsNan(s::cl_float x) __NOEXC { return -(std::isnan(x)); } cl_long __vOpIsNan(s::cl_double x) 
__NOEXC { return -(std::isnan(x)); } -#ifndef NO_HALF_ENABLED -cl_int OpIsNan(s::cl_half x) __NOEXC { - return (std::isnan(x)); -} -cl_short __vOpIsNan(s::cl_half x) __NOEXC { - return -(std::isnan(x)); -} -#endif +cl_int OpIsNan(s::cl_half x) __NOEXC { return std::isnan(x); } +cl_short __vOpIsNan(s::cl_half x) __NOEXC { return -(std::isnan(x)); } MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_short, s::cl_half) -#endif // (OpIsNormal) // isnormal -cl_int OpIsNormal(s::cl_float x) __NOEXC { - return (std::isnormal(x)); -} -cl_int OpIsNornmal(s::cl_double x) __NOEXC { return (std::isnormal(x)); } -cl_int __vOpIsNormal(s::cl_float x) __NOEXC { - return -(std::isnormal(x)); -} -cl_long __vOpIsNornmal(s::cl_double x) __NOEXC { return -(std::isnormal(x)); } -#ifndef NO_HALF_ENABLED -cl_int OpIsNormal(s::cl_half x) __NOEXC { - return (std::isnormal(x)); -} -cl_short __vOpIsNormal(s::cl_half x) __NOEXC { - return -(std::isnormal(x)); -} -#endif +cl_int OpIsNormal(s::cl_float x) __NOEXC { return std::isnormal(x); } +cl_int OpIsNormal(s::cl_double x) __NOEXC { return std::isnormal(x); } +cl_int __vOpIsNormal(s::cl_float x) __NOEXC { return -(std::isnormal(x)); } +cl_long __vOpIsNormal(s::cl_double x) __NOEXC { return -(std::isnormal(x)); } +cl_int OpIsNormal(s::cl_half x) __NOEXC { return std::isnormal(x); } +cl_short __vOpIsNormal(s::cl_half x) __NOEXC { return -(std::isnormal(x)); } MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_short, s::cl_half) -#endif // (OpOrdered) // isordered cl_int OpOrdered(s::cl_float x, s::cl_float y) __NOEXC { - return !(std::isunordered(x,y)); + return __vOpOrdered(x, y); } cl_int OpOrdered(s::cl_double x, s::cl_double y) __NOEXC { - return 
!(std::isunordered(x,y)); -} -cl_int __vOpOrdered(s::cl_float x, s::cl_float y) __NOEXC { - return -(!(std::isunordered(x,y))); + return __vOpOrdered(x, y); } -cl_long __vOpOrdered(s::cl_double x, s::cl_double y) __NOEXC { - return -(!(std::isunordered(x,y))); -} -#ifndef NO_HALF_ENABLED cl_int OpOrdered(s::cl_half x, s::cl_half y) __NOEXC { - return (OpFOrdEqual(x, x) && OpFOrdEqual(y, y)); -} -cl_short __vOpOrdered(s::cl_half x, s::cl_half y) __NOEXC { - return -((OpFOrdEqual(x, x) && OpFOrdEqual(y, y))); + return __vOpOrdered(x, y); } -#endif MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpUnordered) // isunordered cl_int OpUnordered(s::cl_float x, s::cl_float y) __NOEXC { - return std::isunordered(x,y); + return __sOpUnordered(x, y); } cl_int OpUnordered(s::cl_double x, s::cl_double y) __NOEXC { - return std::isunordered(x,y); + return __sOpUnordered(x, y); } -cl_int __vOpUnordered(s::cl_float x, s::cl_float y) __NOEXC { - return -(std::isunordered(x,y)); -} -cl_long __vOpUnordered(s::cl_double x, s::cl_double y) __NOEXC { - return -(std::isunordered(x,y)); -} -#ifndef NO_HALF_ENABLED cl_int OpUnordered(s::cl_half x, s::cl_half y) __NOEXC { - return (OpIsNan(x) || OpIsNan(y)); + return __sOpUnordered(x, y); } -cl_short __vOpUnordered(s::cl_half x, s::cl_half y) __NOEXC { - return -((OpIsNan(x) || OpIsNan(y))); -} -#endif MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_int, s::cl_float, s::cl_float) MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_long, s::cl_double, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_short, s::cl_half, s::cl_half) -#endif // (OpSignBitSet) // signbit cl_int OpSignBitSet(s::cl_float x) __NOEXC { return std::signbit(x); } cl_int OpSignBitSet(s::cl_double x) 
__NOEXC { return std::signbit(x); } cl_int __vOpSignBitSet(s::cl_float x) __NOEXC { return -(std::signbit(x)); } cl_long __vOpSignBitSet(s::cl_double x) __NOEXC { return -(std::signbit(x)); } -#ifndef NO_HALF_ENABLED cl_int OpSignBitSet(s::cl_half x) __NOEXC { return std::signbit(x); } cl_short __vOpSignBitSet(s::cl_half x) __NOEXC { return -(std::signbit(x)); } -#endif MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_int, s::cl_float) MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_long, s::cl_double) -#ifndef NO_HALF_ENABLED MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_short, s::cl_half) -#endif // (OpAny) // any -template cl_int __OpAny(T x) { - return ((x & MSB_MASK(x)) == MSB_MASK(x)); -} -cl_int OpAny(s::cl_char x) __NOEXC { return __OpAny(x); } -cl_int OpAny(s::cl_short x) __NOEXC { return __OpAny(x); } -cl_int OpAny(s::cl_int x) __NOEXC { return __OpAny(x); } -cl_int OpAny(s::cl_long x) __NOEXC { return __OpAny(x); } -cl_int OpAny(s::longlong x) __NOEXC { return __OpAny(x); } - -MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_char) -MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_short) -MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_int) -MAKE_SR_1V_OR(OpAny, s::cl_int, s::cl_long) -MAKE_SR_1V_OR(OpAny, s::cl_int, s::longlong) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_char) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_short) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_int) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_long) +MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::longlong) // (OpAll) // all -template cl_int __OpAll(T x) { - return ((x & MSB_MASK(x)) == MSB_MASK(x)); -} -cl_int OpAll(s::cl_char x) __NOEXC { return __OpAll(x); } -cl_int OpAll(s::cl_short x) __NOEXC { return __OpAll(x); } -cl_int OpAll(s::cl_int x) __NOEXC { return __OpAll(x); } -cl_int OpAll(s::cl_long x) __NOEXC { return __OpAll(x); } -cl_int OpAll(s::longlong x) __NOEXC { return __OpAll(x); } - -MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_char) -MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_short) 
-MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_int) -MAKE_SR_1V_AND(OpAll, s::cl_int, s::cl_long) -MAKE_SR_1V_AND(OpAll, s::cl_int, s::longlong) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_char) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_short) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_int) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_long) +MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::longlong) // (bitselect) - -template -typename std::enable_if::value, T>::type -__bitselect(T a, T b, T c) { - return ((a & ~c) | (b & c)); -} - -template union databitset; -// float -template <> union databitset { - static_assert(sizeof(uint32_t) == sizeof(float), - "size of float is not equal to 32 bits."); - float f; - uint32_t i; -}; - -// double -template <> union databitset { - static_assert(sizeof(uint64_t) == sizeof(double), - "size of double is not equal to 64 bits."); - double f; - uint64_t i; -}; - -#ifndef NO_HALF_ENABLED -// Half -template <> union databitset { - static_assert(sizeof(uint16_t) == sizeof(cl_half), - "size of half is not equal to 16 bits."); - cl_half f; - uint16_t i; -}; -#endif - -template -typename std::enable_if::value, T>::type -__bitselect(T a, T b, T c) { - databitset ba; - ba.f = a; - databitset bb; - bb.f = b; - databitset bc; - bc.f = c; - databitset br; - br.f = 0; - br.i = ((ba.i & ~bc.i) | (bb.i & bc.i)); - return br.f; -} - // Instantiate functions for the scalar types and vector types. 
MAKE_SC_1V_2V_3V(bitselect, s::cl_float, s::cl_float, s::cl_float, s::cl_float) MAKE_SC_1V_2V_3V(bitselect, s::cl_double, s::cl_double, s::cl_double, @@ -2049,91 +2511,67 @@ MAKE_SC_1V_2V_3V(bitselect, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) MAKE_SC_1V_2V_3V(bitselect, s::longlong, s::longlong, s::longlong, s::longlong) MAKE_SC_1V_2V_3V(bitselect, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) -#ifndef NO_HALF_ENABLED MAKE_SC_1V_2V_3V(bitselect, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -#endif // (OpSelect) // select // for scalar: result = c ? b : a. // for vector: result[i] = (MSB of c[i] is set)? b[i] : a[i] - -template T __OpSelect(T a, T b, T2 c) { - return (c ? b : a); -} - -template T __vOpSelect(T a, T b, T2 c) { - return ((c && MSB_MASK(c)) ? b : a); -} - -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_float, - s::cl_float, s::cl_int) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_float, - s::cl_float, s::cl_uint) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_double, - s::cl_double, s::cl_long) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_double, - s::cl_double, s::cl_ulong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_double, - s::cl_double, s::longlong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_double, - s::cl_double, s::ulonglong) - +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_int, + s::cl_float, s::cl_float) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_uint, + s::cl_float, s::cl_float) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_long, + s::cl_double, s::cl_double) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_ulong, + s::cl_double, s::cl_double) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::longlong, + s::cl_double, s::cl_double) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, 
s::cl_double, s::ulonglong, + s::cl_double, s::cl_double) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_char, s::cl_char, s::cl_char) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_char, - s::cl_char, s::cl_uchar) - -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_uchar, - s::cl_uchar, s::cl_char) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_uchar, + s::cl_char, s::cl_char) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_char, + s::cl_uchar, s::cl_uchar) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_short, s::cl_short, s::cl_short) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_short, - s::cl_short, s::cl_ushort) - -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_ushort, - s::cl_ushort, s::cl_short) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_ushort, + s::cl_short, s::cl_short) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_short, + s::cl_ushort, s::cl_ushort) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_int, s::cl_int, s::cl_int) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_int, s::cl_int, - s::cl_uint) - -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_uint, - s::cl_uint, s::cl_int) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_uint, s::cl_int, + s::cl_int) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_int, + s::cl_uint, s::cl_uint) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_long, s::cl_long, s::cl_long) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, 
s::cl_long, s::cl_long, - s::cl_long, s::cl_ulong) - -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_ulong, - s::cl_ulong, s::cl_long) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_ulong, + s::cl_long, s::cl_long) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_long, + s::cl_ulong, s::cl_ulong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) - MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::longlong, s::longlong, s::longlong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::longlong, - s::longlong, s::ulonglong) - +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::ulonglong, + s::longlong, s::longlong) MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::ulonglong, - s::ulonglong, s::longlong) - -#ifndef NO_HALF_ENABLED -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_half, - s::cl_half, s::cl_short) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_half, - s::cl_half, s::cl_ushort) -#endif +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::longlong, + s::ulonglong, s::ulonglong) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_short, + s::cl_half, s::cl_half) +MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_ushort, + s::cl_half, s::cl_half) /* --------------- 4.13.3 Native Math functions. 
Host version ---------------*/ // native_cos @@ -2253,17 +2691,29 @@ MAKE_1V(half_tan, s::cl_float, s::cl_float) } // namespace __host_std } // namespace cl +#undef __NOEXC #undef __MAKE_1V #undef __MAKE_1V_2V +#undef __MAKE_1V_2V_RS +#undef __MAKE_1V_RS +#undef __MAKE_1V_2V_3V #undef __MAKE_1V_2S +#undef __MAKE_SR_1V_AND +#undef __MAKE_SR_1V_OR #undef __MAKE_1V_2P -#undef __MAKE_1V_2V_3V #undef __MAKE_1V_2V_3P #undef MAKE_1V +#undef MAKE_1V_FUNC #undef MAKE_1V_2V +#undef MAKE_1V_2V_FUNC +#undef MAKE_1V_2V_3V +#undef MAKE_1V_2V_3V_FUNC +#undef MAKE_SC_1V_2V_3V +#undef MAKE_SC_FSC_1V_2V_3V_FV +#undef MAKE_SC_3ARG #undef MAKE_1V_2S +#undef MAKE_SR_1V_AND +#undef MAKE_SR_1V_OR #undef MAKE_1V_2P -#undef MAKE_1V_2V_3V +#undef MAKE_GEO_1V_2V_RS #undef MAKE_1V_2V_3P - -#undef __NOEXC diff --git a/sycl/test/basic_tests/boolean.cpp b/sycl/test/basic_tests/boolean.cpp new file mode 100644 index 0000000000000..439a5410ebf39 --- /dev/null +++ b/sycl/test/basic_tests/boolean.cpp @@ -0,0 +1,150 @@ +// RUN: %clang -std=c++11 -fsycl %s -o %t.out -lstdc++ -lOpenCL +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +#include + +using namespace cl::sycl; +namespace s = cl::sycl; +namespace d = s::detail; + +d::Boolean<3> foo() { + d::Boolean<3> b3{true, false, true}; + return b3; +} + +int main() { + { + s::cl_long4 r{0}; + { + buffer BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + d::Boolean<4> b4{false, true, false, false}; + AccR[0] = b4; + }); + }); + } + s::cl_long r1 = r.s0(); + s::cl_long r2 = r.s1(); + s::cl_long r3 = r.s2(); + s::cl_long r4 = r.s3(); + + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 + << std::endl; + + assert(r1 == 0); + assert(r2 == -1); + assert(r3 == 0); + assert(r4 == 0); + } + + { + s::cl_short3 r{0}; + { + buffer 
BufR(&r, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { AccR[0] = foo(); }); + }); + } + s::cl_short r1 = r.s0(); + s::cl_short r2 = r.s1(); + s::cl_short r3 = r.s2(); + + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << std::endl; + + assert(r1 == -1); + assert(r2 == 0); + assert(r3 == -1); + } + + { + s::cl_int r1[6]; + s::cl_int r2[6]; + { + buffer BufR1(r1, range<1>(6)); + buffer BufR2(r2, range<1>(6)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto AccR1 = BufR1.get_access(cgh); + auto AccR2 = BufR2.get_access(cgh); + cgh.single_task([=]() { + AccR1[0] = sizeof(d::Boolean<1>); + AccR1[1] = sizeof(d::Boolean<2>); + AccR1[2] = sizeof(d::Boolean<3>); + AccR1[3] = sizeof(d::Boolean<4>); + AccR1[4] = sizeof(d::Boolean<8>); + AccR1[5] = sizeof(d::Boolean<16>); + + AccR2[0] = alignof(d::Boolean<1>); + AccR2[1] = alignof(d::Boolean<2>); + AccR2[2] = alignof(d::Boolean<3>); + AccR2[3] = alignof(d::Boolean<4>); + AccR2[4] = alignof(d::Boolean<8>); + AccR2[5] = alignof(d::Boolean<16>); + }); + }); + } + + for (size_t I = 0; I < 6; I++) { + std::cout << " r1[" << I << "] " << r1[I]; + } + std::cout << std::endl; + + for (size_t I = 0; I < 6; I++) { + std::cout << " r2[" << I << "] " << r2[I]; + } + std::cout << std::endl; + assert(r1[0] == sizeof(d::Boolean<1>)); + assert(r1[1] == sizeof(d::Boolean<2>)); + assert(r1[2] == sizeof(d::Boolean<3>)); + assert(r1[3] == sizeof(d::Boolean<4>)); + assert(r1[4] == sizeof(d::Boolean<8>)); + assert(r1[5] == sizeof(d::Boolean<16>)); + + assert(r2[0] == alignof(d::Boolean<1>)); + assert(r2[1] == alignof(d::Boolean<2>)); + assert(r2[2] == alignof(d::Boolean<3>)); + assert(r2[3] == alignof(d::Boolean<4>)); + assert(r2[4] == alignof(d::Boolean<8>)); + assert(r2[5] == alignof(d::Boolean<16>)); + } + + { + s::cl_int4 i4 = {1, -2, 0, -3}; + d::Boolean<4> b4(i4); + i4 = b4; + + s::cl_int r1 = i4.s0(); + s::cl_int r2 = i4.s1(); + 
s::cl_int r3 = i4.s2(); + s::cl_int r4 = i4.s3(); + + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 + << std::endl; + assert(r1 == 0); + assert(r2 == -1); + assert(r3 == 0); + assert(r4 == -1); + } + + { + s::cl_int r1 = d::Boolean<1>(s::cl_int{-1}); + s::cl_int r2 = d::Boolean<1>(s::cl_int{0}); + s::cl_int r3 = d::Boolean<1>(s::cl_int{1}); + std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << std::endl; + assert(r1 == 1); + assert(r2 == 0); + assert(r3 == 1); + } + + return 0; +} diff --git a/sycl/test/built-ins/scalar_common.cpp b/sycl/test/built-ins/scalar_common.cpp index 0096c8610e64d..7a8c8e6697a1c 100644 --- a/sycl/test/built-ins/scalar_common.cpp +++ b/sycl/test/built-ins/scalar_common.cpp @@ -6,27 +6,24 @@ #include -#include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // max { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::max(cl::sycl::cl_float{0.5f}, cl::sycl::cl_float{2.3f}); + AccR[0] = s::max(s::cl_float{ 0.5f }, s::cl_float{ 2.3f }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2.3f); } diff --git a/sycl/test/built-ins/scalar_geometric.cpp b/sycl/test/built-ins/scalar_geometric.cpp index 1aeb4c7421d8d..6060e9432c258 100644 --- a/sycl/test/built-ins/scalar_geometric.cpp +++ b/sycl/test/built-ins/scalar_geometric.cpp @@ -6,29 +6,123 @@ #include -#include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // dot { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + 
myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::dot(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{1.6}); + AccR[0] = s::dot(s::cl_float{ 0.5 }, s::cl_float{ 1.6 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.8f); } + // distance + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::distance(s::cl_float{ 1.f }, s::cl_float{ 3.f }); + }); + }); + } + assert(r == 2.f); + } + + // length + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::length(s::cl_float{ 1.f }); + }); + }); + } + assert(r == 1.f); + } + + // normalize + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::normalize(s::cl_float{ 2.f }); + }); + }); + } + assert(r == 1.f); + } + + // fast_distance + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::fast_distance(s::cl_float{ 1.f }, s::cl_float{ 3.f }); + }); + }); + } + assert(r == 2.f); + } + + // fast_length + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::fast_length(s::cl_float{ 2.f }); + }); + }); + } + assert(r == 2.f); + } + + // fast_normalize + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); 
+ cgh.single_task([=]() { + AccR[0] = s::fast_normalize(s::cl_float{ 2.f }); + }); + }); + } + + assert(r == 1.f); + } + return 0; } \ No newline at end of file diff --git a/sycl/test/built-ins/scalar_integer.cpp b/sycl/test/built-ins/scalar_integer.cpp index 76f28ceb52080..fe630e5874770 100644 --- a/sycl/test/built-ins/scalar_integer.cpp +++ b/sycl/test/built-ins/scalar_integer.cpp @@ -9,73 +9,414 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // max { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_int{5}, cl::sycl::cl_int{2}); + AccR[0] = s::max(s::cl_int{ 5 }, s::cl_int{ 2 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 5); } + // max { - cl::sycl::cl_uint r{0}; + s::cl_uint r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_uint{5}, cl::sycl::cl_uint{2}); + AccR[0] = s::max(s::cl_uint{ 5 }, s::cl_uint{ 2 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 5); } + // min { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::min(cl::sycl::cl_int{5}, cl::sycl::cl_int{2}); + AccR[0] = s::min(s::cl_int{ 5 }, s::cl_int{ 2 
}); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2); } + // min { - cl::sycl::cl_uint r{0}; + s::cl_uint r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::min(cl::sycl::cl_uint{5}, cl::sycl::cl_uint{2}); + AccR[0] = s::min(s::cl_uint{ 5 }, s::cl_uint{ 2 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2); } + // abs + { + s::cl_uint r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::abs(s::cl_int{ -5 }); + }); + }); + } + assert(r == 5); + } + + // abs_diff + { + s::cl_uint r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::abs_diff(s::cl_int{ -5 }, s::cl_int{ -1 }); + }); + }); + } + assert(r == 4); + } + + // add_sat + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::add_sat(s::cl_int{ 0x7FFFFFFF }, s::cl_int{ 100 }); + }); + }); + } + assert(r == 0x7FFFFFFF); + } + + // hadd + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::hadd(s::cl_int{ 0x0000007F }, s::cl_int{ 0x00000020 }); + }); + }); + } + assert(r == 0x0000004F); + } + + // rhadd + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + 
cgh.single_task([=]() { + AccR[0] = s::rhadd(s::cl_int{ 0x0000007F }, s::cl_int{ 0x00000020 }); + }); + }); + } + assert(r == 0x50); + } + + // clamp + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::clamp(s::cl_int{ 5 }, s::cl_int{ 10 }, s::cl_int{ 30 }); + }); + }); + } + assert(r == 10); + } + + // clz + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::clz(s::cl_int{ 0x0FFFFFFF }); + }); + }); + } + assert(r == 4); + } + + // mad_hi + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mad_hi(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }, + s::cl_int{ 0x00000001 }); + }); // 2^28 * 2^8 = 2^36 -> 0x10 00000000. + }); + } + assert(r == 0x11); + } + + // mad_sat + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mad_sat(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }, + s::cl_int{ 0x00000001 }); + }); // 2^28 * 2^8 = 2^36 -> 0x10 00000000 -> result is saturated in the + // product. + }); + } + assert(r == 0x7FFFFFFF); + } + + // mul_hi + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mul_hi(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }); + }); // 2^28 * 2^8 = 2^36 -> 0x10 00000000.
+ }); + } + assert(r == 0x10); + } + + // rotate + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::rotate(s::cl_int{ 0x11100000 }, s::cl_int{ 12 }); + }); + }); + } + assert(r == 0x00000111); + } + + // sub_sat + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::sub_sat(s::cl_int{ 10 }, s::cl_int(0x80000000)); + }); // 10 - (-2^31(minimum value)) = saturates on Maximum value + }); + } + assert(r == 0x7FFFFFFF); + } + + // upsample - 1 + { + s::cl_ushort r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_uchar{ 0x10 }, s::cl_uchar{ 0x10 }); + }); + }); + } + assert(r == 0x1010); + } + + // upsample - 2 + { + s::cl_short r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_char{ 0x10 }, s::cl_uchar{ 0x10 }); + }); + }); + } + assert(r == 0x1010); + } + + // upsample - 3 + { + s::cl_uint r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_ushort{ 0x0010 }, s::cl_ushort{ 0x0010 }); + }); + }); + } + assert(r == 0x00100010); + } + + // upsample - 4 + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_short{ 0x0010 }, s::cl_ushort{ 0x0010 }); + }); + }); + } + assert(r 
== 0x00100010); + } + + // upsample - 5 + { + s::cl_ulong r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::upsample(s::cl_uint{ 0x00000010 }, s::cl_uint{ 0x00000010 }); + }); + }); + } + assert(r == 0x0000001000000010); + } + + // upsample - 6 + { + s::cl_long r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::upsample(s::cl_int{ 0x00000010 }, s::cl_uint{ 0x00000010 }); + }); + }); + } + assert(r == 0x0000001000000010); + } + + // popcount + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::popcount(s::cl_int{ 0x000000FF }); + }); + }); + } + assert(r == 8); + } + + // mad24 + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::mad24(s::cl_int(0xFFFFFFFF), s::cl_int{ 20 }, s::cl_int{ 20 }); + }); + }); + } + assert(r == 0); + } + + // mul24 + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mul24(s::cl_int(0xFFFFFFFF), s::cl_int{ 20 }); + }); + }); + } + assert(r == -20); + } + return 0; -} \ No newline at end of file +} diff --git a/sycl/test/built-ins/scalar_math.cpp b/sycl/test/built-ins/scalar_math.cpp index f2ccea71c483b..1ab181e89698b 100644 --- a/sycl/test/built-ins/scalar_math.cpp +++ b/sycl/test/built-ins/scalar_math.cpp @@ -10,561 +10,549 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // acos { - 
cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::acos(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::acos(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 1.047f && r < 1.048f); // ~1.0471975511965979 } + // acosh { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::acosh(cl::sycl::cl_float{2.4}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::acosh(s::cl_float{ 2.4 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 1.522f && r < 1.523f); // ~1.5220793674636532 } + // acospi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::acospi(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::acospi(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.333f && r < 0.334f); // ~0.33333333333333337 } - // todo // asin { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = 
cl::sycl::asin(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::asin(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.523f && r < 0.524f); // ~0.5235987755982989 } + // asinh { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::asinh(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::asinh(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.481f && r < 0.482f); // ~0.48121182505960347 } + // asinpi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::asinpi(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::asinpi(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.166f && r < 0.167f); // ~0.16666666666666669 } + // atan { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::atan(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::atan(s::cl_float{ 0.5 }); + 
}); }); } - std::cout << "r " << r << std::endl; assert(r > 0.463f && r < 0.464f); // ~0.4636476090008061 } + // atan2 { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::atan2(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{0.5}); + AccR[0] = s::atan2(s::cl_float{ 0.5 }, s::cl_float{ 0.5 }); }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.785f && r < 0.786f); // ~0.7853981633974483 } + // atanh { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::atanh(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::atanh(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.549f && r < 0.550f); // ~0.5493061443340549 } + // atanpi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::atanpi(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::atanpi(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.147f && r < 0.148f); // ~0.14758361765043326 } // atan2pi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - 
myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::atan2pi(cl::sycl::cl_float{0.5}, - cl::sycl::cl_float{0.5}); + AccR[0] = s::atan2pi(s::cl_float{ 0.5 }, s::cl_float{ 0.5 }); }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.249f && r < 0.251f); // ~0.25 } + // cbrt { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::cbrt(cl::sycl::cl_float{27.0}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cbrt(s::cl_float{ 27.0 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 3.f); } + // ceil { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::ceil(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::ceil(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 1.f); } + // copysign { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::copysign(cl::sycl::cl_float{1}, - cl::sycl::cl_float{-0.5}); + AccR[0] = 
s::copysign(s::cl_float{ 1 }, s::cl_float{ -0.5 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == -1.f); } + // cos { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::cos(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cos(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.877f && r < 0.878f); // ~0.8775825618903728 } + // cosh { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::cosh(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cosh(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 1.127f && r < 1.128f); // ~1.1276259652063807 } + // cospi { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::cospi(cl::sycl::cl_float{0.1}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cospi(s::cl_float{ 0.1 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.951f && r < 0.952f); // ~0.9510565162951535 } + // erfc { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - 
myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::erfc(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::erfc(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.479f && r < 0.480f); // ~0.4795001221869535 } + // erf { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::erf(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::erf(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.520f && r < 0.521f); // ~0.5204998778130465 } + // exp { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::exp(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::exp(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 1.648f && r < 1.649f); // ~1.6487212707001282 } + // exp2 { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::exp2(cl::sycl::cl_float{8.0}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR 
= BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::exp2(s::cl_float{ 8.0 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 256.0f); } // exp10 { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::exp10(cl::sycl::cl_float{2}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::exp10(s::cl_float{ 2 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 100.0f); } + // expm1 { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::expm1(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::expm1(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r > 0.648f && r < 0.649f); // ~0.6487212707001282 } + // fabs { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::fabs(cl::sycl::cl_float{-0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::fabs(s::cl_float{ -0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.5f); } + // fdim { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = 
BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fdim(cl::sycl::cl_float{1.6}, cl::sycl::cl_float{0.6}); + AccR[0] = s::fdim(s::cl_float{ 1.6 }, s::cl_float{ 0.6 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 1.0f); } + // floor { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task( - [=]() { AccR[0] = cl::sycl::floor(cl::sycl::cl_float{0.5}); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::floor(s::cl_float{ 0.5 }); + }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.f); } + // fma { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fma(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{10.0}, - cl::sycl::cl_float{3.0}); + AccR[0] = s::fma(s::cl_float{ 0.5 }, s::cl_float{ 10.0 }, + s::cl_float{ 3.0 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 8.0f); } + // fmax { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fmax(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{0.8}); + AccR[0] = s::fmax(s::cl_float{ 0.5 }, 
s::cl_float{ 0.8 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.8f); } + // fmin { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fmin(cl::sycl::cl_float{0.5}, cl::sycl::cl_float{0.8}); + AccR[0] = s::fmin(s::cl_float{ 0.5 }, s::cl_float{ 0.8 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 0.5f); } + // fmod { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::fmod(cl::sycl::cl_float{5.1}, cl::sycl::cl_float{3.0}); + AccR[0] = s::fmod(s::cl_float{ 5.1 }, s::cl_float{ 3.0 }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 2.1f); } - // fract // fract with global memory - /*{ - cl::sycl::cl_float r{0}; - cl::sycl::cl_float i{999}; - { - buffer BufR(&r, range<1>(1)); - buffer BufI(&i, range<1>(1), - {property::buffer::use_host_ptr()}); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - auto AccI = BufI.get_access(cgh); + { + s::cl_float r{ 0 }; + s::cl_float i{ 999 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::buffer BufI(&i, s::range<1>(1), + { s::property::buffer::use_host_ptr() }); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + auto AccI = BufI.get_access(cgh); cgh.single_task([=]() { - global_ptr Iptr(AccI); - AccR[0] = cl::sycl::fract(cl::sycl::cl_float{1.5}, Iptr); + s::global_ptr Iptr(AccI); + AccR[0] = 
s::fract(s::cl_float{ 1.5 }, Iptr); }); }); } - std::cout << "r " << r << " i " << i << std::endl; assert(r == 0.5f); assert(i == 1.0f); } // fract with private memory { - cl::sycl::cl_float r{0}; - cl::sycl::cl_float i{999}; + s::cl_float r{ 0 }; + s::cl_float i{ 999 }; { - buffer BufR(&r, range<1>(1)); - buffer BufI(&i, range<1>(1), - {property::buffer::use_host_ptr()}); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - auto AccI = BufI.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::buffer BufI(&i, s::range<1>(1), + { s::property::buffer::use_host_ptr() }); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + auto AccI = BufI.get_access(cgh); cgh.single_task([=]() { - cl::sycl::cl_float temp(0.0); - private_ptr Iptr(&temp); - AccR[0] = cl::sycl::fract(cl::sycl::cl_float{1.5f}, Iptr); + s::cl_float temp(0.0); + s::private_ptr Iptr(&temp); + AccR[0] = s::fract(s::cl_float{ 1.5f }, Iptr); AccI[0] = *Iptr; }); }); } - std::cout << "r " << r << " i " << i << std::endl; assert(r == 0.5f); assert(i == 1.0f); - }*/ + } // nan { - cl::sycl::cl_double r{0}; + s::cl_double r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { AccR[0] = cl::sycl::nan(1LLU); }); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { AccR[0] = s::nan(1LLU); }); }); } - std::cout << "r " << r << std::endl; assert(std::isnan(r)); } - // native exp - { - cl::sycl::cl_float r{0}; - { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = cl::sycl::native::exp(cl::sycl::cl_float{1.0f}); - }); - }); - } - std::cout << "r " << r << std::endl; - assert(r > 2.718f && r < 2.719f); // 
~2.718281828459045 - } - return 0; } diff --git a/sycl/test/built-ins/scalar_relational.cpp b/sycl/test/built-ins/scalar_relational.cpp index e61b7ef42a2ff..11642e544b300 100644 --- a/sycl/test/built-ins/scalar_relational.cpp +++ b/sycl/test/built-ins/scalar_relational.cpp @@ -6,361 +6,412 @@ #include -#include #include -#include // for NAN +#include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // isequal-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isequal(cl::sycl::cl_float{10.5f}, - cl::sycl::cl_float{10.5f}); + AccR[0] = s::isequal(s::cl_float{ 10.5f }, s::cl_float{ 10.5f }); }); }); } - std::cout << "garima isequal r \t" << r << std::endl; assert(r == 1); } // isnotequal-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isnotequal(cl::sycl::cl_float{0.4f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::isnotequal(s::cl_float{ 0.4f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "isnotequal r \t" << r << std::endl; assert(r == 1); } // isgreater-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isgreater(cl::sycl::cl_float{0.6f}, - 
cl::sycl::cl_float{0.5f}); + AccR[0] = s::isgreater(s::cl_float{ 0.6f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "isgreater r \t" << r << std::endl; assert(r == 1); } // isgreaterequal-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isgreaterequal(cl::sycl::cl_float{0.5f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::isgreaterequal(s::cl_float{ 0.5f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "isgreaterequal r \t" << r << std::endl; assert(r == 1); } // isless-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isless(cl::sycl::cl_float{0.4f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::isless(s::cl_float{ 0.4f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "isless r \t" << r << std::endl; assert(r == 1); } // islessequal-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::islessequal(cl::sycl::cl_float{0.5f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::islessequal(s::cl_float{ 0.5f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "islessequal r \t" << r << std::endl; assert(r == 1); } // islessgreater-float { - cl::sycl::cl_int r{1}; + s::cl_int 
r{ 1 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::islessgreater(cl::sycl::cl_float{0.5f}, - cl::sycl::cl_float{0.5f}); + AccR[0] = s::islessgreater(s::cl_float{ 0.5f }, s::cl_float{ 0.5f }); }); }); } - std::cout << "islessgreater r \t" << r << std::endl; assert(r == 0); } - // isfinite-float : host only + // isfinite-float { - cl::sycl::cl_int r{1}; + s::cl_int r{ 1 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 0; -#else - AccR[0] = cl::sycl::isfinite(cl::sycl::cl_float{NAN}); -#endif + AccR[0] = s::isfinite(s::cl_float{ NAN }); }); }); } - std::cout << "isfinite r \t" << r << std::endl; assert(r == 0); } - // isinf-float : host only + // isinf-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::isinf(cl::sycl::cl_float{INFINITY}); -#endif + AccR[0] = s::isinf(s::cl_float{ INFINITY }); }); }); } - std::cout << "isinf r \t" << r << std::endl; assert(r == 1); } - // isnan-float : host only + // isnan-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer 
BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::isnan(cl::sycl::cl_float{NAN}); -#endif + AccR[0] = s::isnan(s::cl_float{ NAN }); }); }); } - std::cout << "isnan r \t" << r << std::endl; assert(r == 1); } - // isnormal-float : host only + // isnormal-float { - cl::sycl::cl_int r{1}; + s::cl_int r{ 1 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 0; -#else - AccR[0] = cl::sycl::isnormal(cl::sycl::cl_float{INFINITY}); -#endif + AccR[0] = s::isnormal(s::cl_float{ INFINITY }); + }); + }); + } + assert(r == 0); + } + + // isnormal-double + { + s::cl_int r{ 1 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::isnormal(s::cl_double{ INFINITY }); }); }); } - std::cout << "isnormal r \t" << r << std::endl; assert(r == 0); } // isordered-float { - cl::sycl::cl_int r{1}; + s::cl_int r{ 1 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isordered(cl::sycl::cl_float{4.0f}, - cl::sycl::cl_float{NAN}); + AccR[0] = s::isordered(s::cl_float{ 4.0f }, s::cl_float{ NAN }); }); }); } - std::cout << "isordered r \t" << r << std::endl; assert(r == 0); } // isunordered-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue 
myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isunordered(cl::sycl::cl_float{4.0f}, - cl::sycl::cl_float{NAN}); + AccR[0] = s::isunordered(s::cl_float{ 4.0f }, s::cl_float{ NAN }); }); }); } - std::cout << "isunordered r \t" << r << std::endl; assert(r == 1); } - // signbit-float : host only + // signbit-float { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::signbit(cl::sycl::cl_float{-12.0f}); -#endif + AccR[0] = s::signbit(s::cl_float{ -12.0f }); }); }); } - std::cout << "signbit r \t" << r << std::endl; assert(r == 1); } - // any-integer : host only + // any-integer { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::any(cl::sycl::cl_int{-12}); -#endif + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int{ 12 }); + }); + }); + } + assert(r == 0); + } + // any-integer + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int{ 0 }); + }); + }); + } + assert(r == 0); + } + + // any-integer + { + s::cl_int 
r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int{ -12 }); }); }); } - std::cout << "any r \t" << r << std::endl; assert(r == 1); } - // all-integer : host only + // all-integer + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int{ 12 }); + }); + }); + } + assert(r == 0); + } + + // all-integer + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int{ 0 }); + }); + }); + } + assert(r == 0); + } + + // all-integer { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::all(cl::sycl::cl_int{-12}); -#endif + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int{ -12 }); }); }); } - std::cout << "all r \t" << r << std::endl; assert(r == 1); } // bitselect-float { - cl::sycl::cl_float r{0.0f}; + s::cl_float r{ 0.0f }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::bitselect(cl::sycl::cl_float{112.112}, - cl::sycl::cl_float{34.34}, - cl::sycl::cl_float{3.3}); + AccR[0] = s::bitselect(s::cl_float{ 112.112 }, 
s::cl_float{ 34.34 }, + s::cl_float{ 3.3 }); }); }); } - std::cout << "bitselect r \t" << r << std::endl; assert(r <= 80.5478 && r >= 80.5476); // r = 80.5477 } - // select-float,int : host only + // select-float,int + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::select(s::cl_float{ 34.34 }, s::cl_float{ 123.123 }, + s::cl_int{ 1 }); + }); + }); + } + assert(r <= 123.124 && r >= 123.122); // r = 123.123 + } + + // select-float,int + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::select(s::cl_float{ 34.34 }, s::cl_float{ 123.123 }, + s::cl_int{ 0 }); + }); + }); + } + assert(r <= 34.35 && r >= 34.33); // r = 34.34 + } + + // select-float,int { - cl::sycl::cl_float r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 123.123; -#else - AccR[0] = cl::sycl::select(cl::sycl::cl_float{34.34}, - cl::sycl::cl_float{123.123}, - cl::sycl::cl_int{1}); -#endif + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::select(s::cl_float{ 34.34 }, s::cl_float{ 123.123 }, + s::cl_int{ -1 }); }); }); } - std::cout << "select r \t" << r << std::endl; assert(r <= 123.124 && r >= 123.122); // r = 123.123 } diff --git a/sycl/test/built-ins/vector_common.cpp b/sycl/test/built-ins/vector_common.cpp index dcf877b7772ac..fe8c3527dc07e 100644 --- a/sycl/test/built-ins/vector_common.cpp +++ b/sycl/test/built-ins/vector_common.cpp @@ -6,50 +6,46 @@ #include -#include #include -using namespace cl::sycl; 
+namespace s = cl::sycl; int main() { // max { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_float2{0.5f, 3.4f}, - cl::sycl::cl_float2{2.3f, 0.4f}); + AccR[0] = + s::max(s::cl_float2{ 0.5f, 3.4f }, s::cl_float2{ 2.3f, 0.4f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 2.3f); assert(r2 == 3.4f); } // max { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_float2{0.5f, 3.4f}, - cl::sycl::cl_float{3.0f}); + AccR[0] = s::max(s::cl_float2{ 0.5f, 3.4f }, s::cl_float{ 3.0f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 3.0f); assert(r2 == 3.4f); } diff --git a/sycl/test/built-ins/vector_geometric.cpp b/sycl/test/built-ins/vector_geometric.cpp index cdcdc8f17752f..4bf353341d2c9 100644 --- a/sycl/test/built-ins/vector_geometric.cpp +++ b/sycl/test/built-ins/vector_geometric.cpp @@ -6,33 +6,163 @@ #include -#include #include +#include -using namespace cl::sycl; +namespace s = cl::sycl; + +bool isFloatEqualTo(float x, float y, float epsilon = 0.005f) { + return std::fabs(x - y) <= epsilon; +} int main() { // dot { - cl::sycl::cl_float 
r{0}; + s::cl_float r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::dot( - cl::sycl::cl_float2{ - 1.f, - 2.f, - }, - cl::sycl::cl_float2{4.f, 6.f}); + AccR[0] = s::dot(s::cl_float2{ 1.f, 2.f, }, s::cl_float2{ 4.f, 6.f }); }); }); } - std::cout << "r " << r << std::endl; assert(r == 16.f); } + // cross + { + s::cl_float4 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::cross(s::cl_float4{ 2.f, 3.f, 4.f, 0.f, }, + s::cl_float4{ 5.f, 6.f, 7.f, 0.f, }); + }); + }); + } + + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float r3 = r.z(); + s::cl_float r4 = r.w(); + + assert(r1 == -3.f); + assert(r2 == 6.f); + assert(r3 == -3.f); + assert(r4 == 0.0f); + } + + // distance + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::distance(s::cl_float2{ 1.f, 2.f, }, s::cl_float2{ 3.f, 4.f, }); + }); + }); + } + assert(isFloatEqualTo(r, 2.82843f)); + } + + // length + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::length(s::cl_float2{ 1.f, 2.f, }); + }); + }); + } + assert(isFloatEqualTo(r, 2.23607f)); + } + + // normalize + { + s::cl_float2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::normalize(s::cl_float2{ 1.f, 2.f, }); + }); + 
}); + } + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + + assert(isFloatEqualTo(r1, 0.447214f)); + assert(isFloatEqualTo(r2, 0.894427f)); + } + + // fast_distance + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::fast_distance(s::cl_float2{ 1.f, 2.f, }, + s::cl_float2{ 3.f, 4.f, }); + }); + }); + } + assert(isFloatEqualTo(r, 2.82843f)); + } + + // fast_length + { + s::cl_float r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::fast_length(s::cl_float2{ 1.f, 2.f, }); + }); + }); + } + assert(isFloatEqualTo(r, 2.23607f)); + } + + // fast_normalize + { + s::cl_float2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::fast_normalize(s::cl_float2{ 1.f, 2.f, }); + }); + }); + } + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + + assert(isFloatEqualTo(r1, 0.447144)); + assert(isFloatEqualTo(r2, 0.894287)); + } + return 0; } \ No newline at end of file diff --git a/sycl/test/built-ins/vector_integer.cpp b/sycl/test/built-ins/vector_integer.cpp index c6e4ee9b0247a..507bd29838066 100644 --- a/sycl/test/built-ins/vector_integer.cpp +++ b/sycl/test/built-ins/vector_integer.cpp @@ -1,7 +1,7 @@ // RUN: %clang -std=c++11 -fsycl %s -o %t.out -lstdc++ -lOpenCL -lsycl // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUNx: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out #include @@ -9,174 +9,598 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // max { - cl::sycl::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - 
queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::max(cl::sycl::cl_int2{5, 3}, cl::sycl::cl_int2{2, 7}); + AccR[0] = s::max(s::cl_int2{ 5, 3 }, s::cl_int2{ 2, 7 }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); assert(r1 == 5); assert(r2 == 7); } // max { - cl::sycl::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::max(cl::sycl::cl_uint2{5, 3}, cl::sycl::cl_uint2{2, 7}); + AccR[0] = s::max(s::cl_uint2{ 5, 3 }, s::cl_uint2{ 2, 7 }); }); }); } - cl::sycl::cl_uint r1 = r.x(); - cl::sycl::cl_uint r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); assert(r1 == 5); assert(r2 == 7); } // max { - cl::sycl::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::max(cl::sycl::cl_int2{5, 3}, cl::sycl::cl_int{2}); + AccR[0] = s::max(s::cl_int2{ 5, 3 }, s::cl_int{ 2 }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); assert(r1 == 5); assert(r2 == 3); } // max { - 
cl::sycl::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::max(cl::sycl::cl_uint2{5, 3}, cl::sycl::cl_uint{2}); + AccR[0] = s::max(s::cl_uint2{ 5, 3 }, s::cl_uint{ 2 }); }); }); } - cl::sycl::cl_uint r1 = r.x(); - cl::sycl::cl_uint r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); assert(r1 == 5); assert(r2 == 3); } // min { - cl::sycl::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::min(cl::sycl::cl_int2{5, 3}, cl::sycl::cl_int2{2, 7}); + AccR[0] = s::min(s::cl_int2{ 5, 3 }, s::cl_int2{ 2, 7 }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); assert(r1 == 2); assert(r2 == 3); } // min { - cl::sycl::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::min(cl::sycl::cl_uint2{5, 3}, cl::sycl::cl_uint2{2, 7}); + AccR[0] = s::min(s::cl_uint2{ 5, 3 }, s::cl_uint2{ 2, 7 }); }); }); } - cl::sycl::cl_uint r1 = r.x(); - cl::sycl::cl_uint r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_uint 
r1 = r.x(); + s::cl_uint r2 = r.y(); assert(r1 == 2); assert(r2 == 3); } // min { - cl::sycl::cl_int2 r{0}; + s::cl_int2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::min(cl::sycl::cl_int2{5, 3}, cl::sycl::cl_int{2}); + AccR[0] = s::min(s::cl_int2{ 5, 3 }, s::cl_int{ 2 }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); assert(r1 == 2); assert(r2 == 2); } // min { - cl::sycl::cl_uint2 r{0}; + s::cl_uint2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::min(cl::sycl::cl_uint2{5, 3}, cl::sycl::cl_uint{2}); + AccR[0] = s::min(s::cl_uint2{ 5, 3 }, s::cl_uint{ 2 }); }); }); } - cl::sycl::cl_uint r1 = r.x(); - cl::sycl::cl_uint r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); assert(r1 == 2); assert(r2 == 2); } + // abs + { + s::cl_uint2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::abs(s::cl_int2{ -5, -2 }); + }); + }); + } + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); + assert(r1 == 5); + assert(r2 == 2); + } + + // abs_diff + { + s::cl_uint2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { 
+ AccR[0] = s::abs_diff(s::cl_int2{ -5, -2 }, s::cl_int2{ -1, -1 }); + }); + }); + } + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); + assert(r1 == 4); + assert(r2 == 1); + } + + // add_sat + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::add_sat(s::cl_int2{ 0x7FFFFFFF, 0x7FFFFFFF }, + s::cl_int2{ 100, 90 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x7FFFFFFF); + assert(r2 == 0x7FFFFFFF); + } + + // hadd + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::hadd(s::cl_int2{ 0x0000007F, 0x0000007F }, + s::cl_int2{ 0x00000020, 0x00000020 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x0000004F); + assert(r2 == 0x0000004F); + } + + // rhadd + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::rhadd(s::cl_int2{ 0x0000007F, 0x0000007F }, + s::cl_int2{ 0x00000020, 0x00000020 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x00000050); + assert(r2 == 0x00000050); + } + + // clamp - 1 + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::clamp(s::cl_int2{ 5, 5 }, s::cl_int2{ 10, 10 }, + s::cl_int2{ 30, 30 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 10); + assert(r2 == 10); + } + + // clamp - 2 + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR 
= BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = + s::clamp(s::cl_int2{ 5, 5 }, s::cl_int{ 10 }, s::cl_int{ 30 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 10); + assert(r2 == 10); + } + + // clz + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::clz(s::cl_int2{ 0x0FFFFFFF, 0x0FFFFFFF }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 4); + assert(r2 == 4); + } + + // mad_hi + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mad_hi(s::cl_int2{ 0x10000000, 0x10000000 }, + s::cl_int2{ 0x00000100, 0x00000100 }, + s::cl_int2{ 1, 1 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x11); + assert(r2 == 0x11); + } + + // mad_sat + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mad_sat(s::cl_int2{ 0x10000000, 0x10000000 }, + s::cl_int2{ 0x00000100, 0x00000100 }, + s::cl_int2{ 1, 1 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x7FFFFFFF); + assert(r2 == 0x7FFFFFFF); + } + + // mul_hi + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mul_hi(s::cl_int2{ 0x10000000, 0x10000000 }, + s::cl_int2{ 0x00000100, 0x00000100 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x10); + assert(r2 == 0x10); + } + + // rotate + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue 
myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::rotate(s::cl_int2{ 0x11100000, 0x11100000 }, + s::cl_int2{ 12, 12 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x00000111); + assert(r2 == 0x00000111); + } + + // sub_sat + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::sub_sat(s::cl_int2{ 10, 10 }, + s::cl_int2{ int(0x80000000), int(0x80000000) }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x7FFFFFFF); + assert(r2 == 0x7FFFFFFF); + } + + // upsample - 1 + { + s::cl_ushort2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_uchar2{ 0x10, 0x10 }, + s::cl_uchar2{ 0x10, 0x10 }); + }); + }); + } + s::cl_ushort r1 = r.x(); + s::cl_ushort r2 = r.y(); + assert(r1 == 0x1010); + assert(r2 == 0x1010); + } + + // upsample - 2 + { + s::cl_short2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_char2{ 0x10, 0x10 }, + s::cl_uchar2{ 0x10, 0x10 }); + }); + }); + } + s::cl_short r1 = r.x(); + s::cl_short r2 = r.y(); + assert(r1 == 0x1010); + assert(r2 == 0x1010); + } + + // upsample - 3 + { + s::cl_uint2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_ushort2{ 0x0010, 0x0010 }, + s::cl_ushort2{ 0x0010, 0x0010 }); + }); + }); + } + s::cl_uint r1 = r.x(); + s::cl_uint r2 = r.y(); + assert(r1 == 0x00100010); + assert(r2 == 0x00100010); 
+ } + + // upsample - 4 + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_short2{ 0x0010, 0x0010 }, + s::cl_ushort2{ 0x0010, 0x0010 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0x00100010); + assert(r2 == 0x00100010); + } + + // upsample - 5 + { + s::cl_ulong2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_uint2{ 0x00000010, 0x00000010 }, + s::cl_uint2{ 0x00000010, 0x00000010 }); + }); + }); + } + s::cl_ulong r1 = r.x(); + s::cl_ulong r2 = r.y(); + assert(r1 == 0x0000001000000010); + assert(r2 == 0x0000001000000010); + } + + // upsample - 6 + { + s::cl_long2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::upsample(s::cl_int2{ 0x00000010, 0x00000010 }, + s::cl_uint2{ 0x00000010, 0x00000010 }); + }); + }); + } + s::cl_long r1 = r.x(); + s::cl_long r2 = r.y(); + assert(r1 == 0x0000001000000010); + assert(r2 == 0x0000001000000010); + } + + // popcount + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::popcount(s::cl_int2{ 0x000000FF, 0x000000FF }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 8); + assert(r2 == 8); + } + + // mad24 + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mad24(s::cl_int2{ 0xFFFFFFFF, 0xFFFFFFFF }, + s::cl_int2{ 20, 
20 }, s::cl_int2{ 20, 20 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == 0); + assert(r2 == 0); + } + + // mul24 + { + s::cl_int2 r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::mul24(s::cl_int2{ 0xFFFFFFFF, 0xFFFFFFFF }, + s::cl_int2{ 20, 20 }); + }); + }); + } + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + assert(r1 == -20); + assert(r2 == -20); + } + return 0; } diff --git a/sycl/test/built-ins/vector_math.cpp b/sycl/test/built-ins/vector_math.cpp index 3b1c9d5b1220c..b822a931a34dd 100644 --- a/sycl/test/built-ins/vector_math.cpp +++ b/sycl/test/built-ins/vector_math.cpp @@ -9,115 +9,110 @@ #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // fmin { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::fmin(cl::sycl::cl_float2{0.5f, 3.4f}, - cl::sycl::cl_float2{2.3f, 0.4f}); + AccR[0] = + s::fmin(s::cl_float2{ 0.5f, 3.4f }, s::cl_float2{ 2.3f, 0.4f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 0.5f); assert(r2 == 0.4f); } // fabs { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = 
cl::sycl::fabs(cl::sycl::cl_float2{-1.0f, 2.0f}); + AccR[0] = s::fabs(s::cl_float2{ -1.0f, 2.0f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 1.0f); assert(r2 == 2.0f); } // floor { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::floor(cl::sycl::cl_float2{1.4f, 2.8f}); + AccR[0] = s::floor(s::cl_float2{ 1.4f, 2.8f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 1.0f); assert(r2 == 2.0f); } // ceil { - cl::sycl::cl_float2 r{0}; + s::cl_float2 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::ceil(cl::sycl::cl_float2{1.4f, 2.8f}); + AccR[0] = s::ceil(s::cl_float2{ 1.4f, 2.8f }); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); assert(r1 == 2); assert(r2 == 3); } // fract with global memory - /*{ - cl::sycl::cl_float2 r{0, 0}; - cl::sycl::cl_float2 i{0, 0}; + { + s::cl_float2 r{ 0, 0 }; + s::cl_float2 i{ 0, 0 }; { - buffer BufR(&r, range<1>(1)); - buffer BufI(&i, range<1>(1)); + s::buffer BufR(&r, s::range<1>(1)); + s::buffer BufI(&i, s::range<1>(1)); - queue myQueue; - 
myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - auto AccI = BufI.get_access(cgh); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + auto AccI = BufI.get_access(cgh); cgh.single_task([=]() { - global_ptr Iptr(AccI); - AccR[0] = cl::sycl::fract(cl::sycl::cl_float2{1.5f, 2.5f}, Iptr); + s::global_ptr Iptr(AccI); + AccR[0] = s::fract(s::cl_float2{ 1.5f, 2.5f }, Iptr); }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float i1 = i.x(); - cl::sycl::cl_float i2 = i.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << " i1 " << i1 << " i2 " << i2 - << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float i1 = i.x(); + s::cl_float i2 = i.y(); + assert(r1 == 0.5f); assert(r2 == 0.5f); assert(i1 == 1.0f); @@ -126,35 +121,34 @@ int main() { // fract with private memory { - cl::sycl::cl_float2 r{0, 0}; - cl::sycl::cl_float2 i{0, 0}; + s::cl_float2 r{ 0, 0 }; + s::cl_float2 i{ 0, 0 }; { - buffer BufR(&r, range<1>(1)); - buffer BufI(&i, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); - auto AccI = BufI.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::buffer BufI(&i, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + auto AccI = BufI.get_access(cgh); cgh.single_task([=]() { - cl::sycl::cl_float2 temp(0.0); - private_ptr Iptr(&temp); - AccR[0] = cl::sycl::fract(cl::sycl::cl_float2{1.5f, 2.5f}, Iptr); + s::cl_float2 temp(0.0); + s::private_ptr Iptr(&temp); + AccR[0] = s::fract(s::cl_float2{ 1.5f, 2.5f }, Iptr); AccI[0] = *Iptr; }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float i1 = i.x(); - cl::sycl::cl_float i2 = i.y(); - std::cout << "r1 " << r1 << " r2 " << r2 << " i1 " << i1 << " i2 " << i2 - << std::endl; + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float 
i1 = i.x(); + s::cl_float i2 = i.y(); + assert(r1 == 0.5f); assert(r2 == 0.5f); assert(i1 == 1.0f); assert(i2 == 2.0f); - }*/ + } return 0; } diff --git a/sycl/test/built-ins/vector_relational.cpp b/sycl/test/built-ins/vector_relational.cpp index 4e0ac2bc37fdc..88a030ae9fcfa 100644 --- a/sycl/test/built-ins/vector_relational.cpp +++ b/sycl/test/built-ins/vector_relational.cpp @@ -6,35 +6,31 @@ #include -#include #include #include -using namespace cl::sycl; +namespace s = cl::sycl; int main() { // isequal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::isequal(cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isequal(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == 0); assert(r3 == 0); @@ -43,26 +39,23 @@ int main() { // isnotequal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isnotequal( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = 
s::isnotequal(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == -1); @@ -71,26 +64,23 @@ int main() { // isgreater { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isgreater( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isgreater(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == 0); @@ -99,26 +89,23 @@ int main() { // isgreaterequal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isgreaterequal( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = 
s::isgreaterequal(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -127,26 +114,23 @@ int main() { // isless { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::isless(cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isless(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == 0); @@ -155,26 +139,23 @@ int main() { // islessequal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::islessequal( - cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = 
s::islessequal(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -183,26 +164,24 @@ int main() { // islessgreater { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::islessgreater( - cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, INFINITY}); + AccR[0] = + s::islessgreater(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == 0); @@ -210,124 +189,96 @@ int main() { // other value except Infinity. 
} - // isfinite : host only + // isfinite { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{-1, -1, 0, 0}; -#else - AccR[0] = cl::sycl::isfinite( - cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); -#endif + AccR[0] = s::isfinite(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); assert(r4 == 0); } - // isinf : host only + // isinf { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{0, 0, 0, -1}; -#else - AccR[0] = - cl::sycl::isinf(cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); -#endif + AccR[0] = s::isinf(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == 0); assert(r3 == 0); assert(r4 == -1); } - // 
isnan : host only + // isnan { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{0, 0, -1, 0}; -#else - AccR[0] = - cl::sycl::isnan(cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); -#endif + AccR[0] = s::isnan(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == 0); assert(r3 == -1); assert(r4 == 0); } - // isnormal : host only + // isnormal { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{-1, -1, 0, 0}; -#else - AccR[0] = cl::sycl::isnormal( - cl::sycl::cl_float4{0.5f, 0.4f, NAN, INFINITY}); -#endif + AccR[0] = s::isnormal(s::cl_float4{ 0.5f, 0.4f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -336,26 +287,23 @@ int 
main() { // isordered { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isordered( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isordered(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == -1); assert(r2 == -1); assert(r3 == 0); @@ -364,177 +312,262 @@ int main() { // isunordered { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = cl::sycl::isunordered( - cl::sycl::cl_float4{0.5f, 0.6f, NAN, INFINITY}, - cl::sycl::cl_float4{0.5f, 0.5f, 0.5f, 0.5f}); + AccR[0] = s::isunordered(s::cl_float4{ 0.5f, 0.6f, NAN, INFINITY }, + s::cl_float4{ 0.5f, 0.5f, 0.5f, 0.5f }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 0); assert(r2 == 0); assert(r3 == -1); assert(r4 == 0); } - // 
signbit : host only + // signbit { - cl::sycl::cl_int4 r{0}; + s::cl_int4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_int4{0, -1, 0, 0}; -#else - AccR[0] = cl::sycl::signbit( - cl::sycl::cl_float4{0.5f, -12.0f, NAN, INFINITY}); -#endif + AccR[0] = s::signbit(s::cl_float4{ 0.5f, -12.0f, NAN, INFINITY }); }); }); } - cl::sycl::cl_int r1 = r.x(); - cl::sycl::cl_int r2 = r.y(); - cl::sycl::cl_int r3 = r.z(); - cl::sycl::cl_int r4 = r.w(); + s::cl_int r1 = r.x(); + s::cl_int r2 = r.y(); + s::cl_int r3 = r.z(); + s::cl_int r4 = r.w(); - std::cout << "sign r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " - << r4 << std::endl; assert(r1 == 0); assert(r2 == -1); assert(r3 == 0); assert(r4 == 0); } - // any : host only. + // any. // Call to the device function with vector parameters work. Scalars do not. { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::any(cl::sycl::cl_int4{-12, -12, 0, 1}); -#endif + AccR[0] = s::any(s::cl_int4{ -12, -12, 0, 1 }); }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; - std::cout << "Any r1 " << r1 << std::endl; assert(r1 == 1); } - // all : host only. + // any. + // Call to the device function with vector parameters work. Scalars do not. 
+ { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int4{ -12, -12, -12, -12 }); + }); + }); + } + s::cl_int r1 = r; + + assert(r1 == 1); + } + + // any. + // Call to the device function with vector parameters work. Scalars do not. + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int4{ 0, 0, 0, 0 }); + }); + }); + } + s::cl_int r1 = r; + + assert(r1 == 0); + } + + // any. // Call to the device function with vector parameters work. Scalars do not. { - cl::sycl::cl_int r{0}; + s::cl_int r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::any(s::cl_int4{ 12, 12, 12, 12 }); + }); + }); + } + s::cl_int r1 = r; + + assert(r1 == 0); + } + + // all. + // Call to the device function with vector parameters work. Scalars do not. + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = 1; -#else - AccR[0] = cl::sycl::all(cl::sycl::cl_int4{-12, -12, -12, -12}); + AccR[0] = s::all(s::cl_int4{ -12, -12, -12, -12 }); // Infinity (positive or negative) or Nan are not integers. // Passing them creates inconsistent results between host and device // execution. -#endif }); }); } - cl::sycl::cl_int r1 = r; + s::cl_int r1 = r; - std::cout << "All change r1 " << r1 << std::endl; assert(r1 == 1); } + // all. 
+ // Call to the device function with vector parameters work. Scalars do not. + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int4{ -12, -12, -12, -12 }); + }); + }); + } + s::cl_int r1 = r; + + assert(r1 == 1); + } + + // all. + // Call to the device function with vector parameters work. Scalars do not. + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int4{ 0, 0, 0, 0 }); + }); + }); + } + s::cl_int r1 = r; + + assert(r1 == 0); + } + + // all. + // Call to the device function with vector parameters work. Scalars do not. + { + s::cl_int r{ 0 }; + { + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); + cgh.single_task([=]() { + AccR[0] = s::all(s::cl_int4{ 12, 12, 12, 12 }); + }); + }); + } + s::cl_int r1 = r; + + assert(r1 == 0); + } + // bitselect { - cl::sycl::cl_float4 r{0}; + s::cl_float4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - cl::sycl::bitselect(cl::sycl::cl_float4{112.112, 12.12, 0, 0.0}, - cl::sycl::cl_float4{34.34, 23.23, 1, 0.0}, - cl::sycl::cl_float4{3.3, 6.6, 1, 0.0}); + AccR[0] = s::bitselect(s::cl_float4{ 112.112, 12.12, 0, 0.0 }, + s::cl_float4{ 34.34, 23.23, 1, 0.0 }, + s::cl_float4{ 3.3, 6.6, 1, 0.0 }); }); // Using NAN/INFINITY as any float produced consistent results // between host and device. 
}); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float r3 = r.z(); - cl::sycl::cl_float r4 = r.w(); + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float r3 = r.z(); + s::cl_float r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(abs(r1 - 80.5477f) < 0.0001); assert(abs(r2 - 18.2322f) < 0.0001); assert(abs(r3 - 1.0f) < 0.01); assert(abs(r4 - 0.0f) < 0.01); } - // select : host only + // select { - cl::sycl::cl_float4 r{0}; + s::cl_float4 r{ 0 }; { - buffer BufR(&r, range<1>(1)); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto AccR = BufR.get_access(cgh); + s::buffer BufR(&r, s::range<1>(1)); + s::queue myQueue; + myQueue.submit([&](s::handler &cgh) { + auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { -#ifdef __SYCL_DEVICE_ONLY__ - AccR[0] = cl::sycl::cl_float4{112.112f, 112.112f, 112.112f, 112.112f}; -#else - AccR[0] = cl::sycl::select( - cl::sycl::cl_float4{112.112f, 34.34f, 112.112f, 34.34f}, - cl::sycl::cl_float4{34.34f, 112.112f, 34.34f, 112.112f}, - cl::sycl::cl_int4{0, -1, 0, -1}); + AccR[0] = + s::select(s::cl_float4{ 112.112f, 34.34f, 112.112f, 34.34f }, + s::cl_float4{ 34.34f, 112.112f, 34.34f, 112.112f }, + s::cl_int4{ 0, -1, 0, 1 }); // Using NAN/infinity as an input, which gets // selected by -1, produces a NAN/infinity as expected. -#endif }); }); } - cl::sycl::cl_float r1 = r.x(); - cl::sycl::cl_float r2 = r.y(); - cl::sycl::cl_float r3 = r.z(); - cl::sycl::cl_float r4 = r.w(); + s::cl_float r1 = r.x(); + s::cl_float r2 = r.y(); + s::cl_float r3 = r.z(); + s::cl_float r4 = r.w(); - std::cout << "r1 " << r1 << " r2 " << r2 << " r3 " << r3 << " r4 " << r4 - << std::endl; assert(r1 == 112.112f); assert(r2 == 112.112f); assert(r3 == 112.112f); - assert(r4 == 112.112f); + assert(r4 == 34.34f); } return 0;