diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index ab1a18f49557e..72687d6f18636 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -424,9 +424,19 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set ( has_distinct_generic_addrspace TRUE ) if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) set( opt_flags -O3 ) + list( APPEND build_flags -DCLC_SPIRV ) set( spvflags --spirv-max-version=1.1 ) + set( MACRO_ARCH SPIRV32 ) + if( ARCH STREQUAL spirv64 ) + set( MACRO_ARCH SPIRV64 ) + endif() elseif( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 ) set( opt_flags -O3 ) + list( APPEND build_flags -DCLC_CLSPV ) + set( MACRO_ARCH CLSPV32 ) + if( ARCH STREQUAL clspv64 ) + set( MACRO_ARCH CLSPV64 ) + endif() elseif( ARCH STREQUAL nvptx OR ARCH STREQUAL nvptx64 ) set( opt_flags -O3 "--nvvm-reflect-enable=false" ) set( has_distinct_generic_addrspace FALSE ) @@ -437,6 +447,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( has_distinct_generic_addrspace FALSE ) else() set( opt_flags -O3 ) + set( MACRO_ARCH ${ARCH} ) endif() # Enable SPIR-V builtin function declarations, so they don't @@ -483,7 +494,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) list(APPEND build_flags -D__unix__) endif() - string( TOUPPER "CLC_${ARCH}" CLC_TARGET_DEFINE ) + string( TOUPPER "CLC_${MACRO_ARCH}" CLC_TARGET_DEFINE ) list( APPEND build_flags -D__CLC_INTERNAL diff --git a/libclc/clc/include/clc/clcfunc.h b/libclc/clc/include/clc/clcfunc.h index e04a405a94be7..6c95a07b1c184 100644 --- a/libclc/clc/include/clc/clcfunc.h +++ b/libclc/clc/include/clc/clcfunc.h @@ -10,9 +10,9 @@ // avoid inlines for SPIR-V related targets since we'll optimise later in the // chain -#if defined(CLC_SPIRV) || defined(CLC_SPIRV64) +#if defined(CLC_SPIRV) #define _CLC_DEF -#elif defined(CLC_CLSPV) || defined(CLC_CLSPV64) +#elif defined(CLC_CLSPV) #define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin)) #else #define _CLC_DEF __attribute__((always_inline)) diff --git a/libclc/generic/include/clc/integer/gentype.inc b/libclc/clc/include/clc/integer/gentype.inc similarity index 99% rename from libclc/generic/include/clc/integer/gentype.inc rename to libclc/clc/include/clc/integer/gentype.inc index 032bdc0cadbaf..e8ca005d4ccb9 100644 --- a/libclc/generic/include/clc/integer/gentype.inc +++ b/libclc/clc/include/clc/integer/gentype.inc @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -//These 2 defines only change when switching between data sizes or base types to -//keep this file manageable. +// These 2 defines only change when switching between data sizes or base types +// to keep this file manageable. #define __CLC_GENSIZE 8 #define __CLC_SCALAR_GENTYPE char diff --git a/libclc/generic/include/clc/math/gentype.inc b/libclc/clc/include/clc/math/gentype.inc similarity index 100% rename from libclc/generic/include/clc/math/gentype.inc rename to libclc/clc/include/clc/math/gentype.inc diff --git a/libclc/clc/include/clc/shared/clc_clamp.h b/libclc/clc/include/clc/shared/clc_clamp.h new file mode 100644 index 0000000000000..5c044c9a1a510 --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_clamp.h @@ -0,0 +1,15 @@ +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible clamp +#define __clc_clamp clamp +#else + +#include +#include + +#define __CLC_BODY +#include + +#define __CLC_BODY +#include + +#endif diff --git a/libclc/clc/include/clc/shared/clc_clamp.inc b/libclc/clc/include/clc/shared/clc_clamp.inc new file mode 100644 index 0000000000000..cf6b0b2789bc5 --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_clamp.inc @@ -0,0 +1,9 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_GENTYPE z); + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, + __CLC_SCALAR_GENTYPE y, + __CLC_SCALAR_GENTYPE z); +#endif diff --git a/libclc/clc/include/clc/shared/clc_max.h b/libclc/clc/include/clc/shared/clc_max.h new file mode 100644 index 0000000000000..2825640f6c291 --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_max.h @@ -0,0 +1,12 @@ +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible max +#define __clc_max max +#else + +#define __CLC_BODY +#include + +#define __CLC_BODY +#include + +#endif diff --git a/libclc/clc/include/clc/shared/clc_max.inc b/libclc/clc/include/clc/shared/clc_max.inc new file mode 100644 index 0000000000000..bddb3fa3d920c --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_max.inc @@ -0,0 +1,7 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, + __CLC_GENTYPE b); + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b); +#endif diff --git a/libclc/clc/include/clc/shared/clc_min.h b/libclc/clc/include/clc/shared/clc_min.h new file mode 100644 index 0000000000000..0b7ee140b8f45 --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_min.h @@ -0,0 +1,12 @@ +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible min +#define __clc_min min +#else + +#define __CLC_BODY +#include + +#define __CLC_BODY +#include + +#endif diff --git a/libclc/clc/include/clc/shared/clc_min.inc b/libclc/clc/include/clc/shared/clc_min.inc new file mode 100644 index 0000000000000..3e1da96df43dd --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_min.inc @@ -0,0 +1,7 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, + __CLC_GENTYPE b); + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b); +#endif diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index fa2e4f50b99cd..db523adb63836 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -1 +1,4 @@ geometric/clc_dot.cl +shared/clc_clamp.cl +shared/clc_max.cl +shared/clc_min.cl diff --git a/libclc/clc/lib/generic/shared/clc_clamp.cl b/libclc/clc/lib/generic/shared/clc_clamp.cl new file mode 100644 index 0000000000000..1d40da3cf2296 --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_clamp.cl @@ -0,0 +1,7 @@ +#include + +#define __CLC_BODY +#include + +#define __CLC_BODY +#include diff --git a/libclc/generic/libspirv/core/shared/clc_clamp.inc b/libclc/clc/lib/generic/shared/clc_clamp.inc similarity index 62% rename from libclc/generic/libspirv/core/shared/clc_clamp.inc rename to libclc/clc/lib/generic/shared/clc_clamp.inc index 571f6470e5703..da67cd2ad69db 100644 --- a/libclc/generic/libspirv/core/shared/clc_clamp.inc +++ b/libclc/clc/lib/generic/shared/clc_clamp.inc @@ -1,11 +1,3 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) { diff --git a/libclc/clc/lib/generic/shared/clc_max.cl b/libclc/clc/lib/generic/shared/clc_max.cl new file mode 100644 index 0000000000000..e1050ed0007ee --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_max.cl @@ -0,0 +1,7 @@ +#include + +#define __CLC_BODY +#include + +#define __CLC_BODY +#include diff --git a/libclc/generic/libspirv/core/shared/clc_max.inc b/libclc/clc/lib/generic/shared/clc_max.inc similarity index 50% rename from libclc/generic/libspirv/core/shared/clc_max.inc rename to libclc/clc/lib/generic/shared/clc_max.inc index 882f29ce30d94..f4234cb359d86 100644 --- a/libclc/generic/libspirv/core/shared/clc_max.inc +++ b/libclc/clc/lib/generic/shared/clc_max.inc @@ -1,11 +1,3 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, __CLC_GENTYPE b) { return (a > b ? a : b); diff --git a/libclc/clc/lib/generic/shared/clc_min.cl b/libclc/clc/lib/generic/shared/clc_min.cl new file mode 100644 index 0000000000000..12a26f5352407 --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_min.cl @@ -0,0 +1,7 @@ +#include + +#define __CLC_BODY +#include + +#define __CLC_BODY +#include diff --git a/libclc/generic/libspirv/core/shared/clc_min.inc b/libclc/clc/lib/generic/shared/clc_min.inc similarity index 50% rename from libclc/generic/libspirv/core/shared/clc_min.inc rename to libclc/clc/lib/generic/shared/clc_min.inc index d8a51291dbc27..e9c85ddd3affa 100644 --- a/libclc/generic/libspirv/core/shared/clc_min.inc +++ b/libclc/clc/lib/generic/shared/clc_min.inc @@ -1,11 +1,3 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, __CLC_GENTYPE b) { return (b < a ? b : a); diff --git a/libclc/generic/lib/common/smoothstep.cl b/libclc/generic/lib/common/smoothstep.cl index 8d18024c446d6..99553cac901d8 100644 --- a/libclc/generic/lib/common/smoothstep.cl +++ b/libclc/generic/lib/common/smoothstep.cl @@ -49,7 +49,7 @@ _CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, double, _CLC_V_S_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, double, double, double); -#if !defined(CLC_SPIRV) && !defined(CLC_SPIRV64) +#if !defined(CLC_SPIRV) SMOOTH_STEP_DEF(float, double, SMOOTH_STEP_IMPL_D); SMOOTH_STEP_DEF(double, float, SMOOTH_STEP_IMPL_D); diff --git a/libclc/generic/lib/common/step.cl b/libclc/generic/lib/common/step.cl index f8cbd125c0638..46e86249b57e4 100644 --- a/libclc/generic/lib/common/step.cl +++ b/libclc/generic/lib/common/step.cl @@ -44,7 +44,7 @@ STEP_DEF(double, double); _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double); _CLC_V_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double); -#if !defined(CLC_SPIRV) && !defined(CLC_SPIRV64) +#if !defined(CLC_SPIRV) STEP_DEF(float, double); STEP_DEF(double, float); diff --git a/libclc/generic/lib/math/clc_ldexp.cl b/libclc/generic/lib/math/clc_ldexp.cl index ae6117b7b2922..438c31835a364 100644 --- a/libclc/generic/lib/math/clc_ldexp.cl +++ b/libclc/generic/lib/math/clc_ldexp.cl @@ -20,76 +20,78 @@ * THE SOFTWARE. */ -#include -#include "config.h" #include "../clcmacro.h" +#include "config.h" #include "math.h" +#include +#include _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) { - if (!__clc_fp32_subnormals_supported()) { - - // This treats subnormals as zeros - int i = as_int(x); - int e = (i >> 23) & 0xff; - int m = i & 0x007fffff; - int s = i & 0x80000000; - int v = add_sat(e, n); - v = clamp(v, 0, 0xff); - int mr = e == 0 | v == 0 | v == 0xff ? 0 : m; - int c = e == 0xff; - mr = c ? m : mr; - int er = c ? e : v; - er = e ? er : e; - return as_float( s | (er << 23) | mr ); - } - - /* supports denormal values */ - const int multiplier = 24; - float val_f; - uint val_ui; - uint sign; - int exponent; - val_ui = as_uint(x); - sign = val_ui & 0x80000000; - val_ui = val_ui & 0x7fffffff;/* remove the sign bit */ - int val_x = val_ui; - - exponent = val_ui >> 23; /* get the exponent */ - int dexp = exponent; - - /* denormal support */ - int fbh = 127 - (as_uint((float)(as_float(val_ui | 0x3f800000) - 1.0f)) >> 23); - int dexponent = 25 - fbh; - uint dval_ui = (( (val_ui << fbh) & 0x007fffff) | (dexponent << 23)); - int ex = dexponent + n - multiplier; - dexponent = ex; - uint val = sign | (ex << 23) | (dval_ui & 0x007fffff); - int ex1 = dexponent + multiplier; - ex1 = -ex1 +25; - dval_ui = (((dval_ui & 0x007fffff )| 0x800000) >> ex1); - dval_ui = dexponent > 0 ? val :dval_ui; - dval_ui = dexponent > 254 ? 0x7f800000 :dval_ui; /*overflow*/ - dval_ui = dexponent < -multiplier ? 0 : dval_ui; /*underflow*/ - dval_ui = dval_ui | sign; - val_f = as_float(dval_ui); - - exponent += n; - - val = sign | (exponent << 23) | (val_ui & 0x007fffff); - ex1 = exponent + multiplier; - ex1 = -ex1 +25; - val_ui = (((val_ui & 0x007fffff )| 0x800000) >> ex1); - val_ui = exponent > 0 ? val :val_ui; - val_ui = exponent > 254 ? 0x7f800000 :val_ui; /*overflow*/ - val_ui = exponent < -multiplier ? 0 : val_ui; /*underflow*/ - val_ui = val_ui | sign; - - val_ui = dexp == 0? dval_ui : val_ui; - val_f = as_float(val_ui); - - val_f = isnan(x) | isinf(x) | val_x == 0 ? x : val_f; - return val_f; + if (!__clc_fp32_subnormals_supported()) { + + // This treats subnormals as zeros + int i = as_int(x); + int e = (i >> 23) & 0xff; + int m = i & 0x007fffff; + int s = i & 0x80000000; + int v = add_sat(e, n); + v = __clc_clamp(v, 0, 0xff); + int mr = e == 0 | v == 0 | v == 0xff ? 0 : m; + int c = e == 0xff; + mr = c ? m : mr; + int er = c ? e : v; + er = e ? er : e; + return as_float(s | (er << 23) | mr); + } + + /* supports denormal values */ + const int multiplier = 24; + float val_f; + uint val_ui; + uint sign; + int exponent; + val_ui = as_uint(x); + sign = val_ui & 0x80000000; + val_ui = val_ui & 0x7fffffff; /* remove the sign bit */ + int val_x = val_ui; + + exponent = val_ui >> 23; /* get the exponent */ + int dexp = exponent; + + /* denormal support */ + int fbh = + 127 - (as_uint((float)(as_float(val_ui | 0x3f800000) - 1.0f)) >> 23); + int dexponent = 25 - fbh; + uint dval_ui = (((val_ui << fbh) & 0x007fffff) | (dexponent << 23)); + int ex = dexponent + n - multiplier; + dexponent = ex; + uint val = sign | (ex << 23) | (dval_ui & 0x007fffff); + int ex1 = dexponent + multiplier; + ex1 = -ex1 + 25; + dval_ui = (((dval_ui & 0x007fffff) | 0x800000) >> ex1); + dval_ui = dexponent > 0 ? val : dval_ui; + dval_ui = dexponent > 254 ? 0x7f800000 : dval_ui; /*overflow*/ + dval_ui = dexponent < -multiplier ? 0 : dval_ui; /*underflow*/ + dval_ui = dval_ui | sign; + val_f = as_float(dval_ui); + + exponent += n; + + val = sign | (exponent << 23) | (val_ui & 0x007fffff); + ex1 = exponent + multiplier; + ex1 = -ex1 + 25; + val_ui = (((val_ui & 0x007fffff) | 0x800000) >> ex1); + val_ui = exponent > 0 ? val : val_ui; + val_ui = exponent > 254 ? 0x7f800000 : val_ui; /*overflow*/ + val_ui = exponent < -multiplier ? 0 : val_ui; /*underflow*/ + val_ui = val_ui | sign; + + val_ui = dexp == 0 ? dval_ui : val_ui; + val_f = as_float(val_ui); + + val_f = isnan(x) | isinf(x) | val_x == 0 ? x : val_f; + return val_f; } #ifdef cl_khr_fp64 @@ -97,32 +99,32 @@ _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) { #pragma OPENCL EXTENSION cl_khr_fp64 : enable _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) { - long l = as_ulong(x); - int e = (l >> 52) & 0x7ff; - long s = l & 0x8000000000000000; + long l = as_ulong(x); + int e = (l >> 52) & 0x7ff; + long s = l & 0x8000000000000000; - ulong ux = as_ulong(x * 0x1.0p+53); - int de = ((int)(ux >> 52) & 0x7ff) - 53; - int c = e == 0; - e = c ? de: e; + ulong ux = as_ulong(x * 0x1.0p+53); + int de = ((int)(ux >> 52) & 0x7ff) - 53; + int c = e == 0; + e = c ? de : e; - ux = c ? ux : l; + ux = c ? ux : l; - int v = e + n; - v = clamp(v, -0x7ff, 0x7ff); + int v = e + n; + v = __clc_clamp(v, -0x7ff, 0x7ff); - ux &= ~EXPBITS_DP64; + ux &= ~EXPBITS_DP64; - double mr = as_double(ux | ((ulong)(v+53) << 52)); - mr = mr * 0x1.0p-53; + double mr = as_double(ux | ((ulong)(v + 53) << 52)); + mr = mr * 0x1.0p-53; - mr = v > 0 ? as_double(ux | ((ulong)v << 52)) : mr; + mr = v > 0 ? as_double(ux | ((ulong)v << 52)) : mr; - mr = v == 0x7ff ? as_double(s | PINFBITPATT_DP64) : mr; - mr = v < -53 ? as_double(s) : mr; + mr = v == 0x7ff ? as_double(s | PINFBITPATT_DP64) : mr; + mr = v < -53 ? as_double(s) : mr; - mr = ((n == 0) | isinf(x) | (x == 0) ) ? x : mr; - return mr; + mr = ((n == 0) | isinf(x) | (x == 0)) ? x : mr; + return mr; } #endif @@ -132,7 +134,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) { #pragma OPENCL EXTENSION cl_khr_fp16 : enable _CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) { - return (half)__clc_ldexp((float)x, n); + return (half)__clc_ldexp((float)x, n); } _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_ldexp, half, int); diff --git a/libclc/generic/lib/math/math.h b/libclc/generic/lib/math/math.h index 62a4c925db51b..b1f82b7991ea3 100644 --- a/libclc/generic/lib/math/math.h +++ b/libclc/generic/lib/math/math.h @@ -40,7 +40,7 @@ #if (defined __AMDGCN__ || defined __R600__) && !defined __HAS_FMAF__ #define HAVE_HW_FMA32() (0) -#elif defined CLC_SPIRV || defined CLC_SPIRV64 +#elif defined(CLC_SPIRV) bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void); #define HAVE_HW_FMA32() __clc_runtime_has_hw_fma32() #else diff --git a/libclc/generic/lib/shared/clamp.cl b/libclc/generic/lib/shared/clamp.cl index 51a009281be29..f470fc822f756 100644 --- a/libclc/generic/lib/shared/clamp.cl +++ b/libclc/generic/lib/shared/clamp.cl @@ -1,6 +1,5 @@ #include -#include -#include +#include #define __CLC_BODY #include diff --git a/libclc/generic/lib/shared/max.cl b/libclc/generic/lib/shared/max.cl index a753b702f658d..2266d5905afd6 100644 --- a/libclc/generic/lib/shared/max.cl +++ b/libclc/generic/lib/shared/max.cl @@ -1,6 +1,5 @@ #include -#include -#include +#include #define __CLC_BODY #include diff --git a/libclc/generic/lib/shared/max.inc b/libclc/generic/lib/shared/max.inc index 3c83e29c3f2d9..ec433a89c6e92 100644 --- a/libclc/generic/lib/shared/max.inc +++ b/libclc/generic/lib/shared/max.inc @@ -3,7 +3,8 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) { } #ifndef __CLC_SCALAR -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { return __clc_max(a, b); } #endif diff --git a/libclc/generic/lib/shared/min.cl b/libclc/generic/lib/shared/min.cl index 31c47872f6aa4..f5c4d57f4b8d8 100644 --- a/libclc/generic/lib/shared/min.cl +++ b/libclc/generic/lib/shared/min.cl @@ -1,6 +1,5 @@ #include -#include -#include +#include #define __CLC_BODY #include diff --git a/libclc/generic/lib/shared/min.inc b/libclc/generic/lib/shared/min.inc index f70956479d6df..6a00944cbe35e 100644 --- a/libclc/generic/lib/shared/min.inc +++ b/libclc/generic/lib/shared/min.inc @@ -3,7 +3,8 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) { } #ifndef __CLC_SCALAR -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { return __clc_min(a, b); } #endif diff --git a/libclc/generic/libspirv/SOURCES b/libclc/generic/libspirv/SOURCES index a222a1f7281a3..10d86bd5a67f8 100644 --- a/libclc/generic/libspirv/SOURCES +++ b/libclc/generic/libspirv/SOURCES @@ -39,9 +39,6 @@ core/integer/clc_mul_hi.cl core/integer/clc_rhadd.cl core/integer/clc_sub_sat.cl core/integer/clc_upsample.cl -core/shared/clc_clamp.cl -core/shared/clc_max.cl -core/shared/clc_min.cl geometric/cross.cl geometric/distance.cl geometric/dot.cl diff --git a/libclc/generic/libspirv/core/shared/clc_clamp.cl b/libclc/generic/libspirv/core/shared/clc_clamp.cl deleted file mode 100644 index d85c0a420f167..0000000000000 --- a/libclc/generic/libspirv/core/shared/clc_clamp.cl +++ /dev/null @@ -1,15 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#define __CLC_BODY -#include - -#define __CLC_BODY -#include diff --git a/libclc/generic/libspirv/core/shared/clc_max.cl b/libclc/generic/libspirv/core/shared/clc_max.cl deleted file mode 100644 index 211c2c3ebba1b..0000000000000 --- a/libclc/generic/libspirv/core/shared/clc_max.cl +++ /dev/null @@ -1,15 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#define __CLC_BODY -#include - -#define __CLC_BODY -#include diff --git a/libclc/generic/libspirv/core/shared/clc_min.cl b/libclc/generic/libspirv/core/shared/clc_min.cl deleted file mode 100644 index 9c7ea87286d8f..0000000000000 --- a/libclc/generic/libspirv/core/shared/clc_min.cl +++ /dev/null @@ -1,15 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#define __CLC_BODY -#include - -#define __CLC_BODY -#include diff --git a/libclc/generic/libspirv/math/clc_hypot.cl b/libclc/generic/libspirv/math/clc_hypot.cl index b34a5e5107b4a..2e0c2737e1642 100644 --- a/libclc/generic/libspirv/math/clc_hypot.cl +++ b/libclc/generic/libspirv/math/clc_hypot.cl @@ -8,82 +8,84 @@ #include +#include +#include #include #include #include #include -// Returns sqrt(x*x + y*y) with no overflow or underflow unless the result warrants it -_CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) -{ - uint ux = as_uint(x); - uint aux = ux & EXSIGNBIT_SP32; - uint uy = as_uint(y); - uint auy = uy & EXSIGNBIT_SP32; - float retval; - int c = aux > auy; - ux = c ? aux : auy; - uy = c ? auy : aux; - - int xexp = __spirv_ocl_s_clamp( - (int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126); - float fx_exp = as_float((xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); - float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); - float fx = as_float(ux) * fi_exp; - float fy = as_float(uy) * fi_exp; - retval = __spirv_ocl_sqrt(__spirv_ocl_mad(fx, fx, fy * fy)) * fx_exp; - - retval = ux > PINFBITPATT_SP32 || uy == 0 ? as_float(ux) : retval; - retval = ux == PINFBITPATT_SP32 || uy == PINFBITPATT_SP32 - ? as_float(PINFBITPATT_SP32) - : retval; - return retval; +// Returns sqrt(x*x + y*y) with no overflow or underflow unless the result +// warrants it +_CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) { + uint ux = as_uint(x); + uint aux = ux & EXSIGNBIT_SP32; + uint uy = as_uint(y); + uint auy = uy & EXSIGNBIT_SP32; + float retval; + int c = aux > auy; + ux = c ? aux : auy; + uy = c ? auy : aux; + + int xexp = + __clc_clamp((int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126); + float fx_exp = as_float((xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); + float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); + float fx = as_float(ux) * fi_exp; + float fy = as_float(uy) * fi_exp; + retval = __spirv_ocl_sqrt(__spirv_ocl_mad(fx, fx, fy * fy)) * fx_exp; + + retval = ux > PINFBITPATT_SP32 || uy == 0 ? as_float(ux) : retval; + retval = ux == PINFBITPATT_SP32 || uy == PINFBITPATT_SP32 + ? as_float(PINFBITPATT_SP32) + : retval; + return retval; } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_hypot, float, float) #ifdef cl_khr_fp64 -_CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) -{ - ulong ux = as_ulong(x) & ~SIGNBIT_DP64; - int xexp = ux >> EXPSHIFTBITS_DP64; - x = as_double(ux); +_CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) { + ulong ux = as_ulong(x) & ~SIGNBIT_DP64; + int xexp = ux >> EXPSHIFTBITS_DP64; + x = as_double(ux); - ulong uy = as_ulong(y) & ~SIGNBIT_DP64; - int yexp = uy >> EXPSHIFTBITS_DP64; - y = as_double(uy); + ulong uy = as_ulong(y) & ~SIGNBIT_DP64; + int yexp = uy >> EXPSHIFTBITS_DP64; + y = as_double(uy); - int c = xexp > EXPBIAS_DP64 + 500 | yexp > EXPBIAS_DP64 + 500; - double preadjust = c ? 0x1.0p-600 : 1.0; - double postadjust = c ? 0x1.0p+600 : 1.0; + int c = xexp > EXPBIAS_DP64 + 500 | yexp > EXPBIAS_DP64 + 500; + double preadjust = c ? 0x1.0p-600 : 1.0; + double postadjust = c ? 0x1.0p+600 : 1.0; - c = xexp < EXPBIAS_DP64 - 500 | yexp < EXPBIAS_DP64 - 500; - preadjust = c ? 0x1.0p+600 : preadjust; - postadjust = c ? 0x1.0p-600 : postadjust; + c = xexp < EXPBIAS_DP64 - 500 | yexp < EXPBIAS_DP64 - 500; + preadjust = c ? 0x1.0p+600 : preadjust; + postadjust = c ? 0x1.0p-600 : postadjust; - double ax = x * preadjust; - double ay = y * preadjust; + double ax = x * preadjust; + double ay = y * preadjust; - // The post adjust may overflow, but this can't be avoided in any case - double r = __spirv_ocl_sqrt(__spirv_ocl_fma(ax, ax, ay * ay)) * postadjust; + // The post adjust may overflow, but this can't be avoided in any case + double r = __spirv_ocl_sqrt(__spirv_ocl_fma(ax, ax, ay * ay)) * postadjust; - // If the difference in exponents between x and y is large - double s = x + y; - c = __spirv_ocl_s_abs(xexp - yexp) > MANTLENGTH_DP64 + 1; - r = c ? s : r; + // If the difference in exponents between x and y is large + double s = x + y; + c = __spirv_ocl_s_abs(xexp - yexp) > MANTLENGTH_DP64 + 1; + r = c ? s : r; - // Check for NaN - //c = x != x | y != y; - c = __spirv_IsNan(x) | __spirv_IsNan(y); - r = c ? as_double(QNANBITPATT_DP64) : r; + // Check for NaN + // c = x != x | y != y; + c = __spirv_IsNan(x) | __spirv_IsNan(y); + r = c ? as_double(QNANBITPATT_DP64) : r; - // If either is Inf, we must return Inf - c = x == as_double(PINFBITPATT_DP64) | y == as_double(PINFBITPATT_DP64); - r = c ? as_double(PINFBITPATT_DP64) : r; + // If either is Inf, we must return Inf + c = x == as_double(PINFBITPATT_DP64) | y == as_double(PINFBITPATT_DP64); + r = c ? as_double(PINFBITPATT_DP64) : r; - return r; + return r; } -_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_hypot, double, double) +_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_hypot, double, + double) #endif #ifdef cl_khr_fp16