Skip to content

Commit

Permalink
add clz builtin (AdaptiveCpp#965)
Browse files Browse the repository at this point in the history
* add clz builtin

* fix host signature

* use libdevice header for nv builtins

* change sscp clz code path

* fix hiplike clz by checking bit length

* add host clz fallback

* cleaning clz implementation

* fall back clz

* fix comment typo

Co-authored-by: Ronan Keryell <ronan@keryell.fr>

* remove __ for the fallback clz

* implement u16 & u8 variant of clz

* ptx clz u8 & u16

* clz testing

* fix dumb typo

* add fallback to cuda builtins

* remove the __clz host for nvc++ and fallback to fallback_clz

* fix issue in fallback clz

* changing fallback implementation

* print test

* additional prints

* additional cast layer

* printing clz(0)

* fix build

* remove prints & add check to avoid clz(0)

---------

Co-authored-by: Ronan Keryell <ronan@keryell.fr>
  • Loading branch information
tdavidcl and keryell committed May 5, 2023
1 parent dfb6788 commit a54d87b
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 1 deletion.
5 changes: 5 additions & 0 deletions include/hipSYCL/sycl/libkernel/builtin_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,11 @@ HIPSYCL_BUILTIN T __hipsycl_clamp(T x, T minval, T maxval) noexcept {
HIPSYCL_RETURN_DISPATCH_BUILTIN(__hipsycl_clamp, x, minval, maxval);
}

/// Generic entry point for the count-leading-zeros (clz) builtin.
///
/// Hands `x` to HIPSYCL_RETURN_DISPATCH_BUILTIN together with the builtin
/// name `__hipsycl_clz`; the result has the same type T as the argument.
/// NOTE(review): presumably the macro forwards to the `__hipsycl_clz`
/// implementation of the active backend — confirm against its definition.
template<class T>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
HIPSYCL_RETURN_DISPATCH_BUILTIN(__hipsycl_clz, x);
}

template<class T>
HIPSYCL_BUILTIN T __hipsycl_max(T x, T y) noexcept {
HIPSYCL_RETURN_DISPATCH_BUILTIN(__hipsycl_max, x, y);
Expand Down
2 changes: 2 additions & 0 deletions include/hipSYCL/sycl/libkernel/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,8 @@ HIPSYCL_BUILTIN VecType clamp(const VecType &a, ScalarType minval,
VecType{static_cast<element_type>(maxval)});
}

// clz (count leading zeros): defined for the overload set named by
// HIPSYCL_BUILTIN_OVERLOAD_SET_GENINTEGER, using the unary generator.
HIPSYCL_DEFINE_BUILTIN(clz, HIPSYCL_BUILTIN_OVERLOAD_SET_GENINTEGER,
HIPSYCL_BUILTIN_GENERATOR_UNARY_T)

// TODO ctz
Expand Down
37 changes: 37 additions & 0 deletions include/hipSYCL/sycl/libkernel/generic/hiplike/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,43 @@ HIPSYCL_HIPLIKE_BUILTIN T __hipsycl_clamp(T x, T minval, T maxval) noexcept {
hiplike_builtins::__hipsycl_max(x, minval), maxval);
}


/// Count leading zeros for integer types narrower than 32 bits (hiplike
/// backends). The value is widened to 32 bits, counted with `__clz`, and the
/// bits introduced by the widening are subtracted again.
template <class T,
          std::enable_if_t<(std::is_integral_v<T> && sizeof(T) < 4), int> = 0>
HIPSYCL_HIPLIKE_BUILTIN T __hipsycl_clz(T x) noexcept {
  // Go through the unsigned counterpart of T first: widening a negative
  // signed value directly would sign-extend and fill the upper bits with ones.
  using UnsignedT = std::make_unsigned_t<T>;
  const auto widened = static_cast<__hipsycl_int32>(static_cast<UnsignedT>(x));

  // Number of extra high bits present in the 32-bit widened representation.
  constexpr T extra_bits =
      CHAR_BIT * (sizeof(__hipsycl_int32) - sizeof(UnsignedT));

  return __clz(widened) - extra_bits;
}

/// Count leading zeros for 4-byte integer types (hiplike backends), computed
/// by `__clz` on the value converted to `__hipsycl_int32`.
template <class T,
          std::enable_if_t<(std::is_integral_v<T> && sizeof(T) == 4), int> = 0>
HIPSYCL_HIPLIKE_BUILTIN T __hipsycl_clz(T x) noexcept {
  const auto as_int32 = static_cast<__hipsycl_int32>(x);
  return __clz(as_int32);
}

/// Count leading zeros for 8-byte integer types (hiplike backends), computed
/// by `__clzll` on the value converted to `__hipsycl_int64`.
template <class T,
          std::enable_if_t<(std::is_integral_v<T> && sizeof(T) == 8), int> = 0>
HIPSYCL_HIPLIKE_BUILTIN T __hipsycl_clz(T x) noexcept {
  const auto as_int64 = static_cast<__hipsycl_int64>(x);
  return __clzll(as_int64);
}

template<class T>
HIPSYCL_HIPLIKE_BUILTIN T __hipsycl_mul24(T x, T y) noexcept {
return __mul24(x, y);
Expand Down
64 changes: 64 additions & 0 deletions include/hipSYCL/sycl/libkernel/host/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@
#include "hipSYCL/sycl/libkernel/backend.hpp"
#include "hipSYCL/sycl/libkernel/vec.hpp"

#include <bitset>
#include <cstdlib>
#include <cmath>
#include <type_traits>
#include <climits>

#if HIPSYCL_LIBKERNEL_IS_DEVICE_PASS_HOST

Expand Down Expand Up @@ -533,6 +535,68 @@ HIPSYCL_BUILTIN T __hipsycl_clamp(T x, T minval, T maxval) noexcept {
return std::min(std::max(x, minval), maxval);
}

/// Portable count-leading-zeros used when no compiler builtin is available.
///
/// Scans the sizeof(T)*CHAR_BIT-bit pattern of `x` from the most significant
/// bit downwards and returns how many zero bits precede the first set bit.
/// For x == 0 every bit is zero, so the full bit width of T is returned.
template<class T, std::enable_if_t<std::is_integral_v<T>,int> = 0>
inline T fallback_clz(T x) noexcept {
  constexpr std::size_t bit_count = sizeof(T) * CHAR_BIT;
  // Only the low bit_count bits of the (possibly sign-extended) value are kept.
  const std::bitset<bit_count> bits(x);

  std::size_t leading = 0;
  for (std::size_t pos = bit_count; pos > 0 && !bits[pos - 1]; --pos) {
    ++leading;
  }
  return static_cast<T>(leading);
}

/// Host count-leading-zeros for integer types no wider than `unsigned int`
/// (int, short, char and their signed/unsigned variants).
///
/// Returns the number of leading zero bits in the sizeof(T)*CHAR_BIT-bit
/// representation of `x`; for x == 0 that is the full bit width of T.
/// Uses `__builtin_clz` when the compiler reports it, otherwise `fallback_clz`.
template <class T,
          std::enable_if_t<
              (std::is_same_v<T, unsigned int> || std::is_same_v<T, int> ||
               std::is_same_v<T, unsigned short> || std::is_same_v<T, short> ||
               std::is_same_v<T, unsigned char> ||
               std::is_same_v<T, signed char> || std::is_same_v<T, char>),
              int> = 0>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
// `__has_builtin` is itself a compiler extension: evaluating it inside `#if`
// on a compiler that does not provide it is a preprocessor error, so its
// existence is checked first and such compilers take the fallback path.
#if defined(__has_builtin)
#  if __has_builtin(__builtin_clz)
  // __builtin_clz(0) is undefined on some architectures.
  if (x == 0) { return static_cast<T>(sizeof(T) * CHAR_BIT); }

  // Convert through the unsigned counterpart of T so that a negative value is
  // zero-extended rather than sign-extended when widened to unsigned int.
  using UnsignedT = std::make_unsigned_t<T>;
  // Extra high bits that the unsigned-int-wide builtin counts beyond T's width.
  constexpr T extra_bits = CHAR_BIT * (sizeof(unsigned int) - sizeof(UnsignedT));
  return static_cast<T>(__builtin_clz(static_cast<UnsignedT>(x)) - extra_bits);
#  else
  return fallback_clz(x);
#  endif
#else
  return fallback_clz(x);
#endif
}

/// Host count-leading-zeros for `long` / `unsigned long`.
///
/// Returns the number of leading zero bits of `x`; for x == 0 that is the
/// full bit width of T. Uses `__builtin_clzl` when the compiler reports it,
/// otherwise `fallback_clz`.
template <class T, std::enable_if_t<(std::is_same_v<T, unsigned long> ||
                                     std::is_same_v<T, long>),
                                    int> = 0>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
// `__has_builtin` is itself a compiler extension: evaluating it inside `#if`
// on a compiler that does not provide it is a preprocessor error, so its
// existence is checked first and such compilers take the fallback path.
#if defined(__has_builtin)
#  if __has_builtin(__builtin_clzl)
  // __builtin_clzl(0) is undefined on some architectures.
  if (x == 0) { return static_cast<T>(sizeof(T) * CHAR_BIT); }

  return static_cast<T>(__builtin_clzl(static_cast<unsigned long>(x)));
#  else
  return fallback_clz(x);
#  endif
#else
  return fallback_clz(x);
#endif
}

/// Host count-leading-zeros for `long long` / `unsigned long long`.
///
/// Returns the number of leading zero bits of `x`; for x == 0 that is the
/// full bit width of T. Uses `__builtin_clzll` when the compiler reports it,
/// otherwise `fallback_clz`.
template <class T, std::enable_if_t<(std::is_same_v<T, unsigned long long> ||
                                     std::is_same_v<T, long long>),
                                    int> = 0>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
// `__has_builtin` is itself a compiler extension: evaluating it inside `#if`
// on a compiler that does not provide it is a preprocessor error, so its
// existence is checked first and such compilers take the fallback path.
#if defined(__has_builtin)
#  if __has_builtin(__builtin_clzll)
  // __builtin_clzll(0) is undefined on some architectures.
  if (x == 0) { return static_cast<T>(sizeof(T) * CHAR_BIT); }

  return static_cast<T>(__builtin_clzll(static_cast<unsigned long long>(x)));
#  else
  return fallback_clz(x);
#  endif
#else
  return fallback_clz(x);
#endif
}

template<class T>
HIPSYCL_BUILTIN T __hipsycl_max(T x, T y) noexcept {
return (x > y) ? x : y;
Expand Down
5 changes: 5 additions & 0 deletions include/hipSYCL/sycl/libkernel/spirv/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,11 @@ HIPSYCL_BUILTIN T __hipsycl_min(T x, T y) noexcept {
return (x < y) ? x : y;
}

/// SPIR-V count-leading-zeros: forwards `x` to `__spirv_ocl_clz` and returns
/// its result as T.
template<class T>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
  // The original read `return return ...;`, which does not compile.
  return __spirv_ocl_clz(x);
}

template<class T, std::enable_if_t<std::is_integral_v<T>,int> = 0>
HIPSYCL_BUILTIN T __hipsycl_clamp(T x, T minval, T maxval) noexcept {
return spirv_builtins::__hipsycl_min(
Expand Down
33 changes: 33 additions & 0 deletions include/hipSYCL/sycl/libkernel/sscp/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,39 @@ HIPSYCL_BUILTIN T __hipsycl_clamp(T x, T minval, T maxval) noexcept {
sscp_builtins::__hipsycl_max(x, minval), maxval);
}

/// SSCP count-leading-zeros for 1-byte integer types; the value is converted
/// to `__hipsycl_uint8` and passed to `__hipsycl_sscp_clz_u8`.
template <class T,
          std::enable_if_t<(std::is_integral_v<T> && sizeof(T) == 1), int> = 0>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
  const auto as_u8 = static_cast<__hipsycl_uint8>(x);
  return __hipsycl_sscp_clz_u8(as_u8);
}

/// SSCP count-leading-zeros for 2-byte integer types; the value is converted
/// to `__hipsycl_uint16` and passed to `__hipsycl_sscp_clz_u16`.
template <class T,
          std::enable_if_t<(std::is_integral_v<T> && sizeof(T) == 2), int> = 0>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
  const auto as_u16 = static_cast<__hipsycl_uint16>(x);
  return __hipsycl_sscp_clz_u16(as_u16);
}

/// SSCP count-leading-zeros for 4-byte integer types; the value is converted
/// to `__hipsycl_uint32` and passed to `__hipsycl_sscp_clz_u32`.
template <class T,
          std::enable_if_t<(std::is_integral_v<T> && sizeof(T) == 4), int> = 0>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
  const auto as_u32 = static_cast<__hipsycl_uint32>(x);
  return __hipsycl_sscp_clz_u32(as_u32);
}

/// SSCP count-leading-zeros for 8-byte integer types; the value is converted
/// to `__hipsycl_uint64` and passed to `__hipsycl_sscp_clz_u64`.
template <class T,
          std::enable_if_t<(std::is_integral_v<T> && sizeof(T) == 8), int> = 0>
HIPSYCL_BUILTIN T __hipsycl_clz(T x) noexcept {
  const auto as_u64 = static_cast<__hipsycl_uint64>(x);
  return __hipsycl_sscp_clz_u64(as_u64);
}


template<class T, std::enable_if_t<std::is_signed_v<T>, int> = 0>
HIPSYCL_BUILTIN T __hipsycl_mul24(T x, T y) noexcept {
return __hipsycl_sscp_mul24_s32(x, y);
Expand Down
5 changes: 5 additions & 0 deletions include/hipSYCL/sycl/libkernel/sscp/builtins/interger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,9 @@
// mul24 builtins: one overload for signed and one for unsigned 32-bit operands.
HIPSYCL_SSCP_BUILTIN __hipsycl_int32 __hipsycl_sscp_mul24_s32(__hipsycl_int32 a, __hipsycl_int32 b);
HIPSYCL_SSCP_BUILTIN __hipsycl_uint32 __hipsycl_sscp_mul24_u32(__hipsycl_uint32 a, __hipsycl_uint32 b);

// clz (count leading zeros) builtins, one per unsigned operand width
// (8/16/32/64 bit). Each returns the same type it takes.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint8 __hipsycl_sscp_clz_u8(__hipsycl_uint8);
HIPSYCL_SSCP_BUILTIN __hipsycl_uint16 __hipsycl_sscp_clz_u16(__hipsycl_uint16);
HIPSYCL_SSCP_BUILTIN __hipsycl_uint32 __hipsycl_sscp_clz_u32(__hipsycl_uint32);
HIPSYCL_SSCP_BUILTIN __hipsycl_uint64 __hipsycl_sscp_clz_u64(__hipsycl_uint64);

#endif
13 changes: 13 additions & 0 deletions src/libkernel/sscp/amdgpu/integer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,16 @@ HIPSYCL_SSCP_BUILTIN __hipsycl_int32 __hipsycl_sscp_mul24_s32(__hipsycl_int32 a,
// AMDGPU: unsigned mul24, forwarded to __ockl_mul24_u32.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint32
__hipsycl_sscp_mul24_u32(__hipsycl_uint32 a, __hipsycl_uint32 b) {
  const __hipsycl_uint32 product = __ockl_mul24_u32(a, b);
  return product;
}

// AMDGPU: 8-bit count-leading-zeros, forwarded to __ockl_clz_u8.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint8
__hipsycl_sscp_clz_u8(__hipsycl_uint8 a) {
  const __hipsycl_uint8 leading_zeros = __ockl_clz_u8(a);
  return leading_zeros;
}
// AMDGPU: 16-bit count-leading-zeros, forwarded to __ockl_clz_u16.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint16
__hipsycl_sscp_clz_u16(__hipsycl_uint16 a) {
  const __hipsycl_uint16 leading_zeros = __ockl_clz_u16(a);
  return leading_zeros;
}
// AMDGPU: 32-bit count-leading-zeros, forwarded to __ockl_clz_u32.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint32
__hipsycl_sscp_clz_u32(__hipsycl_uint32 a) {
  const __hipsycl_uint32 leading_zeros = __ockl_clz_u32(a);
  return leading_zeros;
}
// AMDGPU: 64-bit count-leading-zeros, forwarded to __ockl_clz_u64.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint64
__hipsycl_sscp_clz_u64(__hipsycl_uint64 a) {
  const __hipsycl_uint64 leading_zeros = __ockl_clz_u64(a);
  return leading_zeros;
}
16 changes: 16 additions & 0 deletions src/libkernel/sscp/ptx/integer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@


#include "hipSYCL/sycl/libkernel/sscp/builtins/interger.hpp"
#include "hipSYCL/sycl/libkernel/sscp/builtins/ptx/libdevice.hpp"

extern "C" __hipsycl_int32 __nv_mul24(__hipsycl_int32, __hipsycl_int32);
extern "C" __hipsycl_uint32 __nv_umul24(__hipsycl_uint32, __hipsycl_uint32);
Expand All @@ -38,3 +39,18 @@ HIPSYCL_SSCP_BUILTIN __hipsycl_int32 __hipsycl_sscp_mul24_s32(__hipsycl_int32 a,
// PTX: unsigned mul24, forwarded to __nv_umul24.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint32
__hipsycl_sscp_mul24_u32(__hipsycl_uint32 a, __hipsycl_uint32 b) {
  const __hipsycl_uint32 product = __nv_umul24(a, b);
  return product;
}



// PTX: 32-bit count-leading-zeros, forwarded to __nv_clz.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint32
__hipsycl_sscp_clz_u32(__hipsycl_uint32 a) {
  const __hipsycl_uint32 leading_zeros = __nv_clz(a);
  return leading_zeros;
}
// PTX: 64-bit count-leading-zeros, forwarded to __nv_clzll.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint64
__hipsycl_sscp_clz_u64(__hipsycl_uint64 a) {
  const __hipsycl_uint64 leading_zeros = __nv_clzll(a);
  return leading_zeros;
}
// PTX: 8-bit count-leading-zeros, derived from the 32-bit variant.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint8
__hipsycl_sscp_clz_u8(__hipsycl_uint8 a) {
  // The 32-bit count includes the 24 zero bits added when `a` is widened.
  constexpr int widening_bits = 24;
  return __hipsycl_sscp_clz_u32(a) - widening_bits;
}
// PTX: 16-bit count-leading-zeros, derived from the 32-bit variant.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint16
__hipsycl_sscp_clz_u16(__hipsycl_uint16 a) {
  // The 32-bit count includes the 16 zero bits added when `a` is widened.
  constexpr int widening_bits = 16;
  return __hipsycl_sscp_clz_u32(a) - widening_bits;
}
20 changes: 20 additions & 0 deletions src/libkernel/sscp/spirv/integer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,24 @@ HIPSYCL_SSCP_BUILTIN __hipsycl_int32 __hipsycl_sscp_mul24_s32(__hipsycl_int32 a,

// SPIR-V: unsigned mul24, forwarded to __spirv_ocl_u_mul24.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint32
__hipsycl_sscp_mul24_u32(__hipsycl_uint32 a, __hipsycl_uint32 b) {
  const __hipsycl_uint32 product = __spirv_ocl_u_mul24(a, b);
  return product;
}


// Overloads of __spirv_ocl_clz for signed and unsigned 32- and 64-bit
// operands; each returns the same type it takes. They are only declared here.
// NOTE(review): presumably resolved by the SPIR-V toolchain — confirm.
__hipsycl_int32 __spirv_ocl_clz(__hipsycl_int32 a);
__hipsycl_int64 __spirv_ocl_clz(__hipsycl_int64 a);
__hipsycl_uint32 __spirv_ocl_clz(__hipsycl_uint32 a);
__hipsycl_uint64 __spirv_ocl_clz(__hipsycl_uint64 a);


// SPIR-V: 32-bit count-leading-zeros, forwarded to the uint32 overload of
// __spirv_ocl_clz.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint32
__hipsycl_sscp_clz_u32(__hipsycl_uint32 a) {
  const __hipsycl_uint32 leading_zeros = __spirv_ocl_clz(a);
  return leading_zeros;
}
// SPIR-V: 64-bit count-leading-zeros, forwarded to the uint64 overload of
// __spirv_ocl_clz.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint64
__hipsycl_sscp_clz_u64(__hipsycl_uint64 a) {
  const __hipsycl_uint64 leading_zeros = __spirv_ocl_clz(a);
  return leading_zeros;
}
// SPIR-V: 8-bit count-leading-zeros, derived from the 32-bit variant.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint8
__hipsycl_sscp_clz_u8(__hipsycl_uint8 a) {
  // The 32-bit count includes the 24 zero bits added when `a` is widened.
  constexpr int widening_bits = 24;
  return __hipsycl_sscp_clz_u32(a) - widening_bits;
}
// SPIR-V: 16-bit count-leading-zeros, derived from the 32-bit variant.
HIPSYCL_SSCP_BUILTIN __hipsycl_uint16
__hipsycl_sscp_clz_u16(__hipsycl_uint16 a) {
  // The 32-bit count includes the 16 zero bits added when `a` is widened.
  constexpr int widening_bits = 16;
  return __hipsycl_sscp_clz_u32(a) - widening_bits;
}
14 changes: 13 additions & 1 deletion tests/sycl/math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "sycl_test_suite.hpp"

#include <bitset>
#include <boost/mpl/joint_view.hpp>

#include <cmath>
Expand Down Expand Up @@ -214,6 +215,15 @@ namespace {
auto ref_normalize(T v) {
return v / ref_length(v);
}

/// Reference count-leading-zeros used to check sycl::clz in the tests.
///
/// Walks the sizeof(T)*CHAR_BIT-bit pattern of `x` from the most significant
/// bit downwards and counts the zero bits before the first set bit; an
/// all-zero input therefore yields the full bit width of T.
template<class T, std::enable_if_t<std::is_integral_v<T>,int> = 0>
inline T ref_clz(T x) noexcept {
  constexpr std::size_t width = sizeof(T) * CHAR_BIT;
  const std::bitset<width> pattern(x);

  std::size_t zeros = 0;
  while (zeros < width && !pattern[width - 1 - zeros]) {
    ++zeros;
  }
  return static_cast<T>(zeros);
}
}

BOOST_AUTO_TEST_CASE_TEMPLATE(math_genfloat_binary, T,
Expand Down Expand Up @@ -386,7 +396,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(builtin_int_basic, T, math_test_genints::type) {

namespace s = cl::sycl;

constexpr int FUN_COUNT = 3;
constexpr int FUN_COUNT = 4;

// build inputs

Expand All @@ -410,6 +420,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(builtin_int_basic, T, math_test_genints::type) {
acc[i++] = s::abs(acc[0]);
acc[i++] = s::min(acc[0], acc[1]);
acc[i++] = s::max(acc[0], acc[1]);
acc[i++] = s::clz(acc[0]);
});
});

Expand All @@ -426,6 +437,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(builtin_int_basic, T, math_test_genints::type) {
BOOST_TEST(comp(acc[i++], c) == comp(acc[0], c));
BOOST_TEST(comp(acc[i++], c) == std::min(comp(acc[0], c), comp(acc[1], c)));
BOOST_TEST(comp(acc[i++], c) == std::max(comp(acc[0], c), comp(acc[1], c)));
BOOST_TEST(comp(acc[i++], c) == ref_clz(comp(acc[0], c)));
}
}
}
Expand Down

0 comments on commit a54d87b

Please sign in to comment.