diff --git a/libclc/generic/libspirv/atomic/atomic_add.cl b/libclc/generic/libspirv/atomic/atomic_add.cl
index fb5a61257b729..403f757c7036b 100644
--- a/libclc/generic/libspirv/atomic/atomic_add.cl
+++ b/libclc/generic/libspirv/atomic/atomic_add.cl
@@ -10,23 +10,39 @@
 // TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, FN_NAME) \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \
   _CLC_DEF TYPE \
-      _Z18__spirv_AtomicIAddPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      _Z18__spirv_AtomicIAddP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
           volatile AS TYPE *p, enum Scope scope, \
           enum MemorySemanticsMask semantics, TYPE val) { \
     return FN_NAME(p, val); \
   }
 
-IMPL(int, i, global, AS1, __sync_fetch_and_add)
-IMPL(unsigned int, j, global, AS1, __sync_fetch_and_add)
-IMPL(int, i, local, AS3, __sync_fetch_and_add)
-IMPL(unsigned int, j, local, AS3, __sync_fetch_and_add)
+IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_add)
+IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_add)
+IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_add)
+IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_add)
 
 #ifdef cl_khr_int64_base_atomics
-IMPL(long, l, global, AS1, __sync_fetch_and_add_8)
-IMPL(unsigned long, m, global, AS1, __sync_fetch_and_add_8)
-IMPL(long, l, local, AS3, __sync_fetch_and_add_8)
-IMPL(unsigned long, m, local, AS3, __sync_fetch_and_add_8)
+IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_add_8)
+IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_add_8)
+IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_add_8)
+IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_add_8)
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \
+  IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME)
+
+IMPL_GENERIC(int, i, __sync_fetch_and_add)
+IMPL_GENERIC(unsigned int, j, __sync_fetch_and_add)
+
+#ifdef cl_khr_int64_base_atomics
+IMPL_GENERIC(long, l, __sync_fetch_and_add_8)
+IMPL_GENERIC(unsigned long, m, __sync_fetch_and_add_8)
+#endif
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
+
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_and.cl b/libclc/generic/libspirv/atomic/atomic_and.cl
index 59bd617e422a7..40cd616cf88aa 100644
--- a/libclc/generic/libspirv/atomic/atomic_and.cl
+++ b/libclc/generic/libspirv/atomic/atomic_and.cl
@@ -10,23 +10,39 @@
 // TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, FN_NAME) \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \
   _CLC_DEF TYPE \
-      _Z17__spirv_AtomicAndPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      _Z17__spirv_AtomicAndP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
           volatile AS TYPE *p, enum Scope scope, \
           enum MemorySemanticsMask semantics, TYPE val) { \
     return FN_NAME(p, val); \
   }
 
-IMPL(int, i, global, AS1, __sync_fetch_and_and)
-IMPL(unsigned int, j, global, AS1, __sync_fetch_and_and)
-IMPL(int, i, local, AS3, __sync_fetch_and_and)
-IMPL(unsigned int, j, local, AS3, __sync_fetch_and_and)
+IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_and)
+IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_and)
+IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_and)
+IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_and)
 
 #ifdef cl_khr_int64_extended_atomics
-IMPL(long, l, global, AS1, __sync_fetch_and_and_8)
-IMPL(unsigned long, m, global, AS1, __sync_fetch_and_and_8)
-IMPL(long, l, local, AS3, __sync_fetch_and_and_8)
-IMPL(unsigned long, m, local, AS3, __sync_fetch_and_and_8)
+IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_and_8)
+IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_and_8)
+IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_and_8)
+IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_and_8)
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \
+  IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME)
+
+IMPL_GENERIC(int, i, __sync_fetch_and_and)
+IMPL_GENERIC(unsigned int, j, __sync_fetch_and_and)
+
+#ifdef cl_khr_int64_extended_atomics
+IMPL_GENERIC(long, l, __sync_fetch_and_and_8)
+IMPL_GENERIC(unsigned long, m, __sync_fetch_and_and_8)
+#endif
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
+
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_cmpxchg.cl b/libclc/generic/libspirv/atomic/atomic_cmpxchg.cl
index c090e33fed32f..735c83b9548b3 100644
--- a/libclc/generic/libspirv/atomic/atomic_cmpxchg.cl
+++ b/libclc/generic/libspirv/atomic/atomic_cmpxchg.cl
@@ -66,4 +66,41 @@ _Z29__spirv_AtomicCompareExchangePU3AS1mN5__spv5Scope4FlagENS1_19MemorySemantics
     enum MemorySemanticsMask neq, ulong val, ulong cmp) {
   return __sync_val_compare_and_swap_8(p, cmp, val);
 }
+
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+_CLC_DEF int
+_Z29__spirv_AtomicCompareExchangePiN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_ii(
+    volatile int *p, enum Scope scope, enum MemorySemanticsMask eq,
+    enum MemorySemanticsMask neq, int val, int cmp) {
+  return __sync_val_compare_and_swap(p, cmp, val);
+}
+
+_CLC_DEF uint
+_Z29__spirv_AtomicCompareExchangePjN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_jj(
+    volatile uint *p, enum Scope scope, enum MemorySemanticsMask eq,
+    enum MemorySemanticsMask neq, uint val, uint cmp) {
+  return __sync_val_compare_and_swap(p, cmp, val);
+}
+
+#ifdef cl_khr_int64_base_atomics
+
+_CLC_DEF long
+_Z29__spirv_AtomicCompareExchangePlN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_ll(
+    volatile long *p, enum Scope scope, enum MemorySemanticsMask eq,
+    enum MemorySemanticsMask neq, long val, long cmp) {
+  return __sync_val_compare_and_swap_8(p, cmp, val);
+}
+
+_CLC_DEF ulong
+_Z29__spirv_AtomicCompareExchangePmN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_mm(
+    volatile ulong *p, enum Scope scope, enum MemorySemanticsMask eq,
+    enum MemorySemanticsMask neq, ulong val, ulong cmp) {
+  return __sync_val_compare_and_swap_8(p, cmp, val);
+}
+
+#endif
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
diff --git a/libclc/generic/libspirv/atomic/atomic_max.cl b/libclc/generic/libspirv/atomic/atomic_max.cl
index a2c37c258760c..c8268cabdf3e7 100644
--- a/libclc/generic/libspirv/atomic/atomic_max.cl
+++ b/libclc/generic/libspirv/atomic/atomic_max.cl
@@ -10,18 +10,18 @@
 // TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, NAME, PREFIX, SUFFIX) \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, NAME, PREFIX, SUFFIX) \
   _CLC_DEF TYPE \
-      _Z18##NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      _Z18##NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
          volatile AS TYPE *p, enum Scope scope, \
          enum MemorySemanticsMask semantics, TYPE val) { \
    return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \
   }
 
-IMPL(int, i, global, AS1, __spirv_AtomicSMax, , max)
-IMPL(unsigned int, j, global, AS1, __spirv_AtomicUMax, , umax)
-IMPL(int, i, local, AS3, __spirv_AtomicSMax, , max)
-IMPL(unsigned int, j, local, AS3, __spirv_AtomicUMax, , umax)
+IMPL(int, i, global, U3AS1, 1, __spirv_AtomicSMax, , max)
+IMPL(unsigned int, j, global, U3AS1, 1, __spirv_AtomicUMax, , umax)
+IMPL(int, i, local, U3AS3, 1, __spirv_AtomicSMax, , max)
+IMPL(unsigned int, j, local, U3AS3, 1, __spirv_AtomicUMax, , umax)
 
 #ifdef cl_khr_int64_extended_atomics
 unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long);
@@ -29,9 +29,30 @@ unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long);
 unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long *, unsigned long);
 unsigned long __clc__sync_fetch_and_umax_global_8(volatile global unsigned long *, unsigned long);
 
-IMPL(long, l, global, AS1, __spirv_AtomicSMax, __clc, max_global_8)
-IMPL(unsigned long, m, global, AS1, __spirv_AtomicUMax, __clc, umax_global_8)
-IMPL(long, l, local, AS3, __spirv_AtomicSMax, __clc, max_local_8)
-IMPL(unsigned long, m, local, AS3, __spirv_AtomicUMax, __clc, umax_local_8)
+IMPL(long, l, global, U3AS1, 1, __spirv_AtomicSMax, __clc, max_global_8)
+IMPL(unsigned long, m, global, U3AS1, 1, __spirv_AtomicUMax, __clc, umax_global_8)
+IMPL(long, l, local, U3AS3, 1, __spirv_AtomicSMax, __clc, max_local_8)
+IMPL(unsigned long, m, local, U3AS3, 1, __spirv_AtomicUMax, __clc, umax_local_8)
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+
+#define IMPL_GENERIC(TYPE, TYPE_MANGLED, NAME, PREFIX, SUFFIX) \
+  IMPL(TYPE, TYPE_MANGLED, , , 0, NAME, PREFIX, SUFFIX)
+
+IMPL_GENERIC(int, i, __spirv_AtomicSMax, , max)
+IMPL_GENERIC(unsigned int, j, __spirv_AtomicUMax, , umax)
+
+#ifdef cl_khr_int64_extended_atomics
+
+unsigned long __clc__sync_fetch_and_max_generic_8(volatile generic long *, long);
+unsigned long __clc__sync_fetch_and_umax_generic_8(volatile __generic unsigned long *, unsigned long);
+
+IMPL_GENERIC(long, l, __spirv_AtomicSMax, __clc, max_generic_8)
+IMPL_GENERIC(unsigned long, m, __spirv_AtomicUMax, __clc, umax_generic_8)
+#endif
+
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
 
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_min.cl b/libclc/generic/libspirv/atomic/atomic_min.cl
index 3e3c4dfdf727c..13169cc3c0fa7 100644
--- a/libclc/generic/libspirv/atomic/atomic_min.cl
+++ b/libclc/generic/libspirv/atomic/atomic_min.cl
@@ -10,18 +10,18 @@
 // TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, NAME, PREFIX, SUFFIX) \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, NAME, PREFIX, SUFFIX) \
   _CLC_DEF TYPE \
-      _Z18##NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      _Z18##NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
          volatile AS TYPE *p, enum Scope scope, \
          enum MemorySemanticsMask semantics, TYPE val) { \
    return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \
   }
 
-IMPL(int, i, global, AS1, __spirv_AtomicSMin, , min)
-IMPL(unsigned int, j, global, AS1, __spirv_AtomicUMin, , umin)
-IMPL(int, i, local, AS3, __spirv_AtomicSMin, , min)
-IMPL(unsigned int, j, local, AS3, __spirv_AtomicUMin, , umin)
+IMPL(int, i, global, U3AS1, 1, __spirv_AtomicSMin, , min)
+IMPL(unsigned int, j, global, U3AS1, 1, __spirv_AtomicUMin, , umin)
+IMPL(int, i, local, U3AS3, 1, __spirv_AtomicSMin, , min)
+IMPL(unsigned int, j, local, U3AS3, 1, __spirv_AtomicUMin, , umin)
 
 #ifdef cl_khr_int64_extended_atomics
 unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long);
@@ -29,9 +29,30 @@ unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long);
 unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long *, unsigned long);
 unsigned long __clc__sync_fetch_and_umin_global_8(volatile global unsigned long *, unsigned long);
 
-IMPL(long, l, global, AS1, __spirv_AtomicSMin, __clc, min_global_8)
-IMPL(unsigned long, m, global, AS1, __spirv_AtomicUMin, __clc, umin_global_8)
-IMPL(long, l, local, AS3, __spirv_AtomicSMin, __clc, min_local_8)
-IMPL(unsigned long, m, local, AS3, __spirv_AtomicUMin, __clc, umin_local_8)
+IMPL(long, l, global, U3AS1, 1, __spirv_AtomicSMin, __clc, min_global_8)
+IMPL(unsigned long, m, global, U3AS1, 1, __spirv_AtomicUMin, __clc, umin_global_8)
+IMPL(long, l, local, U3AS3, 1, __spirv_AtomicSMin, __clc, min_local_8)
+IMPL(unsigned long, m, local, U3AS3, 1, __spirv_AtomicUMin, __clc, umin_local_8)
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+
+#define IMPL_GENERIC(TYPE, TYPE_MANGLED, NAME, PREFIX, SUFFIX) \
+  IMPL(TYPE, TYPE_MANGLED, , , 0, NAME, PREFIX, SUFFIX)
+
+IMPL_GENERIC(int, i, __spirv_AtomicSMin, , min)
+IMPL_GENERIC(unsigned int, j, __spirv_AtomicUMin, , umin)
+
+#ifdef cl_khr_int64_extended_atomics
+
+unsigned long __clc__sync_fetch_and_min_generic_8(volatile generic long *, long);
+unsigned long __clc__sync_fetch_and_umin_generic_8(volatile __generic unsigned long *, unsigned long);
+
+IMPL_GENERIC(long, l, __spirv_AtomicSMin, __clc, min_generic_8)
+IMPL_GENERIC(unsigned long, m, __spirv_AtomicUMin, __clc, umin_generic_8)
+#endif
+
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
 
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_or.cl b/libclc/generic/libspirv/atomic/atomic_or.cl
index 1d789ca074d01..4161a33d8b6d6 100644
--- a/libclc/generic/libspirv/atomic/atomic_or.cl
+++ b/libclc/generic/libspirv/atomic/atomic_or.cl
@@ -10,23 +10,39 @@
 // TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, FN_NAME) \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \
   _CLC_DEF TYPE \
-      _Z16__spirv_AtomicOrPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      _Z16__spirv_AtomicOrP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
           volatile AS TYPE *p, enum Scope scope, \
           enum MemorySemanticsMask semantics, TYPE val) { \
     return FN_NAME(p, val); \
   }
 
-IMPL(int, i, global, AS1, __sync_fetch_and_or)
-IMPL(unsigned int, j, global, AS1, __sync_fetch_and_or)
-IMPL(int, i, local, AS3, __sync_fetch_and_or)
-IMPL(unsigned int, j, local, AS3, __sync_fetch_and_or)
+IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_or)
+IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_or)
+IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_or)
+IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_or)
 
 #ifdef cl_khr_int64_extended_atomics
-IMPL(long, l, global, AS1, __sync_fetch_and_or_8)
-IMPL(unsigned long, m, global, AS1, __sync_fetch_and_or_8)
-IMPL(long, l, local, AS3, __sync_fetch_and_or_8)
-IMPL(unsigned long, m, local, AS3, __sync_fetch_and_or_8)
+IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_or_8)
+IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_or_8)
+IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_or_8)
+IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_or_8)
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \
+  IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME)
+
+IMPL_GENERIC(int, i, __sync_fetch_and_or)
+IMPL_GENERIC(unsigned int, j, __sync_fetch_and_or)
+
+#ifdef cl_khr_int64_extended_atomics
+IMPL_GENERIC(long, l, __sync_fetch_and_or_8)
+IMPL_GENERIC(unsigned long, m, __sync_fetch_and_or_8)
+#endif
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
+
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_sub.cl b/libclc/generic/libspirv/atomic/atomic_sub.cl
index 59f78a7cf7c44..f0120ad7e851c 100644
--- a/libclc/generic/libspirv/atomic/atomic_sub.cl
+++ b/libclc/generic/libspirv/atomic/atomic_sub.cl
@@ -10,23 +10,38 @@
 // TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, FN_NAME) \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \
   _CLC_DEF TYPE \
-      _Z18__spirv_AtomicISubPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      _Z18__spirv_AtomicISubP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
          volatile AS TYPE *p, enum Scope scope, \
          enum MemorySemanticsMask semantics, TYPE val) { \
    return FN_NAME(p, val); \
   }
 
-IMPL(int, i, global, AS1, __sync_fetch_and_sub)
-IMPL(unsigned int, j, global, AS1, __sync_fetch_and_sub)
-IMPL(int, i, local, AS3, __sync_fetch_and_sub)
-IMPL(unsigned int, j, local, AS3, __sync_fetch_and_sub)
+IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_sub)
+IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_sub)
+IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_sub)
+IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_sub)
 
 #ifdef cl_khr_int64_base_atomics
-IMPL(long, l, global, AS1, __sync_fetch_and_sub_8)
-IMPL(unsigned long, m, global, AS1, __sync_fetch_and_sub_8)
-IMPL(long, l, local, AS3, __sync_fetch_and_sub_8)
-IMPL(unsigned long, m, local, AS3, __sync_fetch_and_sub_8)
+IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_sub_8)
+IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_sub_8)
+IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_sub_8)
+IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_sub_8)
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \
+  IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME)
+
+IMPL_GENERIC(int, i, __sync_fetch_and_sub)
+IMPL_GENERIC(unsigned int, j, __sync_fetch_and_sub)
+
+#ifdef cl_khr_int64_base_atomics
+IMPL_GENERIC(long, l, __sync_fetch_and_sub_8)
+IMPL_GENERIC(unsigned long, m, __sync_fetch_and_sub_8)
+#endif
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
 
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_xchg.cl b/libclc/generic/libspirv/atomic/atomic_xchg.cl
index 6f22977caa530..3e98a836e9bdf 100644
--- a/libclc/generic/libspirv/atomic/atomic_xchg.cl
+++ b/libclc/generic/libspirv/atomic/atomic_xchg.cl
@@ -28,23 +28,39 @@ _Z22__spirv_AtomicExchangePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4Fl
       (volatile local uint *)p, scope, semantics, as_uint(val)));
 }
 
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, FN_NAME) \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \
   _CLC_DEF TYPE \
-      _Z22__spirv_AtomicExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      _Z22__spirv_AtomicExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
          volatile AS TYPE *p, enum Scope scope, \
          enum MemorySemanticsMask semantics, TYPE val) { \
    return FN_NAME(p, val); \
   }
 
-IMPL(int, i, global, AS1, __sync_swap_4)
-IMPL(unsigned int, j, global, AS1, __sync_swap_4)
-IMPL(int, i, local, AS3, __sync_swap_4)
-IMPL(unsigned int, j, local, AS3, __sync_swap_4)
+IMPL(int, i, global, U3AS1, 1, __sync_swap_4)
+IMPL(unsigned int, j, global, U3AS1, 1, __sync_swap_4)
+IMPL(int, i, local, U3AS3, 1, __sync_swap_4)
+IMPL(unsigned int, j, local, U3AS3, 1, __sync_swap_4)
 
 #ifdef cl_khr_int64_base_atomics
-IMPL(long, l, global, AS1, __sync_swap_8)
-IMPL(unsigned long, m, global, AS1, __sync_swap_8)
-IMPL(long, l, local, AS3, __sync_swap_8)
-IMPL(unsigned long, m, local, AS3, __sync_swap_8)
+IMPL(long, l, global, U3AS1, 1, __sync_swap_8)
+IMPL(unsigned long, m, global, U3AS1, 1, __sync_swap_8)
+IMPL(long, l, local, U3AS3, 1, __sync_swap_8)
+IMPL(unsigned long, m, local, U3AS3, 1, __sync_swap_8)
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \
+  IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME)
+
+IMPL_GENERIC(int, i, __sync_swap_4)
+IMPL_GENERIC(unsigned int, j, __sync_swap_4)
+
+#ifdef cl_khr_int64_base_atomics
+IMPL_GENERIC(long, l, __sync_swap_8)
+IMPL_GENERIC(unsigned long, m, __sync_swap_8)
+#endif
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
+
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_xor.cl b/libclc/generic/libspirv/atomic/atomic_xor.cl
index 248c438d265d2..428f8f2de1c8c 100644
--- a/libclc/generic/libspirv/atomic/atomic_xor.cl
+++ b/libclc/generic/libspirv/atomic/atomic_xor.cl
@@ -10,23 +10,38 @@
 // TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, FN_NAME) \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \
   _CLC_DEF TYPE \
-      _Z17__spirv_AtomicXorPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      _Z17__spirv_AtomicXorP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
          volatile AS TYPE *p, enum Scope scope, \
          enum MemorySemanticsMask semantics, TYPE val) { \
    return FN_NAME(p, val); \
   }
 
-IMPL(int, i, global, AS1, __sync_fetch_and_xor)
-IMPL(unsigned int, j, global, AS1, __sync_fetch_and_xor)
-IMPL(int, i, local, AS3, __sync_fetch_and_xor)
-IMPL(unsigned int, j, local, AS3, __sync_fetch_and_xor)
+IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_xor)
+IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_xor)
+IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_xor)
+IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_xor)
 
 #ifdef cl_khr_int64_extended_atomics
-IMPL(long, l, global, AS1, __sync_fetch_and_xor_8)
-IMPL(unsigned long, m, global, AS1, __sync_fetch_and_xor_8)
-IMPL(long, l, local, AS3, __sync_fetch_and_xor_8)
-IMPL(unsigned long, m, local, AS3, __sync_fetch_and_xor_8)
+IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_xor_8)
+IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_xor_8)
+IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_xor_8)
+IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_xor_8)
 #endif
+
+#if _CLC_GENERIC_AS_SUPPORTED
+
+#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \
+  IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME)
+
+IMPL_GENERIC(int, i, __sync_fetch_and_xor)
+IMPL_GENERIC(unsigned int, j, __sync_fetch_and_xor)
+
+#ifdef cl_khr_int64_extended_atomics
+IMPL_GENERIC(long, l, __sync_fetch_and_xor_8)
+IMPL_GENERIC(unsigned long, m, __sync_fetch_and_xor_8)
+#endif
+
+#endif //_CLC_GENERIC_AS_SUPPORTED
 
 #undef IMPL
diff --git a/libclc/x86_64-unknown-linux/libspirv/SOURCES b/libclc/x86_64-unknown-linux/libspirv/SOURCES
index 5a5536f318379..b5ebcbf787bf6 100644
--- a/libclc/x86_64-unknown-linux/libspirv/SOURCES
+++ b/libclc/x86_64-unknown-linux/libspirv/SOURCES
@@ -17,3 +17,4 @@ math/rint.cl
 math/round.cl
 math/trunc.cl
 shared/helpers.ll
+cl_khr_int64_extended_atomics/minmax_helpers.ll
diff --git a/libclc/x86_64-unknown-linux/libspirv/cl_khr_int64_extended_atomics/minmax_helpers.ll b/libclc/x86_64-unknown-linux/libspirv/cl_khr_int64_extended_atomics/minmax_helpers.ll
new file mode 100644
index 0000000000000..b885cdc76dc2f
--- /dev/null
+++ b/libclc/x86_64-unknown-linux/libspirv/cl_khr_int64_extended_atomics/minmax_helpers.ll
@@ -0,0 +1,77 @@
+#if __clang_major__ >= 7
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+#else
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+#endif
+
+define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_min_generic_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umin_generic_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_max_generic_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umax_generic_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}