From 8088b0aaf589b6ee959eb5238e6e8a4062f7e3c4 Mon Sep 17 00:00:00 2001
From: Wenju He
Date: Fri, 14 Mar 2025 04:16:38 -0700
Subject: [PATCH 1/7] [SYCL] Move SPIR-V atomic builtin declarations into clang/lib/Sema/SPIRVBuiltins.td

Change the scope and memory semantics parameter types from enum to int,
similar to PR #17438. volatile is removed from the function parameter
pointer type, to align with SPIR-V and clang/lib/Sema/SPIRVBuiltins.td.
---
 clang/lib/Sema/SPIRVBuiltins.td | 67 ++++++
 libclc/generic/lib/atomic/atomic_add.cl | 17 +-
 libclc/generic/lib/atomic/atomic_and.cl | 17 +-
 libclc/generic/lib/atomic/atomic_cmpxchg.cl | 20 +-
 libclc/generic/lib/atomic/atomic_dec.cl | 18 +-
 libclc/generic/lib/atomic/atomic_inc.cl | 18 +-
 libclc/generic/lib/atomic/atomic_max.cl | 17 +-
 libclc/generic/lib/atomic/atomic_min.cl | 17 +-
 libclc/generic/lib/atomic/atomic_or.cl | 17 +-
 libclc/generic/lib/atomic/atomic_sub.cl | 17 +-
 libclc/generic/lib/atomic/atomic_xchg.cl | 25 +--
 libclc/generic/lib/atomic/atomic_xor.cl | 17 +-
 .../lib/cl_khr_int64_base_atomics/atom_add.cl | 17 +-
 .../cl_khr_int64_base_atomics/atom_cmpxchg.cl | 18 +-
 .../lib/cl_khr_int64_base_atomics/atom_sub.cl | 17 +-
 .../cl_khr_int64_base_atomics/atom_xchg.cl | 15 +-
 .../cl_khr_int64_extended_atomics/atom_and.cl | 15 +-
 .../cl_khr_int64_extended_atomics/atom_max.cl | 15 +-
 .../cl_khr_int64_extended_atomics/atom_min.cl | 15 +-
 .../cl_khr_int64_extended_atomics/atom_or.cl | 15 +-
 .../cl_khr_int64_extended_atomics/atom_xor.cl | 15 +-
 .../include/libspirv/atomic/atomic_add.h | 2 -
 .../include/libspirv/atomic/atomic_and.h | 2 -
 .../include/libspirv/atomic/atomic_cmpxchg.h | 50 ++---
 .../include/libspirv/atomic/atomic_dec.h | 42 ++--
 .../include/libspirv/atomic/atomic_decl.inc | 36 ++--
 .../include/libspirv/atomic/atomic_inc.h | 42 ++--
 .../include/libspirv/atomic/atomic_load.h | 23 +--
 .../include/libspirv/atomic/atomic_max.h | 2 -
 .../include/libspirv/atomic/atomic_min.h | 2 -
 .../include/libspirv/atomic/atomic_or.h | 2 -
 .../include/libspirv/atomic/atomic_store.h | 23 +--
 .../include/libspirv/atomic/atomic_sub.h | 2 -
 .../include/libspirv/atomic/atomic_xchg.h | 16 +-
 .../include/libspirv/atomic/atomic_xor.h | 2 -
 .../lib/amdgcn-amdhsa/atomic/atomic_add.cl | 93 ++++-----
 .../lib/amdgcn-amdhsa/atomic/atomic_and.cl | 2 +-
 .../amdgcn-amdhsa/atomic/atomic_cmpxchg.cl | 65 +++---
 .../lib/amdgcn-amdhsa/atomic/atomic_helpers.h | 65 +++---
 .../lib/amdgcn-amdhsa/atomic/atomic_load.cl | 32 ++-
 .../lib/amdgcn-amdhsa/atomic/atomic_max.cl | 31 ++-
 .../lib/amdgcn-amdhsa/atomic/atomic_min.cl | 23 +--
 .../lib/amdgcn-amdhsa/atomic/atomic_minmax.h | 68 +++----
 .../lib/amdgcn-amdhsa/atomic/atomic_or.cl | 2 +-
 .../lib/amdgcn-amdhsa/atomic/atomic_safe.def | 8 +-
 .../lib/amdgcn-amdhsa/atomic/atomic_store.cl | 34 ++--
 .../lib/amdgcn-amdhsa/atomic/atomic_sub.cl | 35 ++--
 .../lib/amdgcn-amdhsa/atomic/atomic_xchg.cl | 10 +-
 .../lib/amdgcn-amdhsa/atomic/atomic_xor.cl | 2 +-
 .../libspirv/lib/generic/atomic/atomic_add.cl | 40 ++--
 .../libspirv/lib/generic/atomic/atomic_and.cl | 40 ++--
 .../lib/generic/atomic/atomic_cmpxchg.cl | 86 ++++----
 .../libspirv/lib/generic/atomic/atomic_dec.cl | 50 ++---
 .../libspirv/lib/generic/atomic/atomic_inc.cl | 52 ++---
 .../lib/generic/atomic/atomic_load.cl | 60 +++---
 .../libspirv/lib/generic/atomic/atomic_max.cl | 40 ++--
 .../libspirv/lib/generic/atomic/atomic_min.cl | 40 ++--
 .../libspirv/lib/generic/atomic/atomic_or.cl | 40 ++--
 .../lib/generic/atomic/atomic_store.cl | 78 ++++---
 .../libspirv/lib/generic/atomic/atomic_sub.cl | 40 ++--
.../lib/generic/atomic/atomic_xchg.cl | 66 +++--- .../libspirv/lib/generic/atomic/atomic_xor.cl | 40 ++-- .../lib/ptx-nvidiacl/atomic/atomic_add.cl | 190 +++++++++--------- .../lib/ptx-nvidiacl/atomic/atomic_and.cl | 8 +- .../lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl | 49 ++--- .../lib/ptx-nvidiacl/atomic/atomic_dec.cl | 4 +- .../lib/ptx-nvidiacl/atomic/atomic_helpers.h | 26 +-- .../lib/ptx-nvidiacl/atomic/atomic_inc.cl | 4 +- .../atomic/atomic_inc_dec_helpers.h | 31 +-- .../lib/ptx-nvidiacl/atomic/atomic_load.cl | 39 ++-- .../lib/ptx-nvidiacl/atomic/atomic_max.cl | 112 ++++------- .../lib/ptx-nvidiacl/atomic/atomic_min.cl | 109 ++++------ .../lib/ptx-nvidiacl/atomic/atomic_or.cl | 8 +- .../lib/ptx-nvidiacl/atomic/atomic_store.cl | 35 ++-- .../lib/ptx-nvidiacl/atomic/atomic_sub.cl | 41 ++-- .../lib/ptx-nvidiacl/atomic/atomic_xchg.cl | 12 +- .../lib/ptx-nvidiacl/atomic/atomic_xor.cl | 8 +- libdevice/atomic.hpp | 39 ++-- sycl/include/sycl/__spirv/spirv_ops.hpp | 102 ---------- sycl/test/check_device_code/atomic_ref.cpp | 2 +- 80 files changed, 1118 insertions(+), 1498 deletions(-) diff --git a/clang/lib/Sema/SPIRVBuiltins.td b/clang/lib/Sema/SPIRVBuiltins.td index 955c58c5532ab..c1a90e2f081f8 100644 --- a/clang/lib/Sema/SPIRVBuiltins.td +++ b/clang/lib/Sema/SPIRVBuiltins.td @@ -392,10 +392,13 @@ def AIGenTypeNNoScalar : GenericType<"AIGenTypeNNoScalar", TLAllInts, VecNoS // All integer to unsigned def AI2UGenTypeN : GenericType<"AI2UGenTypeN", TLAllUIntsTwice, VecAndScalar>; // Signed integer +def SGenType1 : GenericType<"SGenType1", TLSignedInts, Vec1>; def SGenTypeN : GenericType<"SGenTypeN", TLSignedInts, VecAndScalar>; // Unsigned integer +def UGenType1 : GenericType<"UGenType1", TLUnsignedInts, Vec1>; def UGenTypeN : GenericType<"UGenTypeN", TLUnsignedInts, VecAndScalar>; // Float +def FGenType1 : GenericType<"FGenType1", TLFloat, Vec1>; def FGenTypeN : GenericType<"FGenTypeN", TLFloat, VecAndScalar>; // (u)int, (u)long, and all floats def IntLongFloatGenType1 : GenericType<"IntLongFloatGenType1", TLIntLongFloats, Vec1>; @@ -960,3 +963,67 @@ foreach name = ["GroupSMin", "GroupSMax"] in { foreach name = ["GroupLogicalAndKHR", "GroupLogicalOrKHR"] in { def : SPVBuiltin; } + +// 3.56.18. 
Atomic Instructions + +foreach AS = [GlobalAS, LocalAS, PrivateAS, GenericAS] in { + def : SPVBuiltin< + "AtomicLoad", [AGenType1, PointerType, Int, Int], + Attr.Convergent>; + + def : SPVBuiltin<"AtomicStore", + [Void, PointerType, Int, Int, AGenType1], + Attr.Convergent>; + + def : SPVBuiltin<"AtomicExchange", + [AGenType1, PointerType, Int, Int, AGenType1], + Attr.Convergent>; + + foreach name = ["AtomicCompareExchange", "AtomicCompareExchangeWeak"] in { + def : SPVBuiltin, Int, Int, Int, + AIGenType1, AIGenType1], + Attr.Convergent>; + } + + foreach name = ["AtomicIIncrement", "AtomicIDecrement"] in { + def : SPVBuiltin, Int, Int], + Attr.Convergent>; + } + + foreach name = ["AtomicSMin", "AtomicSMax"] in { + def : SPVBuiltin, Int, Int, + SGenType1], + Attr.Convergent>; + } + + foreach name = ["AtomicUMin", "AtomicUMax"] in { + def : SPVBuiltin, Int, Int, + UGenType1], + Attr.Convergent>; + } + + foreach name = ["AtomicIAdd", "AtomicISub", "AtomicAnd", "AtomicOr", + "AtomicXor"] in { + def : SPVBuiltin, Int, Int, + AIGenType1], + Attr.Convergent>; + } + + def : SPVBuiltin< + "AtomicFlagTestAndSet", [Bool, PointerType, Int, Int], + Attr.Convergent>; + + def : SPVBuiltin<"AtomicFlagClear", [Void, PointerType, Int, Int], + Attr.Convergent>; + + foreach name = ["AtomicFMaxEXT", "AtomicFMinEXT", "AtomicFAddEXT"] in { + def : SPVBuiltin, Int, Int, + FGenType1], + Attr.Convergent>; + } +} diff --git a/libclc/generic/lib/atomic/atomic_add.cl b/libclc/generic/lib/atomic/atomic_add.cl index 00e37399d602c..7d26c50f66d55 100644 --- a/libclc/generic/lib/atomic/atomic_add.cl +++ b/libclc/generic/lib/atomic/atomic_add.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z18__spirv_AtomicIAddPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicIAdd((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_and.cl b/libclc/generic/lib/atomic/atomic_and.cl index 79369868bb356..3e7fd41f8cf0f 100644 --- a/libclc/generic/lib/atomic/atomic_and.cl +++ b/libclc/generic/lib/atomic/atomic_and.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. 
*/ \ - return _Z17__spirv_AtomicAndPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicAnd((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_cmpxchg.cl b/libclc/generic/lib/atomic/atomic_cmpxchg.cl index 9e0375978bb48..f41f8c07881a9 100644 --- a/libclc/generic/lib/atomic/atomic_cmpxchg.cl +++ b/libclc/generic/lib/atomic/atomic_cmpxchg.cl @@ -1,17 +1,15 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ - TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z29__spirv_AtomicCompareExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, SequentiallyConsistent, val, cmp); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ + TYPE val) { \ + return __spirv_AtomicCompareExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, SequentiallyConsistent, val, cmp); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_dec.cl b/libclc/generic/lib/atomic/atomic_dec.cl index f1ff777e5dd44..77555c2243012 100644 --- a/libclc/generic/lib/atomic/atomic_dec.cl +++ b/libclc/generic/lib/atomic/atomic_dec.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_dec(volatile AS TYPE *p) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z24__spirv_AtomicIDecrementPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - p, Device, SequentiallyConsistent); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_dec(volatile AS TYPE *p) { \ + return __spirv_AtomicIDecrement( \ + (AS TYPE *)p, Device, SequentiallyConsistent); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_inc.cl b/libclc/generic/lib/atomic/atomic_inc.cl index d8120426a7575..65e49aa765d2d 100644 --- a/libclc/generic/lib/atomic/atomic_inc.cl +++ b/libclc/generic/lib/atomic/atomic_inc.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_inc(volatile AS TYPE *p) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. 
*/ \ - return _Z24__spirv_AtomicIIncrementPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - p, Device, SequentiallyConsistent); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_inc(volatile AS TYPE *p) { \ + return __spirv_AtomicIIncrement( \ + (AS TYPE *)p, Device, SequentiallyConsistent); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_max.cl b/libclc/generic/lib/atomic/atomic_max.cl index 8ec8acb0d3681..4446c780557e8 100644 --- a/libclc/generic/lib/atomic/atomic_max.cl +++ b/libclc/generic/lib/atomic/atomic_max.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, OP) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z18##OP##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, OP) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ + return OP((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1, __spirv_AtomicSMax) -IMPL(unsigned int, j, global, AS1, __spirv_AtomicUMax) -IMPL(int, i, local, AS3, __spirv_AtomicSMax) -IMPL(unsigned int, j, local, AS3, __spirv_AtomicUMax) +IMPL(int, global, __spirv_AtomicSMax) +IMPL(unsigned int, global, __spirv_AtomicUMax) +IMPL(int, local, __spirv_AtomicSMax) +IMPL(unsigned int, local, __spirv_AtomicUMax) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_min.cl b/libclc/generic/lib/atomic/atomic_min.cl index af1208035446f..833ba068d810a 100644 --- a/libclc/generic/lib/atomic/atomic_min.cl +++ b/libclc/generic/lib/atomic/atomic_min.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, OP) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z18##OP##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, OP) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ + return OP((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1, __spirv_AtomicSMin) -IMPL(unsigned int, j, global, AS1, __spirv_AtomicUMin) -IMPL(int, i, local, AS3, __spirv_AtomicSMin) -IMPL(unsigned int, j, local, AS3, __spirv_AtomicUMin) +IMPL(int, global, __spirv_AtomicSMin) +IMPL(unsigned int, global, __spirv_AtomicUMin) +IMPL(int, local, __spirv_AtomicSMin) +IMPL(unsigned int, local, __spirv_AtomicUMin) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_or.cl b/libclc/generic/lib/atomic/atomic_or.cl index 2eee23494462b..4ec9fe3a1b846 100644 --- a/libclc/generic/lib/atomic/atomic_or.cl +++ b/libclc/generic/lib/atomic/atomic_or.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. 
*/ \ - return _Z16__spirv_AtomicOrPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicOr((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_sub.cl b/libclc/generic/lib/atomic/atomic_sub.cl index 04d93010f606a..35c772eabff15 100644 --- a/libclc/generic/lib/atomic/atomic_sub.cl +++ b/libclc/generic/lib/atomic/atomic_sub.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z18__spirv_AtomicISubPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicISub((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_xchg.cl b/libclc/generic/lib/atomic/atomic_xchg.cl index b658c834b9ecf..7a0838fdc839c 100644 --- a/libclc/generic/lib/atomic/atomic_xchg.cl +++ b/libclc/generic/lib/atomic/atomic_xchg.cl @@ -2,27 +2,20 @@ #include _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) { - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. */ - return _Z22__spirv_AtomicExchangePU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - p, Device, SequentiallyConsistent, val); + return __spirv_AtomicExchange((global float *)p, Device, SequentiallyConsistent, val); } _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) { - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. */ - return _Z22__spirv_AtomicExchangePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - p, Device, SequentiallyConsistent, val); + return __spirv_AtomicExchange((local float *)p, Device, SequentiallyConsistent, val); } -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. 
*/ \ - return _Z22__spirv_AtomicExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicExchange((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_xor.cl b/libclc/generic/lib/atomic/atomic_xor.cl index 27b28ac82ed24..7f59038f5cb72 100644 --- a/libclc/generic/lib/atomic/atomic_xor.cl +++ b/libclc/generic/lib/atomic/atomic_xor.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z17__spirv_AtomicXorPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicXor((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl index 78d8d5b8b6f15..305664b4721d2 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl @@ -1,20 +1,17 @@ #include #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \ - return _Z18__spirv_AtomicIAddPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicIAdd((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl index e6225d6acfd78..c53909e9353ff 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl @@ -3,17 +3,17 @@ #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ - TYPE val) { \ - return _Z29__spirv_AtomicCompareExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, SequentiallyConsistent, cmp, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ + TYPE val) { \ + return __spirv_AtomicCompareExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, SequentiallyConsistent, cmp, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl index c3a54b161118f..f09bd9db65e02 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl @@ -1,20 +1,17 @@ #include #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \ - return _Z18__spirv_AtomicISubPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicISub((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl index 75ac67c44522a..f3dc41dca4873 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl @@ -3,16 +3,15 @@ #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \ - return _Z22__spirv_AtomicExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicExchange((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl index cce06ee739b6e..dab117c9ac382 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl @@ -3,16 +3,15 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \ - return _Z17__spirv_AtomicAndPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicAnd((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl index e183488fb3a9d..b305c2d66225b 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl @@ -3,16 +3,15 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \ - return 
_Z18##NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \ + return NAME((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1, __spirv_AtomicSMax) -IMPL(unsigned long, m, global, AS1, __spirv_AtomicUMax) -IMPL(long, l, local, AS3, __spirv_AtomicSMax) -IMPL(unsigned long, m, local, AS3, __spirv_AtomicUMax) +IMPL(long, global, __spirv_AtomicSMax) +IMPL(unsigned long, global, __spirv_AtomicUMax) +IMPL(long, local, __spirv_AtomicSMax) +IMPL(unsigned long, local, __spirv_AtomicUMax) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl index 061a94de710de..53a00856923d8 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl @@ -3,16 +3,15 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \ - return _Z18##NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \ + return NAME((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1, __spirv_AtomicSMin) -IMPL(unsigned long, m, global, AS1, __spirv_AtomicUMin) -IMPL(long, l, local, AS3, __spirv_AtomicSMin) -IMPL(unsigned long, m, local, AS3, __spirv_AtomicUMin) +IMPL(long, global, __spirv_AtomicSMin) +IMPL(unsigned long, global, __spirv_AtomicUMin) +IMPL(long, local, __spirv_AtomicSMin) +IMPL(unsigned long, local, __spirv_AtomicUMin) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl index c4ada923fb6f1..d9cc70084c0dc 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl @@ -3,16 +3,15 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \ - return _Z16__spirv_AtomicOrPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicOr((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl index bae73ea266820..62782ee5fdbf2 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl @@ -3,16 +3,15 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) 
{ \ - return _Z17__spirv_AtomicXorPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicXor((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_add.h b/libclc/libspirv/include/libspirv/atomic/atomic_add.h index ff05f7176be3e..af05f0fa80007 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_add.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_add.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicIAdd -#define __SPIRV_FUNCTION_S_LEN 18 #define __SPIRV_FUNCTION_U __spirv_AtomicIAdd -#define __SPIRV_FUNCTION_U_LEN 18 #define __SPIRV_INT64_BASE #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_and.h b/libclc/libspirv/include/libspirv/atomic/atomic_and.h index 9df1e5050a9e5..948433bc44f34 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_and.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_and.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicAnd -#define __SPIRV_FUNCTION_S_LEN 17 #define __SPIRV_FUNCTION_U __spirv_AtomicAnd -#define __SPIRV_FUNCTION_U_LEN 17 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_cmpxchg.h b/libclc/libspirv/include/libspirv/atomic/atomic_cmpxchg.h index 9a643d4295857..9c4dac2864a8e 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_cmpxchg.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_cmpxchg.h @@ -6,40 +6,22 @@ // //===----------------------------------------------------------------------===// -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. 
-_CLC_DECL int -_Z29__spirv_AtomicCompareExchangePU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii( - volatile local int *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, int, int); -_CLC_DECL int -_Z29__spirv_AtomicCompareExchangePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii( - volatile global int *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, int, int); -_CLC_DECL uint -_Z29__spirv_AtomicCompareExchangePU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_jj( - volatile local uint *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, uint, uint); -_CLC_DECL uint -_Z29__spirv_AtomicCompareExchangePU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_jj( - volatile global uint *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, uint, uint); +#define DECL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicCompareExchange( \ + AS TYPE *, int, int, int, TYPE, TYPE); + +#define DECL_AS(TYPE) \ + DECL(TYPE, global) \ + DECL(TYPE, local) \ + DECL(TYPE, ) + +DECL_AS(int) +DECL_AS(unsigned int) #ifdef cl_khr_int64_base_atomics -_CLC_DECL long -_Z29__spirv_AtomicCompareExchangePU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ll( - volatile local long *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, long, long); -_CLC_DECL long -_Z29__spirv_AtomicCompareExchangePU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ll( - volatile global long *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, long, long); -_CLC_DECL unsigned long -_Z29__spirv_AtomicCompareExchangePU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_mm( - volatile local unsigned long *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, unsigned long, unsigned long); -_CLC_DECL unsigned long -_Z29__spirv_AtomicCompareExchangePU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_mm( - volatile global unsigned long *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, unsigned long, unsigned long); +DECL_AS(long) +DECL_AS(unsigned long) #endif + +#undef DECL_AS +#undef DECL diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_dec.h b/libclc/libspirv/include/libspirv/atomic/atomic_dec.h index d1908c1958b12..2335c3d6dfa24 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_dec.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_dec.h @@ -6,30 +6,24 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL int -_Z24__spirv_AtomicIDecrementPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local int *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL int -_Z24__spirv_AtomicIDecrementPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global int *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL uint -_Z24__spirv_AtomicIDecrementPU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local uint *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL uint -_Z24__spirv_AtomicIDecrementPU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global uint *, enum Scope, enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIDecrement(local int *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIDecrement(global int *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIDecrement(local uint *, int Scope, + enum 
MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIDecrement(global uint *, int Scope, + enum MemorySemanticsMask); #ifdef cl_khr_int64_base_atomics -_CLC_DECL long -_Z24__spirv_AtomicIDecrementPU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL long -_Z24__spirv_AtomicIDecrementPU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL unsigned long -_Z24__spirv_AtomicIDecrementPU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local unsigned long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL unsigned long -_Z24__spirv_AtomicIDecrementPU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global unsigned long *, enum Scope, enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIDecrement(local long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIDecrement(global long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL unsigned long +__spirv_AtomicIDecrement(local unsigned long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL unsigned long +__spirv_AtomicIDecrement(global unsigned long *, int Scope, + enum MemorySemanticsMask); #endif diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_decl.inc b/libclc/libspirv/include/libspirv/atomic/atomic_decl.inc index 61f386af817c2..21122f9dd5585 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_decl.inc +++ b/libclc/libspirv/include/libspirv/atomic/atomic_decl.inc @@ -6,39 +6,29 @@ // //===----------------------------------------------------------------------===// -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. 
-#define __CLC_DECLARE_ATOMIC(ADDRSPACE, ADDRSPACE_MANGLED, TYPE, TYPE_MANGLED, \ - NAME, NAME_LEN) \ - _CLC_DECL TYPE \ - _Z##NAME_LEN##NAME##PU3##ADDRSPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDRSPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ - TYPE); +#define __CLC_DECLARE_ATOMIC(ADDRSPACE, TYPE, NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE NAME(ADDRSPACE TYPE *, int Scope, \ + int MemorySemanticsMask, TYPE); -#define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE, TYPE_MANGLED, NAME, NAME_LEN) \ - __CLC_DECLARE_ATOMIC(global, AS1, TYPE, TYPE_MANGLED, NAME, NAME_LEN) \ - __CLC_DECLARE_ATOMIC(local, AS3, TYPE, TYPE_MANGLED, NAME, NAME_LEN) +#define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE, NAME) \ + __CLC_DECLARE_ATOMIC(global, TYPE, NAME) \ + __CLC_DECLARE_ATOMIC(local, TYPE, NAME) \ + __CLC_DECLARE_ATOMIC(, TYPE, NAME) -__CLC_DECLARE_ATOMIC_ADDRSPACE(int, i, __SPIRV_FUNCTION_S, - __SPIRV_FUNCTION_S_LEN) -__CLC_DECLARE_ATOMIC_ADDRSPACE(uint, j, __SPIRV_FUNCTION_U, - __SPIRV_FUNCTION_U_LEN) +__CLC_DECLARE_ATOMIC_ADDRSPACE(int, __SPIRV_FUNCTION_S) +__CLC_DECLARE_ATOMIC_ADDRSPACE(uint, __SPIRV_FUNCTION_U) #ifdef __SPIRV_INT64_EXTENDED #ifdef cl_khr_int64_extended_atomics -__CLC_DECLARE_ATOMIC_ADDRSPACE(long, l, __SPIRV_FUNCTION_S, - __SPIRV_FUNCTION_S_LEN) -__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, m, __SPIRV_FUNCTION_U, - __SPIRV_FUNCTION_U_LEN) +__CLC_DECLARE_ATOMIC_ADDRSPACE(long, __SPIRV_FUNCTION_S) +__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, __SPIRV_FUNCTION_U) #endif #endif #ifdef __SPIRV_INT64_BASE #ifdef cl_khr_int64_base_atomics -__CLC_DECLARE_ATOMIC_ADDRSPACE(long, l, __SPIRV_FUNCTION_S, - __SPIRV_FUNCTION_S_LEN) -__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, m, __SPIRV_FUNCTION_U, - __SPIRV_FUNCTION_U_LEN) +__CLC_DECLARE_ATOMIC_ADDRSPACE(long, __SPIRV_FUNCTION_S) +__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, __SPIRV_FUNCTION_U) #endif #endif diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_inc.h b/libclc/libspirv/include/libspirv/atomic/atomic_inc.h index 12bf4e2123dc3..d7c6e42bbed66 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_inc.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_inc.h @@ -6,30 +6,24 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL int -_Z24__spirv_AtomicIIncrementPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local int *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL int -_Z24__spirv_AtomicIIncrementPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global int *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL uint -_Z24__spirv_AtomicIIncrementPU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local uint *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL uint -_Z24__spirv_AtomicIIncrementPU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global uint *, enum Scope, enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIIncrement(local int *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIIncrement(global int *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIIncrement(local uint *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIIncrement(global uint *, int Scope, + enum MemorySemanticsMask); #ifdef cl_khr_int64_base_atomics -_CLC_DECL long -_Z24__spirv_AtomicIIncrementPU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local 
long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL long -_Z24__spirv_AtomicIIncrementPU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL unsigned long -_Z24__spirv_AtomicIIncrementPU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local unsigned long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL unsigned long -_Z24__spirv_AtomicIIncrementPU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global unsigned long *, enum Scope, enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIIncrement(local long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIIncrement(global long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL unsigned long +__spirv_AtomicIIncrement(local unsigned long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL unsigned long +__spirv_AtomicIIncrement(global unsigned long *, int Scope, + enum MemorySemanticsMask); #endif diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_load.h b/libclc/libspirv/include/libspirv/atomic/atomic_load.h index 75a8ab305e2c6..8db8a859586f7 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_load.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_load.h @@ -6,23 +6,20 @@ // //===----------------------------------------------------------------------===// -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. -#define DECL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_DECL TYPE \ - _Z18__spirv_AtomicLoadPU3##AS_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile AS const TYPE *, enum Scope, enum MemorySemanticsMask); +#define DECL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicLoad(AS const TYPE *, int, int); -#define DECL_AS(TYPE, TYPE_MANGLED) \ - DECL(TYPE, TYPE_MANGLED, global, AS1) \ - DECL(TYPE, TYPE_MANGLED, local, AS3) +#define DECL_AS(TYPE) \ + DECL(TYPE, global) \ + DECL(TYPE, local) \ + DECL(TYPE, ) -DECL_AS(int, i) -DECL_AS(unsigned int, j) +DECL_AS(int) +DECL_AS(unsigned int) #ifdef cl_khr_int64_base_atomics -DECL_AS(long, l) -DECL_AS(unsigned long, m) +DECL_AS(long) +DECL_AS(unsigned long) #endif #undef DECL_AS diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_max.h b/libclc/libspirv/include/libspirv/atomic/atomic_max.h index 8f14d230e42a9..95ebeac7ec19e 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_max.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_max.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicSMax -#define __SPIRV_FUNCTION_S_LEN 18 #define __SPIRV_FUNCTION_U __spirv_AtomicUMax -#define __SPIRV_FUNCTION_U_LEN 18 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_min.h b/libclc/libspirv/include/libspirv/atomic/atomic_min.h index c7b5e73f33bf6..6228d01176e96 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_min.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_min.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicSMin -#define __SPIRV_FUNCTION_S_LEN 18 #define __SPIRV_FUNCTION_U __spirv_AtomicUMin -#define __SPIRV_FUNCTION_U_LEN 18 #define __SPIRV_INT64_EXTENDED #include diff --git 
a/libclc/libspirv/include/libspirv/atomic/atomic_or.h b/libclc/libspirv/include/libspirv/atomic/atomic_or.h index 73cda335dd24b..c69ab3df64a61 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_or.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_or.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicOr -#define __SPIRV_FUNCTION_S_LEN 16 #define __SPIRV_FUNCTION_U __spirv_AtomicOr -#define __SPIRV_FUNCTION_U_LEN 16 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_store.h b/libclc/libspirv/include/libspirv/atomic/atomic_store.h index c56cee7283b85..b513e758e4755 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_store.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_store.h @@ -6,23 +6,20 @@ // //===----------------------------------------------------------------------===// -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. -#define DECL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_DECL void \ - _Z19__spirv_AtomicStorePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); +#define DECL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore( \ + AS TYPE *, int Scope, int MemorySemanticsMask, TYPE); -#define DECL_AS(TYPE, TYPE_MANGLED) \ - DECL(TYPE, TYPE_MANGLED, global, AS1) \ - DECL(TYPE, TYPE_MANGLED, local, AS3) +#define DECL_AS(TYPE) \ + DECL(TYPE, global) \ + DECL(TYPE, local) -DECL_AS(int, i) -DECL_AS(unsigned int, j) +DECL_AS(int) +DECL_AS(unsigned int) #ifdef cl_khr_int64_base_atomics -DECL_AS(long, l) -DECL_AS(unsigned long, m) +DECL_AS(long) +DECL_AS(unsigned long) #endif #undef DECL_AS diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_sub.h b/libclc/libspirv/include/libspirv/atomic/atomic_sub.h index 62bec07cb8fbe..7ecd3ccd2b806 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_sub.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_sub.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicISub -#define __SPIRV_FUNCTION_S_LEN 18 #define __SPIRV_FUNCTION_U __spirv_AtomicISub -#define __SPIRV_FUNCTION_U_LEN 18 #define __SPIRV_INT64_BASE #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_xchg.h b/libclc/libspirv/include/libspirv/atomic/atomic_xchg.h index a274589a40f3d..31dcf8948f3da 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_xchg.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_xchg.h @@ -7,17 +7,13 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicExchange -#define __SPIRV_FUNCTION_S_LEN 22 #define __SPIRV_FUNCTION_U __spirv_AtomicExchange -#define __SPIRV_FUNCTION_U_LEN 22 #define __SPIRV_INT64_BASE -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. 
-_CLC_DECL float -_Z22__spirv_AtomicExchangePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile local float *, enum Scope, enum MemorySemanticsMask, float); -_CLC_DECL float -_Z22__spirv_AtomicExchangePU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile global float *, enum Scope, enum MemorySemanticsMask, float); +_CLC_OVERLOAD _CLC_DECL float __spirv_AtomicExchange(local float *, int Scope, + int MemorySemanticsMask, + float); +_CLC_OVERLOAD _CLC_DECL float __spirv_AtomicExchange(global float *, int Scope, + int MemorySemanticsMask, + float); #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_xor.h b/libclc/libspirv/include/libspirv/atomic/atomic_xor.h index 0e2007bbdb29c..0ab8d1faec680 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_xor.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_xor.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicXor -#define __SPIRV_FUNCTION_S_LEN 17 #define __SPIRV_FUNCTION_U __spirv_AtomicXor -#define __SPIRV_FUNCTION_U_LEN 17 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl index 77c9396210798..2a91f6aa3b66c 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl @@ -12,79 +12,72 @@ extern constant int __oclc_ISA_version; -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(__spirv_AtomicIAdd, int, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(__spirv_AtomicIAdd, unsigned int, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(__spirv_AtomicIAdd, long, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(__spirv_AtomicIAdd, unsigned long, __hip_atomic_fetch_add) -#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, AS_MANGLED, SUB1, CHECK, NEW_BUILTIN) \ - _CLC_DEF float \ - _Z21__spirv_AtomicFAddEXTP##AS_MANGLED##fN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEf( \ - volatile AS float *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, float val) { \ - if (CHECK) \ - return NEW_BUILTIN(p, val); \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, CHECK, NEW_BUILTIN) \ + _CLC_OVERLOAD _CLC_DECL float \ + __spirv_AtomicFAddEXT( \ + AS float *p, int scope, \ + int semantics, float val) { \ + if (CHECK) \ + return NEW_BUILTIN(p, val); \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \ } // Global AS atomics can be unsafe for malloc shared atomics, so should be opt // in AMDGPU_ATOMIC_FP32_ADD_IMPL( - global, U3AS1, 1, + global, AMDGPU_ARCH_BETWEEN(9010, 10000) && __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_FP_ATOMICS"), __builtin_amdgcn_global_atomic_fadd_f32) -AMDGPU_ATOMIC_FP32_ADD_IMPL(local, U3AS3, 1, AMDGPU_ARCH_GEQ(8000), +AMDGPU_ATOMIC_FP32_ADD_IMPL(local, AMDGPU_ARCH_GEQ(8000), __builtin_amdgcn_ds_atomic_fadd_f32) -AMDGPU_ATOMIC_FP32_ADD_IMPL(, , 0, 
AMDGPU_ARCH_BETWEEN(9400, 10000), +AMDGPU_ATOMIC_FP32_ADD_IMPL(, AMDGPU_ARCH_BETWEEN(9400, 10000), __builtin_amdgcn_flat_atomic_fadd_f32) -#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2, CHECK, \ - NEW_BUILTIN) \ - _CLC_DEF long \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \ - volatile AS long *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, long desired, long expected); \ - _CLC_DEF long \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile AS long *, enum Scope, enum MemorySemanticsMask); \ - _CLC_DEF double \ - _Z21__spirv_AtomicFAddEXTP##AS_MANGLED##dN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEd( \ - volatile AS double *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, double val) { \ - if (CHECK) \ - return NEW_BUILTIN(p, val); \ - int atomic_scope = 0, memory_order = 0; \ - volatile AS long *int_pointer = (volatile AS long *)p; \ - long old_int_val = 0, new_int_val = 0; \ - do { \ - old_int_val = \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - int_pointer, scope, semantics); \ - double new_double_val = *(double *)&old_int_val + val; \ - new_int_val = *(long *)&new_double_val; \ - } while ( \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \ - int_pointer, scope, semantics, semantics, new_int_val, \ - old_int_val) != old_int_val); \ - \ - return *(double *)&old_int_val; \ +#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, CHECK, \ + NEW_BUILTIN) \ + _CLC_OVERLOAD _CLC_DECL double \ + __spirv_AtomicFAddEXT( \ + AS double *p, int scope, \ + int semantics, double val) { \ + if (CHECK) \ + return NEW_BUILTIN(p, val); \ + int atomic_scope = 0, memory_order = 0; \ + AS long *int_pointer = (AS long *)p; \ + long old_int_val = 0, new_int_val = 0; \ + do { \ + old_int_val = \ + __spirv_AtomicLoad( \ + int_pointer, scope, semantics); \ + double new_double_val = *(double *)&old_int_val + val; \ + new_int_val = *(long *)&new_double_val; \ + } while ( \ + __spirv_AtomicCompareExchange( \ + int_pointer, scope, semantics, semantics, new_int_val, \ + old_int_val) != old_int_val); \ + \ + return *(double *)&old_int_val; \ } #ifdef cl_khr_int64_base_atomics // Global AS atomics can be unsafe for malloc shared atomics, so should be opt // in AMDGPU_ATOMIC_FP64_ADD_IMPL( - global, U3AS1, 1, 5, + global, AMDGPU_ARCH_BETWEEN(9010, 10000) && __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_FP_ATOMICS"), __builtin_amdgcn_global_atomic_fadd_f64) -AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5, +AMDGPU_ATOMIC_FP64_ADD_IMPL(local, AMDGPU_ARCH_BETWEEN(9010, 10000), __builtin_amdgcn_ds_atomic_fadd_f64) -AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4, AMDGPU_ARCH_BETWEEN(9400, 10000), +AMDGPU_ATOMIC_FP64_ADD_IMPL(, AMDGPU_ARCH_BETWEEN(9400, 10000), __builtin_amdgcn_flat_atomic_fadd_f64) #endif diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_and.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_and.cl index 5f8c587de5046..f5e9991221f93 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_and.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_and.cl @@ -11,7 +11,7 @@ #include #define __CLC_OP & -#define __SPIRV_BUILTIN _Z17__spirv_AtomicAnd +#define __SPIRV_BUILTIN __spirv_AtomicAnd #define __HIP_BUILTIN __hip_atomic_fetch_and #include "atomic_safe.def" diff --git 
a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl index 480a13e405511..96852b22b3dad 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl @@ -10,42 +10,41 @@ #include #include -#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1, \ - SUB2) \ - _CLC_DEF TYPE \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_##TYPE_MANGLED##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask success_semantics, \ - enum MemorySemanticsMask failure_semantics, TYPE desired, \ - TYPE expected) { \ - int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \ - memory_order_success) \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \ - memory_order_failure) \ - __hip_atomic_compare_exchange_strong(p, &expected, desired, \ - memory_order_success, \ - memory_order_failure, atomic_scope); \ - /* If cmpxchg \ - * succeeds: \ - - `expected` is unchanged, holding the old val that was at `p` \ - - `p` is changed to hold `desired` \ - * fails: \ - - `expected` is changed to hold the current val at `p` \ - - `p` is unchanged*/ \ - return expected; \ +#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE \ + __spirv_AtomicCompareExchange( \ + AS TYPE *p, int scope, \ + int success_semantics, \ + int failure_semantics, TYPE desired, \ + TYPE expected) { \ + int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \ + memory_order_success) \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \ + memory_order_failure) \ + __hip_atomic_compare_exchange_strong(p, &expected, desired, \ + memory_order_success, \ + memory_order_failure, atomic_scope); \ + /* If cmpxchg \ + * succeeds: \ + - `expected` is unchanged, holding the old val that was at `p` \ + - `p` is changed to hold `desired` \ + * fails: \ + - `expected` is changed to hold the current val at `p` \ + - `p` is unchanged*/ \ + return expected; \ } -#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \ - AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, 5) \ - AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, 5) \ - AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4) +#define AMDGPU_ATOMIC_CMPXCHG(TYPE) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, local) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, ) -AMDGPU_ATOMIC_CMPXCHG(int, i) -AMDGPU_ATOMIC_CMPXCHG(unsigned, j) -AMDGPU_ATOMIC_CMPXCHG(long, l) -AMDGPU_ATOMIC_CMPXCHG(unsigned long, m) -AMDGPU_ATOMIC_CMPXCHG(float, f) +AMDGPU_ATOMIC_CMPXCHG(int) +AMDGPU_ATOMIC_CMPXCHG(unsigned) +AMDGPU_ATOMIC_CMPXCHG(long) +AMDGPU_ATOMIC_CMPXCHG(unsigned long) +AMDGPU_ATOMIC_CMPXCHG(float) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_helpers.h b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_helpers.h index bc40d1def1e76..5ca852ce5523c 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_helpers.h +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_helpers.h @@ -59,54 +59,49 @@ extern int __oclc_amdgpu_reflect(__constant char *); } \ } -#define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, 
AS_MANGLED, \ - SUB1, BUILTIN) \ - _CLC_DEF TYPE \ - FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return BUILTIN(p, val, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, AS, BUILTIN) \ + _CLC_OVERLOAD _CLC_DECL TYPE FUNC_NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return BUILTIN(p, val, memory_order, atomic_scope); \ } -#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \ - AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \ - AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \ - AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN) +#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, global, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, local, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, , BUILTIN) // Safe atomics will either choose a slow CAS atomic impl (default) or a fast // native atomic if --amdgpu-unsafe-int-atomics is passed to LLVM. // // Safe atomics using CAS may be necessary if PCIe does not support atomic // operations such as and, or, xor -#define AMDGPU_SAFE_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \ - SUB1, OP, USE_BUILTIN_COND, BUILTIN) \ - _CLC_DEF TYPE \ - FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - if (USE_BUILTIN_COND) \ - return BUILTIN(p, val, memory_order, atomic_scope); \ - /* CAS atomics*/ \ - TYPE oldval = __hip_atomic_load(p, memory_order, atomic_scope); \ - TYPE newval = 0; \ - do { \ - newval = oldval OP val; \ - } while (!__hip_atomic_compare_exchange_strong( \ - p, &oldval, newval, atomic_scope, atomic_scope, memory_order)); \ - return oldval; \ +#define AMDGPU_SAFE_ATOMIC_IMPL(FUNC_NAME, TYPE, AS, OP, USE_BUILTIN_COND, \ + BUILTIN) \ + _CLC_OVERLOAD _CLC_DEF TYPE FUNC_NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + if (USE_BUILTIN_COND) \ + return BUILTIN(p, val, memory_order, atomic_scope); \ + /* CAS atomics*/ \ + TYPE oldval = __hip_atomic_load(p, memory_order, atomic_scope); \ + TYPE newval = 0; \ + do { \ + newval = oldval OP val; \ + } while (!__hip_atomic_compare_exchange_strong( \ + p, &oldval, newval, atomic_scope, atomic_scope, memory_order)); \ + return oldval; \ } -#define AMDGPU_SAFE_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, OP, BUILTIN) \ +#define AMDGPU_SAFE_ATOMIC(FUNC_NAME, TYPE, OP, BUILTIN) \ AMDGPU_SAFE_ATOMIC_IMPL( \ - FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, OP, \ + FUNC_NAME, TYPE, global, OP, \ __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_INT_ATOMICS"), BUILTIN) \ - AMDGPU_SAFE_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, OP, \ + AMDGPU_SAFE_ATOMIC_IMPL(FUNC_NAME, TYPE, local, OP, \ true /* local AS should always use 
builtin*/, \ BUILTIN) \ AMDGPU_SAFE_ATOMIC_IMPL( \ - FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, OP, \ + FUNC_NAME, TYPE, , OP, \ __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_INT_ATOMICS"), BUILTIN) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl index 9430c21d44031..7ef4f344a33c8 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl @@ -10,26 +10,24 @@ #include #include -#define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_DEF TYPE \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return __hip_atomic_load(p, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE \ + __spirv_AtomicLoad(const AS TYPE *p, int scope, int semantics) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return __hip_atomic_load(p, memory_order, atomic_scope); \ } -#define AMDGPU_ATOMIC_LOAD(TYPE, TYPE_MANGLED) \ - AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, global, U3AS1) \ - AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, local, U3AS3) \ - AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, , ) +#define AMDGPU_ATOMIC_LOAD(TYPE) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, local) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, ) -AMDGPU_ATOMIC_LOAD(int, Ki) -AMDGPU_ATOMIC_LOAD(unsigned int, Kj) -AMDGPU_ATOMIC_LOAD(long, Kl) -AMDGPU_ATOMIC_LOAD(unsigned long, Km) -AMDGPU_ATOMIC_LOAD(float, Kf) +AMDGPU_ATOMIC_LOAD(int) +AMDGPU_ATOMIC_LOAD(unsigned int) +AMDGPU_ATOMIC_LOAD(long) +AMDGPU_ATOMIC_LOAD(unsigned long) +AMDGPU_ATOMIC_LOAD(float) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_max.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_max.cl index f838ae950a991..7b3b47ab1af0b 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_max.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_max.cl @@ -13,26 +13,23 @@ extern constant int __oclc_ISA_version; -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, __hip_atomic_fetch_max) +AMDGPU_ATOMIC(__spirv_AtomicSMax, int, __hip_atomic_fetch_max) +AMDGPU_ATOMIC(__spirv_AtomicUMax, unsigned int, __hip_atomic_fetch_max) +AMDGPU_ATOMIC(__spirv_AtomicSMax, long, __hip_atomic_fetch_max) +AMDGPU_ATOMIC(__spirv_AtomicUMax, unsigned long, __hip_atomic_fetch_max) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, global, U3AS1, 1, 5_ii, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, local, U3AS3, 1, 5_ii, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, , , 0, 4_ii, false, ) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, int, global) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, int, local) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, int, ) #ifdef cl_khr_int64_base_atomics -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, global, U3AS1, 1, 5_ll, - AMDGPU_ARCH_BETWEEN(9010, 10000), - 
__builtin_amdgcn_global_atomic_fmax_f64) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, local, U3AS3, 1, 5_ll, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, , , 0, 4_ll, - AMDGPU_ARCH_BETWEEN(9010, 10000), - __builtin_amdgcn_flat_atomic_fmax_f64) +AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Max, >, double, long, global, + AMDGPU_ARCH_BETWEEN(9010, 10000), + __builtin_amdgcn_global_atomic_fmax_f64) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, long, local) +AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Max, >, double, long, , + AMDGPU_ARCH_BETWEEN(9010, 10000), + __builtin_amdgcn_flat_atomic_fmax_f64) #endif #undef AMDGPU_ATOMIC diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl index 7592c473ea735..29cd1602b1bbb 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl @@ -13,24 +13,21 @@ extern constant int __oclc_ISA_version; -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, __hip_atomic_fetch_min) +AMDGPU_ATOMIC(__spirv_AtomicSMin, int, __hip_atomic_fetch_min) +AMDGPU_ATOMIC(__spirv_AtomicUMin, unsigned int, __hip_atomic_fetch_min) +AMDGPU_ATOMIC(__spirv_AtomicSMin, long, __hip_atomic_fetch_min) +AMDGPU_ATOMIC(__spirv_AtomicUMin, unsigned long, __hip_atomic_fetch_min) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, global, U3AS1, 1, 5_ii, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, local, U3AS3, 1, 5_ii, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, , , 0, 4_ii, false, ) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, int, global) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, int, local) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, int, ) #ifdef cl_khr_int64_base_atomics -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, global, U3AS1, 1, 5_ll, +AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Min, <, double, long, global, AMDGPU_ARCH_BETWEEN(9010, 10000), __builtin_amdgcn_global_atomic_fmin_f64) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, local, U3AS3, 1, 5_ll, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, , , 0, 4_ll, +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, long, local) +AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Min, <, double, long, , AMDGPU_ARCH_BETWEEN(9010, 10000), __builtin_amdgcn_flat_atomic_fmin_f64) #endif diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_minmax.h b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_minmax.h index 1b65cdffb2f65..81c18d1799c78 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_minmax.h +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_minmax.h @@ -10,40 +10,36 @@ #include #include -#define AMDGPU_ATOMIC_FP_MINMAX_IMPL( \ - OPNAME, OP, TYPE, TYPE_MANGLED, STORAGE_TYPE, STORAGE_TYPE_MANGLED, AS, \ - AS_MANGLED, SUB1, SUB2, CHECK, NEW_BUILTIN) \ - _CLC_DEF STORAGE_TYPE \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \ - volatile AS STORAGE_TYPE *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, STORAGE_TYPE desired, \ - STORAGE_TYPE expected); \ - _CLC_DEF STORAGE_TYPE \ - 
_Z18__spirv_AtomicLoadP##AS_MANGLED##K##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile AS STORAGE_TYPE *, enum Scope, \ - enum MemorySemanticsMask); \ - _CLC_DEF TYPE \ - _Z21__spirv_AtomicF##OPNAME##EXTP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - if (CHECK) \ - return NEW_BUILTIN(p, val); \ - int atomic_scope = 0, memory_order = 0; \ - volatile AS STORAGE_TYPE *int_pointer = (volatile AS STORAGE_TYPE *)p; \ - STORAGE_TYPE old_int_val = 0, new_int_val = 0; \ - TYPE old_val = 0; \ - do { \ - old_int_val = \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##K##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - int_pointer, scope, semantics); \ - old_val = *(TYPE *)&old_int_val; \ - if (old_val OP val) \ - return old_val; \ - new_int_val = *(STORAGE_TYPE *)&val; \ - } while ( \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \ - int_pointer, scope, semantics, semantics, new_int_val, \ - old_int_val) != old_int_val); \ - \ - return old_val; \ +#define FUNC_BODY(OP, TYPE, STORAGE_TYPE, AS) \ + { \ + int atomic_scope = 0, memory_order = 0; \ + AS STORAGE_TYPE *int_pointer = (AS STORAGE_TYPE *)p; \ + STORAGE_TYPE old_int_val = 0, new_int_val = 0; \ + TYPE old_val = 0; \ + do { \ + old_int_val = __spirv_AtomicLoad(int_pointer, scope, semantics); \ + old_val = *(TYPE *)&old_int_val; \ + if (old_val OP val) \ + return old_val; \ + new_int_val = *(STORAGE_TYPE *)&val; \ + } while (__spirv_AtomicCompareExchange(int_pointer, scope, semantics, \ + semantics, new_int_val, \ + old_int_val) != old_int_val); \ + \ + return old_val; \ + } + +#define AMDGPU_ATOMIC_FP_MINMAX_IMPL(OPNAME, OP, TYPE, STORAGE_TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicF##OPNAME##EXT( \ + AS TYPE *p, int scope, int semantics, TYPE val) { \ + FUNC_BODY(OP, TYPE, STORAGE_TYPE, AS) \ + } + +#define AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(OPNAME, OP, TYPE, STORAGE_TYPE, AS, \ + CHECK, NEW_BUILTIN) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicF##OPNAME##EXT( \ + AS TYPE *p, int scope, int semantics, TYPE val) { \ + if (CHECK) \ + return NEW_BUILTIN(p, val); \ + FUNC_BODY(OP, TYPE, STORAGE_TYPE, AS) \ } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_or.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_or.cl index 4f33d6acbff01..8bfd25483b13b 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_or.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_or.cl @@ -11,7 +11,7 @@ #include #define __CLC_OP | -#define __SPIRV_BUILTIN _Z16__spirv_AtomicOr +#define __SPIRV_BUILTIN __spirv_AtomicOr #define __HIP_BUILTIN __hip_atomic_fetch_or #include "atomic_safe.def" diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_safe.def b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_safe.def index fb2024869615b..848ff0c30df59 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_safe.def +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_safe.def @@ -1,9 +1,9 @@ // Before including, define: __SPIRV_BUILTIN, __CLC_OP, __HIP_BUILTIN // and include atomic_helpers.h to get AMDGPU_SAFE_ATOMIC -AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, int, i, __CLC_OP, __HIP_BUILTIN) -AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned int, j, __CLC_OP, +AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, int, __CLC_OP, __HIP_BUILTIN) 
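For reference, here is roughly what one instantiation of the new overload-based min/max macro expands to, taking AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, int, global) as the example. This is an illustrative sketch of the preprocessed form, lightly trimmed; it assumes the _CLC_OVERLOAD/_CLC_DEF qualifiers and the overloaded __spirv_AtomicLoad/__spirv_AtomicCompareExchange declared elsewhere in libclc, and is not itself part of the patch hunks.

// Sketch of the expansion: a CAS loop over the float's integer storage,
// using the overloaded SPIR-V atomics instead of manually mangled symbols.
_CLC_OVERLOAD _CLC_DEF float __spirv_AtomicFMaxEXT(global float *p, int scope,
                                                   int semantics, float val) {
  global int *int_pointer = (global int *)p;
  int old_int_val = 0, new_int_val = 0;
  float old_val = 0;
  do {
    old_int_val = __spirv_AtomicLoad(int_pointer, scope, semantics);
    old_val = *(float *)&old_int_val;
    if (old_val > val)           // current value already wins; nothing to store
      return old_val;
    new_int_val = *(int *)&val;  // bit-cast the candidate through the storage type
  } while (__spirv_AtomicCompareExchange(int_pointer, scope, semantics,
                                         semantics, new_int_val,
                                         old_int_val) != old_int_val);
  return old_val;
}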
+AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned int, __CLC_OP, __HIP_BUILTIN) -AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, long, l, __CLC_OP, __HIP_BUILTIN) -AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned long, m, __CLC_OP, +AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, long, __CLC_OP, __HIP_BUILTIN) +AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned long, __CLC_OP, __HIP_BUILTIN) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl index a8b35e0522ba5..eafbacaefd8bf 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl @@ -10,27 +10,25 @@ #include #include -#define AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1) \ - _CLC_DEF void \ - _Z19__spirv_AtomicStore##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - __hip_atomic_store(p, val, memory_order, atomic_scope); \ - return; \ +#define AMDGPU_ATOMIC_STORE_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF void \ + __spirv_AtomicStore(AS TYPE *p, int scope, int semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + __hip_atomic_store(p, val, memory_order, atomic_scope); \ + return; \ } -#define AMDGPU_ATOMIC_STORE(TYPE, TYPE_MANGLED) \ - AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1) \ - AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1) \ - AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, , , 0) +#define AMDGPU_ATOMIC_STORE(TYPE) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, local) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, ) -AMDGPU_ATOMIC_STORE(int, i) -AMDGPU_ATOMIC_STORE(unsigned int, j) -AMDGPU_ATOMIC_STORE(long, l) -AMDGPU_ATOMIC_STORE(unsigned long, m) -AMDGPU_ATOMIC_STORE(float, f) +AMDGPU_ATOMIC_STORE(int) +AMDGPU_ATOMIC_STORE(unsigned int) +AMDGPU_ATOMIC_STORE(long) +AMDGPU_ATOMIC_STORE(unsigned long) +AMDGPU_ATOMIC_STORE(float) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl index 72539460e89ea..7b2162ed3c9e4 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl @@ -10,28 +10,25 @@ #include #include -#define AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \ - NOT_GENERIC, BUILTIN) \ - _CLC_DEF TYPE \ - FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##NOT_GENERIC##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return BUILTIN(p, -val, memory_order); \ +#define AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, AS, BUILTIN) \ + _CLC_OVERLOAD _CLC_DEF TYPE \ + FUNC_NAME(AS TYPE *p, int scope, int semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return BUILTIN(p, -val, memory_order); \ } -#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \ - 
AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, \ - BUILTIN) \ - AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, \ - BUILTIN) \ - AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN) +#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, global, \ + BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, local, \ + BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, , BUILTIN) -AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, int, i, __atomic_fetch_add) -AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned int, j, __atomic_fetch_add) -AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, long, l, __atomic_fetch_add) -AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned long, m, __atomic_fetch_add) +AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, int, __atomic_fetch_add) +AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, unsigned int, __atomic_fetch_add) +AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, long, __atomic_fetch_add) +AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, unsigned long, __atomic_fetch_add) #undef AMDGPU_ATOMIC #undef AMDGPU_ATOMIC_IMPL diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl index 3a1f8ae5f1324..56a2996e6d312 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl @@ -10,13 +10,13 @@ #include #include -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, int, i, __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned int, j, +AMDGPU_ATOMIC(__spirv_AtomicExchange, int, __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, unsigned int, __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, long, l, __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned long, m, +AMDGPU_ATOMIC(__spirv_AtomicExchange, long, __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, unsigned long, __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, float, __hip_atomic_exchange) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xor.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xor.cl index ef17188acaf93..ac34aba3e098a 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xor.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xor.cl @@ -11,7 +11,7 @@ #include #define __CLC_OP ^ -#define __SPIRV_BUILTIN _Z17__spirv_AtomicXor +#define __SPIRV_BUILTIN __spirv_AtomicXor #define __HIP_BUILTIN __hip_atomic_fetch_xor #include "atomic_safe.def" diff --git a/libclc/libspirv/lib/generic/atomic/atomic_add.cl b/libclc/libspirv/lib/generic/atomic/atomic_add.cl index 5a5fd9018d4db..1f051cc650199 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_add.cl @@ -8,39 +8,35 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z18__spirv_AtomicIAddP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE \ + __spirv_AtomicIAdd(AS TYPE *p, int scope, int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_add) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_add) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_add) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_add) +IMPL(int, global, __sync_fetch_and_add) +IMPL(unsigned int, global, __sync_fetch_and_add) +IMPL(int, local, __sync_fetch_and_add) +IMPL(unsigned int, local, __sync_fetch_and_add) #ifdef cl_khr_int64_base_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_add_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_add_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_add_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_add_8) +IMPL(long, global, __sync_fetch_and_add_8) +IMPL(unsigned long, global, __sync_fetch_and_add_8) +IMPL(long, local, __sync_fetch_and_add_8) +IMPL(unsigned long, local, __sync_fetch_and_add_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) \ + IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_add) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_add) +IMPL_GENERIC(int, __sync_fetch_and_add) +IMPL_GENERIC(unsigned int, __sync_fetch_and_add) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_add_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_add_8) +IMPL_GENERIC(long, __sync_fetch_and_add_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_add_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_and.cl b/libclc/libspirv/lib/generic/atomic/atomic_and.cl index 7965ac77e4ed2..e8e1d4d96e55a 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_and.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_and.cl @@ -8,39 +8,35 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z17__spirv_AtomicAndP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE \ + __spirv_AtomicAnd(AS TYPE *p, int scope, int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_and) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_and) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_and) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_and) +IMPL(int, global, __sync_fetch_and_and) +IMPL(unsigned int, global, __sync_fetch_and_and) +IMPL(int, local, __sync_fetch_and_and) +IMPL(unsigned int, local, __sync_fetch_and_and) #ifdef cl_khr_int64_extended_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_and_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_and_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_and_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_and_8) +IMPL(long, global, __sync_fetch_and_and_8) +IMPL(unsigned long, global, __sync_fetch_and_and_8) +IMPL(long, local, __sync_fetch_and_and_8) +IMPL(unsigned long, local, __sync_fetch_and_and_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) \ + IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_and) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_and) +IMPL_GENERIC(int, __sync_fetch_and_and) +IMPL_GENERIC(unsigned int, __sync_fetch_and_and) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_and_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_and_8) +IMPL_GENERIC(long, __sync_fetch_and_and_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_and_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl index 98da0d36160cc..a16bbe48c8810 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl @@ -8,62 +8,52 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -_CLC_DEF int -_Z29__spirv_AtomicCompareExchangePU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii( - volatile local int *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int +__spirv_AtomicCompareExchange( + local int *p, int scope, int eq, int neq, int val, int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_DEF int -_Z29__spirv_AtomicCompareExchangePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii( - volatile global int *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int +__spirv_AtomicCompareExchange( + global int *p, int scope, int eq, int neq, int val, int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_DEF uint -_Z29__spirv_AtomicCompareExchangePU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_jj( - volatile local uint *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint +__spirv_AtomicCompareExchange( + local uint *p, int scope, int eq, int neq, uint val, uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_DEF uint -_Z29__spirv_AtomicCompareExchangePU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_jj( - volatile global uint *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint +__spirv_AtomicCompareExchange( + global uint *p, int scope, int eq, int neq, uint val, uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } #ifdef cl_khr_int64_base_atomics -_CLC_DEF long -_Z29__spirv_AtomicCompareExchangePU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ll( - volatile local long *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long +__spirv_AtomicCompareExchange( + local long *p, int scope, int eq, int neq, long val, long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_DEF long -_Z29__spirv_AtomicCompareExchangePU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ll( - volatile global long *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long +__spirv_AtomicCompareExchange( + global long *p, int scope, int eq, int neq, long val, long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_DEF ulong -_Z29__spirv_AtomicCompareExchangePU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_mm( - volatile local ulong *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong +__spirv_AtomicCompareExchange( + local ulong *p, int scope, int eq, int neq, ulong val, ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_DEF ulong -_Z29__spirv_AtomicCompareExchangePU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_mm( - volatile global ulong *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong +__spirv_AtomicCompareExchange( + global ulong *p, int scope, int eq, int neq, ulong val, ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } @@ -71,33 +61,29 @@ _Z29__spirv_AtomicCompareExchangePU3AS1mN5__spv5Scope4FlagENS1_19MemorySemantics #if _CLC_GENERIC_AS_SUPPORTED -_CLC_DEF int 
-_Z29__spirv_AtomicCompareExchangePiN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_ii( - volatile int *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int +__spirv_AtomicCompareExchange( + int *p, int scope, int eq, int neq, int val, int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_DEF uint -_Z29__spirv_AtomicCompareExchangePjN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_jj( - volatile uint *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint +__spirv_AtomicCompareExchange( + uint *p, int scope, int eq, int neq, uint val, uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } #ifdef cl_khr_int64_base_atomics -_CLC_DEF long -_Z29__spirv_AtomicCompareExchangePlN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_ll( - volatile long *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long +__spirv_AtomicCompareExchange( + long *p, int scope, int eq, int neq, long val, long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_DEF ulong -_Z29__spirv_AtomicCompareExchangePmN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_mm( - volatile ulong *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong +__spirv_AtomicCompareExchange( + ulong *p, int scope, int eq, int neq, ulong val, ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } diff --git a/libclc/libspirv/lib/generic/atomic/atomic_dec.cl b/libclc/libspirv/lib/generic/atomic/atomic_dec.cl index 4726e52272ef6..c2253992689af 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_dec.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_dec.cl @@ -8,62 +8,44 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
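With the compare-exchange overloads above, callers (for example in libdevice or the SYCL headers) can rely on ordinary overload resolution to pick the address-space-specific definition instead of spelling out the Itanium-mangled symbol. A hedged usage sketch follows; the function name and the scope/semantics parameters are illustrative placeholders for whatever SPIR-V Scope and MemorySemanticsMask values the caller already holds as plain ints, not values taken from this patch.

// Hypothetical caller: 'counter' is assumed to point at device-global storage.
int try_claim(global int *counter, int device_scope, int semantics) {
  // Returns the previous value; it equals 0 exactly when this call won the claim,
  // since the overload returns the value observed at 'counter' before the swap.
  return __spirv_AtomicCompareExchange(counter, device_scope, semantics,
                                       semantics, /*val=*/1, /*cmp=*/0);
}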
- -_CLC_DEF int -_Z24__spirv_AtomicIDecrementPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local int *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL int +__spirv_AtomicIDecrement(local int *p, int scope, int semantics) { return __sync_fetch_and_sub(p, (int)1); } -_CLC_DEF int -_Z24__spirv_AtomicIDecrementPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global int *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL int +__spirv_AtomicIDecrement(global int *p, int scope, int semantics) { return __sync_fetch_and_sub(p, (int)1); } -_CLC_DEF uint -_Z24__spirv_AtomicIDecrementPU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local uint *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL uint +__spirv_AtomicIDecrement(local uint *p, int scope, int semantics) { return __sync_fetch_and_sub(p, (uint)1); } -_CLC_DEF uint -_Z24__spirv_AtomicIDecrementPU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global uint *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL uint +__spirv_AtomicIDecrement(global uint *p, int scope, int semantics) { return __sync_fetch_and_sub(p, (uint)1); } #ifdef cl_khr_int64_base_atomics -_CLC_DEF long -_Z24__spirv_AtomicIDecrementPU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local long *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL long +__spirv_AtomicIDecrement(local long *p, int scope, int semantics) { return __sync_fetch_and_sub(p, (long)1); } -_CLC_DEF long -_Z24__spirv_AtomicIDecrementPU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global long *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL long +__spirv_AtomicIDecrement(global long *p, int scope, int semantics) { return __sync_fetch_and_sub(p, (long)1); } -_CLC_DEF ulong -_Z24__spirv_AtomicIDecrementPU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local ulong *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL ulong +__spirv_AtomicIDecrement(local ulong *p, int scope, int semantics) { return __sync_fetch_and_sub(p, (ulong)1); } -_CLC_DEF ulong -_Z24__spirv_AtomicIDecrementPU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global ulong *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL ulong +__spirv_AtomicIDecrement(global ulong *p, int scope, int semantics) { return __sync_fetch_and_sub(p, (ulong)1); } #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_inc.cl b/libclc/libspirv/lib/generic/atomic/atomic_inc.cl index e4ac238ddce90..5f6e6afd35f06 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_inc.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_inc.cl @@ -8,62 +8,46 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -_CLC_DEF int -_Z24__spirv_AtomicIIncrementPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local int *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIIncrement(local int *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (int)1); } -_CLC_DEF int -_Z24__spirv_AtomicIIncrementPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global int *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIIncrement(global int *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (int)1); } -_CLC_DEF uint -_Z24__spirv_AtomicIIncrementPU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local uint *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIIncrement(local uint *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (uint)1); } -_CLC_DEF uint -_Z24__spirv_AtomicIIncrementPU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global uint *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIIncrement(global uint *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (uint)1); } #ifdef cl_khr_int64_base_atomics -_CLC_DEF long -_Z24__spirv_AtomicIIncrementPU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local long *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIIncrement(local long *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (long)1); } -_CLC_DEF long -_Z24__spirv_AtomicIIncrementPU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global long *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIIncrement(global long *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (long)1); } -_CLC_DEF ulong -_Z24__spirv_AtomicIIncrementPU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local ulong *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL ulong __spirv_AtomicIIncrement(local ulong *p, + int scope, + int semantics) { return __sync_fetch_and_add(p, (ulong)1); } -_CLC_DEF ulong -_Z24__spirv_AtomicIIncrementPU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global ulong *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL ulong __spirv_AtomicIIncrement(global ulong *p, + int scope, + int semantics) { return __sync_fetch_and_add(p, (ulong)1); } #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_load.cl b/libclc/libspirv/lib/generic/atomic/atomic_load.cl index 915b8eceab5da..bcde6bd74a100 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_load.cl @@ -8,51 +8,47 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- #define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ -TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS const TYPE *); - -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, PREFIX, BYTE_SIZE) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ - _CLC_DEF TYPE \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile AS const TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - if (semantics & Acquire) { \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \ - } \ - if (semantics & SequentiallyConsistent) { \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \ - } \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \ +TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(AS const TYPE *); + +#define IMPL(TYPE, AS, PREFIX, BYTE_SIZE) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ + _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_AtomicLoad(AS const TYPE *p, int scope, int semantics) { \ + if (semantics & Acquire) { \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \ + } \ + if (semantics & SequentiallyConsistent) { \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \ + } \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \ } -#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, global, U3AS1, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, local, U3AS3, PREFIX, BYTE_SIZE) +#define IMPL_AS(TYPE, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, global, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, local, PREFIX, BYTE_SIZE) -IMPL_AS(int, i, , 4) -IMPL_AS(unsigned int, j, u, 4) +IMPL_AS(int, , 4) +IMPL_AS(unsigned int, u, 4) #ifdef cl_khr_int64_base_atomics -IMPL_AS(long, l, , 8) -IMPL_AS(unsigned long, m, u, 8) +IMPL_AS(long, , 8) +IMPL_AS(unsigned long, u, 8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, , , PREFIX, BYTE_SIZE) +#define IMPL_GENERIC(TYPE, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, , PREFIX, BYTE_SIZE) -IMPL_GENERIC(int, i, , 4) -IMPL_GENERIC(unsigned int, j, u, 4) +IMPL_GENERIC(int, , 4) +IMPL_GENERIC(unsigned int, u, 4) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, , 8) -IMPL_GENERIC(unsigned long, m, u, 8) +IMPL_GENERIC(long, , 8) +IMPL_GENERIC(unsigned long, u, 8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_max.cl b/libclc/libspirv/lib/generic/atomic/atomic_max.cl index 27d694aa12026..fdcca68a23b2f 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_max.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_max.cl @@ -8,20 +8,16 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, NAME, PREFIX, SUFFIX) \ - _CLC_DEF TYPE \ - _Z18##NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ +#define IMPL(TYPE, AS, NAME, PREFIX, SUFFIX) \ + _CLC_OVERLOAD _CLC_DECL TYPE \ + NAME(AS TYPE *p, int scope, int semantics, TYPE val) { \ + return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __spirv_AtomicSMax, , max) -IMPL(unsigned int, j, global, U3AS1, 1, __spirv_AtomicUMax, , umax) -IMPL(int, i, local, U3AS3, 1, __spirv_AtomicSMax, , max) -IMPL(unsigned int, j, local, U3AS3, 1, __spirv_AtomicUMax, , umax) +IMPL(int, global, __spirv_AtomicSMax, , max) +IMPL(unsigned int, global, __spirv_AtomicUMax, , umax) +IMPL(int, local, __spirv_AtomicSMax, , max) +IMPL(unsigned int, local, __spirv_AtomicUMax, , umax) #ifdef cl_khr_int64_extended_atomics unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long); @@ -29,28 +25,28 @@ unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long); unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long *, unsigned long); unsigned long __clc__sync_fetch_and_umax_global_8(volatile global unsigned long *, unsigned long); -IMPL(long, l, global, U3AS1, 1, __spirv_AtomicSMax, __clc, max_global_8) -IMPL(unsigned long, m, global, U3AS1, 1, __spirv_AtomicUMax, __clc, umax_global_8) -IMPL(long, l, local, U3AS3, 1, __spirv_AtomicSMax, __clc, max_local_8) -IMPL(unsigned long, m, local, U3AS3, 1, __spirv_AtomicUMax, __clc, umax_local_8) +IMPL(long, global, __spirv_AtomicSMax, __clc, max_global_8) +IMPL(unsigned long, global, __spirv_AtomicUMax, __clc, umax_global_8) +IMPL(long, local, __spirv_AtomicSMax, __clc, max_local_8) +IMPL(unsigned long, local, __spirv_AtomicUMax, __clc, umax_local_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, NAME, PREFIX, SUFFIX) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, NAME, PREFIX, SUFFIX) +#define IMPL_GENERIC(TYPE, NAME, PREFIX, SUFFIX) \ + IMPL(TYPE, , NAME, PREFIX, SUFFIX) -IMPL_GENERIC(int, i, __spirv_AtomicSMax, , max) -IMPL_GENERIC(unsigned int, j, __spirv_AtomicUMax, , umax) +IMPL_GENERIC(int, __spirv_AtomicSMax, , max) +IMPL_GENERIC(unsigned int, __spirv_AtomicUMax, , umax) #ifdef cl_khr_int64_extended_atomics unsigned long __clc__sync_fetch_and_max_generic_8(volatile generic long *, long); unsigned long __clc__sync_fetch_and_umax_generic_8(volatile __generic unsigned long *, unsigned long); -IMPL_GENERIC(long, l, __spirv_AtomicSMax, __clc, max_generic_8) -IMPL_GENERIC(unsigned long, m, __spirv_AtomicUMax, __clc, umax_generic_8) +IMPL_GENERIC(long, __spirv_AtomicSMax, __clc, max_generic_8) +IMPL_GENERIC(unsigned long, __spirv_AtomicUMax, __clc, umax_generic_8) #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_min.cl b/libclc/libspirv/lib/generic/atomic/atomic_min.cl index 66a518a75a256..f39a0722a7a90 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_min.cl @@ -8,20 +8,16 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, NAME, PREFIX, SUFFIX) \ - _CLC_DEF TYPE \ - _Z18##NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ +#define IMPL(TYPE, AS, NAME, PREFIX, SUFFIX) \ + _CLC_OVERLOAD _CLC_DECL TYPE \ + NAME(AS TYPE *p, int scope, int semantics, TYPE val) { \ + return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __spirv_AtomicSMin, , min) -IMPL(unsigned int, j, global, U3AS1, 1, __spirv_AtomicUMin, , umin) -IMPL(int, i, local, U3AS3, 1, __spirv_AtomicSMin, , min) -IMPL(unsigned int, j, local, U3AS3, 1, __spirv_AtomicUMin, , umin) +IMPL(int, global, __spirv_AtomicSMin, , min) +IMPL(unsigned int, global, __spirv_AtomicUMin, , umin) +IMPL(int, local, __spirv_AtomicSMin, , min) +IMPL(unsigned int, local, __spirv_AtomicUMin, , umin) #ifdef cl_khr_int64_extended_atomics unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long); @@ -29,28 +25,28 @@ unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long); unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long *, unsigned long); unsigned long __clc__sync_fetch_and_umin_global_8(volatile global unsigned long *, unsigned long); -IMPL(long, l, global, U3AS1, 1, __spirv_AtomicSMin, __clc, min_global_8) -IMPL(unsigned long, m, global, U3AS1, 1, __spirv_AtomicUMin, __clc, umin_global_8) -IMPL(long, l, local, U3AS3, 1, __spirv_AtomicSMin, __clc, min_local_8) -IMPL(unsigned long, m, local, U3AS3, 1, __spirv_AtomicUMin, __clc, umin_local_8) +IMPL(long, global, __spirv_AtomicSMin, __clc, min_global_8) +IMPL(unsigned long, global, __spirv_AtomicUMin, __clc, umin_global_8) +IMPL(long, local, __spirv_AtomicSMin, __clc, min_local_8) +IMPL(unsigned long, local, __spirv_AtomicUMin, __clc, umin_local_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, NAME, PREFIX, SUFFIX) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, NAME, PREFIX, SUFFIX) +#define IMPL_GENERIC(TYPE, NAME, PREFIX, SUFFIX) \ + IMPL(TYPE, , NAME, PREFIX, SUFFIX) -IMPL_GENERIC(int, i, __spirv_AtomicSMin, , min) -IMPL_GENERIC(unsigned int, j, __spirv_AtomicUMin, , umin) +IMPL_GENERIC(int, __spirv_AtomicSMin, , min) +IMPL_GENERIC(unsigned int, __spirv_AtomicUMin, , umin) #ifdef cl_khr_int64_extended_atomics unsigned long __clc__sync_fetch_and_min_generic_8(volatile generic long *, long); unsigned long __clc__sync_fetch_and_umin_generic_8(volatile __generic unsigned long *, unsigned long); -IMPL_GENERIC(long, l, __spirv_AtomicSMin, __clc, min_generic_8) -IMPL_GENERIC(unsigned long, m, __spirv_AtomicUMin, __clc, umin_generic_8) +IMPL_GENERIC(long, __spirv_AtomicSMin, __clc, min_generic_8) +IMPL_GENERIC(unsigned long, __spirv_AtomicUMin, __clc, umin_generic_8) #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_or.cl b/libclc/libspirv/lib/generic/atomic/atomic_or.cl index 683be8de0a36f..2939869c4e683 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_or.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_or.cl @@ -8,39 +8,35 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z16__spirv_AtomicOrP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_AtomicOr(AS TYPE *p, int scope, int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_or) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_or) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_or) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_or) +IMPL(int, global, __sync_fetch_and_or) +IMPL(unsigned int, global, __sync_fetch_and_or) +IMPL(int, local, __sync_fetch_and_or) +IMPL(unsigned int, local, __sync_fetch_and_or) #ifdef cl_khr_int64_extended_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_or_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_or_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_or_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_or_8) +IMPL(long, global, __sync_fetch_and_or_8) +IMPL(unsigned long, global, __sync_fetch_and_or_8) +IMPL(long, local, __sync_fetch_and_or_8) +IMPL(unsigned long, local, __sync_fetch_and_or_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) \ + IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_or) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_or) +IMPL_GENERIC(int, __sync_fetch_and_or) +IMPL_GENERIC(unsigned int, __sync_fetch_and_or) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_or_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_or_8) +IMPL_GENERIC(long, __sync_fetch_and_or_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_or_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_store.cl b/libclc/libspirv/lib/generic/atomic/atomic_store.cl index 6f102d1df9594..2c29b5eaf9326 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_store.cl @@ -8,67 +8,59 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -_CLC_DEF void -_Z19__spirv_AtomicStorePU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile global float *p, enum Scope scope, - enum MemorySemanticsMask semantics, float val) { - _Z19__spirv_AtomicStorePU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEj( - (volatile global uint *)p, scope, semantics, as_uint(val)); +_CLC_OVERLOAD _CLC_DECL void +__spirv_AtomicStore(global float *p, int scope, int semantics, float val) { + __spirv_AtomicStore((global uint *)p, scope, semantics, as_uint(val)); } -_CLC_DEF void -_Z19__spirv_AtomicStorePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile local float *p, enum Scope scope, - enum MemorySemanticsMask semantics, float val) { - _Z19__spirv_AtomicStorePU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEj( - (volatile local uint *)p, scope, semantics, as_uint(val)); +_CLC_OVERLOAD _CLC_DECL void +__spirv_AtomicStore(local float *p, int scope, int semantics, float val) { + __spirv_AtomicStore((local uint *)p, scope, semantics, as_uint(val)); } #define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ -TYPE __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS const TYPE *, TYPE); +TYPE __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_##MEM_ORDER(AS const TYPE *, TYPE); -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, PREFIX, BYTE_SIZE) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, release) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ - _CLC_DEF void \ - _Z19__spirv_AtomicStoreP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - if (semantics == Release) { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \ - } else if (semantics == SequentiallyConsistent) { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \ - } else { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \ - } \ +#define IMPL(TYPE, AS, PREFIX, BYTE_SIZE) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, release) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ + _CLC_OVERLOAD _CLC_DECL void \ + __spirv_AtomicStore( \ + AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + if (semantics == Release) { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \ + } else if (semantics == SequentiallyConsistent) { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \ + } else { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \ + } \ } -#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, PREFIX, BYTE_SIZE) +#define IMPL_AS(TYPE, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, global, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, local, PREFIX, BYTE_SIZE) -IMPL_AS(int, i, , 4) -IMPL_AS(unsigned int, j, u, 4) +IMPL_AS(int, , 4) +IMPL_AS(unsigned int, u, 4) #ifdef cl_khr_int64_base_atomics -IMPL_AS(long, l, , 8) -IMPL_AS(unsigned long, m, u, 8) +IMPL_AS(long, , 8) +IMPL_AS(unsigned long, u, 8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, PREFIX, BYTE_SIZE) +#define IMPL_GENERIC(TYPE, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, , PREFIX, BYTE_SIZE) -IMPL_GENERIC(int, i, , 4) 
-IMPL_GENERIC(unsigned int, j, u, 4) +IMPL_GENERIC(int, , 4) +IMPL_GENERIC(unsigned int, u, 4) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, , 8) -IMPL_GENERIC(unsigned long, m, u, 8) +IMPL_GENERIC(long, , 8) +IMPL_GENERIC(unsigned long, u, 8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_sub.cl b/libclc/libspirv/lib/generic/atomic/atomic_sub.cl index 9f0c3195555a9..e1d3e5eed67c3 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_sub.cl @@ -8,39 +8,35 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. - -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z18__spirv_AtomicISubP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_AtomicISub(AS TYPE *p, int scope, int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_sub) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_sub) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_sub) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_sub) +IMPL(int, global, __sync_fetch_and_sub) +IMPL(unsigned int, global, __sync_fetch_and_sub) +IMPL(int, local, __sync_fetch_and_sub) +IMPL(unsigned int, local, __sync_fetch_and_sub) #ifdef cl_khr_int64_base_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_sub_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_sub_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_sub_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_sub_8) +IMPL(long, global, __sync_fetch_and_sub_8) +IMPL(unsigned long, global, __sync_fetch_and_sub_8) +IMPL(long, local, __sync_fetch_and_sub_8) +IMPL(unsigned long, local, __sync_fetch_and_sub_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) \ + IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_sub) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_sub) +IMPL_GENERIC(int, __sync_fetch_and_sub) +IMPL_GENERIC(unsigned int, __sync_fetch_and_sub) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_sub_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_sub_8) +IMPL_GENERIC(long, __sync_fetch_and_sub_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_sub_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl b/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl index d191b0c1c0b63..c31a771a701cc 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl @@ -8,57 +8,51 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
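Reading the token-pasted store macro above back out: the following is a sketch of roughly what IMPL(int, global, , 4) produces once preprocessed. It assumes the __clc__atomic_store_global_4_* helpers are the ones declared by the FDECL lines in that macro and that the Release/SequentiallyConsistent enumerators remain visible for comparison against the plain-int semantics parameter; it is an illustration, not an additional hunk.

// Overloaded store for global int: dispatch on the requested memory order,
// defaulting to the unordered helper when neither Release nor SeqCst is asked for.
_CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore(global int *p, int scope,
                                                 int semantics, int val) {
  if (semantics == Release) {
    __clc__atomic_store_global_4_release(p, val);
  } else if (semantics == SequentiallyConsistent) {
    __clc__atomic_store_global_4_seq_cst(p, val);
  } else {
    __clc__atomic_store_global_4_unordered(p, val);
  }
}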
- -_CLC_DEF float -_Z22__spirv_AtomicExchangePU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile global float *p, enum Scope scope, - enum MemorySemanticsMask semantics, float val) { - return as_float( - _Z22__spirv_AtomicExchangePU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEj( - (volatile global uint *)p, scope, semantics, as_uint(val))); +_CLC_OVERLOAD _CLC_DEF float +__spirv_AtomicExchange( + global float *p, int scope, + int semantics, float val) { + return as_float(__spirv_AtomicExchange( + (global uint *)p, scope, semantics, as_uint(val))); } -_CLC_DEF float -_Z22__spirv_AtomicExchangePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile local float *p, enum Scope scope, - enum MemorySemanticsMask semantics, float val) { - return as_float( - _Z22__spirv_AtomicExchangePU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEj( - (volatile local uint *)p, scope, semantics, as_uint(val))); +_CLC_OVERLOAD _CLC_DEF float +__spirv_AtomicExchange( + local float *p, int scope, + int semantics, float val) { + return as_float(__spirv_AtomicExchange( + (local uint *)p, scope, semantics, as_uint(val))); } -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z22__spirv_AtomicExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE \ + __spirv_AtomicExchange(AS TYPE *p, int scope, int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_swap_4) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_swap_4) -IMPL(int, i, local, U3AS3, 1, __sync_swap_4) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_swap_4) +IMPL(int, global, __sync_swap_4) +IMPL(unsigned int, global, __sync_swap_4) +IMPL(int, local, __sync_swap_4) +IMPL(unsigned int, local, __sync_swap_4) #ifdef cl_khr_int64_base_atomics -IMPL(long, l, global, U3AS1, 1, __sync_swap_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_swap_8) -IMPL(long, l, local, U3AS3, 1, __sync_swap_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_swap_8) +IMPL(long, global, __sync_swap_8) +IMPL(unsigned long, global, __sync_swap_8) +IMPL(long, local, __sync_swap_8) +IMPL(unsigned long, local, __sync_swap_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) \ + IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_swap_4) -IMPL_GENERIC(unsigned int, j, __sync_swap_4) +IMPL_GENERIC(int, __sync_swap_4) +IMPL_GENERIC(unsigned int, __sync_swap_4) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_swap_8) -IMPL_GENERIC(unsigned long, m, __sync_swap_8) +IMPL_GENERIC(long, __sync_swap_8) +IMPL_GENERIC(unsigned long, __sync_swap_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_xor.cl b/libclc/libspirv/lib/generic/atomic/atomic_xor.cl index 6e3df26b67281..6afd5350f2275 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_xor.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_xor.cl @@ -8,39 +8,35 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z17__spirv_AtomicXorP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_AtomicXor(AS TYPE *p, int scope, int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_xor) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_xor) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_xor) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_xor) +IMPL(int, global, __sync_fetch_and_xor) +IMPL(unsigned int, global, __sync_fetch_and_xor) +IMPL(int, local, __sync_fetch_and_xor) +IMPL(unsigned int, local, __sync_fetch_and_xor) #ifdef cl_khr_int64_extended_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_xor_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_xor_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_xor_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_xor_8) +IMPL(long, global, __sync_fetch_and_xor_8) +IMPL(unsigned long, global, __sync_fetch_and_xor_8) +IMPL(long, local, __sync_fetch_and_xor_8) +IMPL(unsigned long, local, __sync_fetch_and_xor_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) \ + IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_xor) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_xor) +IMPL_GENERIC(int, __sync_fetch_and_xor) +IMPL_GENERIC(unsigned int, __sync_fetch_and_xor) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_xor_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_xor_8) +IMPL_GENERIC(long, __sync_fetch_and_xor_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_xor_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl index d64e7ca9a6954..b84077958276b 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl @@ -10,111 +10,101 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, add, _Z18__spirv_AtomicIAdd) -__CLC_NVVM_ATOMIC(uint, j, int, i, add, _Z18__spirv_AtomicIAdd) -__CLC_NVVM_ATOMIC(long, l, long, l, add, _Z18__spirv_AtomicIAdd) -__CLC_NVVM_ATOMIC(ulong, m, long, l, add, _Z18__spirv_AtomicIAdd) +__CLC_NVVM_ATOMIC(int, int, i, add, __spirv_AtomicIAdd) +__CLC_NVVM_ATOMIC(uint, int, i, add, __spirv_AtomicIAdd) +__CLC_NVVM_ATOMIC(long, long, l, add, __spirv_AtomicIAdd) +__CLC_NVVM_ATOMIC(ulong, long, l, add, __spirv_AtomicIAdd) -__CLC_NVVM_ATOMIC(float, f, float, f, add, _Z21__spirv_AtomicFAddEXT) +__CLC_NVVM_ATOMIC(float, float, f, add, __spirv_AtomicFAddEXT) #ifdef cl_khr_int64_base_atomics -#define __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, ADDR_SPACE_MANGLED, \ - ADDR_SPACE_NV, SUBSTITUTION1, \ - SUBSTITUTION2, SUBSTITUTION3) \ - long \ - _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE const long *, enum Scope, \ - enum MemorySemanticsMask); \ - long \ - _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##lN5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_ll( \ - volatile 
ADDR_SPACE long *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, long, long); \ - __attribute__((always_inline)) _CLC_DECL double \ - _Z21__spirv_AtomicFAddEXT##P##ADDR_SPACE_MANGLED##d##N5__spv5Scope4FlagENS##SUBSTITUTION3##_19MemorySemanticsMask4FlagE##d( \ - volatile ADDR_SPACE double *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, double value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - if (__clc_nvvm_reflect_arch() >= 600) { \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, ) \ - break; \ - case Acquire: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acquire) \ - } else { \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ - ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case Release: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _release) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, ) \ - } \ - break; \ - case AcquireRelease: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acq_rel) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ - ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case SequentiallyConsistent: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_FENCE_SC_SM70() \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acq_rel) \ - break; \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ - } else { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE long *pointer_int = \ - (volatile ADDR_SPACE long *)pointer; \ - long old_int; \ - long new_val_int; \ - do { \ - old_int = \ - _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - pointer_int, scope, load_order); \ - double new_val = *(double *)&old_int + *(double *)&value; \ - new_val_int = *(long *)&new_val; \ - } while ( \ - _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##lN5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_ll( \ - pointer_int, scope, semantics, semantics, new_val_int, \ - old_int) != old_int); \ - return *(double *)&old_int; \ - } \ +#define __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, \ + ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL double \ + __spirv_AtomicFAddEXT(ADDR_SPACE double *pointer, int scope, \ + int semantics, double value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 600) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, ) \ + break; \ + case Acquire: \ + if 
(__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acquire) \ + } else { \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case Release: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _release) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, ) \ + } \ + break; \ + case AcquireRelease: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acq_rel) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case SequentiallyConsistent: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_FENCE_SC_SM70() \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acq_rel) \ + break; \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } else { \ + int load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + ADDR_SPACE long *pointer_int = \ + (ADDR_SPACE long *)pointer; \ + long old_int; \ + long new_val_int; \ + do { \ + old_int = \ + __spirv_AtomicLoad( \ + pointer_int, scope, load_order); \ + double new_val = *(double *)&old_int + *(double *)&value; \ + new_val_int = *(long *)&new_val; \ + } while ( \ + __spirv_AtomicCompareExchange( \ + pointer_int, scope, semantics, semantics, new_val_int, \ + old_int) != old_int); \ + return *(double *)&old_int; \ + } \ } -__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(, , _gen_, 0, 4, 0) -__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__global, U3AS1, _global_, 1, 5, 1) -__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__local, U3AS3, _shared_, 1, 5, 1) +__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(, _gen_) +__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__global, _global_) +__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__local, _shared_) #endif diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl index 07190607d04ec..e4986c5f6975b 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl @@ -10,10 +10,10 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, and, _Z17__spirv_AtomicAnd) -__CLC_NVVM_ATOMIC(long, l, long, l, and, _Z17__spirv_AtomicAnd) -__CLC_NVVM_ATOMIC(unsigned int, j, int, i, and, _Z17__spirv_AtomicAnd) -__CLC_NVVM_ATOMIC(unsigned long, m, long, l, and, _Z17__spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(int, int, i, and, ___spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(long, long, l, and, __spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(unsigned int, int, i, and, __spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(unsigned long, long, l, and, __spirv_AtomicAnd) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl index 7d7a6ff55bb7c..339f19a0028a0 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl @@ -72,21 +72,15 @@ _CLC_OVERLOAD _CLC_DECL void 
__spirv_MemoryBarrier(unsigned int, unsigned int); } \ } -// Type __spirv_AtomicCompareExchange(AS Type *P, __spv::Scope::Flag S, -// __spv::MemorySemanticsMask::Flag E, -// __spv::MemorySemanticsMask::Flag U, +// Type __spirv_AtomicCompareExchange(AS Type *P, int S, int E, int U, // Type V, Type C); -#define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, OP, OP_MANGLED, \ - ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, \ - ADDR_SPACE_NV, SUBSTITUTION1, SUBSTITUTION2) \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z29__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5\ -__spv5Scope4FlagENS##SUBSTITUTION1##_19Memory\ -SemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_MANGLED##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics1, \ - enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \ +#define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + OP, OP_MANGLED, ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED( \ + ADDR_SPACE TYPE *pointer, int scope, \ + int semantics1, \ + int semantics2, TYPE cmp, TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics1 & 0x1F; \ @@ -135,21 +129,20 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, OP_MANGLED) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, __global, PU3AS1, _global_, 1, 5) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, __local, PU3AS3, _shared_, 1, 5) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, , P, _gen_, 0, 4) +#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, \ + __global, _global_) \ + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, \ + __local, _shared_) \ + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, \ + , _gen_) -__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(float, f, float, f, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(double, d, double, d, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(int, int, i, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(long, long, l, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(unsigned int, int, i, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(unsigned long, long, l, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(float, float, f, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(double, double, d, cas, CompareExchange) #undef __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER #undef __CLC_NVVM_ATOMIC_CAS diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_dec.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_dec.cl index c3e40c9496856..aed29d765cde3 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_dec.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_dec.cl @@ -10,8 +10,8 @@ #include #include -__CLC_NVVM_ATOMIC_INCDEC(unsigned 
int, j, IDecrement, -1) -__CLC_NVVM_ATOMIC_INCDEC(unsigned long, m, IDecrement, -1) +__CLC_NVVM_ATOMIC_INCDEC(unsigned int, IDecrement, -1) +__CLC_NVVM_ATOMIC_INCDEC(unsigned long, IDecrement, -1) #undef __CLC_NVVM_ATOMIC_INCDEC_IMPL #undef __CLC_NVVM_ATOMIC_INCDEC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_helpers.h b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_helpers.h index 0c7505f6b92e4..7df284055698f 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_helpers.h +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_helpers.h @@ -83,14 +83,10 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); __asm__ __volatile__("fence.sc.cta;"); \ } -#define __CLC_NVVM_ATOMIC_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, NAME_MANGLED, \ - ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV, SUBSTITUTION) \ - __attribute__((always_inline)) _CLC_DECL TYPE \ - NAME_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ -5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE value) { \ +#define __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE NAME( \ + ADDR_SPACE TYPE *pointer, int scope, int semantics, TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics & 0x1F; \ @@ -140,12 +136,10 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, __global, PU3AS1, _global_, 1) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, __local, PU3AS3, _shared_, 1) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, , P, _gen_, 0) +#define __CLC_NVVM_ATOMIC(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME) \ + __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME, __global, \ + _global_) \ + __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME, __local, \ + _shared_) \ + __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME, , _gen_) #endif diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc.cl index adabb17d6698f..e92da76642c79 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc.cl @@ -10,8 +10,8 @@ #include #include -__CLC_NVVM_ATOMIC_INCDEC(unsigned int, j, IIncrement, 1) -__CLC_NVVM_ATOMIC_INCDEC(unsigned long, m, IIncrement, 1) +__CLC_NVVM_ATOMIC_INCDEC(unsigned int, IIncrement, 1) +__CLC_NVVM_ATOMIC_INCDEC(unsigned long, IIncrement, 1) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc_dec_helpers.h b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc_dec_helpers.h index 8710d59e24f63..478d895c90ed7 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc_dec_helpers.h +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc_dec_helpers.h @@ -12,29 +12,16 @@ #include #include -#define __CLC_NVVM_ATOMIC_INCDEC_IMPL( \ - TYPE, TYPE_MANGLED, OP_MANGLED, VAL, 
ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION) \ - TYPE _Z21__spirv_\ -AtomicIAddEXT##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ -5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z24__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ -5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - return _Z21__spirv_\ -AtomicIAddEXT##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ -5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - pointer, scope, semantics, VAL); \ +#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, OP_MANGLED, VAL, ADDR_SPACE) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics) { \ + return __spirv_AtomicIAdd(pointer, scope, semantics, VAL); \ } -#define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \ - __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __global, \ - PU3AS1, 1) \ - __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __local, \ - PU3AS3, 1) \ - __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, , P, 0) +#define __CLC_NVVM_ATOMIC_INCDEC(TYPE, OP_MANGLED, VAL) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, OP_MANGLED, VAL, __global) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, OP_MANGLED, VAL, __local) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, OP_MANGLED, VAL, ) #endif diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl index 6c300e0b28397..2192961fc4e13 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl @@ -37,13 +37,12 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } #define __CLC_NVVM_ATOMIC_LOAD_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5\ -Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ + TYPE, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ + ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_AtomicLoad( \ + const ADDR_SPACE TYPE *pointer, int scope, \ + int semantics) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics & 0x1F; \ @@ -78,22 +77,22 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - __global, PU3AS1, _global_) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - __local, PU3AS3, _shared_) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, , \ - P, _gen_) +#define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + 
__global, _global_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + __local, _shared_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, , \ + _gen_) -__CLC_NVVM_ATOMIC_LOAD(int, i, int, i) -__CLC_NVVM_ATOMIC_LOAD(uint, j, int, i) -__CLC_NVVM_ATOMIC_LOAD(long, l, long, l) -__CLC_NVVM_ATOMIC_LOAD(ulong, m, long, l) +__CLC_NVVM_ATOMIC_LOAD(int, int, i) +__CLC_NVVM_ATOMIC_LOAD(uint, int, i) +__CLC_NVVM_ATOMIC_LOAD(long, long, l) +__CLC_NVVM_ATOMIC_LOAD(ulong, long, l) -__CLC_NVVM_ATOMIC_LOAD(float, f, float, f) +__CLC_NVVM_ATOMIC_LOAD(float, float, f) #ifdef cl_khr_int64_base_atomics -__CLC_NVVM_ATOMIC_LOAD(double, d, double, d) +__CLC_NVVM_ATOMIC_LOAD(double, double, d) #endif #undef __CLC_NVVM_ATOMIC_LOAD_TYPES diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl index d35a08a7baa2e..ec630a4c3838d 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl @@ -10,79 +10,55 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, max, _Z18__spirv_AtomicSMax) -__CLC_NVVM_ATOMIC(long, l, long, l, max, _Z18__spirv_AtomicSMax) -__CLC_NVVM_ATOMIC(unsigned int, j, unsigned int, ui, max, - _Z18__spirv_AtomicUMax) -__CLC_NVVM_ATOMIC(unsigned long, m, unsigned long, ul, max, - _Z18__spirv_AtomicUMax) +__CLC_NVVM_ATOMIC(int, int, i, max, __spirv_AtomicSMax) +__CLC_NVVM_ATOMIC(long, long, l, max, __spirv_AtomicSMax) +__CLC_NVVM_ATOMIC(unsigned int, unsigned int, ui, max, + __spirv_AtomicUMax) +__CLC_NVVM_ATOMIC(unsigned long, unsigned long, ul, max, + __spirv_AtomicUMax) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MAX_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ - TYPE_INT \ - _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ -FlagENS1_19MemorySemanticsMask4FlagE(volatile ADDR_SPACE const TYPE_INT *, \ - enum Scope, enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_\ -AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ -v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask\ -4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z21__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ -4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ -FlagENS1_19MemorySemanticsMask4FlagE(pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val > old) { \ - TYPE_INT tmp_int = _Z29__spirv_\ 
-AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ -v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask\ -4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MAX_IMPL( \ + TYPE, TYPE_INT, OP_MANGLED, ADDR_SPACE) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + int load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + ADDR_SPACE TYPE_INT *pointer_int = \ + (ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val > old) { \ + TYPE_INT tmp_int = __spirv_AtomicCompareExchange( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } -#define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, PU3AS1, 1, 5) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __local, PU3AS3, 1, 5) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, , P, 0, 4) +#define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_INT, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, __local) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, ) -__CLC_NVVM_ATOMIC_MAX(float, f, int, i, FMaxEXT) -__CLC_NVVM_ATOMIC_MAX(double, d, long, l, FMaxEXT) +__CLC_NVVM_ATOMIC_MAX(float, int, FMaxEXT) +__CLC_NVVM_ATOMIC_MAX(double, long, FMaxEXT) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl index ccb38e922742d..6eba6c44591cd 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl @@ -10,77 +10,54 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, min, _Z18__spirv_AtomicSMin) -__CLC_NVVM_ATOMIC(long, l, long, l, min, _Z18__spirv_AtomicSMin) -__CLC_NVVM_ATOMIC(uint, j, uint, ui, min, _Z18__spirv_AtomicUMin) -__CLC_NVVM_ATOMIC(ulong, m, ulong, ul, min, _Z18__spirv_AtomicUMin) +__CLC_NVVM_ATOMIC(int, int, i, min, __spirv_AtomicSMin) +__CLC_NVVM_ATOMIC(long, long, l, min, __spirv_AtomicSMin) +__CLC_NVVM_ATOMIC(uint, uint, ui, min, __spirv_AtomicUMin) +__CLC_NVVM_ATOMIC(ulong, ulong, ul, min, __spirv_AtomicUMin) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MIN_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ - TYPE_INT \ - _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ 
-FlagENS1_19MemorySemanticsMask4FlagE(volatile ADDR_SPACE const TYPE_INT *, \ - enum Scope, enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_\ -AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ -v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4\ -FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z21__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ -4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ -FlagENS1_19MemorySemanticsMask4FlagE(pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val < old) { \ - TYPE_INT tmp_int = _Z29__spirv_\ -AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ -v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4\ -FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MIN_IMPL( \ + TYPE, TYPE_INT, OP_MANGLED, ADDR_SPACE) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED( \ + ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + int load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + ADDR_SPACE TYPE_INT *pointer_int = \ + (ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val < old) { \ + TYPE_INT tmp_int = __spirv_AtomicCompareExchange( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } -#define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, PU3AS1, 1, 5) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __local, PU3AS3, 1, 5) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, , P, 0, 4) +#define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_INT, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, __local) 
\ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, ) -__CLC_NVVM_ATOMIC_MIN(float, f, int, i, FMinEXT) -__CLC_NVVM_ATOMIC_MIN(double, d, long, l, FMinEXT) +__CLC_NVVM_ATOMIC_MIN(float, int, FMinEXT) +__CLC_NVVM_ATOMIC_MIN(double, long, FMinEXT) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_or.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_or.cl index c5ade3cc281c0..780fdd529e361 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_or.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_or.cl @@ -10,10 +10,10 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, or, _Z16__spirv_AtomicOr) -__CLC_NVVM_ATOMIC(long, l, long, l, or, _Z16__spirv_AtomicOr) -__CLC_NVVM_ATOMIC(unsigned int, j, int, i, or, _Z16__spirv_AtomicOr) -__CLC_NVVM_ATOMIC(unsigned long, m, long, l, or, _Z16__spirv_AtomicOr) +__CLC_NVVM_ATOMIC(int, int, i, or, __spirv_AtomicOr) +__CLC_NVVM_ATOMIC(long, long, l, or, __spirv_AtomicOr) +__CLC_NVVM_ATOMIC(unsigned int, int, i, or, __spirv_AtomicOr) +__CLC_NVVM_ATOMIC(unsigned long, long, l, or, __spirv_AtomicOr) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl index b8376045df195..e0727999b4693 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl @@ -37,13 +37,11 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } #define __CLC_NVVM_ATOMIC_STORE_IMPL( \ - TYPE, TYPE_MANGLED, SUBSTITUTION, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - __attribute__((always_inline)) _CLC_DECL void _Z19__spirv_\ -AtomicStore##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ -S##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE value) { \ + TYPE, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ + ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL void \ + __spirv_AtomicStore(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics & 0x1F; \ @@ -82,22 +80,19 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, 1, TYPE_NV, TYPE_MANGLED_NV,\ - __global, PU3AS1, _global_) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, 1, TYPE_NV, TYPE_MANGLED_NV,\ - __local, PU3AS3, _shared_) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, 0, TYPE_NV, TYPE_MANGLED_NV,\ - , P, _gen_) +#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __global, _global_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __local, _shared_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, , _gen_) -__CLC_NVVM_ATOMIC_STORE(int, i, int, i) -__CLC_NVVM_ATOMIC_STORE(uint, j, int, i) -__CLC_NVVM_ATOMIC_STORE(long, l, long, l) -__CLC_NVVM_ATOMIC_STORE(ulong, m, long, l) +__CLC_NVVM_ATOMIC_STORE(int, int, i) +__CLC_NVVM_ATOMIC_STORE(uint, int, i) +__CLC_NVVM_ATOMIC_STORE(long, long, l) +__CLC_NVVM_ATOMIC_STORE(ulong, long, l) 
-__CLC_NVVM_ATOMIC_STORE(float, f, float, f) +__CLC_NVVM_ATOMIC_STORE(float, float, f) #ifdef cl_khr_int64_base_atomics -__CLC_NVVM_ATOMIC_STORE(double, d, double, d) +__CLC_NVVM_ATOMIC_STORE(double, double, d) #endif #undef __CLC_NVVM_ATOMIC_STORE_TYPES diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl index 256d600585e47..4b90b4142de58 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl @@ -9,35 +9,24 @@ #include #include -#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, \ - SUBSTITUTION) \ - TYPE _Z18__spirv_\ -AtomicIAdd##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ -S##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z18__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ -4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return _Z18__spirv_\ -AtomicIAdd##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ -S##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED(pointer, scope, \ - semantics, -val); \ +#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ADDR_SPACE) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicIAdd( \ + ADDR_SPACE TYPE *, int, int, TYPE); \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + return __spirv_AtomicIAdd(pointer, scope, semantics, -val); \ } -#define __CLC_NVVM_ATOMIC_SUB(TYPE, TYPE_MANGLED, OP_MANGLED) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, PU3AS1, \ - 1) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, PU3AS3, \ - 1) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, , P, 0) +#define __CLC_NVVM_ATOMIC_SUB(TYPE, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, __local) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ) -__CLC_NVVM_ATOMIC_SUB(int, i, ISub) -__CLC_NVVM_ATOMIC_SUB(unsigned int, j, ISub) -__CLC_NVVM_ATOMIC_SUB(long, l, ISub) -__CLC_NVVM_ATOMIC_SUB(unsigned long, m, ISub) +__CLC_NVVM_ATOMIC_SUB(int, ISub) +__CLC_NVVM_ATOMIC_SUB(unsigned int, ISub) +__CLC_NVVM_ATOMIC_SUB(long, ISub) +__CLC_NVVM_ATOMIC_SUB(unsigned long, ISub) #undef __CLC_NVVM_ATOMIC_SUB_IMPL #undef __CLC_NVVM_ATOMIC_SUB diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xchg.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xchg.cl index 34ea69b4ecb4c..8fe85d1175bfd 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xchg.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xchg.cl @@ -10,12 +10,12 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(long, l, long, l, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(unsigned int, j, int, i, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(unsigned long, m, long, l, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(float, f, float, f, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(double, d, double, d, xchg, _Z22__spirv_AtomicExchange) 
+__CLC_NVVM_ATOMIC(int, int, i, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(long, long, l, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(unsigned int, int, i, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(unsigned long, long, l, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(float, float, f, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(double, double, d, xchg, __spirv_AtomicExchange) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xor.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xor.cl index 1e3e65d1f41e0..f5fc4961db8da 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xor.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xor.cl @@ -10,10 +10,10 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, xor, _Z17__spirv_AtomicXor) -__CLC_NVVM_ATOMIC(long, l, long, l, xor, _Z17__spirv_AtomicXor) -__CLC_NVVM_ATOMIC(unsigned int, j, int, i, xor, _Z17__spirv_AtomicXor) -__CLC_NVVM_ATOMIC(unsigned long, m, long, l, xor, _Z17__spirv_AtomicXor) +__CLC_NVVM_ATOMIC(int, int, i, xor, __spirv_AtomicXor) +__CLC_NVVM_ATOMIC(long, long, l, xor, __spirv_AtomicXor) +__CLC_NVVM_ATOMIC(unsigned int, int, i, xor, __spirv_AtomicXor) +__CLC_NVVM_ATOMIC(unsigned long, long, l, xor, __spirv_AtomicXor) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index fd2148875bc39..610efec50b03f 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -57,33 +57,22 @@ struct MemorySemanticsMask { }; } // namespace __spv -extern DEVICE_EXTERNAL int -__spirv_AtomicCompareExchange(int SPIR_GLOBAL *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, - __spv::MemorySemanticsMask::Flag, int, int); - -extern DEVICE_EXTERNAL int -__spirv_AtomicCompareExchange(int *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, - __spv::MemorySemanticsMask::Flag, int, int); - -extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int SPIR_GLOBAL *, - __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag); - -extern DEVICE_EXTERNAL void -__spirv_AtomicStore(int SPIR_GLOBAL *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, int); - -extern DEVICE_EXTERNAL void -__spirv_AtomicStore(int *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, - int); - -extern DEVICE_EXTERNAL int __spirv_AtomicIAdd(SPIR_GLOBAL int *, - __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, +extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange(int SPIR_GLOBAL *, int, + int, int, int, int); + +extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange(int *, int, int, int, + int, int); + +extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int SPIR_GLOBAL *, int, int); +extern DEVICE_EXTERNAL void __spirv_AtomicStore(int SPIR_GLOBAL *, int, int, + int); + +extern DEVICE_EXTERNAL void __spirv_AtomicStore(int *, int, int, int); + +extern DEVICE_EXTERNAL int __spirv_AtomicIAdd(SPIR_GLOBAL int *, int, int, int); + /// Atomically set the value in *Ptr with Desired if and only if it is Expected /// Return the value which already was in *Ptr static inline int atomicCompareAndSet(SPIR_GLOBAL int *Ptr, int Desired, diff --git a/sycl/include/sycl/__spirv/spirv_ops.hpp b/sycl/include/sycl/__spirv/spirv_ops.hpp index 5800190f539a0..ecfa8e40ba449 100644 --- a/sycl/include/sycl/__spirv/spirv_ops.hpp +++ b/sycl/include/sycl/__spirv/spirv_ops.hpp @@ -232,99 +232,6 @@ extern __DPCPP_SYCL_EXTERNAL #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy #define 
__SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy -// Atomic SPIR-V builtins -#define __SPIRV_ATOMIC_LOAD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad( \ - AS const Type *P, __spv::Scope::Flag S, \ - __spv::MemorySemanticsMask::Flag O); -#define __SPIRV_ATOMIC_STORE(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag E, \ - __spv::MemorySemanticsMask::Flag U, Type V, Type C); -#define __SPIRV_ATOMIC_IADD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_ISUB(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_FADD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_SMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_UMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_FMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_SMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_UMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_FMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_AND(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_OR(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); -#define __SPIRV_ATOMIC_XOR(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); - -#define __SPIRV_ATOMIC_FLOAT(AS, Type) \ - __SPIRV_ATOMIC_FADD(AS, Type) \ - __SPIRV_ATOMIC_FMIN(AS, Type) \ - __SPIRV_ATOMIC_FMAX(AS, Type) \ - __SPIRV_ATOMIC_LOAD(AS, Type) \ - __SPIRV_ATOMIC_STORE(AS, Type) \ - __SPIRV_ATOMIC_EXCHANGE(AS, Type) - -#define __SPIRV_ATOMIC_BASE(AS, Type) \ - __SPIRV_ATOMIC_FLOAT(AS, Type) \ - __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ - __SPIRV_ATOMIC_IADD(AS, Type) \ - __SPIRV_ATOMIC_ISUB(AS, Type) \ - 
__SPIRV_ATOMIC_AND(AS, Type) \ - __SPIRV_ATOMIC_OR(AS, Type) \ - __SPIRV_ATOMIC_XOR(AS, Type) - -#define __SPIRV_ATOMIC_SIGNED(AS, Type) \ - __SPIRV_ATOMIC_BASE(AS, Type) \ - __SPIRV_ATOMIC_SMIN(AS, Type) \ - __SPIRV_ATOMIC_SMAX(AS, Type) - -#define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \ - __SPIRV_ATOMIC_BASE(AS, Type) \ - __SPIRV_ATOMIC_UMIN(AS, Type) \ - __SPIRV_ATOMIC_UMAX(AS, Type) - // Helper atomic operations which select correct signed/unsigned version // of atomic min/max based on the type #define __SPIRV_ATOMIC_MINMAX(AS, Op) \ @@ -356,15 +263,6 @@ extern __DPCPP_SYCL_EXTERNAL macro(__attribute__((opencl_global)), Arg) \ macro(__attribute__((opencl_local)), Arg) macro(, Arg) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, _Float16) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned int) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max) diff --git a/sycl/test/check_device_code/atomic_ref.cpp b/sycl/test/check_device_code/atomic_ref.cpp index 648ae01ddb192..a27d712058ba3 100644 --- a/sycl/test/check_device_code/atomic_ref.cpp +++ b/sycl/test/check_device_code/atomic_ref.cpp @@ -7,7 +7,7 @@ // CHECK-SAME: ptr addrspace(4) noundef align 4 dereferenceable(4) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(4) [[I]] to ptr addrspace(1) -// CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(ptr addrspace(1) noundef [[TMP]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1iii(ptr addrspace(1) noundef [[TMP]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]] // CHECK-NEXT: ret i32 [[CALL3_I_I]] // SYCL_EXTERNAL auto atomic_ref_global(int &i) { From 0e45a71058302d5d86aa6b6976a4030cde7d354f Mon Sep 17 00:00:00 2001 From: Wenju He Date: Sun, 16 Mar 2025 21:39:55 -0700 Subject: [PATCH 2/7] remove const from pointer argument, add missing ##OP_MANGLED --- .../libspirv/include/libspirv/atomic/atomic_load.h | 2 +- .../lib/amdgcn-amdhsa/atomic/atomic_load.cl | 2 +- libclc/libspirv/lib/generic/atomic/atomic_load.cl | 2 +- .../libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl | 2 +- .../lib/ptx-nvidiacl/atomic/atomic_load.cl | 2 +- .../libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl | 14 +++++++------- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_load.h b/libclc/libspirv/include/libspirv/atomic/atomic_load.h index 8db8a859586f7..bd51ca74aaf23 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_load.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_load.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #define DECL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicLoad(AS const TYPE *, int, int); + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicLoad(AS TYPE *, int, int); #define DECL_AS(TYPE) \ DECL(TYPE, global) \ diff --git 
a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl index 7ef4f344a33c8..ca0c56405c4bb 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl @@ -12,7 +12,7 @@ #define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, AS) \ _CLC_OVERLOAD _CLC_DEF TYPE \ - __spirv_AtomicLoad(const AS TYPE *p, int scope, int semantics) { \ + __spirv_AtomicLoad(AS TYPE *p, int scope, int semantics) { \ int atomic_scope = 0, memory_order = 0; \ GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ return __hip_atomic_load(p, memory_order, atomic_scope); \ diff --git a/libclc/libspirv/lib/generic/atomic/atomic_load.cl b/libclc/libspirv/lib/generic/atomic/atomic_load.cl index bcde6bd74a100..9d42e0e0046ac 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_load.cl @@ -16,7 +16,7 @@ TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(AS const TYPE FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \ FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_AtomicLoad(AS const TYPE *p, int scope, int semantics) { \ + __spirv_AtomicLoad(AS TYPE *p, int scope, int semantics) { \ if (semantics & Acquire) { \ return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \ } \ diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl index e4986c5f6975b..552523a3322f1 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl @@ -10,7 +10,7 @@ #include #include -__CLC_NVVM_ATOMIC(int, int, i, and, ___spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(int, int, i, and, __spirv_AtomicAnd) __CLC_NVVM_ATOMIC(long, long, l, and, __spirv_AtomicAnd) __CLC_NVVM_ATOMIC(unsigned int, int, i, and, __spirv_AtomicAnd) __CLC_NVVM_ATOMIC(unsigned long, long, l, and, __spirv_AtomicAnd) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl index 2192961fc4e13..59642468e51fa 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl @@ -41,7 +41,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); ADDR_SPACE_NV) \ __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ __spirv_AtomicLoad( \ - const ADDR_SPACE TYPE *pointer, int scope, \ + ADDR_SPACE TYPE *pointer, int scope, \ int semantics) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl index 4b90b4142de58..999641e3b65d2 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl @@ -9,13 +9,13 @@ #include #include -#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ADDR_SPACE) \ - _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicIAdd( \ - ADDR_SPACE TYPE *, int, int, TYPE); \ - __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_Atomic(ADDR_SPACE TYPE *pointer, int scope, \ - int semantics, TYPE val) { \ - return __spirv_AtomicIAdd(pointer, scope, semantics, -val); \ +#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ADDR_SPACE) \ + _CLC_OVERLOAD _CLC_DECL TYPE 
__spirv_AtomicIAdd( \ + ADDR_SPACE TYPE *, int, int, TYPE); \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + return __spirv_AtomicIAdd(pointer, scope, semantics, -val); \ } #define __CLC_NVVM_ATOMIC_SUB(TYPE, OP_MANGLED) \ From 6f69afe38a37846316127e9be19637cb7725e4cc Mon Sep 17 00:00:00 2001 From: Wenju He Date: Sun, 16 Mar 2025 22:09:09 -0700 Subject: [PATCH 3/7] add back atomic declarations in spirv_ops.hpp When long long and another type, e.g. long, are used in a same test, there is error: sycl/__spirv/spirv_ops.hpp:337:17: note: candidate function not viable: no known conversion from 'long *' to '__global long long *' for 1st argument --- sycl/include/sycl/__spirv/spirv_ops.hpp | 98 ++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 9 deletions(-) diff --git a/sycl/include/sycl/__spirv/spirv_ops.hpp b/sycl/include/sycl/__spirv/spirv_ops.hpp index ecfa8e40ba449..75e08d2be25d8 100644 --- a/sycl/include/sycl/__spirv/spirv_ops.hpp +++ b/sycl/include/sycl/__spirv/spirv_ops.hpp @@ -232,30 +232,101 @@ extern __DPCPP_SYCL_EXTERNAL #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy #define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy +// Atomic SPIR-V builtins +#define __SPIRV_ATOMIC_LOAD(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad(AS Type *P, int S, \ + int O); +#define __SPIRV_ATOMIC_STORE(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \ + AS Type *P, int S, int E, int U, Type V, Type C); +#define __SPIRV_ATOMIC_IADD(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_ISUB(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_FADD(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_SMIN(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_UMIN(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_FMIN(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_SMAX(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_UMAX(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_FMAX(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_AND(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd(AS Type *P, int S, \ + int O, Type V); +#define __SPIRV_ATOMIC_OR(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr(AS Type *P, int S, int O, \ + Type V); +#define __SPIRV_ATOMIC_XOR(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor(AS Type *P, int S, \ + int O, Type V); + +#define 
__SPIRV_ATOMIC_FLOAT(AS, Type) \ + __SPIRV_ATOMIC_FADD(AS, Type) \ + __SPIRV_ATOMIC_FMIN(AS, Type) \ + __SPIRV_ATOMIC_FMAX(AS, Type) \ + __SPIRV_ATOMIC_LOAD(AS, Type) \ + __SPIRV_ATOMIC_STORE(AS, Type) \ + __SPIRV_ATOMIC_EXCHANGE(AS, Type) + +#define __SPIRV_ATOMIC_BASE(AS, Type) \ + __SPIRV_ATOMIC_FLOAT(AS, Type) \ + __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ + __SPIRV_ATOMIC_IADD(AS, Type) \ + __SPIRV_ATOMIC_ISUB(AS, Type) \ + __SPIRV_ATOMIC_AND(AS, Type) \ + __SPIRV_ATOMIC_OR(AS, Type) \ + __SPIRV_ATOMIC_XOR(AS, Type) + +#define __SPIRV_ATOMIC_SIGNED(AS, Type) \ + __SPIRV_ATOMIC_BASE(AS, Type) \ + __SPIRV_ATOMIC_SMIN(AS, Type) \ + __SPIRV_ATOMIC_SMAX(AS, Type) + +#define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \ + __SPIRV_ATOMIC_BASE(AS, Type) \ + __SPIRV_ATOMIC_UMIN(AS, Type) \ + __SPIRV_ATOMIC_UMAX(AS, Type) + // Helper atomic operations which select correct signed/unsigned version // of atomic min/max based on the type #define __SPIRV_ATOMIC_MINMAX(AS, Op) \ template \ typename std::enable_if_t< \ std::is_integral::value && std::is_signed::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ - __spv::MemorySemanticsMask::Flag Semantics, \ - T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) { \ return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \ } \ template \ typename std::enable_if_t< \ std::is_integral::value && !std::is_signed::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ - __spv::MemorySemanticsMask::Flag Semantics, \ - T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) { \ return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \ } \ template \ typename std::enable_if_t::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ - __spv::MemorySemanticsMask::Flag Semantics, \ - T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) { \ return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \ } @@ -263,6 +334,15 @@ extern __DPCPP_SYCL_EXTERNAL macro(__attribute__((opencl_global)), Arg) \ macro(__attribute__((opencl_local)), Arg) macro(, Arg) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, _Float16) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned int) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max) From ad33aa8e19903b78128763c30d5f25ba0a22bd1d Mon Sep 17 00:00:00 2001 From: Wenju He Date: Mon, 17 Mar 2025 01:11:16 -0700 Subject: [PATCH 4/7] update test amdgpu_unsafe_atomics.cpp: remove volatile --- .../hip/atomic/amdgpu_unsafe_atomics.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp b/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp index b7f7e1e26830e..8870cf26f3d7d 100644 --- a/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp +++ b/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp @@ -12,11 +12,11 @@ SYCL_EXTERNAL void intAtomicFunc(int *i) { atomicInt.fetch_and(1); atomicInt.fetch_or(1); // CHECK: void{{.*}}intAtomicFunc - // CHECK-SAFE: cmpxchg volatile + // CHECK-SAFE: cmpxchg // 
CHECK-SAFE-NOT: atomicrmw - // CHECK-UNSAFE: atomicrmw volatile xor - // CHECK-UNSAFE: atomicrmw volatile and - // CHECK-UNSAFE: atomicrmw volatile or + // CHECK-UNSAFE: atomicrmw xor + // CHECK-UNSAFE: atomicrmw and + // CHECK-UNSAFE: atomicrmw or // CHECK-UNSAFE-NOT: cmpxchg } @@ -25,15 +25,15 @@ SYCL_EXTERNAL void fpAtomicFunc(float *f, double *d) { sycl::access::address_space::global_space>(*f) .fetch_add(1.0f); // CHECK: void{{.*}}fpAtomicFunc - // CHECK-SAFE: atomicrmw volatile fadd + // CHECK-SAFE: atomicrmw fadd // CHECK-SAFE-NOT: amdgpu.ignore.denormal.mode - // CHECK-UNSAFE-FP: atomicrmw volatile fadd {{.*}}!amdgpu.no.fine.grained.memory{{.*}}!amdgpu.ignore.denormal.mode + // CHECK-UNSAFE-FP: atomicrmw fadd {{.*}}!amdgpu.no.fine.grained.memory{{.*}}!amdgpu.ignore.denormal.mode sycl::atomic_ref(*d) .fetch_add(1.0); // CHECK-SAFE: cmpxchg // CHECK-SAFE-NOT: llvm.amdgcn.global.atomic.fadd.f64 - // CHECK-UNSAFE-FP: atomicrmw volatile fadd {{.*}}!amdgpu.no.fine.grained.memory + // CHECK-UNSAFE-FP: atomicrmw fadd {{.*}}!amdgpu.no.fine.grained.memory // CHECK: __CLANG_OFFLOAD_BUNDLE____END__ sycl-amdgcn-amd-amdhsa- } From e6301322fa243db86d73b50a034e25724f8968a4 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Thu, 20 Mar 2025 00:45:27 -0700 Subject: [PATCH 5/7] add clang test --- .../spirv-builtin-lookup-atomic.cpp | 237 ++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 clang/test/CodeGenSPIRV/spirv-builtin-lookup-atomic.cpp diff --git a/clang/test/CodeGenSPIRV/spirv-builtin-lookup-atomic.cpp b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-atomic.cpp new file mode 100644 index 0000000000000..8b596f2f70ed5 --- /dev/null +++ b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-atomic.cpp @@ -0,0 +1,237 @@ +// RUN: %clang_cc1 -triple spir64 -fdeclare-spirv-builtins -emit-llvm %s -o - | FileCheck %s + +#define AS_GLOBAL __attribute__((opencl_global)) +#define AS_LOCAL __attribute__((opencl_local)) +#define AS_PRIVATE __attribute__((opencl_private)) +#define AS_GENERIC __attribute__((opencl_generic)) + +void test_flag(int AS_GLOBAL *a, int AS_LOCAL *b, int AS_PRIVATE *c, + int AS_GENERIC *d) { + __spirv_AtomicFlagTestAndSet(a, 1, 16); + __spirv_AtomicFlagTestAndSet(b, 2, 8); + __spirv_AtomicFlagTestAndSet(c, 4, 4); + __spirv_AtomicFlagTestAndSet(d, 2, 0); + + __spirv_AtomicFlagClear(a, 1, 16); + __spirv_AtomicFlagClear(b, 2, 4); + __spirv_AtomicFlagClear(c, 4, 0); + __spirv_AtomicFlagClear(d, 2, 0); +} + +template +void test_signed(T AS_GLOBAL *a, T AS_LOCAL *b, T AS_PRIVATE *c, + T AS_GENERIC *d) { + __spirv_AtomicLoad(a, 1, 16); + __spirv_AtomicLoad(b, 2, 8); + __spirv_AtomicLoad(c, 4, 4); + __spirv_AtomicLoad(d, 2, 0); + + __spirv_AtomicStore(a, 1, 16, (T)0); + __spirv_AtomicStore(b, 2, 8, (T)0); + __spirv_AtomicStore(c, 4, 4, (T)0); + __spirv_AtomicStore(d, 2, 0, (T)0); + + __spirv_AtomicExchange(a, 1, 16, (T)0); + __spirv_AtomicExchange(b, 2, 8, (T)0); + __spirv_AtomicExchange(c, 4, 4, (T)0); + __spirv_AtomicExchange(d, 2, 0, (T)0); + + __spirv_AtomicCompareExchange(a, 1, 16, 0, (T)1, (T)0); + __spirv_AtomicCompareExchange(b, 2, 8, 0, (T)1, (T)0); + __spirv_AtomicCompareExchange(c, 4, 4, 0, (T)1, (T)0); + __spirv_AtomicCompareExchange(d, 2, 0, 0, (T)1, (T)0); + + __spirv_AtomicCompareExchangeWeak(a, 1, 16, 0, (T)1, (T)0); + __spirv_AtomicCompareExchangeWeak(b, 2, 8, 0, (T)1, (T)0); + __spirv_AtomicCompareExchangeWeak(c, 4, 4, 0, (T)1, (T)0); + __spirv_AtomicCompareExchangeWeak(d, 2, 0, 0, (T)1, (T)0); + + __spirv_AtomicIIncrement(a, 1, 16); + __spirv_AtomicIIncrement(b, 2, 
8); + __spirv_AtomicIIncrement(c, 4, 4); + __spirv_AtomicIIncrement(d, 2, 0); + + __spirv_AtomicIDecrement(a, 1, 16); + __spirv_AtomicIDecrement(b, 2, 8); + __spirv_AtomicIDecrement(c, 4, 4); + __spirv_AtomicIDecrement(d, 2, 0); + + __spirv_AtomicSMin(a, 1, 16, (T)0); + __spirv_AtomicSMin(b, 2, 8, (T)0); + __spirv_AtomicSMin(c, 4, 4, (T)0); + __spirv_AtomicSMin(d, 2, 0, (T)0); + + __spirv_AtomicSMax(a, 1, 16, (T)0); + __spirv_AtomicSMax(b, 2, 8, (T)0); + __spirv_AtomicSMax(c, 4, 4, (T)0); + __spirv_AtomicSMax(d, 2, 0, (T)0); + + __spirv_AtomicIAdd(a, 1, 16, (T)0); + __spirv_AtomicIAdd(b, 2, 8, (T)0); + __spirv_AtomicIAdd(c, 4, 4, (T)0); + __spirv_AtomicIAdd(d, 2, 0, (T)0); + + __spirv_AtomicISub(a, 1, 16, (T)0); + __spirv_AtomicISub(b, 2, 8, (T)0); + __spirv_AtomicISub(c, 4, 4, (T)0); + __spirv_AtomicISub(d, 2, 0, (T)0); + + __spirv_AtomicAnd(a, 1, 16, (T)0); + __spirv_AtomicAnd(b, 2, 8, (T)0); + __spirv_AtomicAnd(c, 4, 4, (T)0); + __spirv_AtomicAnd(d, 2, 0, (T)0); + + __spirv_AtomicOr(a, 1, 16, (T)0); + __spirv_AtomicOr(b, 2, 8, (T)0); + __spirv_AtomicOr(c, 4, 4, (T)0); + __spirv_AtomicOr(d, 2, 0, (T)0); + + __spirv_AtomicXor(a, 1, 16, (T)0); + __spirv_AtomicXor(b, 2, 8, (T)0); + __spirv_AtomicXor(c, 4, 4, (T)0); + __spirv_AtomicXor(d, 2, 0, (T)0); +} + +template +void test_unsigned(T AS_GLOBAL *a, T AS_LOCAL *b, T AS_PRIVATE *c, + T AS_GENERIC *d) { + + __spirv_AtomicUMin(a, 1, 16, (T)0); + __spirv_AtomicUMin(b, 2, 8, (T)0); + __spirv_AtomicUMin(c, 4, 4, (T)0); + __spirv_AtomicUMin(d, 2, 0, (T)0); + + __spirv_AtomicUMax(a, 1, 16, (T)0); + __spirv_AtomicUMax(b, 2, 8, (T)0); + __spirv_AtomicUMax(c, 4, 4, (T)0); + __spirv_AtomicUMax(d, 2, 0, (T)0); +} + +template +void test_float(T AS_GLOBAL *a, T AS_LOCAL *b, T AS_PRIVATE *c, + T AS_GENERIC *d) { + __spirv_AtomicFMaxEXT(a, 1, 16, (T)0); + __spirv_AtomicFMaxEXT(b, 2, 8, (T)0); + __spirv_AtomicFMaxEXT(c, 4, 4, (T)0); + __spirv_AtomicFMaxEXT(d, 2, 0, (T)0); + + __spirv_AtomicFMinEXT(a, 1, 16, (T)0); + __spirv_AtomicFMinEXT(b, 2, 8, (T)0); + __spirv_AtomicFMinEXT(c, 4, 4, (T)0); + __spirv_AtomicFMinEXT(d, 2, 0, (T)0); + + __spirv_AtomicFAddEXT(a, 1, 16, (T)0); + __spirv_AtomicFAddEXT(b, 2, 8, (T)0); + __spirv_AtomicFAddEXT(c, 4, 4, (T)0); + __spirv_AtomicFAddEXT(d, 2, 0, (T)0); +} + +void foo() { + int AS_GLOBAL *a; + int AS_LOCAL *b; + int AS_PRIVATE *c; + int AS_GENERIC *d; + test_flag(a, b, c, d); + + test_signed(a, b, c, d); + + unsigned int AS_GLOBAL *ua; + unsigned int AS_LOCAL *ub; + unsigned int AS_PRIVATE *uc; + unsigned int AS_GENERIC *ud; + test_unsigned(ua, ub, uc, ud); + + float AS_GLOBAL *fa; + float AS_LOCAL *fb; + float AS_PRIVATE *fc; + float AS_GENERIC *fd; + test_float(fa, fb, fc, fd); +} + +// CHECK: call spir_func noundef zeroext i1 @_Z28__spirv_AtomicFlagTestAndSetPU3AS1iii( +// CHECK: call spir_func noundef zeroext i1 @_Z28__spirv_AtomicFlagTestAndSetPU3AS3iii( +// CHECK: call spir_func noundef zeroext i1 @_Z28__spirv_AtomicFlagTestAndSetPiii( +// CHECK: call spir_func noundef zeroext i1 @_Z28__spirv_AtomicFlagTestAndSetPU3AS4iii( +// CHECK: call spir_func void @_Z23__spirv_AtomicFlagClearPU3AS1iii( +// CHECK: call spir_func void @_Z23__spirv_AtomicFlagClearPU3AS3iii( +// CHECK: call spir_func void @_Z23__spirv_AtomicFlagClearPiii( +// CHECK: call spir_func void @_Z23__spirv_AtomicFlagClearPU3AS4iii( + +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1iii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS3iii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicLoadPiii( +// CHECK: 
call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS4iii( +// CHECK: call spir_func void @_Z19__spirv_AtomicStorePU3AS1iiii( +// CHECK: call spir_func void @_Z19__spirv_AtomicStorePU3AS3iiii( +// CHECK: call spir_func void @_Z19__spirv_AtomicStorePiiii( +// CHECK: call spir_func void @_Z19__spirv_AtomicStorePU3AS4iiii( +// CHECK: call spir_func noundef i32 @_Z22__spirv_AtomicExchangePU3AS1iiii( +// CHECK: call spir_func noundef i32 @_Z22__spirv_AtomicExchangePU3AS3iiii( +// CHECK: call spir_func noundef i32 @_Z22__spirv_AtomicExchangePiiii( +// CHECK: call spir_func noundef i32 @_Z22__spirv_AtomicExchangePU3AS4iiii( +// CHECK: call spir_func noundef i32 @_Z29__spirv_AtomicCompareExchangePU3AS1iiiiii( +// CHECK: call spir_func noundef i32 @_Z29__spirv_AtomicCompareExchangePU3AS3iiiiii( +// CHECK: call spir_func noundef i32 @_Z29__spirv_AtomicCompareExchangePiiiiii( +// CHECK: call spir_func noundef i32 @_Z29__spirv_AtomicCompareExchangePU3AS4iiiiii( +// CHECK: call spir_func noundef i32 @_Z33__spirv_AtomicCompareExchangeWeakPU3AS1iiiiii( +// CHECK: call spir_func noundef i32 @_Z33__spirv_AtomicCompareExchangeWeakPU3AS3iiiiii( +// CHECK: call spir_func noundef i32 @_Z33__spirv_AtomicCompareExchangeWeakPiiiiii( +// CHECK: call spir_func noundef i32 @_Z33__spirv_AtomicCompareExchangeWeakPU3AS4iiiiii( +// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIIncrementPU3AS1iii( +// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIIncrementPU3AS3iii( +// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIIncrementPiii( +// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIIncrementPU3AS4iii( +// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIDecrementPU3AS1iii( +// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIDecrementPU3AS3iii( +// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIDecrementPiii( +// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIDecrementPU3AS4iii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMinPU3AS1iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMinPU3AS3iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMinPiiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMinPU3AS4iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMaxPU3AS1iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMaxPU3AS3iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMaxPiiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMaxPU3AS4iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicIAddPU3AS1iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicIAddPU3AS3iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicIAddPiiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicIAddPU3AS4iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicISubPU3AS1iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicISubPU3AS3iiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicISubPiiii( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicISubPU3AS4iiii( +// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicAndPU3AS1iiii( +// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicAndPU3AS3iiii( +// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicAndPiiii( +// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicAndPU3AS4iiii( +// CHECK: call spir_func noundef i32 @_Z16__spirv_AtomicOrPU3AS1iiii( +// CHECK: call spir_func noundef i32 @_Z16__spirv_AtomicOrPU3AS3iiii( +// CHECK: call spir_func noundef i32 
@_Z16__spirv_AtomicOrPiiii( +// CHECK: call spir_func noundef i32 @_Z16__spirv_AtomicOrPU3AS4iiii( +// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicXorPU3AS1iiii( +// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicXorPU3AS3iiii( +// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicXorPiiii( +// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicXorPU3AS4iiii( + +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMinPU3AS1jiij( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMinPU3AS3jiij( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMinPjiij( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMinPU3AS4jiij( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMaxPU3AS1jiij( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMaxPU3AS3jiij( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMaxPjiij( +// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMaxPU3AS4jiij( + +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMaxEXTPU3AS1fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMaxEXTPU3AS3fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMaxEXTPfiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMaxEXTPU3AS4fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMinEXTPU3AS1fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMinEXTPU3AS3fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMinEXTPfiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMinEXTPU3AS4fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFAddEXTPU3AS1fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFAddEXTPU3AS3fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFAddEXTPfiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFAddEXTPU3AS4fiif( From 764b3f374b11f2c60787fc7140f9277ffed500af Mon Sep 17 00:00:00 2001 From: Wenju He Date: Mon, 24 Mar 2025 15:40:51 -0700 Subject: [PATCH 6/7] add noexcept to declarations in header --- libdevice/atomic.hpp | 14 +++--- sycl/include/sycl/__spirv/spirv_ops.hpp | 63 +++++++++++++------------ 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 610efec50b03f..ea3e725ebe48d 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -58,20 +58,22 @@ struct MemorySemanticsMask { } // namespace __spv extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange(int SPIR_GLOBAL *, int, - int, int, int, int); + int, int, int, + int) noexcept; extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange(int *, int, int, int, - int, int); + int, int) noexcept; extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int SPIR_GLOBAL *, int, - int); + int) noexcept; extern DEVICE_EXTERNAL void __spirv_AtomicStore(int SPIR_GLOBAL *, int, int, - int); + int) noexcept; -extern DEVICE_EXTERNAL void __spirv_AtomicStore(int *, int, int, int); +extern DEVICE_EXTERNAL void __spirv_AtomicStore(int *, int, int, int) noexcept; -extern DEVICE_EXTERNAL int __spirv_AtomicIAdd(SPIR_GLOBAL int *, int, int, int); +extern DEVICE_EXTERNAL int __spirv_AtomicIAdd(SPIR_GLOBAL int *, int, int, + int) noexcept; /// Atomically set the value in *Ptr with Desired if and only if it is Expected /// Return the value which already was in *Ptr diff --git a/sycl/include/sycl/__spirv/spirv_ops.hpp b/sycl/include/sycl/__spirv/spirv_ops.hpp index 82d5cdeee60e8..bec092673e7d9 100644 --- a/sycl/include/sycl/__spirv/spirv_ops.hpp +++ 
b/sycl/include/sycl/__spirv/spirv_ops.hpp @@ -239,52 +239,52 @@ extern __DPCPP_SYCL_EXTERNAL // Atomic SPIR-V builtins #define __SPIRV_ATOMIC_LOAD(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad(AS Type *P, int S, \ - int O); + int O) noexcept; #define __SPIRV_ATOMIC_STORE(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \ - AS Type *P, int S, int E, int U, Type V, Type C); + AS Type *P, int S, int E, int U, Type V, Type C) noexcept; #define __SPIRV_ATOMIC_IADD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_ISUB(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_FADD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_SMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_UMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_FMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_SMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_UMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_FMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT(AS Type *P, int S, \ - int O, Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \ + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_AND(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd(AS Type *P, int S, \ - int O, Type V); + int O, Type V) noexcept; #define __SPIRV_ATOMIC_OR(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr(AS Type *P, int S, int O, \ - Type V); + Type V) noexcept; #define __SPIRV_ATOMIC_XOR(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor(AS Type *P, int 
S, \ - int O, Type V); + int O, Type V) noexcept; #define __SPIRV_ATOMIC_FLOAT(AS, Type) \ __SPIRV_ATOMIC_FADD(AS, Type) \ @@ -319,18 +319,21 @@ extern __DPCPP_SYCL_EXTERNAL template \ typename std::enable_if_t< \ std::is_integral::value && std::is_signed::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, \ + T Value) noexcept { \ return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \ } \ template \ typename std::enable_if_t< \ std::is_integral::value && !std::is_signed::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, \ + T Value) noexcept { \ return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \ } \ template \ typename std::enable_if_t::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, \ + T Value) noexcept { \ return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \ } From 3b087df2250cb20f66671761c09994bc1ec769e8 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 9 Apr 2025 18:23:52 -0700 Subject: [PATCH 7/7] clang-format --- libclc/generic/lib/atomic/atomic_add.cl | 7 +- libclc/generic/lib/atomic/atomic_and.cl | 7 +- libclc/generic/lib/atomic/atomic_cmpxchg.cl | 11 +- libclc/generic/lib/atomic/atomic_dec.cl | 8 +- libclc/generic/lib/atomic/atomic_inc.cl | 8 +- libclc/generic/lib/atomic/atomic_max.cl | 6 +- libclc/generic/lib/atomic/atomic_min.cl | 6 +- libclc/generic/lib/atomic/atomic_or.cl | 7 +- libclc/generic/lib/atomic/atomic_sub.cl | 7 +- libclc/generic/lib/atomic/atomic_xchg.cl | 13 +- libclc/generic/lib/atomic/atomic_xor.cl | 7 +- .../lib/cl_khr_int64_base_atomics/atom_add.cl | 7 +- .../cl_khr_int64_base_atomics/atom_cmpxchg.cl | 11 +- .../lib/cl_khr_int64_base_atomics/atom_sub.cl | 7 +- .../cl_khr_int64_base_atomics/atom_xchg.cl | 7 +- .../cl_khr_int64_extended_atomics/atom_and.cl | 7 +- .../cl_khr_int64_extended_atomics/atom_max.cl | 6 +- .../cl_khr_int64_extended_atomics/atom_min.cl | 6 +- .../cl_khr_int64_extended_atomics/atom_or.cl | 7 +- .../cl_khr_int64_extended_atomics/atom_xor.cl | 7 +- .../lib/amdgcn-amdhsa/atomic/atomic_add.cl | 61 +++---- .../amdgcn-amdhsa/atomic/atomic_cmpxchg.cl | 49 +++--- .../lib/amdgcn-amdhsa/atomic/atomic_load.cl | 18 +- .../lib/amdgcn-amdhsa/atomic/atomic_min.cl | 8 +- .../lib/amdgcn-amdhsa/atomic/atomic_store.cl | 20 +-- .../lib/amdgcn-amdhsa/atomic/atomic_sub.cl | 20 +-- .../lib/amdgcn-amdhsa/atomic/atomic_xchg.cl | 6 +- .../libspirv/lib/generic/atomic/atomic_add.cl | 11 +- .../libspirv/lib/generic/atomic/atomic_and.cl | 11 +- .../lib/generic/atomic/atomic_cmpxchg.cl | 81 +++++---- .../libspirv/lib/generic/atomic/atomic_dec.cl | 34 ++-- .../lib/generic/atomic/atomic_load.cl | 40 ++--- .../libspirv/lib/generic/atomic/atomic_max.cl | 11 +- .../libspirv/lib/generic/atomic/atomic_min.cl | 15 +- .../libspirv/lib/generic/atomic/atomic_or.cl | 13 +- .../lib/generic/atomic/atomic_store.cl | 48 +++--- .../libspirv/lib/generic/atomic/atomic_sub.cl | 11 +- .../lib/generic/atomic/atomic_xchg.cl | 31 ++-- .../libspirv/lib/generic/atomic/atomic_xor.cl | 11 +- .../lib/ptx-nvidiacl/atomic/atomic_add.cl | 159 +++++++++--------- .../lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl | 15 +- .../lib/ptx-nvidiacl/atomic/atomic_load.cl | 22 +-- .../lib/ptx-nvidiacl/atomic/atomic_max.cl | 74 ++++---- .../lib/ptx-nvidiacl/atomic/atomic_min.cl | 71 ++++---- 
.../lib/ptx-nvidiacl/atomic/atomic_store.cl | 17 +- .../lib/ptx-nvidiacl/atomic/atomic_sub.cl | 20 +-- 46 files changed, 505 insertions(+), 524 deletions(-) diff --git a/libclc/generic/lib/atomic/atomic_add.cl b/libclc/generic/lib/atomic/atomic_add.cl index 7d26c50f66d55..340e4445241f1 100644 --- a/libclc/generic/lib/atomic/atomic_add.cl +++ b/libclc/generic/lib/atomic/atomic_add.cl @@ -1,9 +1,10 @@ #include #include -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicIAdd((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicIAdd((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(int, global) diff --git a/libclc/generic/lib/atomic/atomic_and.cl b/libclc/generic/lib/atomic/atomic_and.cl index 3e7fd41f8cf0f..5e162e7e66901 100644 --- a/libclc/generic/lib/atomic/atomic_and.cl +++ b/libclc/generic/lib/atomic/atomic_and.cl @@ -1,9 +1,10 @@ #include #include -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicAnd((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicAnd((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(int, global) diff --git a/libclc/generic/lib/atomic/atomic_cmpxchg.cl b/libclc/generic/lib/atomic/atomic_cmpxchg.cl index f41f8c07881a9..25ceaf6cd8d4f 100644 --- a/libclc/generic/lib/atomic/atomic_cmpxchg.cl +++ b/libclc/generic/lib/atomic/atomic_cmpxchg.cl @@ -1,11 +1,12 @@ #include #include -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ - TYPE val) { \ - return __spirv_AtomicCompareExchange((AS TYPE *)p, Device, \ - SequentiallyConsistent, SequentiallyConsistent, val, cmp); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ + TYPE val) { \ + return __spirv_AtomicCompareExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, \ + SequentiallyConsistent, val, cmp); \ } IMPL(int, global) diff --git a/libclc/generic/lib/atomic/atomic_dec.cl b/libclc/generic/lib/atomic/atomic_dec.cl index 77555c2243012..68fc37fad022a 100644 --- a/libclc/generic/lib/atomic/atomic_dec.cl +++ b/libclc/generic/lib/atomic/atomic_dec.cl @@ -1,10 +1,10 @@ #include #include -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_dec(volatile AS TYPE *p) { \ - return __spirv_AtomicIDecrement( \ - (AS TYPE *)p, Device, SequentiallyConsistent); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_dec(volatile AS TYPE *p) { \ + return __spirv_AtomicIDecrement((AS TYPE *)p, Device, \ + SequentiallyConsistent); \ } IMPL(int, global) diff --git a/libclc/generic/lib/atomic/atomic_inc.cl b/libclc/generic/lib/atomic/atomic_inc.cl index 65e49aa765d2d..5c92c4d800cf2 100644 --- a/libclc/generic/lib/atomic/atomic_inc.cl +++ b/libclc/generic/lib/atomic/atomic_inc.cl @@ -1,10 +1,10 @@ #include #include -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_inc(volatile AS TYPE *p) { \ - return __spirv_AtomicIIncrement( \ - (AS TYPE *)p, Device, SequentiallyConsistent); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_inc(volatile AS TYPE *p) { \ + return __spirv_AtomicIIncrement((AS TYPE *)p, Device, \ + SequentiallyConsistent); \ } IMPL(int, global) 
diff --git a/libclc/generic/lib/atomic/atomic_max.cl b/libclc/generic/lib/atomic/atomic_max.cl index 4446c780557e8..b2ec938c43638 100644 --- a/libclc/generic/lib/atomic/atomic_max.cl +++ b/libclc/generic/lib/atomic/atomic_max.cl @@ -1,9 +1,9 @@ #include #include -#define IMPL(TYPE, AS, OP) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ - return OP((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, OP) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ + return OP((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } IMPL(int, global, __spirv_AtomicSMax) diff --git a/libclc/generic/lib/atomic/atomic_min.cl b/libclc/generic/lib/atomic/atomic_min.cl index 833ba068d810a..86208e21ab110 100644 --- a/libclc/generic/lib/atomic/atomic_min.cl +++ b/libclc/generic/lib/atomic/atomic_min.cl @@ -1,9 +1,9 @@ #include #include -#define IMPL(TYPE, AS, OP) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ - return OP((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, OP) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ + return OP((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } IMPL(int, global, __spirv_AtomicSMin) diff --git a/libclc/generic/lib/atomic/atomic_or.cl b/libclc/generic/lib/atomic/atomic_or.cl index 4ec9fe3a1b846..3454d5ea7c4cd 100644 --- a/libclc/generic/lib/atomic/atomic_or.cl +++ b/libclc/generic/lib/atomic/atomic_or.cl @@ -1,9 +1,10 @@ #include #include -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicOr((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicOr((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(int, global) diff --git a/libclc/generic/lib/atomic/atomic_sub.cl b/libclc/generic/lib/atomic/atomic_sub.cl index 35c772eabff15..a2a7d06ebda43 100644 --- a/libclc/generic/lib/atomic/atomic_sub.cl +++ b/libclc/generic/lib/atomic/atomic_sub.cl @@ -1,9 +1,10 @@ #include #include -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicISub((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicISub((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(int, global) diff --git a/libclc/generic/lib/atomic/atomic_xchg.cl b/libclc/generic/lib/atomic/atomic_xchg.cl index 7a0838fdc839c..706e8c76aacff 100644 --- a/libclc/generic/lib/atomic/atomic_xchg.cl +++ b/libclc/generic/lib/atomic/atomic_xchg.cl @@ -2,16 +2,19 @@ #include _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) { - return __spirv_AtomicExchange((global float *)p, Device, SequentiallyConsistent, val); + return __spirv_AtomicExchange((global float *)p, Device, + SequentiallyConsistent, val); } _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) { - return __spirv_AtomicExchange((local float *)p, Device, SequentiallyConsistent, val); + return __spirv_AtomicExchange((local float *)p, Device, + SequentiallyConsistent, val); } -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicExchange((AS TYPE *)p, Device, 
SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, val); \ } IMPL(int, global) diff --git a/libclc/generic/lib/atomic/atomic_xor.cl b/libclc/generic/lib/atomic/atomic_xor.cl index 7f59038f5cb72..571a0d0282484 100644 --- a/libclc/generic/lib/atomic/atomic_xor.cl +++ b/libclc/generic/lib/atomic/atomic_xor.cl @@ -1,9 +1,10 @@ #include #include -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicXor((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicXor((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(int, global) diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl index 305664b4721d2..c48a11e5aa23c 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl @@ -3,9 +3,10 @@ #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicIAdd((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicIAdd((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(long, global) diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl index c53909e9353ff..377f64d06b582 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl @@ -3,11 +3,12 @@ #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ - TYPE val) { \ - return __spirv_AtomicCompareExchange((AS TYPE *)p, Device, \ - SequentiallyConsistent, SequentiallyConsistent, cmp, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ + TYPE val) { \ + return __spirv_AtomicCompareExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, \ + SequentiallyConsistent, cmp, val); \ } IMPL(long, global) diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl index f09bd9db65e02..3ccd958cc2de9 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl @@ -3,9 +3,10 @@ #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicISub((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicISub((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(long, global) diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl index f3dc41dca4873..00fd573376965 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl @@ 
-3,9 +3,10 @@ #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicExchange((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, val); \ } IMPL(long, global) diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl index dab117c9ac382..0f5c3057bcb19 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl @@ -3,9 +3,10 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicAnd((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicAnd((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(long, global) diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl index b305c2d66225b..0462bed336dcf 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl @@ -3,9 +3,9 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, AS, NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \ - return NAME((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \ + return NAME((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } IMPL(long, global, __spirv_AtomicSMax) diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl index 53a00856923d8..f9ea77fad0b77 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl @@ -3,9 +3,9 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, AS, NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \ - return NAME((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \ + return NAME((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } IMPL(long, global, __spirv_AtomicSMin) diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl index d9cc70084c0dc..c130a9468635a 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl @@ -3,9 +3,10 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicOr((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicOr((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(long, global) diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl 
b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl index 62782ee5fdbf2..af3d6452470ca 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl @@ -3,9 +3,10 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) { \ - return __spirv_AtomicXor((AS TYPE *)p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicXor((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } IMPL(long, global) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl index 2a91f6aa3b66c..be59bec58fa77 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl @@ -17,16 +17,14 @@ AMDGPU_ATOMIC(__spirv_AtomicIAdd, unsigned int, __hip_atomic_fetch_add) AMDGPU_ATOMIC(__spirv_AtomicIAdd, long, __hip_atomic_fetch_add) AMDGPU_ATOMIC(__spirv_AtomicIAdd, unsigned long, __hip_atomic_fetch_add) -#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, CHECK, NEW_BUILTIN) \ - _CLC_OVERLOAD _CLC_DECL float \ - __spirv_AtomicFAddEXT( \ - AS float *p, int scope, \ - int semantics, float val) { \ - if (CHECK) \ - return NEW_BUILTIN(p, val); \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, CHECK, NEW_BUILTIN) \ + _CLC_OVERLOAD _CLC_DECL float __spirv_AtomicFAddEXT( \ + AS float *p, int scope, int semantics, float val) { \ + if (CHECK) \ + return NEW_BUILTIN(p, val); \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \ } // Global AS atomics can be unsafe for malloc shared atomics, so should be opt @@ -41,29 +39,23 @@ AMDGPU_ATOMIC_FP32_ADD_IMPL(local, AMDGPU_ARCH_GEQ(8000), AMDGPU_ATOMIC_FP32_ADD_IMPL(, AMDGPU_ARCH_BETWEEN(9400, 10000), __builtin_amdgcn_flat_atomic_fadd_f32) -#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, CHECK, \ - NEW_BUILTIN) \ - _CLC_OVERLOAD _CLC_DECL double \ - __spirv_AtomicFAddEXT( \ - AS double *p, int scope, \ - int semantics, double val) { \ - if (CHECK) \ - return NEW_BUILTIN(p, val); \ - int atomic_scope = 0, memory_order = 0; \ - AS long *int_pointer = (AS long *)p; \ - long old_int_val = 0, new_int_val = 0; \ - do { \ - old_int_val = \ - __spirv_AtomicLoad( \ - int_pointer, scope, semantics); \ - double new_double_val = *(double *)&old_int_val + val; \ - new_int_val = *(long *)&new_double_val; \ - } while ( \ - __spirv_AtomicCompareExchange( \ - int_pointer, scope, semantics, semantics, new_int_val, \ - old_int_val) != old_int_val); \ - \ - return *(double *)&old_int_val; \ +#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, CHECK, NEW_BUILTIN) \ + _CLC_OVERLOAD _CLC_DECL double __spirv_AtomicFAddEXT( \ + AS double *p, int scope, int semantics, double val) { \ + if (CHECK) \ + return NEW_BUILTIN(p, val); \ + int atomic_scope = 0, memory_order = 0; \ + AS long *int_pointer = (AS long *)p; \ + long old_int_val = 0, new_int_val = 0; \ + do { \ + old_int_val = __spirv_AtomicLoad(int_pointer, scope, semantics); \ + double new_double_val = *(double *)&old_int_val + val; \ + new_int_val = *(long 
*)&new_double_val; \ + } while (__spirv_AtomicCompareExchange(int_pointer, scope, semantics, \ + semantics, new_int_val, \ + old_int_val) != old_int_val); \ + \ + return *(double *)&old_int_val; \ } #ifdef cl_khr_int64_base_atomics @@ -74,8 +66,7 @@ AMDGPU_ATOMIC_FP64_ADD_IMPL( AMDGPU_ARCH_BETWEEN(9010, 10000) && __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_FP_ATOMICS"), __builtin_amdgcn_global_atomic_fadd_f64) -AMDGPU_ATOMIC_FP64_ADD_IMPL(local, - AMDGPU_ARCH_BETWEEN(9010, 10000), +AMDGPU_ATOMIC_FP64_ADD_IMPL(local, AMDGPU_ARCH_BETWEEN(9010, 10000), __builtin_amdgcn_ds_atomic_fadd_f64) AMDGPU_ATOMIC_FP64_ADD_IMPL(, AMDGPU_ARCH_BETWEEN(9400, 10000), __builtin_amdgcn_flat_atomic_fadd_f64) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl index 96852b22b3dad..6a6b39f9c70ca 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl @@ -10,34 +10,31 @@ #include #include -#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE \ - __spirv_AtomicCompareExchange( \ - AS TYPE *p, int scope, \ - int success_semantics, \ - int failure_semantics, TYPE desired, \ - TYPE expected) { \ - int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \ - memory_order_success) \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \ - memory_order_failure) \ - __hip_atomic_compare_exchange_strong(p, &expected, desired, \ - memory_order_success, \ - memory_order_failure, atomic_scope); \ - /* If cmpxchg \ - * succeeds: \ - - `expected` is unchanged, holding the old val that was at `p` \ - - `p` is changed to hold `desired` \ - * fails: \ - - `expected` is changed to hold the current val at `p` \ - - `p` is unchanged*/ \ - return expected; \ +#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicCompareExchange( \ + AS TYPE *p, int scope, int success_semantics, int failure_semantics, \ + TYPE desired, TYPE expected) { \ + int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \ + memory_order_success) \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \ + memory_order_failure) \ + __hip_atomic_compare_exchange_strong(p, &expected, desired, \ + memory_order_success, \ + memory_order_failure, atomic_scope); \ + /* If cmpxchg \ + * succeeds: \ + - `expected` is unchanged, holding the old val that was at `p` \ + - `p` is changed to hold `desired` \ + * fails: \ + - `expected` is changed to hold the current val at `p` \ + - `p` is unchanged*/ \ + return expected; \ } -#define AMDGPU_ATOMIC_CMPXCHG(TYPE) \ - AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, global) \ - AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, local) \ +#define AMDGPU_ATOMIC_CMPXCHG(TYPE) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, local) \ AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, ) AMDGPU_ATOMIC_CMPXCHG(int) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl index ca0c56405c4bb..6e03d2fd9e0e9 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl @@ -10,17 +10,17 @@ #include #include -#define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF TYPE \ - 
__spirv_AtomicLoad(AS TYPE *p, int scope, int semantics) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return __hip_atomic_load(p, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicLoad(AS TYPE *p, int scope, \ + int semantics) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return __hip_atomic_load(p, memory_order, atomic_scope); \ } -#define AMDGPU_ATOMIC_LOAD(TYPE) \ - AMDGPU_ATOMIC_LOAD_IMPL(TYPE, global) \ - AMDGPU_ATOMIC_LOAD_IMPL(TYPE, local) \ +#define AMDGPU_ATOMIC_LOAD(TYPE) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, local) \ AMDGPU_ATOMIC_LOAD_IMPL(TYPE, ) AMDGPU_ATOMIC_LOAD(int) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl index 29cd1602b1bbb..71b7382fec8d4 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl @@ -24,12 +24,12 @@ AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, int, ) #ifdef cl_khr_int64_base_atomics AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Min, <, double, long, global, - AMDGPU_ARCH_BETWEEN(9010, 10000), - __builtin_amdgcn_global_atomic_fmin_f64) + AMDGPU_ARCH_BETWEEN(9010, 10000), + __builtin_amdgcn_global_atomic_fmin_f64) AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, long, local) AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Min, <, double, long, , - AMDGPU_ARCH_BETWEEN(9010, 10000), - __builtin_amdgcn_flat_atomic_fmin_f64) + AMDGPU_ARCH_BETWEEN(9010, 10000), + __builtin_amdgcn_flat_atomic_fmin_f64) #endif #undef AMDGPU_ATOMIC diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl index eafbacaefd8bf..80adc8f7058e0 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl @@ -10,18 +10,18 @@ #include #include -#define AMDGPU_ATOMIC_STORE_IMPL(TYPE, AS) \ - _CLC_OVERLOAD _CLC_DEF void \ - __spirv_AtomicStore(AS TYPE *p, int scope, int semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - __hip_atomic_store(p, val, memory_order, atomic_scope); \ - return; \ +#define AMDGPU_ATOMIC_STORE_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF void __spirv_AtomicStore(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + __hip_atomic_store(p, val, memory_order, atomic_scope); \ + return; \ } -#define AMDGPU_ATOMIC_STORE(TYPE) \ - AMDGPU_ATOMIC_STORE_IMPL(TYPE, global) \ - AMDGPU_ATOMIC_STORE_IMPL(TYPE, local) \ +#define AMDGPU_ATOMIC_STORE(TYPE) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, local) \ AMDGPU_ATOMIC_STORE_IMPL(TYPE, ) AMDGPU_ATOMIC_STORE(int) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl index 7b2162ed3c9e4..2df7315844d1b 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl @@ -10,19 +10,17 @@ #include #include -#define AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, AS, BUILTIN) \ - _CLC_OVERLOAD _CLC_DEF TYPE \ - FUNC_NAME(AS TYPE *p, 
int scope, int semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return BUILTIN(p, -val, memory_order); \ +#define AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, AS, BUILTIN) \ + _CLC_OVERLOAD _CLC_DEF TYPE FUNC_NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return BUILTIN(p, -val, memory_order); \ } -#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, BUILTIN) \ - AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, global, \ - BUILTIN) \ - AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, local, \ - BUILTIN) \ +#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, global, BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, local, BUILTIN) \ AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, , BUILTIN) AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, int, __atomic_fetch_add) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl index 56a2996e6d312..13922673e092a 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl @@ -11,11 +11,9 @@ #include AMDGPU_ATOMIC(__spirv_AtomicExchange, int, __hip_atomic_exchange) -AMDGPU_ATOMIC(__spirv_AtomicExchange, unsigned int, - __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, unsigned int, __hip_atomic_exchange) AMDGPU_ATOMIC(__spirv_AtomicExchange, long, __hip_atomic_exchange) -AMDGPU_ATOMIC(__spirv_AtomicExchange, unsigned long, - __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, unsigned long, __hip_atomic_exchange) AMDGPU_ATOMIC(__spirv_AtomicExchange, float, __hip_atomic_exchange) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/generic/atomic/atomic_add.cl b/libclc/libspirv/lib/generic/atomic/atomic_add.cl index 1f051cc650199..96b170d263f70 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_add.cl @@ -8,10 +8,10 @@ #include -#define IMPL(TYPE, AS, FN_NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE \ - __spirv_AtomicIAdd(AS TYPE *p, int scope, int semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicIAdd(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } IMPL(int, global, __sync_fetch_and_add) @@ -28,8 +28,7 @@ IMPL(unsigned long, local, __sync_fetch_and_add_8) #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, FN_NAME) \ - IMPL(TYPE, , FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) IMPL_GENERIC(int, __sync_fetch_and_add) IMPL_GENERIC(unsigned int, __sync_fetch_and_add) diff --git a/libclc/libspirv/lib/generic/atomic/atomic_and.cl b/libclc/libspirv/lib/generic/atomic/atomic_and.cl index e8e1d4d96e55a..a332fe17d793e 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_and.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_and.cl @@ -8,10 +8,10 @@ #include -#define IMPL(TYPE, AS, FN_NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE \ - __spirv_AtomicAnd(AS TYPE *p, int scope, int semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicAnd(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } IMPL(int, global, __sync_fetch_and_and) @@ -28,8 +28,7 @@ IMPL(unsigned long, 
local, __sync_fetch_and_and_8) #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, FN_NAME) \ - IMPL(TYPE, , FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) IMPL_GENERIC(int, __sync_fetch_and_and) IMPL_GENERIC(unsigned int, __sync_fetch_and_and) diff --git a/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl index a16bbe48c8810..cce74d0e7b91e 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl @@ -8,52 +8,60 @@ #include -_CLC_OVERLOAD _CLC_DEF int -__spirv_AtomicCompareExchange( - local int *p, int scope, int eq, int neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int __spirv_AtomicCompareExchange(local int *p, + int scope, int eq, + int neq, int val, + int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_OVERLOAD _CLC_DEF int -__spirv_AtomicCompareExchange( - global int *p, int scope, int eq, int neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int __spirv_AtomicCompareExchange(global int *p, + int scope, int eq, + int neq, int val, + int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_OVERLOAD _CLC_DEF uint -__spirv_AtomicCompareExchange( - local uint *p, int scope, int eq, int neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint __spirv_AtomicCompareExchange(local uint *p, + int scope, int eq, + int neq, uint val, + uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_OVERLOAD _CLC_DEF uint -__spirv_AtomicCompareExchange( - global uint *p, int scope, int eq, int neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint __spirv_AtomicCompareExchange(global uint *p, + int scope, int eq, + int neq, uint val, + uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } #ifdef cl_khr_int64_base_atomics -_CLC_OVERLOAD _CLC_DEF long -__spirv_AtomicCompareExchange( - local long *p, int scope, int eq, int neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long __spirv_AtomicCompareExchange(local long *p, + int scope, int eq, + int neq, long val, + long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_OVERLOAD _CLC_DEF long -__spirv_AtomicCompareExchange( - global long *p, int scope, int eq, int neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long __spirv_AtomicCompareExchange(global long *p, + int scope, int eq, + int neq, long val, + long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_OVERLOAD _CLC_DEF ulong -__spirv_AtomicCompareExchange( - local ulong *p, int scope, int eq, int neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong __spirv_AtomicCompareExchange(local ulong *p, + int scope, int eq, + int neq, ulong val, + ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_OVERLOAD _CLC_DEF ulong -__spirv_AtomicCompareExchange( - global ulong *p, int scope, int eq, int neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong __spirv_AtomicCompareExchange(global ulong *p, + int scope, int eq, + int neq, ulong val, + ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } @@ -61,29 +69,30 @@ __spirv_AtomicCompareExchange( #if _CLC_GENERIC_AS_SUPPORTED -_CLC_OVERLOAD _CLC_DEF int -__spirv_AtomicCompareExchange( - int *p, int scope, int eq, int neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int __spirv_AtomicCompareExchange(int *p, int scope, + int eq, int neq, + int val, int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_OVERLOAD _CLC_DEF uint -__spirv_AtomicCompareExchange( - uint *p, int scope, 
int eq, int neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint __spirv_AtomicCompareExchange(uint *p, int scope, + int eq, int neq, + uint val, uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } #ifdef cl_khr_int64_base_atomics -_CLC_OVERLOAD _CLC_DEF long -__spirv_AtomicCompareExchange( - long *p, int scope, int eq, int neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long __spirv_AtomicCompareExchange(long *p, int scope, + int eq, int neq, + long val, long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_OVERLOAD _CLC_DEF ulong -__spirv_AtomicCompareExchange( - ulong *p, int scope, int eq, int neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong __spirv_AtomicCompareExchange(ulong *p, int scope, + int eq, int neq, + ulong val, + ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } diff --git a/libclc/libspirv/lib/generic/atomic/atomic_dec.cl b/libclc/libspirv/lib/generic/atomic/atomic_dec.cl index c2253992689af..6363432bc4958 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_dec.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_dec.cl @@ -8,44 +8,46 @@ #include -_CLC_OVERLOAD _CLC_DECL int -__spirv_AtomicIDecrement(local int *p, int scope, int semantics) { +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIDecrement(local int *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (int)1); } -_CLC_OVERLOAD _CLC_DECL int -__spirv_AtomicIDecrement(global int *p, int scope, int semantics) { +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIDecrement(global int *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (int)1); } -_CLC_OVERLOAD _CLC_DECL uint -__spirv_AtomicIDecrement(local uint *p, int scope, int semantics) { +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIDecrement(local uint *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (uint)1); } -_CLC_OVERLOAD _CLC_DECL uint -__spirv_AtomicIDecrement(global uint *p, int scope, int semantics) { +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIDecrement(global uint *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (uint)1); } #ifdef cl_khr_int64_base_atomics -_CLC_OVERLOAD _CLC_DECL long -__spirv_AtomicIDecrement(local long *p, int scope, int semantics) { +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIDecrement(local long *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (long)1); } -_CLC_OVERLOAD _CLC_DECL long -__spirv_AtomicIDecrement(global long *p, int scope, int semantics) { +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIDecrement(global long *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (long)1); } -_CLC_OVERLOAD _CLC_DECL ulong -__spirv_AtomicIDecrement(local ulong *p, int scope, int semantics) { +_CLC_OVERLOAD _CLC_DECL ulong __spirv_AtomicIDecrement(local ulong *p, + int scope, + int semantics) { return __sync_fetch_and_sub(p, (ulong)1); } -_CLC_OVERLOAD _CLC_DECL ulong -__spirv_AtomicIDecrement(global ulong *p, int scope, int semantics) { +_CLC_OVERLOAD _CLC_DECL ulong __spirv_AtomicIDecrement(global ulong *p, + int scope, + int semantics) { return __sync_fetch_and_sub(p, (ulong)1); } #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_load.cl b/libclc/libspirv/lib/generic/atomic/atomic_load.cl index 9d42e0e0046ac..7e6b341a9c483 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_load.cl @@ -8,26 +8,27 @@ #include -#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ -TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(AS 
const TYPE *); - -#define IMPL(TYPE, AS, PREFIX, BYTE_SIZE) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ - _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_AtomicLoad(AS TYPE *p, int scope, int semantics) { \ - if (semantics & Acquire) { \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \ - } \ - if (semantics & SequentiallyConsistent) { \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \ - } \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \ +#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ + TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER( \ + AS const TYPE *); + +#define IMPL(TYPE, AS, PREFIX, BYTE_SIZE) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicLoad(AS TYPE *p, int scope, \ + int semantics) { \ + if (semantics & Acquire) { \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \ + } \ + if (semantics & SequentiallyConsistent) { \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \ + } \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \ } -#define IMPL_AS(TYPE, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, global, PREFIX, BYTE_SIZE) \ +#define IMPL_AS(TYPE, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, global, PREFIX, BYTE_SIZE) \ IMPL(TYPE, local, PREFIX, BYTE_SIZE) IMPL_AS(int, , 4) @@ -40,8 +41,7 @@ IMPL_AS(unsigned long, u, 8) #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, , PREFIX, BYTE_SIZE) +#define IMPL_GENERIC(TYPE, PREFIX, BYTE_SIZE) IMPL(TYPE, , PREFIX, BYTE_SIZE) IMPL_GENERIC(int, , 4) IMPL_GENERIC(unsigned int, u, 4) diff --git a/libclc/libspirv/lib/generic/atomic/atomic_max.cl b/libclc/libspirv/lib/generic/atomic/atomic_max.cl index fdcca68a23b2f..90f0f340bb3d2 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_max.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_max.cl @@ -8,10 +8,10 @@ #include -#define IMPL(TYPE, AS, NAME, PREFIX, SUFFIX) \ - _CLC_OVERLOAD _CLC_DECL TYPE \ - NAME(AS TYPE *p, int scope, int semantics, TYPE val) { \ - return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ +#define IMPL(TYPE, AS, NAME, PREFIX, SUFFIX) \ + _CLC_OVERLOAD _CLC_DECL TYPE NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ } IMPL(int, global, __spirv_AtomicSMax, , max) @@ -33,8 +33,7 @@ IMPL(unsigned long, local, __spirv_AtomicUMax, __clc, umax_local_8) #if _CLC_GENERIC_AS_SUPPORTED - -#define IMPL_GENERIC(TYPE, NAME, PREFIX, SUFFIX) \ +#define IMPL_GENERIC(TYPE, NAME, PREFIX, SUFFIX) \ IMPL(TYPE, , NAME, PREFIX, SUFFIX) IMPL_GENERIC(int, __spirv_AtomicSMax, , max) diff --git a/libclc/libspirv/lib/generic/atomic/atomic_min.cl b/libclc/libspirv/lib/generic/atomic/atomic_min.cl index f39a0722a7a90..d3df63524c916 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_min.cl @@ -8,15 +8,15 @@ #include -#define IMPL(TYPE, AS, NAME, PREFIX, SUFFIX) \ - _CLC_OVERLOAD _CLC_DECL TYPE \ - NAME(AS TYPE *p, int scope, int semantics, TYPE val) { \ - return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ +#define IMPL(TYPE, AS, NAME, PREFIX, SUFFIX) \ + _CLC_OVERLOAD _CLC_DECL TYPE NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + return 
PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ } IMPL(int, global, __spirv_AtomicSMin, , min) IMPL(unsigned int, global, __spirv_AtomicUMin, , umin) -IMPL(int, local, __spirv_AtomicSMin, , min) +IMPL(int, local, __spirv_AtomicSMin, , min) IMPL(unsigned int, local, __spirv_AtomicUMin, , umin) #ifdef cl_khr_int64_extended_atomics @@ -33,8 +33,7 @@ IMPL(unsigned long, local, __spirv_AtomicUMin, __clc, umin_local_8) #if _CLC_GENERIC_AS_SUPPORTED - -#define IMPL_GENERIC(TYPE, NAME, PREFIX, SUFFIX) \ +#define IMPL_GENERIC(TYPE, NAME, PREFIX, SUFFIX) \ IMPL(TYPE, , NAME, PREFIX, SUFFIX) IMPL_GENERIC(int, __spirv_AtomicSMin, , min) @@ -46,7 +45,7 @@ unsigned long __clc__sync_fetch_and_min_generic_8(volatile generic long *, long) unsigned long __clc__sync_fetch_and_umin_generic_8(volatile __generic unsigned long *, unsigned long); IMPL_GENERIC(long, __spirv_AtomicSMin, __clc, min_generic_8) -IMPL_GENERIC(unsigned long, __spirv_AtomicUMin, __clc, umin_generic_8) +IMPL_GENERIC(unsigned long, __spirv_AtomicUMin, __clc, umin_generic_8) #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_or.cl b/libclc/libspirv/lib/generic/atomic/atomic_or.cl index 2939869c4e683..d2e78a062d78b 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_or.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_or.cl @@ -8,15 +8,15 @@ #include -#define IMPL(TYPE, AS, FN_NAME) \ - _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_AtomicOr(AS TYPE *p, int scope, int semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicOr(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } IMPL(int, global, __sync_fetch_and_or) IMPL(unsigned int, global, __sync_fetch_and_or) -IMPL(int, local, __sync_fetch_and_or) +IMPL(int, local, __sync_fetch_and_or) IMPL(unsigned int, local, __sync_fetch_and_or) #ifdef cl_khr_int64_extended_atomics @@ -28,8 +28,7 @@ IMPL(unsigned long, local, __sync_fetch_and_or_8) #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, FN_NAME) \ - IMPL(TYPE, , FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) IMPL_GENERIC(int, __sync_fetch_and_or) IMPL_GENERIC(unsigned int, __sync_fetch_and_or) diff --git a/libclc/libspirv/lib/generic/atomic/atomic_store.cl b/libclc/libspirv/lib/generic/atomic/atomic_store.cl index 62c673442a804..00c732da06b3e 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_store.cl @@ -8,38 +8,37 @@ #include -_CLC_OVERLOAD _CLC_DECL void -__spirv_AtomicStore(global float *p, int scope, int semantics, float val) { +_CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore(global float *p, int scope, + int semantics, float val) { __spirv_AtomicStore((global uint *)p, scope, semantics, __clc_as_uint(val)); } -_CLC_OVERLOAD _CLC_DECL void -__spirv_AtomicStore(local float *p, int scope, int semantics, float val) { +_CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore(local float *p, int scope, + int semantics, float val) { __spirv_AtomicStore((local uint *)p, scope, semantics, __clc_as_uint(val)); } -#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ -TYPE __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_##MEM_ORDER(AS const TYPE *, TYPE); +#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ + TYPE __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_##MEM_ORDER( \ + AS const TYPE *, TYPE); -#define IMPL(TYPE, AS, PREFIX, BYTE_SIZE) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, 
release) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ - _CLC_OVERLOAD _CLC_DECL void \ - __spirv_AtomicStore( \ - AS TYPE *p, int scope, \ - int semantics, TYPE val) { \ - if (semantics == Release) { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \ - } else if (semantics == SequentiallyConsistent) { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \ - } else { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \ - } \ +#define IMPL(TYPE, AS, PREFIX, BYTE_SIZE) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, release) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ + _CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + if (semantics == Release) { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \ + } else if (semantics == SequentiallyConsistent) { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \ + } else { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \ + } \ } -#define IMPL_AS(TYPE, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, global, PREFIX, BYTE_SIZE) \ +#define IMPL_AS(TYPE, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, global, PREFIX, BYTE_SIZE) \ IMPL(TYPE, local, PREFIX, BYTE_SIZE) IMPL_AS(int, , 4) @@ -52,8 +51,7 @@ IMPL_AS(unsigned long, u, 8) #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, , PREFIX, BYTE_SIZE) +#define IMPL_GENERIC(TYPE, PREFIX, BYTE_SIZE) IMPL(TYPE, , PREFIX, BYTE_SIZE) IMPL_GENERIC(int, , 4) IMPL_GENERIC(unsigned int, u, 4) diff --git a/libclc/libspirv/lib/generic/atomic/atomic_sub.cl b/libclc/libspirv/lib/generic/atomic/atomic_sub.cl index e1d3e5eed67c3..42cdd416aa119 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_sub.cl @@ -8,10 +8,10 @@ #include -#define IMPL(TYPE, AS, FN_NAME) \ - _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_AtomicISub(AS TYPE *p, int scope, int semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicISub(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } IMPL(int, global, __sync_fetch_and_sub) @@ -28,8 +28,7 @@ IMPL(unsigned long, local, __sync_fetch_and_sub_8) #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, FN_NAME) \ - IMPL(TYPE, , FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) IMPL_GENERIC(int, __sync_fetch_and_sub) IMPL_GENERIC(unsigned int, __sync_fetch_and_sub) diff --git a/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl b/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl index 605d2852d2696..f71d7d5596310 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl @@ -8,26 +8,22 @@ #include -_CLC_OVERLOAD _CLC_DEF float -__spirv_AtomicExchange( - global float *p, int scope, - int semantics, float val) { - return __clc_as_float(__spirv_AtomicExchange( - (global uint *)p, scope, semantics, __clc_as_uint(val))); +_CLC_OVERLOAD _CLC_DEF float __spirv_AtomicExchange(global float *p, int scope, + int semantics, float val) { + return __clc_as_float(__spirv_AtomicExchange((global uint *)p, scope, + semantics, __clc_as_uint(val))); } -_CLC_OVERLOAD _CLC_DEF float -__spirv_AtomicExchange( - local float *p, int scope, - int semantics, float val) { - return __clc_as_float(__spirv_AtomicExchange( - (local uint *)p, scope, 
semantics, __clc_as_uint(val))); +_CLC_OVERLOAD _CLC_DEF float __spirv_AtomicExchange(local float *p, int scope, + int semantics, float val) { + return __clc_as_float(__spirv_AtomicExchange((local uint *)p, scope, + semantics, __clc_as_uint(val))); } -#define IMPL(TYPE, AS, FN_NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE \ - __spirv_AtomicExchange(AS TYPE *p, int scope, int semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicExchange( \ + AS TYPE *p, int scope, int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } IMPL(int, global, __sync_swap_4) @@ -44,8 +40,7 @@ IMPL(unsigned long, local, __sync_swap_8) #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, FN_NAME) \ - IMPL(TYPE, , FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) IMPL_GENERIC(int, __sync_swap_4) IMPL_GENERIC(unsigned int, __sync_swap_4) diff --git a/libclc/libspirv/lib/generic/atomic/atomic_xor.cl b/libclc/libspirv/lib/generic/atomic/atomic_xor.cl index 6afd5350f2275..860dcb189e1ff 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_xor.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_xor.cl @@ -8,10 +8,10 @@ #include -#define IMPL(TYPE, AS, FN_NAME) \ - _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_AtomicXor(AS TYPE *p, int scope, int semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicXor(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } IMPL(int, global, __sync_fetch_and_xor) @@ -28,8 +28,7 @@ IMPL(unsigned long, local, __sync_fetch_and_xor_8) #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, FN_NAME) \ - IMPL(TYPE, , FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) IMPL_GENERIC(int, __sync_fetch_and_xor) IMPL_GENERIC(unsigned int, __sync_fetch_and_xor) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl index b84077958276b..2ecc9762bdf46 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl @@ -18,88 +18,83 @@ __CLC_NVVM_ATOMIC(ulong, long, l, add, __spirv_AtomicIAdd) __CLC_NVVM_ATOMIC(float, float, f, add, __spirv_AtomicFAddEXT) #ifdef cl_khr_int64_base_atomics -#define __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, \ - ADDR_SPACE_NV) \ - __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL double \ - __spirv_AtomicFAddEXT(ADDR_SPACE double *pointer, int scope, \ - int semantics, double value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - if (__clc_nvvm_reflect_arch() >= 600) { \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, ) \ - break; \ - case Acquire: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acquire) \ - } else { \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ - ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case Release: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _release) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, ) \ - } \ - 
break; \ - case AcquireRelease: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acq_rel) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ - ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case SequentiallyConsistent: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_FENCE_SC_SM70() \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acq_rel) \ - break; \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ - } else { \ - int load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - ADDR_SPACE long *pointer_int = \ - (ADDR_SPACE long *)pointer; \ - long old_int; \ - long new_val_int; \ - do { \ - old_int = \ - __spirv_AtomicLoad( \ - pointer_int, scope, load_order); \ - double new_val = *(double *)&old_int + *(double *)&value; \ - new_val_int = *(long *)&new_val; \ - } while ( \ - __spirv_AtomicCompareExchange( \ - pointer_int, scope, semantics, semantics, new_val_int, \ - old_int) != old_int); \ - return *(double *)&old_int; \ - } \ +#define __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL double \ + __spirv_AtomicFAddEXT(ADDR_SPACE double *pointer, int scope, int semantics, \ + double value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 600) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, ) \ + break; \ + case Acquire: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acquire) \ + } else { \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case Release: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _release) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, ) \ + } \ + break; \ + case AcquireRelease: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acq_rel) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case SequentiallyConsistent: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_FENCE_SC_SM70() \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acq_rel) \ + break; \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } else { \ + int load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + ADDR_SPACE long *pointer_int = (ADDR_SPACE long *)pointer; \ + long old_int; \ + long new_val_int; \ + do { \ + old_int = 
__spirv_AtomicLoad(pointer_int, scope, load_order); \ + double new_val = *(double *)&old_int + *(double *)&value; \ + new_val_int = *(long *)&new_val; \ + } while (__spirv_AtomicCompareExchange(pointer_int, scope, semantics, \ + semantics, new_val_int, \ + old_int) != old_int); \ + return *(double *)&old_int; \ + } \ } __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(, _gen_) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl index 495b5ba3a9898..ab8f06a4acbe5 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl @@ -74,13 +74,12 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(int, int); // Type __spirv_AtomicCompareExchange(AS Type *P, int S, int E, int U, // Type V, Type C); -#define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - OP, OP_MANGLED, ADDR_SPACE, ADDR_SPACE_NV) \ +#define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + OP_MANGLED, ADDR_SPACE, ADDR_SPACE_NV) \ __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_Atomic##OP_MANGLED( \ - ADDR_SPACE TYPE *pointer, int scope, \ - int semantics1, \ - int semantics2, TYPE cmp, TYPE value) { \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics1, int semantics2, TYPE cmp, \ + TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics1 & 0x1F; \ @@ -134,8 +133,8 @@ Memory order is stored in the lowest 5 bits */ \ __global, _global_) \ __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, \ __local, _shared_) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, \ - , _gen_) + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, , \ + _gen_) __CLC_NVVM_ATOMIC_CAS(int, int, i, cas, CompareExchange) __CLC_NVVM_ATOMIC_CAS(long, long, l, cas, CompareExchange) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl index 9c2ccd70cfaab..da23ef23d3635 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl @@ -36,13 +36,10 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(int, int); } \ } -#define __CLC_NVVM_ATOMIC_LOAD_IMPL( \ - TYPE, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ - ADDR_SPACE_NV) \ +#define __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV) \ __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_AtomicLoad( \ - ADDR_SPACE TYPE *pointer, int scope, \ - int semantics) { \ + __spirv_AtomicLoad(ADDR_SPACE TYPE *pointer, int scope, int semantics) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics & 0x1F; \ @@ -77,13 +74,12 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_NV, TYPE_MANGLED_NV) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - __global, _global_) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - __local, _shared_) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, , \ - _gen_) +#define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, 
TYPE_MANGLED_NV, __global, \ + _global_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __local, \ + _shared_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, , _gen_) __CLC_NVVM_ATOMIC_LOAD(int, int, i) __CLC_NVVM_ATOMIC_LOAD(uint, int, i) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl index ec630a4c3838d..f67a257965bb1 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl @@ -12,52 +12,48 @@ __CLC_NVVM_ATOMIC(int, int, i, max, __spirv_AtomicSMax) __CLC_NVVM_ATOMIC(long, long, l, max, __spirv_AtomicSMax) -__CLC_NVVM_ATOMIC(unsigned int, unsigned int, ui, max, - __spirv_AtomicUMax) -__CLC_NVVM_ATOMIC(unsigned long, unsigned long, ul, max, - __spirv_AtomicUMax) +__CLC_NVVM_ATOMIC(unsigned int, unsigned int, ui, max, __spirv_AtomicUMax) +__CLC_NVVM_ATOMIC(unsigned long, unsigned long, ul, max, __spirv_AtomicUMax) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MAX_IMPL( \ - TYPE, TYPE_INT, OP_MANGLED, ADDR_SPACE) \ - __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ - int semantics, TYPE val) { \ - int load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - ADDR_SPACE TYPE_INT *pointer_int = \ - (ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val > old) { \ - TYPE_INT tmp_int = __spirv_AtomicCompareExchange( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, ADDR_SPACE) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + int load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + ADDR_SPACE TYPE_INT *pointer_int = (ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val > old) { \ + TYPE_INT tmp_int = __spirv_AtomicCompareExchange( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } -#define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_INT, OP_MANGLED) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, __global) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, __local) \ +#define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_INT, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, __local) \ __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, ) 
__CLC_NVVM_ATOMIC_MAX(float, int, FMaxEXT) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl index 6eba6c44591cd..60fe3d0329c65 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl @@ -19,45 +19,42 @@ __CLC_NVVM_ATOMIC(ulong, ulong, ul, min, __spirv_AtomicUMin) #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MIN_IMPL( \ - TYPE, TYPE_INT, OP_MANGLED, ADDR_SPACE) \ - __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_Atomic##OP_MANGLED( \ - ADDR_SPACE TYPE *pointer, int scope, \ - int semantics, TYPE val) { \ - int load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - ADDR_SPACE TYPE_INT *pointer_int = \ - (ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val < old) { \ - TYPE_INT tmp_int = __spirv_AtomicCompareExchange( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, ADDR_SPACE) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + int load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + ADDR_SPACE TYPE_INT *pointer_int = (ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val < old) { \ + TYPE_INT tmp_int = __spirv_AtomicCompareExchange( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } -#define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_INT, OP_MANGLED) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, __global) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, __local) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, ) +#define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_INT, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, __local) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, ) __CLC_NVVM_ATOMIC_MIN(float, int, FMinEXT) __CLC_NVVM_ATOMIC_MIN(double, long, FMinEXT) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl index daa14beaf69c5..3382366a38ade 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl @@ -36,12 +36,11 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(int, int); } \ } -#define __CLC_NVVM_ATOMIC_STORE_IMPL( \ - TYPE, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ - 
ADDR_SPACE_NV) \ +#define __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV) \ __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL void \ - __spirv_AtomicStore(ADDR_SPACE TYPE *pointer, int scope, \ - int semantics, TYPE value) { \ + __spirv_AtomicStore(ADDR_SPACE TYPE *pointer, int scope, int semantics, \ + TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics & 0x1F; \ @@ -80,9 +79,11 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_NV, TYPE_MANGLED_NV) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __global, _global_) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __local, _shared_) \ +#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __global, \ + _global_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __local, \ + _shared_) \ __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, , _gen_) __CLC_NVVM_ATOMIC_STORE(int, int, i) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl index 999641e3b65d2..1aa10828bee28 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl @@ -9,18 +9,18 @@ #include #include -#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ADDR_SPACE) \ - _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicIAdd( \ - ADDR_SPACE TYPE *, int, int, TYPE); \ - __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ - __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ - int semantics, TYPE val) { \ - return __spirv_AtomicIAdd(pointer, scope, semantics, -val); \ +#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ADDR_SPACE) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicIAdd(ADDR_SPACE TYPE *, int, int, \ + TYPE); \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + return __spirv_AtomicIAdd(pointer, scope, semantics, -val); \ } -#define __CLC_NVVM_ATOMIC_SUB(TYPE, OP_MANGLED) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, __global) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, __local) \ +#define __CLC_NVVM_ATOMIC_SUB(TYPE, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, __local) \ __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ) __CLC_NVVM_ATOMIC_SUB(int, ISub)