From d79ea0a533fdfc206d77ef1e36550f71e9ca07a8 Mon Sep 17 00:00:00 2001 From: haonanya Date: Thu, 19 Aug 2021 16:39:04 +0800 Subject: [PATCH] [OpenCL] Fix bugs on cl_ext_float_atomics patch 1. Update patch because of OpenCL-Docs changes. 2. Fix bugs about incorrect atomic function translation on SPIRV. Signed-off-by: haonanya Signed-off-by: Haonan Yang --- ...-OpenCL-support-cl_ext_float_atomics.patch | 36 +++-- ...-cl_ext_float_atomics-in-SPIRVWriter.patch | 127 +++++++----------- 2 files changed, 63 insertions(+), 100 deletions(-) diff --git a/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch b/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch index 215ac279..0d358520 100644 --- a/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch +++ b/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch @@ -1,26 +1,25 @@ -From baa0fc843cd55f9da25afbc576c5ae56c0b20536 Mon Sep 17 00:00:00 2001 +From 9b48f70bae77fdc752ee5e98949a7ed2c9373037 Mon Sep 17 00:00:00 2001 From: haonanya Date: Fri, 13 Aug 2021 10:00:02 +0800 Subject: [PATCH] [OpenCL] support cl_ext_float_atomics Signed-off-by: haonanya +Signed-off-by: Haonan Yang --- - clang/lib/Headers/opencl-c-base.h | 25 ++++ + clang/lib/Headers/opencl-c-base.h | 22 +++ clang/lib/Headers/opencl-c.h | 208 ++++++++++++++++++++++++++ clang/test/Headers/opencl-c-header.cl | 96 ++++++++++++ - 3 files changed, 329 insertions(+) + 3 files changed, 326 insertions(+) diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h -index 2cc688ccc3da..86bbee12fdf8 100644 +index 2cc688ccc3da..18d367de68ec 100644 --- a/clang/lib/Headers/opencl-c-base.h +++ b/clang/lib/Headers/opencl-c-base.h -@@ -14,6 +14,31 @@ +@@ -14,6 +14,28 @@ #define CL_VERSION_3_0 300 #endif +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+// For SPIR all extensions are supported. -+#if defined(__SPIR__) +#define cl_ext_float_atomics 1 +#ifdef cl_khr_fp16 +#define __opencl_c_ext_fp16_global_atomic_load_store 1 @@ -30,7 +29,7 @@ index 2cc688ccc3da..86bbee12fdf8 100644 +#define __opencl_c_ext_fp16_global_atomic_min_max 1 +#define __opencl_c_ext_fp16_local_atomic_min_max 1 +#endif -+#ifdef __opencl_c_fp64 ++#ifdef cl_khr_fp64 +#define __opencl_c_ext_fp64_global_atomic_add 1 +#define __opencl_c_ext_fp64_local_atomic_add 1 +#define __opencl_c_ext_fp64_global_atomic_min_max 1 @@ -40,14 +39,13 @@ index 2cc688ccc3da..86bbee12fdf8 100644 +#define __opencl_c_ext_fp32_local_atomic_add 1 +#define __opencl_c_ext_fp32_global_atomic_min_max 1 +#define __opencl_c_ext_fp32_local_atomic_min_max 1 -+#endif // defined(__SPIR__) +#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) + // Define features for 2.0 for header backward compatibility #ifndef __opencl_c_int64 #define __opencl_c_int64 1 diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h -index d8173f0aa843..90944fe2d7e6 100644 +index d8173f0aa843..50515ac17a0c 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit( @@ -90,7 +88,7 @@ index d8173f0aa843..90944fe2d7e6 100644 + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max) + -+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ ++#if defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ + defined(__opencl_c_ext_fp32_local_atomic_min_max) +float __ovld atomic_fetch_min(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_max(volatile atomic_float *object, float operand); @@ -104,7 +102,7 @@ index d8173f0aa843..90944fe2d7e6 100644 +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ ++#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ + defined(__opencl_c_ext_fp32_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) @@ -141,7 +139,7 @@ index d8173f0aa843..90944fe2d7e6 100644 + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) + -+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ ++#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ + defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_max(volatile atomic_double *object, double operand); @@ -155,7 +153,7 @@ index d8173f0aa843..90944fe2d7e6 100644 +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ ++#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ + defined(__opencl_c_ext_fp64_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp32_global_atomic_add) @@ -192,7 +190,7 @@ index d8173f0aa843..90944fe2d7e6 100644 + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_local_atomic_add) + -+#if defined(__opencl_c_ext_fp32_global_atomic_add) || \ ++#if defined(__opencl_c_ext_fp32_global_atomic_add) && \ + defined(__opencl_c_ext_fp32_local_atomic_add) +float __ovld atomic_fetch_add(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand); @@ -206,7 +204,7 @@ index d8173f0aa843..90944fe2d7e6 100644 +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \ ++#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \ + defined(__opencl_c_ext_fp32_local_atomic_add) + +#if defined(__opencl_c_ext_fp64_global_atomic_add) @@ -243,7 +241,7 @@ index d8173f0aa843..90944fe2d7e6 100644 + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_local_atomic_add) + -+#if defined(__opencl_c_ext_fp64_global_atomic_add) || \ ++#if defined(__opencl_c_ext_fp64_global_atomic_add) && \ + defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand); @@ -257,7 +255,7 @@ index d8173f0aa843..90944fe2d7e6 100644 +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \ ++#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \ + defined(__opencl_c_ext_fp64_local_atomic_add) + +#endif // cl_ext_float_atomics @@ -370,5 +368,5 @@ index 2716076acdcf..7f720cf28142 100644 + +#endif // defined(__SPIR__) -- -2.17.1 +2.18.1 diff --git a/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch b/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch index 174dac9d..43d99be1 100644 --- a/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch +++ b/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch @@ -1,11 +1,12 @@ -From 3f41fcc74ab5f8a153bd04850b7001aadc10be62 Mon Sep 17 00:00:00 2001 +From 5b21454c542aea71a447afb5a652a713cf53b111 Mon Sep 17 00:00:00 2001 From: haonanya Date: Mon, 19 Jul 2021 10:14:20 +0800 Subject: [PATCH] Add support for cl_ext_float_atomics in SPIRVWriter Signed-off-by: haonanya --- - lib/SPIRV/OCL20ToSPIRV.cpp | 79 ++++++++++++++++-- + lib/SPIRV/OCL20ToSPIRV.cpp | 25 +++++- + lib/SPIRV/OCLUtil.cpp | 4 - lib/SPIRV/SPIRVToOCL.h | 3 + lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++ lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++- @@ -18,84 +19,24 @@ Signed-off-by: haonanya test/AtomicFMinEXT.ll | 113 +++++++------------------- test/AtomicFMinEXTForOCL.ll | 64 +++++++++++++++ test/InvalidAtomicBuiltins.cl | 8 -- - 13 files changed, 417 insertions(+), 260 deletions(-) + 14 files changed, 366 insertions(+), 261 deletions(-) create mode 100644 test/AtomicFAddEXTForOCL.ll create mode 100644 test/AtomicFMaxEXTForOCL.ll create mode 100644 test/AtomicFMinEXTForOCL.ll diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp -index e30aa5be..b676a009 100644 +index e30aa5be..faa5be2b 100644 --- a/lib/SPIRV/OCL20ToSPIRV.cpp +++ b/lib/SPIRV/OCL20ToSPIRV.cpp -@@ -408,10 +408,63 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) { +@@ -407,7 +407,6 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) { + } if (DemangledName.find(kOCLBuiltinName::AtomicPrefix) == 0 || DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) { - -- // Compute atomic builtins do not support floating types. -- if (CI.getType()->isFloatingPointTy() && -- isComputeAtomicOCLBuiltin(DemangledName)) -- return; -+ // Compute "atom" prefixed builtins do not support floating types. -+ if (CI.getType()->isFloatingPointTy()) { -+ if (DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) -+ return; -+ // handle functions which are "atomic_" prefixed. -+ StringRef Stem = DemangledName; -+ Stem = Stem.drop_front(strlen("atomic_")); -+ // FP-typed atomic_{add, sub, inc, dec, exchange, min, max, or, and, xor, -+ // fetch_or, fetch_xor, fetch_and, fetch_or_explicit, fetch_xor_explicit, -+ // fetch_and_explicit} should be identified as function call -+ bool IsFunctionCall = llvm::StringSwitch(Stem) -+ .Case("add", true) -+ .Case("sub", true) -+ .Case("inc", true) -+ .Case("dec", true) -+ .Case("cmpxchg", true) -+ .Case("min", true) -+ .Case("max", true) -+ .Case("or", true) -+ .Case("xor", true) -+ .Case("and", true) -+ .Case("fetch_or", true) -+ .Case("fetch_and", true) -+ .Case("fetch_xor", true) -+ .Case("fetch_or_explicit", true) -+ .Case("fetch_xor_explicit", true) -+ .Case("fetch_and_explicit", true) -+ .Default(false); -+ if (IsFunctionCall) -+ return; -+ if (F->arg_size() != 2) { -+ IsFunctionCall = llvm::StringSwitch(Stem) -+ .Case("exchange", true) -+ .Case("fetch_add", true) -+ .Case("fetch_sub", true) -+ .Case("fetch_min", true) -+ .Case("fetch_max", true) -+ .Case("load", true) -+ .Case("store", true) -+ .Default(false); -+ if (IsFunctionCall) -+ return; -+ } -+ if (F->arg_size() != 3 && F->arg_size() != 4) { -+ IsFunctionCall = llvm::StringSwitch(Stem) -+ .Case("exchange_explicit", true) -+ .Case("fetch_add_explicit", true) -+ .Case("fetch_sub_explicit", true) -+ .Case("fetch_min_explicit", true) -+ .Case("fetch_max_explicit", true) -+ .Case("load_explicit", true) -+ .Case("store_explicit", true) -+ .Default(false); -+ if (IsFunctionCall) -+ return; -+ } -+ } - - auto PCI = &CI; - if (DemangledName == kOCLBuiltinName::AtomicInit) { -@@ -819,7 +872,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { +- + // Compute atomic builtins do not support floating types. + if (CI.getType()->isFloatingPointTy() && + isComputeAtomicOCLBuiltin(DemangledName)) +@@ -819,7 +818,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); mutateCallInstSPIRV( M, CI, @@ -104,7 +45,7 @@ index e30aa5be..b676a009 100644 Info.PostProc(Args); // Order of args in OCL20: // object, 0-2 other args, 1-2 order, scope -@@ -864,7 +917,21 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { +@@ -864,7 +863,27 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { std::rotate(Args.begin() + 2, Args.begin() + OrderIdx, Args.end() - Offset); } @@ -119,19 +60,43 @@ index e30aa5be..b676a009 100644 + getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName)); + if (!IsFPType(AtomicBuiltinsReturnType)) + return SPIRVFunctionName; -+ // Translate FP-typed atomic builtins. -+ return llvm::StringSwitch(SPIRVFunctionName) -+ .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT") -+ .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT") -+ .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT"); ++ // Translate FP-typed atomic builtins. Currently we only need to ++ // translate atomic_fetch_[add, max, min]* to related float instructions ++ auto SPIRFunctionNameForFloatAtomics = ++ llvm::StringSwitch(SPIRVFunctionName) ++ .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT") ++ .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT") ++ .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT") ++ .Default("others"); ++ return SPIRFunctionNameForFloatAtomics == "others" ++ ? SPIRVFunctionName ++ : SPIRFunctionNameForFloatAtomics; }, &Attrs); } +diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp +index c7232623..9a4c8ab9 100644 +--- a/lib/SPIRV/OCLUtil.cpp ++++ b/lib/SPIRV/OCLUtil.cpp +@@ -136,13 +136,9 @@ bool isComputeAtomicOCLBuiltin(StringRef DemangledName) { + .EndsWith("and", true) + .EndsWith("or", true) + .EndsWith("xor", true) +- .EndsWith("add_explicit", true) +- .EndsWith("sub_explicit", true) + .EndsWith("or_explicit", true) + .EndsWith("xor_explicit", true) + .EndsWith("and_explicit", true) +- .EndsWith("min_explicit", true) +- .EndsWith("max_explicit", true) + .Default(false); + } + diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h -index f75195d4..64bf0f84 100644 +index ddeec0b6..006fb0b1 100644 --- a/lib/SPIRV/SPIRVToOCL.h +++ b/lib/SPIRV/SPIRVToOCL.h -@@ -171,6 +171,9 @@ public: +@@ -178,6 +178,9 @@ public: /// using separate maps for OpenCL 1.2 and OpenCL 2.0 virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0; @@ -246,7 +211,7 @@ index d829ff42..01d088e9 100644 auto ScopeIdx = Ptr + 1; auto OrderIdx = Ptr + 2; diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h -index cc1dd1ab..63180888 100644 +index 13f93fbe..7b707993 100644 --- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -521,7 +521,6 @@ template <> inline void SPIRVMap::init() { @@ -258,7 +223,7 @@ index cc1dd1ab..63180888 100644 add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL"); add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL"); diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h -index 9e520512..cc2ad200 100644 +index feec70f6..8e595e83 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h +++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h @@ -54,11 +54,17 @@ template <> inline void SPIRVMap::init() {