intel · haonanya · Aug 26, 2021 · Aug 19, 2021
diff --git a/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch b/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch
@@ -1,26 +1,25 @@
-From baa0fc843cd55f9da25afbc576c5ae56c0b20536 Mon Sep 17 00:00:00 2001
+From 9b48f70bae77fdc752ee5e98949a7ed2c9373037 Mon Sep 17 00:00:00 2001
 From: haonanya <haonan.yang@intel.com>
 Date: Fri, 13 Aug 2021 10:00:02 +0800
 Subject: [PATCH] [OpenCL] support cl_ext_float_atomics
 
 Signed-off-by: haonanya <haonan.yang@intel.com>
+Signed-off-by: Haonan Yang <haonan.yang@intel.com>
 ---
- clang/lib/Headers/opencl-c-base.h     |  25 ++++
+ clang/lib/Headers/opencl-c-base.h     |  22 +++
  clang/lib/Headers/opencl-c.h          | 208 ++++++++++++++++++++++++++
  clang/test/Headers/opencl-c-header.cl |  96 ++++++++++++
- 3 files changed, 329 insertions(+)
+ 3 files changed, 326 insertions(+)
 
 diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
-index 2cc688ccc3da..86bbee12fdf8 100644
+index 2cc688ccc3da..18d367de68ec 100644
 --- a/clang/lib/Headers/opencl-c-base.h
 +++ b/clang/lib/Headers/opencl-c-base.h
-@@ -14,6 +14,31 @@
+@@ -14,6 +14,28 @@
    #define CL_VERSION_3_0 300
  #endif
 
 +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
-+// For SPIR all extensions are supported.
-+#if defined(__SPIR__)
 +#define cl_ext_float_atomics 1
 +#ifdef cl_khr_fp16
 +#define __opencl_c_ext_fp16_global_atomic_load_store 1
@@ -30,7 +29,7 @@ index 2cc688ccc3da..86bbee12fdf8 100644
 +#define __opencl_c_ext_fp16_global_atomic_min_max 1
 +#define __opencl_c_ext_fp16_local_atomic_min_max 1
 +#endif
-+#ifdef __opencl_c_fp64
++#ifdef cl_khr_fp64
 +#define __opencl_c_ext_fp64_global_atomic_add 1
 +#define __opencl_c_ext_fp64_local_atomic_add 1
 +#define __opencl_c_ext_fp64_global_atomic_min_max 1
@@ -40,14 +39,13 @@ index 2cc688ccc3da..86bbee12fdf8 100644
 +#define __opencl_c_ext_fp32_local_atomic_add 1
 +#define __opencl_c_ext_fp32_global_atomic_min_max 1
 +#define __opencl_c_ext_fp32_local_atomic_min_max 1
-+#endif // defined(__SPIR__)
 +#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 +
  // Define features for 2.0 for header backward compatibility
  #ifndef __opencl_c_int64
    #define __opencl_c_int64 1
 diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
-index d8173f0aa843..90944fe2d7e6 100644
+index d8173f0aa843..50515ac17a0c 100644
 --- a/clang/lib/Headers/opencl-c.h
 +++ b/clang/lib/Headers/opencl-c.h
 @@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit(
@@ -90,7 +88,7 @@ index d8173f0aa843..90944fe2d7e6 100644
 +                                       memory_scope scope);
 +#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max)
 +
-+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) ||                      \
++#if defined(__opencl_c_ext_fp32_global_atomic_min_max) &&                      \
 +    defined(__opencl_c_ext_fp32_local_atomic_min_max)
 +float __ovld atomic_fetch_min(volatile atomic_float *object, float operand);
 +float __ovld atomic_fetch_max(volatile atomic_float *object, float operand);
@@ -104,7 +102,7 @@ index d8173f0aa843..90944fe2d7e6 100644
 +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
 +                                       float operand, memory_order order,
 +                                       memory_scope scope);
-+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) ||                      \
++#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) &&                      \
 +    defined(__opencl_c_ext_fp32_local_atomic_min_max)
 +
 +#if defined(__opencl_c_ext_fp64_global_atomic_min_max)
@@ -141,7 +139,7 @@ index d8173f0aa843..90944fe2d7e6 100644
 +                                        memory_scope scope);
 +#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max)
 +
-+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) ||                      \
++#if defined(__opencl_c_ext_fp64_global_atomic_min_max) &&                      \
 +    defined(__opencl_c_ext_fp64_local_atomic_min_max)
 +double __ovld atomic_fetch_min(volatile atomic_double *object, double operand);
 +double __ovld atomic_fetch_max(volatile atomic_double *object, double operand);
@@ -155,7 +153,7 @@ index d8173f0aa843..90944fe2d7e6 100644
 +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
 +                                        double operand, memory_order order,
 +                                        memory_scope scope);
-+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) ||                      \
++#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) &&                      \
 +    defined(__opencl_c_ext_fp64_local_atomic_min_max)
 +
 +#if defined(__opencl_c_ext_fp32_global_atomic_add)
@@ -192,7 +190,7 @@ index d8173f0aa843..90944fe2d7e6 100644
 +                                       memory_scope scope);
 +#endif // defined(__opencl_c_ext_fp32_local_atomic_add)
 +
-+#if defined(__opencl_c_ext_fp32_global_atomic_add) ||                          \
++#if defined(__opencl_c_ext_fp32_global_atomic_add) &&                          \
 +    defined(__opencl_c_ext_fp32_local_atomic_add)
 +float __ovld atomic_fetch_add(volatile atomic_float *object, float operand);
 +float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand);
@@ -206,7 +204,7 @@ index d8173f0aa843..90944fe2d7e6 100644
 +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
 +                                       float operand, memory_order order,
 +                                       memory_scope scope);
-+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) ||                          \
++#endif // defined(__opencl_c_ext_fp32_global_atomic_add) &&                          \
 +    defined(__opencl_c_ext_fp32_local_atomic_add)
 +
 +#if defined(__opencl_c_ext_fp64_global_atomic_add)
@@ -243,7 +241,7 @@ index d8173f0aa843..90944fe2d7e6 100644
 +                                        memory_scope scope);
 +#endif // defined(__opencl_c_ext_fp64_local_atomic_add)
 +
-+#if defined(__opencl_c_ext_fp64_global_atomic_add) ||                          \
++#if defined(__opencl_c_ext_fp64_global_atomic_add) &&                          \
 +    defined(__opencl_c_ext_fp64_local_atomic_add)
 +double __ovld atomic_fetch_add(volatile atomic_double *object, double operand);
 +double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand);
@@ -257,7 +255,7 @@ index d8173f0aa843..90944fe2d7e6 100644
 +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
 +                                        double operand, memory_order order,
 +                                        memory_scope scope);
-+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) ||                          \
++#endif // defined(__opencl_c_ext_fp64_global_atomic_add) &&                          \
 +    defined(__opencl_c_ext_fp64_local_atomic_add)
 +
 +#endif // cl_ext_float_atomics
@@ -370,5 +368,5 @@ index 2716076acdcf..7f720cf28142 100644
 +
 +#endif // defined(__SPIR__)
 -- 
-2.17.1
+2.18.1
 
diff --git a/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch b/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch
@@ -1,11 +1,12 @@
-From 3f41fcc74ab5f8a153bd04850b7001aadc10be62 Mon Sep 17 00:00:00 2001
+From 5b21454c542aea71a447afb5a652a713cf53b111 Mon Sep 17 00:00:00 2001
 From: haonanya <haonan.yang@intel.com>
 Date: Mon, 19 Jul 2021 10:14:20 +0800
 Subject: [PATCH] Add support for cl_ext_float_atomics in SPIRVWriter
 
 Signed-off-by: haonanya <haonan.yang@intel.com>
 ---
- lib/SPIRV/OCL20ToSPIRV.cpp            |  79 ++++++++++++++++--
+ lib/SPIRV/OCL20ToSPIRV.cpp            |  25 +++++-
+ lib/SPIRV/OCLUtil.cpp                 |   4 -
  lib/SPIRV/SPIRVToOCL.h                |   3 +
  lib/SPIRV/SPIRVToOCL12.cpp            |  21 +++++
  lib/SPIRV/SPIRVToOCL20.cpp            |  28 ++++++-
@@ -18,84 +19,24 @@ Signed-off-by: haonanya <haonan.yang@intel.com>
  test/AtomicFMinEXT.ll                 | 113 +++++++-------------------
  test/AtomicFMinEXTForOCL.ll           |  64 +++++++++++++++
  test/InvalidAtomicBuiltins.cl         |   8 --
- 13 files changed, 417 insertions(+), 260 deletions(-)
+ 14 files changed, 366 insertions(+), 261 deletions(-)
  create mode 100644 test/AtomicFAddEXTForOCL.ll
  create mode 100644 test/AtomicFMaxEXTForOCL.ll
  create mode 100644 test/AtomicFMinEXTForOCL.ll
 
 diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp
-index e30aa5be..b676a009 100644
+index e30aa5be..faa5be2b 100644
 --- a/lib/SPIRV/OCL20ToSPIRV.cpp
 +++ b/lib/SPIRV/OCL20ToSPIRV.cpp
-@@ -408,10 +408,63 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
+@@ -407,7 +407,6 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
+   }
    if (DemangledName.find(kOCLBuiltinName::AtomicPrefix) == 0 ||
        DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) {
-
--    // Compute atomic builtins do not support floating types.
--    if (CI.getType()->isFloatingPointTy() &&
--        isComputeAtomicOCLBuiltin(DemangledName))
--      return;
-+    // Compute "atom" prefixed builtins do not support floating types.
-+    if (CI.getType()->isFloatingPointTy()) {
-+      if (DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0)
-+        return;
-+      // handle functions which are "atomic_" prefixed.
-+      StringRef Stem = DemangledName;
-+      Stem = Stem.drop_front(strlen("atomic_"));
-+      // FP-typed atomic_{add, sub, inc, dec, exchange, min, max, or, and, xor,
-+      // fetch_or, fetch_xor, fetch_and, fetch_or_explicit, fetch_xor_explicit,
-+      // fetch_and_explicit} should be identified as function call
-+      bool IsFunctionCall = llvm::StringSwitch<bool>(Stem)
-+                                .Case("add", true)
-+                                .Case("sub", true)
-+                                .Case("inc", true)
-+                                .Case("dec", true)
-+                                .Case("cmpxchg", true)
-+                                .Case("min", true)
-+                                .Case("max", true)
-+                                .Case("or", true)
-+                                .Case("xor", true)
-+                                .Case("and", true)
-+                                .Case("fetch_or", true)
-+                                .Case("fetch_and", true)
-+                                .Case("fetch_xor", true)
-+                                .Case("fetch_or_explicit", true)
-+                                .Case("fetch_xor_explicit", true)
-+                                .Case("fetch_and_explicit", true)
-+                                .Default(false);
-+      if (IsFunctionCall)
-+        return;
-+      if (F->arg_size() != 2) {
-+        IsFunctionCall = llvm::StringSwitch<bool>(Stem)
-+                             .Case("exchange", true)
-+                             .Case("fetch_add", true)
-+                             .Case("fetch_sub", true)
-+                             .Case("fetch_min", true)
-+                             .Case("fetch_max", true)
-+                             .Case("load", true)
-+                             .Case("store", true)
-+                             .Default(false);
-+        if (IsFunctionCall)
-+          return;
-+      }
-+      if (F->arg_size() != 3 && F->arg_size() != 4) {
-+        IsFunctionCall = llvm::StringSwitch<bool>(Stem)
-+                             .Case("exchange_explicit", true)
-+                             .Case("fetch_add_explicit", true)
-+                             .Case("fetch_sub_explicit", true)
-+                             .Case("fetch_min_explicit", true)
-+                             .Case("fetch_max_explicit", true)
-+                             .Case("load_explicit", true)
-+                             .Case("store_explicit", true)
-+                             .Default(false);
-+        if (IsFunctionCall)
-+          return;
-+      }
-+    }
-
-     auto PCI = &CI;
-     if (DemangledName == kOCLBuiltinName::AtomicInit) {
-@@ -819,7 +872,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
+-
+     // Compute atomic builtins do not support floating types.
+     if (CI.getType()->isFloatingPointTy() &&
+         isComputeAtomicOCLBuiltin(DemangledName))
+@@ -819,7 +818,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
    AttributeList Attrs = CI->getCalledFunction()->getAttributes();
    mutateCallInstSPIRV(
        M, CI,
@@ -104,7 +45,7 @@ index e30aa5be..b676a009 100644
          Info.PostProc(Args);
          // Order of args in OCL20:
          // object, 0-2 other args, 1-2 order, scope
-@@ -864,7 +917,21 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
+@@ -864,7 +863,27 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
            std::rotate(Args.begin() + 2, Args.begin() + OrderIdx,
                        Args.end() - Offset);
          }
@@ -119,19 +60,43 @@ index e30aa5be..b676a009 100644
 +            getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
 +        if (!IsFPType(AtomicBuiltinsReturnType))
 +          return SPIRVFunctionName;
-+        // Translate FP-typed atomic builtins.
-+        return llvm::StringSwitch<std::string>(SPIRVFunctionName)
-+            .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT")
-+            .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT")
-+            .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT");
 ++        // Translate FP-typed atomic builtins. Currently only atomic_fetch_[add,
 ++        // max, min]* need to be mapped to floating-point instructions.
++        auto SPIRFunctionNameForFloatAtomics =
++            llvm::StringSwitch<std::string>(SPIRVFunctionName)
++                .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT")
++                .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT")
++                .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT")
++                .Default("others");
++        return SPIRFunctionNameForFloatAtomics == "others"
++                   ? SPIRVFunctionName
++                   : SPIRFunctionNameForFloatAtomics;
        },
        &Attrs);
  }
+diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp
+index c7232623..9a4c8ab9 100644
+--- a/lib/SPIRV/OCLUtil.cpp
++++ b/lib/SPIRV/OCLUtil.cpp
+@@ -136,13 +136,9 @@ bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
+       .EndsWith("and", true)
+       .EndsWith("or", true)
+       .EndsWith("xor", true)
+-      .EndsWith("add_explicit", true)
+-      .EndsWith("sub_explicit", true)
+       .EndsWith("or_explicit", true)
+       .EndsWith("xor_explicit", true)
+       .EndsWith("and_explicit", true)
+-      .EndsWith("min_explicit", true)
+-      .EndsWith("max_explicit", true)
+       .Default(false);
+ }
+
 diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h
-index f75195d4..64bf0f84 100644
+index ddeec0b6..006fb0b1 100644
 --- a/lib/SPIRV/SPIRVToOCL.h
 +++ b/lib/SPIRV/SPIRVToOCL.h
-@@ -171,6 +171,9 @@ public:
+@@ -178,6 +178,9 @@ public:
    /// using separate maps for OpenCL 1.2 and OpenCL 2.0
    virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0;
 
@@ -246,7 +211,7 @@ index d829ff42..01d088e9 100644
          auto ScopeIdx = Ptr + 1;
          auto OrderIdx = Ptr + 2;
 diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
-index cc1dd1ab..63180888 100644
+index 13f93fbe..7b707993 100644
 --- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
 +++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
 @@ -521,7 +521,6 @@ template <> inline void SPIRVMap<Capability, std::string>::init() {
@@ -258,7 +223,7 @@ index cc1dd1ab..63180888 100644
    add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL");
    add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL");
 diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
-index 9e520512..cc2ad200 100644
+index feec70f6..8e595e83 100644
 --- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h
 +++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
 @@ -54,11 +54,17 @@ template <> inline void SPIRVMap<Op, std::string>::init() {