diff --git a/patches/clang/0003-OpenCL-Support-cl_ext_float_atomics.patch b/patches/clang/0003-OpenCL-Support-cl_ext_float_atomics.patch index a2e70b73..4eb95445 100644 --- a/patches/clang/0003-OpenCL-Support-cl_ext_float_atomics.patch +++ b/patches/clang/0003-OpenCL-Support-cl_ext_float_atomics.patch @@ -1,14 +1,17 @@ -From e601cdf68ecb1a1ce4111a515b22a05845bb07c7 Mon Sep 17 00:00:00 2001 +From d1b4758460cfd04d35ae332033a8d18614e6a7ff Mon Sep 17 00:00:00 2001 From: haonanya Date: Wed, 28 Jul 2021 14:20:08 +0800 -Subject: [PATCH] [OpenCL] Support cl_ext_float_atomics +Subject: [PATCH] Support cl_ext_float_atomics + +This backports https://reviews.llvm.org/D106343 and https://reviews.llvm.org/D109740 Signed-off-by: haonanya --- - clang/lib/Headers/opencl-c-base.h | 19 +++ - clang/lib/Headers/opencl-c.h | 232 ++++++++++++++++++++++++++ - clang/test/Headers/opencl-c-header.cl | 84 ++++++++++ - 3 files changed, 335 insertions(+) + clang/lib/Headers/opencl-c-base.h | 19 ++ + clang/lib/Headers/opencl-c.h | 372 ++++++++++++++++++++++++++ + clang/lib/Sema/Sema.cpp | 3 + + clang/test/Headers/opencl-c-header.cl | 84 ++++++ + 4 files changed, 478 insertions(+) diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index e8dcd70377e5..d94d64372dbb 100644 @@ -41,15 +44,128 @@ index e8dcd70377e5..d94d64372dbb 100644 #endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h -index ab665628c8e1..501a04f6e82b 100644 +index ab665628c8e1..63a5155fe221 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h -@@ -13531,6 +13531,238 @@ intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uint +@@ -13531,6 +13531,378 @@ intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uint intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); #endif ++// The functionality added by cl_ext_float_atomics extension +#if defined(cl_ext_float_atomics) + ++#if defined(__opencl_c_ext_fp16_global_atomic_load_store) ++void __ovld atomic_store(volatile __global atomic_half *object, half operand); ++void __ovld atomic_store_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++void __ovld atomic_store_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_load(volatile __global atomic_half *object); ++half __ovld atomic_load_explicit(volatile __global atomic_half *object, ++ memory_order order); ++half __ovld atomic_load_explicit(volatile __global atomic_half *object, ++ memory_order order, memory_scope scope); ++half __ovld atomic_exchange(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_exchange_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_exchange_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) ++ ++#if defined(__opencl_c_ext_fp16_local_atomic_load_store) ++void __ovld atomic_store(volatile __local atomic_half *object, half operand); ++void __ovld atomic_store_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++void __ovld atomic_store_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_load(volatile __local atomic_half *object); ++half __ovld atomic_load_explicit(volatile __local atomic_half *object, ++ memory_order order); ++half __ovld atomic_load_explicit(volatile __local atomic_half *object, ++ memory_order order, memory_scope scope); ++half __ovld atomic_exchange(volatile __local atomic_half *object, half operand); ++half __ovld atomic_exchange_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_exchange_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_local_atomic_load_store) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_load_store) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_load_store) ++void __ovld atomic_store(volatile atomic_half *object, half operand); ++void __ovld atomic_store_explicit(volatile atomic_half *object, half operand, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_half *object, half operand, ++ memory_order order, memory_scope scope); ++half __ovld atomic_load(volatile atomic_half *object); ++half __ovld atomic_load_explicit(volatile atomic_half *object, ++ memory_order order); ++half __ovld atomic_load_explicit(volatile atomic_half *object, ++ memory_order order, memory_scope scope); ++half __ovld atomic_exchange(volatile atomic_half *object, half operand); ++half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand, ++ memory_order order); ++half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand, ++ memory_order order, memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) && ++ // defined(__opencl_c_ext_fp16_local_atomic_load_store) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_min_max) ++half __ovld atomic_fetch_min(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_max(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) ++ ++#if defined(__opencl_c_ext_fp16_local_atomic_min_max) ++half __ovld atomic_fetch_min(volatile __local atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_max(volatile __local atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_local_atomic_min_max) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_min_max) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_min_max) ++half __ovld atomic_fetch_min(volatile atomic_half *object, half operand); ++half __ovld atomic_fetch_max(volatile atomic_half *object, half operand); ++half __ovld atomic_fetch_min_explicit(volatile atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_max_explicit(volatile atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_min_explicit(volatile atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_max_explicit(volatile atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_min_max) ++ +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) +float __ovld atomic_fetch_min(volatile __global atomic_float *object, + float operand); @@ -98,12 +214,10 @@ index ab665628c8e1..501a04f6e82b 100644 +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ ++#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ + defined(__opencl_c_ext_fp32_local_atomic_min_max) + -+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++#if defined(__opencl_c_ext_fp64_global_atomic_min_max) +double __ovld atomic_fetch_min(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_max(volatile __global atomic_double *object, @@ -118,13 +232,9 @@ index ab665628c8e1..501a04f6e82b 100644 +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) + -+#if defined(__opencl_c_ext_fp64_local_atomic_min_max) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++#if defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_max(volatile __local atomic_double *object, @@ -139,14 +249,10 @@ index ab665628c8e1..501a04f6e82b 100644 +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ -+ defined(__opencl_c_ext_fp64_local_atomic_min_max) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++ defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_max(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, @@ -159,10 +265,59 @@ index ab665628c8e1..501a04f6e82b 100644 +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && -+ // defined(__opencl_c_ext_fp64_local_atomic_min_max) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ ++ defined(__opencl_c_ext_fp64_local_atomic_min_max) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_add) ++half __ovld atomic_fetch_add(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_sub(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_add) ++ ++#if defined(__opencl_c_ext_fp16_local_atomic_add) ++half __ovld atomic_fetch_add(volatile __local atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_sub(volatile __local atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_local_atomic_add) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_add) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_add) ++half __ovld atomic_fetch_add(volatile atomic_half *object, half operand); ++half __ovld atomic_fetch_sub(volatile atomic_half *object, half operand); ++half __ovld atomic_fetch_add_explicit(volatile atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_add_explicit(volatile atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_add) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_add) + +#if defined(__opencl_c_ext_fp32_global_atomic_add) +float __ovld atomic_fetch_add(volatile __global atomic_float *object, @@ -212,12 +367,10 @@ index ab665628c8e1..501a04f6e82b 100644 +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \ ++#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \ + defined(__opencl_c_ext_fp32_local_atomic_add) + -+#if defined(__opencl_c_ext_fp64_global_atomic_add) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++#if defined(__opencl_c_ext_fp64_global_atomic_add) +double __ovld atomic_fetch_add(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_sub(volatile __global atomic_double *object, @@ -232,13 +385,9 @@ index ab665628c8e1..501a04f6e82b 100644 +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_global_atomic_add) + -+#if defined(__opencl_c_ext_fp64_local_atomic_add) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++#if defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_sub(volatile __local atomic_double *object, @@ -253,14 +402,10 @@ index ab665628c8e1..501a04f6e82b 100644 +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_local_atomic_add) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_local_atomic_add) + +#if defined(__opencl_c_ext_fp64_global_atomic_add) && \ -+ defined(__opencl_c_ext_fp64_local_atomic_add) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++ defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, @@ -273,16 +418,28 @@ index ab665628c8e1..501a04f6e82b 100644 +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && -+ // defined(__opencl_c_ext_fp64_local_atomic_add) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \ ++ defined(__opencl_c_ext_fp64_local_atomic_add) + +#endif // cl_ext_float_atomics + // atomic_store() void __ovld atomic_store(volatile atomic_int *object, int desired); +diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp +index 450f9c020f7f..a91291c7af38 100644 +--- a/clang/lib/Sema/Sema.cpp ++++ b/clang/lib/Sema/Sema.cpp +@@ -313,6 +313,9 @@ void Sema::Initialize() { + addImplicitTypedef("atomic_long", AtomicLongT); + auto AtomicULongT = Context.getAtomicType(Context.UnsignedLongTy); + addImplicitTypedef("atomic_ulong", AtomicULongT); ++ auto AtomicHalfT = Context.getAtomicType(Context.HalfTy); ++ addImplicitTypedef("atomic_half", AtomicHalfT); ++ setOpenCLExtensionForType(AtomicHalfT, "cl_khr_fp16"); + addImplicitTypedef("atomic_float", + Context.getAtomicType(Context.FloatTy)); + auto AtomicDoubleT = Context.getAtomicType(Context.DoubleTy); diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl index 13a3b62481ec..443f682c711a 100644 --- a/clang/test/Headers/opencl-c-header.cl