From d45d5e70da17ddc513a6b562368042c8f01668d7 Mon Sep 17 00:00:00 2001 From: haonanya Date: Fri, 14 Jan 2022 11:17:30 +0800 Subject: [PATCH] Add missing atomic_half type builtins This backports https://reviews.llvm.org/D109740 Signed-off-by: haonanya --- ...-OpenCL-support-cl_ext_float_atomics.patch | 259 ++++++++++++++---- 1 file changed, 208 insertions(+), 51 deletions(-) diff --git a/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch b/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch index 9957f162..80164c2f 100644 --- a/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch +++ b/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch @@ -1,14 +1,17 @@ -From 6ef0d9afd03c80671393f4d749ddbeb08f7291fe Mon Sep 17 00:00:00 2001 +From 389fc0f80da472af9746d3b34e77ad8c7ee2f70e Mon Sep 17 00:00:00 2001 From: haonanya Date: Fri, 13 Aug 2021 10:00:02 +0800 -Subject: [PATCH] [OpenCL] support cl_ext_float_atomics +Subject: [PATCH] support cl_ext_float_atomics + +This backports https://reviews.llvm.org/D106343 and https://reviews.llvm.org/D109740 Signed-off-by: haonanya --- - clang/lib/Headers/opencl-c-base.h | 22 +++ - clang/lib/Headers/opencl-c.h | 232 ++++++++++++++++++++++++++ - clang/test/Headers/opencl-c-header.cl | 96 +++++++++++ - 3 files changed, 350 insertions(+) + clang/lib/Headers/opencl-c-base.h | 22 ++ + clang/lib/Headers/opencl-c.h | 372 ++++++++++++++++++++++++++ + clang/lib/Sema/Sema.cpp | 3 + + clang/test/Headers/opencl-c-header.cl | 96 +++++++ + 4 files changed, 493 insertions(+) diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index 2cc688ccc3da..18d367de68ec 100644 @@ -44,15 +47,128 @@ index 2cc688ccc3da..18d367de68ec 100644 #ifndef __opencl_c_int64 #define __opencl_c_int64 1 diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h -index d8173f0aa843..454469991d59 100644 +index d8173f0aa843..be39b1ca8c78 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h -@@ -14354,6 +14354,238 @@ intptr_t __ovld atomic_fetch_max_explicit( +@@ -14354,6 +14354,378 @@ intptr_t __ovld atomic_fetch_max_explicit( // defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++// The functionality added by cl_ext_float_atomics extension +#if defined(cl_ext_float_atomics) + ++#if defined(__opencl_c_ext_fp16_global_atomic_load_store) ++void __ovld atomic_store(volatile __global atomic_half *object, half operand); ++void __ovld atomic_store_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++void __ovld atomic_store_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_load(volatile __global atomic_half *object); ++half __ovld atomic_load_explicit(volatile __global atomic_half *object, ++ memory_order order); ++half __ovld atomic_load_explicit(volatile __global atomic_half *object, ++ memory_order order, memory_scope scope); ++half __ovld atomic_exchange(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_exchange_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_exchange_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) ++ ++#if defined(__opencl_c_ext_fp16_local_atomic_load_store) ++void __ovld atomic_store(volatile __local atomic_half *object, half operand); ++void __ovld atomic_store_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++void __ovld atomic_store_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_load(volatile __local atomic_half *object); ++half __ovld atomic_load_explicit(volatile __local atomic_half *object, ++ memory_order order); ++half __ovld atomic_load_explicit(volatile __local atomic_half *object, ++ memory_order order, memory_scope scope); ++half __ovld atomic_exchange(volatile __local atomic_half *object, half operand); ++half __ovld atomic_exchange_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_exchange_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_local_atomic_load_store) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_load_store) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_load_store) ++void __ovld atomic_store(volatile atomic_half *object, half operand); ++void __ovld atomic_store_explicit(volatile atomic_half *object, half operand, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_half *object, half operand, ++ memory_order order, memory_scope scope); ++half __ovld atomic_load(volatile atomic_half *object); ++half __ovld atomic_load_explicit(volatile atomic_half *object, ++ memory_order order); ++half __ovld atomic_load_explicit(volatile atomic_half *object, ++ memory_order order, memory_scope scope); ++half __ovld atomic_exchange(volatile atomic_half *object, half operand); ++half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand, ++ memory_order order); ++half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand, ++ memory_order order, memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) && ++ // defined(__opencl_c_ext_fp16_local_atomic_load_store) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_min_max) ++half __ovld atomic_fetch_min(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_max(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) ++ ++#if defined(__opencl_c_ext_fp16_local_atomic_min_max) ++half __ovld atomic_fetch_min(volatile __local atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_max(volatile __local atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_local_atomic_min_max) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_min_max) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_min_max) ++half __ovld atomic_fetch_min(volatile atomic_half *object, half operand); ++half __ovld atomic_fetch_max(volatile atomic_half *object, half operand); ++half __ovld atomic_fetch_min_explicit(volatile atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_max_explicit(volatile atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_min_explicit(volatile atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_max_explicit(volatile atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_min_max) ++ +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) +float __ovld atomic_fetch_min(volatile __global atomic_float *object, + float operand); @@ -101,12 +217,10 @@ index d8173f0aa843..454469991d59 100644 +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && -+ // defined(__opencl_c_ext_fp32_local_atomic_min_max) ++#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ ++ defined(__opencl_c_ext_fp32_local_atomic_min_max) + -+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++#if defined(__opencl_c_ext_fp64_global_atomic_min_max) +double __ovld atomic_fetch_min(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_max(volatile __global atomic_double *object, @@ -121,13 +235,9 @@ index d8173f0aa843..454469991d59 100644 +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) + -+#if defined(__opencl_c_ext_fp64_local_atomic_min_max) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++#if defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_max(volatile __local atomic_double *object, @@ -142,14 +252,10 @@ index d8173f0aa843..454469991d59 100644 +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ -+ defined(__opencl_c_ext_fp64_local_atomic_min_max) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++ defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_max(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, @@ -162,10 +268,59 @@ index d8173f0aa843..454469991d59 100644 +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && -+ // defined(__opencl_c_ext_fp64_local_atomic_min_max) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ ++ defined(__opencl_c_ext_fp64_local_atomic_min_max) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_add) ++half __ovld atomic_fetch_add(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_sub(volatile __global atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_add) ++ ++#if defined(__opencl_c_ext_fp16_local_atomic_add) ++half __ovld atomic_fetch_add(volatile __local atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_sub(volatile __local atomic_half *object, ++ half operand); ++half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_local_atomic_add) ++ ++#if defined(__opencl_c_ext_fp16_global_atomic_add) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_add) ++half __ovld atomic_fetch_add(volatile atomic_half *object, half operand); ++half __ovld atomic_fetch_sub(volatile atomic_half *object, half operand); ++half __ovld atomic_fetch_add_explicit(volatile atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object, ++ half operand, memory_order order); ++half __ovld atomic_fetch_add_explicit(volatile atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object, ++ half operand, memory_order order, ++ memory_scope scope); ++#endif // defined(__opencl_c_ext_fp16_global_atomic_add) && \ ++ defined(__opencl_c_ext_fp16_local_atomic_add) + +#if defined(__opencl_c_ext_fp32_global_atomic_add) +float __ovld atomic_fetch_add(volatile __global atomic_float *object, @@ -215,12 +370,10 @@ index d8173f0aa843..454469991d59 100644 +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && -+ // defined(__opencl_c_ext_fp32_local_atomic_add) ++#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \ ++ defined(__opencl_c_ext_fp32_local_atomic_add) + -+#if defined(__opencl_c_ext_fp64_global_atomic_add) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++#if defined(__opencl_c_ext_fp64_global_atomic_add) +double __ovld atomic_fetch_add(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_sub(volatile __global atomic_double *object, @@ -235,13 +388,9 @@ index d8173f0aa843..454469991d59 100644 +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_global_atomic_add) + -+#if defined(__opencl_c_ext_fp64_local_atomic_add) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++#if defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_sub(volatile __local atomic_double *object, @@ -256,14 +405,10 @@ index d8173f0aa843..454469991d59 100644 +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_local_atomic_add) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_local_atomic_add) + +#if defined(__opencl_c_ext_fp64_global_atomic_add) && \ -+ defined(__opencl_c_ext_fp64_local_atomic_add) && \ -+ defined(cl_khr_int64_base_atomics) && \ -+ defined(cl_khr_int64_extended_atomics) ++ defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, @@ -276,16 +421,28 @@ index d8173f0aa843..454469991d59 100644 +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && -+ // defined(__opencl_c_ext_fp64_local_atomic_add) && -+ // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \ ++ defined(__opencl_c_ext_fp64_local_atomic_add) + +#endif // cl_ext_float_atomics + // atomic_store() #if defined(__opencl_c_atomic_scope_device) && \ +diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp +index c26f45f62668..12c5e6f94842 100644 +--- a/clang/lib/Sema/Sema.cpp ++++ b/clang/lib/Sema/Sema.cpp +@@ -303,6 +303,9 @@ void Sema::Initialize() { + addImplicitTypedef("atomic_long", AtomicLongT); + auto AtomicULongT = Context.getAtomicType(Context.UnsignedLongTy); + addImplicitTypedef("atomic_ulong", AtomicULongT); ++ auto AtomicHalfT = Context.getAtomicType(Context.HalfTy); ++ addImplicitTypedef("atomic_half", AtomicHalfT); ++ setOpenCLExtensionForType(AtomicHalfT, "cl_khr_fp16"); + addImplicitTypedef("atomic_float", + Context.getAtomicType(Context.FloatTy)); + auto AtomicDoubleT = Context.getAtomicType(Context.DoubleTy); diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl index 2716076acdcf..7f720cf28142 100644 --- a/clang/test/Headers/opencl-c-header.cl @@ -391,5 +548,5 @@ index 2716076acdcf..7f720cf28142 100644 + +#endif // defined(__SPIR__) -- -2.18.1 +2.17.1