diff --git a/patches/clang/0006-OpenCL-3.0-feature-macro-support.patch b/patches/clang/0006-OpenCL-3.0-feature-macro-support.patch deleted file mode 100644 index 3458c54c..00000000 --- a/patches/clang/0006-OpenCL-3.0-feature-macro-support.patch +++ /dev/null @@ -1,2790 +0,0 @@ -From fcf738e8d6a015ed4481b1f4a67abcb1e412e1ce Mon Sep 17 00:00:00 2001 -From: Anton Zabaznov -Date: Thu, 28 May 2020 00:46:49 +0300 -Subject: [PATCH] OpenCL 3.0 feature macro support - ---- - docs/CommandGuide/clang.rst | 12 +- - include/clang/Basic/Builtins.def | 48 ++--- - include/clang/Basic/Builtins.h | 11 +- - include/clang/Basic/DiagnosticSemaKinds.td | 18 ++ - include/clang/Basic/OpenCLFeatures.def | 42 ++++ - include/clang/Basic/OpenCLImageTypes.def | 26 +-- - include/clang/Basic/OpenCLOptions.h | 192 +++++++++++++----- - include/clang/Basic/TargetInfo.h | 8 +- - include/clang/Basic/TargetOptions.h | 4 + - include/clang/Driver/CC1Options.td | 5 + - include/clang/Driver/Options.td | 2 +- - include/clang/Frontend/LangStandards.def | 4 + - include/clang/Sema/Sema.h | 22 +- - lib/AST/ASTContext.cpp | 4 +- - lib/Basic/Builtins.cpp | 7 +- - lib/Basic/TargetInfo.cpp | 5 + - lib/Basic/Targets.cpp | 1 + - lib/Basic/Targets/AMDGPU.h | 32 +-- - lib/Basic/Targets/NVPTX.h | 20 +- - lib/Basic/Targets/SPIR.h | 2 +- - lib/Basic/Targets/X86.h | 2 +- - lib/Frontend/CompilerInvocation.cpp | 7 +- - lib/Frontend/InitPreprocessor.cpp | 9 + - lib/Headers/opencl-c.h | 156 +++++++------- - lib/Parse/ParseDecl.cpp | 4 +- - lib/Parse/ParsePragma.cpp | 10 +- - lib/Sema/DeclSpec.cpp | 2 +- - lib/Sema/Sema.cpp | 35 +++- - lib/Sema/SemaCast.cpp | 2 +- - lib/Sema/SemaChecking.cpp | 163 +++++++++++++-- - lib/Sema/SemaDecl.cpp | 25 ++- - lib/Sema/SemaExpr.cpp | 15 +- - lib/Sema/SemaInit.cpp | 4 +- - lib/Sema/SemaType.cpp | 7 +- - lib/Serialization/ASTReader.cpp | 1 + - lib/Serialization/ASTWriter.cpp | 1 + - test/CodeGenOpenCL/address-spaces.cl | 4 + - test/CodeGenOpenCL/feature-address-spaces.cl | 186 
+++++++++++++++++ - test/CodeGenOpenCL/to_addr_builtin.cl | 2 + - test/Driver/unknown-std.cl | 1 + - .../address-spaces-conversions-cl2.0.cl | 3 + - test/SemaOpenCL/address-spaces.cl | 1 + - test/SemaOpenCL/feature-device-enqueue.cl | 29 +++ - test/SemaOpenCL/feature-images.cl | 28 +++ - test/SemaOpenCL/feature-memory-scope.cl | 118 +++++++++++ - test/SemaOpenCL/feature-pipes.cl | 71 +++++++ - test/SemaOpenCL/invalid-block.cl | 1 - - test/SemaOpenCL/storageclass-cl20.cl | 1 + - test/SemaOpenCL/storageclass.cl | 1 - - test/SemaOpenCL/to_addr_builtin.cl | 2 +- - 50 files changed, 1096 insertions(+), 260 deletions(-) - create mode 100644 include/clang/Basic/OpenCLFeatures.def - create mode 100644 test/CodeGenOpenCL/feature-address-spaces.cl - create mode 100644 test/SemaOpenCL/feature-device-enqueue.cl - create mode 100644 test/SemaOpenCL/feature-images.cl - create mode 100644 test/SemaOpenCL/feature-memory-scope.cl - create mode 100644 test/SemaOpenCL/feature-pipes.cl - -diff --git a/docs/CommandGuide/clang.rst b/docs/CommandGuide/clang.rst -index a75b6c9115..6b43f479ed 100644 ---- a/docs/CommandGuide/clang.rst -+++ b/docs/CommandGuide/clang.rst -@@ -199,19 +199,23 @@ Language Selection and Mode Options - - | ``cl1.0`` - -- OpenCL 1.0 -+ OpenCL C 1.0 - - | ``cl1.1`` - -- OpenCL 1.1 -+ OpenCL C 1.1 - - | ``cl1.2`` - -- OpenCL 1.2 -+ OpenCL C 1.2 - - | ``cl2.0`` - -- OpenCL 2.0 -+ OpenCL C 2.0 -+ -+ | ``cl3.0`` -+ -+ OpenCL C 3.0 - - The default OpenCL language standard is ``cl1.0``. - -diff --git a/include/clang/Basic/Builtins.def b/include/clang/Basic/Builtins.def -index fa031ce09f..f38478ad7a 100644 ---- a/include/clang/Basic/Builtins.def -+++ b/include/clang/Basic/Builtins.def -@@ -1460,42 +1460,42 @@ BUILTIN(__builtin_coro_suspend, "cIb", "n") - BUILTIN(__builtin_coro_param, "bv*v*", "n") - // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions. - // We need the generic prototype, since the packet type could be anything. 
--LANGBUILTIN(read_pipe, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(write_pipe, "i.", "tn", OCLC20_LANG) -+LANGBUILTIN(read_pipe, "i.", "tn", OCL20_30_LANG) -+LANGBUILTIN(write_pipe, "i.", "tn", OCL20_30_LANG) - --LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC20_LANG) -+LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCL20_30_LANG) -+LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCL20_30_LANG) - --LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC20_LANG) --LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC20_LANG) -+LANGBUILTIN(commit_write_pipe, "v.", "tn", OCL20_30_LANG) -+LANGBUILTIN(commit_read_pipe, "v.", "tn", OCL20_30_LANG) - --LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) -+LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCL20_30_LANG) -+LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCL20_30_LANG) - --LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) --LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) -+LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCL20_30_LANG) -+LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCL20_30_LANG) - --LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) -+LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCL20_30_LANG) -+LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCL20_30_LANG) - --LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) --LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) -+LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCL20_30_LANG) -+LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCL20_30_LANG) - --LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC20_LANG) --LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG) -+LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCL20_30_LANG) 
-+LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCL20_30_LANG) - - // OpenCL v2.0 s6.13.17 - Enqueue kernel functions. - // Custom builtin check allows to perform special check of passed block arguments. --LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG) --LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG) --LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG) --LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG) -+LANGBUILTIN(enqueue_kernel, "i.", "tn", OCL20_30_LANG) -+LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCL20_30_LANG) -+LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCL20_30_LANG) -+LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCL20_30_LANG) -+LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCL20_30_LANG) - - // OpenCL v2.0 s6.13.9 - Address space qualifier functions. --LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG) --LANGBUILTIN(to_local, "v*v*", "tn", OCLC20_LANG) --LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG) -+LANGBUILTIN(to_global, "v*v*", "tn", OCL20_30_LANG) -+LANGBUILTIN(to_local, "v*v*", "tn", OCL20_30_LANG) -+LANGBUILTIN(to_private, "v*v*", "tn", OCL20_30_LANG) - - // OpenCL half load/store builtin - LANGBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES) -diff --git a/include/clang/Basic/Builtins.h b/include/clang/Basic/Builtins.h -index fa2bcc4c7a..811e05aa3e 100644 ---- a/include/clang/Basic/Builtins.h -+++ b/include/clang/Basic/Builtins.h -@@ -36,13 +36,16 @@ enum LanguageID { - CXX_LANG = 0x4, // builtin for cplusplus only. - OBJC_LANG = 0x8, // builtin for objective-c and objective-c++ - MS_LANG = 0x10, // builtin requires MS mode. -- OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only. -- OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only. 
-- OMP_LANG = 0x80, // builtin requires OpenMP. -+ OCLC30_LANG = 0x20, // builtin for OpenCL C 3.0 only -+ OCLC20_LANG = 0x40, // builtin for OpenCL C 2.0 only. -+ OCLC1X_LANG = 0x80, // builtin for OpenCL C 1.x only. -+ OMP_LANG = 0x100, // builtin requires OpenMP. -+ OCL20_30_LANG = OCLC30_LANG | OCLC20_LANG, // builtin for OCL2.0 and OCLC3.0 - ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages. - ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode. - ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode. -- ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC20_LANG // builtin for OCLC languages. -+ ALL_OCLC_LANGUAGES = -+ OCLC1X_LANG | OCLC20_LANG | OCLC30_LANG // builtin for OCLC languages. - }; - - namespace Builtin { -diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td -index 7ef57b02fe..c75bf1ae8e 100644 ---- a/include/clang/Basic/DiagnosticSemaKinds.td -+++ b/include/clang/Basic/DiagnosticSemaKinds.td -@@ -746,6 +746,18 @@ def err_opencl_half_param : Error< - "declaring function parameter of type %0 is not allowed; did you forget * ?">; - def err_opencl_invalid_return : Error< - "declaring function return value of type %0 is not allowed %select{; did you forget * ?|}1">; -+def err_opencl_unsupported_memory_order : Error< -+ "OpenCL memory order requires feature support">; -+def err_opencl_unsupported_memory_scope : Error< -+ "OpenCL memory scope requires feature support">; -+def err_opencl_builtin_enqueue_requires_feature : Error< -+ "OpenCL builtin enqueue kernels require feature support">; -+def err_opencl_builtin_address_space_requires_feature : Error< -+ "OpenCL address space qualifier builtins require feature support">; -+def err_opencl_builtin_subgroup_query_requires_feature : Error< -+ "OpenCL builtin subgroup kernel query require feature support">; -+def err_opencl_blocks_support_requires_feature : Error< -+ "OpenCL blocks usage requires feature 
support">; - def warn_enum_value_overflow : Warning<"overflow in enumeration value">; - def warn_pragma_options_align_reset_failed : Warning< - "#pragma options align=reset failed: %0">, -@@ -8620,6 +8632,8 @@ def err_opencl_variadic_function : Error< - "invalid prototype, variadic arguments are not allowed in OpenCL">; - def err_opencl_requires_extension : Error< - "use of %select{type|declaration}0 %1 requires %2 extension to be enabled">; -+def err_opencl_requires_feature : Error< -+ "use of %select{type|declaration}0 %1 requires %2 feature to be supported">; - def warn_opencl_generic_address_space_arg : Warning< - "passing non-generic address space pointer to %0" - " may cause dynamic conversion affecting performance">, -@@ -8635,6 +8649,10 @@ def err_opencl_builtin_pipe_invalid_arg : Error< - def err_opencl_builtin_pipe_invalid_access_modifier : Error< - "invalid pipe access modifier (expecting %0)">; - -+// OpenCL v3.0 s6.13.6 -- Builtin Pipe Functions -+def err_opencl_builtin_pipe_requires_feature : Error< -+ "pipe functions require __opencl_c_pipes feature to be supported">; -+ - // OpenCL access qualifier - def err_opencl_invalid_access_qualifier : Error< - "access qualifier can only be used for pipe and image type">; -diff --git a/include/clang/Basic/OpenCLFeatures.def b/include/clang/Basic/OpenCLFeatures.def -new file mode 100644 -index 0000000000..75e859c25b ---- /dev/null -+++ b/include/clang/Basic/OpenCLFeatures.def -@@ -0,0 +1,42 @@ -+//===--- OpenCLFeatures.def - OpenCL 3.0 feature list -----------*- C++ -*-===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines the list of supported OpenCL features. Features are -+// supported only since OpenCL 3.0. 
-+// -+//===----------------------------------------------------------------------===// -+ -+// Macro OPENCLFEATURE or OPENCLFEATURE_INTERNAL can be defined to enumerate the -+// OpenCL extensions listed in this file. -+ -+#ifndef OPENCLFEATURE_INTERNAL -+#ifndef OPENCLFEATURE -+#pragma error "macro OPENCLFEATURE or OPENCLFEATURE_INTERNAL is required" -+#else -+#define OPENCLFEATURE_INTERNAL(feat, ...) OPENCLFEATURE(feat) -+#endif // OPENCLFEATURE -+#endif // OPENCLFEATURE_INTERNAL -+ -+OPENCLFEATURE_INTERNAL(__opencl_c_3d_image_writes, 100, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_atomic_order_acq_rel, 100, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_atomic_order_seq_cst, 100, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_atomic_scope_device, 100, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_atomic_scope_all_devices, 100, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_device_enqueue, 200, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_generic_address_space, 200, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_pipes, 200, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_program_scope_global_variables, 200, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_read_write_images, 200, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_subgroups, 200, ~0U) -+OPENCLFEATURE_INTERNAL(__opencl_c_work_group_collective_functions, 100, ~0U) -+ -+#undef OPENCLFEATURE_INTERNAL -+ -+#ifdef OPENCLFEATURE -+#undef OPENCLFEATURE -+#endif -diff --git a/include/clang/Basic/OpenCLImageTypes.def b/include/clang/Basic/OpenCLImageTypes.def -index 0efed996ab..7b9615c734 100644 ---- a/include/clang/Basic/OpenCLImageTypes.def -+++ b/include/clang/Basic/OpenCLImageTypes.def -@@ -66,20 +66,20 @@ IMAGE_WRITE_TYPE(image2d_msaa, OCLImage2dMSAA, "cl_khr_gl_msaa_sharing") - IMAGE_WRITE_TYPE(image2d_array_msaa, OCLImage2dArrayMSAA, "cl_khr_gl_msaa_sharing") - IMAGE_WRITE_TYPE(image2d_msaa_depth, OCLImage2dMSAADepth, "cl_khr_gl_msaa_sharing") - IMAGE_WRITE_TYPE(image2d_array_msaa_depth, OCLImage2dArrayMSAADepth, "cl_khr_gl_msaa_sharing") 
--IMAGE_WRITE_TYPE(image3d, OCLImage3d, "cl_khr_3d_image_writes") -+IMAGE_WRITE_TYPE(image3d, OCLImage3d, "cl_khr_3d_image_writes __opencl_c_3d_image_writes") - --IMAGE_READ_WRITE_TYPE(image1d, OCLImage1d, "") --IMAGE_READ_WRITE_TYPE(image1d_array, OCLImage1dArray, "") --IMAGE_READ_WRITE_TYPE(image1d_buffer, OCLImage1dBuffer, "") --IMAGE_READ_WRITE_TYPE(image2d, OCLImage2d, "") --IMAGE_READ_WRITE_TYPE(image2d_array, OCLImage2dArray, "") --IMAGE_READ_WRITE_TYPE(image2d_depth, OCLImage2dDepth, "") --IMAGE_READ_WRITE_TYPE(image2d_array_depth, OCLImage2dArrayDepth, "") --IMAGE_READ_WRITE_TYPE(image2d_msaa, OCLImage2dMSAA, "cl_khr_gl_msaa_sharing") --IMAGE_READ_WRITE_TYPE(image2d_array_msaa, OCLImage2dArrayMSAA, "cl_khr_gl_msaa_sharing") --IMAGE_READ_WRITE_TYPE(image2d_msaa_depth, OCLImage2dMSAADepth, "cl_khr_gl_msaa_sharing") --IMAGE_READ_WRITE_TYPE(image2d_array_msaa_depth, OCLImage2dArrayMSAADepth, "cl_khr_gl_msaa_sharing") --IMAGE_READ_WRITE_TYPE(image3d, OCLImage3d, "") -+IMAGE_READ_WRITE_TYPE(image1d, OCLImage1d, "__opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image1d_array, OCLImage1dArray, "__opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image1d_buffer, OCLImage1dBuffer, "__opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image2d, OCLImage2d, "__opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image2d_array, OCLImage2dArray, "__opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image2d_depth, OCLImage2dDepth, "__opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image2d_array_depth, OCLImage2dArrayDepth, "__opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image2d_msaa, OCLImage2dMSAA, "cl_khr_gl_msaa_sharing __opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image2d_array_msaa, OCLImage2dArrayMSAA, "cl_khr_gl_msaa_sharing __opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image2d_msaa_depth, OCLImage2dMSAADepth, "cl_khr_gl_msaa_sharing __opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image2d_array_msaa_depth, 
OCLImage2dArrayMSAADepth, "cl_khr_gl_msaa_sharing __opencl_c_read_write_images") -+IMAGE_READ_WRITE_TYPE(image3d, OCLImage3d, "__opencl_c_read_write_images") - - #undef IMAGE_TYPE - #undef GENERIC_IMAGE_TYPE -diff --git a/include/clang/Basic/OpenCLOptions.h b/include/clang/Basic/OpenCLOptions.h -index c76fa88092..9eb8d94c10 100644 ---- a/include/clang/Basic/OpenCLOptions.h -+++ b/include/clang/Basic/OpenCLOptions.h -@@ -17,28 +17,106 @@ - - #include "clang/Basic/LangOptions.h" - #include "llvm/ADT/StringMap.h" -+#include "llvm/Support/Debug.h" - - namespace clang { - - /// OpenCL supported extensions and optional core features - class OpenCLOptions { -+ // There are two types of OpenCL options: -+ // extensions and (since OpenCL 3.0) features -+ enum OpenCLOptionType { Extension, Feature }; -+ - struct Info { -+ OpenCLOptionType OptType; - bool Supported; // Is this option supported -- bool Enabled; // Is this option enabled -+ bool Enabled; // Is this option enabled (used only for extensions) - unsigned Avail; // Option starts to be available in this OpenCL version - unsigned Core; // Option becomes (optional) core feature in this OpenCL - // version -- Info(bool S = false, bool E = false, unsigned A = 100, unsigned C = ~0U) -- :Supported(S), Enabled(E), Avail(A), Core(C){} -+ -+ Info(OpenCLOptionType Ty = OpenCLOptionType::Extension, bool S = false, -+ bool E = false, unsigned A = 100, unsigned C = ~0U) -+ : OptType(Ty), Supported(S), Enabled(E), Avail(A), Core(C) {} -+ bool isFeature() const { return OptType == OpenCLOptionType::Feature; } -+ bool isExtension() const { return OptType == OpenCLOptionType::Extension; } - }; -+ - llvm::StringMap OptMap; -+ -+ /// Enable or disable support for OpenCL extensions or -+ /// feature macro. 
Option name optionally prefixed with '+' or '-' -+ -+ void supportOption(llvm::StringRef Option, -+ OpenCLOptionType OCLOptType = OpenCLOptionType::Extension, -+ bool V = true) { -+ assert(!Option.empty() && "Option is empty."); -+ -+ switch (Option[0]) { -+ case '+': -+ V = true; -+ Option = Option.drop_front(); -+ break; -+ case '-': -+ V = false; -+ Option = Option.drop_front(); -+ break; -+ } -+ -+ if (Option.equals("all")) { -+ if (OCLOptType == OpenCLOptionType::Extension) -+ supportAllExtensions(V); -+ // Not supported option for features -+ return; -+ } -+ -+ OptMap[Option].Supported = V; -+ OptMap[Option].OptType = OCLOptType; -+ // Enabled flag for features must be the same as Supported -+ if (OCLOptType == OpenCLOptionType::Feature) -+ OptMap[Option].Enabled = V; -+ } -+ -+ template void disableAllOptions() { -+ llvm::for_each(OptMap, [&](llvm::StringMapEntry &OptVal) { -+ if (OptVal.getValue().OptType == OCLOptType) -+ OptVal.getValue().Enabled = false; -+ }); -+ } -+ -+ template -+ bool isKnownOption(llvm::StringRef Opt) const { -+ auto It = OptMap.find(Opt); -+ return It != OptMap.end() && (It->second.OptType == OCLOptType); -+ } -+ -+ // Turn on or off support of all options. 
-+ template -+ void supportAllOptions(bool On = true) { -+ llvm::for_each(OptMap, [&](llvm::StringMapEntry &OptVal) { -+ if (OptVal.getValue().OptType == OCLOptType) -+ supportOption(OptVal.getKey(), OCLOptType, On); -+ }); -+ } -+ - public: -- bool isKnown(llvm::StringRef Ext) const { -- return OptMap.find(Ext) != OptMap.end(); -+ -+ bool isKnownExtension(llvm::StringRef Opt) const { -+ return isKnownOption(Opt); -+ } -+ -+ bool isKnownFeature(llvm::StringRef Opt) const { -+ return isKnownOption(Opt); -+ } -+ -+ // Check if extension is enabled or disabled, feature supported or -+ // unssupported -+ bool isAvailableOption(llvm::StringRef OptName) { -+ return OptMap.find(OptName)->getValue().Enabled; - } - -- bool isEnabled(llvm::StringRef Ext) const { -- return OptMap.find(Ext)->second.Enabled; -+ bool isAvailableOption(llvm::StringRef OptName) const { -+ return OptMap.find(OptName)->getValue().Enabled; - } - - // Is supported as either an extension or an (optional) core feature for -@@ -65,73 +143,81 @@ public: - // In C++ mode all extensions should work at least as in v2.0. - auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; - auto I = OptMap.find(Ext)->getValue(); -- return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core); -+ return I.isExtension() && I.Supported && I.Avail <= CLVer && -+ (I.Core == ~0U || CLVer < I.Core); - } - -- void enable(llvm::StringRef Ext, bool V = true) { -- OptMap[Ext].Enabled = V; -+ // All features since OpenCL 3.0 version must be supported explicitly -+ bool isSupportedFeature(llvm::StringRef Feat, const LangOptions &LO) const { -+ auto I = OptMap.find(Feat)->getValue(); -+ auto CLVer = LO.OpenCLCPlusPlus ? 
200 : LO.OpenCLVersion; -+ // Till 3.0 all features are supported implicitly -+ // since appropriate version -+ if (CLVer < 300) -+ return I.isFeature() && I.Avail <= CLVer; -+ return I.isFeature() && I.Supported; - } - -- /// Enable or disable support for OpenCL extensions -- /// \param Ext name of the extension optionally prefixed with -- /// '+' or '-' -- /// \param V used when \p Ext is not prefixed by '+' or '-' -- void support(llvm::StringRef Ext, bool V = true) { -- assert(!Ext.empty() && "Extension is empty."); -+ void enable(llvm::StringRef Ext, bool V = true) { OptMap[Ext].Enabled = V; } - -- switch (Ext[0]) { -- case '+': -- V = true; -- Ext = Ext.drop_front(); -- break; -- case '-': -- V = false; -- Ext = Ext.drop_front(); -- break; -- } -+ void supportExtension(llvm::StringRef Option, bool V = true) { -+ supportOption(Option, OpenCLOptionType::Extension, V); -+ } - -- if (Ext.equals("all")) { -- supportAll(V); -- return; -- } -- OptMap[Ext].Supported = V; -+ void supportFeature(llvm::StringRef Option, bool V = true) { -+ supportOption(Option, OpenCLOptionType::Feature, V); - } - -- OpenCLOptions(){ --#define OPENCLEXT_INTERNAL(Ext, AvailVer, CoreVer) \ -- OptMap[#Ext].Avail = AvailVer; \ -- OptMap[#Ext].Core = CoreVer; -+ OpenCLOptions() { -+#define OPENCLEXT_INTERNAL(Ext, AvailVer, CoreVer) \ -+ OptMap[#Ext].OptType = OpenCLOptionType::Extension; \ -+ OptMap[#Ext].Avail = AvailVer; \ -+ OptMap[#Ext].Core = CoreVer; - #include "clang/Basic/OpenCLExtensions.def" -+ -+ // OpenCL features supported only since 3.0 -+#define OPENCLFEATURE_INTERNAL(Feat, AvailVer, CoreVer) \ -+ OptMap[#Feat].OptType = OpenCLOptionType::Feature; \ -+ OptMap[#Feat].Avail = AvailVer; \ -+ OptMap[#Feat].Core = CoreVer; -+#include "clang/Basic/OpenCLFeatures.def" - } - - void addSupport(const OpenCLOptions &Opts) { -- for (auto &I:Opts.OptMap) -+ for (auto &I : Opts.OptMap) - if (I.second.Supported) -- OptMap[I.getKey()].Supported = true; -+ supportOption(I.getKey(), 
I.getValue().OptType, true); - } - -- void copy(const OpenCLOptions &Opts) { -- OptMap = Opts.OptMap; -+ void copy(const OpenCLOptions &Opts) { OptMap = Opts.OptMap; } -+ -+ // Turn on or off support of all extensions. -+ void supportAllExtensions(bool On = true) { -+ supportAllOptions(On); - } - -- // Turn on or off support of all options. -- void supportAll(bool On = true) { -- for (llvm::StringMap::iterator I = OptMap.begin(), -- E = OptMap.end(); I != E; ++I) -- I->second.Supported = On; -+ // Turn on or off support of all features. -+ void supportAllFeatures(bool On = true) { -+ supportAllOptions(On); -+ } -+ -+ void disableAllExtensions() { -+ disableAllOptions(); - } - -- void disableAll() { -- for (llvm::StringMap::iterator I = OptMap.begin(), -- E = OptMap.end(); I != E; ++I) -- I->second.Enabled = false; -+ void enableSupportedCoreExtensions(LangOptions LO) { -+ llvm::for_each(OptMap, [&](llvm::StringMapEntry &OptVal) { -+ if (OptVal.getValue().isExtension() && -+ isSupportedCore(OptVal.getKey(), LO)) -+ enable(OptVal.getKey()); -+ }); - } - -- void enableSupportedCore(LangOptions LO) { -- for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); -- I != E; ++I) -- if (isSupportedCore(I->getKey(), LO)) -- I->second.Enabled = true; -+ void supportCoreFeatures(LangOptions LO) { -+ llvm::for_each(OptMap, [&](llvm::StringMapEntry &OptVal) { -+ if (OptVal.getValue().isFeature() && isSupportedCore(OptVal.getKey(), LO)) -+ supportFeature(OptVal.getKey(), true); -+ }); - } - - friend class ASTWriter; -diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h -index c95cf599ff..160e517df4 100644 ---- a/include/clang/Basic/TargetInfo.h -+++ b/include/clang/Basic/TargetInfo.h -@@ -1284,7 +1284,13 @@ public: - /// Set supported OpenCL extensions as written on command line - virtual void setOpenCLExtensionOpts() { - for (const auto &Ext : getTargetOpts().OpenCLExtensionsAsWritten) { -- 
getTargetOpts().SupportedOpenCLOptions.support(Ext); -+ getTargetOpts().SupportedOpenCLOptions.supportExtension(Ext); -+ } -+ } -+ -+ virtual void setOpenCLFeatureOpts() { -+ for (const auto &Feat : getTargetOpts().OpenCLFeaturesAsWritten) { -+ getTargetOpts().SupportedOpenCLOptions.supportFeature(Feat); - } - } - -diff --git a/include/clang/Basic/TargetOptions.h b/include/clang/Basic/TargetOptions.h -index fcccc5331a..7c7bf49846 100644 ---- a/include/clang/Basic/TargetOptions.h -+++ b/include/clang/Basic/TargetOptions.h -@@ -62,6 +62,10 @@ public: - /// the command line. - std::vector OpenCLExtensionsAsWritten; - -+ /// The list of OpenCL features to enable or disable, as written on -+ /// the command line. -+ std::vector OpenCLFeaturesAsWritten; -+ - /// If given, enables support for __int128_t and __uint128_t types. - bool ForceEnableInt128 = false; - -diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td -index 07c7688406..3b790b896a 100644 ---- a/include/clang/Driver/CC1Options.td -+++ b/include/clang/Driver/CC1Options.td -@@ -813,6 +813,11 @@ def detailed_preprocessing_record : Flag<["-"], "detailed-preprocessing-record"> - def cl_ext_EQ : CommaJoined<["-"], "cl-ext=">, - HelpText<"OpenCL only. Enable or disable OpenCL extensions. The argument is a comma-separated sequence of one or more extension names, each prefixed by '+' or '-'.">; - -+ -+def cl_feature_EQ : CommaJoined<["-"], "cl-feature=">, -+ HelpText<"OpenCL only. Enable or disable OpenCL features. 
The argument is a comma-separated sequence of one or more feature names, each prefixed by '+' or '-'.">; -+ -+ - //===----------------------------------------------------------------------===// - // CUDA Options - //===----------------------------------------------------------------------===// -diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td -index d02d9744d7..2543d11dd4 100644 ---- a/include/clang/Driver/Options.td -+++ b/include/clang/Driver/Options.td -@@ -519,7 +519,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group, Flags<[CC - def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group, Flags<[CC1Option]>, - HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">; - def cl_std_EQ : Joined<["-"], "cl-std=">, Group, Flags<[CC1Option]>, -- HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,c++">; -+ HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,c++">; - def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, Flags<[CC1Option]>, - HelpText<"OpenCL only. 
Allow denormals to be flushed to zero.">; - def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, -diff --git a/include/clang/Frontend/LangStandards.def b/include/clang/Frontend/LangStandards.def -index 0fdd35f320..c99be69ba1 100644 ---- a/include/clang/Frontend/LangStandards.def -+++ b/include/clang/Frontend/LangStandards.def -@@ -158,6 +158,10 @@ LANGSTANDARD(opencl12, "cl1.2", - LANGSTANDARD(opencl20, "cl2.0", - OpenCL, "OpenCL 2.0", - LineComment | C99 | Digraphs | HexFloat | OpenCL) -+LANGSTANDARD(opencl30, "cl3.0", -+ OpenCL, "OpenCL 3.0", -+ LineComment | C99 | Digraphs | HexFloat | OpenCL) -+ - LANGSTANDARD(openclcpp, "c++", - OpenCL, "OpenCL C++ 1.0", - LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | Digraphs | OpenCL) -diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h -index ced5773f0c..825cf88777 100644 ---- a/include/clang/Sema/Sema.h -+++ b/include/clang/Sema/Sema.h -@@ -8604,19 +8604,22 @@ public: - SourceLocation FuncLoc); - - //===--------------------------------------------------------------------===// -- // OpenCL extensions. -+ // OpenCL extensions and features - // - private: - std::string CurrOpenCLExtension; -+ std::string CurrOpenCLFeature; - /// Extensions required by an OpenCL type. -- llvm::DenseMap> OpenCLTypeExtMap; -+ llvm::DenseMap> OpenCLTypeExtMap; -+ /// Features required by an OpenCL type (since 3.0). -+ llvm::DenseMap> OpenCLTypeFeatureMap; - /// Extensions required by an OpenCL declaration. - llvm::DenseMap> OpenCLDeclExtMap; - public: - llvm::StringRef getCurrentOpenCLExtension() const { - return CurrOpenCLExtension; - } -- -+ llvm::StringRef getCurrentOpenCLFeature() const { return CurrOpenCLFeature; } - /// Check if a function declaration \p FD associates with any - /// extensions present in OpenCLDeclExtMap and if so return the - /// extension(s) name(s). 
-@@ -8635,6 +8638,10 @@ public: - CurrOpenCLExtension = Ext; - } - -+ void setCurrentOpenCLFeature(llvm::StringRef Feat) { -+ CurrOpenCLFeature = std::string(Feat); -+ } -+ - /// Set OpenCL extensions for a type which can only be used when these - /// OpenCL extensions are enabled. If \p Exts is empty, do nothing. - /// \param Exts A space separated list of OpenCL extensions. -@@ -8651,11 +8658,15 @@ public: - /// empty, do nothing. - void setCurrentOpenCLExtensionForType(QualType T); - -+ void setCurrentOpenCLFeatureForType(QualType T); -+ - /// Set current OpenCL extensions for a declaration which - /// can only be used when these OpenCL extensions are enabled. If current - /// OpenCL extension is empty, do nothing. - void setCurrentOpenCLExtensionForDecl(Decl *FD); - -+ void setCurrentOpenCLFeatureForDecl(Decl *FD); -+ - bool isOpenCLDisabledDecl(Decl *FD); - - /// Check if type \p T corresponding to declaration specifier \p DS -@@ -8670,6 +8681,11 @@ public: - /// \return true if type is disabled. - bool checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E); - -+ bool isSupportedOpenCLOMemoryrdering(int64_t Ordering); -+ -+ bool isSupportedOpenCLMemoryScope(SyncScope Scope); -+ -+ bool checkOpenCLFeatureSupportForBuiltin(CallExpr* Call); - //===--------------------------------------------------------------------===// - // OpenMP directives and clauses. 
- // -diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp -index 21b6f36e9a..3e4937a958 100644 ---- a/lib/AST/ASTContext.cpp -+++ b/lib/AST/ASTContext.cpp -@@ -1313,7 +1313,9 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, - ObjCSuperType = QualType(); - - // void * type -- if (LangOpts.OpenCLVersion >= 200) { -+ if (!LangOpts.OpenCLCPlusPlus && -+ Target.getSupportedOpenCLOpts().isAvailableOption( -+ "__opencl_c_generic_address_space")) { - auto Q = VoidTy.getQualifiers(); - Q.setAddressSpace(LangAS::opencl_generic); - VoidPtrTy = getPointerType(getCanonicalType( -diff --git a/lib/Basic/Builtins.cpp b/lib/Basic/Builtins.cpp -index 7e7f67ca87..dd0d4dfa83 100644 ---- a/lib/Basic/Builtins.cpp -+++ b/lib/Basic/Builtins.cpp -@@ -73,11 +73,16 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG; - bool OclC2Unsupported = LangOpts.OpenCLVersion != 200 && - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; -+ bool OclC2030Unsupported = -+ (LangOpts.OpenCLVersion < 200 && LangOpts.OpenCLVersion != 300 && -+ !LangOpts.OpenCLCPlusPlus) && -+ (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCL20_30_LANG; -+ - bool OclCUnsupported = !LangOpts.OpenCL && - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); - bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; - return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && -- !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && -+ !OclC1Unsupported && !OclC2Unsupported && !OclC2030Unsupported && !OpenMPUnsupported && - !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported; - } - -diff --git a/lib/Basic/TargetInfo.cpp b/lib/Basic/TargetInfo.cpp -index 8b7621d796..199db8be94 100644 ---- a/lib/Basic/TargetInfo.cpp -+++ b/lib/Basic/TargetInfo.cpp -@@ -372,6 +372,11 @@ void TargetInfo::adjust(LangOptions &Opts) { - HalfFormat = &llvm::APFloat::IEEEhalf(); - FloatFormat = 
&llvm::APFloat::IEEEsingle(); - LongDoubleFormat = &llvm::APFloat::IEEEquad(); -+ // OpenCL features. -+#define OPENCLFEATURE(Feat) \ -+ if (getSupportedOpenCLOpts().isSupportedFeature(#Feat, Opts)) \ -+ getSupportedOpenCLOpts().supportFeature(#Feat); -+#include "clang/Basic/OpenCLFeatures.def" - } - - if (Opts.NewAlignOverride) -diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp -index 3c139d7247..069382cd3f 100644 ---- a/lib/Basic/Targets.cpp -+++ b/lib/Basic/Targets.cpp -@@ -659,6 +659,7 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags, - - Target->setSupportedOpenCLOpts(); - Target->setOpenCLExtensionOpts(); -+ Target->setOpenCLFeatureOpts(); - Target->setMaxAtomicWidth(); - - if (!Target->validateTarget(Diags)) -diff --git a/lib/Basic/Targets/AMDGPU.h b/lib/Basic/Targets/AMDGPU.h -index 926772809a..3eca768ae0 100644 ---- a/lib/Basic/Targets/AMDGPU.h -+++ b/lib/Basic/Targets/AMDGPU.h -@@ -244,31 +244,31 @@ public: - - void setSupportedOpenCLOpts() override { - auto &Opts = getSupportedOpenCLOpts(); -- Opts.support("cl_clang_storage_class_specifiers"); -- Opts.support("cl_khr_icd"); -+ Opts.supportExtension("cl_clang_storage_class_specifiers"); -+ Opts.supportExtension("cl_khr_icd"); - - bool IsAMDGCN = isAMDGCN(getTriple()); - - if (hasFP64()) -- Opts.support("cl_khr_fp64"); -+ Opts.supportExtension("cl_khr_fp64"); - - if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) { -- Opts.support("cl_khr_byte_addressable_store"); -- Opts.support("cl_khr_global_int32_base_atomics"); -- Opts.support("cl_khr_global_int32_extended_atomics"); -- Opts.support("cl_khr_local_int32_base_atomics"); -- Opts.support("cl_khr_local_int32_extended_atomics"); -+ Opts.supportExtension("cl_khr_byte_addressable_store"); -+ Opts.supportExtension("cl_khr_global_int32_base_atomics"); -+ Opts.supportExtension("cl_khr_global_int32_extended_atomics"); -+ Opts.supportExtension("cl_khr_local_int32_base_atomics"); -+ Opts.supportExtension("cl_khr_local_int32_extended_atomics"); - } 
- - if (IsAMDGCN) { -- Opts.support("cl_khr_fp16"); -- Opts.support("cl_khr_int64_base_atomics"); -- Opts.support("cl_khr_int64_extended_atomics"); -- Opts.support("cl_khr_mipmap_image"); -- Opts.support("cl_khr_subgroups"); -- Opts.support("cl_khr_3d_image_writes"); -- Opts.support("cl_amd_media_ops"); -- Opts.support("cl_amd_media_ops2"); -+ Opts.supportExtension("cl_khr_fp16"); -+ Opts.supportExtension("cl_khr_int64_base_atomics"); -+ Opts.supportExtension("cl_khr_int64_extended_atomics"); -+ Opts.supportExtension("cl_khr_mipmap_image"); -+ Opts.supportExtension("cl_khr_subgroups"); -+ Opts.supportExtension("cl_khr_3d_image_writes"); -+ Opts.supportExtension("cl_amd_media_ops"); -+ Opts.supportExtension("cl_amd_media_ops2"); - } - } - -diff --git a/lib/Basic/Targets/NVPTX.h b/lib/Basic/Targets/NVPTX.h -index 84d466d2f4..5b2d2a08c2 100644 ---- a/lib/Basic/Targets/NVPTX.h -+++ b/lib/Basic/Targets/NVPTX.h -@@ -113,16 +113,16 @@ public: - - void setSupportedOpenCLOpts() override { - auto &Opts = getSupportedOpenCLOpts(); -- Opts.support("cl_clang_storage_class_specifiers"); -- Opts.support("cl_khr_gl_sharing"); -- Opts.support("cl_khr_icd"); -- -- Opts.support("cl_khr_fp64"); -- Opts.support("cl_khr_byte_addressable_store"); -- Opts.support("cl_khr_global_int32_base_atomics"); -- Opts.support("cl_khr_global_int32_extended_atomics"); -- Opts.support("cl_khr_local_int32_base_atomics"); -- Opts.support("cl_khr_local_int32_extended_atomics"); -+ Opts.supportExtension("cl_clang_storage_class_specifiers"); -+ Opts.supportExtension("cl_khr_gl_sharing"); -+ Opts.supportExtension("cl_khr_icd"); -+ -+ Opts.supportExtension("cl_khr_fp64"); -+ Opts.supportExtension("cl_khr_byte_addressable_store"); -+ Opts.supportExtension("cl_khr_global_int32_base_atomics"); -+ Opts.supportExtension("cl_khr_global_int32_extended_atomics"); -+ Opts.supportExtension("cl_khr_local_int32_base_atomics"); -+ Opts.supportExtension("cl_khr_local_int32_extended_atomics"); - } - - CallingConvCheckResult 
checkCallingConvention(CallingConv CC) const override { -diff --git a/lib/Basic/Targets/SPIR.h b/lib/Basic/Targets/SPIR.h -index e8d92f11a1..54056dfec4 100644 ---- a/lib/Basic/Targets/SPIR.h -+++ b/lib/Basic/Targets/SPIR.h -@@ -96,7 +96,7 @@ public: - void setSupportedOpenCLOpts() override { - // Assume all OpenCL extensions and optional core features are supported - // for SPIR since it is a generic target. -- getSupportedOpenCLOpts().supportAll(); -+ getSupportedOpenCLOpts().supportAllExtensions(); - } - }; - class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo { -diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h -index 05930ae9ee..a7ece655eb 100644 ---- a/lib/Basic/Targets/X86.h -+++ b/lib/Basic/Targets/X86.h -@@ -322,7 +322,7 @@ public: - bool hasSjLjLowering() const override { return true; } - - void setSupportedOpenCLOpts() override { -- getSupportedOpenCLOpts().supportAll(); -+ getSupportedOpenCLOpts().supportAllExtensions(); - } - }; - -diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp -index 3e6528c259..bcecfe3f90 100644 ---- a/lib/Frontend/CompilerInvocation.cpp -+++ b/lib/Frontend/CompilerInvocation.cpp -@@ -2138,6 +2138,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, - Opts.OpenCLVersion = 120; - else if (LangStd == LangStandard::lang_opencl20) - Opts.OpenCLVersion = 200; -+ else if (LangStd == LangStandard::lang_opencl30) -+ Opts.OpenCLVersion = 300; - else if (LangStd == LangStandard::lang_openclcpp) - Opts.OpenCLCPlusPlusVersion = 100; - -@@ -2342,6 +2344,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, - .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) - .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) - .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) -+ .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) - .Case("c++", LangStandard::lang_openclcpp) - .Default(LangStandard::lang_unspecified); - -@@ 
-2581,7 +2584,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, - Opts.RTTI = Opts.CPlusPlus && !Args.hasArg(OPT_fno_rtti); - Opts.RTTIData = Opts.RTTI && !Args.hasArg(OPT_fno_rtti_data); - Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL -- && Opts.OpenCLVersion == 200); -+ && Opts.OpenCLVersion >= 200); - Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional); - Opts.CoroutinesTS = Args.hasArg(OPT_fcoroutines_ts); - -@@ -3212,6 +3215,8 @@ static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args, - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - Opts.Triple = llvm::Triple::normalize(Opts.Triple); - Opts.OpenCLExtensionsAsWritten = Args.getAllArgValues(OPT_cl_ext_EQ); -+ Opts.OpenCLFeaturesAsWritten = Args.getAllArgValues(OPT_cl_feature_EQ); -+ - Opts.ForceEnableInt128 = Args.hasArg(OPT_fforce_enable_int128); - Opts.NVPTXUseShortPointers = Args.hasFlag( - options::OPT_fcuda_short_ptr, options::OPT_fno_cuda_short_ptr, false); -diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp -index 4cde22ce9a..c0a844a833 100644 ---- a/lib/Frontend/InitPreprocessor.cpp -+++ b/lib/Frontend/InitPreprocessor.cpp -@@ -435,6 +435,9 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, - case 200: - Builder.defineMacro("__OPENCL_C_VERSION__", "200"); - break; -+ case 300: -+ Builder.defineMacro("__OPENCL_C_VERSION__", "300"); -+ break; - default: - llvm_unreachable("Unsupported OpenCL version"); - } -@@ -1064,6 +1067,12 @@ static void InitializePredefinedMacros(const TargetInfo &TI, - Builder.defineMacro(#Ext); - #include "clang/Basic/OpenCLExtensions.def" - -+ // OpenCL features. 
-+#define OPENCLFEATURE(Feat) \ -+ if (TI.getSupportedOpenCLOpts().isSupportedFeature(#Feat, LangOpts)) \ -+ Builder.defineMacro(#Feat); -+#include "clang/Basic/OpenCLFeatures.def" -+ - auto Arch = TI.getTriple().getArch(); - if (Arch == llvm::Triple::spir || Arch == llvm::Triple::spir64) - Builder.defineMacro("__IMAGE_SUPPORT__"); -diff --git a/lib/Headers/opencl-c.h b/lib/Headers/opencl-c.h -index 514c710c11..3992163a77 100644 ---- a/lib/Headers/opencl-c.h -+++ b/lib/Headers/opencl-c.h -@@ -7603,7 +7603,7 @@ half16 __ovld __cnfn fmod(half16 x, half16 y); - * Returns fmin(x - floor (x), 0x1.fffffep-1f ). - * floor(x) is returned in iptr. - */ --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - float __ovld fract(float x, float *iptr); - float2 __ovld fract(float2 x, float2 *iptr); - float3 __ovld fract(float3 x, float3 *iptr); -@@ -7685,7 +7685,7 @@ half4 __ovld fract(half4 x, __private half4 *iptr); - half8 __ovld fract(half8 x, __private half8 *iptr); - half16 __ovld fract(half16 x, __private half16 *iptr); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - - /** - * Extract mantissa and exponent from x. For each -@@ -7693,7 +7693,7 @@ half16 __ovld fract(half16 x, __private half16 *iptr); - * magnitude in the interval [1/2, 1) or 0. Each - * component of x equals mantissa returned * 2^exp. 
- */ --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - float __ovld frexp(float x, int *exp); - float2 __ovld frexp(float2 x, int2 *exp); - float3 __ovld frexp(float3 x, int3 *exp); -@@ -7775,7 +7775,7 @@ half4 __ovld frexp(half4 x, __private int4 *exp); - half8 __ovld frexp(half8 x, __private int8 *exp); - half16 __ovld frexp(half16 x, __private int16 *exp); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - - /** - * Compute the value of the square root of x^2 + y^2 -@@ -7900,7 +7900,7 @@ half8 __ovld __cnfn lgamma(half8 x); - half16 __ovld __cnfn lgamma(half16 x); - #endif //cl_khr_fp16 - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - float __ovld lgamma_r(float x, int *signp); - float2 __ovld lgamma_r(float2 x, int2 *signp); - float3 __ovld lgamma_r(float3 x, int3 *signp); -@@ -7982,7 +7982,7 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); - half8 __ovld lgamma_r(half8 x, __private int8 *signp); - half16 __ovld lgamma_r(half16 x, __private int16 *signp); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - - /** - * Compute natural logarithm. -@@ -8206,7 +8206,7 @@ half16 __ovld __cnfn minmag(half16 x, half16 y); - * the argument. It stores the integral part in the object - * pointed to by iptr. 
- */ --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - float __ovld modf(float x, float *iptr); - float2 __ovld modf(float2 x, float2 *iptr); - float3 __ovld modf(float3 x, float3 *iptr); -@@ -8288,7 +8288,7 @@ half4 __ovld modf(half4 x, __private half4 *iptr); - half8 __ovld modf(half8 x, __private half8 *iptr); - half16 __ovld modf(half16 x, __private half16 *iptr); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - - /** - * Returns a quiet NaN. The nancode may be placed -@@ -8466,7 +8466,7 @@ half16 __ovld __cnfn remainder(half16 x, half16 y); - * sign as x/y. It stores this signed value in the object - * pointed to by quo. - */ --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - float __ovld remquo(float x, float y, int *quo); - float2 __ovld remquo(float2 x, float2 y, int2 *quo); - float3 __ovld remquo(float3 x, float3 y, int3 *quo); -@@ -8549,7 +8549,7 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); - half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); - half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - /** - * Round to integral value (using round to nearest - * even rounding mode) in floating-point format. -@@ -8690,7 +8690,7 @@ half16 __ovld __cnfn sin(half16); - * is the return value and computed cosine is returned - * in cosval. 
- */ --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - float __ovld sincos(float x, float *cosval); - float2 __ovld sincos(float2 x, float2 *cosval); - float3 __ovld sincos(float3 x, float3 *cosval); -@@ -8772,7 +8772,7 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); - half8 __ovld sincos(half8 x, __private half8 *cosval); - half16 __ovld sincos(half16 x, __private half16 *cosval); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - - /** - * Compute hyperbolic sine. -@@ -11617,7 +11617,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p); - half16 __ovld vload16(size_t offset, const __constant half *p); - #endif //cl_khr_fp16 - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - char2 __ovld vload2(size_t offset, const char *p); - uchar2 __ovld vload2(size_t offset, const uchar *p); - short2 __ovld vload2(size_t offset, const short *p); -@@ -11855,9 +11855,9 @@ half4 __ovld vload4(size_t offset, const __private half *p); - half8 __ovld vload8(size_t offset, const __private half *p); - half16 __ovld vload16(size_t offset, const __private half *p); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - void __ovld vstore2(char2 data, size_t offset, char *p); - void __ovld vstore2(uchar2 data, size_t offset, uchar *p); - void __ovld vstore2(short2 data, size_t offset, short *p); -@@ -12091,7 +12091,7 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); - void __ovld vstore8(half8 data, size_t offset, __private half *p); - void __ovld vstore16(half16 data, size_t offset, __private half *p); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - - /** - * Read sizeof (half) 
bytes of data from address -@@ -12158,7 +12158,7 @@ float16 __ovld vload_half16(size_t offset, const __private half *p); - * The default current rounding mode is round to - * nearest even. - */ --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - void __ovld vstore_half(float data, size_t offset, half *p); - void __ovld vstore_half_rte(float data, size_t offset, half *p); - void __ovld vstore_half_rtz(float data, size_t offset, half *p); -@@ -12204,7 +12204,7 @@ void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); - void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); - void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); - #endif //cl_khr_fp64 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - - /** - * The floatn value given by data is converted to -@@ -12217,7 +12217,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); - * The default current rounding mode is round to - * nearest even. 
- */ --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_generic_address_space - void __ovld vstore_half2(float2 data, size_t offset, half *p); - void __ovld vstore_half3(float3 data, size_t offset, half *p); - void __ovld vstore_half4(float4 data, size_t offset, half *p); -@@ -12423,7 +12423,7 @@ void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); - void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); - void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); - #endif //cl_khr_fp64 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_generic_address_space - - /** - * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) -@@ -14802,7 +14802,7 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord); - #endif //cl_khr_fp16 - - // Image read functions for read_write images --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord); - int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord); - uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord); -@@ -14845,7 +14845,7 @@ float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 co - float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample); - #endif //cl_khr_gl_msaa_sharing - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - #ifdef cl_khr_mipmap_image - float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod); - int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod); -@@ -14919,7 +14919,7 @@ float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler - int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, 
float lod); - uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod); - #endif //cl_khr_mipmap_image --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - // Image read functions returning half4 type - #ifdef cl_khr_fp16 -@@ -14930,7 +14930,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord); - half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord); - half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - /** - * Write color value to location specified by coordinate -@@ -15019,7 +15019,7 @@ void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, flo - void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color); - void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color); - --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color); - void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color); - void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color); -@@ -15052,11 +15052,11 @@ void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, in - void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float depth); - void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float depth); - --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color); - void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 
color); - void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color); --#endif //cl_khr_3d_image_writes -+#endif //defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - - #endif //defined(cl_khr_mipmap_image_writes) - #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -@@ -15065,7 +15065,7 @@ void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 - #ifdef cl_khr_fp16 - void __ovld write_imageh(write_only image1d_t image, int coord, half4 color); - void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color); - #endif - void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color); -@@ -15074,7 +15074,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col - #endif //cl_khr_fp16 - - // Image write functions for read_write images --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color); - void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color); - void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color); -@@ -15106,7 +15106,7 @@ void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float col - void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color); - #endif //cl_khr_depth_images - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - #if defined(cl_khr_mipmap_image_writes) - void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color); - void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color); -@@ -15133,8 +15133,8 @@ void __ovld write_imagei(read_write 
image3d_t image, int4 coord, int lod, int4 c - void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color); - #endif //cl_khr_3d_image_writes - --#endif //cl_khr_mipmap_image_writes --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //defined(cl_khr_mipmap_image_writes) -+#endif //__opencl_c_read_write_images - - // Image write functions for half4 type - #ifdef cl_khr_fp16 -@@ -15147,7 +15147,7 @@ void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 col - void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color); - void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color); - #endif //cl_khr_fp16 --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have - // access qualifier, which by default assume read_only access qualifier. Image query builtin -@@ -15179,7 +15179,7 @@ int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image); - int __ovld __cnfn get_image_width(write_only image1d_t image); - int __ovld __cnfn get_image_width(write_only image1d_buffer_t image); - int __ovld __cnfn get_image_width(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_width(write_only image3d_t image); - #endif - int __ovld __cnfn get_image_width(write_only image1d_array_t image); -@@ -15195,7 +15195,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image); - int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_width(read_write image1d_t image); - int __ovld __cnfn get_image_width(read_write image1d_buffer_t image); - 
int __ovld __cnfn get_image_width(read_write image2d_t image); -@@ -15212,7 +15212,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image); - int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image); - int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - /** - * Return the image height in pixels. -@@ -15232,7 +15232,7 @@ int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - - int __ovld __cnfn get_image_height(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_height(write_only image3d_t image); - #endif - int __ovld __cnfn get_image_height(write_only image2d_array_t image); -@@ -15247,7 +15247,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image); - int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_height(read_write image2d_t image); - int __ovld __cnfn get_image_height(read_write image3d_t image); - int __ovld __cnfn get_image_height(read_write image2d_array_t image); -@@ -15261,18 +15261,18 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image); - int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image); - int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - /** - * Return the image depth in pixels. 
- */ - int __ovld __cnfn get_image_depth(read_only image3d_t image); - --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_depth(write_only image3d_t image); - #endif - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_depth(read_write image3d_t image); - #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 - -@@ -15289,13 +15289,15 @@ int __ovld get_image_num_mip_levels(read_only image3d_t image); - - int __ovld get_image_num_mip_levels(write_only image1d_t image); - int __ovld get_image_num_mip_levels(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - int __ovld get_image_num_mip_levels(write_only image3d_t image); - #endif - -+#ifdef __opencl_c_read_write_images - int __ovld get_image_num_mip_levels(read_write image1d_t image); - int __ovld get_image_num_mip_levels(read_write image2d_t image); - int __ovld get_image_num_mip_levels(read_write image3d_t image); -+#endif //__opencl_c_read_write_images - - int __ovld get_image_num_mip_levels(read_only image1d_array_t image); - int __ovld get_image_num_mip_levels(read_only image2d_array_t image); -@@ -15307,10 +15309,12 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image); - int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image); - int __ovld get_image_num_mip_levels(write_only image2d_depth_t image); - -+#ifdef __opencl_c_read_write_images - int __ovld get_image_num_mip_levels(read_write image1d_array_t image); - int __ovld get_image_num_mip_levels(read_write image2d_array_t image); - int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image); - int __ovld get_image_num_mip_levels(read_write image2d_depth_t image); -+#endif - - #endif //cl_khr_mipmap_image - #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -@@ -15374,7 +15378,7 @@ 
int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth - int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image); - int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image); - int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image); - #endif - int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image); -@@ -15390,7 +15394,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t im - int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image); - int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image); - int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image); -@@ -15407,7 +15411,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im - int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image); - int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - /** - * Return the image channel order. 
Valid values are: -@@ -15470,7 +15474,7 @@ int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t i - int __ovld __cnfn get_image_channel_order(write_only image1d_t image); - int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image); - int __ovld __cnfn get_image_channel_order(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_channel_order(write_only image3d_t image); - #endif - int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image); -@@ -15486,7 +15490,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image) - int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_channel_order(read_write image1d_t image); - int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image); - int __ovld __cnfn get_image_channel_order(read_write image2d_t image); -@@ -15503,7 +15507,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image) - int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image); - int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - /** - * Return the 2D image width and height as an int2 -@@ -15536,7 +15540,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image); - int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - int2 __ovld __cnfn get_image_dim(read_write image2d_t image); - int2 
__ovld __cnfn get_image_dim(read_write image2d_array_t image); - #ifdef cl_khr_depth_images -@@ -15549,7 +15553,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image); - int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image); - int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - /** - * Return the 3D image width, height, and depth as an -@@ -15558,12 +15562,12 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); - * component and the w component is 0. - */ - int4 __ovld __cnfn get_image_dim(read_only image3d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) && defined(__opencl_c_3d_image_writes) - int4 __ovld __cnfn get_image_dim(write_only image3d_t image); - #endif --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - int4 __ovld __cnfn get_image_dim(read_write image3d_t image); --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - /** - * Return the image array size. 
-@@ -15589,7 +15593,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_ - size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array); - #endif //cl_khr_gl_msaa_sharing - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array); - size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array); - #ifdef cl_khr_depth_images -@@ -15599,7 +15603,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image - size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array); - size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array); - #endif //cl_khr_gl_msaa_sharing --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - - /** - * Return the number of samples associated with image -@@ -15617,18 +15621,18 @@ int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); - int __ovld get_image_num_samples(write_only image2d_array_msaa_t image); - int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_read_write_images - int __ovld get_image_num_samples(read_write image2d_msaa_t image); - int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image); - int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); - int __ovld get_image_num_samples(read_write image2d_array_msaa_t image); - int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_read_write_images - #endif - - // OpenCL v2.0 s6.13.15 - Work-group Functions - --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_work_group_collective_functions - int __ovld __conv 
work_group_all(int predicate); - int __ovld __conv work_group_any(int predicate); - -@@ -15726,17 +15730,17 @@ double __ovld __conv work_group_scan_inclusive_min(double x); - double __ovld __conv work_group_scan_inclusive_max(double x); - #endif //cl_khr_fp64 - --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_work_group_collective_functions - - // OpenCL v2.0 s6.13.16 - Pipe Functions --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_pipes - #define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) - bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_pipes - - - // OpenCL v2.0 s6.13.17 - Enqueue Kernels --#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#ifdef __opencl_c_device_enqueue - - #define CL_COMPLETE 0x0 - #define CL_RUNNING 0x1 -@@ -15803,10 +15807,11 @@ bool __ovld is_valid_event (clk_event_t event); - void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value); - - queue_t __ovld get_default_queue(void); --#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -+#endif //__opencl_c_device_enqueue - - // OpenCL Extension v2.0 s9.17 - Sub-groups - -+#ifdef __opencl_c_subgroups - #if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) - // Shared Sub Group Functions - uint __ovld get_sub_group_size(void); -@@ -16011,12 +16016,12 @@ uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, in - uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord ); - uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord ); - --#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord); - uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord); - uint4 __ovld __conv 
intel_sub_group_block_read4(read_write image2d_t image, int2 coord); - uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord); --#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //__opencl_c_read_write_images - - uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); - uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); -@@ -16028,12 +16033,12 @@ void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, i - void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data); - void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data); - --#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data); - void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data); - void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data); - void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data); --#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //__opencl_c_read_write_images - - void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data ); - void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data ); -@@ -16151,12 +16156,12 @@ uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t ima - uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord ); - uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord ); - --#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord ); - uint2 __ovld __conv 
intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord ); - uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord ); - uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord ); --#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //__opencl_c_read_write_images - - uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p ); - uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p ); -@@ -16168,12 +16173,12 @@ void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t im - void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data ); - void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data ); - --#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data ); - void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data ); - void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data ); - void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data ); --#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //__opencl_c_read_write_images - - void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); - void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data ); -@@ -16185,12 +16190,12 @@ ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t im - ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord ); - ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord ); - --#if (__OPENCL_C_VERSION__ >= 
CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord); - ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord); - ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord); - ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord); --#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //__opencl_c_read_write_images - - ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); - ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p ); -@@ -16202,18 +16207,19 @@ void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t i - void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data); - void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data); - --#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data); - void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data); - void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data); - void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data); --#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //__opencl_c_read_write_images - - void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); - void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data ); - void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data ); - void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data ); - #endif // 
cl_intel_subgroups_short -+#endif // __opencl_c_subgroups - - #ifdef cl_intel_device_side_avc_motion_estimation - #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin -diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp -index 298a2bad56..43134f3301 100644 ---- a/lib/Parse/ParseDecl.cpp -+++ b/lib/Parse/ParseDecl.cpp -@@ -3807,8 +3807,8 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, - case tok::kw___generic: - // generic address space is introduced only in OpenCL v2.0 - // see OpenCL C Spec v2.0 s6.5.5 -- if (Actions.getLangOpts().OpenCLVersion < 200 && -- !Actions.getLangOpts().OpenCLCPlusPlus) { -+ if (!Actions.getOpenCLOptions().isAvailableOption( -+ "__opencl_c_generic_address_space")) { - DiagID = diag::err_opencl_unknown_type_specifier; - PrevSpec = Tok.getIdentifierInfo()->getNameStart(); - isInvalid = true; -diff --git a/lib/Parse/ParsePragma.cpp b/lib/Parse/ParsePragma.cpp -index 7e9b1011e8..46ee375624 100644 ---- a/lib/Parse/ParsePragma.cpp -+++ b/lib/Parse/ParsePragma.cpp -@@ -692,21 +692,21 @@ void Parser::HandlePragmaOpenCLExtension() { - // behavior is set to disable." 
- if (Name == "all") { - if (State == Disable) { -- Opt.disableAll(); -- Opt.enableSupportedCore(getLangOpts()); -+ Opt.disableAllExtensions(); -+ Opt.enableSupportedCoreExtensions(getLangOpts()); - } else { - PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1; - } - } else if (State == Begin) { -- if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) { -- Opt.support(Name); -+ if (!Opt.isKnownExtension(Name) || !Opt.isSupported(Name, getLangOpts())) { -+ Opt.supportExtension(Name); - } - Actions.setCurrentOpenCLExtension(Name); - } else if (State == End) { - if (Name != Actions.getCurrentOpenCLExtension()) - PP.Diag(NameLoc, diag::warn_pragma_begin_end_mismatch); - Actions.setCurrentOpenCLExtension(""); -- } else if (!Opt.isKnown(Name)) -+ } else if (!Opt.isKnownExtension(Name)) - PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident; - else if (Opt.isSupportedExtension(Name, getLangOpts())) - Opt.enable(Name, State == Enable); -diff --git a/lib/Sema/DeclSpec.cpp b/lib/Sema/DeclSpec.cpp -index 8b002dac13..ce70e5b85a 100644 ---- a/lib/Sema/DeclSpec.cpp -+++ b/lib/Sema/DeclSpec.cpp -@@ -590,7 +590,7 @@ bool DeclSpec::SetStorageClassSpec(Sema &S, SCS SC, SourceLocation Loc, - // specifiers are not supported." - // OpenCL C++ v1.0 s2.9 restricts register. 
- if (S.getLangOpts().OpenCL && -- !S.getOpenCLOptions().isEnabled("cl_clang_storage_class_specifiers")) { -+ !S.getOpenCLOptions().isAvailableOption("cl_clang_storage_class_specifiers")) { - switch (SC) { - case SCS_extern: - case SCS_private_extern: -diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp -index 9d33ec5190..50c3fd6f44 100644 ---- a/lib/Sema/Sema.cpp -+++ b/lib/Sema/Sema.cpp -@@ -111,6 +111,7 @@ public: - } - }; - -+ - } // end namespace sema - } // end namespace clang - -@@ -258,13 +259,17 @@ void Sema::Initialize() { - if (getLangOpts().OpenCL) { - getOpenCLOptions().addSupport( - Context.getTargetInfo().getSupportedOpenCLOpts()); -- getOpenCLOptions().enableSupportedCore(getLangOpts()); -+ getOpenCLOptions().enableSupportedCoreExtensions(getLangOpts()); -+ getOpenCLOptions().supportCoreFeatures(getLangOpts()); - addImplicitTypedef("sampler_t", Context.OCLSamplerTy); - addImplicitTypedef("event_t", Context.OCLEventTy); - if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) { - addImplicitTypedef("clk_event_t", Context.OCLClkEventTy); -+ setOpenCLExtensionForType(Context.OCLClkEventTy, "__opencl_c_device_enqueue"); - addImplicitTypedef("queue_t", Context.OCLQueueTy); -+ setOpenCLExtensionForType(Context.OCLQueueTy, "__opencl_c_device_enqueue"); - addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy); -+ setOpenCLExtensionForType(Context.OCLReserveIDTy, "__opencl_c_pipes"); - addImplicitTypedef("atomic_int", Context.getAtomicType(Context.IntTy)); - addImplicitTypedef("atomic_uint", - Context.getAtomicType(Context.UnsignedIntTy)); -@@ -1915,12 +1920,25 @@ void Sema::setCurrentOpenCLExtensionForType(QualType T) { - setOpenCLExtensionForType(T, CurrOpenCLExtension); - } - -+void Sema::setCurrentOpenCLFeatureForType(QualType T) { -+ if (CurrOpenCLFeature.empty()) -+ return; -+ setOpenCLExtensionForType(T, CurrOpenCLFeature); -+} -+ - void Sema::setCurrentOpenCLExtensionForDecl(Decl *D) { - if (CurrOpenCLExtension.empty()) - return; 
- setOpenCLExtensionForDecl(D, CurrOpenCLExtension); - } - -+void Sema::setCurrentOpenCLFeatureForDecl(Decl *D) { -+ if (CurrOpenCLFeature.empty()) -+ return; -+ setOpenCLExtensionForDecl(D, CurrOpenCLFeature); -+} -+ -+ - std::string Sema::getOpenCLExtensionsFromDeclExtMap(FunctionDecl *FD) { - if (!OpenCLDeclExtMap.empty()) - return getOpenCLExtensionsFromExtMap(FD, OpenCLDeclExtMap); -@@ -1954,7 +1972,7 @@ bool Sema::isOpenCLDisabledDecl(Decl *FD) { - if (Loc == OpenCLDeclExtMap.end()) - return false; - for (auto &I : Loc->second) { -- if (!getOpenCLOptions().isEnabled(I)) -+ if (!getOpenCLOptions().isAvailableOption(I)) - return true; - } - return false; -@@ -1970,11 +1988,18 @@ bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, - return false; - bool Disabled = false; - for (auto &I : Loc->second) { -- if (I != CurrOpenCLExtension && !getOpenCLOptions().isEnabled(I)) { -- Diag(DiagLoc, diag::err_opencl_requires_extension) << Selector << DiagInfo -- << I << SrcRange; -+ if (getOpenCLOptions().isKnownExtension(I) && I != CurrOpenCLExtension && -+ !getOpenCLOptions().isAvailableOption(I)) { -+ Diag(DiagLoc, diag::err_opencl_requires_extension) -+ << Selector << DiagInfo << I << SrcRange; -+ Disabled = true; -+ } else if (getOpenCLOptions().isKnownFeature(I) && I != CurrOpenCLFeature && -+ !getOpenCLOptions().isAvailableOption(I)) { -+ Diag(DiagLoc, diag::err_opencl_requires_feature) -+ << Selector << DiagInfo << I << SrcRange; - Disabled = true; - } -+ - } - return Disabled; - } -diff --git a/lib/Sema/SemaCast.cpp b/lib/Sema/SemaCast.cpp -index 0b4645e11c..82d3972a6a 100644 ---- a/lib/Sema/SemaCast.cpp -+++ b/lib/Sema/SemaCast.cpp -@@ -2644,7 +2644,7 @@ void CastOperation::CheckCStyleCast() { - } - - if (Self.getLangOpts().OpenCL && -- !Self.getOpenCLOptions().isEnabled("cl_khr_fp16")) { -+ !Self.getOpenCLOptions().isAvailableOption("cl_khr_fp16")) { - if (DestType->isHalfType()) { - Self.Diag(SrcExpr.get()->getBeginLoc(), 
diag::err_opencl_cast_to_half) - << DestType << SrcExpr.get()->getSourceRange(); -diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp -index b2c727b5c4..5ca92d5568 100644 ---- a/lib/Sema/SemaChecking.cpp -+++ b/lib/Sema/SemaChecking.cpp -@@ -402,15 +402,23 @@ static bool checkOpenCLBlockArgs(Sema &S, Expr *BlockArg) { - } - - static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) { -- if (!S.getOpenCLOptions().isEnabled("cl_khr_subgroups")) { -- S.Diag(Call->getBeginLoc(), diag::err_opencl_requires_extension) -- << 1 << Call->getDirectCallee() << "cl_khr_subgroups"; -- return true; -- } -- return false; -+ bool ExtensionEnabled = -+ S.getOpenCLOptions().isAvailableOption("cl_khr_subgroups"); -+ bool FeatureSupported = -+ S.getOpenCLOptions().isAvailableOption("__opencl_c_subgroups"); -+ if (ExtensionEnabled || FeatureSupported) -+ return false; -+ S.Diag(Call->getBeginLoc(), diag::err_opencl_requires_extension) -+ << 1 << Call->getDirectCallee() -+ << (!ExtensionEnabled ? "cl_khr_subgroups" : "__opencl_c_subgroups"); -+ return true; - } - - static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { -+ -+ if (!S.checkOpenCLFeatureSupportForBuiltin(TheCall)) -+ return true; -+ - if (checkArgCount(S, TheCall, 2)) - return true; - -@@ -438,6 +446,10 @@ static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { - /// get_kernel_work_group_size - /// and get_kernel_preferred_work_group_size_multiple builtin functions. 
- static bool SemaOpenCLBuiltinKernelWorkGroupSize(Sema &S, CallExpr *TheCall) { -+ -+ if (!S.checkOpenCLFeatureSupportForBuiltin(TheCall)) -+ return true; -+ - if (checkArgCount(S, TheCall, 1)) - return true; - -@@ -516,6 +528,9 @@ static bool checkOpenCLEnqueueVariadicArgs(Sema &S, CallExpr *TheCall, - static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) { - unsigned NumArgs = TheCall->getNumArgs(); - -+ if (!S.checkOpenCLFeatureSupportForBuiltin(TheCall)) -+ return true; -+ - if (NumArgs < 4) { - S.Diag(TheCall->getBeginLoc(), diag::err_typecheck_call_too_few_args); - return true; -@@ -706,11 +721,69 @@ static bool checkOpenCLPipePacketType(Sema &S, CallExpr *Call, unsigned Idx) { - return false; - } - -+bool Sema::checkOpenCLFeatureSupportForBuiltin(CallExpr *Call) { -+ unsigned DiagID = 0; -+ bool IsValid = true; -+ switch (Call->getDirectCallee()->getBuiltinID()) { -+ // OpenCL v3.0 s6.13.16 - Pipe Functions require support for OpenCL C 2.0 -+ // or the __opencl_c_pipes feature macro -+ case Builtin::BIread_pipe: -+ case Builtin::BIwrite_pipe: -+ case Builtin::BIreserve_read_pipe: -+ case Builtin::BIreserve_write_pipe: -+ case Builtin::BIwork_group_reserve_read_pipe: -+ case Builtin::BIwork_group_reserve_write_pipe: -+ case Builtin::BIsub_group_reserve_read_pipe: -+ case Builtin::BIsub_group_reserve_write_pipe: -+ case Builtin::BIcommit_read_pipe: -+ case Builtin::BIcommit_write_pipe: -+ case Builtin::BIwork_group_commit_read_pipe: -+ case Builtin::BIwork_group_commit_write_pipe: -+ case Builtin::BIsub_group_commit_read_pipe: -+ case Builtin::BIsub_group_commit_write_pipe: -+ case Builtin::BIget_pipe_num_packets: -+ case Builtin::BIget_pipe_max_packets: -+ DiagID = diag::err_opencl_builtin_pipe_requires_feature; -+ IsValid = OpenCLFeatures.isAvailableOption("__opencl_c_pipes"); -+ break; -+ case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: -+ case Builtin::BIget_kernel_sub_group_count_for_ndrange: -+ DiagID = 
diag::err_opencl_builtin_subgroup_query_requires_feature; -+ IsValid = OpenCLFeatures.isAvailableOption("__opencl_c_device_enqueue"); -+ break; -+ case Builtin::BIget_kernel_work_group_size: -+ case Builtin::BIget_kernel_preferred_work_group_size_multiple: -+ DiagID = diag::err_opencl_builtin_subgroup_query_requires_feature; -+ IsValid = OpenCLFeatures.isAvailableOption("__opencl_c_device_enqueue"); -+ break; -+ case Builtin::BIenqueue_kernel: -+ DiagID = diag::err_opencl_builtin_enqueue_requires_feature; -+ IsValid = OpenCLFeatures.isAvailableOption("__opencl_c_device_enqueue"); -+ break; -+ case Builtin::BIto_global: -+ case Builtin::BIto_local: -+ case Builtin::BIto_private: -+ DiagID = diag::err_opencl_builtin_address_space_requires_feature; -+ IsValid = OpenCLFeatures.isAvailableOption("__opencl_c_generic_address_space"); -+ break; -+ -+ default: -+ break; -+ } -+ -+ if (!IsValid) -+ Diag(Call->getBeginLoc(), DiagID) << Call->getDirectCallee(); -+ -+ return IsValid; -+} -+ - // Performs semantic analysis for the read/write_pipe call. - // \param S Reference to the semantic analyzer. - // \param Call A pointer to the builtin call. - // \return True if a semantic error has been found, false otherwise. - static bool SemaBuiltinRWPipe(Sema &S, CallExpr *Call) { -+ if (!S.checkOpenCLFeatureSupportForBuiltin(Call)) -+ return true; - // OpenCL v2.0 s6.13.16.2 - The built-in read/write - // functions have two forms. - switch (Call->getNumArgs()) { -@@ -766,6 +839,9 @@ static bool SemaBuiltinRWPipe(Sema &S, CallExpr *Call) { - // \param Call The call to the builtin function to be analyzed. - // \return True if a semantic error was found, false otherwise. 
- static bool SemaBuiltinReserveRWPipe(Sema &S, CallExpr *Call) { -+ if (!S.checkOpenCLFeatureSupportForBuiltin(Call)) -+ return true; -+ - if (checkArgCount(S, Call, 2)) - return true; - -@@ -795,6 +871,9 @@ static bool SemaBuiltinReserveRWPipe(Sema &S, CallExpr *Call) { - // \param Call The call to the builtin function to be analyzed. - // \return True if a semantic error was found, false otherwise. - static bool SemaBuiltinCommitRWPipe(Sema &S, CallExpr *Call) { -+ if (!S.checkOpenCLFeatureSupportForBuiltin(Call)) -+ return true; -+ - if (checkArgCount(S, Call, 2)) - return true; - -@@ -818,6 +897,9 @@ static bool SemaBuiltinCommitRWPipe(Sema &S, CallExpr *Call) { - // \param Call The call to the builtin function to be analyzed. - // \return True if a semantic error was found, false otherwise. - static bool SemaBuiltinPipePackets(Sema &S, CallExpr *Call) { -+ if (!S.checkOpenCLFeatureSupportForBuiltin(Call)) -+ return true; -+ - if (checkArgCount(S, Call, 1)) - return true; - -@@ -838,6 +920,9 @@ static bool SemaBuiltinPipePackets(Sema &S, CallExpr *Call) { - // \return True if a semantic error has been found, false otherwise. 
- static bool SemaOpenCLBuiltinToAddr(Sema &S, unsigned BuiltinID, - CallExpr *Call) { -+ if (!S.checkOpenCLFeatureSupportForBuiltin(Call)) -+ return true; -+ - if (Call->getNumArgs() != 1) { - S.Diag(Call->getBeginLoc(), diag::err_opencl_builtin_to_addr_arg_num) - << Call->getDirectCallee() << Call->getSourceRange(); -@@ -4344,6 +4429,39 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) { - } - } - -+bool Sema::isSupportedOpenCLOMemoryrdering(int64_t Ordering) { -+ assert(llvm::isValidAtomicOrderingCABI(Ordering)); -+ auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering; -+ switch (OrderingCABI) { -+ case llvm::AtomicOrderingCABI::acquire: -+ case llvm::AtomicOrderingCABI::release: -+ case llvm::AtomicOrderingCABI::acq_rel: -+ return OpenCLFeatures.isAvailableOption("__opencl_c_atomic_order_acq_rel"); -+ case llvm::AtomicOrderingCABI::seq_cst: -+ return OpenCLFeatures.isAvailableOption("__opencl_c_atomic_order_seq_cst"); -+ -+ default: -+ return true; -+ } -+} -+ -+bool Sema::isSupportedOpenCLMemoryScope(SyncScope Scope) { -+ switch (Scope) { -+ case SyncScope::OpenCLDevice: -+ return OpenCLFeatures.isAvailableOption( -+ "__opencl_c_atomic_scope_device"); -+ case SyncScope::OpenCLAllSVMDevices: -+ return OpenCLFeatures.isAvailableOption( -+ "__opencl_c_atomic_scope_all_devices"); -+ case SyncScope::OpenCLSubGroup: -+ return OpenCLFeatures.isAvailableOption( -+ "__opencl_c_subgroups"); -+ -+ default: -+ return true; -+ } -+} -+ - ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult, - AtomicExpr::AtomicOp Op) { - CallExpr *TheCall = cast(TheCallResult.get()); -@@ -4740,21 +4858,36 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult, - - if (SubExprs.size() >= 2 && Form != Init) { - llvm::APSInt Result(32); -- if (SubExprs[1]->isIntegerConstantExpr(Result, Context) && -- !isValidOrderingForOp(Result.getSExtValue(), Op)) -- Diag(SubExprs[1]->getBeginLoc(), -- diag::warn_atomic_op_has_invalid_memory_order) 
-- << SubExprs[1]->getSourceRange(); -+ if (SubExprs[1]->isIntegerConstantExpr(Result, Context)) { -+ if (!isValidOrderingForOp(Result.getSExtValue(), Op)) -+ Diag(SubExprs[1]->getBeginLoc(), -+ diag::warn_atomic_op_has_invalid_memory_order) -+ << SubExprs[1]->getSourceRange(); -+ else if (IsOpenCL && -+ !isSupportedOpenCLOMemoryrdering(Result.getSExtValue())) { -+ Diag(SubExprs[1]->getBeginLoc(), -+ diag::err_opencl_unsupported_memory_order) -+ << SubExprs[1]->getSourceRange(); -+ return ExprError(); -+ } -+ } - } - - if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { - auto *Scope = TheCall->getArg(TheCall->getNumArgs() - 1); - llvm::APSInt Result(32); -- if (Scope->isIntegerConstantExpr(Result, Context) && -- !ScopeModel->isValid(Result.getZExtValue())) { -- Diag(Scope->getBeginLoc(), diag::err_atomic_op_has_invalid_synch_scope) -- << Scope->getSourceRange(); -+ if (Scope->isIntegerConstantExpr(Result, Context)) { -+ if (!ScopeModel->isValid(Result.getZExtValue())) { -+ Diag(Scope->getBeginLoc(), diag::err_atomic_op_has_invalid_synch_scope) -+ << Scope->getSourceRange(); -+ } else if (IsOpenCL && !isSupportedOpenCLMemoryScope( -+ ScopeModel->map(Result.getZExtValue()))) { -+ Diag(Scope->getBeginLoc(), diag::err_opencl_unsupported_memory_scope) -+ << Scope->getSourceRange(); -+ return ExprError(); -+ } - } -+ - SubExprs.push_back(Scope); - } - -diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp -index 7ddf2e88b7..4578d15ea2 100644 ---- a/lib/Sema/SemaDecl.cpp -+++ b/lib/Sema/SemaDecl.cpp -@@ -5182,8 +5182,10 @@ Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) { - Dcl && Dcl->getDeclContext()->isFileContext()) - Dcl->setTopLevelDeclInObjCContainer(); - -- if (getLangOpts().OpenCL) -+ if (getLangOpts().OpenCL) { - setCurrentOpenCLExtensionForDecl(Dcl); -+ setCurrentOpenCLFeatureForDecl(Dcl); -+ } - - return Dcl; - } -@@ -6315,7 +6317,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( - NR = NR->getPointeeType(); - } - -- if 
(!getOpenCLOptions().isEnabled("cl_khr_fp16")) { -+ if (!getOpenCLOptions().isAvailableOption("cl_khr_fp16")) { - // OpenCL v1.2 s6.1.1.1: reject declaring variables of the half and - // half array type (unless the cl_khr_fp16 extension is enabled). - if (Context.getBaseElementType(R)->isHalfType()) { -@@ -7318,7 +7320,8 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { - // OpenCL v1.2 s6.8 - The static qualifier is valid only in program - // scope. - if (getLangOpts().OpenCLVersion == 120 && -- !getOpenCLOptions().isEnabled("cl_clang_storage_class_specifiers") && -+ !getOpenCLOptions().isAvailableOption( -+ "cl_clang_storage_class_specifiers") && - NewVD->isStaticLocal()) { - Diag(NewVD->getLocation(), diag::err_static_function_scope); - NewVD->setInvalidDecl(); -@@ -7333,6 +7336,12 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { - } - - if (T->isBlockPointerType()) { -+ if (!OpenCLFeatures.isAvailableOption("__opencl_c_device_enqueue")) { -+ Diag(NewVD->getLocation(), -+ diag::err_opencl_blocks_support_requires_feature) -+ << 1; -+ return; -+ } - // OpenCL v2.0 s6.12.5 - Any block declaration must be const qualified and - // can't use 'extern' storage class. 
- if (!T.isConstQualified()) { -@@ -7360,10 +7369,11 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { - if (!T->isSamplerT() && - !(T.getAddressSpace() == LangAS::opencl_constant || - (T.getAddressSpace() == LangAS::opencl_global && -- (getLangOpts().OpenCLVersion == 200 || -- getLangOpts().OpenCLCPlusPlus)))) { -+ (OpenCLFeatures.isAvailableOption( -+ "__opencl_c_program_scope_global_variables"))))) { - int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1; -- if (getLangOpts().OpenCLVersion == 200 || getLangOpts().OpenCLCPlusPlus) -+ if (OpenCLFeatures.isAvailableOption( -+ "__opencl_c_program_scope_global_variables")) - Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space) - << Scope << "global or constant"; - else -@@ -8085,7 +8095,8 @@ static OpenCLParamType getOpenCLKernelParameterType(Sema &S, QualType PT) { - // OpenCL extension spec v1.2 s9.5: - // This extension adds support for half scalar and vector types as built-in - // types that can be used for arithmetic operations, conversions etc. -- if (!S.getOpenCLOptions().isEnabled("cl_khr_fp16") && PT->isHalfType()) -+ if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp16") && -+ PT->isHalfType()) - return InvalidKernelParam; - - if (PT->isRecordType()) -diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp -index ff9393a56b..27776bebfc 100644 ---- a/lib/Sema/SemaExpr.cpp -+++ b/lib/Sema/SemaExpr.cpp -@@ -583,8 +583,8 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { - return E; - - // OpenCL usually rejects direct accesses to values of 'half' type. 
-- if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp16") && -- T->isHalfType()) { -+ if (getLangOpts().OpenCL && -+ !getOpenCLOptions().isAvailableOption("cl_khr_fp16") && T->isHalfType()) { - Diag(E->getExprLoc(), diag::err_opencl_half_load_store) - << 0 << T; - return ExprError(); -@@ -746,7 +746,7 @@ ExprResult Sema::DefaultArgumentPromotion(Expr *E) { - if (BTy && (BTy->getKind() == BuiltinType::Half || - BTy->getKind() == BuiltinType::Float)) { - if (getLangOpts().OpenCL && -- !getOpenCLOptions().isEnabled("cl_khr_fp64")) { -+ !getOpenCLOptions().isAvailableOption("cl_khr_fp64")) { - if (BTy->getKind() == BuiltinType::Half) { - E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get(); - } -@@ -3428,7 +3428,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { - } else if (Literal.isFloatingLiteral()) { - QualType Ty; - if (Literal.isHalf){ -- if (getOpenCLOptions().isEnabled("cl_khr_fp16")) -+ if (getOpenCLOptions().isAvailableOption("cl_khr_fp16")) - Ty = Context.HalfTy; - else { - Diag(Tok.getLocation(), diag::err_half_const_requires_fp16); -@@ -3454,7 +3454,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { - Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); - } - } else if (getLangOpts().OpenCL && -- !getOpenCLOptions().isEnabled("cl_khr_fp64")) { -+ !getOpenCLOptions().isAvailableOption("cl_khr_fp64")) { - // Impose single-precision float type when cl_khr_fp64 is not enabled. 
- Diag(Tok.getLocation(), diag::warn_double_const_requires_fp64); - Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); -@@ -11367,8 +11367,9 @@ QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS, - // OpenCL v1.2 s6.1.1.1 p2: - // The half data type can only be used to declare a pointer to a buffer that - // contains half values -- if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp16") && -- LHSType->isHalfType()) { -+ if (getLangOpts().OpenCL && -+ !getOpenCLOptions().isAvailableOption("cl_khr_fp16") && -+ LHSType->isHalfType()) { - Diag(Loc, diag::err_opencl_half_load_store) << 1 - << LHSType.getUnqualifiedType(); - return QualType(); -diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp -index 10c0c6bf33..e90471b9eb 100644 ---- a/lib/Sema/SemaInit.cpp -+++ b/lib/Sema/SemaInit.cpp -@@ -5298,7 +5298,7 @@ static bool TryOCLZeroOpaqueTypeInitialization(Sema &S, - // We should allow zero initialization for all types defined in the - // cl_intel_device_side_avc_motion_estimation extension, except - // intel_sub_group_avc_mce_payload_t and intel_sub_group_avc_mce_result_t. 
-- if (S.getOpenCLOptions().isEnabled( -+ if (S.getOpenCLOptions().isAvailableOption( - "cl_intel_device_side_avc_motion_estimation") && - DestType->isOCLIntelSubgroupAVCType()) { - if (DestType->isOCLIntelSubgroupAVCMcePayloadType() || -@@ -8087,7 +8087,7 @@ ExprResult InitializationSequence::Perform(Sema &S, - unsigned AddressingMode = (0x0E & SamplerValue) >> 1; - unsigned FilterMode = (0x30 & SamplerValue) >> 4; - if (FilterMode != 1 && FilterMode != 2 && -- !S.getOpenCLOptions().isEnabled( -+ !S.getOpenCLOptions().isAvailableOption( - "cl_intel_device_side_avc_motion_estimation")) - S.Diag(Kind.getLocation(), - diag::warn_sampler_initializer_invalid_bits) -diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp -index 1ae94c8aec..5d4d2e533e 100644 ---- a/lib/Sema/SemaType.cpp -+++ b/lib/Sema/SemaType.cpp -@@ -1924,6 +1924,7 @@ static bool checkQualifiedFunction(Sema &S, QualType T, SourceLocation Loc, - return true; - } - -+ - /// Build a pointer type. - /// - /// \param T The type to which we'll be building a pointer. -@@ -4536,7 +4537,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, - // FIXME: This really should be in BuildFunctionType. - if (T->isHalfType()) { - if (S.getLangOpts().OpenCL) { -- if (!S.getOpenCLOptions().isEnabled("cl_khr_fp16")) { -+ if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp16")) { - S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return) - << T << 0 /*pointer hint*/; - D.setInvalidType(true); -@@ -4748,7 +4749,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, - // Disallow half FP parameters. - // FIXME: This really should be in BuildFunctionType. 
- if (S.getLangOpts().OpenCL) { -- if (!S.getOpenCLOptions().isEnabled("cl_khr_fp16")) { -+ if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp16")) { - S.Diag(Param->getLocation(), - diag::err_opencl_half_param) << ParamTy; - D.setInvalidType(); -@@ -7260,7 +7261,7 @@ static void deduceOpenCLImplicitAddrSpace(TypeProcessingState &State, - // The default address space name for arguments to a function in a - // program, or local variables of a function is __private. All function - // arguments shall be in the __private address space. -- if (State.getSema().getLangOpts().OpenCLVersion <= 120 && -+ if (!State.getSema().getOpenCLOptions().isAvailableOption("__opencl_c_generic_address_space") && - !State.getSema().getLangOpts().OpenCLCPlusPlus) { - ImpAddr = LangAS::opencl_private; - } else { -diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp -index e0b2b24a0d..96a23a063e 100644 ---- a/lib/Serialization/ASTReader.cpp -+++ b/lib/Serialization/ASTReader.cpp -@@ -3318,6 +3318,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { - for (unsigned I = 0, E = Record.size(); I != E; ) { - auto Name = ReadString(Record, I); - auto &Opt = OpenCLExtensions.OptMap[Name]; -+ Opt.OptType = static_cast(Record[I++]); - Opt.Supported = Record[I++] != 0; - Opt.Enabled = Record[I++] != 0; - Opt.Avail = Record[I++]; -diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp -index 37adcb7064..86c3200b60 100644 ---- a/lib/Serialization/ASTWriter.cpp -+++ b/lib/Serialization/ASTWriter.cpp -@@ -4266,6 +4266,7 @@ void ASTWriter::WriteOpenCLExtensions(Sema &SemaRef) { - for (const auto &I:Opts.OptMap) { - AddString(I.getKey(), Record); - auto V = I.getValue(); -+ Record.push_back(V.OptType); - Record.push_back(V.Supported ? 1 : 0); - Record.push_back(V.Enabled ? 
1 : 0); - Record.push_back(V.Avail); -diff --git a/test/CodeGenOpenCL/address-spaces.cl b/test/CodeGenOpenCL/address-spaces.cl -index 3c8fea2a80..ab6ee3607a 100644 ---- a/test/CodeGenOpenCL/address-spaces.cl -+++ b/test/CodeGenOpenCL/address-spaces.cl -@@ -1,8 +1,12 @@ - // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR -+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR - // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR - // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s -+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s - // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN -+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s - // RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s -+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s - // RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s - - // SPIR: %struct.S = type { i32, i32, i32* } -diff --git a/test/CodeGenOpenCL/feature-address-spaces.cl b/test/CodeGenOpenCL/feature-address-spaces.cl -new file mode 100644 -index 0000000000..f9eba4944b ---- /dev/null -+++ b/test/CodeGenOpenCL/feature-address-spaces.cl -@@ -0,0 +1,186 @@ -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -cl-feature=__opencl_c_generic_address_space -finclude-default-header -emit-llvm -o - | FileCheck %s 
--check-prefixes=AS-CHECK -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s -+ -+void check(bool); -+ -+void test1(float f, unsigned u, __local half *h_local, __global half *h_global, __private half *h_private) { -+ // AS-CHECK: {{.+}} = addrspacecast half* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half_rte(f, u, h_private); -+ // AS-CHECK: {{.+}} = addrspacecast half* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half_rtz(f, u, h_private); -+ // AS-CHECK: {{.+}} = addrspacecast half* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half8_rtp(f, u, h_private); -+ // AS-CHECK: {{.+}} = addrspacecast half* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half(f, u, h_private); -+ // AS-CHECK: {{.+}} = addrspacecast half* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half2(f, u, h_private); -+ // AS-CHECK: {{.+}} = addrspacecast half* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ sincos(f, h_private); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(3)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half_rte(f, u, h_local); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(3)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half_rtz(f, u, h_local); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(3)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half8_rtp(f, u, h_local); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(3)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half(f, u, h_local); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(3)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half2(f, u, h_local); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(3)* %{{.+}} to half 
addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ sincos(f, h_local); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(1)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half_rte(f, u, h_global); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(1)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half_rtz(f, u, h_global); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(1)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half8_rtp(f, u, h_global); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(1)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half(f, u, h_global); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(1)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore_half2(f, u, h_global); -+ // AS-CHECK: {{.+}} = addrspacecast half addrspace(1)* %{{.+}} to half addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ sincos(f, h_global); -+} -+ -+void test2(float f, float2 f2, __local float *f_local, __global float *f_global, __private float *f_private, __local float2 *f2_local, __global float2 *f2_global, __private float2 *f2_private) { -+ // AS-CHECK: {{.+}} = addrspacecast float addrspace(3)* %{{.+}} to float addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ fract(f, f_local); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float> addrspace(3)* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ fract(f2, f2_local); -+ // AS-CHECK: {{.+}} = addrspacecast float addrspace(3)* %{{.+}} to float addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ modf(f, f_local); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float> addrspace(3)* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ modf(f2, f2_local); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float> addrspace(3)* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ sincos(f2, f2_local); -+ // AS-CHECK: {{.+}} = addrspacecast float* %{{.+}} to float 
addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ fract(f, f_private); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float>* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ fract(f2, f2_private); -+ // AS-CHECK: {{.+}} = addrspacecast float* %{{.+}} to float addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ modf(f, f_private); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float>* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ modf(f2, f2_private); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float>* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ sincos(f2, f2_private); -+ fract(f, f_global); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float> addrspace(1)* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ fract(f2, f2_global); -+ // AS-CHECK: {{.+}} = addrspacecast float addrspace(1)* %{{.+}} to float addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ modf(f, f_global); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float> addrspace(1)* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ modf(f2, f2_global); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x float> addrspace(1)* {{.+}} to <2 x float> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ sincos(f2, f2_global); -+ -+} -+ -+void test3(float f, float2 f2, __local int *i_local, __global int *i_global, __private int *i_private, __local int2 *i2_local, __global int2 *i2_global, __private int2 *i2_private) { -+ // AS-CHECK: {{.+}} = addrspacecast i32 addrspace(3)* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ frexp(f, i_local); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32> addrspace(3)* {{.+}} to <2 x i32> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ frexp(f2, i2_local); -+ // AS-CHECK: {{.+}} = addrspacecast i32 addrspace(3)* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ lgamma_r(f, i_local); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32> addrspace(3)* {{.+}} to <2 x i32> addrspace(4)* -+ 
// CHECK-NOT: addrspacecast -+ lgamma_r(f2, i2_local); -+ // AS-CHECK: {{.+}} = addrspacecast i32 addrspace(3)* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ remquo(f, f, i_local); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32> addrspace(3)* {{.+}} to <2 x i32> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ remquo(f2, f, i2_local); -+ // AS-CHECK: {{.+}} = addrspacecast i32* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ frexp(f, i_private); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32>* {{.+}} to <2 x i32> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ frexp(f2, i2_private); -+ // AS-CHECK: {{.+}} = addrspacecast i32* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ lgamma_r(f, i_private); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32>* {{.+}} to <2 x i32> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ lgamma_r(f2, i2_private); -+ // AS-CHECK: {{.+}} = addrspacecast i32* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ remquo(f, f, i_private); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32>* {{.+}} to <2 x i32> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ remquo(f2, f, i2_private); -+ // AS-CHECK: {{.+}} = addrspacecast i32 addrspace(1)* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ frexp(f, i_global); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32> addrspace(1)* {{.+}} to <2 x i32> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ frexp(f2, i2_global); -+ // AS-CHECK: {{.+}} = addrspacecast i32 addrspace(1)* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ lgamma_r(f, i_global); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32> addrspace(1)* {{.+}} to <2 x i32> addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ lgamma_r(f2, i2_global); -+ // AS-CHECK: {{.+}} = addrspacecast i32 addrspace(1)* %{{.+}} to i32 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ remquo(f, f, i_global); -+ // AS-CHECK: {{.+}} = addrspacecast <2 x i32> addrspace(1)* {{.+}} to <2 x i32> 
addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ remquo(f2, f, i2_global); -+} -+ -+void test4(unsigned u, __local char *c_local, __global char *c_global, __private char *c_private) { -+ // AS-CHECK: {{.+}} = addrspacecast i8 addrspace(3)* %{{.+}} to i8 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ char2 c2 = vload2(u, c_local); -+ // AS-CHECK: {{.+}} = addrspacecast i8 addrspace(3)* %{{.+}} to i8 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore2(c2, u, c_local); -+ // AS-CHECK: {{.+}} = addrspacecast i8* %{{.+}} to i8 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ c2 = vload2(u, c_private); -+ // AS-CHECK: {{.+}} = addrspacecast i8* %{{.+}} to i8 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore2(c2, u, c_private); -+ // AS-CHECK: {{.+}} = addrspacecast i8 addrspace(1)* %{{.+}} to i8 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ c2 = vload2(u, c_global); -+ // AS-CHECK: {{.+}} = addrspacecast i8 addrspace(1)* %{{.+}} to i8 addrspace(4)* -+ // CHECK-NOT: addrspacecast -+ vstore2(c2, u, c_global); -+} -diff --git a/test/CodeGenOpenCL/to_addr_builtin.cl b/test/CodeGenOpenCL/to_addr_builtin.cl -index 72c09da488..535ddd35a4 100644 ---- a/test/CodeGenOpenCL/to_addr_builtin.cl -+++ b/test/CodeGenOpenCL/to_addr_builtin.cl -@@ -1,4 +1,6 @@ - // RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s -+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm -O0 -cl-std=cl2.0 -o - %s | FileCheck %s -+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm -O0 -cl-std=cl3.0 -cl-feature=__opencl_c_generic_address_space -o - %s | FileCheck %s - - // CHECK: %[[A:.*]] = type { float, float, float } - typedef struct { -diff --git a/test/Driver/unknown-std.cl b/test/Driver/unknown-std.cl -index 90ee97b77f..b9940116d1 100644 ---- a/test/Driver/unknown-std.cl -+++ b/test/Driver/unknown-std.cl -@@ -10,6 +10,7 @@ - // CHECK-NEXT: note: use 'cl1.1' for 'OpenCL 1.1' standard - // CHECK-NEXT: note: use 'cl1.2' for 'OpenCL 1.2' 
standard - // CHECK-NEXT: note: use 'cl2.0' for 'OpenCL 2.0' standard -+// CHECK-NEXT: note: use 'cl3.0' for 'OpenCL 3.0' standard - // CHECK-NEXT: note: use 'c++' for 'OpenCL C++ 1.0' standard - - // Make sure that no other output is present. -diff --git a/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl b/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl -index 619ecc4e47..1c903b6822 100644 ---- a/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl -+++ b/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl -@@ -1,6 +1,9 @@ - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL2.0 - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL2.0 - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL2.0 -+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL3.0 -cl-feature=__opencl_c_generic_address_space -+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL3.0 -cl-feature=__opencl_c_generic_address_space -+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL3.0 -cl-feature=__opencl_c_generic_address_space - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=c++ - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=c++ - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=c++ -diff --git a/test/SemaOpenCL/address-spaces.cl b/test/SemaOpenCL/address-spaces.cl -index 30f311d6ef..4da718d5fa 100644 ---- a/test/SemaOpenCL/address-spaces.cl -+++ b/test/SemaOpenCL/address-spaces.cl -@@ -1,5 +1,6 @@ - // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only - // RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only -+// RUN: %clang_cc1 %s -cl-std=CL3.0 
-verify -pedantic -fsyntax-only -cl-feature=__opencl_c_generic_address_space - // RUN: %clang_cc1 %s -cl-std=c++ -verify -pedantic -fsyntax-only - - __constant int ci = 1; -diff --git a/test/SemaOpenCL/feature-device-enqueue.cl b/test/SemaOpenCL/feature-device-enqueue.cl -new file mode 100644 -index 0000000000..cae604759b ---- /dev/null -+++ b/test/SemaOpenCL/feature-device-enqueue.cl -@@ -0,0 +1,29 @@ -+// RUN: %clang_cc1 %s -cl-feature=__opencl_c_generic_address_space -cl-std=CL3.0 -triple "spir-unknown-unknown" -verify -pedantic -fsyntax-only -+ -+__kernel void test1() { -+ queue_t default_queue; // expected-error{{use of type 'queue_t' requires __opencl_c_device_enqueue feature to be supported}} -+ clk_event_t evt; // expected-error{{use of type 'clk_event_t' requires __opencl_c_device_enqueue feature to be supported}} -+} -+ -+__kernel void test2() { -+ void (^const block_A)(int) = ^(int a) { // expected-error{{OpenCL blocks usage requires feature support}} -+ return; -+ }; -+ void (^const block_B)(void) = ^{ // expected-error{{OpenCL blocks usage requires feature support}} -+ return; -+ }; -+ void (^const block_C)(local void *) = ^(local void *a) { // expected-error{{OpenCL blocks usage requires feature support}} -+ return; -+ }; -+ void (^const block_D)(local int *) = ^(local int *a) { // expected-error{{OpenCL blocks usage requires feature support}} -+ return; -+ }; -+ -+} -+ -+typedef struct {int a;} ndrange_t; -+ -+__kernel void test3() { -+ queue_t default_queue; // expected-error{{use of type 'queue_t' requires __opencl_c_device_enqueue feature to be supported}} -+} -+ -diff --git a/test/SemaOpenCL/feature-images.cl b/test/SemaOpenCL/feature-images.cl -new file mode 100644 -index 0000000000..489fd2d386 ---- /dev/null -+++ b/test/SemaOpenCL/feature-images.cl -@@ -0,0 +1,28 @@ -+// RUN: %clang_cc1 -cl-std=cl2.0 -fsyntax-only -verify %s -triple spir-unknown-unknown -+// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -verify %s -triple spir-unknown-unknown 
-+// RUN: %clang_cc1 -cl-std=cl3.0 -cl-feature=__opencl_c_3d_image_writes -fsyntax-only -verify %s -triple spir-unknown-unknown -+// RUN: %clang_cc1 -cl-std=cl3.0 -cl-feature=__opencl_c_read_write_images -fsyntax-only -verify %s -triple spir-unknown-unknown -+ -+#if (defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ < 300) -+// expected-no-diagnostics -+__kernel void write_3d_image(__write_only image3d_t i) {} -+__kernel void read_write_3d_image(__read_write image3d_t i) {} -+__kernel void read_write_2d_image(__read_write image2d_t i) {} -+__kernel void read_write_1d_image(__read_write image1d_t i) {} -+#else -+#ifndef __opencl_c_3d_image_writes -+__kernel void write_3d_image(__write_only image3d_t i) { // expected-error{{use of type '__write_only image3d_t' requires __opencl_c_3d_image_writes feature to be supported}} -+} -+#endif -+#ifndef __opencl_c_read_write_images -+__kernel void read_write_3d_image(__read_write image3d_t i) { // expected-error{{use of type '__read_write image3d_t' requires __opencl_c_read_write_images feature to be supported}} -+} -+ -+__kernel void read_write_2d_image(__read_write image2d_t i) { // expected-error{{use of type '__read_write image2d_t' requires __opencl_c_read_write_images feature to be supported}} -+} -+ -+__kernel void read_write_1d_image(__read_write image1d_t i) { // expected-error{{use of type '__read_write image1d_t' requires __opencl_c_read_write_images feature to be supported}} -+} -+#endif -+ -+#endif -diff --git a/test/SemaOpenCL/feature-memory-scope.cl b/test/SemaOpenCL/feature-memory-scope.cl -new file mode 100644 -index 0000000000..6fbce297ca ---- /dev/null -+++ b/test/SemaOpenCL/feature-memory-scope.cl -@@ -0,0 +1,118 @@ -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -verify -fsyntax-only -triple=spir64 -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -verify -fsyntax-only -triple=spir64 
-cl-feature=__opencl_c_subgroups,__opencl_c_atomic_scope_device,__opencl_c_atomic_scope_all_devices,__opencl_c_atomic_order_acq_rel,__opencl_c_atomic_order_seq_cst -DSUPPORTED -+ -+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable -+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable -+ -+typedef enum memory_order { -+ memory_order_relaxed = __ATOMIC_RELAXED, -+ memory_order_acquire = __ATOMIC_ACQUIRE, -+ memory_order_release = __ATOMIC_RELEASE, -+ memory_order_acq_rel = __ATOMIC_ACQ_REL, -+ memory_order_seq_cst = __ATOMIC_SEQ_CST -+} memory_order; -+ -+typedef enum memory_scope { -+ memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, -+ memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, -+ memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, -+ memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, -+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) -+ memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -+#endif -+} memory_scope; -+ -+#ifndef SUPPORTED -+void test1(atomic_int *Ap, int *p, int val) { -+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acquire, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_release, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acq_rel, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ -+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acquire, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_release, memory_scope_work_group); // expected-error {{OpenCL memory 
order requires feature support}} -+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acq_rel, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_seq_cst, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ -+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acquire, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_release, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acq_rel, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_seq_cst, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ -+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acquire, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_release, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acq_rel, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_seq_cst, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ -+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acquire, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_release, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_xor(Ap, val, 
memory_order_acq_rel, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_seq_cst, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ -+ (void)__opencl_atomic_exchange(Ap, val, memory_order_acquire, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_exchange(Ap, val, memory_order_release, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_exchange(Ap, val, memory_order_acq_rel, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_exchange(Ap, val, memory_order_seq_cst, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ -+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ -+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_release, 
memory_order_relaxed, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group); // expected-error {{OpenCL memory order requires feature support}} -+ -+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_all_svm_devices); // expected-error{{OpenCL memory scope requires feature support}} -+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device); // expected-error{{OpenCL memory scope requires feature support}} -+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_sub_group); // // expected-error{{OpenCL memory scope requires feature support}} -+} -+#else -+// expected-no-diagnostics -+void test2(atomic_int *Ap, int *p, int val) { -+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acquire, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_release, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acq_rel, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_seq_cst, memory_scope_work_group); -+ -+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acquire, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_release, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acq_rel, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_seq_cst, memory_scope_work_group); -+ -+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acquire, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_release, memory_scope_work_group); -+ 
(void)__opencl_atomic_fetch_and(Ap, val, memory_order_acq_rel, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_seq_cst, memory_scope_work_group); -+ -+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acquire, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_release, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acq_rel, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_seq_cst, memory_scope_work_group); -+ -+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acquire, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_release, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acq_rel, memory_scope_work_group); -+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_seq_cst, memory_scope_work_group); -+ -+ (void)__opencl_atomic_exchange(Ap, val, memory_order_acquire, memory_scope_work_group); -+ (void)__opencl_atomic_exchange(Ap, val, memory_order_release, memory_scope_work_group); -+ (void)__opencl_atomic_exchange(Ap, val, memory_order_acq_rel, memory_scope_work_group); -+ (void)__opencl_atomic_exchange(Ap, val, memory_order_seq_cst, memory_scope_work_group); -+ -+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); -+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_group); -+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group); -+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group); -+ -+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); -+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, 
val, memory_order_release, memory_order_relaxed, memory_scope_work_group); -+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group); -+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group); -+ -+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_all_svm_devices); -+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device); -+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_sub_group); -+} -+#endif -diff --git a/test/SemaOpenCL/feature-pipes.cl b/test/SemaOpenCL/feature-pipes.cl -new file mode 100644 -index 0000000000..b7d261645d ---- /dev/null -+++ b/test/SemaOpenCL/feature-pipes.cl -@@ -0,0 +1,71 @@ -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_subgroups -+ -+#pragma OPENCL EXTENSION cl_khr_subgroups : enable -+ -+void test1(read_only pipe int p, global int *ptr) { -+ read_pipe(p, ptr); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+ reserve_id_t rid; // expected-error{{use of type 'reserve_id_t' requires __opencl_c_pipes feature to be supported}} -+ rid = reserve_read_pipe(p, 2); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test2(read_only pipe int p, global int *ptr, int tmp) { -+ read_pipe(p, tmp, 2, ptr); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+ commit_read_pipe(p, tmp); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test3(write_only pipe int p, global int *ptr) { -+ write_pipe(p, ptr); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+ reserve_id_t rid; // expected-error{{use of type 'reserve_id_t' requires __opencl_c_pipes feature to be supported}} -+ rid = reserve_write_pipe(p, 2); // 
expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test4(write_only pipe int p, global int *ptr, int tmp) { -+ write_pipe(p, tmp, 2, ptr); // // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+ commit_write_pipe(p, tmp); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test5(read_only pipe int p, global int *ptr) { -+ reserve_id_t rid; // expected-error{{use of type 'reserve_id_t' requires __opencl_c_pipes feature to be supported}} -+ rid = work_group_reserve_read_pipe(p, 2); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test6(int p, int tmp) { -+ work_group_commit_read_pipe(p, tmp); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test7(write_only pipe int p, global int *ptr) { -+ reserve_id_t rid; // expected-error{{use of type 'reserve_id_t' requires __opencl_c_pipes feature to be supported}} -+ rid = work_group_reserve_write_pipe(p, 2); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test8(int p, int tmp) { -+ work_group_commit_write_pipe(p, tmp); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test9(read_only pipe int p, global int *ptr) { -+ reserve_id_t rid; // expected-error{{use of type 'reserve_id_t' requires __opencl_c_pipes feature to be supported}} -+ rid = sub_group_reserve_read_pipe(p, 2); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test10(int p, int tmp) { -+ sub_group_commit_read_pipe(p, tmp); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test11(write_only pipe int p, global int *ptr) { -+ reserve_id_t rid; // expected-error{{use of type 'reserve_id_t' requires __opencl_c_pipes feature to be supported}} -+ rid = 
sub_group_reserve_write_pipe(p, 2); //expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test12(int p, int tmp) { -+ sub_group_commit_write_pipe(p, tmp); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test13(read_only pipe int p, global int *ptr) { -+ *ptr = get_pipe_num_packets(p); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+ *ptr = get_pipe_max_packets(p); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -+ -+void test14(write_only pipe int p, global int *ptr) { -+ *ptr = get_pipe_num_packets(p); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+ *ptr = get_pipe_max_packets(p); // expected-error{{pipe functions require __opencl_c_pipes feature to be supported}} -+} -diff --git a/test/SemaOpenCL/invalid-block.cl b/test/SemaOpenCL/invalid-block.cl -index 5d6dc380a3..16ebd64511 100644 ---- a/test/SemaOpenCL/invalid-block.cl -+++ b/test/SemaOpenCL/invalid-block.cl -@@ -1,5 +1,4 @@ - // RUN: %clang_cc1 -verify -fblocks -cl-std=CL2.0 %s -- - // OpenCL v2.0 s6.12.5 - void f0(int (^const bl)()); - // All blocks declarations must be const qualified and initialized. 
-diff --git a/test/SemaOpenCL/storageclass-cl20.cl b/test/SemaOpenCL/storageclass-cl20.cl -index 581701d2a6..a4dee00cfe 100644 ---- a/test/SemaOpenCL/storageclass-cl20.cl -+++ b/test/SemaOpenCL/storageclass-cl20.cl -@@ -1,4 +1,5 @@ - // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-feature=__opencl_c_program_scope_global_variables,__opencl_c_generic_address_space - - int G2 = 0; - global int G3 = 0; -diff --git a/test/SemaOpenCL/storageclass.cl b/test/SemaOpenCL/storageclass.cl -index f35ab9c2e0..f878073b2d 100644 ---- a/test/SemaOpenCL/storageclass.cl -+++ b/test/SemaOpenCL/storageclass.cl -@@ -1,5 +1,4 @@ - // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -- - static constant int G1 = 0; - constant int G2 = 0; - int G3 = 0; // expected-error{{program scope variable must reside in constant address space}} -diff --git a/test/SemaOpenCL/to_addr_builtin.cl b/test/SemaOpenCL/to_addr_builtin.cl -index 26389d24fc..1c8c7b49e9 100644 ---- a/test/SemaOpenCL/to_addr_builtin.cl -+++ b/test/SemaOpenCL/to_addr_builtin.cl -@@ -1,6 +1,6 @@ - // RUN: %clang_cc1 -verify -fsyntax-only %s - // RUN: %clang_cc1 -Wconversion -verify -fsyntax-only -cl-std=CL2.0 %s -- -+// RUN: %clang_cc1 -Wconversion -verify -fsyntax-only -cl-std=CL3.0 -cl-feature=__opencl_c_generic_address_space %s - void test(void) { - global int *glob; - local int *loc; --- -2.17.1 - diff --git a/patches/clang/0006-OpenCL-3.0-support.patch b/patches/clang/0006-OpenCL-3.0-support.patch new file mode 100644 index 00000000..9cec90f2 --- /dev/null +++ b/patches/clang/0006-OpenCL-3.0-support.patch @@ -0,0 +1,8466 @@ +From d91e758930a7e59d29525659b5b698c6e9456cee Mon Sep 17 00:00:00 2001 +From: Anton Zabaznov +Date: Thu, 24 Sep 2020 00:12:24 +0300 +Subject: [PATCH] OpenCL 3.0 support + +--- + include/clang/Basic/Builtins.def | 67 +- + include/clang/Basic/Builtins.h | 13 +- + 
include/clang/Basic/DiagnosticParseKinds.td | 2 + + include/clang/Basic/DiagnosticSemaKinds.td | 7 + + include/clang/Basic/LangOptions.def | 2 + + include/clang/Basic/OpenCLExtensions.def | 15 + + include/clang/Basic/OpenCLOptions.h | 144 +- + include/clang/Driver/Options.td | 2 +- + include/clang/Frontend/LangStandards.def | 5 + + include/clang/Sema/Sema.h | 9 + + lib/AST/ASTContext.cpp | 3 +- + lib/Basic/Builtins.cpp | 27 +- + lib/Basic/TargetInfo.cpp | 11 + + lib/Basic/Targets.cpp | 1 - + lib/CodeGen/CodeGenFunction.cpp | 6 +- + lib/Frontend/CompilerInvocation.cpp | 7 +- + lib/Frontend/InitPreprocessor.cpp | 8 +- + lib/Headers/opencl-c-base.h | 578 +++ + lib/Headers/opencl-c.h | 3358 ++++++++++++++--- + lib/Parse/ParseDecl.cpp | 9 +- + lib/Parse/ParsePragma.cpp | 10 +- + lib/Sema/Sema.cpp | 47 +- + lib/Sema/SemaChecking.cpp | 38 +- + lib/Sema/SemaDecl.cpp | 9 +- + lib/Sema/SemaDeclAttr.cpp | 7 + + lib/Sema/SemaDeclCXX.cpp | 10 + + lib/Sema/SemaType.cpp | 24 +- + test/CodeGenOpenCL/addr-space-struct-arg.cl | 7 +- + .../address-spaces-conversions.cl | 2 + + test/CodeGenOpenCL/address-spaces-mangling.cl | 3 +- + test/CodeGenOpenCL/address-spaces.cl | 4 + + .../amdgcn-automatic-variable.cl | 1 + + test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl | 21 +- + test/CodeGenOpenCL/cl-uniform-wg-size.cl | 2 + + test/CodeGenOpenCL/fpmath.cl | 2 + + .../generic-address-space-feature.cl | 26 + + .../intel-subgroups-avc-ext-types.cl | 1 + + .../kernels-have-spir-cc-by-default.cl | 3 + + test/CodeGenOpenCL/logical-ops.cl | 1 + + test/CodeGenOpenCL/no-half.cl | 1 + + test/CodeGenOpenCL/pipe_builtin.cl | 5 + + test/CodeGenOpenCL/pipe_types.cl | 1 + + test/CodeGenOpenCL/printf.cl | 2 + + test/CodeGenOpenCL/unroll-hint.cl | 1 + + test/Driver/autocomplete.c | 3 +- + test/Driver/opencl.cl | 2 + + test/Driver/unknown-std.cl | 1 + + test/Frontend/stdlang.c | 1 + + test/Headers/opencl-c-header.cl | 9 +- + test/Index/pipe-size.cl | 7 + + test/Preprocessor/init.c | 1 + + 
test/Preprocessor/predefined-macros.c | 13 + + .../Sema/feature-extensions-simult-support.cl | 75 + + test/Sema/features-ignore-pragma.cl | 24 + + test/Sema/opencl-features-pipes.cl | 18 + + test/Sema/opencl-features.cl | 127 + + test/Sema/pipe_builtins_feature.cl | 23 + + .../address-spaces-conversions-cl2.0.cl | 3 + + test/SemaOpenCL/address-spaces.cl | 1 + + test/SemaOpenCL/cl20-device-side-enqueue.cl | 16 +- + .../SemaOpenCL/forget-unsupported-builtins.cl | 23 + + test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | 1 + + test/SemaOpenCL/storageclass-cl20.cl | 1 + + 63 files changed, 4129 insertions(+), 722 deletions(-) + create mode 100644 lib/Headers/opencl-c-base.h + create mode 100644 test/CodeGenOpenCL/generic-address-space-feature.cl + create mode 100644 test/Sema/feature-extensions-simult-support.cl + create mode 100644 test/Sema/features-ignore-pragma.cl + create mode 100644 test/Sema/opencl-features-pipes.cl + create mode 100644 test/Sema/opencl-features.cl + create mode 100644 test/Sema/pipe_builtins_feature.cl + create mode 100644 test/SemaOpenCL/forget-unsupported-builtins.cl + +diff --git a/include/clang/Basic/Builtins.def b/include/clang/Basic/Builtins.def +index fa031ce09f..3acfafe3b5 100644 +--- a/include/clang/Basic/Builtins.def ++++ b/include/clang/Basic/Builtins.def +@@ -103,6 +103,10 @@ + # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) + #endif + ++#if defined(BUILTIN) && !defined(OPENCLBUILTIN) ++# define OPENCLBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS) ++#endif ++ + // Standard libc/libm functions: + BUILTIN(__builtin_atan2 , "ddd" , "Fne") + BUILTIN(__builtin_atan2f, "fff" , "Fne") +@@ -1460,48 +1464,54 @@ BUILTIN(__builtin_coro_suspend, "cIb", "n") + BUILTIN(__builtin_coro_param, "bv*v*", "n") + // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions. + // We need the generic prototype, since the packet type could be anything. 
+-LANGBUILTIN(read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC20_LANG) +-LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) +-LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) +-LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") 
++OPENCLBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + + // OpenCL v2.0 s6.13.17 - Enqueue kernel functions. + // Custom builtin check allows to perform special check of passed block arguments. +-LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(enqueue_kernel, "i.", "tn", OCLC2P_LANG, ++ "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC2P_LANG, ++ "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", ++ OCLC2P_LANG, "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", ++ OCLC2P_LANG, "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_device_enqueue") + + // OpenCL v2.0 s6.13.9 - Address space qualifier functions. +-LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG) +-LANGBUILTIN(to_local, "v*v*", "tn", OCLC20_LANG) +-LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG) ++// FIXME: Pointer parameters of OpenCL builtins should have their address space ++// requirement defined. 
++OPENCLBUILTIN(to_global, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") ++OPENCLBUILTIN(to_local, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") ++OPENCLBUILTIN(to_private, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") + + // OpenCL half load/store builtin +-LANGBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES) ++OPENCLBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES, "") + + // Builtins for os_log/os_trace + BUILTIN(__builtin_os_log_format_buffer_size, "zcC*.", "p:0:nut") +@@ -1522,3 +1532,4 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") + #undef BUILTIN + #undef LIBBUILTIN + #undef LANGBUILTIN ++#undef OPENCLBUILTIN +diff --git a/include/clang/Basic/Builtins.h b/include/clang/Basic/Builtins.h +index fa2bcc4c7a..2db27b7d81 100644 +--- a/include/clang/Basic/Builtins.h ++++ b/include/clang/Basic/Builtins.h +@@ -36,13 +36,13 @@ enum LanguageID { + CXX_LANG = 0x4, // builtin for cplusplus only. + OBJC_LANG = 0x8, // builtin for objective-c and objective-c++ + MS_LANG = 0x10, // builtin requires MS mode. +- OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only. ++ OCLC2P_LANG = 0x20, // builtin for OpenCL C 2.0+ versions. + OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only. + OMP_LANG = 0x80, // builtin requires OpenMP. + ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages. + ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode. + ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode. 
+- ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC20_LANG // builtin for OCLC languages. ++ ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC2P_LANG // builtin for OCLC languages. + }; + + namespace Builtin { +@@ -225,6 +225,10 @@ public: + /// for non-builtins. + bool canBeRedeclared(unsigned ID) const; + ++ bool requiresFeatures(unsigned ID) const { ++ return requiresFeatures(getRecord(ID)); ++ } ++ + private: + const Info &getRecord(unsigned ID) const; + +@@ -232,6 +236,11 @@ private: + bool builtinIsSupported(const Builtin::Info &BuiltinInfo, + const LangOptions &LangOpts); + ++ bool OclBuiltinIsSupported(const Builtin::Info &BuiltinInfo, ++ const LangOptions &LangOpts) const; ++ ++ bool requiresFeatures(const Builtin::Info &BuiltinInfo) const; ++ + /// Helper function for isPrintfLike and isScanfLike. + bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg, + const char *Fmt) const; +diff --git a/include/clang/Basic/DiagnosticParseKinds.td b/include/clang/Basic/DiagnosticParseKinds.td +index 06281e2904..9440365251 100644 +--- a/include/clang/Basic/DiagnosticParseKinds.td ++++ b/include/clang/Basic/DiagnosticParseKinds.td +@@ -1124,6 +1124,8 @@ def warn_pragma_unsupported_extension : Warning< + "unsupported OpenCL extension %0 - ignoring">, InGroup; + def warn_pragma_extension_is_core : Warning< + "OpenCL extension %0 is core feature or supported optional core feature - ignoring">, InGroup>, DefaultIgnore; ++def warn_opencl_pragma_feature_ignore : Warning< ++ "OpenCL feature support can't be controlled via pragma, ignoring">, InGroup; + + // OpenCL errors. 
+ def err_opencl_taking_function_address_parser : Error< +diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td +index 7ef57b02fe..2c8ab973c9 100644 +--- a/include/clang/Basic/DiagnosticSemaKinds.td ++++ b/include/clang/Basic/DiagnosticSemaKinds.td +@@ -8680,6 +8680,13 @@ def err_opencl_builtin_expected_type : Error< + def ext_opencl_ext_vector_type_rgba_selector: ExtWarn< + "vector component name '%0' is an OpenCL version 2.2 feature">, + InGroup; ++ ++def err_opencl_pipes_require_feat : Error< ++ "usage of OpenCL pipes requires feature support">; ++def err_opencl_memory_scope_require_feat : Error< ++ "usage of memory scope requires feature support">; ++def err_opencl_memory_ordering_require_feat : Error< ++ "usage of memory ordering requires feature support">; + } // end of sema category + + let CategoryName = "OpenMP Issue" in { +diff --git a/include/clang/Basic/LangOptions.def b/include/clang/Basic/LangOptions.def +index 49961856c9..3034d417e6 100644 +--- a/include/clang/Basic/LangOptions.def ++++ b/include/clang/Basic/LangOptions.def +@@ -193,6 +193,8 @@ LANGOPT(OpenCL , 1, 0, "OpenCL") + LANGOPT(OpenCLVersion , 32, 0, "OpenCL C version") + LANGOPT(OpenCLCPlusPlus , 1, 0, "OpenCL C++") + LANGOPT(OpenCLCPlusPlusVersion , 32, 0, "OpenCL C++ version") ++LANGOPT(OpenCLGenericKeyword , 1, 0, "OpenCL generic keyword") ++LANGOPT(OpenCLPipeKeyword , 1, 0, "OpenCL pipe keyword") + LANGOPT(NativeHalfType , 1, 0, "Native half type support") + LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns") + LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") +diff --git a/include/clang/Basic/OpenCLExtensions.def b/include/clang/Basic/OpenCLExtensions.def +index c0ba1565b5..77c905ac6c 100644 +--- a/include/clang/Basic/OpenCLExtensions.def ++++ b/include/clang/Basic/OpenCLExtensions.def +@@ -88,6 +88,21 @@ OPENCLEXT_INTERNAL(cl_intel_subgroups, 120, ~0U) + OPENCLEXT_INTERNAL(cl_intel_subgroups_short, 120, ~0U) + 
OPENCLEXT_INTERNAL(cl_intel_device_side_avc_motion_estimation, 120, ~0U) + ++OPENCLEXT_INTERNAL(__opencl_c_pipes, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_generic_address_space, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_work_group_collective_functions, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_order_acq_rel, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_order_seq_cst, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_device, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_all_devices, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_subgroups, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_3d_image_writes, 100, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_device_enqueue, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_read_write_images, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_program_scope_global_variables, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_fp64, 120, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_int64, 100, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_images, 100, ~0U) + #undef OPENCLEXT_INTERNAL + + #ifdef OPENCLEXT +diff --git a/include/clang/Basic/OpenCLOptions.h b/include/clang/Basic/OpenCLOptions.h +index c76fa88092..43ff07350c 100644 +--- a/include/clang/Basic/OpenCLOptions.h ++++ b/include/clang/Basic/OpenCLOptions.h +@@ -17,11 +17,16 @@ + + #include "clang/Basic/LangOptions.h" + #include "llvm/ADT/StringMap.h" ++#include "llvm/ADT/StringSwitch.h" + + namespace clang { + + /// OpenCL supported extensions and optional core features + class OpenCLOptions { ++ // OpenCL Version ++ unsigned CLVer = 120; ++ bool IsOpenCLCPlusPlus = false; ++ + struct Info { + bool Supported; // Is this option supported + bool Enabled; // Is this option enabled +@@ -32,7 +37,38 @@ class OpenCLOptions { + :Supported(S), Enabled(E), Avail(A), Core(C){} + }; + llvm::StringMap OptMap; ++ + public: ++ void setOpenCLVersion(const LangOptions &LO) { ++ IsOpenCLCPlusPlus = LO.OpenCLCPlusPlus; ++ CLVer = IsOpenCLCPlusPlus ? 
200 : LO.OpenCLVersion; ++ } ++ ++ // Get extension which is semantically equivalent to a given feature ++ // if exists (e.g. __opencl_c_subgroups -> cl_khr_subgroups) ++ llvm::Optional getEquivalentExtension(StringRef Feature) const { ++ return llvm::StringSwitch>(Feature) ++ .Case("__opencl_c_3d_image_writes", ++ Optional("cl_khr_3d_image_writes")) ++ .Case("__opencl_c_subgroups", Optional("cl_khr_subgroups")) ++ .Case("__opencl_c_fp64", Optional("cl_khr_fp64")) ++ .Default(Optional()); ++ } ++ ++ // Same as above but for extensions ++ llvm::Optional getEquivalentFeature(StringRef Extension) const { ++ return llvm::StringSwitch>(Extension) ++ .Case("cl_khr_3d_image_writes", ++ Optional("__opencl_c_3d_image_writes")) ++ .Case("cl_khr_subgroups", Optional("__opencl_c_subgroups")) ++ .Case("cl_khr_fp64", Optional("__opencl_c_fp64")) ++ .Default(Optional()); ++ } ++ ++ bool isFeature(llvm::StringRef Ext) const { ++ return Ext.startswith("__opencl_c"); ++ } ++ + bool isKnown(llvm::StringRef Ext) const { + return OptMap.find(Ext) != OptMap.end(); + } +@@ -43,32 +79,88 @@ public: + + // Is supported as either an extension or an (optional) core feature for + // OpenCL version \p CLVer. +- bool isSupported(llvm::StringRef Ext, LangOptions LO) const { ++ bool isSupported(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer; + } + + // Is supported (optional) OpenCL core features for OpenCL version \p CLVer. + // For supported extension, return false. +- bool isSupportedCore(llvm::StringRef Ext, LangOptions LO) const { ++ bool isSupportedCore(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 
200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer && I.Core != ~0U && CLVer >= I.Core; + } + + // Is supported OpenCL extension for OpenCL version \p CLVer. + // For supported (optional) core feature, return false. +- bool isSupportedExtension(llvm::StringRef Ext, LangOptions LO) const { ++ bool isSupportedExtension(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core); + } + ++ // Support features whose support is directly related to the ++ // specific OpenCL version. For example, OpenCL 2.0 supports ++ // all features that are optional in 3.0 ++ void adjustFeatures() { ++ // Support int64 by default (assume compiling for FULL profile) ++ OptMap["__opencl_c_int64"].Supported = true; ++ ++ if (CLVer >= 300) { ++ // Simultaneously support extension and corresponding feature ++ for (llvm::StringRef F : ++ {"__opencl_c_subgroups", "__opencl_c_3d_image_writes", ++ "__opencl_c_fp64"}) { ++ auto Ext = getEquivalentExtension(F); ++ OptMap[*Ext].Supported = OptMap[F].Supported; ++ } ++ ++ // OpenCL C compilers that define the feature macro __opencl_c_pipes or ++ // or __opencl_c_device_enqueue must also define the ++ // feature macro __opencl_c_generic_address_space. 
++ if (OptMap["__opencl_c_pipes"].Supported || ++ OptMap["__opencl_c_device_enqueue"].Supported) ++ OptMap["__opencl_c_generic_address_space"].Supported = true; ++ ++ // OpenCL C compilers that define the feature macro ++ // __opencl_c_3d_image_writes or __opencl_c_read_write_images must also ++ // define the feature macro __opencl_c_images ++ if (OptMap["__opencl_c_3d_image_writes"].Supported || ++ OptMap["__opencl_c_read_write_images"].Supported) ++ OptMap["__opencl_c_images"].Supported = true; ++ ++ // All other features are already supported with options ++ // or in target settings ++ return; ++ } ++ ++ auto FeaturesRange = llvm::make_filter_range( ++ OptMap, [&](llvm::StringMapEntry &OptVal) { ++ auto Opt = OptVal.getKey(); ++ return isFeature(Opt); ++ }); ++ ++ for (auto &It : FeaturesRange) { ++ auto &Info = It.getValue(); ++ // For OpenCL version less then 3.0 some ++ // features should be supported simulateneously ++ // with specific extension ++ if (Optional Ext = getEquivalentExtension(It.getKey())) ++ Info.Supported = Info.Enabled = OptMap[*Ext].Supported; ++ else if (Info.Avail <= CLVer) ++ Info.Supported = Info.Enabled = true; ++ } ++ } ++ + void enable(llvm::StringRef Ext, bool V = true) { ++ // Ignore disabling extensions if corresponding features ++ // already supported for OpenCL version higher then 3.0 ++ if (CLVer >= 300) ++ if (Optional F = getEquivalentFeature(Ext)) ++ if (V != OptMap[*F].Enabled) ++ return; + OptMap[Ext].Enabled = V; + } + +@@ -97,7 +189,7 @@ public: + OptMap[Ext].Supported = V; + } + +- OpenCLOptions(){ ++ OpenCLOptions() { + #define OPENCLEXT_INTERNAL(Ext, AvailVer, CoreVer) \ + OptMap[#Ext].Avail = AvailVer; \ + OptMap[#Ext].Core = CoreVer; +@@ -105,35 +197,53 @@ public: + } + + void addSupport(const OpenCLOptions &Opts) { ++ assert(IsOpenCLCPlusPlus == Opts.IsOpenCLCPlusPlus && CLVer == Opts.CLVer); + for (auto &I:Opts.OptMap) +- if (I.second.Supported) ++ if (I.second.Supported) { + OptMap[I.getKey()].Supported = true; 
++ // All features are enabled as they are supported ++ if (isFeature(I.getKey())) ++ OptMap[I.getKey()].Enabled = true; ++ } ++ if (CLVer >= 300) { ++ // Enabling extensions with respect to features ++ for (llvm::StringRef Ext : ++ {"cl_khr_3d_image_writes", "cl_khr_subgroups", "cl_khr_fp64"}) { ++ auto Feature = getEquivalentFeature(Ext); ++ enable(Ext, OptMap[*Feature].Enabled); ++ } ++ } + } + + void copy(const OpenCLOptions &Opts) { ++ CLVer = Opts.CLVer; ++ IsOpenCLCPlusPlus = Opts.IsOpenCLCPlusPlus; + OptMap = Opts.OptMap; + } + + // Turn on or off support of all options. + void supportAll(bool On = true) { +- for (llvm::StringMap::iterator I = OptMap.begin(), +- E = OptMap.end(); I != E; ++I) +- I->second.Supported = On; ++ for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); ++ I != E; ++I) ++ if (!isFeature(I->getKey())) ++ I->second.Supported = On; + } + + void disableAll() { +- for (llvm::StringMap::iterator I = OptMap.begin(), +- E = OptMap.end(); I != E; ++I) +- I->second.Enabled = false; ++ for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); ++ I != E; ++I) { ++ auto Ext = I->getKey(); ++ if (!isFeature(Ext)) ++ enable(Ext, false); ++ } + } + +- void enableSupportedCore(LangOptions LO) { ++ void enableSupportedCore() { + for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) +- if (isSupportedCore(I->getKey(), LO)) ++ if (isSupportedCore(I->getKey())) + I->second.Enabled = true; + } +- + friend class ASTWriter; + friend class ASTReader; + }; +diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td +index d02d9744d7..62739a6972 100644 +--- a/include/clang/Driver/Options.td ++++ b/include/clang/Driver/Options.td +@@ -519,7 +519,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group, Flags<[CC + def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group, Flags<[CC1Option]>, + HelpText<"OpenCL only. 
Allow use of less precise no signed zeros computations in the generated binary.">; + def cl_std_EQ : Joined<["-"], "cl-std=">, Group, Flags<[CC1Option]>, +- HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,c++">; ++ HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0">; + def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, Flags<[CC1Option]>, + HelpText<"OpenCL only. Allow denormals to be flushed to zero.">; + def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, +diff --git a/include/clang/Frontend/LangStandards.def b/include/clang/Frontend/LangStandards.def +index 0fdd35f320..181c4493aa 100644 +--- a/include/clang/Frontend/LangStandards.def ++++ b/include/clang/Frontend/LangStandards.def +@@ -161,11 +161,16 @@ LANGSTANDARD(opencl20, "cl2.0", + LANGSTANDARD(openclcpp, "c++", + OpenCL, "OpenCL C++ 1.0", + LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | Digraphs | OpenCL) ++LANGSTANDARD(opencl30, "cl3.0", ++ OpenCL, "OpenCL 3.0", ++ LineComment | C99 | Digraphs | HexFloat | OpenCL) + + LANGSTANDARD_ALIAS_DEPR(opencl10, "CL") + LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1") + LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2") + LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0") ++LANGSTANDARD_ALIAS_DEPR(opencl30, "CL3.0") ++LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++") + + // CUDA + LANGSTANDARD(cuda, "cuda", CUDA, "NVIDIA CUDA(tm)", +diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h +index ced5773f0c..4344efb620 100644 +--- a/include/clang/Sema/Sema.h ++++ b/include/clang/Sema/Sema.h +@@ -8670,6 +8670,10 @@ public: + /// \return true if type is disabled. 
+ bool checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E); + ++ bool checkOpenCLSubgroupExtForCallExpr(CallExpr *Call); ++ ++ bool isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const; ++ + //===--------------------------------------------------------------------===// + // OpenMP directives and clauses. + // +@@ -9956,6 +9960,11 @@ public: + /// that the user intended an assignment used as condition. + void DiagnoseEqualityWithExtraParens(ParenExpr *ParenE); + ++ template ++ void DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, ++ DiagLocT DiagLoc, DiagInfoT DiagInfo, ++ unsigned Selector, SourceRange SrcRange); ++ + /// CheckCXXBooleanCondition - Returns true if conversion to bool is invalid. + ExprResult CheckCXXBooleanCondition(Expr *CondExpr, bool IsConstexpr = false); + +diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp +index 21b6f36e9a..c74b6349f7 100644 +--- a/lib/AST/ASTContext.cpp ++++ b/lib/AST/ASTContext.cpp +@@ -1313,7 +1313,8 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, + ObjCSuperType = QualType(); + + // void * type +- if (LangOpts.OpenCLVersion >= 200) { ++ if (Target.getSupportedOpenCLOpts().isEnabled( ++ "__opencl_c_generic_address_space")) { + auto Q = VoidTy.getQualifiers(); + Q.setAddressSpace(LangAS::opencl_generic); + VoidPtrTy = getPointerType(getCanonicalType( +diff --git a/lib/Basic/Builtins.cpp b/lib/Basic/Builtins.cpp +index 7e7f67ca87..29bfa66b35 100644 +--- a/lib/Basic/Builtins.cpp ++++ b/lib/Basic/Builtins.cpp +@@ -24,6 +24,8 @@ static const Builtin::Info BuiltinInfo[] = { + { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, + #define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \ + { #ID, TYPE, ATTRS, nullptr, LANGS, nullptr }, ++#define OPENCLBUILTIN(ID, TYPE, ATTRS, LANGS, FEATURE) \ ++ {#ID, TYPE, ATTRS, nullptr, LANGS, FEATURE}, + #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \ + { #ID, TYPE, ATTRS, HEADER, LANGS, nullptr }, + #include "clang/Basic/Builtins.def" +@@ -71,13 
+73,15 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, + bool ObjCUnsupported = !LangOpts.ObjC && BuiltinInfo.Langs == OBJC_LANG; + bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG; +- bool OclC2Unsupported = LangOpts.OpenCLVersion != 200 && +- (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; ++ bool OclC2PUnsupported = ++ (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC2P_LANG && ++ ((LangOpts.OpenCLVersion < 200 && !LangOpts.OpenCLCPlusPlus) || ++ !OclBuiltinIsSupported(BuiltinInfo, LangOpts)); + bool OclCUnsupported = !LangOpts.OpenCL && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); + bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; + return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && +- !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && ++ !OclC1Unsupported && !OclC2PUnsupported && !OpenMPUnsupported && + !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported; + } + +@@ -162,3 +166,20 @@ bool Builtin::Context::canBeRedeclared(unsigned ID) const { + (!hasReferenceArgsOrResult(ID) && + !hasCustomTypechecking(ID)); + } ++ ++bool Builtin::Context::OclBuiltinIsSupported( ++ const Builtin::Info &BuiltinInfo, const LangOptions &LangOpts) const { ++ if (!requiresFeatures(BuiltinInfo)) ++ return true; ++ ++ return llvm::StringSwitch(BuiltinInfo.Features) ++ .Case("__opencl_c_device_enqueue", LangOpts.Blocks) ++ .Case("__opencl_c_generic_address_space", LangOpts.OpenCLGenericKeyword) ++ .Case("__opencl_c_pipes", LangOpts.OpenCLPipeKeyword) ++ .Default(false); ++} ++ ++bool Builtin::Context::requiresFeatures( ++ const Builtin::Info &BuiltinInfo) const { ++ return BuiltinInfo.Features && llvm::StringRef(BuiltinInfo.Features) != ""; ++} +diff --git a/lib/Basic/TargetInfo.cpp b/lib/Basic/TargetInfo.cpp +index 8b7621d796..ff9fc12be4 100644 +--- a/lib/Basic/TargetInfo.cpp ++++ 
b/lib/Basic/TargetInfo.cpp +@@ -372,6 +372,17 @@ void TargetInfo::adjust(LangOptions &Opts) { + HalfFormat = &llvm::APFloat::IEEEhalf(); + FloatFormat = &llvm::APFloat::IEEEsingle(); + LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ ++ auto &SupportedOCLOpts = getTargetOpts().SupportedOpenCLOptions; ++ ++ SupportedOCLOpts.setOpenCLVersion(Opts); ++ SupportedOCLOpts.adjustFeatures(); ++ ++ if (!Opts.OpenCLCPlusPlus && Opts.OpenCLVersion >= 200) ++ Opts.Blocks = SupportedOCLOpts.isSupported("__opencl_c_device_enqueue"); ++ Opts.OpenCLGenericKeyword = ++ SupportedOCLOpts.isSupported("__opencl_c_generic_address_space"); ++ Opts.OpenCLPipeKeyword = SupportedOCLOpts.isSupported("__opencl_c_pipes"); + } + + if (Opts.NewAlignOverride) +diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp +index 3c139d7247..d468139c26 100644 +--- a/lib/Basic/Targets.cpp ++++ b/lib/Basic/Targets.cpp +@@ -40,7 +40,6 @@ + #include "clang/Basic/Diagnostic.h" + #include "llvm/ADT/StringExtras.h" + #include "llvm/ADT/Triple.h" +- + using namespace clang; + + namespace clang { +diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp +index 1713e40c31..a8cb431629 100644 +--- a/lib/CodeGen/CodeGenFunction.cpp ++++ b/lib/CodeGen/CodeGenFunction.cpp +@@ -2372,11 +2372,11 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, + std::string MissingFeature; + if (BuiltinID) { + SmallVector ReqFeatures; +- const char *FeatureList = +- CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); + // Return if the builtin doesn't have any required features. 
+- if (!FeatureList || StringRef(FeatureList) == "") ++ if (!CGM.getContext().BuiltinInfo.requiresFeatures(BuiltinID)) + return; ++ const char *FeatureList = ++ CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); + StringRef(FeatureList).split(ReqFeatures, ','); + if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) + CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature) +diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp +index 3e6528c259..a6c94e18c7 100644 +--- a/lib/Frontend/CompilerInvocation.cpp ++++ b/lib/Frontend/CompilerInvocation.cpp +@@ -2138,6 +2138,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, + Opts.OpenCLVersion = 120; + else if (LangStd == LangStandard::lang_opencl20) + Opts.OpenCLVersion = 200; ++ else if (LangStd == LangStandard::lang_opencl30) ++ Opts.OpenCLVersion = 300; + else if (LangStd == LangStandard::lang_openclcpp) + Opts.OpenCLCPlusPlusVersion = 100; + +@@ -2342,9 +2344,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, + .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) + .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) + .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) ++ .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) + .Case("c++", LangStandard::lang_openclcpp) + .Default(LangStandard::lang_unspecified); +- + if (OpenCLLangStd == LangStandard::lang_unspecified) { + Diags.Report(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); +@@ -2580,8 +2582,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, + + Opts.RTTI = Opts.CPlusPlus && !Args.hasArg(OPT_fno_rtti); + Opts.RTTIData = Opts.RTTI && !Args.hasArg(OPT_fno_rtti_data); +- Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL +- && Opts.OpenCLVersion == 200); ++ Opts.Blocks = Args.hasArg(OPT_fblocks); + Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional); + 
Opts.CoroutinesTS = Args.hasArg(OPT_fcoroutines_ts); + +diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp +index 4cde22ce9a..6b3f75cb1a 100644 +--- a/lib/Frontend/InitPreprocessor.cpp ++++ b/lib/Frontend/InitPreprocessor.cpp +@@ -435,6 +435,9 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, + case 200: + Builder.defineMacro("__OPENCL_C_VERSION__", "200"); + break; ++ case 300: ++ Builder.defineMacro("__OPENCL_C_VERSION__", "300"); ++ break; + default: + llvm_unreachable("Unsupported OpenCL version"); + } +@@ -442,6 +445,8 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, + Builder.defineMacro("CL_VERSION_1_1", "110"); + Builder.defineMacro("CL_VERSION_1_2", "120"); + Builder.defineMacro("CL_VERSION_2_0", "200"); ++ Builder.defineMacro("CL_VERSION_3_0", "300"); ++ + + if (TI.isLittleEndian()) + Builder.defineMacro("__ENDIAN_LITTLE__"); +@@ -599,6 +604,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, + Builder.defineMacro("__OPENCL_MEMORY_SCOPE_DEVICE", "2"); + Builder.defineMacro("__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES", "3"); + Builder.defineMacro("__OPENCL_MEMORY_SCOPE_SUB_GROUP", "4"); ++ Builder.defineMacro("__OPENCL_MEMORY_SCOPE_ALL_DEVICES", "5"); + + // Support for #pragma redefine_extname (Sun compatibility) + Builder.defineMacro("__PRAGMA_REDEFINE_EXTNAME", "1"); +@@ -1060,7 +1066,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, + // OpenCL definitions. 
+ if (LangOpts.OpenCL) { + #define OPENCLEXT(Ext) \ +- if (TI.getSupportedOpenCLOpts().isSupported(#Ext, LangOpts)) \ ++ if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \ + Builder.defineMacro(#Ext); + #include "clang/Basic/OpenCLExtensions.def" + +diff --git a/lib/Headers/opencl-c-base.h b/lib/Headers/opencl-c-base.h +new file mode 100644 +index 0000000000..d81cbdb8a7 +--- /dev/null ++++ b/lib/Headers/opencl-c-base.h +@@ -0,0 +1,578 @@ ++//===----- opencl-c-base.h - OpenCL C language base definitions -----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _OPENCL_BASE_H_ ++#define _OPENCL_BASE_H_ ++ ++// built-in scalar data types: ++ ++/** ++ * An unsigned 8-bit integer. ++ */ ++typedef unsigned char uchar; ++ ++/** ++ * An unsigned 16-bit integer. ++ */ ++typedef unsigned short ushort; ++ ++/** ++ * An unsigned 32-bit integer. ++ */ ++typedef unsigned int uint; ++ ++/** ++ * An unsigned 64-bit integer. ++ */ ++typedef unsigned long ulong; ++ ++/** ++ * The unsigned integer type of the result of the sizeof operator. This ++ * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS ++ * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if ++ * CL_DEVICE_ADDRESS_BITS is 64-bits. ++ */ ++typedef __SIZE_TYPE__ size_t; ++ ++/** ++ * A signed integer type that is the result of subtracting two pointers. ++ * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS ++ * defined in table 4.3 is 32-bits and is a 64-bit signed integer if ++ * CL_DEVICE_ADDRESS_BITS is 64-bits. 
++ */ ++typedef __PTRDIFF_TYPE__ ptrdiff_t; ++ ++/** ++ * A signed integer type with the property that any valid pointer to ++ * void can be converted to this type, then converted back to pointer ++ * to void, and the result will compare equal to the original pointer. ++ */ ++typedef __INTPTR_TYPE__ intptr_t; ++ ++/** ++ * An unsigned integer type with the property that any valid pointer to ++ * void can be converted to this type, then converted back to pointer ++ * to void, and the result will compare equal to the original pointer. ++ */ ++typedef __UINTPTR_TYPE__ uintptr_t; ++ ++// built-in vector data types: ++typedef char char2 __attribute__((ext_vector_type(2))); ++typedef char char3 __attribute__((ext_vector_type(3))); ++typedef char char4 __attribute__((ext_vector_type(4))); ++typedef char char8 __attribute__((ext_vector_type(8))); ++typedef char char16 __attribute__((ext_vector_type(16))); ++typedef uchar uchar2 __attribute__((ext_vector_type(2))); ++typedef uchar uchar3 __attribute__((ext_vector_type(3))); ++typedef uchar uchar4 __attribute__((ext_vector_type(4))); ++typedef uchar uchar8 __attribute__((ext_vector_type(8))); ++typedef uchar uchar16 __attribute__((ext_vector_type(16))); ++typedef short short2 __attribute__((ext_vector_type(2))); ++typedef short short3 __attribute__((ext_vector_type(3))); ++typedef short short4 __attribute__((ext_vector_type(4))); ++typedef short short8 __attribute__((ext_vector_type(8))); ++typedef short short16 __attribute__((ext_vector_type(16))); ++typedef ushort ushort2 __attribute__((ext_vector_type(2))); ++typedef ushort ushort3 __attribute__((ext_vector_type(3))); ++typedef ushort ushort4 __attribute__((ext_vector_type(4))); ++typedef ushort ushort8 __attribute__((ext_vector_type(8))); ++typedef ushort ushort16 __attribute__((ext_vector_type(16))); ++typedef int int2 __attribute__((ext_vector_type(2))); ++typedef int int3 __attribute__((ext_vector_type(3))); ++typedef int int4 __attribute__((ext_vector_type(4))); 
++typedef int int8 __attribute__((ext_vector_type(8))); ++typedef int int16 __attribute__((ext_vector_type(16))); ++typedef uint uint2 __attribute__((ext_vector_type(2))); ++typedef uint uint3 __attribute__((ext_vector_type(3))); ++typedef uint uint4 __attribute__((ext_vector_type(4))); ++typedef uint uint8 __attribute__((ext_vector_type(8))); ++typedef uint uint16 __attribute__((ext_vector_type(16))); ++typedef long long2 __attribute__((ext_vector_type(2))); ++typedef long long3 __attribute__((ext_vector_type(3))); ++typedef long long4 __attribute__((ext_vector_type(4))); ++typedef long long8 __attribute__((ext_vector_type(8))); ++typedef long long16 __attribute__((ext_vector_type(16))); ++typedef ulong ulong2 __attribute__((ext_vector_type(2))); ++typedef ulong ulong3 __attribute__((ext_vector_type(3))); ++typedef ulong ulong4 __attribute__((ext_vector_type(4))); ++typedef ulong ulong8 __attribute__((ext_vector_type(8))); ++typedef ulong ulong16 __attribute__((ext_vector_type(16))); ++typedef float float2 __attribute__((ext_vector_type(2))); ++typedef float float3 __attribute__((ext_vector_type(3))); ++typedef float float4 __attribute__((ext_vector_type(4))); ++typedef float float8 __attribute__((ext_vector_type(8))); ++typedef float float16 __attribute__((ext_vector_type(16))); ++#ifdef cl_khr_fp16 ++#pragma OPENCL EXTENSION cl_khr_fp16 : enable ++typedef half half2 __attribute__((ext_vector_type(2))); ++typedef half half3 __attribute__((ext_vector_type(3))); ++typedef half half4 __attribute__((ext_vector_type(4))); ++typedef half half8 __attribute__((ext_vector_type(8))); ++typedef half half16 __attribute__((ext_vector_type(16))); ++#endif ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#if __OPENCL_C_VERSION__ < CL_VERSION_1_2 ++#pragma OPENCL EXTENSION cl_khr_fp64 : enable ++#endif ++typedef double double2 __attribute__((ext_vector_type(2))); ++typedef double double3 __attribute__((ext_vector_type(3))); ++typedef double double4 
__attribute__((ext_vector_type(4))); ++typedef double double8 __attribute__((ext_vector_type(8))); ++typedef double double16 __attribute__((ext_vector_type(16))); ++#endif ++ ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#define NULL ((void*)0) ++#endif ++ ++/** ++ * Value of maximum non-infinite single-precision floating-point ++ * number. ++ */ ++#define MAXFLOAT 0x1.fffffep127f ++ ++/** ++ * A positive float constant expression. HUGE_VALF evaluates ++ * to +infinity. Used as an error value returned by the built-in ++ * math functions. ++ */ ++#define HUGE_VALF (__builtin_huge_valf()) ++ ++/** ++ * A positive double constant expression. HUGE_VAL evaluates ++ * to +infinity. Used as an error value returned by the built-in ++ * math functions. ++ */ ++#define HUGE_VAL (__builtin_huge_val()) ++ ++/** ++ * A constant expression of type float representing positive or ++ * unsigned infinity. ++ */ ++#define INFINITY (__builtin_inff()) ++ ++/** ++ * A constant expression of type float representing a quiet NaN. 
++ */ ++#define NAN as_float(INT_MAX) ++ ++#define FP_ILOGB0 INT_MIN ++#define FP_ILOGBNAN INT_MAX ++ ++#define FLT_DIG 6 ++#define FLT_MANT_DIG 24 ++#define FLT_MAX_10_EXP +38 ++#define FLT_MAX_EXP +128 ++#define FLT_MIN_10_EXP -37 ++#define FLT_MIN_EXP -125 ++#define FLT_RADIX 2 ++#define FLT_MAX 0x1.fffffep127f ++#define FLT_MIN 0x1.0p-126f ++#define FLT_EPSILON 0x1.0p-23f ++ ++#define M_E_F 2.71828182845904523536028747135266250f ++#define M_LOG2E_F 1.44269504088896340735992468100189214f ++#define M_LOG10E_F 0.434294481903251827651128918916605082f ++#define M_LN2_F 0.693147180559945309417232121458176568f ++#define M_LN10_F 2.30258509299404568401799145468436421f ++#define M_PI_F 3.14159265358979323846264338327950288f ++#define M_PI_2_F 1.57079632679489661923132169163975144f ++#define M_PI_4_F 0.785398163397448309615660845819875721f ++#define M_1_PI_F 0.318309886183790671537767526745028724f ++#define M_2_PI_F 0.636619772367581343075535053490057448f ++#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f ++#define M_SQRT2_F 1.41421356237309504880168872420969808f ++#define M_SQRT1_2_F 0.707106781186547524400844362104849039f ++ ++#define DBL_DIG 15 ++#define DBL_MANT_DIG 53 ++#define DBL_MAX_10_EXP +308 ++#define DBL_MAX_EXP +1024 ++#define DBL_MIN_10_EXP -307 ++#define DBL_MIN_EXP -1021 ++#define DBL_RADIX 2 ++#define DBL_MAX 0x1.fffffffffffffp1023 ++#define DBL_MIN 0x1.0p-1022 ++#define DBL_EPSILON 0x1.0p-52 ++ ++#define M_E 0x1.5bf0a8b145769p+1 ++#define M_LOG2E 0x1.71547652b82fep+0 ++#define M_LOG10E 0x1.bcb7b1526e50ep-2 ++#define M_LN2 0x1.62e42fefa39efp-1 ++#define M_LN10 0x1.26bb1bbb55516p+1 ++#define M_PI 0x1.921fb54442d18p+1 ++#define M_PI_2 0x1.921fb54442d18p+0 ++#define M_PI_4 0x1.921fb54442d18p-1 ++#define M_1_PI 0x1.45f306dc9c883p-2 ++#define M_2_PI 0x1.45f306dc9c883p-1 ++#define M_2_SQRTPI 0x1.20dd750429b6dp+0 ++#define M_SQRT2 0x1.6a09e667f3bcdp+0 ++#define M_SQRT1_2 0x1.6a09e667f3bcdp-1 ++ ++#ifdef cl_khr_fp16 ++ ++#define HALF_DIG 3 ++#define 
HALF_MANT_DIG 11 ++#define HALF_MAX_10_EXP +4 ++#define HALF_MAX_EXP +16 ++#define HALF_MIN_10_EXP -4 ++#define HALF_MIN_EXP -13 ++#define HALF_RADIX 2 ++#define HALF_MAX ((0x1.ffcp15h)) ++#define HALF_MIN ((0x1.0p-14h)) ++#define HALF_EPSILON ((0x1.0p-10h)) ++ ++#define M_E_H 2.71828182845904523536028747135266250h ++#define M_LOG2E_H 1.44269504088896340735992468100189214h ++#define M_LOG10E_H 0.434294481903251827651128918916605082h ++#define M_LN2_H 0.693147180559945309417232121458176568h ++#define M_LN10_H 2.30258509299404568401799145468436421h ++#define M_PI_H 3.14159265358979323846264338327950288h ++#define M_PI_2_H 1.57079632679489661923132169163975144h ++#define M_PI_4_H 0.785398163397448309615660845819875721h ++#define M_1_PI_H 0.318309886183790671537767526745028724h ++#define M_2_PI_H 0.636619772367581343075535053490057448h ++#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h ++#define M_SQRT2_H 1.41421356237309504880168872420969808h ++#define M_SQRT1_2_H 0.707106781186547524400844362104849039h ++ ++#endif //cl_khr_fp16 ++ ++#define CHAR_BIT 8 ++#define SCHAR_MAX 127 ++#define SCHAR_MIN (-128) ++#define UCHAR_MAX 255 ++#define CHAR_MAX SCHAR_MAX ++#define CHAR_MIN SCHAR_MIN ++#define USHRT_MAX 65535 ++#define SHRT_MAX 32767 ++#define SHRT_MIN (-32768) ++#define UINT_MAX 0xffffffff ++#define INT_MAX 2147483647 ++#define INT_MIN (-2147483647-1) ++#define ULONG_MAX 0xffffffffffffffffUL ++#define LONG_MAX 0x7fffffffffffffffL ++#define LONG_MIN (-0x7fffffffffffffffL-1) ++ ++// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions ++ ++// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence ++typedef uint cl_mem_fence_flags; ++ ++/** ++ * Queue a memory fence to ensure correct ++ * ordering of memory operations to local memory ++ */ ++#define CLK_LOCAL_MEM_FENCE 0x01 ++ ++/** ++ * Queue a memory fence to ensure correct ++ * ordering of memory operations to global memory ++ */ ++#define CLK_GLOBAL_MEM_FENCE 
0x02 ++ ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++ ++typedef enum memory_scope { ++ memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, ++ memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, ++#ifdef __opencl_c_atomic_scope_device ++ memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, ++#endif ++#ifdef __opencl_c_atomic_scope_all_devices ++ memory_scope_all_devices = __OPENCL_MEMORY_SCOPE_ALL_DEVICES, ++ memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, ++#endif ++#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ ++ defined(__opencl_c_subgroups) ++ memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP ++#endif ++} memory_scope; ++ ++/** ++ * Queue a memory fence to ensure correct ordering of memory ++ * operations between work-items of a work-group to ++ * image memory. ++ */ ++#define CLK_IMAGE_MEM_FENCE 0x04 ++ ++#ifndef ATOMIC_VAR_INIT ++#define ATOMIC_VAR_INIT(x) (x) ++#endif //ATOMIC_VAR_INIT ++#define ATOMIC_FLAG_INIT 0 ++ ++// enum values aligned with what clang uses in EmitAtomicExpr() ++typedef enum memory_order { ++ memory_order_relaxed = __ATOMIC_RELAXED, ++ memory_order_acquire = __ATOMIC_ACQUIRE, ++ memory_order_release = __ATOMIC_RELEASE, ++ memory_order_acq_rel = __ATOMIC_ACQ_REL, ++#ifdef __opencl_c_atomic_order_seq_cst ++ memory_order_seq_cst = __ATOMIC_SEQ_CST ++#endif //__opencl_c_atomic_order_seq_cst ++} memory_order; ++ ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++ ++// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions ++ ++// These values need to match the runtime equivalent ++// ++// Addressing Mode. 
++// ++#define CLK_ADDRESS_NONE 0 ++#define CLK_ADDRESS_CLAMP_TO_EDGE 2 ++#define CLK_ADDRESS_CLAMP 4 ++#define CLK_ADDRESS_REPEAT 6 ++#define CLK_ADDRESS_MIRRORED_REPEAT 8 ++ ++// ++// Coordination Normalization ++// ++#define CLK_NORMALIZED_COORDS_FALSE 0 ++#define CLK_NORMALIZED_COORDS_TRUE 1 ++ ++// ++// Filtering Mode. ++// ++#define CLK_FILTER_NEAREST 0x10 ++#define CLK_FILTER_LINEAR 0x20 ++ ++#ifdef cl_khr_gl_msaa_sharing ++#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable ++#endif //cl_khr_gl_msaa_sharing ++ ++// ++// Channel Datatype. ++// ++#define CLK_SNORM_INT8 0x10D0 ++#define CLK_SNORM_INT16 0x10D1 ++#define CLK_UNORM_INT8 0x10D2 ++#define CLK_UNORM_INT16 0x10D3 ++#define CLK_UNORM_SHORT_565 0x10D4 ++#define CLK_UNORM_SHORT_555 0x10D5 ++#define CLK_UNORM_INT_101010 0x10D6 ++#define CLK_SIGNED_INT8 0x10D7 ++#define CLK_SIGNED_INT16 0x10D8 ++#define CLK_SIGNED_INT32 0x10D9 ++#define CLK_UNSIGNED_INT8 0x10DA ++#define CLK_UNSIGNED_INT16 0x10DB ++#define CLK_UNSIGNED_INT32 0x10DC ++#define CLK_HALF_FLOAT 0x10DD ++#define CLK_FLOAT 0x10DE ++#define CLK_UNORM_INT24 0x10DF ++ ++// Channel order, numbering must be aligned with cl_channel_order in cl.h ++// ++#define CLK_R 0x10B0 ++#define CLK_A 0x10B1 ++#define CLK_RG 0x10B2 ++#define CLK_RA 0x10B3 ++#define CLK_RGB 0x10B4 ++#define CLK_RGBA 0x10B5 ++#define CLK_BGRA 0x10B6 ++#define CLK_ARGB 0x10B7 ++#define CLK_INTENSITY 0x10B8 ++#define CLK_LUMINANCE 0x10B9 ++#define CLK_Rx 0x10BA ++#define CLK_RGx 0x10BB ++#define CLK_RGBx 0x10BC ++#define CLK_DEPTH 0x10BD ++#define CLK_DEPTH_STENCIL 0x10BE ++#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#define CLK_sRGB 0x10BF ++#define CLK_sRGBx 0x10C0 ++#define CLK_sRGBA 0x10C1 ++#define CLK_sBGRA 0x10C2 ++#define CLK_ABGR 0x10C3 ++#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++ ++// OpenCL v2.0 s6.13.16 - Pipe Functions ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#define CLK_NULL_RESERVE_ID 
(__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) ++ ++// OpenCL v2.0 s6.13.17 - Enqueue Kernels ++#define CL_COMPLETE 0x0 ++#define CL_RUNNING 0x1 ++#define CL_SUBMITTED 0x2 ++#define CL_QUEUED 0x3 ++ ++#define CLK_SUCCESS 0 ++#define CLK_ENQUEUE_FAILURE -101 ++#define CLK_INVALID_QUEUE -102 ++#define CLK_INVALID_NDRANGE -160 ++#define CLK_INVALID_EVENT_WAIT_LIST -57 ++#define CLK_DEVICE_QUEUE_FULL -161 ++#define CLK_INVALID_ARG_SIZE -51 ++#define CLK_EVENT_ALLOCATION_FAILURE -100 ++#define CLK_OUT_OF_RESOURCES -5 ++ ++#define CLK_NULL_QUEUE 0 ++#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t)) ++ ++// execution model related definitions ++#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0 ++#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1 ++#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2 ++ ++typedef int kernel_enqueue_flags_t; ++typedef int clk_profiling_info; ++ ++// Profiling info name (see capture_event_profiling_info) ++#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1 ++ ++#define MAX_WORK_DIM 3 ++ ++typedef struct { ++ unsigned int workDimension; ++ size_t globalWorkOffset[MAX_WORK_DIM]; ++ size_t globalWorkSize[MAX_WORK_DIM]; ++ size_t localWorkSize[MAX_WORK_DIM]; ++} ndrange_t; ++ ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++ ++#ifdef cl_intel_device_side_avc_motion_estimation ++#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin ++ ++#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0 ++#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1 ++#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2 ++#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3 ++ ++#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0 ++#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1 ++#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2 ++#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3 ++ ++#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0 ++#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 ++#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 ++ ++#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 ++#define 
CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E ++#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D ++#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B ++#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 ++#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F ++#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F ++#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F ++ ++#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 ++#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 ++#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 ++ ++#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 ++#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 ++#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 ++#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 ++#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 ++#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 ++#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 ++#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 ++#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 ++ ++#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 ++#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 ++ ++#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 ++#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 ++#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 ++ ++#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 ++#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 ++#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 ++#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 ++ ++#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 ++#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 ++#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 ++#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B ++#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 ++ ++#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 ++#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 ++#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 ++#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 ++ ++#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0 ++#define 
CLK_AVC_ME_INTRA_8x8_INTEL 0x1 ++#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2 ++ ++#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 ++#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 ++ ++#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24) ++#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24) ++#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30) ++#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30) ++ ++#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 ++#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 ++ ++#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0 ++#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 ++#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 ++#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 ++ ++#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 ++#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 ++#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 ++#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 ++ ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 
++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 ++#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 ++#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 ++#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 ++#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 ++#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 ++ ++#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1 ++#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2 ++#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3 ++ ++#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 ++#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 ++ ++#define CLK_AVC_ME_INITIALIZE_INTEL 0x0 ++ ++#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0 ++#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0 ++#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0 ++ ++#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0 ++#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0 ++#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0 ++ ++#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 ++#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 ++#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 ++#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 ++ ++#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end ++#endif // cl_intel_device_side_avc_motion_estimation ++ ++#endif //_OPENCL_BASE_H_ +diff --git a/lib/Headers/opencl-c.h b/lib/Headers/opencl-c.h +index 514c710c11..9dcd10d54f 100644 +--- a/lib/Headers/opencl-c.h ++++ b/lib/Headers/opencl-c.h +@@ -4883,7 +4883,7 @@ float16 __ovld __cnfn 
convert_float16(float16); + + // Conversions with double data type parameters or return value. + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + char __ovld __cnfn convert_char(double); + char __ovld __cnfn convert_char_rte(double); + char __ovld __cnfn convert_char_rtn(double); +@@ -5703,7 +5703,7 @@ double16 __ovld __cnfn convert_double16_rtz(uchar16); + double16 __ovld __cnfn convert_double16_rtz(uint16); + double16 __ovld __cnfn convert_double16_rtz(ulong16); + double16 __ovld __cnfn convert_double16_rtz(ushort16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + // Convert half types to non-double types. +@@ -6521,7 +6521,7 @@ half16 __ovld __cnfn convert_half16_rtz(float16); + half16 __ovld __cnfn convert_half16_rtz(half16); + + // Convert half types to double types. +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn convert_double(half); + double __ovld __cnfn convert_double_rte(half); + double __ovld __cnfn convert_double_rtp(half); +@@ -6584,7 +6584,7 @@ half16 __ovld __cnfn convert_half16_rte(double16); + half16 __ovld __cnfn convert_half16_rtp(double16); + half16 __ovld __cnfn convert_half16_rtn(double16); + half16 __ovld __cnfn convert_half16_rtz(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #endif // cl_khr_fp16 + +@@ -6655,14 +6655,14 @@ half16 __ovld __cnfn convert_half16_rtz(double16); + #define as_float8(x) __builtin_astype((x), float8) + #define as_float16(x) __builtin_astype((x), float16) + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #define as_double(x) __builtin_astype((x), double) + #define as_double2(x) __builtin_astype((x), double2) + #define as_double3(x) __builtin_astype((x), double3) + #define as_double4(x) __builtin_astype((x), double4) + #define as_double8(x) __builtin_astype((x), double8) + #define as_double16(x) 
__builtin_astype((x), double16) +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + #define as_half(x) __builtin_astype((x), half) +@@ -6785,14 +6785,14 @@ float3 __ovld __cnfn acos(float3); + float4 __ovld __cnfn acos(float4); + float8 __ovld __cnfn acos(float8); + float16 __ovld __cnfn acos(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acos(double); + double2 __ovld __cnfn acos(double2); + double3 __ovld __cnfn acos(double3); + double4 __ovld __cnfn acos(double4); + double8 __ovld __cnfn acos(double8); + double16 __ovld __cnfn acos(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acos(half); + half2 __ovld __cnfn acos(half2); +@@ -6811,14 +6811,14 @@ float3 __ovld __cnfn acosh(float3); + float4 __ovld __cnfn acosh(float4); + float8 __ovld __cnfn acosh(float8); + float16 __ovld __cnfn acosh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acosh(double); + double2 __ovld __cnfn acosh(double2); + double3 __ovld __cnfn acosh(double3); + double4 __ovld __cnfn acosh(double4); + double8 __ovld __cnfn acosh(double8); + double16 __ovld __cnfn acosh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acosh(half); + half2 __ovld __cnfn acosh(half2); +@@ -6837,14 +6837,14 @@ float3 __ovld __cnfn acospi(float3 x); + float4 __ovld __cnfn acospi(float4 x); + float8 __ovld __cnfn acospi(float8 x); + float16 __ovld __cnfn acospi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acospi(double x); + double2 __ovld __cnfn acospi(double2 x); + double3 __ovld __cnfn acospi(double3 x); + double4 __ovld __cnfn acospi(double4 x); + double8 __ovld __cnfn acospi(double8 x); + 
double16 __ovld __cnfn acospi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acospi(half x); + half2 __ovld __cnfn acospi(half2 x); +@@ -6863,14 +6863,14 @@ float3 __ovld __cnfn asin(float3); + float4 __ovld __cnfn asin(float4); + float8 __ovld __cnfn asin(float8); + float16 __ovld __cnfn asin(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asin(double); + double2 __ovld __cnfn asin(double2); + double3 __ovld __cnfn asin(double3); + double4 __ovld __cnfn asin(double4); + double8 __ovld __cnfn asin(double8); + double16 __ovld __cnfn asin(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asin(half); + half2 __ovld __cnfn asin(half2); +@@ -6889,14 +6889,14 @@ float3 __ovld __cnfn asinh(float3); + float4 __ovld __cnfn asinh(float4); + float8 __ovld __cnfn asinh(float8); + float16 __ovld __cnfn asinh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asinh(double); + double2 __ovld __cnfn asinh(double2); + double3 __ovld __cnfn asinh(double3); + double4 __ovld __cnfn asinh(double4); + double8 __ovld __cnfn asinh(double8); + double16 __ovld __cnfn asinh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asinh(half); + half2 __ovld __cnfn asinh(half2); +@@ -6915,14 +6915,14 @@ float3 __ovld __cnfn asinpi(float3 x); + float4 __ovld __cnfn asinpi(float4 x); + float8 __ovld __cnfn asinpi(float8 x); + float16 __ovld __cnfn asinpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asinpi(double x); + double2 __ovld __cnfn asinpi(double2 x); + double3 __ovld __cnfn asinpi(double3 x); + double4 __ovld __cnfn asinpi(double4 x); + double8 __ovld 
__cnfn asinpi(double8 x); + double16 __ovld __cnfn asinpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asinpi(half x); + half2 __ovld __cnfn asinpi(half2 x); +@@ -6941,14 +6941,14 @@ float3 __ovld __cnfn atan(float3 y_over_x); + float4 __ovld __cnfn atan(float4 y_over_x); + float8 __ovld __cnfn atan(float8 y_over_x); + float16 __ovld __cnfn atan(float16 y_over_x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan(double y_over_x); + double2 __ovld __cnfn atan(double2 y_over_x); + double3 __ovld __cnfn atan(double3 y_over_x); + double4 __ovld __cnfn atan(double4 y_over_x); + double8 __ovld __cnfn atan(double8 y_over_x); + double16 __ovld __cnfn atan(double16 y_over_x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan(half y_over_x); + half2 __ovld __cnfn atan(half2 y_over_x); +@@ -6967,14 +6967,14 @@ float3 __ovld __cnfn atan2(float3 y, float3 x); + float4 __ovld __cnfn atan2(float4 y, float4 x); + float8 __ovld __cnfn atan2(float8 y, float8 x); + float16 __ovld __cnfn atan2(float16 y, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan2(double y, double x); + double2 __ovld __cnfn atan2(double2 y, double2 x); + double3 __ovld __cnfn atan2(double3 y, double3 x); + double4 __ovld __cnfn atan2(double4 y, double4 x); + double8 __ovld __cnfn atan2(double8 y, double8 x); + double16 __ovld __cnfn atan2(double16 y, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan2(half y, half x); + half2 __ovld __cnfn atan2(half2 y, half2 x); +@@ -6993,14 +6993,14 @@ float3 __ovld __cnfn atanh(float3); + float4 __ovld __cnfn atanh(float4); + float8 __ovld __cnfn atanh(float8); + float16 __ovld __cnfn 
atanh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atanh(double); + double2 __ovld __cnfn atanh(double2); + double3 __ovld __cnfn atanh(double3); + double4 __ovld __cnfn atanh(double4); + double8 __ovld __cnfn atanh(double8); + double16 __ovld __cnfn atanh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atanh(half); + half2 __ovld __cnfn atanh(half2); +@@ -7019,14 +7019,14 @@ float3 __ovld __cnfn atanpi(float3 x); + float4 __ovld __cnfn atanpi(float4 x); + float8 __ovld __cnfn atanpi(float8 x); + float16 __ovld __cnfn atanpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atanpi(double x); + double2 __ovld __cnfn atanpi(double2 x); + double3 __ovld __cnfn atanpi(double3 x); + double4 __ovld __cnfn atanpi(double4 x); + double8 __ovld __cnfn atanpi(double8 x); + double16 __ovld __cnfn atanpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atanpi(half x); + half2 __ovld __cnfn atanpi(half2 x); +@@ -7045,14 +7045,14 @@ float3 __ovld __cnfn atan2pi(float3 y, float3 x); + float4 __ovld __cnfn atan2pi(float4 y, float4 x); + float8 __ovld __cnfn atan2pi(float8 y, float8 x); + float16 __ovld __cnfn atan2pi(float16 y, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan2pi(double y, double x); + double2 __ovld __cnfn atan2pi(double2 y, double2 x); + double3 __ovld __cnfn atan2pi(double3 y, double3 x); + double4 __ovld __cnfn atan2pi(double4 y, double4 x); + double8 __ovld __cnfn atan2pi(double8 y, double8 x); + double16 __ovld __cnfn atan2pi(double16 y, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan2pi(half y, half 
x); + half2 __ovld __cnfn atan2pi(half2 y, half2 x); +@@ -7071,14 +7071,14 @@ float3 __ovld __cnfn cbrt(float3); + float4 __ovld __cnfn cbrt(float4); + float8 __ovld __cnfn cbrt(float8); + float16 __ovld __cnfn cbrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cbrt(double); + double2 __ovld __cnfn cbrt(double2); + double3 __ovld __cnfn cbrt(double3); + double4 __ovld __cnfn cbrt(double4); + double8 __ovld __cnfn cbrt(double8); + double16 __ovld __cnfn cbrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cbrt(half); + half2 __ovld __cnfn cbrt(half2); +@@ -7098,14 +7098,14 @@ float3 __ovld __cnfn ceil(float3); + float4 __ovld __cnfn ceil(float4); + float8 __ovld __cnfn ceil(float8); + float16 __ovld __cnfn ceil(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn ceil(double); + double2 __ovld __cnfn ceil(double2); + double3 __ovld __cnfn ceil(double3); + double4 __ovld __cnfn ceil(double4); + double8 __ovld __cnfn ceil(double8); + double16 __ovld __cnfn ceil(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn ceil(half); + half2 __ovld __cnfn ceil(half2); +@@ -7124,14 +7124,14 @@ float3 __ovld __cnfn copysign(float3 x, float3 y); + float4 __ovld __cnfn copysign(float4 x, float4 y); + float8 __ovld __cnfn copysign(float8 x, float8 y); + float16 __ovld __cnfn copysign(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn copysign(double x, double y); + double2 __ovld __cnfn copysign(double2 x, double2 y); + double3 __ovld __cnfn copysign(double3 x, double3 y); + double4 __ovld __cnfn copysign(double4 x, double4 y); + double8 __ovld __cnfn copysign(double8 x, double8 y); + double16 __ovld __cnfn copysign(double16 
x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn copysign(half x, half y); + half2 __ovld __cnfn copysign(half2 x, half2 y); +@@ -7150,14 +7150,14 @@ float3 __ovld __cnfn cos(float3); + float4 __ovld __cnfn cos(float4); + float8 __ovld __cnfn cos(float8); + float16 __ovld __cnfn cos(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cos(double); + double2 __ovld __cnfn cos(double2); + double3 __ovld __cnfn cos(double3); + double4 __ovld __cnfn cos(double4); + double8 __ovld __cnfn cos(double8); + double16 __ovld __cnfn cos(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cos(half); + half2 __ovld __cnfn cos(half2); +@@ -7176,14 +7176,14 @@ float3 __ovld __cnfn cosh(float3); + float4 __ovld __cnfn cosh(float4); + float8 __ovld __cnfn cosh(float8); + float16 __ovld __cnfn cosh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cosh(double); + double2 __ovld __cnfn cosh(double2); + double3 __ovld __cnfn cosh(double3); + double4 __ovld __cnfn cosh(double4); + double8 __ovld __cnfn cosh(double8); + double16 __ovld __cnfn cosh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cosh(half); + half2 __ovld __cnfn cosh(half2); +@@ -7202,14 +7202,14 @@ float3 __ovld __cnfn cospi(float3 x); + float4 __ovld __cnfn cospi(float4 x); + float8 __ovld __cnfn cospi(float8 x); + float16 __ovld __cnfn cospi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cospi(double x); + double2 __ovld __cnfn cospi(double2 x); + double3 __ovld __cnfn cospi(double3 x); + double4 __ovld __cnfn cospi(double4 x); + double8 __ovld __cnfn cospi(double8 x); + double16 
__ovld __cnfn cospi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cospi(half x); + half2 __ovld __cnfn cospi(half2 x); +@@ -7228,14 +7228,14 @@ float3 __ovld __cnfn erfc(float3); + float4 __ovld __cnfn erfc(float4); + float8 __ovld __cnfn erfc(float8); + float16 __ovld __cnfn erfc(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn erfc(double); + double2 __ovld __cnfn erfc(double2); + double3 __ovld __cnfn erfc(double3); + double4 __ovld __cnfn erfc(double4); + double8 __ovld __cnfn erfc(double8); + double16 __ovld __cnfn erfc(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn erfc(half); + half2 __ovld __cnfn erfc(half2); +@@ -7255,14 +7255,14 @@ float3 __ovld __cnfn erf(float3); + float4 __ovld __cnfn erf(float4); + float8 __ovld __cnfn erf(float8); + float16 __ovld __cnfn erf(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn erf(double); + double2 __ovld __cnfn erf(double2); + double3 __ovld __cnfn erf(double3); + double4 __ovld __cnfn erf(double4); + double8 __ovld __cnfn erf(double8); + double16 __ovld __cnfn erf(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn erf(half); + half2 __ovld __cnfn erf(half2); +@@ -7281,14 +7281,14 @@ float3 __ovld __cnfn exp(float3 x); + float4 __ovld __cnfn exp(float4 x); + float8 __ovld __cnfn exp(float8 x); + float16 __ovld __cnfn exp(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp(double x); + double2 __ovld __cnfn exp(double2 x); + double3 __ovld __cnfn exp(double3 x); + double4 __ovld __cnfn exp(double4 x); + double8 __ovld __cnfn exp(double8 x); + double16 __ovld __cnfn 
exp(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp(half x); + half2 __ovld __cnfn exp(half2 x); +@@ -7307,14 +7307,14 @@ float3 __ovld __cnfn exp2(float3); + float4 __ovld __cnfn exp2(float4); + float8 __ovld __cnfn exp2(float8); + float16 __ovld __cnfn exp2(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp2(double); + double2 __ovld __cnfn exp2(double2); + double3 __ovld __cnfn exp2(double3); + double4 __ovld __cnfn exp2(double4); + double8 __ovld __cnfn exp2(double8); + double16 __ovld __cnfn exp2(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp2(half); + half2 __ovld __cnfn exp2(half2); +@@ -7333,14 +7333,14 @@ float3 __ovld __cnfn exp10(float3); + float4 __ovld __cnfn exp10(float4); + float8 __ovld __cnfn exp10(float8); + float16 __ovld __cnfn exp10(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp10(double); + double2 __ovld __cnfn exp10(double2); + double3 __ovld __cnfn exp10(double3); + double4 __ovld __cnfn exp10(double4); + double8 __ovld __cnfn exp10(double8); + double16 __ovld __cnfn exp10(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp10(half); + half2 __ovld __cnfn exp10(half2); +@@ -7359,14 +7359,14 @@ float3 __ovld __cnfn expm1(float3 x); + float4 __ovld __cnfn expm1(float4 x); + float8 __ovld __cnfn expm1(float8 x); + float16 __ovld __cnfn expm1(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn expm1(double x); + double2 __ovld __cnfn expm1(double2 x); + double3 __ovld __cnfn expm1(double3 x); + double4 __ovld __cnfn expm1(double4 x); + double8 __ovld __cnfn expm1(double8 x); + double16 
__ovld __cnfn expm1(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn expm1(half x); + half2 __ovld __cnfn expm1(half2 x); +@@ -7385,14 +7385,14 @@ float3 __ovld __cnfn fabs(float3); + float4 __ovld __cnfn fabs(float4); + float8 __ovld __cnfn fabs(float8); + float16 __ovld __cnfn fabs(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fabs(double); + double2 __ovld __cnfn fabs(double2); + double3 __ovld __cnfn fabs(double3); + double4 __ovld __cnfn fabs(double4); + double8 __ovld __cnfn fabs(double8); + double16 __ovld __cnfn fabs(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fabs(half); + half2 __ovld __cnfn fabs(half2); +@@ -7411,14 +7411,14 @@ float3 __ovld __cnfn fdim(float3 x, float3 y); + float4 __ovld __cnfn fdim(float4 x, float4 y); + float8 __ovld __cnfn fdim(float8 x, float8 y); + float16 __ovld __cnfn fdim(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fdim(double x, double y); + double2 __ovld __cnfn fdim(double2 x, double2 y); + double3 __ovld __cnfn fdim(double3 x, double3 y); + double4 __ovld __cnfn fdim(double4 x, double4 y); + double8 __ovld __cnfn fdim(double8 x, double8 y); + double16 __ovld __cnfn fdim(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fdim(half x, half y); + half2 __ovld __cnfn fdim(half2 x, half2 y); +@@ -7438,14 +7438,14 @@ float3 __ovld __cnfn floor(float3); + float4 __ovld __cnfn floor(float4); + float8 __ovld __cnfn floor(float8); + float16 __ovld __cnfn floor(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn floor(double); + double2 __ovld __cnfn 
floor(double2); + double3 __ovld __cnfn floor(double3); + double4 __ovld __cnfn floor(double4); + double8 __ovld __cnfn floor(double8); + double16 __ovld __cnfn floor(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn floor(half); + half2 __ovld __cnfn floor(half2); +@@ -7468,14 +7468,14 @@ float3 __ovld __cnfn fma(float3 a, float3 b, float3 c); + float4 __ovld __cnfn fma(float4 a, float4 b, float4 c); + float8 __ovld __cnfn fma(float8 a, float8 b, float8 c); + float16 __ovld __cnfn fma(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fma(double a, double b, double c); + double2 __ovld __cnfn fma(double2 a, double2 b, double2 c); + double3 __ovld __cnfn fma(double3 a, double3 b, double3 c); + double4 __ovld __cnfn fma(double4 a, double4 b, double4 c); + double8 __ovld __cnfn fma(double8 a, double8 b, double8 c); + double16 __ovld __cnfn fma(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fma(half a, half b, half c); + half2 __ovld __cnfn fma(half2 a, half2 b, half2 c); +@@ -7502,7 +7502,7 @@ float3 __ovld __cnfn fmax(float3 x, float y); + float4 __ovld __cnfn fmax(float4 x, float y); + float8 __ovld __cnfn fmax(float8 x, float y); + float16 __ovld __cnfn fmax(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmax(double x, double y); + double2 __ovld __cnfn fmax(double2 x, double2 y); + double3 __ovld __cnfn fmax(double3 x, double3 y); +@@ -7514,7 +7514,7 @@ double3 __ovld __cnfn fmax(double3 x, double y); + double4 __ovld __cnfn fmax(double4 x, double y); + double8 __ovld __cnfn fmax(double8 x, double y); + double16 __ovld __cnfn fmax(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || 
defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmax(half x, half y); + half2 __ovld __cnfn fmax(half2 x, half2 y); +@@ -7546,7 +7546,7 @@ float3 __ovld __cnfn fmin(float3 x, float y); + float4 __ovld __cnfn fmin(float4 x, float y); + float8 __ovld __cnfn fmin(float8 x, float y); + float16 __ovld __cnfn fmin(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmin(double x, double y); + double2 __ovld __cnfn fmin(double2 x, double2 y); + double3 __ovld __cnfn fmin(double3 x, double3 y); +@@ -7558,7 +7558,7 @@ double3 __ovld __cnfn fmin(double3 x, double y); + double4 __ovld __cnfn fmin(double4 x, double y); + double8 __ovld __cnfn fmin(double8 x, double y); + double16 __ovld __cnfn fmin(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmin(half x, half y); + half2 __ovld __cnfn fmin(half2 x, half2 y); +@@ -7582,14 +7582,14 @@ float3 __ovld __cnfn fmod(float3 x, float3 y); + float4 __ovld __cnfn fmod(float4 x, float4 y); + float8 __ovld __cnfn fmod(float8 x, float8 y); + float16 __ovld __cnfn fmod(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmod(double x, double y); + double2 __ovld __cnfn fmod(double2 x, double2 y); + double3 __ovld __cnfn fmod(double3 x, double3 y); + double4 __ovld __cnfn fmod(double4 x, double4 y); + double8 __ovld __cnfn fmod(double8 x, double8 y); + double16 __ovld __cnfn fmod(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmod(half x, half y); + half2 __ovld __cnfn fmod(half2 x, half2 y); +@@ -7603,21 +7603,21 @@ half16 __ovld __cnfn fmod(half16 x, half16 y); + * Returns fmin(x - floor (x), 0x1.fffffep-1f ). + * floor(x) is returned in iptr. 
+ */ +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + float __ovld fract(float x, float *iptr); + float2 __ovld fract(float2 x, float2 *iptr); + float3 __ovld fract(float3 x, float3 *iptr); + float4 __ovld fract(float4 x, float4 *iptr); + float8 __ovld fract(float8 x, float8 *iptr); + float16 __ovld fract(float16 x, float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld fract(double x, double *iptr); + double2 __ovld fract(double2 x, double2 *iptr); + double3 __ovld fract(double3 x, double3 *iptr); + double4 __ovld fract(double4 x, double4 *iptr); + double8 __ovld fract(double8 x, double8 *iptr); + double16 __ovld fract(double16 x, double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld fract(half x, half *iptr); + half2 __ovld fract(half2 x, half2 *iptr); +@@ -7626,7 +7626,9 @@ half4 __ovld fract(half4 x, half4 *iptr); + half8 __ovld fract(half8 x, half8 *iptr); + half16 __ovld fract(half16 x, half16 *iptr); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld fract(float x, __global float *iptr); + float2 __ovld fract(float2 x, __global float2 *iptr); + float3 __ovld fract(float3 x, __global float3 *iptr); +@@ -7645,7 +7647,7 @@ float3 __ovld fract(float3 x, __private float3 *iptr); + float4 __ovld fract(float4 x, __private float4 *iptr); + float8 __ovld fract(float8 x, __private float8 *iptr); + float16 __ovld fract(float16 x, __private float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld fract(double x, __global double *iptr); + double2 __ovld fract(double2 x, __global double2 *iptr); + double3 __ovld fract(double3 x, __global double3 *iptr); +@@ -7664,7 +7666,7 @@ double3 __ovld fract(double3 x, __private double3 
*iptr); + double4 __ovld fract(double4 x, __private double4 *iptr); + double8 __ovld fract(double8 x, __private double8 *iptr); + double16 __ovld fract(double16 x, __private double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld fract(half x, __global half *iptr); + half2 __ovld fract(half2 x, __global half2 *iptr); +@@ -7685,29 +7687,29 @@ half4 __ovld fract(half4 x, __private half4 *iptr); + half8 __ovld fract(half8 x, __private half8 *iptr); + half16 __ovld fract(half16 x, __private half16 *iptr); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +- ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Extract mantissa and exponent from x. For each + * component the mantissa returned is a float with + * magnitude in the interval [1/2, 1) or 0. Each + * component of x equals mantissa returned * 2^exp. + */ +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + float __ovld frexp(float x, int *exp); + float2 __ovld frexp(float2 x, int2 *exp); + float3 __ovld frexp(float3 x, int3 *exp); + float4 __ovld frexp(float4 x, int4 *exp); + float8 __ovld frexp(float8 x, int8 *exp); + float16 __ovld frexp(float16 x, int16 *exp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld frexp(double x, int *exp); + double2 __ovld frexp(double2 x, int2 *exp); + double3 __ovld frexp(double3 x, int3 *exp); + double4 __ovld frexp(double4 x, int4 *exp); + double8 __ovld frexp(double8 x, int8 *exp); + double16 __ovld frexp(double16 x, int16 *exp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld frexp(half x, int *exp); + half2 __ovld frexp(half2 x, int2 *exp); +@@ -7716,7 +7718,9 @@ half4 __ovld frexp(half4 x, int4 *exp); + half8 __ovld frexp(half8 x, int8 *exp); + half16 __ovld frexp(half16 
x, int16 *exp); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld frexp(float x, __global int *exp); + float2 __ovld frexp(float2 x, __global int2 *exp); + float3 __ovld frexp(float3 x, __global int3 *exp); +@@ -7735,7 +7739,7 @@ float3 __ovld frexp(float3 x, __private int3 *exp); + float4 __ovld frexp(float4 x, __private int4 *exp); + float8 __ovld frexp(float8 x, __private int8 *exp); + float16 __ovld frexp(float16 x, __private int16 *exp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld frexp(double x, __global int *exp); + double2 __ovld frexp(double2 x, __global int2 *exp); + double3 __ovld frexp(double3 x, __global int3 *exp); +@@ -7754,7 +7758,7 @@ double3 __ovld frexp(double3 x, __private int3 *exp); + double4 __ovld frexp(double4 x, __private int4 *exp); + double8 __ovld frexp(double8 x, __private int8 *exp); + double16 __ovld frexp(double16 x, __private int16 *exp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld frexp(half x, __global int *exp); + half2 __ovld frexp(half2 x, __global int2 *exp); +@@ -7775,7 +7779,8 @@ half4 __ovld frexp(half4 x, __private int4 *exp); + half8 __ovld frexp(half8 x, __private int8 *exp); + half16 __ovld frexp(half16 x, __private int16 *exp); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! 
CL_VERSION_2_0) + + /** + * Compute the value of the square root of x^2 + y^2 +@@ -7787,14 +7792,14 @@ float3 __ovld __cnfn hypot(float3 x, float3 y); + float4 __ovld __cnfn hypot(float4 x, float4 y); + float8 __ovld __cnfn hypot(float8 x, float8 y); + float16 __ovld __cnfn hypot(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn hypot(double x, double y); + double2 __ovld __cnfn hypot(double2 x, double2 y); + double3 __ovld __cnfn hypot(double3 x, double3 y); + double4 __ovld __cnfn hypot(double4 x, double4 y); + double8 __ovld __cnfn hypot(double8 x, double8 y); + double16 __ovld __cnfn hypot(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn hypot(half x, half y); + half2 __ovld __cnfn hypot(half2 x, half2 y); +@@ -7813,14 +7818,14 @@ int3 __ovld __cnfn ilogb(float3 x); + int4 __ovld __cnfn ilogb(float4 x); + int8 __ovld __cnfn ilogb(float8 x); + int16 __ovld __cnfn ilogb(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn ilogb(double x); + int2 __ovld __cnfn ilogb(double2 x); + int3 __ovld __cnfn ilogb(double3 x); + int4 __ovld __cnfn ilogb(double4 x); + int8 __ovld __cnfn ilogb(double8 x); + int16 __ovld __cnfn ilogb(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn ilogb(half x); + int2 __ovld __cnfn ilogb(half2 x); +@@ -7844,7 +7849,7 @@ float3 __ovld __cnfn ldexp(float3 x, int n); + float4 __ovld __cnfn ldexp(float4 x, int n); + float8 __ovld __cnfn ldexp(float8 x, int n); + float16 __ovld __cnfn ldexp(float16 x, int n); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn ldexp(double x, int n); + double2 __ovld __cnfn ldexp(double2 x, int2 n); + double3 __ovld __cnfn ldexp(double3 x, int3 n); +@@ 
-7856,7 +7861,7 @@ double3 __ovld __cnfn ldexp(double3 x, int n); + double4 __ovld __cnfn ldexp(double4 x, int n); + double8 __ovld __cnfn ldexp(double8 x, int n); + double16 __ovld __cnfn ldexp(double16 x, int n); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn ldexp(half x, int n); + half2 __ovld __cnfn ldexp(half2 x, int2 n); +@@ -7883,14 +7888,14 @@ float3 __ovld __cnfn lgamma(float3 x); + float4 __ovld __cnfn lgamma(float4 x); + float8 __ovld __cnfn lgamma(float8 x); + float16 __ovld __cnfn lgamma(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn lgamma(double x); + double2 __ovld __cnfn lgamma(double2 x); + double3 __ovld __cnfn lgamma(double3 x); + double4 __ovld __cnfn lgamma(double4 x); + double8 __ovld __cnfn lgamma(double8 x); + double16 __ovld __cnfn lgamma(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn lgamma(half x); + half2 __ovld __cnfn lgamma(half2 x); +@@ -7900,21 +7905,21 @@ half8 __ovld __cnfn lgamma(half8 x); + half16 __ovld __cnfn lgamma(half16 x); + #endif //cl_khr_fp16 + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + float __ovld lgamma_r(float x, int *signp); + float2 __ovld lgamma_r(float2 x, int2 *signp); + float3 __ovld lgamma_r(float3 x, int3 *signp); + float4 __ovld lgamma_r(float4 x, int4 *signp); + float8 __ovld lgamma_r(float8 x, int8 *signp); + float16 __ovld lgamma_r(float16 x, int16 *signp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld lgamma_r(double x, int *signp); + double2 __ovld lgamma_r(double2 x, int2 *signp); + double3 __ovld lgamma_r(double3 x, int3 *signp); + double4 __ovld lgamma_r(double4 x, int4 *signp); + double8 __ovld lgamma_r(double8 x, int8 *signp); + double16 __ovld lgamma_r(double16 x, int16 
*signp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld lgamma_r(half x, int *signp); + half2 __ovld lgamma_r(half2 x, int2 *signp); +@@ -7923,7 +7928,9 @@ half4 __ovld lgamma_r(half4 x, int4 *signp); + half8 __ovld lgamma_r(half8 x, int8 *signp); + half16 __ovld lgamma_r(half16 x, int16 *signp); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld lgamma_r(float x, __global int *signp); + float2 __ovld lgamma_r(float2 x, __global int2 *signp); + float3 __ovld lgamma_r(float3 x, __global int3 *signp); +@@ -7942,7 +7949,7 @@ float3 __ovld lgamma_r(float3 x, __private int3 *signp); + float4 __ovld lgamma_r(float4 x, __private int4 *signp); + float8 __ovld lgamma_r(float8 x, __private int8 *signp); + float16 __ovld lgamma_r(float16 x, __private int16 *signp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld lgamma_r(double x, __global int *signp); + double2 __ovld lgamma_r(double2 x, __global int2 *signp); + double3 __ovld lgamma_r(double3 x, __global int3 *signp); +@@ -7961,7 +7968,7 @@ double3 __ovld lgamma_r(double3 x, __private int3 *signp); + double4 __ovld lgamma_r(double4 x, __private int4 *signp); + double8 __ovld lgamma_r(double8 x, __private int8 *signp); + double16 __ovld lgamma_r(double16 x, __private int16 *signp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld lgamma_r(half x, __global int *signp); + half2 __ovld lgamma_r(half2 x, __global int2 *signp); +@@ -7982,8 +7989,8 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); + half8 __ovld lgamma_r(half8 x, __private int8 *signp); + half16 __ovld lgamma_r(half16 x, __private int16 *signp); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +- ++#endif //! 
defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Compute natural logarithm. + */ +@@ -7993,14 +8000,14 @@ float3 __ovld __cnfn log(float3); + float4 __ovld __cnfn log(float4); + float8 __ovld __cnfn log(float8); + float16 __ovld __cnfn log(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log(double); + double2 __ovld __cnfn log(double2); + double3 __ovld __cnfn log(double3); + double4 __ovld __cnfn log(double4); + double8 __ovld __cnfn log(double8); + double16 __ovld __cnfn log(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log(half); + half2 __ovld __cnfn log(half2); +@@ -8011,7 +8018,7 @@ half16 __ovld __cnfn log(half16); + #endif //cl_khr_fp16 + + /** +- * Compute a base 2 logarithm. ++ * Compute a base 2 logarithm + */ + float __ovld __cnfn log2(float); + float2 __ovld __cnfn log2(float2); +@@ -8019,14 +8026,14 @@ float3 __ovld __cnfn log2(float3); + float4 __ovld __cnfn log2(float4); + float8 __ovld __cnfn log2(float8); + float16 __ovld __cnfn log2(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log2(double); + double2 __ovld __cnfn log2(double2); + double3 __ovld __cnfn log2(double3); + double4 __ovld __cnfn log2(double4); + double8 __ovld __cnfn log2(double8); + double16 __ovld __cnfn log2(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log2(half); + half2 __ovld __cnfn log2(half2); +@@ -8045,14 +8052,14 @@ float3 __ovld __cnfn log10(float3); + float4 __ovld __cnfn log10(float4); + float8 __ovld __cnfn log10(float8); + float16 __ovld __cnfn log10(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log10(double); + double2 __ovld __cnfn log10(double2); + double3 
__ovld __cnfn log10(double3); + double4 __ovld __cnfn log10(double4); + double8 __ovld __cnfn log10(double8); + double16 __ovld __cnfn log10(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log10(half); + half2 __ovld __cnfn log10(half2); +@@ -8071,14 +8078,14 @@ float3 __ovld __cnfn log1p(float3 x); + float4 __ovld __cnfn log1p(float4 x); + float8 __ovld __cnfn log1p(float8 x); + float16 __ovld __cnfn log1p(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log1p(double x); + double2 __ovld __cnfn log1p(double2 x); + double3 __ovld __cnfn log1p(double3 x); + double4 __ovld __cnfn log1p(double4 x); + double8 __ovld __cnfn log1p(double8 x); + double16 __ovld __cnfn log1p(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log1p(half x); + half2 __ovld __cnfn log1p(half2 x); +@@ -8098,14 +8105,14 @@ float3 __ovld __cnfn logb(float3 x); + float4 __ovld __cnfn logb(float4 x); + float8 __ovld __cnfn logb(float8 x); + float16 __ovld __cnfn logb(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn logb(double x); + double2 __ovld __cnfn logb(double2 x); + double3 __ovld __cnfn logb(double3 x); + double4 __ovld __cnfn logb(double4 x); + double8 __ovld __cnfn logb(double8 x); + double16 __ovld __cnfn logb(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn logb(half x); + half2 __ovld __cnfn logb(half2 x); +@@ -8128,14 +8135,14 @@ float3 __ovld __cnfn mad(float3 a, float3 b, float3 c); + float4 __ovld __cnfn mad(float4 a, float4 b, float4 c); + float8 __ovld __cnfn mad(float8 a, float8 b, float8 c); + float16 __ovld __cnfn mad(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn mad(double a, double b, double c); + double2 __ovld __cnfn mad(double2 a, double2 b, double2 c); + double3 __ovld __cnfn mad(double3 a, double3 b, double3 c); + double4 __ovld __cnfn mad(double4 a, double4 b, double4 c); + double8 __ovld __cnfn mad(double8 a, double8 b, double8 c); + double16 __ovld __cnfn mad(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn mad(half a, half b, half c); + half2 __ovld __cnfn mad(half2 a, half2 b, half2 c); +@@ -8155,14 +8162,14 @@ float3 __ovld __cnfn maxmag(float3 x, float3 y); + float4 __ovld __cnfn maxmag(float4 x, float4 y); + float8 __ovld __cnfn maxmag(float8 x, float8 y); + float16 __ovld __cnfn maxmag(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn maxmag(double x, double y); + double2 __ovld __cnfn maxmag(double2 x, double2 y); + double3 __ovld __cnfn maxmag(double3 x, double3 y); + double4 __ovld __cnfn maxmag(double4 x, double4 y); + double8 __ovld __cnfn maxmag(double8 x, double8 y); + double16 __ovld __cnfn maxmag(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn maxmag(half x, half y); + half2 __ovld __cnfn maxmag(half2 x, half2 y); +@@ -8182,14 +8189,14 @@ float3 __ovld __cnfn minmag(float3 x, float3 y); + float4 __ovld __cnfn minmag(float4 x, float4 y); + float8 __ovld __cnfn minmag(float8 x, float8 y); + float16 __ovld __cnfn minmag(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn minmag(double x, double y); + double2 __ovld __cnfn minmag(double2 x, double2 y); + double3 __ovld __cnfn minmag(double3 x, double3 y); + double4 __ovld __cnfn minmag(double4 x, double4 y); + double8 __ovld 
__cnfn minmag(double8 x, double8 y); + double16 __ovld __cnfn minmag(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn minmag(half x, half y); + half2 __ovld __cnfn minmag(half2 x, half2 y); +@@ -8206,21 +8213,21 @@ half16 __ovld __cnfn minmag(half16 x, half16 y); + * the argument. It stores the integral part in the object + * pointed to by iptr. + */ +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + float __ovld modf(float x, float *iptr); + float2 __ovld modf(float2 x, float2 *iptr); + float3 __ovld modf(float3 x, float3 *iptr); + float4 __ovld modf(float4 x, float4 *iptr); + float8 __ovld modf(float8 x, float8 *iptr); + float16 __ovld modf(float16 x, float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld modf(double x, double *iptr); + double2 __ovld modf(double2 x, double2 *iptr); + double3 __ovld modf(double3 x, double3 *iptr); + double4 __ovld modf(double4 x, double4 *iptr); + double8 __ovld modf(double8 x, double8 *iptr); + double16 __ovld modf(double16 x, double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld modf(half x, half *iptr); + half2 __ovld modf(half2 x, half2 *iptr); +@@ -8229,7 +8236,9 @@ half4 __ovld modf(half4 x, half4 *iptr); + half8 __ovld modf(half8 x, half8 *iptr); + half16 __ovld modf(half16 x, half16 *iptr); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld modf(float x, __global float *iptr); + float2 __ovld modf(float2 x, __global float2 *iptr); + float3 __ovld modf(float3 x, __global float3 *iptr); +@@ -8248,7 +8257,7 @@ float3 __ovld modf(float3 x, __private float3 *iptr); + float4 __ovld modf(float4 x, __private float4 *iptr); + float8 
__ovld modf(float8 x, __private float8 *iptr); + float16 __ovld modf(float16 x, __private float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld modf(double x, __global double *iptr); + double2 __ovld modf(double2 x, __global double2 *iptr); + double3 __ovld modf(double3 x, __global double3 *iptr); +@@ -8267,7 +8276,7 @@ double3 __ovld modf(double3 x, __private double3 *iptr); + double4 __ovld modf(double4 x, __private double4 *iptr); + double8 __ovld modf(double8 x, __private double8 *iptr); + double16 __ovld modf(double16 x, __private double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld modf(half x, __global half *iptr); + half2 __ovld modf(half2 x, __global half2 *iptr); +@@ -8288,7 +8297,8 @@ half4 __ovld modf(half4 x, __private half4 *iptr); + half8 __ovld modf(half8 x, __private half8 *iptr); + half16 __ovld modf(half16 x, __private half16 *iptr); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + + /** + * Returns a quiet NaN. 
The nancode may be placed +@@ -8300,14 +8310,14 @@ float3 __ovld __cnfn nan(uint3 nancode); + float4 __ovld __cnfn nan(uint4 nancode); + float8 __ovld __cnfn nan(uint8 nancode); + float16 __ovld __cnfn nan(uint16 nancode); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn nan(ulong nancode); + double2 __ovld __cnfn nan(ulong2 nancode); + double3 __ovld __cnfn nan(ulong3 nancode); + double4 __ovld __cnfn nan(ulong4 nancode); + double8 __ovld __cnfn nan(ulong8 nancode); + double16 __ovld __cnfn nan(ulong16 nancode); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn nan(ushort nancode); + half2 __ovld __cnfn nan(ushort2 nancode); +@@ -8330,14 +8340,14 @@ float3 __ovld __cnfn nextafter(float3 x, float3 y); + float4 __ovld __cnfn nextafter(float4 x, float4 y); + float8 __ovld __cnfn nextafter(float8 x, float8 y); + float16 __ovld __cnfn nextafter(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn nextafter(double x, double y); + double2 __ovld __cnfn nextafter(double2 x, double2 y); + double3 __ovld __cnfn nextafter(double3 x, double3 y); + double4 __ovld __cnfn nextafter(double4 x, double4 y); + double8 __ovld __cnfn nextafter(double8 x, double8 y); + double16 __ovld __cnfn nextafter(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn nextafter(half x, half y); + half2 __ovld __cnfn nextafter(half2 x, half2 y); +@@ -8356,14 +8366,14 @@ float3 __ovld __cnfn pow(float3 x, float3 y); + float4 __ovld __cnfn pow(float4 x, float4 y); + float8 __ovld __cnfn pow(float8 x, float8 y); + float16 __ovld __cnfn pow(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn pow(double x, double y); + double2 __ovld __cnfn 
pow(double2 x, double2 y); + double3 __ovld __cnfn pow(double3 x, double3 y); + double4 __ovld __cnfn pow(double4 x, double4 y); + double8 __ovld __cnfn pow(double8 x, double8 y); + double16 __ovld __cnfn pow(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn pow(half x, half y); + half2 __ovld __cnfn pow(half2 x, half2 y); +@@ -8382,14 +8392,14 @@ float3 __ovld __cnfn pown(float3 x, int3 y); + float4 __ovld __cnfn pown(float4 x, int4 y); + float8 __ovld __cnfn pown(float8 x, int8 y); + float16 __ovld __cnfn pown(float16 x, int16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn pown(double x, int y); + double2 __ovld __cnfn pown(double2 x, int2 y); + double3 __ovld __cnfn pown(double3 x, int3 y); + double4 __ovld __cnfn pown(double4 x, int4 y); + double8 __ovld __cnfn pown(double8 x, int8 y); + double16 __ovld __cnfn pown(double16 x, int16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn pown(half x, int y); + half2 __ovld __cnfn pown(half2 x, int2 y); +@@ -8408,14 +8418,14 @@ float3 __ovld __cnfn powr(float3 x, float3 y); + float4 __ovld __cnfn powr(float4 x, float4 y); + float8 __ovld __cnfn powr(float8 x, float8 y); + float16 __ovld __cnfn powr(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn powr(double x, double y); + double2 __ovld __cnfn powr(double2 x, double2 y); + double3 __ovld __cnfn powr(double3 x, double3 y); + double4 __ovld __cnfn powr(double4 x, double4 y); + double8 __ovld __cnfn powr(double8 x, double8 y); + double16 __ovld __cnfn powr(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn powr(half x, half y); + half2 __ovld __cnfn powr(half2 x, 
half2 y); +@@ -8437,14 +8447,14 @@ float3 __ovld __cnfn remainder(float3 x, float3 y); + float4 __ovld __cnfn remainder(float4 x, float4 y); + float8 __ovld __cnfn remainder(float8 x, float8 y); + float16 __ovld __cnfn remainder(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn remainder(double x, double y); + double2 __ovld __cnfn remainder(double2 x, double2 y); + double3 __ovld __cnfn remainder(double3 x, double3 y); + double4 __ovld __cnfn remainder(double4 x, double4 y); + double8 __ovld __cnfn remainder(double8 x, double8 y); + double16 __ovld __cnfn remainder(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn remainder(half x, half y); + half2 __ovld __cnfn remainder(half2 x, half2 y); +@@ -8466,21 +8476,21 @@ half16 __ovld __cnfn remainder(half16 x, half16 y); + * sign as x/y. It stores this signed value in the object + * pointed to by quo. 
+ */ +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + float __ovld remquo(float x, float y, int *quo); + float2 __ovld remquo(float2 x, float2 y, int2 *quo); + float3 __ovld remquo(float3 x, float3 y, int3 *quo); + float4 __ovld remquo(float4 x, float4 y, int4 *quo); + float8 __ovld remquo(float8 x, float8 y, int8 *quo); + float16 __ovld remquo(float16 x, float16 y, int16 *quo); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld remquo(double x, double y, int *quo); + double2 __ovld remquo(double2 x, double2 y, int2 *quo); + double3 __ovld remquo(double3 x, double3 y, int3 *quo); + double4 __ovld remquo(double4 x, double4 y, int4 *quo); + double8 __ovld remquo(double8 x, double8 y, int8 *quo); + double16 __ovld remquo(double16 x, double16 y, int16 *quo); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld remquo(half x, half y, int *quo); + half2 __ovld remquo(half2 x, half2 y, int2 *quo); +@@ -8488,9 +8498,10 @@ half3 __ovld remquo(half3 x, half3 y, int3 *quo); + half4 __ovld remquo(half4 x, half4 y, int4 *quo); + half8 __ovld remquo(half8 x, half8 y, int8 *quo); + half16 __ovld remquo(half16 x, half16 y, int16 *quo); +- + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld remquo(float x, float y, __global int *quo); + float2 __ovld remquo(float2 x, float2 y, __global int2 *quo); + float3 __ovld remquo(float3 x, float3 y, __global int3 *quo); +@@ -8509,7 +8520,7 @@ float3 __ovld remquo(float3 x, float3 y, __private int3 *quo); + float4 __ovld remquo(float4 x, float4 y, __private int4 *quo); + float8 __ovld remquo(float8 x, float8 y, __private int8 *quo); + float16 __ovld remquo(float16 x, float16 y, __private int16 *quo); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + 
double __ovld remquo(double x, double y, __global int *quo); + double2 __ovld remquo(double2 x, double2 y, __global int2 *quo); + double3 __ovld remquo(double3 x, double3 y, __global int3 *quo); +@@ -8528,7 +8539,7 @@ double3 __ovld remquo(double3 x, double3 y, __private int3 *quo); + double4 __ovld remquo(double4 x, double4 y, __private int4 *quo); + double8 __ovld remquo(double8 x, double8 y, __private int8 *quo); + double16 __ovld remquo(double16 x, double16 y, __private int16 *quo); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld remquo(half x, half y, __global int *quo); + half2 __ovld remquo(half2 x, half2 y, __global int2 *quo); +@@ -8549,7 +8560,8 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); + half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); + half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Round to integral value (using round to nearest + * even rounding mode) in floating-point format. 
+@@ -8562,14 +8574,14 @@ float3 __ovld __cnfn rint(float3); + float4 __ovld __cnfn rint(float4); + float8 __ovld __cnfn rint(float8); + float16 __ovld __cnfn rint(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rint(double); + double2 __ovld __cnfn rint(double2); + double3 __ovld __cnfn rint(double3); + double4 __ovld __cnfn rint(double4); + double8 __ovld __cnfn rint(double8); + double16 __ovld __cnfn rint(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rint(half); + half2 __ovld __cnfn rint(half2); +@@ -8588,14 +8600,14 @@ float3 __ovld __cnfn rootn(float3 x, int3 y); + float4 __ovld __cnfn rootn(float4 x, int4 y); + float8 __ovld __cnfn rootn(float8 x, int8 y); + float16 __ovld __cnfn rootn(float16 x, int16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rootn(double x, int y); + double2 __ovld __cnfn rootn(double2 x, int2 y); + double3 __ovld __cnfn rootn(double3 x, int3 y); + double4 __ovld __cnfn rootn(double4 x, int4 y); + double8 __ovld __cnfn rootn(double8 x, int8 y); + double16 __ovld __cnfn rootn(double16 x, int16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rootn(half x, int y); + half2 __ovld __cnfn rootn(half2 x, int2 y); +@@ -8616,14 +8628,14 @@ float3 __ovld __cnfn round(float3 x); + float4 __ovld __cnfn round(float4 x); + float8 __ovld __cnfn round(float8 x); + float16 __ovld __cnfn round(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn round(double x); + double2 __ovld __cnfn round(double2 x); + double3 __ovld __cnfn round(double3 x); + double4 __ovld __cnfn round(double4 x); + double8 __ovld __cnfn round(double8 x); + double16 __ovld __cnfn round(double16 x); +-#endif //cl_khr_fp64 ++#endif // 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn round(half x); + half2 __ovld __cnfn round(half2 x); +@@ -8642,14 +8654,14 @@ float3 __ovld __cnfn rsqrt(float3); + float4 __ovld __cnfn rsqrt(float4); + float8 __ovld __cnfn rsqrt(float8); + float16 __ovld __cnfn rsqrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rsqrt(double); + double2 __ovld __cnfn rsqrt(double2); + double3 __ovld __cnfn rsqrt(double3); + double4 __ovld __cnfn rsqrt(double4); + double8 __ovld __cnfn rsqrt(double8); + double16 __ovld __cnfn rsqrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rsqrt(half); + half2 __ovld __cnfn rsqrt(half2); +@@ -8668,14 +8680,14 @@ float3 __ovld __cnfn sin(float3); + float4 __ovld __cnfn sin(float4); + float8 __ovld __cnfn sin(float8); + float16 __ovld __cnfn sin(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sin(double); + double2 __ovld __cnfn sin(double2); + double3 __ovld __cnfn sin(double3); + double4 __ovld __cnfn sin(double4); + double8 __ovld __cnfn sin(double8); + double16 __ovld __cnfn sin(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sin(half); + half2 __ovld __cnfn sin(half2); +@@ -8690,21 +8702,21 @@ half16 __ovld __cnfn sin(half16); + * is the return value and computed cosine is returned + * in cosval. 
+ */ +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + float __ovld sincos(float x, float *cosval); + float2 __ovld sincos(float2 x, float2 *cosval); + float3 __ovld sincos(float3 x, float3 *cosval); + float4 __ovld sincos(float4 x, float4 *cosval); + float8 __ovld sincos(float8 x, float8 *cosval); + float16 __ovld sincos(float16 x, float16 *cosval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld sincos(double x, double *cosval); + double2 __ovld sincos(double2 x, double2 *cosval); + double3 __ovld sincos(double3 x, double3 *cosval); + double4 __ovld sincos(double4 x, double4 *cosval); + double8 __ovld sincos(double8 x, double8 *cosval); + double16 __ovld sincos(double16 x, double16 *cosval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld sincos(half x, half *cosval); + half2 __ovld sincos(half2 x, half2 *cosval); +@@ -8713,7 +8725,9 @@ half4 __ovld sincos(half4 x, half4 *cosval); + half8 __ovld sincos(half8 x, half8 *cosval); + half16 __ovld sincos(half16 x, half16 *cosval); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld sincos(float x, __global float *cosval); + float2 __ovld sincos(float2 x, __global float2 *cosval); + float3 __ovld sincos(float3 x, __global float3 *cosval); +@@ -8732,7 +8746,7 @@ float3 __ovld sincos(float3 x, __private float3 *cosval); + float4 __ovld sincos(float4 x, __private float4 *cosval); + float8 __ovld sincos(float8 x, __private float8 *cosval); + float16 __ovld sincos(float16 x, __private float16 *cosval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld sincos(double x, __global double *cosval); + double2 __ovld sincos(double2 x, __global double2 *cosval); + double3 __ovld sincos(double3 x, __global double3 
*cosval); +@@ -8751,7 +8765,7 @@ double3 __ovld sincos(double3 x, __private double3 *cosval); + double4 __ovld sincos(double4 x, __private double4 *cosval); + double8 __ovld sincos(double8 x, __private double8 *cosval); + double16 __ovld sincos(double16 x, __private double16 *cosval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld sincos(half x, __global half *cosval); + half2 __ovld sincos(half2 x, __global half2 *cosval); +@@ -8772,8 +8786,8 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); + half8 __ovld sincos(half8 x, __private half8 *cosval); + half16 __ovld sincos(half16 x, __private half16 *cosval); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +- ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Compute hyperbolic sine. + */ +@@ -8783,14 +8797,14 @@ float3 __ovld __cnfn sinh(float3); + float4 __ovld __cnfn sinh(float4); + float8 __ovld __cnfn sinh(float8); + float16 __ovld __cnfn sinh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sinh(double); + double2 __ovld __cnfn sinh(double2); + double3 __ovld __cnfn sinh(double3); + double4 __ovld __cnfn sinh(double4); + double8 __ovld __cnfn sinh(double8); + double16 __ovld __cnfn sinh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sinh(half); + half2 __ovld __cnfn sinh(half2); +@@ -8809,14 +8823,14 @@ float3 __ovld __cnfn sinpi(float3 x); + float4 __ovld __cnfn sinpi(float4 x); + float8 __ovld __cnfn sinpi(float8 x); + float16 __ovld __cnfn sinpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sinpi(double x); + double2 __ovld __cnfn sinpi(double2 x); + double3 __ovld __cnfn sinpi(double3 x); + double4 __ovld __cnfn sinpi(double4 x); + double8 
__ovld __cnfn sinpi(double8 x); + double16 __ovld __cnfn sinpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sinpi(half x); + half2 __ovld __cnfn sinpi(half2 x); +@@ -8835,14 +8849,14 @@ float3 __ovld __cnfn sqrt(float3); + float4 __ovld __cnfn sqrt(float4); + float8 __ovld __cnfn sqrt(float8); + float16 __ovld __cnfn sqrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sqrt(double); + double2 __ovld __cnfn sqrt(double2); + double3 __ovld __cnfn sqrt(double3); + double4 __ovld __cnfn sqrt(double4); + double8 __ovld __cnfn sqrt(double8); + double16 __ovld __cnfn sqrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sqrt(half); + half2 __ovld __cnfn sqrt(half2); +@@ -8861,14 +8875,14 @@ float3 __ovld __cnfn tan(float3); + float4 __ovld __cnfn tan(float4); + float8 __ovld __cnfn tan(float8); + float16 __ovld __cnfn tan(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tan(double); + double2 __ovld __cnfn tan(double2); + double3 __ovld __cnfn tan(double3); + double4 __ovld __cnfn tan(double4); + double8 __ovld __cnfn tan(double8); + double16 __ovld __cnfn tan(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tan(half); + half2 __ovld __cnfn tan(half2); +@@ -8887,14 +8901,14 @@ float3 __ovld __cnfn tanh(float3); + float4 __ovld __cnfn tanh(float4); + float8 __ovld __cnfn tanh(float8); + float16 __ovld __cnfn tanh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tanh(double); + double2 __ovld __cnfn tanh(double2); + double3 __ovld __cnfn tanh(double3); + double4 __ovld __cnfn tanh(double4); + double8 __ovld __cnfn tanh(double8); + 
double16 __ovld __cnfn tanh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tanh(half); + half2 __ovld __cnfn tanh(half2); +@@ -8913,14 +8927,14 @@ float3 __ovld __cnfn tanpi(float3 x); + float4 __ovld __cnfn tanpi(float4 x); + float8 __ovld __cnfn tanpi(float8 x); + float16 __ovld __cnfn tanpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tanpi(double x); + double2 __ovld __cnfn tanpi(double2 x); + double3 __ovld __cnfn tanpi(double3 x); + double4 __ovld __cnfn tanpi(double4 x); + double8 __ovld __cnfn tanpi(double8 x); + double16 __ovld __cnfn tanpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tanpi(half x); + half2 __ovld __cnfn tanpi(half2 x); +@@ -8939,14 +8953,14 @@ float3 __ovld __cnfn tgamma(float3); + float4 __ovld __cnfn tgamma(float4); + float8 __ovld __cnfn tgamma(float8); + float16 __ovld __cnfn tgamma(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tgamma(double); + double2 __ovld __cnfn tgamma(double2); + double3 __ovld __cnfn tgamma(double3); + double4 __ovld __cnfn tgamma(double4); + double8 __ovld __cnfn tgamma(double8); + double16 __ovld __cnfn tgamma(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tgamma(half); + half2 __ovld __cnfn tgamma(half2); +@@ -8966,14 +8980,14 @@ float3 __ovld __cnfn trunc(float3); + float4 __ovld __cnfn trunc(float4); + float8 __ovld __cnfn trunc(float8); + float16 __ovld __cnfn trunc(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn trunc(double); + double2 __ovld __cnfn trunc(double2); + double3 __ovld __cnfn trunc(double3); + double4 __ovld __cnfn trunc(double4); + 
double8 __ovld __cnfn trunc(double8); + double16 __ovld __cnfn trunc(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn trunc(half); + half2 __ovld __cnfn trunc(half2); +@@ -10383,7 +10397,7 @@ float3 __ovld __cnfn clamp(float3 x, float minval, float maxval); + float4 __ovld __cnfn clamp(float4 x, float minval, float maxval); + float8 __ovld __cnfn clamp(float8 x, float minval, float maxval); + float16 __ovld __cnfn clamp(float16 x, float minval, float maxval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn clamp(double x, double minval, double maxval); + double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval); + double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval); +@@ -10395,7 +10409,7 @@ double3 __ovld __cnfn clamp(double3 x, double minval, double maxval); + double4 __ovld __cnfn clamp(double4 x, double minval, double maxval); + double8 __ovld __cnfn clamp(double8 x, double minval, double maxval); + double16 __ovld __cnfn clamp(double16 x, double minval, double maxval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn clamp(half x, half minval, half maxval); + half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval); +@@ -10420,14 +10434,14 @@ float3 __ovld __cnfn degrees(float3 radians); + float4 __ovld __cnfn degrees(float4 radians); + float8 __ovld __cnfn degrees(float8 radians); + float16 __ovld __cnfn degrees(float16 radians); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn degrees(double radians); + double2 __ovld __cnfn degrees(double2 radians); + double3 __ovld __cnfn degrees(double3 radians); + double4 __ovld __cnfn degrees(double4 radians); + double8 __ovld __cnfn degrees(double8 radians); + double16 __ovld __cnfn degrees(double16 radians); +-#endif 
//cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn degrees(half radians); + half2 __ovld __cnfn degrees(half2 radians); +@@ -10452,7 +10466,7 @@ float3 __ovld __cnfn max(float3 x, float y); + float4 __ovld __cnfn max(float4 x, float y); + float8 __ovld __cnfn max(float8 x, float y); + float16 __ovld __cnfn max(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn max(double x, double y); + double2 __ovld __cnfn max(double2 x, double2 y); + double3 __ovld __cnfn max(double3 x, double3 y); +@@ -10464,7 +10478,7 @@ double3 __ovld __cnfn max(double3 x, double y); + double4 __ovld __cnfn max(double4 x, double y); + double8 __ovld __cnfn max(double8 x, double y); + double16 __ovld __cnfn max(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn max(half x, half y); + half2 __ovld __cnfn max(half2 x, half2 y); +@@ -10494,7 +10508,7 @@ float3 __ovld __cnfn min(float3 x, float y); + float4 __ovld __cnfn min(float4 x, float y); + float8 __ovld __cnfn min(float8 x, float y); + float16 __ovld __cnfn min(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn min(double x, double y); + double2 __ovld __cnfn min(double2 x, double2 y); + double3 __ovld __cnfn min(double3 x, double3 y); +@@ -10506,7 +10520,7 @@ double3 __ovld __cnfn min(double3 x, double y); + double4 __ovld __cnfn min(double4 x, double y); + double8 __ovld __cnfn min(double8 x, double y); + double16 __ovld __cnfn min(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn min(half x, half y); + half2 __ovld __cnfn min(half2 x, half2 y); +@@ -10539,7 +10553,7 @@ float3 __ovld __cnfn mix(float3 x, float3 y, float a); + float4 __ovld 
__cnfn mix(float4 x, float4 y, float a); + float8 __ovld __cnfn mix(float8 x, float8 y, float a); + float16 __ovld __cnfn mix(float16 x, float16 y, float a); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn mix(double x, double y, double a); + double2 __ovld __cnfn mix(double2 x, double2 y, double2 a); + double3 __ovld __cnfn mix(double3 x, double3 y, double3 a); +@@ -10551,7 +10565,7 @@ double3 __ovld __cnfn mix(double3 x, double3 y, double a); + double4 __ovld __cnfn mix(double4 x, double4 y, double a); + double8 __ovld __cnfn mix(double8 x, double8 y, double a); + double16 __ovld __cnfn mix(double16 x, double16 y, double a); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn mix(half x, half y, half a); + half2 __ovld __cnfn mix(half2 x, half2 y, half2 a); +@@ -10576,14 +10590,14 @@ float3 __ovld __cnfn radians(float3 degrees); + float4 __ovld __cnfn radians(float4 degrees); + float8 __ovld __cnfn radians(float8 degrees); + float16 __ovld __cnfn radians(float16 degrees); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn radians(double degrees); + double2 __ovld __cnfn radians(double2 degrees); + double3 __ovld __cnfn radians(double3 degrees); + double4 __ovld __cnfn radians(double4 degrees); + double8 __ovld __cnfn radians(double8 degrees); + double16 __ovld __cnfn radians(double16 degrees); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn radians(half degrees); + half2 __ovld __cnfn radians(half2 degrees); +@@ -10607,7 +10621,7 @@ float3 __ovld __cnfn step(float edge, float3 x); + float4 __ovld __cnfn step(float edge, float4 x); + float8 __ovld __cnfn step(float edge, float8 x); + float16 __ovld __cnfn step(float edge, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double 
__ovld __cnfn step(double edge, double x); + double2 __ovld __cnfn step(double2 edge, double2 x); + double3 __ovld __cnfn step(double3 edge, double3 x); +@@ -10619,7 +10633,7 @@ double3 __ovld __cnfn step(double edge, double3 x); + double4 __ovld __cnfn step(double edge, double4 x); + double8 __ovld __cnfn step(double edge, double8 x); + double16 __ovld __cnfn step(double edge, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn step(half edge, half x); + half2 __ovld __cnfn step(half2 edge, half2 x); +@@ -10659,7 +10673,7 @@ float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x); + float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x); + float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x); + float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn smoothstep(double edge0, double edge1, double x); + double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x); + double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x); +@@ -10671,7 +10685,7 @@ double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x); + double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x); + double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x); + double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn smoothstep(half edge0, half edge1, half x); + half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x); +@@ -10697,14 +10711,14 @@ float3 __ovld __cnfn sign(float3 x); + float4 __ovld __cnfn sign(float4 x); + float8 __ovld __cnfn sign(float8 x); + float16 __ovld __cnfn sign(float16 x); +-#ifdef cl_khr_fp64 ++#if 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sign(double x); + double2 __ovld __cnfn sign(double2 x); + double3 __ovld __cnfn sign(double3 x); + double4 __ovld __cnfn sign(double4 x); + double8 __ovld __cnfn sign(double8 x); + double16 __ovld __cnfn sign(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sign(half x); + half2 __ovld __cnfn sign(half2 x); +@@ -10722,10 +10736,10 @@ half16 __ovld __cnfn sign(half16 x); + */ + float4 __ovld __cnfn cross(float4 p0, float4 p1); + float3 __ovld __cnfn cross(float3 p0, float3 p1); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double4 __ovld __cnfn cross(double4 p0, double4 p1); + double3 __ovld __cnfn cross(double3 p0, double3 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half4 __ovld __cnfn cross(half4 p0, half4 p1); + half3 __ovld __cnfn cross(half3 p0, half3 p1); +@@ -10738,12 +10752,12 @@ float __ovld __cnfn dot(float p0, float p1); + float __ovld __cnfn dot(float2 p0, float2 p1); + float __ovld __cnfn dot(float3 p0, float3 p1); + float __ovld __cnfn dot(float4 p0, float4 p1); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn dot(double p0, double p1); + double __ovld __cnfn dot(double2 p0, double2 p1); + double __ovld __cnfn dot(double3 p0, double3 p1); + double __ovld __cnfn dot(double4 p0, double4 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn dot(half p0, half p1); + half __ovld __cnfn dot(half2 p0, half2 p1); +@@ -10759,12 +10773,12 @@ float __ovld __cnfn distance(float p0, float p1); + float __ovld __cnfn distance(float2 p0, float2 p1); + float __ovld __cnfn distance(float3 p0, float3 p1); + float __ovld __cnfn distance(float4 p0, float4 p1); +-#ifdef cl_khr_fp64 ++#if 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn distance(double p0, double p1); + double __ovld __cnfn distance(double2 p0, double2 p1); + double __ovld __cnfn distance(double3 p0, double3 p1); + double __ovld __cnfn distance(double4 p0, double4 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn distance(half p0, half p1); + half __ovld __cnfn distance(half2 p0, half2 p1); +@@ -10780,12 +10794,12 @@ float __ovld __cnfn length(float p); + float __ovld __cnfn length(float2 p); + float __ovld __cnfn length(float3 p); + float __ovld __cnfn length(float4 p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn length(double p); + double __ovld __cnfn length(double2 p); + double __ovld __cnfn length(double3 p); + double __ovld __cnfn length(double4 p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn length(half p); + half __ovld __cnfn length(half2 p); +@@ -10801,12 +10815,12 @@ float __ovld __cnfn normalize(float p); + float2 __ovld __cnfn normalize(float2 p); + float3 __ovld __cnfn normalize(float3 p); + float4 __ovld __cnfn normalize(float4 p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn normalize(double p); + double2 __ovld __cnfn normalize(double2 p); + double3 __ovld __cnfn normalize(double3 p); + double4 __ovld __cnfn normalize(double4 p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn normalize(half p); + half2 __ovld __cnfn normalize(half2 p); +@@ -10887,14 +10901,14 @@ int3 __ovld __cnfn isequal(float3 x, float3 y); + int4 __ovld __cnfn isequal(float4 x, float4 y); + int8 __ovld __cnfn isequal(float8 x, float8 y); + int16 __ovld __cnfn isequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isequal(double x, double y); + long2 __ovld __cnfn isequal(double2 x, double2 y); + long3 __ovld __cnfn isequal(double3 x, double3 y); + long4 __ovld __cnfn isequal(double4 x, double4 y); + long8 __ovld __cnfn isequal(double8 x, double8 y); + long16 __ovld __cnfn isequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isequal(half x, half y); + short2 __ovld __cnfn isequal(half2 x, half2 y); +@@ -10913,14 +10927,14 @@ int3 __ovld __cnfn isnotequal(float3 x, float3 y); + int4 __ovld __cnfn isnotequal(float4 x, float4 y); + int8 __ovld __cnfn isnotequal(float8 x, float8 y); + int16 __ovld __cnfn isnotequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isnotequal(double x, double y); + long2 __ovld __cnfn isnotequal(double2 x, double2 y); + long3 __ovld __cnfn isnotequal(double3 x, double3 y); + long4 __ovld __cnfn isnotequal(double4 x, double4 y); + long8 __ovld __cnfn isnotequal(double8 x, double8 y); + long16 __ovld __cnfn isnotequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnotequal(half x, half y); + short2 __ovld __cnfn isnotequal(half2 x, half2 y); +@@ -10939,14 +10953,14 @@ int3 __ovld __cnfn isgreater(float3 x, float3 y); + int4 __ovld __cnfn isgreater(float4 x, float4 y); + int8 __ovld __cnfn isgreater(float8 x, float8 y); + int16 __ovld __cnfn isgreater(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isgreater(double x, double y); + long2 __ovld __cnfn isgreater(double2 x, double2 y); + long3 __ovld __cnfn isgreater(double3 x, double3 y); + long4 __ovld __cnfn isgreater(double4 x, double4 y); + long8 __ovld __cnfn isgreater(double8 x, 
double8 y); + long16 __ovld __cnfn isgreater(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isgreater(half x, half y); + short2 __ovld __cnfn isgreater(half2 x, half2 y); +@@ -10965,14 +10979,14 @@ int3 __ovld __cnfn isgreaterequal(float3 x, float3 y); + int4 __ovld __cnfn isgreaterequal(float4 x, float4 y); + int8 __ovld __cnfn isgreaterequal(float8 x, float8 y); + int16 __ovld __cnfn isgreaterequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isgreaterequal(double x, double y); + long2 __ovld __cnfn isgreaterequal(double2 x, double2 y); + long3 __ovld __cnfn isgreaterequal(double3 x, double3 y); + long4 __ovld __cnfn isgreaterequal(double4 x, double4 y); + long8 __ovld __cnfn isgreaterequal(double8 x, double8 y); + long16 __ovld __cnfn isgreaterequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isgreaterequal(half x, half y); + short2 __ovld __cnfn isgreaterequal(half2 x, half2 y); +@@ -10991,14 +11005,14 @@ int3 __ovld __cnfn isless(float3 x, float3 y); + int4 __ovld __cnfn isless(float4 x, float4 y); + int8 __ovld __cnfn isless(float8 x, float8 y); + int16 __ovld __cnfn isless(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isless(double x, double y); + long2 __ovld __cnfn isless(double2 x, double2 y); + long3 __ovld __cnfn isless(double3 x, double3 y); + long4 __ovld __cnfn isless(double4 x, double4 y); + long8 __ovld __cnfn isless(double8 x, double8 y); + long16 __ovld __cnfn isless(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isless(half x, half y); + short2 __ovld __cnfn isless(half2 x, half2 y); +@@ 
-11017,14 +11031,14 @@ int3 __ovld __cnfn islessequal(float3 x, float3 y); + int4 __ovld __cnfn islessequal(float4 x, float4 y); + int8 __ovld __cnfn islessequal(float8 x, float8 y); + int16 __ovld __cnfn islessequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn islessequal(double x, double y); + long2 __ovld __cnfn islessequal(double2 x, double2 y); + long3 __ovld __cnfn islessequal(double3 x, double3 y); + long4 __ovld __cnfn islessequal(double4 x, double4 y); + long8 __ovld __cnfn islessequal(double8 x, double8 y); + long16 __ovld __cnfn islessequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn islessequal(half x, half y); + short2 __ovld __cnfn islessequal(half2 x, half2 y); +@@ -11044,14 +11058,14 @@ int3 __ovld __cnfn islessgreater(float3 x, float3 y); + int4 __ovld __cnfn islessgreater(float4 x, float4 y); + int8 __ovld __cnfn islessgreater(float8 x, float8 y); + int16 __ovld __cnfn islessgreater(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn islessgreater(double x, double y); + long2 __ovld __cnfn islessgreater(double2 x, double2 y); + long3 __ovld __cnfn islessgreater(double3 x, double3 y); + long4 __ovld __cnfn islessgreater(double4 x, double4 y); + long8 __ovld __cnfn islessgreater(double8 x, double8 y); + long16 __ovld __cnfn islessgreater(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn islessgreater(half x, half y); + short2 __ovld __cnfn islessgreater(half2 x, half2 y); +@@ -11070,14 +11084,14 @@ int3 __ovld __cnfn isfinite(float3); + int4 __ovld __cnfn isfinite(float4); + int8 __ovld __cnfn isfinite(float8); + int16 __ovld __cnfn isfinite(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || 
defined(__opencl_c_fp64) + int __ovld __cnfn isfinite(double); + long2 __ovld __cnfn isfinite(double2); + long3 __ovld __cnfn isfinite(double3); + long4 __ovld __cnfn isfinite(double4); + long8 __ovld __cnfn isfinite(double8); + long16 __ovld __cnfn isfinite(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isfinite(half); + short2 __ovld __cnfn isfinite(half2); +@@ -11096,14 +11110,14 @@ int3 __ovld __cnfn isinf(float3); + int4 __ovld __cnfn isinf(float4); + int8 __ovld __cnfn isinf(float8); + int16 __ovld __cnfn isinf(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isinf(double); + long2 __ovld __cnfn isinf(double2); + long3 __ovld __cnfn isinf(double3); + long4 __ovld __cnfn isinf(double4); + long8 __ovld __cnfn isinf(double8); + long16 __ovld __cnfn isinf(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isinf(half); + short2 __ovld __cnfn isinf(half2); +@@ -11122,14 +11136,14 @@ int3 __ovld __cnfn isnan(float3); + int4 __ovld __cnfn isnan(float4); + int8 __ovld __cnfn isnan(float8); + int16 __ovld __cnfn isnan(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isnan(double); + long2 __ovld __cnfn isnan(double2); + long3 __ovld __cnfn isnan(double3); + long4 __ovld __cnfn isnan(double4); + long8 __ovld __cnfn isnan(double8); + long16 __ovld __cnfn isnan(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnan(half); + short2 __ovld __cnfn isnan(half2); +@@ -11148,14 +11162,14 @@ int3 __ovld __cnfn isnormal(float3); + int4 __ovld __cnfn isnormal(float4); + int8 __ovld __cnfn isnormal(float8); + int16 __ovld __cnfn isnormal(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || 
defined(__opencl_c_fp64) + int __ovld __cnfn isnormal(double); + long2 __ovld __cnfn isnormal(double2); + long3 __ovld __cnfn isnormal(double3); + long4 __ovld __cnfn isnormal(double4); + long8 __ovld __cnfn isnormal(double8); + long16 __ovld __cnfn isnormal(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnormal(half); + short2 __ovld __cnfn isnormal(half2); +@@ -11176,14 +11190,14 @@ int3 __ovld __cnfn isordered(float3 x, float3 y); + int4 __ovld __cnfn isordered(float4 x, float4 y); + int8 __ovld __cnfn isordered(float8 x, float8 y); + int16 __ovld __cnfn isordered(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isordered(double x, double y); + long2 __ovld __cnfn isordered(double2 x, double2 y); + long3 __ovld __cnfn isordered(double3 x, double3 y); + long4 __ovld __cnfn isordered(double4 x, double4 y); + long8 __ovld __cnfn isordered(double8 x, double8 y); + long16 __ovld __cnfn isordered(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isordered(half x, half y); + short2 __ovld __cnfn isordered(half2 x, half2 y); +@@ -11204,14 +11218,14 @@ int3 __ovld __cnfn isunordered(float3 x, float3 y); + int4 __ovld __cnfn isunordered(float4 x, float4 y); + int8 __ovld __cnfn isunordered(float8 x, float8 y); + int16 __ovld __cnfn isunordered(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isunordered(double x, double y); + long2 __ovld __cnfn isunordered(double2 x, double2 y); + long3 __ovld __cnfn isunordered(double3 x, double3 y); + long4 __ovld __cnfn isunordered(double4 x, double4 y); + long8 __ovld __cnfn isunordered(double8 x, double8 y); + long16 __ovld __cnfn isunordered(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isunordered(half x, half y); + short2 __ovld __cnfn isunordered(half2 x, half2 y); +@@ -11234,14 +11248,14 @@ int3 __ovld __cnfn signbit(float3); + int4 __ovld __cnfn signbit(float4); + int8 __ovld __cnfn signbit(float8); + int16 __ovld __cnfn signbit(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn signbit(double); + long2 __ovld __cnfn signbit(double2); + long3 __ovld __cnfn signbit(double3); + long4 __ovld __cnfn signbit(double4); + long8 __ovld __cnfn signbit(double8); + long16 __ovld __cnfn signbit(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn signbit(half); + short2 __ovld __cnfn signbit(half2); +@@ -11368,14 +11382,14 @@ float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c); + float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c); + float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c); + float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn bitselect(double a, double b, double c); + double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c); + double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c); + double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c); + double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c); + double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn bitselect(half a, half b, half c); + half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c); +@@ -11508,7 +11522,7 @@ ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c); + long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c); + ulong16 __ovld 
__cnfn select(ulong16 a, ulong16 b, ulong16 c); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn select(double a, double b, long c); + double2 __ovld __cnfn select(double2 a, double2 b, long2 c); + double3 __ovld __cnfn select(double3 a, double3 b, long3 c); +@@ -11521,7 +11535,7 @@ double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c); + double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c); + double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c); + double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn select(half a, half b, short c); + half2 __ovld __cnfn select(half2 a, half2 b, short2 c); +@@ -11600,13 +11614,13 @@ uint16 __ovld vload16(size_t offset, const __constant uint *p); + long16 __ovld vload16(size_t offset, const __constant long *p); + ulong16 __ovld vload16(size_t offset, const __constant ulong *p); + float16 __ovld vload16(size_t offset, const __constant float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const __constant double *p); + double3 __ovld vload3(size_t offset, const __constant double *p); + double4 __ovld vload4(size_t offset, const __constant double *p); + double8 __ovld vload8(size_t offset, const __constant double *p); + double16 __ovld vload16(size_t offset, const __constant double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const __constant half *p); +@@ -11617,7 +11631,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p); + half16 __ovld vload16(size_t offset, const __constant half *p); + #endif //cl_khr_fp16 + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + char2 __ovld vload2(size_t offset, const 
char *p); + uchar2 __ovld vload2(size_t offset, const uchar *p); + short2 __ovld vload2(size_t offset, const short *p); +@@ -11664,13 +11678,13 @@ long16 __ovld vload16(size_t offset, const long *p); + ulong16 __ovld vload16(size_t offset, const ulong *p); + float16 __ovld vload16(size_t offset, const float *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const double *p); + double3 __ovld vload3(size_t offset, const double *p); + double4 __ovld vload4(size_t offset, const double *p); + double8 __ovld vload8(size_t offset, const double *p); + double16 __ovld vload16(size_t offset, const double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const half *p); +@@ -11680,7 +11694,7 @@ half4 __ovld vload4(size_t offset, const half *p); + half8 __ovld vload8(size_t offset, const half *p); + half16 __ovld vload16(size_t offset, const half *p); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space + char2 __ovld vload2(size_t offset, const __global char *p); + uchar2 __ovld vload2(size_t offset, const __global uchar *p); + short2 __ovld vload2(size_t offset, const __global short *p); +@@ -11817,7 +11831,7 @@ long16 __ovld vload16(size_t offset, const __private long *p); + ulong16 __ovld vload16(size_t offset, const __private ulong *p); + float16 __ovld vload16(size_t offset, const __private float *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const __global double *p); + double3 __ovld vload3(size_t offset, const __global double *p); + double4 __ovld vload4(size_t offset, const __global double *p); +@@ -11833,7 +11847,7 @@ double3 __ovld vload3(size_t offset, const __private double *p); + double4 __ovld vload4(size_t offset, const __private double *p); + double8 __ovld vload8(size_t offset, const __private 
double *p); + double16 __ovld vload16(size_t offset, const __private double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const __global half *p); +@@ -11855,9 +11869,8 @@ half4 __ovld vload4(size_t offset, const __private half *p); + half8 __ovld vload8(size_t offset, const __private half *p); + half16 __ovld vload16(size_t offset, const __private half *p); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + void __ovld vstore2(char2 data, size_t offset, char *p); + void __ovld vstore2(uchar2 data, size_t offset, uchar *p); + void __ovld vstore2(short2 data, size_t offset, short *p); +@@ -11903,13 +11916,13 @@ void __ovld vstore16(uint16 data, size_t offset, uint *p); + void __ovld vstore16(long16 data, size_t offset, long *p); + void __ovld vstore16(ulong16 data, size_t offset, ulong *p); + void __ovld vstore16(float16 data, size_t offset, float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore2(double2 data, size_t offset, double *p); + void __ovld vstore3(double3 data, size_t offset, double *p); + void __ovld vstore4(double4 data, size_t offset, double *p); + void __ovld vstore8(double8 data, size_t offset, double *p); + void __ovld vstore16(double16 data, size_t offset, double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld vstore(half data, size_t offset, half *p); + void __ovld vstore2(half2 data, size_t offset, half *p); +@@ -11918,7 +11931,7 @@ void __ovld vstore4(half4 data, size_t offset, half *p); + void __ovld vstore8(half8 data, size_t offset, half *p); + void __ovld vstore16(half16 data, size_t offset, half *p); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space + void __ovld vstore2(char2 
data, size_t offset, __global char *p); + void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p); + void __ovld vstore2(short2 data, size_t offset, __global short *p); +@@ -12054,7 +12067,7 @@ void __ovld vstore16(uint16 data, size_t offset, __private uint *p); + void __ovld vstore16(long16 data, size_t offset, __private long *p); + void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p); + void __ovld vstore16(float16 data, size_t offset, __private float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore2(double2 data, size_t offset, __global double *p); + void __ovld vstore3(double3 data, size_t offset, __global double *p); + void __ovld vstore4(double4 data, size_t offset, __global double *p); +@@ -12070,7 +12083,7 @@ void __ovld vstore3(double3 data, size_t offset, __private double *p); + void __ovld vstore4(double4 data, size_t offset, __private double *p); + void __ovld vstore8(double8 data, size_t offset, __private double *p); + void __ovld vstore16(double16 data, size_t offset, __private double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld vstore(half data, size_t offset, __global half *p); + void __ovld vstore2(half2 data, size_t offset, __global half *p); +@@ -12091,8 +12104,6 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); + void __ovld vstore8(half8 data, size_t offset, __private half *p); + void __ovld vstore16(half16 data, size_t offset, __private half *p); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +- + /** + * Read sizeof (half) bytes of data from address + * (p + offset). The data read is interpreted as a +@@ -12102,14 +12113,12 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p); + * must be 16-bit aligned. 
+ */ + float __ovld vload_half(size_t offset, const __constant half *p); +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + float __ovld vload_half(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float __ovld vload_half(size_t offset, const __global half *p); + float __ovld vload_half(size_t offset, const __local half *p); + float __ovld vload_half(size_t offset, const __private half *p); +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +- + /** + * Read sizeof (halfn) bytes of data from address + * (p + (offset * n)). The data read is interpreted +@@ -12123,13 +12132,14 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p); + float4 __ovld vload_half4(size_t offset, const __constant half *p); + float8 __ovld vload_half8(size_t offset, const __constant half *p); + float16 __ovld vload_half16(size_t offset, const __constant half *p); +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++ ++#ifdef __opencl_c_generic_address_space + float2 __ovld vload_half2(size_t offset, const half *p); + float3 __ovld vload_half3(size_t offset, const half *p); + float4 __ovld vload_half4(size_t offset, const half *p); + float8 __ovld vload_half8(size_t offset, const half *p); + float16 __ovld vload_half16(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float2 __ovld vload_half2(size_t offset, const __global half *p); + float3 __ovld vload_half3(size_t offset, const __global half *p); + float4 __ovld vload_half4(size_t offset, const __global half *p); +@@ -12145,7 +12155,6 @@ float3 __ovld vload_half3(size_t offset, const __private half *p); + float4 __ovld vload_half4(size_t offset, const __private half *p); + float8 __ovld vload_half8(size_t offset, const __private half *p); + float16 __ovld vload_half16(size_t offset, const __private half *p); +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 + + /** + * The float value given by data is first +@@ -12158,20 
+12167,20 @@ float16 __ovld vload_half16(size_t offset, const __private half *p); + * The default current rounding mode is round to + * nearest even. + */ +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + void __ovld vstore_half(float data, size_t offset, half *p); + void __ovld vstore_half_rte(float data, size_t offset, half *p); + void __ovld vstore_half_rtz(float data, size_t offset, half *p); + void __ovld vstore_half_rtp(float data, size_t offset, half *p); + void __ovld vstore_half_rtn(float data, size_t offset, half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half(double data, size_t offset, half *p); + void __ovld vstore_half_rte(double data, size_t offset, half *p); + void __ovld vstore_half_rtz(double data, size_t offset, half *p); + void __ovld vstore_half_rtp(double data, size_t offset, half *p); + void __ovld vstore_half_rtn(double data, size_t offset, half *p); +-#endif //cl_khr_fp64 +-#else ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + void __ovld vstore_half(float data, size_t offset, __global half *p); + void __ovld vstore_half_rte(float data, size_t offset, __global half *p); + void __ovld vstore_half_rtz(float data, size_t offset, __global half *p); +@@ -12187,7 +12196,7 @@ void __ovld vstore_half_rte(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtz(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtp(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtn(float data, size_t offset, __private half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half(double data, size_t offset, __global half *p); + void __ovld vstore_half_rte(double data, size_t offset, __global half *p); + void __ovld vstore_half_rtz(double data, size_t offset, __global half *p); +@@ -12203,8 
+12212,7 @@ void __ovld vstore_half_rte(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + /** + * The floatn value given by data is converted to +@@ -12217,7 +12225,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); + * The default current rounding mode is round to + * nearest even. + */ +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + void __ovld vstore_half2(float2 data, size_t offset, half *p); + void __ovld vstore_half3(float3 data, size_t offset, half *p); + void __ovld vstore_half4(float4 data, size_t offset, half *p); +@@ -12243,7 +12251,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p); + void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p); + void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p); + void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half2(double2 data, size_t offset, half *p); + void __ovld vstore_half3(double3 data, size_t offset, half *p); + void __ovld vstore_half4(double4 data, size_t offset, half *p); +@@ -12269,8 +12277,8 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p); + void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p); + void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p); + void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p); +-#endif //cl_khr_fp64 +-#else ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + void 
__ovld vstore_half2(float2 data, size_t offset, __global half *p); + void __ovld vstore_half3(float3 data, size_t offset, __global half *p); + void __ovld vstore_half4(float4 data, size_t offset, __global half *p); +@@ -12346,7 +12354,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p); + void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p); + void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p); + void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half2(double2 data, size_t offset, __global half *p); + void __ovld vstore_half3(double3 data, size_t offset, __global half *p); + void __ovld vstore_half4(double4 data, size_t offset, __global half *p); +@@ -12422,8 +12430,7 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p); + void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); + void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); + void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + /** + * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) +@@ -12444,14 +12451,14 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p); + float4 __ovld vloada_half4(size_t offset, const __constant half *p); + float8 __ovld vloada_half8(size_t offset, const __constant half *p); + float16 __ovld vloada_half16(size_t offset, const __constant half *p); +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + float __ovld vloada_half(size_t offset, const half *p); + float2 __ovld vloada_half2(size_t offset, const half *p); + float3 __ovld vloada_half3(size_t offset, const half *p); + float4 __ovld 
vloada_half4(size_t offset, const half *p); + float8 __ovld vloada_half8(size_t offset, const half *p); + float16 __ovld vloada_half16(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float __ovld vloada_half(size_t offset, const __global half *p); + float2 __ovld vloada_half2(size_t offset, const __global half *p); + float3 __ovld vloada_half3(size_t offset, const __global half *p); +@@ -12470,8 +12477,6 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p); + float4 __ovld vloada_half4(size_t offset, const __private half *p); + float8 __ovld vloada_half8(size_t offset, const __private half *p); + float16 __ovld vloada_half16(size_t offset, const __private half *p); +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +- + /** + * The floatn value given by data is converted to + * a halfn value using the appropriate rounding +@@ -12488,7 +12493,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p); + * mode. The default current rounding mode is + * round to nearest even. 
+ */ +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + void __ovld vstorea_half(float data, size_t offset, half *p); + void __ovld vstorea_half2(float2 data, size_t offset, half *p); + void __ovld vstorea_half3(float3 data, size_t offset, half *p); +@@ -12524,7 +12529,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p); + void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p); + void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstorea_half(double data, size_t offset, half *p); + void __ovld vstorea_half2(double2 data, size_t offset, half *p); + void __ovld vstorea_half3(double3 data, size_t offset, half *p); +@@ -12559,9 +12564,9 @@ void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p); + void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p); + void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p); + void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + +-#else + void __ovld vstorea_half(float data, size_t offset, __global half *p); + void __ovld vstorea_half2(float2 data, size_t offset, __global half *p); + void __ovld vstorea_half3(float3 data, size_t offset, __global half *p); +@@ -12667,7 +12672,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p); + void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p); + void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstorea_half(double data, size_t offset, __global half *p); + void __ovld vstorea_half2(double2 data, size_t offset, __global half *p); + void __ovld vstorea_half3(double3 
data, size_t offset, __global half *p); +@@ -12772,8 +12777,7 @@ void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p); + void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p); + void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p); + void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions + +@@ -12838,9 +12842,15 @@ void __ovld __conv barrier(cl_mem_fence_flags flags); + typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, ++#ifdef __opencl_c_atomic_scope_device + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, ++#endif ++#ifdef __opencl_c_atomic_scope_all_devices ++ memory_scope_all_devices = __OPENCL_MEMORY_SCOPE_ALL_DEVICES, + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ++#endif ++#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ ++ defined(__opencl_c_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP + #endif + } memory_scope; +@@ -12892,7 +12902,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags); + + // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#ifdef __opencl_c_generic_address_space + cl_mem_fence_flags __ovld get_fence(const void *ptr); + cl_mem_fence_flags __ovld get_fence(void *ptr); + +@@ -12903,7 +12913,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr); + * where gentype is builtin type or user defined type. 
+ */ + +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_generic_address_space + + // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch + +@@ -13042,7 +13052,7 @@ event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 + event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event); +@@ -13055,7 +13065,7 @@ event_t __ovld async_work_group_copy(__global double3 *dst, const __local double + event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t 
num_elements, event_t event); +@@ -13205,7 +13215,7 @@ event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local + event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event); +@@ -13218,7 +13228,7 @@ event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __loca + event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld 
async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event); +@@ -13308,14 +13318,14 @@ void __ovld prefetch(const __global uint16 *p, size_t num_elements); + void __ovld prefetch(const __global long16 *p, size_t num_elements); + void __ovld prefetch(const __global ulong16 *p, size_t num_elements); + void __ovld prefetch(const __global float16 *p, size_t num_elements); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld prefetch(const __global double *p, size_t num_elements); + void __ovld prefetch(const __global double2 *p, size_t num_elements); + void __ovld prefetch(const __global double3 *p, size_t num_elements); + void __ovld prefetch(const __global double4 *p, size_t num_elements); + void __ovld prefetch(const __global double8 *p, size_t num_elements); + void __ovld prefetch(const __global double16 *p, size_t num_elements); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld prefetch(const __global half *p, size_t num_elements); + void __ovld prefetch(const __global half2 *p, size_t num_elements); +@@ -13338,9 +13348,15 @@ void __ovld prefetch(const __global half16 *p, size_t num_elements); + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_add(volatile __global int *p, int val); +-unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_add(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_add(volatile __local int *p, int val); +-unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_add(volatile __local unsigned int *p, ++ unsigned int val); ++#ifdef __OPENCL_CPP_VERSION__ ++int __ovld atomic_add(volatile int *p, int val); ++unsigned int __ovld atomic_add(volatile unsigned int *p, unsigned int val); ++#endif + + #if defined(cl_khr_global_int32_base_atomics) + int __ovld atom_add(volatile __global int *p, int val); +@@ -13364,9 +13380,15 @@ unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long v + * returns old. + */ + int __ovld atomic_sub(volatile __global int *p, int val); +-unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_sub(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_sub(volatile __local int *p, int val); +-unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_sub(volatile __local unsigned int *p, ++ unsigned int val); ++#ifdef __OPENCL_CPP_VERSION__ ++int __ovld atomic_sub(volatile int *p, int val); ++unsigned int __ovld atomic_sub(volatile unsigned int *p, unsigned int val); ++#endif + + #if defined(cl_khr_global_int32_base_atomics) + int __ovld atom_sub(volatile __global int *p, int val); +@@ -13390,9 +13412,11 @@ unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long v + * value. 
+ */ + int __ovld atomic_xchg(volatile __global int *p, int val); +-unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_xchg(volatile __local int *p, int val); +-unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, ++ unsigned int val); + float __ovld atomic_xchg(volatile __global float *p, float val); + float __ovld atomic_xchg(volatile __local float *p, float val); + +@@ -13474,9 +13498,16 @@ unsigned long __ovld atom_dec(volatile __local unsigned long *p); + * returns old. + */ + int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val); +-unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val); ++unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, ++ unsigned int cmp, unsigned int val); + int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val); +-unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val); ++unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, ++ unsigned int cmp, unsigned int val); ++#ifdef __OPENCL_CPP_VERSION__ ++int __ovld atomic_cmpxchg(volatile int *p, int cmp, int val); ++unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, ++ unsigned int val); ++#endif + + #if defined(cl_khr_global_int32_base_atomics) + int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val); +@@ -13502,9 +13533,15 @@ unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned lo + * returns old. 
+ */ + int __ovld atomic_min(volatile __global int *p, int val); +-unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_min(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_min(volatile __local int *p, int val); +-unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_min(volatile __local unsigned int *p, ++ unsigned int val); ++#ifdef __OPENCL_CPP_VERSION__ ++int __ovld atomic_min(volatile int *p, int val); ++unsigned int __ovld atomic_min(volatile unsigned int *p, unsigned int val); ++#endif + + #if defined(cl_khr_global_int32_extended_atomics) + int __ovld atom_min(volatile __global int *p, int val); +@@ -13530,9 +13567,15 @@ unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long v + * returns old. + */ + int __ovld atomic_max(volatile __global int *p, int val); +-unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_max(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_max(volatile __local int *p, int val); +-unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_max(volatile __local unsigned int *p, ++ unsigned int val); ++#ifdef __OPENCL_CPP_VERSION__ ++int __ovld atomic_max(volatile int *p, int val); ++unsigned int __ovld atomic_max(volatile unsigned int *p, unsigned int val); ++#endif + + #if defined(cl_khr_global_int32_extended_atomics) + int __ovld atom_max(volatile __global int *p, int val); +@@ -13557,9 +13600,15 @@ unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long v + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_and(volatile __global int *p, int val); +-unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_and(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_and(volatile __local int *p, int val); +-unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_and(volatile __local unsigned int *p, ++ unsigned int val); ++#ifdef __OPENCL_CPP_VERSION__ ++int __ovld atomic_and(volatile int *p, int val); ++unsigned int __ovld atomic_and(volatile unsigned int *p, unsigned int val); ++#endif + + #if defined(cl_khr_global_int32_extended_atomics) + int __ovld atom_and(volatile __global int *p, int val); +@@ -13584,9 +13633,15 @@ unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long v + * pointed by p. The function returns old. + */ + int __ovld atomic_or(volatile __global int *p, int val); +-unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_or(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_or(volatile __local int *p, int val); +-unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_or(volatile __local unsigned int *p, ++ unsigned int val); ++#ifdef __OPENCL_CPP_VERSION__ ++int __ovld atomic_or(volatile int *p, int val); ++unsigned int __ovld atomic_or(volatile unsigned int *p, unsigned int val); ++#endif + + #if defined(cl_khr_global_int32_extended_atomics) + int __ovld atom_or(volatile __global int *p, int val); +@@ -13611,9 +13666,15 @@ unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long va + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_xor(volatile __global int *p, int val); +-unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xor(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_xor(volatile __local int *p, int val); +-unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xor(volatile __local unsigned int *p, ++ unsigned int val); ++#ifdef __OPENCL_CPP_VERSION__ ++int __ovld atomic_xor(volatile int *p, int val); ++unsigned int __ovld atomic_xor(volatile unsigned int *p, unsigned int val); ++#endif + + #if defined(cl_khr_global_int32_extended_atomics) + int __ovld atom_xor(volatile __global int *p, int val); +@@ -13661,120 +13722,78 @@ typedef enum memory_order + #endif + + // atomic_init() ++#ifdef __opencl_c_generic_address_space + void __ovld atomic_init(volatile atomic_int *object, int value); + void __ovld atomic_init(volatile atomic_uint *object, uint value); + void __ovld atomic_init(volatile atomic_float *object, float value); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + void __ovld atomic_init(volatile atomic_long *object, long value); + void __ovld atomic_init(volatile atomic_ulong *object, ulong value); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld atomic_init(volatile atomic_double *object, double value); +-#endif //cl_khr_fp64 +-#endif ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++void __ovld atomic_init(volatile atomic_int __global *object, int value); ++void __ovld atomic_init(volatile atomic_int __local *object, int value); ++void __ovld atomic_init(volatile atomic_uint __global *object, uint value); ++void __ovld 
atomic_init(volatile atomic_uint __local *object, uint value); ++void __ovld atomic_init(volatile atomic_float __global *object, float value); ++void __ovld atomic_init(volatile atomic_float __local *object, float value); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++void __ovld atomic_init(volatile atomic_long __global *object, long value); ++void __ovld atomic_init(volatile atomic_long __local *object, long value); ++void __ovld atomic_init(volatile atomic_ulong __global *object, ulong value); ++void __ovld atomic_init(volatile atomic_ulong __local *object, ulong value); ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++void __ovld atomic_init(volatile atomic_double __global *object, double value); ++void __ovld atomic_init(volatile atomic_double __local *object, double value); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_work_item_fence() +-void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope); ++void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, ++ memory_scope scope); + + // atomic_fetch() +- ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++#ifdef __opencl_c_generic_address_space + int __ovld atomic_fetch_add(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, 
memory_scope scope); + int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_or(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_and(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order); 
+-int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_min(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +-uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_max(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +-uint __ovld atomic_fetch_max(volatile atomic_uint *object, 
int operand); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); + + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + long __ovld atomic_fetch_add(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_or(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_or_explicit(volatile 
atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_and(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_min(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong 
operand, memory_order order, memory_scope scope); +-ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_max(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +-ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); + #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + + // OpenCL v2.0 s6.13.11.7.5: +@@ -13782,196 +13801,2236 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long opera + // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. 
+ + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); +-uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand); +-uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); +- +-uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax); +-uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t 
*object, intptr_t opermax, memory_order minder); +-uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); +-uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax); +-uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); +-uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); +- +-intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax); +-intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); +-intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); +-intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t 
opermax); +-intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); +-intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand); ++ ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, ++ intptr_t opermax); ++ ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, ++ uintptr_t opermax); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++int __ovld atomic_fetch_add(volatile atomic_int __global *object, int operand); ++uint __ovld atomic_fetch_add(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_sub(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_sub(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_sub(volatile atomic_uint __local *object, ++ uint operand); 
++uint __ovld atomic_fetch_sub(volatile atomic_uint __global *object, ++ uint operand); ++int __ovld atomic_fetch_or(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_or(volatile atomic_int __local *object, ++ int operand); ++uint __ovld atomic_fetch_or(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_or(volatile atomic_uint __local *object, uint operand); ++int __ovld atomic_fetch_xor(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_xor(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_xor(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_xor(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_and(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_and(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_and(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_and(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_min(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_min(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_min(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_min(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_max(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_max(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_max(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_max(volatile atomic_uint __local *object, ++ uint operand); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++long __ovld atomic_fetch_add(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_add(volatile 
atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_add(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_add(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_sub(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_sub(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_sub(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_sub(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_or(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_or(volatile atomic_long __local *object, long operand); ++ulong __ovld atomic_fetch_or(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_or(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_xor(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_xor(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_xor(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_xor(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_and(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_and(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_and(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_and(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_min(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_min(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_min(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_min(volatile atomic_ulong __local *object, ++ ulong 
operand); ++long __ovld atomic_fetch_max(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_max(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_max(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_max(volatile atomic_ulong __local *object, ++ ulong operand); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand); ++ ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __local 
*object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __local *object, ++ intptr_t opermax); ++ ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __global *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __global *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __global *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, ++ uint operand, memory_order 
order); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, ++ memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, ++ memory_order order, memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope 
scope); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, ++ memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, ++ long operand, 
memory_order order); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long 
*object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder); ++uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder); ++intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld atomic_fetch_xor_explicit(volatile 
atomic_intptr_t *object, ++ uintptr_t operand, ++ memory_order order); ++intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, ++ memory_order order); ++intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder); ++intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder); ++#endif // __opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order, ++ memory_scope scope); ++ ++uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++ ++intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld 
atomic_fetch_min_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder, ++ memory_scope scope); + #endif ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, ++ int 
operand, memory_order order); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld 
atomic_fetch_add_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, ++ 
int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong 
__global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_and_explicit(volatile 
atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, ++ ulong operand, 
memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ 
memory_scope scope); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. 
++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++uintptr_t __ovld ++atomic_fetch_add_explicit(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_add_explicit(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_sub_explicit(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_sub_explicit(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax, memory_order minder); ++uintptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t opermax, memory_order minder); ++uintptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax, memory_order minder); ++uintptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t opermax, memory_order minder); ++intptr_t 
__ovld ++atomic_fetch_or_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax, memory_order minder); ++intptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax, memory_order minder); ++intptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax, memory_order minder); ++intptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax, memory_order minder); ++#endif // __opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit( ++ volatile atomic_uintptr_t __global *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_add_explicit( ++ volatile atomic_uintptr_t __local *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit( ++ volatile atomic_uintptr_t __global *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit( ++ volatile atomic_uintptr_t __local *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++ ++uintptr_t __ovld 
atomic_fetch_or_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t opermax, ++ memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t opermax, ++ memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t opermax, ++ memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t opermax, ++ memory_order minder, memory_scope scope); ++ ++intptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); 
++intptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t opermax, ++ memory_order minder, memory_scope scope); ++intptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t opermax, ++ memory_order minder, memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t opermax, ++ memory_order minder, memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t opermax, ++ memory_order minder, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_store() + ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space + void __ovld atomic_store(volatile atomic_int *object, int desired); +-void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); + void __ovld atomic_store(volatile atomic_uint *object, uint desired); +-void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); + void __ovld 
atomic_store(volatile atomic_float *object, float desired); +-void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld atomic_store(volatile atomic_double *object, double desired); +-void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 +-void __ovld atomic_store(volatile atomic_long *object, long desired); +-void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); ++#endif + void __ovld atomic_store(volatile atomic_ulong *object, ulong desired); +-void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++void __ovld atomic_store(volatile atomic_int __global *object, int desired); ++void __ovld atomic_store(volatile atomic_int __local *object, int desired); ++void __ovld atomic_store(volatile atomic_uint __global *object, uint desired); ++void __ovld atomic_store(volatile atomic_uint __local *object, uint desired); ++void __ovld atomic_store(volatile atomic_float __global *object, float desired); ++void __ovld 
atomic_store(volatile atomic_float __local *object, float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++void __ovld atomic_store(volatile atomic_double __global *object, ++ double desired); ++void __ovld atomic_store(volatile atomic_double __local *object, ++ double desired); + #endif ++void __ovld atomic_store(volatile atomic_ulong __global *object, ulong desired); ++void __ovld atomic_store(volatile atomic_ulong __local *object, ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double *object, ++ double desired, memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile 
atomic_double *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, ++ memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint __global *object, ++ uint desired, 
memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // cl_khr_fp64 ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order, ++ 
memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_load() +- ++#ifdef __opencl_c_generic_address_space ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) + int __ovld atomic_load(volatile atomic_int *object); +-int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order); +-int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope); + uint __ovld atomic_load(volatile atomic_uint *object); +-uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order); +-uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope); + float __ovld atomic_load(volatile atomic_float *object); +-float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order); +-float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld atomic_load(volatile atomic_double *object); +-double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order); +-double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 ++#endif // cl_khr_fp64 + long __ovld atomic_load(volatile atomic_long *object); +-long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order); +-long __ovld 
atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope); + ulong __ovld atomic_load(volatile atomic_ulong *object); +-ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order); +-ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope); +-#endif ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif //__opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++int __ovld atomic_load(volatile atomic_int __global *object); ++int __ovld atomic_load(volatile atomic_int __local *object); ++uint __ovld atomic_load(volatile atomic_uint __global *object); ++uint __ovld atomic_load(volatile atomic_uint __local *object); ++float __ovld atomic_load(volatile atomic_float __global *object); ++float __ovld atomic_load(volatile atomic_float __local *object); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++double __ovld atomic_load(volatile atomic_double __global *object); ++double __ovld atomic_load(volatile atomic_double __local *object); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++long __ovld atomic_load(volatile atomic_long __global *object); ++long __ovld atomic_load(volatile atomic_long __local *object); ++ulong __ovld atomic_load(volatile atomic_ulong __global *object); ++ulong __ovld atomic_load(volatile atomic_ulong __local *object); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ 
++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile atomic_float *object, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, ++ memory_order order); ++#endif //__opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int __global *object, ++ 
memory_order order); ++int __ovld atomic_load_explicit(volatile atomic_int __local *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float __global *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int __global *object, ++ memory_order order, memory_scope scope); ++int __ovld atomic_load_explicit(volatile atomic_int __local *object, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile atomic_float __global *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile atomic_float __local *object, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double __global *object, ++ memory_order order); ++double __ovld atomic_load_explicit(volatile atomic_double __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double __global *object, ++ memory_order order, memory_scope scope); ++double __ovld atomic_load_explicit(volatile atomic_double __local *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef 
__opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long __global *object, ++ memory_order order); ++long __ovld atomic_load_explicit(volatile atomic_long __local *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long __global *object, ++ memory_order order, memory_scope scope); ++long __ovld atomic_load_explicit(volatile atomic_long __local *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_exchange() +- ++#ifdef __opencl_c_generic_address_space ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) + int __ovld atomic_exchange(volatile atomic_int *object, int desired); +-int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order); +-int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); + uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired); +-uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order); +-uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); + float __ovld atomic_exchange(volatile atomic_float *object, float desired); +-float __ovld atomic_exchange_explicit(volatile atomic_float 
*object, float desired, memory_order order); +-float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld atomic_exchange(volatile atomic_double *object, double desired); +-double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order); +-double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 ++#endif + long __ovld atomic_exchange(volatile atomic_long *object, long desired); +-long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order); +-long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); + ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired); +-ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); +-ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++int __ovld atomic_exchange(volatile atomic_int __global *object, int desired); ++int __ovld atomic_exchange(volatile atomic_int __local *object, int desired); ++uint __ovld atomic_exchange(volatile atomic_uint __global *object, ++ uint desired); ++uint __ovld atomic_exchange(volatile atomic_uint __local 
*object, uint desired); ++float __ovld atomic_exchange(volatile atomic_float __global *object, ++ float desired); ++float __ovld atomic_exchange(volatile atomic_float __local *object, ++ float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++double __ovld atomic_exchange(volatile atomic_double __global *object, ++ double desired); ++double __ovld atomic_exchange(volatile atomic_double __local *object, ++ double desired); + #endif ++long __ovld atomic_exchange(volatile atomic_long __global *object, ++ long desired); ++long __ovld atomic_exchange(volatile atomic_long __local *object, long desired); ++ulong __ovld atomic_exchange(volatile atomic_ulong __global *object, ++ ulong desired); ++ulong __ovld atomic_exchange(volatile atomic_ulong __local *object, ++ ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, ++ memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float *object, ++ float desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order, memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if 
defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double *object, ++ double desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, ++ memory_order order); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order); ++int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, ++ float desired, memory_order 
order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order, ++ memory_scope scope); ++int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order, ++ memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order); ++double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order, ++ memory_scope scope); ++double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order); ++long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order); ++ulong __ovld 
atomic_exchange_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order, ++ memory_scope scope); ++long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_compare_exchange_strong() and atomic_compare_exchange_weak() +- +-bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile 
atomic_int *object, int *expected, int desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure, memory_scope scope); ++#ifdef __opencl_c_generic_address_space ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, ++ int *expected, int desired); ++bool __ovld 
atomic_compare_exchange_strong(volatile atomic_uint *object, ++ uint *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, ++ int *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, ++ uint *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, ++ float *expected, float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, ++ float *expected, float desired); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 +-bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure, memory_scope scope); +-#endif //cl_khr_fp64 +-bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, 
memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure, memory_scope scope); ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, ++ double *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, ++ double *expected, double desired); + #endif ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, ++ long *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, ++ long *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ++ ulong 
*expected, ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ++ ulong *expected, ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __private *expected, ++ int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __private *expected, ++ int desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, ++ uint __global *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, ++ uint __local *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __private *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, 
++ uint __private *expected, ++ uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __private *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __private *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __private *expected, ++ uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __private *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __global *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __local *object, ++ float __global *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __local *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __local 
*object, ++ float __local *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __private *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __local *object, ++ float __private *expected, float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __global *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __global *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __local *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __local *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __private *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __private *expected, ++ float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __global *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, ++ double __global *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __local *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, ++ double __local *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __private *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, 
++ double __private *expected, double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __global *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __global *expected, ++ double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __local *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __local *expected, ++ double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __private *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __private *expected, ++ double desired); ++#endif ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __global *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __global *expected, ++ long desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __local *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __local *expected, ++ long desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __private *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __private *expected, ++ long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __global *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __global *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __local *expected, long 
desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __local *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __private *expected, ++ long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __private *expected, ++ long desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __global *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, ++ ulong __global *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __local *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, ++ ulong __local *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __private *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, ++ ulong __private *expected, ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, ++ ulong __global *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __global *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, ++ ulong __local *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __local *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, ++ ulong __private *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __private *expected, ++ ulong desired); ++#endif // 
defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint *object, uint *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, ++ uint *expected, uint desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, ++ float *expected, ++ float desired, ++ memory_order success, ++ memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint *object, uint *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint 
*object, ++ uint *expected, uint desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long *object, long *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, ++ long *expected, long desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ 
memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ++ ulong *expected, ++ ulong desired, ++ memory_order success, ++ memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long *object, long *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, ++ long *expected, long desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld 
atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld 
atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld 
atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __global 
*expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global 
*object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile 
atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope 
scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); 
++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld 
atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double 
__private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __local 
*expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong 
__local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local 
*object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld 
atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= 
CL_VERSION_3_0) + + // atomic_flag_test_and_set() and atomic_flag_clear() +- ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++#ifdef __opencl_c_generic_address_space + bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object); +-bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order); +-bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); + void __ovld atomic_flag_clear(volatile atomic_flag *object); +-void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order); +-void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++bool __ovld atomic_flag_test_and_set(volatile atomic_flag __global *object); ++bool __ovld atomic_flag_test_and_set(volatile atomic_flag __local *object); ++void __ovld atomic_flag_clear(volatile atomic_flag __global *object); ++void __ovld atomic_flag_clear(volatile atomic_flag __local *object); ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, ++ memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, ++ memory_order order, ++ memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, ++ memory_order order, memory_scope scope); ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef 
__opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit( ++ volatile atomic_flag __global *object, memory_order order); ++bool __ovld atomic_flag_test_and_set_explicit( ++ volatile atomic_flag __local *object, memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, ++ memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld ++atomic_flag_test_and_set_explicit(volatile atomic_flag __global *object, ++ memory_order order, memory_scope scope); ++bool __ovld ++atomic_flag_test_and_set_explicit(volatile atomic_flag __local *object, ++ memory_order order, memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, ++ memory_order order, memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, ++ memory_order order, memory_scope scope); ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 + +@@ -14199,7 +16258,7 @@ float16 __ovld __cnfn shuffle(float4 x, uint16 mask); + float16 __ovld __cnfn shuffle(float8 x, uint16 mask); + float16 __ovld __cnfn shuffle(float16 x, uint16 mask); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld __cnfn shuffle(double2 x, ulong2 mask); + double2 __ovld __cnfn shuffle(double4 x, ulong2 mask); + double2 __ovld __cnfn shuffle(double8 x, ulong2 mask); +@@ -14219,7 +16278,7 @@ double16 __ovld __cnfn shuffle(double2 x, ulong16 mask); + double16 __ovld __cnfn shuffle(double4 x, ulong16 mask); + double16 __ovld __cnfn shuffle(double8 x, ulong16 mask); + double16 __ovld __cnfn shuffle(double16 x, ulong16 mask); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half2 __ovld __cnfn shuffle(half2 x, ushort2 mask); +@@ -14423,7 
+16482,7 @@ float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask); + float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask); + float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask); + double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask); + double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask); +@@ -14443,7 +16502,7 @@ double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask); +@@ -14501,6 +16560,7 @@ int printf(__constant const char* st, ...); + #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable + #endif //cl_khr_gl_msaa_sharing + ++#ifdef __opencl_c_images + /** + * Use the coordinate (coord.xy) to do an element lookup in + * the 2D image object specified by image. 
+@@ -14802,7 +16862,8 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord); + #endif //cl_khr_fp16 + + // Image read functions for read_write images +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord); + int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord); + uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord); +@@ -14845,7 +16906,8 @@ float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 co + float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample); + #endif //cl_khr_gl_msaa_sharing + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + #ifdef cl_khr_mipmap_image + float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod); + int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod); +@@ -14919,7 +16981,8 @@ float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler + int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod); + uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod); + #endif //cl_khr_mipmap_image +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // Image read functions returning half4 type + #ifdef cl_khr_fp16 +@@ -14930,7 +16993,8 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord); + half4 __purefn __ovld read_imageh(read_write 
image2d_array_t image, int4 coord); + half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Write color value to location specified by coordinate +@@ -15019,7 +17083,7 @@ void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, flo + void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color); + void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color); + void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color); + void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color); +@@ -15052,7 +17116,7 @@ void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, in + void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float depth); + void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float depth); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color); + void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color); + void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color); +@@ -15065,7 +17129,7 @@ void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 + #ifdef cl_khr_fp16 + void __ovld write_imageh(write_only image1d_t image, int coord, half4 color); + void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 
color); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color); + #endif + void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color); +@@ -15074,7 +17138,8 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col + #endif //cl_khr_fp16 + + // Image write functions for read_write images +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color); + void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color); + void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color); +@@ -15095,7 +17160,7 @@ void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, flo + void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color); + void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color); + void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color); + void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color); +@@ -15127,7 +17192,7 @@ void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, in + void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color); + void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(read_write image3d_t image, 
int4 coord, int lod, float4 color); + void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color); + void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color); +@@ -15140,14 +17205,15 @@ void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 + #ifdef cl_khr_fp16 + void __ovld write_imageh(read_write image1d_t image, int coord, half4 color); + void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color); + #endif + void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color); + void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color); + void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color); + #endif //cl_khr_fp16 +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have + // access qualifier, which by default assume read_only access qualifier. 
Image query builtin +@@ -15160,7 +17226,7 @@ void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 col + int __ovld __cnfn get_image_width(read_only image1d_t image); + int __ovld __cnfn get_image_width(read_only image1d_buffer_t image); + int __ovld __cnfn get_image_width(read_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_width(read_only image3d_t image); + #endif + int __ovld __cnfn get_image_width(read_only image1d_array_t image); +@@ -15179,7 +17245,7 @@ int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image); + int __ovld __cnfn get_image_width(write_only image1d_t image); + int __ovld __cnfn get_image_width(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_width(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_width(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_width(write_only image1d_array_t image); +@@ -15195,7 +17261,8 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image); + int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_width(read_write image1d_t image); + int __ovld __cnfn get_image_width(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_width(read_write image2d_t image); +@@ -15212,7 +17279,8 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image); + int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image); + #endif 
//cl_khr_gl_msaa_sharing +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Return the image height in pixels. +@@ -15232,7 +17300,7 @@ int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + int __ovld __cnfn get_image_height(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_height(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_height(write_only image2d_array_t image); +@@ -15247,7 +17315,8 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image); + int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_height(read_write image2d_t image); + int __ovld __cnfn get_image_height(read_write image3d_t image); + int __ovld __cnfn get_image_height(read_write image2d_array_t image); +@@ -15261,20 +17330,23 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image); + int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Return the image depth in pixels. 
+ */ + int __ovld __cnfn get_image_depth(read_only image3d_t image); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_depth(write_only image3d_t image); + #endif + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_depth(read_write image3d_t image); +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // OpenCL Extension v2.0 s9.18 - Mipmaps + #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +@@ -15289,13 +17361,15 @@ int __ovld get_image_num_mip_levels(read_only image3d_t image); + + int __ovld get_image_num_mip_levels(write_only image1d_t image); + int __ovld get_image_num_mip_levels(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld get_image_num_mip_levels(write_only image3d_t image); + #endif + ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_mip_levels(read_write image1d_t image); + int __ovld get_image_num_mip_levels(read_write image2d_t image); + int __ovld get_image_num_mip_levels(read_write image3d_t image); ++#endif //__opencl_c_read_write_images + + int __ovld get_image_num_mip_levels(read_only image1d_array_t image); + int __ovld get_image_num_mip_levels(read_only image2d_array_t image); +@@ -15307,10 +17381,12 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image); + int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image); + int __ovld get_image_num_mip_levels(write_only image2d_depth_t image); + ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_mip_levels(read_write image1d_array_t image); + int __ovld 
get_image_num_mip_levels(read_write image2d_array_t image); + int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image); + int __ovld get_image_num_mip_levels(read_write image2d_depth_t image); ++#endif //__opencl_c_read_write_images + + #endif //cl_khr_mipmap_image + #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +@@ -15374,7 +17450,7 @@ int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth + int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image); + int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image); +@@ -15390,7 +17466,8 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t im + int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image); + int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image); +@@ -15407,7 +17484,8 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im + int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif 
//__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Return the image channel order. Valid values are: +@@ -15470,7 +17548,7 @@ int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t i + int __ovld __cnfn get_image_channel_order(write_only image1d_t image); + int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_channel_order(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_channel_order(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image); +@@ -15486,7 +17564,8 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image) + int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_channel_order(read_write image1d_t image); + int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_channel_order(read_write image2d_t image); +@@ -15503,7 +17582,8 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image) + int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Return the 2D image width and height as an int2 +@@ -15536,7 +17616,8 @@ int2 __ovld __cnfn 
get_image_dim(write_only image2d_array_msaa_t image); + int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int2 __ovld __cnfn get_image_dim(read_write image2d_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image); + #ifdef cl_khr_depth_images +@@ -15549,7 +17630,8 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Return the 3D image width, height, and depth as an +@@ -15558,12 +17640,14 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); + * component and the w component is 0. + */ + int4 __ovld __cnfn get_image_dim(read_only image3d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int4 __ovld __cnfn get_image_dim(write_only image3d_t image); + #endif +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int4 __ovld __cnfn get_image_dim(read_write image3d_t image); +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Return the image array size. 
+@@ -15589,7 +17673,8 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_ + size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array); + #endif //cl_khr_gl_msaa_sharing + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array); + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array); + #ifdef cl_khr_depth_images +@@ -15599,7 +17684,8 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array); + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array); + #endif //cl_khr_gl_msaa_sharing +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Return the number of samples associated with image +@@ -15617,18 +17703,23 @@ int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); + int __ovld get_image_num_samples(write_only image2d_array_msaa_t image); + int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_samples(read_write image2d_msaa_t image); + int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image); + int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); + int __ovld get_image_num_samples(read_write image2d_array_msaa_t image); + int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); +-#endif 
//__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_read_write_images ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + #endif + ++#endif //__opencl_c_images ++ + // OpenCL v2.0 s6.13.15 - Work-group Functions + +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_work_group_collective_functions + int __ovld __conv work_group_all(int predicate); + int __ovld __conv work_group_any(int predicate); + +@@ -15652,11 +17743,11 @@ ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z); + float __ovld __conv work_group_broadcast(float a, size_t local_id); + float __ovld __conv work_group_broadcast(float a, size_t x, size_t y); + float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv work_group_broadcast(double a, size_t local_id); + double __ovld __conv work_group_broadcast(double a, size_t x, size_t y); + double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld __conv work_group_reduce_add(half x); +@@ -15714,7 +17805,7 @@ float __ovld __conv work_group_scan_exclusive_max(float x); + float __ovld __conv work_group_scan_inclusive_add(float x); + float __ovld __conv work_group_scan_inclusive_min(float x); + float __ovld __conv work_group_scan_inclusive_max(float x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv work_group_reduce_add(double x); + double __ovld __conv work_group_reduce_min(double x); + double __ovld __conv work_group_reduce_max(double x); +@@ -15724,19 +17815,12 @@ double __ovld __conv work_group_scan_exclusive_max(double x); + double __ovld __conv 
work_group_scan_inclusive_add(double x); + double __ovld __conv work_group_scan_inclusive_min(double x); + double __ovld __conv work_group_scan_inclusive_max(double x); +-#endif //cl_khr_fp64 +- +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +- +-// OpenCL v2.0 s6.13.16 - Pipe Functions +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +-#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) +-bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +- ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_work_group_collective_functions ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + ++#ifdef __opencl_c_device_enqueue + // OpenCL v2.0 s6.13.17 - Enqueue Kernels +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 + + #define CL_COMPLETE 0x0 + #define CL_RUNNING 0x1 +@@ -15775,7 +17859,17 @@ typedef struct { + size_t globalWorkSize[MAX_WORK_DIM]; + size_t localWorkSize[MAX_WORK_DIM]; + } ndrange_t; ++#endif ++ ++// OpenCL v2.0 s6.13.16 - Pipe Functions ++#ifdef __opencl_c_pipes ++#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) ++bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); ++#endif //__opencl_c_pipes + ++// OpenCL v2.0 s6.13.17 - Enqueue Kernels ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_device_enqueue + ndrange_t __ovld ndrange_1D(size_t); + ndrange_t __ovld ndrange_1D(size_t, size_t); + ndrange_t __ovld ndrange_1D(size_t, size_t, size_t); +@@ -15803,11 +17897,13 @@ bool __ovld is_valid_event (clk_event_t event); + void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value); + + queue_t __ovld get_default_queue(void); +-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#endif //__opencl_c_device_enqueue ++#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= 
CL_VERSION_2_0) + + // OpenCL Extension v2.0 s9.17 - Sub-groups + +-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ++#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ ++ defined(__opencl_c_subgroups) + // Shared Sub Group Functions + uint __ovld get_sub_group_size(void); + uint __ovld get_max_sub_group_size(void); +@@ -15893,7 +17989,7 @@ half __ovld __conv sub_group_scan_inclusive_min(half x); + half __ovld __conv sub_group_scan_inclusive_max(half x); + #endif //cl_khr_fp16 + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id); + double __ovld __conv sub_group_reduce_add(double x); + double __ovld __conv sub_group_reduce_min(double x); +@@ -15904,7 +18000,7 @@ double __ovld __conv sub_group_scan_exclusive_max(double x); + double __ovld __conv sub_group_scan_inclusive_add(double x); + double __ovld __conv sub_group_scan_inclusive_min(double x); + double __ovld __conv sub_group_scan_inclusive_max(double x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #endif //cl_khr_subgroups cl_intel_subgroups + +@@ -16006,34 +18102,46 @@ uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c ); + long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c ); + ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c ); + ++#ifdef __opencl_c_images + uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord ); + uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord ); + uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord ); + uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord ); ++#endif //__opencl_c_images + +-#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef 
__opencl_c_images ++#ifdef __opencl_c_read_write_images + uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord); + uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord); + uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord); + uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord); +-#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); + uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); + uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p ); + uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p ); + ++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data); + void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data); + void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data); + void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data); ++#endif //__opencl_c_images + +-#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data); + void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data); + void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data); + void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, 
uint8 data); +-#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // __opencl_c_read_write_images ++#endif // __opencl_c_images ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data ); + void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data ); +@@ -16047,7 +18155,7 @@ half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c ); + half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c ); + #endif + +-#if defined(cl_khr_fp64) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv intel_sub_group_shuffle( double x, uint c ); + double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c ); + double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c ); +@@ -16146,68 +18254,92 @@ ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x ); + short __ovld __conv intel_sub_group_scan_inclusive_max( short x ); + ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x ); + ++#ifdef __opencl_c_images + uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord ); + uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord ); + uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord ); ++#endif //__opencl_c_images + +-#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord ); + uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord ); + uint4 __ovld 
__conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord ); +-#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p ); + uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p ); + uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p ); + ++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data ); + void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data ); + void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data ); ++#endif //__opencl_c_images + +-#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data ); + void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data ); + void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data ); +-#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images ++#endif 
// defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); + void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data ); + void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data ); + ++#ifdef __opencl_c_images + ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord ); + ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord ); + ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord ); + ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord ); ++#endif //__opencl_c_images + +-#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord); + ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord); + ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord); + ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord); +-#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); + ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p ); + ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p ); + ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p ); + 
++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data); + void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data); + void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data); + void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data); ++#endif //__opencl_c_images + +-#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data); + void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data); + void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data); + void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data); +-#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); + void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data ); +@@ -16457,6 +18589,7 @@ short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset( + short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size, + ushort2 image_size); + ++#ifdef __opencl_c_images + intel_sub_group_avc_ime_result_t __ovld + intel_sub_group_avc_ime_evaluate_with_single_reference( + read_only image2d_t src_image, read_only image2d_t ref_image, +@@ -16497,6 +18630,7 @@ intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout( + read_only image2d_t bwd_ref_image, 
sampler_t vme_media_sampler, + intel_sub_group_avc_ime_payload_t payload, + intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); ++#endif //__opencl_c_images + + intel_sub_group_avc_ime_single_reference_streamin_t __ovld + intel_sub_group_avc_ime_get_single_reference_streamin( +@@ -16561,6 +18695,7 @@ intel_sub_group_avc_ref_payload_t __ovld + intel_sub_group_avc_ref_set_bilinear_filter_enable( + intel_sub_group_avc_ref_payload_t payload); + ++#ifdef __opencl_c_images + intel_sub_group_avc_ref_result_t __ovld + intel_sub_group_avc_ref_evaluate_with_single_reference( + read_only image2d_t src_image, read_only image2d_t ref_image, +@@ -16579,6 +18714,7 @@ intel_sub_group_avc_ref_evaluate_with_multi_reference( + read_only image2d_t src_image, uint packed_reference_ids, + uchar packed_reference_field_polarities, sampler_t vme_media_sampler, + intel_sub_group_avc_ref_payload_t payload); ++#endif //__opencl_c_images + + // SIC built-in functions + intel_sub_group_avc_sic_payload_t __ovld +@@ -16629,6 +18765,7 @@ intel_sub_group_avc_sic_set_block_based_raw_skip_sad( + uchar block_based_skip_type, + intel_sub_group_avc_sic_payload_t payload); + ++#ifdef __opencl_c_images + intel_sub_group_avc_sic_result_t __ovld + intel_sub_group_avc_sic_evaluate_ipe( + read_only image2d_t src_image, sampler_t vme_media_sampler, +@@ -16651,6 +18788,7 @@ intel_sub_group_avc_sic_evaluate_with_multi_reference( + read_only image2d_t src_image, uint packed_reference_ids, + uchar packed_reference_field_polarities, sampler_t vme_media_sampler, + intel_sub_group_avc_sic_payload_t payload); ++#endif //__opencl_c_images + + uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape( + intel_sub_group_avc_sic_result_t result); +diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp +index 298a2bad56..1ccd37bbe3 100644 +--- a/lib/Parse/ParseDecl.cpp ++++ b/lib/Parse/ParseDecl.cpp +@@ -3675,7 +3675,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, + isInvalid = 
DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy); + break; + case tok::kw_pipe: +- if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200)) { ++ if (!getLangOpts().OpenCLPipeKeyword) { + // OpenCL 2.0 defined this keyword. OpenCL 1.2 and earlier should + // support the "pipe" word as identifier. + Tok.getIdentifierInfo()->revertTokenIDToIdentifier(); +@@ -3807,8 +3807,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, + case tok::kw___generic: + // generic address space is introduced only in OpenCL v2.0 + // see OpenCL C Spec v2.0 s6.5.5 +- if (Actions.getLangOpts().OpenCLVersion < 200 && +- !Actions.getLangOpts().OpenCLCPlusPlus) { ++ if (!Actions.getLangOpts().OpenCLGenericKeyword) { + DiagID = diag::err_opencl_unknown_type_specifier; + PrevSpec = Tok.getIdentifierInfo()->getNameStart(); + isInvalid = true; +@@ -4801,7 +4800,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { + default: return false; + + case tok::kw_pipe: +- return getLangOpts().OpenCL && (getLangOpts().OpenCLVersion >= 200); ++ return getLangOpts().OpenCLPipeKeyword; + + case tok::identifier: // foo::bar + // Unfortunate hack to support "Class.factoryMethod" notation. 
+@@ -5282,7 +5281,7 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, + if (Kind == tok::star || Kind == tok::caret) + return true; + +- if ((Kind == tok::kw_pipe) && Lang.OpenCL && (Lang.OpenCLVersion >= 200)) ++ if (Kind == tok::kw_pipe && Lang.OpenCLPipeKeyword) + return true; + + if (!Lang.CPlusPlus) +diff --git a/lib/Parse/ParsePragma.cpp b/lib/Parse/ParsePragma.cpp +index 7e9b1011e8..759ff59889 100644 +--- a/lib/Parse/ParsePragma.cpp ++++ b/lib/Parse/ParsePragma.cpp +@@ -693,12 +693,14 @@ void Parser::HandlePragmaOpenCLExtension() { + if (Name == "all") { + if (State == Disable) { + Opt.disableAll(); +- Opt.enableSupportedCore(getLangOpts()); ++ Opt.enableSupportedCore(); + } else { + PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1; + } ++ } else if (Opt.isFeature(Name)) { ++ PP.Diag(NameLoc, diag::warn_opencl_pragma_feature_ignore) << Ident; + } else if (State == Begin) { +- if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) { ++ if (!Opt.isKnown(Name) || !Opt.isSupported(Name)) { + Opt.support(Name); + } + Actions.setCurrentOpenCLExtension(Name); +@@ -708,9 +710,9 @@ void Parser::HandlePragmaOpenCLExtension() { + Actions.setCurrentOpenCLExtension(""); + } else if (!Opt.isKnown(Name)) + PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident; +- else if (Opt.isSupportedExtension(Name, getLangOpts())) ++ else if (Opt.isSupportedExtension(Name)) + Opt.enable(Name, State == Enable); +- else if (Opt.isSupportedCore(Name, getLangOpts())) ++ else if (Opt.isSupportedCore(Name)) + PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << Ident; + else + PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << Ident; +diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp +index 9d33ec5190..90ec1142bd 100644 +--- a/lib/Sema/Sema.cpp ++++ b/lib/Sema/Sema.cpp +@@ -256,9 +256,10 @@ void Sema::Initialize() { + // Initialize predefined OpenCL types and supported extensions and (optional) + // core features. 
+ if (getLangOpts().OpenCL) { ++ getOpenCLOptions().setOpenCLVersion(getLangOpts()); + getOpenCLOptions().addSupport( + Context.getTargetInfo().getSupportedOpenCLOpts()); +- getOpenCLOptions().enableSupportedCore(getLangOpts()); ++ getOpenCLOptions().enableSupportedCore(); + addImplicitTypedef("sampler_t", Context.OCLSamplerTy); + addImplicitTypedef("event_t", Context.OCLEventTy); + if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) { +@@ -314,12 +315,18 @@ void Sema::Initialize() { + "cl_khr_int64_base_atomics cl_khr_int64_extended_atomics"); + + setOpenCLExtensionForType(AtomicDoubleT, "cl_khr_fp64"); ++ setOpenCLExtensionForType(Context.OCLReserveIDTy, "__opencl_c_pipes"); ++ setOpenCLExtensionForType(Context.OCLClkEventTy, ++ "__opencl_c_device_enqueue"); ++ setOpenCLExtensionForType(Context.OCLQueueTy, ++ "__opencl_c_device_enqueue"); + } + + setOpenCLExtensionForType(Context.DoubleTy, "cl_khr_fp64"); + +-#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ +- setOpenCLExtensionForType(Context.Id, Ext); ++#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ ++ setOpenCLExtensionForType(Context.Id, Ext); \ ++ setOpenCLExtensionForType(Context.Id, "__opencl_c_images"); + #include "clang/Basic/OpenCLImageTypes.def" + #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + addImplicitTypedef(#ExtType, Context.Id##Ty); \ +@@ -1960,6 +1967,27 @@ bool Sema::isOpenCLDisabledDecl(Decl *FD) { + return false; + } + ++template ++void Sema::DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, ++ DiagLocT DiagLoc, DiagInfoT DiagInfo, ++ unsigned Selector, ++ SourceRange SrcRange) { ++ const auto &LO = getLangOpts(); ++ auto CLVer = LO.OpenCLCPlusPlus ? 
200 : LO.OpenCLVersion; ++ // For versions higher that 3.0 diagnosing feature ++ if (CLVer >= 300) { ++ OpenCLOptName = ++ llvm::StringSwitch(OpenCLOptName) ++ .Case("cl_khr_3d_image_writes", "__opencl_c_3d_image_writes") ++ .Case("cl_khr_subgroups", "__opencl_c_subgroups") ++ .Case("cl_khr_fp64", "__opencl_c_fp64") ++ .Default(OpenCLOptName); ++ } ++ ++ Diag(DiagLoc, diag::err_opencl_requires_extension) ++ << Selector << DiagInfo << OpenCLOptName << SrcRange; ++} ++ + template + bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, + DiagInfoT DiagInfo, MapT &Map, +@@ -1971,8 +1999,7 @@ bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, + bool Disabled = false; + for (auto &I : Loc->second) { + if (I != CurrOpenCLExtension && !getOpenCLOptions().isEnabled(I)) { +- Diag(DiagLoc, diag::err_opencl_requires_extension) << Selector << DiagInfo +- << I << SrcRange; ++ DiagnoseOpenCLRequiresOption(I, DiagLoc, DiagInfo, Selector, SrcRange); + Disabled = true; + } + } +@@ -2008,3 +2035,13 @@ bool Sema::checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E) { + return checkOpenCLDisabledTypeOrDecl(&D, E.getBeginLoc(), FnName, + OpenCLDeclExtMap, 1, D.getSourceRange()); + } ++ ++bool Sema::checkOpenCLSubgroupExtForCallExpr(CallExpr *Call) { ++ if (!getOpenCLOptions().isEnabled("cl_khr_subgroups")) { ++ DiagnoseOpenCLRequiresOption("cl_khr_subgroups", Call->getBeginLoc(), ++ Call->getDirectCallee(), 1, ++ Call->getSourceRange()); ++ return true; ++ } ++ return false; ++} +diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp +index b2c727b5c4..0f0d7ac79c 100644 +--- a/lib/Sema/SemaChecking.cpp ++++ b/lib/Sema/SemaChecking.cpp +@@ -401,20 +401,11 @@ static bool checkOpenCLBlockArgs(Sema &S, Expr *BlockArg) { + return IllegalParams; + } + +-static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) { +- if (!S.getOpenCLOptions().isEnabled("cl_khr_subgroups")) { +- S.Diag(Call->getBeginLoc(), diag::err_opencl_requires_extension) +- << 1 << 
Call->getDirectCallee() << "cl_khr_subgroups"; +- return true; +- } +- return false; +-} +- + static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { + if (checkArgCount(S, TheCall, 2)) + return true; + +- if (checkOpenCLSubgroupExt(S, TheCall)) ++ if (S.checkOpenCLSubgroupExtForCallExpr(TheCall)) + return true; + + // First argument is an ndrange_t type. +@@ -1383,7 +1374,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, + break; + case Builtin::BIsub_group_reserve_read_pipe: + case Builtin::BIsub_group_reserve_write_pipe: +- if (checkOpenCLSubgroupExt(*this, TheCall) || ++ if (checkOpenCLSubgroupExtForCallExpr(TheCall) || + SemaBuiltinReserveRWPipe(*this, TheCall)) + return ExprError(); + break; +@@ -1396,7 +1387,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, + break; + case Builtin::BIsub_group_commit_read_pipe: + case Builtin::BIsub_group_commit_write_pipe: +- if (checkOpenCLSubgroupExt(*this, TheCall) || ++ if (checkOpenCLSubgroupExtForCallExpr(TheCall) || + SemaBuiltinCommitRWPipe(*this, TheCall)) + return ExprError(); + break; +@@ -4030,6 +4021,20 @@ DiagnoseCStringFormatDirectiveInCFAPI(Sema &S, + } + } + ++bool Sema::isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const { ++ assert(llvm::isValidAtomicOrderingCABI(Ordering)); ++ auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering; ++ switch (OrderingCABI) { ++ case llvm::AtomicOrderingCABI::acquire: ++ case llvm::AtomicOrderingCABI::release: ++ case llvm::AtomicOrderingCABI::acq_rel: ++ return OpenCLFeatures.isEnabled("__opencl_c_atomic_order_acq_rel"); ++ ++ default: ++ return true; ++ } ++} ++ + /// Determine whether the given type has a non-null nullability annotation. 
+ static bool isNonNullType(ASTContext &ctx, QualType type) { + if (auto nullability = type->getNullability(ctx)) +@@ -4741,10 +4746,17 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult, + if (SubExprs.size() >= 2 && Form != Init) { + llvm::APSInt Result(32); + if (SubExprs[1]->isIntegerConstantExpr(Result, Context) && +- !isValidOrderingForOp(Result.getSExtValue(), Op)) ++ !isValidOrderingForOp(Result.getSExtValue(), Op)) { + Diag(SubExprs[1]->getBeginLoc(), + diag::warn_atomic_op_has_invalid_memory_order) + << SubExprs[1]->getSourceRange(); ++ } else if (IsOpenCL && ++ !isSupportedOpenCLOMemoryOrdering(Result.getSExtValue())) { ++ Diag(SubExprs[1]->getBeginLoc(), ++ diag::err_opencl_memory_ordering_require_feat) ++ << SubExprs[1]->getSourceRange(); ++ return ExprError(); ++ } + } + + if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { +diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp +index 7ddf2e88b7..8105b799e0 100644 +--- a/lib/Sema/SemaDecl.cpp ++++ b/lib/Sema/SemaDecl.cpp +@@ -7355,15 +7355,16 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { + // OpenCL C++ v1.0 s2.5 inherits rule from OpenCL C v2.0 and allows local + // address space additionally. + // FIXME: Add local AS for OpenCL C++. 
+- if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() || ++ if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() || + NewVD->hasExternalStorage()) { + if (!T->isSamplerT() && + !(T.getAddressSpace() == LangAS::opencl_constant || + (T.getAddressSpace() == LangAS::opencl_global && +- (getLangOpts().OpenCLVersion == 200 || +- getLangOpts().OpenCLCPlusPlus)))) { ++ (OpenCLFeatures.isEnabled( ++ "__opencl_c_program_scope_global_variables"))))) { + int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1; +- if (getLangOpts().OpenCLVersion == 200 || getLangOpts().OpenCLCPlusPlus) ++ if (OpenCLFeatures.isEnabled( ++ "__opencl_c_program_scope_global_variables")) + Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space) + << Scope << "global or constant"; + else +diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp +index c4c3598ee7..646aea2bd5 100644 +--- a/lib/Sema/SemaDeclAttr.cpp ++++ b/lib/Sema/SemaDeclAttr.cpp +@@ -6194,6 +6194,13 @@ static void handleOpenCLAccessAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + << AL << PDecl->getType() << DeclTy->isImageType(); + D->setInvalidDecl(true); + return; ++ } else if ((!S.getLangOpts().OpenCLCPlusPlus && ++ S.getLangOpts().OpenCLVersion >= 200) && ++ !S.getOpenCLOptions().isEnabled( ++ "__opencl_c_read_write_images")) { ++ S.Diag(AL.getLoc(), diag::err_opencl_requires_extension) ++ << 0 << PDecl->getType() << "__opencl_c_read_write_images"; ++ return; + } + } + } +diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp +index 950a63aa45..85e09cd195 100644 +--- a/lib/Sema/SemaDeclCXX.cpp ++++ b/lib/Sema/SemaDeclCXX.cpp +@@ -13173,6 +13173,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, + if (auto *PtrTy = ResultType->getAs()) { + ResultType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); + } ++ if (CanQual ExpectedPtrTy = ++ ExpectedResultType->getAs()) { ++ ExpectedResultType = SemaRef.Context.getCanonicalType( ++ 
RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); ++ } + } + + // Check that the result type is what we expect. +@@ -13206,6 +13211,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, + FnDecl->getParamDecl(0)->getType()->getAs()) { + FirstParamType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); + } ++ if (CanQual ExpectedPtrTy = ++ ExpectedFirstParamType->getAs()) { ++ ExpectedFirstParamType = SemaRef.Context.getCanonicalType( ++ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); ++ } + } + if (SemaRef.Context.getCanonicalType(FirstParamType).getUnqualifiedType() != + ExpectedFirstParamType) +diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp +index 1ae94c8aec..87f22619ca 100644 +--- a/lib/Sema/SemaType.cpp ++++ b/lib/Sema/SemaType.cpp +@@ -1924,7 +1924,7 @@ static bool checkQualifiedFunction(Sema &S, QualType T, SourceLocation Loc, + return true; + } + +-/// Build a pointer type. ++// Build a pointer type. + /// + /// \param T The type to which we'll be building a pointer. + /// +@@ -4907,9 +4907,15 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, + } + + case DeclaratorChunk::Pipe: { +- T = S.BuildReadPipeType(T, DeclType.Loc); +- processTypeAttrs(state, T, TAL_DeclSpec, +- D.getMutableDeclSpec().getAttributes()); ++ if (S.getOpenCLOptions().isEnabled("__opencl_c_pipes")) { ++ T = S.BuildReadPipeType(T, DeclType.Loc); ++ processTypeAttrs(state, T, TAL_DeclSpec, ++ D.getMutableDeclSpec().getAttributes()); ++ } else { ++ D.setInvalidType(true); ++ T = Context.IntTy; ++ S.Diag(D.getIdentifierLoc(), diag::err_opencl_pipes_require_feat); ++ } + break; + } + } +@@ -7273,16 +7279,18 @@ static void deduceOpenCLImplicitAddrSpace(TypeProcessingState &State, + // (...) + // Pointers that are declared without pointing to a named address space + // point to the generic address space. 
+- if (IsPointee) { ++ if (IsPointee && State.getSema().getOpenCLOptions().isEnabled("__opencl_c_generic_address_space")) { + ImpAddr = LangAS::opencl_generic; + } else { + if (D.getContext() == DeclaratorContext::TemplateArgContext) { + // Do not deduce address space for non-pointee type in template arg. +- } else if (D.getContext() == DeclaratorContext::FileContext) { ++ } else if (D.getContext() == DeclaratorContext::FileContext && ++ State.getSema().getOpenCLOptions().isEnabled("__opencl_c_program_scope_global_variables")) { + ImpAddr = LangAS::opencl_global; + } else { +- if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static || +- D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_extern) { ++ if ((D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static || ++ D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_extern) && ++ State.getSema().getOpenCLOptions().isEnabled("__opencl_c_program_scope_global_variables")) { + ImpAddr = LangAS::opencl_global; + } else { + ImpAddr = LangAS::opencl_private; +diff --git a/test/CodeGenOpenCL/addr-space-struct-arg.cl b/test/CodeGenOpenCL/addr-space-struct-arg.cl +index 6f923b7fd4..a1ae110375 100644 +--- a/test/CodeGenOpenCL/addr-space-struct-arg.cl ++++ b/test/CodeGenOpenCL/addr-space-struct-arg.cl +@@ -2,7 +2,8 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s +- ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables -O0 -triple 
spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s + typedef int int2 __attribute__((ext_vector_type(2))); + + typedef struct { +@@ -39,7 +40,7 @@ struct LargeStructTwoMember { + int2 y[20]; + }; + +-#if __OPENCL_C_VERSION__ >= 200 ++#ifdef __opencl_c_program_scope_global_variables + struct LargeStructOneMember g_s; + #endif + +@@ -98,7 +99,7 @@ void FuncOneLargeMember(struct LargeStructOneMember u) { + // AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)* + // AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false) + // AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]]) +-#if __OPENCL_C_VERSION__ >= 200 ++#ifdef __opencl_c_program_scope_global_variables + void test_indirect_arg_globl(void) { + FuncOneLargeMember(g_s); + } +diff --git a/test/CodeGenOpenCL/address-spaces-conversions.cl b/test/CodeGenOpenCL/address-spaces-conversions.cl +index c947db41e0..94ab838a1b 100644 +--- a/test/CodeGenOpenCL/address-spaces-conversions.cl ++++ b/test/CodeGenOpenCL/address-spaces-conversions.cl +@@ -1,5 +1,7 @@ + // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s ++// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s + // When -ffake-address-space-map is not used, all addr space mapped to 
0 for x86_64. + + // test that we generate address space casts everywhere we need conversions of +diff --git a/test/CodeGenOpenCL/address-spaces-mangling.cl b/test/CodeGenOpenCL/address-spaces-mangling.cl +index b6e6b87d9e..f1018ecf42 100644 +--- a/test/CodeGenOpenCL/address-spaces-mangling.cl ++++ b/test/CodeGenOpenCL/address-spaces-mangling.cl +@@ -6,6 +6,7 @@ + // We check that the address spaces are mangled the same in both version of OpenCL + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s + + // We can't name this f as private is equivalent to default + // no specifier given address space so we get multiple definition +@@ -47,7 +48,7 @@ void f(constant int *arg) { } + // OCL-20-DAG: @_Z1fPU3AS2i + // OCL-12-DAG: @_Z1fPU3AS2i + +-#if __OPENCL_C_VERSION__ >= 200 ++#if __OPENCL_C_VERSION__ == 200 + __attribute__((overloadable)) + void f(generic int *arg) { } + // ASMANG20: @_Z1fPU3AS4i +diff --git a/test/CodeGenOpenCL/address-spaces.cl b/test/CodeGenOpenCL/address-spaces.cl +index 3c8fea2a80..26a741338b 100644 +--- a/test/CodeGenOpenCL/address-spaces.cl ++++ b/test/CodeGenOpenCL/address-spaces.cl +@@ -1,9 +1,13 @@ + // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR + // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR + // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-amd-amdhsa 
-emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN + // RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + // RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + + // SPIR: %struct.S = type { i32, i32, i32* } + // CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* } +diff --git a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +index 59f38f80dc..22330a3f07 100644 +--- a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl ++++ b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s ++// RUN: %clang_cc1 -O0 -cl-std=CL3.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s + // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s + + // CL12-LABEL: define void @func1(i32 addrspace(5)* %x) +diff --git a/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl b/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl +index a5d438933f..059260d061 100644 +--- a/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl ++++ b/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl +@@ -4,6 +4,17 @@ + // RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 -triple amdgcn---amdgizcl 
-cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple r600 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple r600 -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s + + #ifdef __AMDGCN__ + #define PTSIZE 8 +@@ -11,7 +22,7 @@ + #define PTSIZE 4 + #endif + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + #endif + #ifdef cl_khr_fp16 +@@ -59,8 +70,12 @@ void test() { + check(__alignof__(double) == 8); + #endif + +- check(sizeof(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4)); +- check(__alignof__(void*) == (__OPENCL_C_VERSION__ >= 200 ? 
8 : 4)); ++ check(sizeof(private void*) == 4); ++ check(__alignof__(private void*) == 4); ++#ifdef __opencl_c_generic_address_space ++ check(sizeof(generic void*) == 8); ++ check(__alignof__(generic void*) == 8); ++#endif + check(sizeof(global_ptr_t) == PTSIZE); + check(__alignof__(global_ptr_t) == PTSIZE); + check(sizeof(constant_ptr_t) == PTSIZE); +diff --git a/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/test/CodeGenOpenCL/cl-uniform-wg-size.cl +index 76ace5dca2..5dc43e222f 100644 +--- a/test/CodeGenOpenCL/cl-uniform-wg-size.cl ++++ b/test/CodeGenOpenCL/cl-uniform-wg-size.cl +@@ -1,6 +1,8 @@ + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM ++// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM ++// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + + kernel void ker() {}; + // CHECK: define{{.*}}@ker() #0 +diff --git a/test/CodeGenOpenCL/fpmath.cl b/test/CodeGenOpenCL/fpmath.cl +index 0108d909c9..b28392739c 100644 +--- a/test/CodeGenOpenCL/fpmath.cl ++++ b/test/CodeGenOpenCL/fpmath.cl +@@ -2,6 +2,8 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s + // RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s + // RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s ++// RUN: %clang_cc1 %s -emit-llvm -o - 
-DNOFP64 -cl-std=CL3.0 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s + + typedef __attribute__(( ext_vector_type(4) )) float float4; + +diff --git a/test/CodeGenOpenCL/generic-address-space-feature.cl b/test/CodeGenOpenCL/generic-address-space-feature.cl +new file mode 100644 +index 0000000000..3be428a5f9 +--- /dev/null ++++ b/test/CodeGenOpenCL/generic-address-space-feature.cl +@@ -0,0 +1,26 @@ ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL20 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC ++ ++void test(global float* src1, local float *src2, private float *src3, float *src4, float tmp) { ++ // CL20: %{{.+}} = addrspacecast float addrspace(1)* %{{.+}} to float addrspace(4)* ++ // CL12-NOT: addrspacecast ++ // CL30-NOT: addrspacecast ++ // CL30-GENERIC-NOT: addrspacecast ++ tmp = sincos(tmp, src1); ++ // CL20: %{{.+}} = addrspacecast float addrspace(3)* %{{.+}} to float addrspace(4)* ++ // CL12-NOT: addrspacecast ++ // CL30-NOT: addrspacecast ++ // CL30-GENERIC-NOT: addrspacecast ++ tmp = sincos(tmp, src2); ++ ++ // CL12: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) ++ // CL20: %{{.+}} = call 
{{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) ++ // CL30: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) ++ // CL30-GENERIC: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) ++ // CHECK: addrspacecast ++ tmp = sincos(tmp, src4); ++} ++ ++ +diff --git a/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl b/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl +index 515f13f6e7..5aa31ac6f3 100644 +--- a/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl ++++ b/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s + + // CHECK: %opencl.intel_sub_group_avc_mce_payload_t = type opaque + // CHECK: %opencl.intel_sub_group_avc_ime_payload_t = type opaque +diff --git a/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl +index 5bb52e9beb..3caa8b7a50 100644 +--- a/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl ++++ b/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl +@@ -1,5 +1,8 @@ + // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s ++ + // Test that the kernels always use the SPIR calling convention + // to 
have unambiguous mapping of arguments to feasibly implement + // clSetKernelArg(). +diff --git a/test/CodeGenOpenCL/logical-ops.cl b/test/CodeGenOpenCL/logical-ops.cl +index ac1c1b5454..d830716df4 100644 +--- a/test/CodeGenOpenCL/logical-ops.cl ++++ b/test/CodeGenOpenCL/logical-ops.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s + + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + +diff --git a/test/CodeGenOpenCL/no-half.cl b/test/CodeGenOpenCL/no-half.cl +index aee8f678f0..46da7fa339 100644 +--- a/test/CodeGenOpenCL/no-half.cl ++++ b/test/CodeGenOpenCL/no-half.cl +@@ -1,6 +1,7 @@ + // RUN: %clang_cc1 %s -cl-std=cl2.0 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=cl1.2 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=cl1.1 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s ++// RUN: %clang_cc1 %s -cl-std=cl3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + + #pragma OPENCL EXTENSION cl_khr_fp64:enable + +diff --git a/test/CodeGenOpenCL/pipe_builtin.cl b/test/CodeGenOpenCL/pipe_builtin.cl +index 2a533c54c1..a66f8c2c79 100644 +--- a/test/CodeGenOpenCL/pipe_builtin.cl ++++ b/test/CodeGenOpenCL/pipe_builtin.cl +@@ -1,4 +1,9 @@ + // RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl2.0 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl3.0 -cl-ext=__opencl_c_pipes,__opencl_c_subgroups -o - %s | FileCheck %s ++ ++// FIXME: Add MS ABI manglings of OpenCL things and remove %itanium_abi_triple ++// above to support OpenCL in 
the MS C++ ABI. + + // CHECK-DAG: %opencl.pipe_ro_t = type opaque + // CHECK-DAG: %opencl.pipe_wo_t = type opaque +diff --git a/test/CodeGenOpenCL/pipe_types.cl b/test/CodeGenOpenCL/pipe_types.cl +index ba064c6d75..b7a523d4f0 100644 +--- a/test/CodeGenOpenCL/pipe_types.cl ++++ b/test/CodeGenOpenCL/pipe_types.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -o - %s | FileCheck %s + + // CHECK: %opencl.pipe_ro_t = type opaque + // CHECK: %opencl.pipe_wo_t = type opaque +diff --git a/test/CodeGenOpenCL/printf.cl b/test/CodeGenOpenCL/printf.cl +index fc139d776d..0133c5595d 100644 +--- a/test/CodeGenOpenCL/printf.cl ++++ b/test/CodeGenOpenCL/printf.cl +@@ -1,5 +1,7 @@ + // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s + // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s + + typedef __attribute__((ext_vector_type(2))) float float2; + typedef __attribute__((ext_vector_type(2))) half half2; +diff --git a/test/CodeGenOpenCL/unroll-hint.cl b/test/CodeGenOpenCL/unroll-hint.cl +index 6a9ba87a5e..f5e9da70cd 100644 +--- a/test/CodeGenOpenCL/unroll-hint.cl ++++ b/test/CodeGenOpenCL/unroll-hint.cl +@@ -1,5 +1,6 @@ + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s ++// 
RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s | FileCheck %s + + /*** for ***/ + void for_count() +diff --git a/test/Driver/autocomplete.c b/test/Driver/autocomplete.c +index f8271770d0..c2ce49255d 100644 +--- a/test/Driver/autocomplete.c ++++ b/test/Driver/autocomplete.c +@@ -34,7 +34,6 @@ + // RUN: %clang --autocomplete=-cl-std=,CL2 | FileCheck %s -check-prefix=CLSTD + // CLSTD: CL2.0 + // RUN: %clang --autocomplete=-cl-std= | FileCheck %s -check-prefix=CLSTDALL +-// CLSTDALL: c++ + // CLSTDALL-NEXT: cl + // CLSTDALL-NEXT: CL + // CLSTDALL-NEXT: cl1.1 +@@ -43,6 +42,8 @@ + // CLSTDALL-NEXT: CL1.2 + // CLSTDALL-NEXT: cl2.0 + // CLSTDALL-NEXT: CL2.0 ++// CLSTDALL-NEXT: cl3.0 ++// CLSTDALL-NEXT: CL3.0 + // RUN: %clang --autocomplete=-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER + // FNOSANICOVER: func + // RUN: %clang --autocomplete=-fno-sanitize-coverage= | FileCheck %s -check-prefix=FNOSANICOVERALL +diff --git a/test/Driver/opencl.cl b/test/Driver/opencl.cl +index baff86fb90..46dba0016b 100644 +--- a/test/Driver/opencl.cl ++++ b/test/Driver/opencl.cl +@@ -3,6 +3,7 @@ + // RUN: %clang -S -### -cl-std=CL1.2 %s 2>&1 | FileCheck --check-prefix=CHECK-CL12 %s + // RUN: %clang -S -### -cl-std=CL2.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL20 %s + // RUN: %clang -S -### -cl-std=c++ %s 2>&1 | FileCheck --check-prefix=CHECK-CLCPP %s ++// RUN: %clang -S -### -cl-std=CL3.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL30 %s + // RUN: %clang -S -### -cl-opt-disable %s 2>&1 | FileCheck --check-prefix=CHECK-OPT-DISABLE %s + // RUN: %clang -S -### -cl-strict-aliasing %s 2>&1 | FileCheck --check-prefix=CHECK-STRICT-ALIASING %s + // RUN: %clang -S -### -cl-single-precision-constant %s 2>&1 | FileCheck --check-prefix=CHECK-SINGLE-PRECISION-CONST %s +@@ -23,6 +24,7 @@ + // CHECK-CL12: "-cc1" {{.*}} "-cl-std=CL1.2" + // CHECK-CL20: "-cc1" {{.*}} "-cl-std=CL2.0" + // CHECK-CLCPP: "-cc1" {{.*}} "-cl-std=c++" ++// CHECK-CL30: "-cc1" {{.*}} "-cl-std=CL3.0" + // 
CHECK-OPT-DISABLE: "-cc1" {{.*}} "-cl-opt-disable" + // CHECK-STRICT-ALIASING: "-cc1" {{.*}} "-cl-strict-aliasing" + // CHECK-SINGLE-PRECISION-CONST: "-cc1" {{.*}} "-cl-single-precision-constant" +diff --git a/test/Driver/unknown-std.cl b/test/Driver/unknown-std.cl +index 90ee97b77f..289ff776ae 100644 +--- a/test/Driver/unknown-std.cl ++++ b/test/Driver/unknown-std.cl +@@ -11,6 +11,7 @@ + // CHECK-NEXT: note: use 'cl1.2' for 'OpenCL 1.2' standard + // CHECK-NEXT: note: use 'cl2.0' for 'OpenCL 2.0' standard + // CHECK-NEXT: note: use 'c++' for 'OpenCL C++ 1.0' standard ++// CHECK-NEXT: note: use 'cl3.0' for 'OpenCL 3.0' standard + + // Make sure that no other output is present. + // CHECK-NOT: {{^.+$}} +diff --git a/test/Frontend/stdlang.c b/test/Frontend/stdlang.c +index 2b24c2dfea..ff2b89e79f 100644 +--- a/test/Frontend/stdlang.c ++++ b/test/Frontend/stdlang.c +@@ -9,6 +9,7 @@ + // RUN: %clang_cc1 -x cl -cl-std=CL1.1 -DOPENCL %s + // RUN: %clang_cc1 -x cl -cl-std=CL1.2 -DOPENCL %s + // RUN: %clang_cc1 -x cl -cl-std=CL2.0 -DOPENCL %s ++// RUN: %clang_cc1 -x cl -cl-std=CL3.0 -DOPENCL %s + // RUN: not %clang_cc1 -x cl -std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s + // RUN: not %clang_cc1 -x cl -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s + // CHECK-C99: error: invalid argument '-std=c99' not allowed with 'OpenCL' +diff --git a/test/Headers/opencl-c-header.cl b/test/Headers/opencl-c-header.cl +index b26e61bf1a..083be6723b 100644 +--- a/test/Headers/opencl-c-header.cl ++++ b/test/Headers/opencl-c-header.cl +@@ -1,6 +1,7 @@ + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify | FileCheck %s +-// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.1| FileCheck %s +-// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem 
../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.2| FileCheck %s ++// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.1 | FileCheck %s ++// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.2 | FileCheck %s ++// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL3.0 | FileCheck %s + + // Test including the default header as a module. + // The module should be compiled only once and loaded from cache afterwards. +@@ -38,9 +39,11 @@ + // RUN: rm -rf %t + // RUN: mkdir -p %t + // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s ++// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s + // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s + // RUN: chmod u-w %t + // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s ++// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck 
--check-prefix=CHECK --check-prefix=CHECK-MOD %s + // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s + // RUN: chmod u+w %t + +@@ -67,7 +70,7 @@ char f(char x) { + // from OpenCL 2.0 onwards. + + // CHECK20: _Z12write_imagef14ocl_image3d_wo +-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) + void test_image3dwo(write_only image3d_t img) { + write_imagef(img, (0), (0.0f)); + } +diff --git a/test/Index/pipe-size.cl b/test/Index/pipe-size.cl +index 94a1255f0a..59b76051ed 100644 +--- a/test/Index/pipe-size.cl ++++ b/test/Index/pipe-size.cl +@@ -2,6 +2,13 @@ + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=X86 ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN ++ ++ ++ + __kernel void testPipe( pipe int test ) + { + int s = sizeof(test); +diff --git a/test/Preprocessor/init.c b/test/Preprocessor/init.c +index 
770e52cc78..6e033bf103 100644 +--- a/test/Preprocessor/init.c ++++ b/test/Preprocessor/init.c +@@ -9301,6 +9301,7 @@ + // WEBASSEMBLY64-NEXT:#define __LP64__ 1 + // WEBASSEMBLY-NEXT:#define __NO_INLINE__ 1 + // WEBASSEMBLY-NEXT:#define __OBJC_BOOL_IS_BOOL 0 ++// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_DEVICES 5 + // WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3 + // WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2 + // WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4 +diff --git a/test/Preprocessor/predefined-macros.c b/test/Preprocessor/predefined-macros.c +index 9296b1cf5a..89572950ee 100644 +--- a/test/Preprocessor/predefined-macros.c ++++ b/test/Preprocessor/predefined-macros.c +@@ -129,6 +129,8 @@ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL12 + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL2.0 \ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL20 ++// RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL3.0 \ ++// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL30 + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-fast-relaxed-math \ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FRM + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=c++ \ +@@ -137,26 +139,37 @@ + // CHECK-CL10: #define CL_VERSION_1_1 110 + // CHECK-CL10: #define CL_VERSION_1_2 120 + // CHECK-CL10: #define CL_VERSION_2_0 200 ++// CHECK-CL10: #define CL_VERSION_3_0 300 + // CHECK-CL10: #define __OPENCL_C_VERSION__ 100 + // CHECK-CL10-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL11: #define CL_VERSION_1_0 100 + // CHECK-CL11: #define CL_VERSION_1_1 110 + // CHECK-CL11: #define CL_VERSION_1_2 120 + // CHECK-CL11: #define CL_VERSION_2_0 200 ++// CHECK-CL11: #define CL_VERSION_3_0 300 + // CHECK-CL11: #define __OPENCL_C_VERSION__ 110 + // CHECK-CL11-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL12: #define CL_VERSION_1_0 100 + // CHECK-CL12: #define CL_VERSION_1_1 110 + // 
CHECK-CL12: #define CL_VERSION_1_2 120 + // CHECK-CL12: #define CL_VERSION_2_0 200 ++// CHECK-CL12: #define CL_VERSION_3_0 300 + // CHECK-CL12: #define __OPENCL_C_VERSION__ 120 + // CHECK-CL12-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL20: #define CL_VERSION_1_0 100 + // CHECK-CL20: #define CL_VERSION_1_1 110 + // CHECK-CL20: #define CL_VERSION_1_2 120 + // CHECK-CL20: #define CL_VERSION_2_0 200 ++// CHECK-CL20: #define CL_VERSION_3_0 300 + // CHECK-CL20: #define __OPENCL_C_VERSION__ 200 + // CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1 ++// CHECK-CL30: #define CL_VERSION_1_0 100 ++// CHECK-CL30: #define CL_VERSION_1_1 110 ++// CHECK-CL30: #define CL_VERSION_1_2 120 ++// CHECK-CL30: #define CL_VERSION_2_0 200 ++// CHECK-CL30: #define CL_VERSION_3_0 300 ++// CHECK-CL30: #define __OPENCL_C_VERSION__ 300 ++// CHECK-CL30-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-FRM: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CLCPP10: #define __CL_CPP_VERSION_1_0__ 100 + // CHECK-CLCPP10: #define __OPENCL_CPP_VERSION__ 100 +diff --git a/test/Sema/feature-extensions-simult-support.cl b/test/Sema/feature-extensions-simult-support.cl +new file mode 100644 +index 0000000000..0789105002 +--- /dev/null ++++ b/test/Sema/feature-extensions-simult-support.cl +@@ -0,0 +1,75 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_subgroups ++ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic 
-fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups ++ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_device_enqueue ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_read_write_images ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes,-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups,-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_subgroups ++ ++// expected-no-diagnostics ++ ++#ifdef cl_khr_fp64 ++ #ifndef __opencl_c_fp64 ++ #error macros were not properly set up ++ #endif ++#endif ++#ifdef __opencl_c_fp64 ++ #ifndef cl_khr_fp64 ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#ifdef cl_khr_3d_image_writes ++ 
#ifndef __opencl_c_3d_image_writes ++ #error macros were not properly set up ++ #endif ++#endif ++#ifdef __opencl_c_3d_image_writes ++ #ifndef cl_khr_3d_image_writes ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#ifdef cl_khr_subgroups ++ #ifndef __opencl_c_subgroups ++ #error macros were not properly set up ++ #endif ++#endif ++#ifdef __opencl_c_subgroups ++ #ifndef cl_khr_subgroups ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#if defined(__opencl_c_pipes) || defined(__opencl_c_device_enqueue) ++ #ifndef __opencl_c_generic_address_space ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#if defined(__opencl_c_3d_image_writes) || defined(__opencl_c_read_write_images) ++ #ifndef __opencl_c_images ++ #error macros were not properly set up ++ #endif ++#endif ++ ++kernel void test(){} +diff --git a/test/Sema/features-ignore-pragma.cl b/test/Sema/features-ignore-pragma.cl +new file mode 100644 +index 0000000000..046ce53907 +--- /dev/null ++++ b/test/Sema/features-ignore-pragma.cl +@@ -0,0 +1,24 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_3d_image_writes ++ ++#pragma OPENCL EXTENSION __opencl_c_fp64 : enable ++// expected-warning@-1 {{OpenCL feature support can't be controlled via pragma, ignoring}} ++ ++#pragma OPENCL EXTENSION cl_khr_fp64 : enable ++#ifndef __opencl_c_fp64 ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_fp64' - ignoring}} ++#endif ++ ++#pragma OPENCL EXTENSION cl_khr_subgroups : enable ++#ifndef __opencl_c_subgroups ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_subgroups' - ignoring}} ++#endif ++ ++#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable 
++#ifndef __opencl_c_3d_image_writes ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_3d_image_writes' - ignoring}} ++#endif ++ ++kernel void foo() {} +diff --git a/test/Sema/opencl-features-pipes.cl b/test/Sema/opencl-features-pipes.cl +new file mode 100644 +index 0000000000..c0ac778f24 +--- /dev/null ++++ b/test/Sema/opencl-features-pipes.cl +@@ -0,0 +1,18 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.1 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DHAS ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -DHAS ++// expected-no-diagnostics ++ ++#ifdef HAS ++ #ifndef __opencl_c_pipes ++ #error Feature should be defined ++ #endif ++#else ++ #ifdef __opencl_c_pipes ++ #error Feature should not be defined ++ #endif ++#endif ++ ++kernel void foo() {} +diff --git a/test/Sema/opencl-features.cl b/test/Sema/opencl-features.cl +new file mode 100644 +index 0000000000..e75e04e214 +--- /dev/null ++++ b/test/Sema/opencl-features.cl +@@ -0,0 +1,127 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-__opencl_c_device_enqueue,-__opencl_c_pipes,-__opencl_c_read_write_images ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// expected-no-diagnostics ++ ++#ifndef __opencl_c_int64 ++ #error Feature __opencl_c_int64 shouldn't be defined ++#endif ++ ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) ++ #ifndef __opencl_c_3d_image_writes ++ #error Feature __opencl_c_3d_image_writes should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_order_acq_rel ++ #error Feature __opencl_c_atomic_order_acq_rel should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_order_seq_cst ++ #error 
Feature __opencl_c_atomic_order_seq_cst should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_scope_device ++ #error Feature __opencl_c_atomic_scope_device should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_scope_all_devices ++ #error Feature __opencl_c_atomic_scope_all_devices should be defined ++ #endif ++ ++ #ifndef __opencl_c_device_enqueue ++ #error Feature __opencl_c_device_enqueue should be defined ++ #endif ++ ++ #ifndef __opencl_c_generic_address_space ++ #error Feature __opencl_c_generic_address_space should be defined ++ #endif ++ ++ #ifndef __opencl_c_pipes ++ #error Feature __opencl_c_pipes should be defined ++ #endif ++ ++ #ifndef __opencl_c_program_scope_global_variables ++ #error Feature __opencl_c_program_scope_global_variables should be defined ++ #endif ++ ++ #ifndef __opencl_c_read_write_images ++ #error Feature __opencl_c_read_write_images should be defined ++ #endif ++ ++ #ifndef __opencl_c_subgroups ++ #error Feature __opencl_c_subgroups should be defined ++ #endif ++ ++ #ifndef __opencl_c_work_group_collective_functions ++ #error Feature __opencl_c_work_group_collective_functions should be defined ++ #endif ++ ++ #ifndef __opencl_c_fp64 ++ #error Feature __opencl_c_fp64 should be defined ++ #endif ++ ++ #ifndef __opencl_c_images ++ #error Feature __opencl_c_images should be defined ++ #endif ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++ ++ ++#if __OPENCL_C_VERSION__ == CL_VERSION_3_0 ++ #ifdef __opencl_c_3d_image_writes ++ #error Feature __opencl_c_3d_image_writes shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_order_acq_rel ++ #error Feature __opencl_c_atomic_order_acq_rel shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_order_seq_cst ++ #error Feature __opencl_c_atomic_order_seq_cst shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_scope_device ++ #error Feature __opencl_c_atomic_scope_device shouldn't be defined ++ #endif ++ ++ #ifdef 
__opencl_c_atomic_scope_all_devices ++ #error Feature __opencl_c_atomic_scope_all_devices shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_device_enqueue ++ #error Feature __opencl_c_device_enqueue shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_generic_address_space ++ #error Feature __opencl_c_generic_address_space shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_pipes ++ #error Feature __opencl_c_pipes shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_program_scope_global_variables ++ #error Feature __opencl_c_program_scope_global_variables shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_read_write_images ++ #error Feature __opencl_c_read_write_images shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_subgroups ++ #error Feature __opencl_c_subgroups shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_work_group_collective_functions ++ #error Feature __opencl_c_work_group_collective_functions shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_fp64 ++ #error Feature __opencl_c_fp64 shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_images ++ #error Feature __opencl_c_images shouldn't be defined ++ #endif ++#endif // __OPENCL_C_VERSION__ == CL_VERSION_3_0 ++ ++kernel void foo() {} +diff --git a/test/Sema/pipe_builtins_feature.cl b/test/Sema/pipe_builtins_feature.cl +new file mode 100644 +index 0000000000..17673e2387 +--- /dev/null ++++ b/test/Sema/pipe_builtins_feature.cl +@@ -0,0 +1,23 @@ ++// RUN: %clang_cc1 -cl-std=cl2.0 -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_images -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_3d_image_writes -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_read_write_images -fsyntax-only -verify %s -triple spir-unknown-unknown ++ ++#if defined(__opencl_c_read_write_images) && defined(__opencl_c_3d_image_writes) 
++ // expected-no-diagnostics ++#endif ++ ++__kernel void write_3d_image(__write_only image3d_t i) {} ++ ++#ifndef __opencl_c_3d_image_writes ++ // expected-error@-3 {{use of type '__write_only image3d_t' requires __opencl_c_3d_image_writes extension to be enabled}} ++#endif ++ ++__kernel void read_write_3d_image(__read_write image3d_t i) { } ++ ++#ifndef __opencl_c_read_write_images ++ // expected-error@-3 {{use of type '__read_write image3d_t' requires __opencl_c_read_write_images extension to be enabled}} ++#endif ++ ++ ++ +diff --git a/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl b/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl +index 619ecc4e47..084efbc43e 100644 +--- a/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl ++++ b/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl +@@ -1,6 +1,9 @@ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL2.0 + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL2.0 + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=c++ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=c++ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=c++ +diff --git a/test/SemaOpenCL/address-spaces.cl b/test/SemaOpenCL/address-spaces.cl +index 30f311d6ef..72d01b5f1c 
100644 +--- a/test/SemaOpenCL/address-spaces.cl ++++ b/test/SemaOpenCL/address-spaces.cl +@@ -1,5 +1,6 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only + // RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -verify -pedantic -fsyntax-only + // RUN: %clang_cc1 %s -cl-std=c++ -verify -pedantic -fsyntax-only + + __constant int ci = 1; +diff --git a/test/SemaOpenCL/cl20-device-side-enqueue.cl b/test/SemaOpenCL/cl20-device-side-enqueue.cl +index 8946911c09..49ceec183d 100644 +--- a/test/SemaOpenCL/cl20-device-side-enqueue.cl ++++ b/test/SemaOpenCL/cl20-device-side-enqueue.cl +@@ -2,6 +2,12 @@ + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir-unknown-unknown" -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS= ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" ++ ++ + + typedef struct {int a;} ndrange_t; + // Diagnostic tests for different overloads of enqueue_kernel from Table 6.13.17.1 of 
OpenCL 2.0 Spec. +@@ -233,11 +239,17 @@ kernel void bar(global unsigned int *buf) + kernel void foo1(global unsigned int *buf) + { + ndrange_t n; +- buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++ buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); ++#if __OPENCL_C_VERSION__ < 300 ++// expected-error@-2 {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++#endif + } + + kernel void bar1(global unsigned int *buf) + { + ndrange_t n; +- buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++ buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); ++#if __OPENCL_C_VERSION__ < 300 ++// expected-error@-2 {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++#endif + } +diff --git a/test/SemaOpenCL/forget-unsupported-builtins.cl b/test/SemaOpenCL/forget-unsupported-builtins.cl +new file mode 100644 +index 0000000000..2c94fb0309 +--- /dev/null ++++ b/test/SemaOpenCL/forget-unsupported-builtins.cl +@@ -0,0 +1,23 @@ ++// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -cl-ext=__opencl_c_pipes,__opencl_c_generic_address_space,__opencl_c_device_enqueue -verify %s -triple spir-unknown-unknown -DFEATURES ++ ++#ifndef FEATURES ++ // expected-no-diagnostics ++#else ++ // expected-error@+10 {{cannot redeclare builtin function 'get_pipe_max_packets'}} ++ // expected-note@+9 {{'get_pipe_max_packets' is a builtin with type 'unsigned int ()'}} ++ // expected-error@+9 {{cannot redeclare builtin function 'to_local'}} ++ // expected-note@+8 {{'to_local' is a builtin with type 'void 
*(void *)'}} ++ // expected-error@+8 {{cannot redeclare builtin function 'to_global'}} ++ // expected-note@+7 {{'to_global' is a builtin with type 'void *(void *)'}} ++ // expected-error@+7 {{cannot redeclare builtin function 'get_kernel_work_group_size'}} ++ // expected-note@+6 {{'get_kernel_work_group_size' is a builtin with type 'unsigned int ()'}} ++#endif ++ ++int get_pipe_max_packets(int); ++int to_local(int); ++int to_global(int); ++int get_kernel_work_group_size(int); ++ ++kernel void test(global int *dst) {} ++ +diff --git a/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl b/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +index 619b359c7a..6d285e3104 100644 +--- a/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl ++++ b/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups,__opencl_c_pipes + + #pragma OPENCL EXTENSION cl_khr_subgroups : enable + +diff --git a/test/SemaOpenCL/storageclass-cl20.cl b/test/SemaOpenCL/storageclass-cl20.cl +index 581701d2a6..469c526ebc 100644 +--- a/test/SemaOpenCL/storageclass-cl20.cl ++++ b/test/SemaOpenCL/storageclass-cl20.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables,__opencl_c_generic_address_space + + int G2 = 0; + global int G3 = 0; +-- +2.17.1 + diff --git a/patches/clang/0007-Add-cl_khr_extended_subgroup-extensions.patch b/patches/clang/0007-OpenCL-Add-cl_khr_extended_subgroup-extensions.patch similarity index 97% rename from patches/clang/0007-Add-cl_khr_extended_subgroup-extensions.patch rename to patches/clang/0007-OpenCL-Add-cl_khr_extended_subgroup-extensions.patch index 1b266391..11a26f6d 100644 --- 
a/patches/clang/0007-Add-cl_khr_extended_subgroup-extensions.patch +++ b/patches/clang/0007-OpenCL-Add-cl_khr_extended_subgroup-extensions.patch @@ -1,7 +1,7 @@ -From 4a4402f0d72167477a6252e4c3daf5089ebc8f9a Mon Sep 17 00:00:00 2001 +From fe6f30499053cf9bd2c5c4acc82e06947af1eff2 Mon Sep 17 00:00:00 2001 From: Anastasia Stulova -Date: Thu, 4 Jun 2020 12:29:02 +0100 -Subject: [PATCH] [OpenCL] Add cl_khr_extended_subgroup extensions. +Date: Thu, 24 Sep 2020 12:08:28 +0300 +Subject: [PATCH] [PATCH] [OpenCL] Add cl_khr_extended_subgroup extensions. Added extensions and their function declarations into the standard header. @@ -12,16 +12,16 @@ Tags: #clang Differential Revision: https://reviews.llvm.org/D79781 --- - .../include/clang/Basic/OpenCLExtensions.def | 7 + - clang/lib/Headers/opencl-c.h | 668 ++++++++++++++++++ - clang/test/SemaOpenCL/extension-version.cl | 84 +++ + include/clang/Basic/OpenCLExtensions.def | 7 + + lib/Headers/opencl-c.h | 668 +++++++++++++++++++++++ + test/SemaOpenCL/extension-version.cl | 84 +++ 3 files changed, 759 insertions(+) -diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def -index 517481584313..1ae36b32fb0a 100644 ---- a/clang/include/clang/Basic/OpenCLExtensions.def -+++ b/clang/include/clang/Basic/OpenCLExtensions.def -@@ -74,6 +74,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U) +diff --git a/include/clang/Basic/OpenCLExtensions.def b/include/clang/Basic/OpenCLExtensions.def +index 77c905ac6c..92959e2b28 100644 +--- a/include/clang/Basic/OpenCLExtensions.def ++++ b/include/clang/Basic/OpenCLExtensions.def +@@ -75,6 +75,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U) OPENCLEXT_INTERNAL(cl_khr_srgb_image_writes, 200, ~0U) OPENCLEXT_INTERNAL(cl_khr_subgroups, 200, ~0U) OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U) @@ -35,11 +35,11 @@ index 517481584313..1ae36b32fb0a 100644 // Clang Extensions. 
OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U) -diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h -index 6ac9f92d23a2..66e18bdd47bb 100644 ---- a/clang/lib/Headers/opencl-c.h -+++ b/clang/lib/Headers/opencl-c.h -@@ -15460,6 +15460,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x); +diff --git a/lib/Headers/opencl-c.h b/lib/Headers/opencl-c.h +index 9dcd10d54f..812d7ccf85 100644 +--- a/lib/Headers/opencl-c.h ++++ b/lib/Headers/opencl-c.h +@@ -18004,6 +18004,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x); #endif //cl_khr_subgroups cl_intel_subgroups @@ -714,10 +714,10 @@ index 6ac9f92d23a2..66e18bdd47bb 100644 #if defined(cl_intel_subgroups) // Intel-Specific Sub Group Functions float __ovld __conv intel_sub_group_shuffle( float x, uint c ); -diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl -index 0e6bbb7d3bcd..4d0e79cd39c5 100644 ---- a/clang/test/SemaOpenCL/extension-version.cl -+++ b/clang/test/SemaOpenCL/extension-version.cl +diff --git a/test/SemaOpenCL/extension-version.cl b/test/SemaOpenCL/extension-version.cl +index dcac9568c4..4b08a1c59c 100644 +--- a/test/SemaOpenCL/extension-version.cl ++++ b/test/SemaOpenCL/extension-version.cl @@ -333,3 +333,87 @@ #endif #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable @@ -806,3 +806,6 @@ index 0e6bbb7d3bcd..4d0e79cd39c5 100644 +#endif +#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable + +-- +2.17.1 +