From 97725f19e786ca0c0d641f31062b58cee66f340c Mon Sep 17 00:00:00 2001
From: aelovikov-intel <andrei.elovikov@intel.com>
Date: Thu, 29 Sep 2022 09:39:54 -0700
Subject: [PATCH] [SYCL][Reduction] Prefer fast group reduce over fast atomics
 (#6890)

https://github.com/intel/llvm/pull/6434 made "float" eligible for the
Reduction::has_fast_atomics implementation, but that implementation is
slower than the one available under Reduction::has_fast_reduce. Check
for the latter first so the faster path is chosen when both apply.
---
 sycl/include/sycl/reduction.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp
index 1734220a1fd0..b478710c2be3 100644
--- a/sycl/include/sycl/reduction.hpp
+++ b/sycl/include/sycl/reduction.hpp
@@ -1133,12 +1133,12 @@ bool reduCGFuncForRange(handler &CGH, KernelType KernelFunc,
   size_t NDRItems = NWorkGroups * WGSize;
   nd_range<1> NDRange{range<1>{NDRItems}, range<1>{WGSize}};
 
-  if constexpr (Reduction::has_fast_atomics)
-    return reduCGFuncForRangeFastAtomics<KernelName>(CGH, KernelFunc, Range,
-                                                     NDRange, Redu);
-  else if constexpr (Reduction::has_fast_reduce)
+  if constexpr (Reduction::has_fast_reduce)
     return reduCGFuncForRangeFastReduce<KernelName>(CGH, KernelFunc, Range,
                                                     NDRange, Redu);
+  else if constexpr (Reduction::has_fast_atomics)
+    return reduCGFuncForRangeFastAtomics<KernelName>(CGH, KernelFunc, Range,
+                                                     NDRange, Redu);
   else
     return reduCGFuncForRangeBasic<KernelName>(CGH, KernelFunc, Range, NDRange,
                                                Redu);