From 97725f19e786ca0c0d641f31062b58cee66f340c Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 29 Sep 2022 09:39:54 -0700 Subject: [PATCH] [SYCL][Reduction] Prefer fast group reduce over fast atomics (#6890) https://github.com/intel/llvm/pull/6434 enabled treating "float" as suitable for the Reduction::has_fast_atomics implementation, but that implementation is slower than the one available under Reduction::has_fast_reduce. Make sure to check for the latter first. --- sycl/include/sycl/reduction.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 1734220a1fd0..b478710c2be3 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -1133,12 +1133,12 @@ bool reduCGFuncForRange(handler &CGH, KernelType KernelFunc, size_t NDRItems = NWorkGroups * WGSize; nd_range<1> NDRange{range<1>{NDRItems}, range<1>{WGSize}}; - if constexpr (Reduction::has_fast_atomics) - return reduCGFuncForRangeFastAtomics(CGH, KernelFunc, Range, - NDRange, Redu); - else if constexpr (Reduction::has_fast_reduce) + if constexpr (Reduction::has_fast_reduce) return reduCGFuncForRangeFastReduce(CGH, KernelFunc, Range, NDRange, Redu); + else if constexpr (Reduction::has_fast_atomics) + return reduCGFuncForRangeFastAtomics(CGH, KernelFunc, Range, + NDRange, Redu); else return reduCGFuncForRangeBasic(CGH, KernelFunc, Range, NDRange, Redu);