diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 0b1195ed9c8fb..b7bc0f215421d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -90,9 +90,9 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
     return true;
   }
 
-  // Increase the inlining cost threshold by a factor of 5, reflecting that
+  // Increase the inlining cost threshold by a factor of 11, reflecting that
   // calls are particularly expensive in NVPTX.
-  unsigned getInliningThresholdMultiplier() { return 5; }
+  unsigned getInliningThresholdMultiplier() { return 11; }
 
   InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp
index 9fcc02a3c3766..14e32b8af0de0 100644
--- a/sycl/include/sycl/accessor.hpp
+++ b/sycl/include/sycl/accessor.hpp
@@ -241,16 +241,6 @@ struct AccHostDataT {
   void *Reserved = nullptr;
 };
 
-// To ensure loop unrolling is done when processing dimensions.
-template <size_t... Inds, class F>
-void dim_loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
-  (f(Inds), ...);
-}
-
-template <size_t count, class F> void dim_loop(F &&f) {
-  dim_loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));
-}
-
 void __SYCL_EXPORT constructorNotification(void *BufferObj, void *AccessorObj,
                                            access::target Target,
                                            access::mode Mode,
diff --git a/sycl/include/sycl/detail/helpers.hpp b/sycl/include/sycl/detail/helpers.hpp
index 8ab015674e499..d3409dc092fd3 100644
--- a/sycl/include/sycl/detail/helpers.hpp
+++ b/sycl/include/sycl/detail/helpers.hpp
@@ -245,6 +245,16 @@ getSPIRVMemorySemanticsMask(const access::fence_space AccessSpace,
              LocalScopeMask);
 }
 
+// To ensure loop unrolling is done when processing dimensions.
+template <size_t... Inds, class F>
+void dim_loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
+  (f(Inds), ...);
+}
+
+template <size_t count, class F> void dim_loop(F &&f) {
+  dim_loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));
+}
+
 } // namespace detail
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 
diff --git a/sycl/include/sycl/group_algorithm.hpp b/sycl/include/sycl/group_algorithm.hpp
index 72b5bc712e2d9..d8910d320f662 100644
--- a/sycl/include/sycl/group_algorithm.hpp
+++ b/sycl/include/sycl/group_algorithm.hpp
@@ -214,23 +214,24 @@ reduce_over_group(Group g, T x, BinaryOperation binary_op) {
 #endif
 }
 
-template <typename Group, typename T, class BinaryOperation>
-detail::enable_if_t<(is_group_v<std::decay_t<Group>> &&
-                     detail::is_vector_arithmetic<T>::value &&
-                     detail::is_native_op<T, BinaryOperation>::value),
-                    T>
-reduce_over_group(Group g, T x, BinaryOperation binary_op) {
+template <typename Group, typename T, int N, class BinaryOperation>
+detail::enable_if_t<
+    (is_group_v<std::decay_t<Group>> &&
+     detail::is_vector_arithmetic<sycl::vec<T, N>>::value &&
+     detail::is_native_op<sycl::vec<T, N>, BinaryOperation>::value),
+    sycl::vec<T, N>>
+reduce_over_group(Group g, sycl::vec<T, N> x, BinaryOperation binary_op) {
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x[0], x[0])),
-                   T>::value ||
-          (std::is_same<T, half>::value &&
+                   typename sycl::vec<T, N>::element_type>::value ||
+          (std::is_same<sycl::vec<T, N>, half>::value &&
           std::is_same<decltype(binary_op(x[0], x[0])), float>::value),
       "Result type of binary_op must match reduction accumulation type.");
-  T result;
-  for (int s = 0; s < x.size(); ++s) {
-    result[s] = reduce_over_group(g, x[s], binary_op);
-  }
+  sycl::vec<T, N> result;
+
+  detail::dim_loop<N>(
+      [&](size_t s) { result[s] = reduce_over_group(g, x[s], binary_op); });
   return result;
 }
 