Skip to content

Commit

Permalink
Fix TeamThreadMDRange parallel_reduce (kokkos#6511)
Browse files Browse the repository at this point in the history
* Fix TeamThreadMDRange parallel_reduce

* Use vector_reduce for Cuda, HIP, and SYCL

* Initialize reduction variable

* Test with maximal vector length

* TeamHandle::execution_space->typename TeamHandle::execution_space

* Fix OpenMPTarget

* Restore tests

* reducer is unused

* Workarounds for OpenMPTarget

* Reduce

* Fix OpenMPTarget tests

* Restore tests

* Strengthen test case for OpenMPTarget

* Add some static_asserts
  • Loading branch information
masterleinad committed Jan 11, 2024
1 parent 89ba3fb commit ee5cbfc
Show file tree
Hide file tree
Showing 4 changed files with 448 additions and 111 deletions.
8 changes: 0 additions & 8 deletions algorithms/unit_tests/TestStdAlgorithmsCommon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,16 +239,8 @@ KOKKOS_FUNCTION bool team_members_have_matching_result(
// set accum to 1 if a mismatch is found
const bool mismatch = memberValue != target;
int accum = static_cast<int>(mismatch);
// FIXME_OPENMPTARGET: team API does not meet the TeamHandle concept and
// ignores the reducer passed
#if defined KOKKOS_ENABLE_OPENMPTARGET
Kokkos::Sum<int> dummyReducer(accum);
const auto result = teamHandle.team_reduce(accum, dummyReducer);
return (result == 0);
#else
teamHandle.team_reduce(Kokkos::Sum<int>(accum));
return (accum == 0);
#endif
}

template <class ValueType1, class ValueType2>
Expand Down
55 changes: 55 additions & 0 deletions core/src/Kokkos_ExecPolicy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -983,7 +983,16 @@ template <typename Rank, typename TeamHandle, typename Lambda,
KOKKOS_INLINE_FUNCTION void parallel_reduce(
TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
ReducerValueType& val) {
static_assert(/*!Kokkos::is_view_v<ReducerValueType> &&*/
!std::is_array_v<ReducerValueType> &&
!std::is_pointer_v<ReducerValueType> &&
!Kokkos::is_reducer_v<ReducerValueType>,
"Only scalar return types are allowed!");

val = ReducerValueType{};
Impl::md_parallel_impl<Rank>(policy, lambda, val);
policy.team.team_reduce(
Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
}

template <typename Rank, typename TeamHandle, typename Lambda>
Expand All @@ -997,7 +1006,29 @@ template <typename Rank, typename TeamHandle, typename Lambda,
KOKKOS_INLINE_FUNCTION void parallel_reduce(
ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
ReducerValueType& val) {
static_assert(/*!Kokkos::is_view_v<ReducerValueType> &&*/
!std::is_array_v<ReducerValueType> &&
!std::is_pointer_v<ReducerValueType> &&
!Kokkos::is_reducer_v<ReducerValueType>,
"Only a scalar return types are allowed!");

val = ReducerValueType{};
Impl::md_parallel_impl<Rank>(policy, lambda, val);
if constexpr (false
#ifdef KOKKOS_ENABLE_CUDA
|| std::is_same_v<typename TeamHandle::execution_space,
Kokkos::Cuda>
#elif defined(KOKKOS_ENABLE_HIP)
|| std::is_same_v<typename TeamHandle::execution_space,
Kokkos::HIP>
#elif defined(KOKKOS_ENABLE_SYCL)
|| std::is_same_v<typename TeamHandle::execution_space,
Kokkos::Experimental::SYCL>
#endif
)
policy.team.vector_reduce(
Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
val});
}

template <typename Rank, typename TeamHandle, typename Lambda>
Expand All @@ -1011,7 +1042,31 @@ template <typename Rank, typename TeamHandle, typename Lambda,
KOKKOS_INLINE_FUNCTION void parallel_reduce(
TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
ReducerValueType& val) {
static_assert(/*!Kokkos::is_view_v<ReducerValueType> &&*/
!std::is_array_v<ReducerValueType> &&
!std::is_pointer_v<ReducerValueType> &&
!Kokkos::is_reducer_v<ReducerValueType>,
"Only a scalar return types are allowed!");

val = ReducerValueType{};
Impl::md_parallel_impl<Rank>(policy, lambda, val);
if constexpr (false
#ifdef KOKKOS_ENABLE_CUDA
|| std::is_same_v<typename TeamHandle::execution_space,
Kokkos::Cuda>
#elif defined(KOKKOS_ENABLE_HIP)
|| std::is_same_v<typename TeamHandle::execution_space,
Kokkos::HIP>
#elif defined(KOKKOS_ENABLE_SYCL)
|| std::is_same_v<typename TeamHandle::execution_space,
Kokkos::Experimental::SYCL>
#endif
)
policy.team.vector_reduce(
Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
val});
policy.team.team_reduce(
Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
}

template <typename Rank, typename TeamHandle, typename Lambda>
Expand Down
21 changes: 14 additions & 7 deletions core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,20 @@ class OpenMPTargetExecTeamMember {
team_broadcast(value, thread_id);
}

// FIXME_OPENMPTARGET this function has the wrong interface and currently
// ignores the reducer passed.
template <class ValueType, class JoinOp>
KOKKOS_INLINE_FUNCTION ValueType team_reduce(const ValueType& value,
const JoinOp&) const {
// Convenience overload of team_reduce that takes only a reducer.
// It forwards to the two-argument team_reduce overload, passing the reducer's
// own reference() as the value argument.
// The std::enable_if_t return type removes this overload from consideration
// unless is_reducer<ReducerType>::value is true; when enabled it returns void.
template <typename ReducerType>
KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value>
team_reduce(ReducerType const& reducer) const noexcept {
team_reduce(reducer, reducer.reference());
}

// FIXME_OPENMPTARGET this function currently ignores the reducer passed.
template <typename ReducerType>
KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value>
team_reduce(ReducerType const&, typename ReducerType::value_type& value) const
noexcept {
#pragma omp barrier

using value_type = ValueType;
using value_type = typename ReducerType::value_type;
// const JoinLambdaAdapter<value_type, JoinOp> op(op_in);

// Make sure there is enough scratch space:
Expand Down Expand Up @@ -143,8 +149,9 @@ class OpenMPTargetExecTeamMember {
}
#pragma omp barrier
}
return team_scratch[0];
value = team_scratch[0];
}

/** \brief Intra-team exclusive prefix sum with team_rank() ordering
* with intra-team non-deterministic ordering accumulation.
*
Expand Down

0 comments on commit ee5cbfc

Please sign in to comment.