Skip to content

Commit

Permalink
Fixup use provided execution space when copying host inaccessible reduction result (kokkos#6777)
Browse files Browse the repository at this point in the history

* Cuda/HIP use provided execution space when copying reduction result to the host

* OpenMPTarget: DeepCopy on the provided execution space in ParallelScanWithTotal

* Add trailing execution space template parameter that was missing
  • Loading branch information
dalg24 committed Feb 2, 2024
1 parent 7d2ea72 commit 63a1208
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 4 deletions.
3 changes: 2 additions & 1 deletion core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,8 @@ class ParallelReduce<CombinedFunctorReducerType,
}
} else {
const int size = m_functor_reducer.get_reducer().value_size();
DeepCopy<HostSpace, CudaSpace>(m_result_ptr, m_scratch_space, size);
DeepCopy<HostSpace, CudaSpace, Cuda>(m_policy.space(), m_result_ptr,
m_scratch_space, size);
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion core/src/HIP/Kokkos_HIP_ParallelReduce_Team.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,8 @@ class ParallelReduce<CombinedFunctorReducerType,

if (m_result_ptr) {
const int size = reducer.value_size();
DeepCopy<HostSpace, HIPSpace>(m_result_ptr, m_scratch_space, size);
DeepCopy<HostSpace, HIPSpace, HIP>(m_policy.space(), m_result_ptr,
m_scratch_space, size);
}
}
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,10 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>,

if (!base_t::m_result_ptr_device_accessible) {
const int size = base_t::m_functor_reducer.get_reducer().value_size();
DeepCopy<HostSpace, Kokkos::Experimental::OpenMPTargetSpace>(
base_t::m_result_ptr, chunk_values.data() + (n_chunks - 1), size);
DeepCopy<HostSpace, Kokkos::Experimental::OpenMPTargetSpace,
Kokkos::Experimental::OpenMPTarget>(
base_t::m_policy.space(), base_t::m_result_ptr,
chunk_values.data() + (n_chunks - 1), size);
}
} else if (!base_t::m_result_ptr_device_accessible) {
base_t::m_functor_reducer.get_reducer().init(base_t::m_result_ptr);
Expand Down

0 comments on commit 63a1208

Please sign in to comment.