diff --git a/benchmarks/dbscan/dbscan_timpl.hpp b/benchmarks/dbscan/dbscan_timpl.hpp index 5e03020b6..10bcb4029 100644 --- a/benchmarks/dbscan/dbscan_timpl.hpp +++ b/benchmarks/dbscan/dbscan_timpl.hpp @@ -120,7 +120,7 @@ void sortAndFilterClusters(ExecutionSpace const &exec_space, }, num_clusters); Kokkos::resize(Kokkos::WithoutInitializing, cluster_offset, num_clusters + 1); - KokkosExt::exclusive_scan(exec_space, cluster_offset, cluster_offset); + KokkosExt::exclusive_scan(exec_space, cluster_offset, cluster_offset, 0); auto cluster_starts = KokkosExt::clone(exec_space, cluster_offset); KokkosExt::reallocWithoutInitializing( diff --git a/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp index 048ca7a27..6dd9bc745 100644 --- a/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp +++ b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp @@ -206,7 +206,7 @@ void queryImpl(ExecutionSpace const &space, Tree const &tree, "ArborX::CrsGraphWrapper::copy_counts_to_offsets", Kokkos::RangePolicy(space, 0, n_queries), KOKKOS_LAMBDA(int const i) { permuted_offset(i) = counts(i); }); - KokkosExt::exclusive_scan(space, offset, offset); + KokkosExt::exclusive_scan(space, offset, offset, 0); int const n_results = KokkosExt::lastElement(space, offset); @@ -302,7 +302,7 @@ allocateAndInitializeStorage(Tag, ExecutionSpace const &space, if (buffer_size != 0) { - KokkosExt::exclusive_scan(space, offset, offset); + KokkosExt::exclusive_scan(space, offset, offset, 0); // Use calculation for the size to avoid calling lastElement(space, offset) // as it will launch an extra kernel to copy to host. @@ -325,7 +325,7 @@ allocateAndInitializeStorage(Tag, ExecutionSpace const &space, "scan_queries_for_numbers_of_nearest_neighbors", Kokkos::RangePolicy(space, 0, n_queries), KOKKOS_LAMBDA(int i) { offset(i) = getK(predicates(i)); }); - KokkosExt::exclusive_scan(space, offset, offset); + KokkosExt::exclusive_scan(space, offset, offset, 0); KokkosExt::reallocWithoutInitializing(space, out, KokkosExt::lastElement(space, offset)); diff --git a/src/details/ArborX_DetailsDistributedTreeImpl.hpp b/src/details/ArborX_DetailsDistributedTreeImpl.hpp index 27c3fcbda..4a08b78b1 100644 --- a/src/details/ArborX_DetailsDistributedTreeImpl.hpp +++ b/src/details/ArborX_DetailsDistributedTreeImpl.hpp @@ -353,7 +353,7 @@ void DistributedTreeImpl::deviseStrategy( } }); - KokkosExt::exclusive_scan(space, new_offset, new_offset); + KokkosExt::exclusive_scan(space, new_offset, new_offset, 0); // Truncate results so that queries will only be forwarded to as many local // trees as necessary to find k neighbors. @@ -685,7 +685,7 @@ void DistributedTreeImpl::countResults( Kokkos::atomic_increment(&offset(query_ids(i))); }); - KokkosExt::exclusive_scan(space, offset, offset); + KokkosExt::exclusive_scan(space, offset, offset, 0); } template @@ -893,7 +893,7 @@ void DistributedTreeImpl::filterResults( new_offset(q) = min(offset(q + 1) - offset(q), getK(queries(q))); }); - KokkosExt::exclusive_scan(space, new_offset, new_offset); + KokkosExt::exclusive_scan(space, new_offset, new_offset, 0); int const n_truncated_results = KokkosExt::lastElement(space, new_offset); Kokkos::View new_indices( diff --git a/src/details/ArborX_DetailsExpandHalfToFull.hpp b/src/details/ArborX_DetailsExpandHalfToFull.hpp index b071cea15..10cf9e413 100644 --- a/src/details/ArborX_DetailsExpandHalfToFull.hpp +++ b/src/details/ArborX_DetailsExpandHalfToFull.hpp @@ -41,7 +41,7 @@ void expandHalfToFull(ExecutionSpace const &space, Offsets &offsets, Kokkos::atomic_increment(&offsets(k)); } }); - KokkosExt::exclusive_scan(space, offsets, offsets); + KokkosExt::exclusive_scan(space, offsets, offsets, 0); auto const m = KokkosExt::lastElement(space, offsets); KokkosExt::reallocWithoutInitializing(space, indices, m); diff --git a/src/details/ArborX_DetailsTreeTraversal.hpp b/src/details/ArborX_DetailsTreeTraversal.hpp index 8e72a414f..980630f08 100644 --- a/src/details/ArborX_DetailsTreeTraversal.hpp +++ b/src/details/ArborX_DetailsTreeTraversal.hpp @@ -158,7 +158,7 @@ struct TreeTraversal "scan_queries_for_numbers_of_neighbors", Kokkos::RangePolicy(space, 0, n_queries), KOKKOS_CLASS_LAMBDA(int i) { offset(i) = getK(_predicates(i)); }); - KokkosExt::exclusive_scan(space, offset, offset); + KokkosExt::exclusive_scan(space, offset, offset, 0); int const buffer_size = KokkosExt::lastElement(space, offset); // Allocate buffer over which to perform heap operations in // TreeTraversal::nearestQuery() to store nearest leaf nodes found so far. diff --git a/src/details/ArborX_DetailsUtils.hpp b/src/details/ArborX_DetailsUtils.hpp index 94fedeefa..4c918e523 100644 --- a/src/details/ArborX_DetailsUtils.hpp +++ b/src/details/ArborX_DetailsUtils.hpp @@ -167,7 +167,7 @@ template const &dst) { Details::KokkosExt::exclusive_scan(std::forward(space), src, - dst); + dst, 0); } template diff --git a/src/details/ArborX_NeighborList.hpp b/src/details/ArborX_NeighborList.hpp index 866606f60..7f57e9769 100644 --- a/src/details/ArborX_NeighborList.hpp +++ b/src/details/ArborX_NeighborList.hpp @@ -57,7 +57,7 @@ void findHalfNeighborList(ExecutionSpace const &space, space, bvh, KOKKOS_LAMBDA(int, int j) { Kokkos::atomic_increment(&offsets(j)); }, NeighborListPredicateGetter{radius}); - KokkosExt::exclusive_scan(space, offsets, offsets); + KokkosExt::exclusive_scan(space, offsets, offsets, 0); KokkosExt::reallocWithoutInitializing(space, indices, KokkosExt::lastElement(space, offsets)); @@ -105,7 +105,7 @@ void findFullNeighborList(ExecutionSpace const &space, Kokkos::atomic_increment(&offsets(j)); }, NeighborListPredicateGetter{radius}); - KokkosExt::exclusive_scan(space, offsets, offsets); + KokkosExt::exclusive_scan(space, offsets, offsets, 0); KokkosExt::reallocWithoutInitializing(space, indices, KokkosExt::lastElement(space, offsets)); diff --git a/src/kokkos_ext/ArborX_DetailsKokkosExtStdAlgorithms.hpp b/src/kokkos_ext/ArborX_DetailsKokkosExtStdAlgorithms.hpp index 0c6518d28..9d7fc45fc 100644 --- a/src/kokkos_ext/ArborX_DetailsKokkosExtStdAlgorithms.hpp +++ b/src/kokkos_ext/ArborX_DetailsKokkosExtStdAlgorithms.hpp @@ -21,12 +21,11 @@ namespace ArborX::Details::KokkosExt { template + typename InitValueType> void exclusive_scan(ExecutionSpace const &space, SrcView const &src, - DstView const &dst, InitValueType init = 0) + DstView const &dst, InitValueType init) { - static_assert( - Kokkos::is_execution_space>::value); + static_assert(Kokkos::is_execution_space::value); static_assert(Kokkos::is_view::value); static_assert(Kokkos::is_view::value); static_assert( @@ -63,8 +62,7 @@ typename ViewType::non_const_value_type reduce(ExecutionSpace const &space, ViewType const &v, typename ViewType::non_const_value_type init) { - static_assert( - Kokkos::is_execution_space>::value); + static_assert(Kokkos::is_execution_space::value); static_assert(Kokkos::is_view::value); static_assert(is_accessible_from::value, @@ -93,8 +91,7 @@ template void adjacent_difference(ExecutionSpace const &space, SrcView const &src, DstView const &dst) { - static_assert( - Kokkos::is_execution_space>::value); + static_assert(Kokkos::is_execution_space::value); static_assert(Kokkos::is_view::value); static_assert(Kokkos::is_view::value); static_assert( @@ -133,6 +130,9 @@ void iota(ExecutionSpace const &space, ViewType const &v, { static_assert(Kokkos::is_execution_space::value); static_assert(Kokkos::is_view::value); + static_assert(is_accessible_from::value, + "View must be accessible from the execution space"); static_assert(unsigned(ViewType::rank) == unsigned(1), "iota requires a View of rank 1"); diff --git a/test/ArborX_BoostRTreeHelpers.hpp b/test/ArborX_BoostRTreeHelpers.hpp index a0b68ff7b..3dee2120c 100644 --- a/test/ArborX_BoostRTreeHelpers.hpp +++ b/test/ArborX_BoostRTreeHelpers.hpp @@ -201,7 +201,7 @@ performQueries(RTree const &rtree, InputView const &queries) std::back_inserter(returned_values)); using ExecutionSpace = typename InputView::execution_space; ExecutionSpace space; - KokkosExt::exclusive_scan(space, offset, offset); + KokkosExt::exclusive_scan(space, offset, offset, 0); auto const n_results = KokkosExt::lastElement(space, offset); OutputView indices("indices", n_results); for (int i = 0; i < n_queries; ++i) @@ -230,7 +230,7 @@ performQueries(ParallelRTree const &rtree, InputView const &queries) std::back_inserter(returned_values)); using ExecutionSpace = typename InputView::execution_space; ExecutionSpace space; - KokkosExt::exclusive_scan(space, offset, offset); + KokkosExt::exclusive_scan(space, offset, offset, 0); auto const n_results = KokkosExt::lastElement(space, offset); OutputView1 values("values", n_results); for (int i = 0; i < n_queries; ++i) diff --git a/test/tstDetailsCrsGraphWrapperImpl.cpp b/test/tstDetailsCrsGraphWrapperImpl.cpp index e799500b6..002fa731e 100644 --- a/test/tstDetailsCrsGraphWrapperImpl.cpp +++ b/test/tstDetailsCrsGraphWrapperImpl.cpp @@ -73,7 +73,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(query_impl, DeviceType, ARBORX_DEVICE_TYPES) ExecutionSpace space; KokkosExt::iota(space, permute); - KokkosExt::exclusive_scan(space, offset, offset); + KokkosExt::exclusive_scan(space, offset, offset, 0); Kokkos::realloc(indices, KokkosExt::lastElement(space, offset)); ArborX::Details::CrsGraphWrapperImpl::queryImpl( space, Test1{}, predicates, ArborX::Details::DefaultCallback{}, indices, diff --git a/test/tstDetailsKokkosExtStdAlgorithms.cpp b/test/tstDetailsKokkosExtStdAlgorithms.cpp index 6c2ae4eca..a55c4df2f 100644 --- a/test/tstDetailsKokkosExtStdAlgorithms.cpp +++ b/test/tstDetailsKokkosExtStdAlgorithms.cpp @@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(exclusive_scan, DeviceType, ARBORX_DEVICE_TYPES) Kokkos::deep_copy(x, x_host); Kokkos::View y("y", n); - KokkosExt::exclusive_scan(space, x, y); + KokkosExt::exclusive_scan(space, x, y, 0); std::vector y_ref(n); std::iota(y_ref.begin(), y_ref.end(), 0); @@ -78,13 +78,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(exclusive_scan, DeviceType, ARBORX_DEVICE_TYPES) BOOST_TEST(y_host == y_ref, tt::per_element()); BOOST_TEST(x_host == x_ref, tt::per_element()); // in-place - KokkosExt::exclusive_scan(space, x, x); + KokkosExt::exclusive_scan(space, x, x, 0); Kokkos::deep_copy(x_host, x); BOOST_TEST(x_host == y_ref, tt::per_element()); int const m = 11; BOOST_TEST(n != m); Kokkos::View z("z", m); - BOOST_CHECK_THROW(KokkosExt::exclusive_scan(space, x, z), + BOOST_CHECK_THROW(KokkosExt::exclusive_scan(space, x, z, 0), ArborX::SearchException); Kokkos::View v("v"); auto v_host = Kokkos::create_mirror_view(v); @@ -92,19 +92,19 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(exclusive_scan, DeviceType, ARBORX_DEVICE_TYPES) v_host(1) = 1.; v_host(2) = 0.; Kokkos::deep_copy(v, v_host); - KokkosExt::exclusive_scan(space, v, v); + KokkosExt::exclusive_scan(space, v, v, 5.); Kokkos::deep_copy(v_host, v); - std::vector v_ref = {0., 1., 2.}; + std::vector v_ref = {5., 6., 7.}; BOOST_TEST(v_host == v_ref, tt::per_element()); Kokkos::View w("w", 4); - BOOST_CHECK_THROW(KokkosExt::exclusive_scan(space, v, w), + BOOST_CHECK_THROW(KokkosExt::exclusive_scan(space, v, w, 0), ArborX::SearchException); v_host(0) = 1.; v_host(1) = 0.; v_host(2) = 0.; Kokkos::deep_copy(v, v_host); Kokkos::resize(w, 3); - KokkosExt::exclusive_scan(space, v, w); + KokkosExt::exclusive_scan(space, v, w, 0); auto w_host = Kokkos::create_mirror_view(w); Kokkos::deep_copy(w_host, w); std::vector w_ref = {0., 1., 1.};