From c5cbbf34726b0629892e848b74a336594bfd2665 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Sat, 5 Dec 2020 20:59:33 -0500 Subject: [PATCH 01/19] Move crs graph construction out of BVH --- .../example_cuda_access_traits.cpp | 2 +- src/ArborX_CrsGraphWrapper.hpp | 34 + src/ArborX_LinearBVH.hpp | 76 ++- .../ArborX_DetailsCrsGraphWrapperImpl.hpp | 636 ++++++++++++++++++ src/details/ArborX_DetailsPermutedData.hpp | 67 ++ src/details/ArborX_TraversalPolicy.hpp | 52 ++ test/tstDetailsBufferOptimization.cpp | 21 +- test/tstKokkosToolsAnnotations.cpp | 2 +- test/tstKokkosToolsDistributedAnnotations.cpp | 2 +- 9 files changed, 868 insertions(+), 24 deletions(-) create mode 100644 src/ArborX_CrsGraphWrapper.hpp create mode 100644 src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp create mode 100644 src/details/ArborX_DetailsPermutedData.hpp create mode 100644 src/details/ArborX_TraversalPolicy.hpp diff --git a/examples/access_traits/example_cuda_access_traits.cpp b/examples/access_traits/example_cuda_access_traits.cpp index 6626aeb69..0123a6565 100644 --- a/examples/access_traits/example_cuda_access_traits.cpp +++ b/examples/access_traits/example_cuda_access_traits.cpp @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) Kokkos::View indices("indices", 0); Kokkos::View offset("offset", 0); - bvh.query(cuda, Spheres{d_a, d_a, d_a, d_a, N}, indices, offset); + ArborX::query_crs(cuda, bvh, Spheres{d_a, d_a, d_a, d_a, N}, indices, offset); Kokkos::parallel_for(Kokkos::RangePolicy(cuda, 0, N), KOKKOS_LAMBDA(int i) { diff --git a/src/ArborX_CrsGraphWrapper.hpp b/src/ArborX_CrsGraphWrapper.hpp new file mode 100644 index 000000000..e91d70a04 --- /dev/null +++ b/src/ArborX_CrsGraphWrapper.hpp @@ -0,0 +1,34 @@ +/**************************************************************************** + * Copyright (c) 2012-2020 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. 
For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef ARBORX_CRS_GRAPH_WRAPPER_HPP +#define ARBORX_CRS_GRAPH_WRAPPER_HPP + +#include "ArborX_DetailsCrsGraphWrapperImpl.hpp" + +namespace ArborX +{ + +template +inline void query_crs(ExecutionSpace const &space, Tree const &tree, + Predicates const &predicates, + CallbackOrView &&callback_or_view, View &&view, + Args &&... args) +{ + Details::CrsGraphWrapperImpl::query( + space, tree, predicates, std::forward(callback_or_view), + std::forward(view), std::forward(args)...); +} + +} // namespace ArborX + +#endif diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 6e58762c0..9056438b6 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -14,17 +14,23 @@ #include #include -#include +#include +#include +#include #include #include #include +#include #include #include +#include +#include #include namespace ArborX { + namespace Details { template @@ -55,18 +61,25 @@ class BoundingVolumeHierarchy KOKKOS_FUNCTION bounding_volume_type bounds() const noexcept { return _bounds; } - template + template void query(ExecutionSpace const &space, Predicates const &predicates, - Args &&... args) const + Callback const &callback, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) const; + + template + // clang-format off + [[deprecated( "For crs format output, use standalone ArborX::query_crs instead.")]] + // clang-format on + std::enable_if_t>{}> + query(ExecutionSpace const &space, Predicates const &predicates, + CallbackOrView &&callback_or_view, View &&view, Args &&... 
args) const { - Details::check_valid_access_traits(PredicatesTag{}, predicates); - using Access = AccessTraits; - static_assert(KokkosExt::is_accessible_from::value, - "Predicates must be accessible from the execution space"); - - Details::BoundingVolumeHierarchyImpl::query(space, *this, predicates, - std::forward(args)...); + auto const &bvh = *this; + ArborX::query_crs(space, bvh, predicates, + std::forward(callback_or_view), + std::forward(view), std::forward(args)...); } private: @@ -161,7 +174,7 @@ class BoundingVolumeHierarchy< void query(Args &&... args) const { BoundingVolumeHierarchy::query( - typename DeviceType::execution_space{}, std::forward(args)...); + std::forward(args)...); } }; @@ -235,6 +248,45 @@ BoundingVolumeHierarchy::BoundingVolumeHierarchy( Kokkos::Profiling::popRegion(); } +template +template +void BoundingVolumeHierarchy::query( + ExecutionSpace const &space, Predicates const &predicates, + Callback const &callback, Experimental::TraversalPolicy const &policy) const +{ + Details::check_valid_access_traits(PredicatesTag{}, predicates); + using Access = AccessTraits; + static_assert(KokkosExt::is_accessible_from::value, + "Predicates must be accessible from the execution space"); + + Details::check_valid_callback(callback, predicates); + + Kokkos::Profiling::pushRegion("ArborX::BVH::query"); + + auto const &bvh = *this; + if (policy._sort_predicates) + { + Kokkos::Profiling::pushRegion("ArborX::BVH::query::compute_permutation"); + using DeviceType = Kokkos::Device; + auto permute = + Details::BatchedQueries::sortQueriesAlongZOrderCurve( + space, bounds(), predicates); + Kokkos::Profiling::popRegion(); + + using PermutedPredicates = + Details::PermutedData; + Details::traverse(space, bvh, PermutedPredicates{predicates, permute}, + callback); + } + else + { + Details::traverse(space, bvh, predicates, callback); + } + + Kokkos::Profiling::popRegion(); +} + } // namespace ArborX #endif diff --git 
a/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp new file mode 100644 index 000000000..fe51cf42a --- /dev/null +++ b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp @@ -0,0 +1,636 @@ +/**************************************************************************** + * Copyright (c) 2012-2020 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef ARBORX_DETAIL_CRS_GRAPH_WRAPPER_IMPL_HPP +#define ARBORX_DETAIL_CRS_GRAPH_WRAPPER_IMPL_HPP + +#include +#include +#include +#include +#include +#include + +namespace ArborX +{ +namespace Details +{ + +enum BufferStatus +{ + PreallocationNone = 0, + PreallocationHard = -1, + PreallocationSoft = 1 +}; + +inline BufferStatus toBufferStatus(int buffer_size) +{ + if (buffer_size == 0) + return BufferStatus::PreallocationNone; + if (buffer_size > 0) + return BufferStatus::PreallocationSoft; + return BufferStatus::PreallocationHard; +} + +struct FirstPassTag +{ +}; +struct FirstPassNoBufferOptimizationTag +{ +}; +struct SecondPassTag +{ +}; + +template +struct InsertGenerator +{ + Callback _callback; + OutputView _out; + CountView _counts; + PermutedOffset _permuted_offset; + + using ValueType = typename OutputView::value_type; + using Access = AccessTraits; + using Tag = typename AccessTraitsHelper::tag; + using PredicateType = typename AccessTraitsHelper::type; + + template < + typename U = PassTag, typename V = Tag, + std::enable_if_t{} && + std::is_same{}> * = nullptr> + KOKKOS_FUNCTION auto operator()(PredicateType const &predicate, + int primitive_index) const + { + auto const predicate_index = getData(predicate); + auto const &raw_predicate = 
getPredicate(predicate); + // With permutation, we access offset in random manner, and + // _offset(permutated_predicate_index+1) may be in a completely different + // place. Instead, use pointers to get the correct value for the buffer + // size. For this reason, also take a reference for offset. + auto const &offset = _permuted_offset(predicate_index); + auto const buffer_size = *(&offset + 1) - offset; + auto &count = _counts(predicate_index); + + return _callback(raw_predicate, primitive_index, + [&](ValueType const &value) { + int count_old = Kokkos::atomic_fetch_add(&count, 1); + if (count_old < buffer_size) + _out(offset + count_old) = value; + }); + } + template + KOKKOS_FUNCTION std::enable_if_t{} && + std::is_same{}> + operator()(PredicateType const &predicate, int primitive_index, + float distance) const + { + auto const predicate_index = getData(predicate); + auto const &raw_predicate = getPredicate(predicate); + // With permutation, we access offset in random manner, and + // _offset(permutated_predicate_index+1) may be in a completely different + // place. Instead, use pointers to get the correct value for the buffer + // size. For this reason, also take a reference for offset. 
+ auto const &offset = _permuted_offset(predicate_index); + auto const buffer_size = *(&offset + 1) - offset; + auto &count = _counts(predicate_index); + + _callback(raw_predicate, primitive_index, distance, + [&](ValueType const &value) { + int count_old = Kokkos::atomic_fetch_add(&count, 1); + if (count_old < buffer_size) + _out(offset + count_old) = value; + }); + } + + template < + typename U = PassTag, typename V = Tag, + std::enable_if_t{} && + std::is_same{}> * = nullptr> + KOKKOS_FUNCTION auto operator()(PredicateType const &predicate, + int primitive_index) const + { + auto const predicate_index = getData(predicate); + auto const &raw_predicate = getPredicate(predicate); + + auto &count = _counts(predicate_index); + + return _callback(raw_predicate, primitive_index, [&](ValueType const &) { + Kokkos::atomic_fetch_add(&count, 1); + }); + } + + template + KOKKOS_FUNCTION + std::enable_if_t{} && + std::is_same{}> + operator()(PredicateType const &predicate, int primitive_index, + float distance) const + { + auto const predicate_index = getData(predicate); + auto const &raw_predicate = getPredicate(predicate); + + auto &count = _counts(predicate_index); + + _callback(raw_predicate, primitive_index, distance, + [&](ValueType const &) { Kokkos::atomic_fetch_add(&count, 1); }); + } + + template < + typename U = PassTag, typename V = Tag, + std::enable_if_t{} && + std::is_same{}> * = nullptr> + KOKKOS_FUNCTION auto operator()(PredicateType const &predicate, + int primitive_index) const + { + auto const predicate_index = getData(predicate); + auto const &raw_predicate = getPredicate(predicate); + + // we store offsets in counts, and offset(permute(i)) = counts(i) + auto &offset = _counts(predicate_index); + + // TODO: there is a tradeoff here between skipping computation offset + + // count, and atomic increment of count. I think atomically incrementing + // offset is problematic for OpenMP as you potentially constantly steal + // cache lines. 
+ return _callback(raw_predicate, primitive_index, + [&](ValueType const &value) { + _out(Kokkos::atomic_fetch_add(&offset, 1)) = value; + }); + } + + template + KOKKOS_FUNCTION std::enable_if_t{} && + std::is_same{}> + operator()(PredicateType const &predicate, int primitive_index, + float distance) const + { + auto const predicate_index = getData(predicate); + auto const &raw_predicate = getPredicate(predicate); + + // we store offsets in counts, and offset(permute(i)) = counts(i) + auto &offset = _counts(predicate_index); + + // TODO: there is a tradeoff here between skipping computation offset + + // count, and atomic increment of count. I think atomically incrementing + // offset is problematic for OpenMP as you potentially constantly steal + // cache lines. + _callback(raw_predicate, primitive_index, distance, + [&](ValueType const &value) { + _out(Kokkos::atomic_fetch_add(&offset, 1)) = value; + }); + } +}; + +namespace CrsGraphWrapperImpl +{ + +template +void queryImpl(ExecutionSpace const &space, Tree const &tree, + Predicates const &predicates, Callback const &callback, + OutputView &out, OffsetView &offset, PermuteType permute, + BufferStatus buffer_status) +{ + // pre-condition: offset and out are preallocated. 
If buffer_size > 0, offset + // is pre-initialized + + static_assert(Kokkos::is_execution_space{}, ""); + + using Access = AccessTraits; + auto const n_queries = Access::size(predicates); + + Kokkos::Profiling::pushRegion("ArborX::CrsGraphWrapper::two_pass"); + + using CountView = OffsetView; + CountView counts(Kokkos::view_alloc("ArborX::CrsGraphWrapper::counts", space), + n_queries); + + using PermutedPredicates = + PermutedData; + PermutedPredicates permuted_predicates = {predicates, permute}; + + using PermutedOffset = PermutedData; + PermutedOffset permuted_offset = {offset, permute}; + + Kokkos::Profiling::pushRegion( + "ArborX::CrsGraphWrapper::two_pass::first_pass"); + bool underflow = false; + bool overflow = false; + if (buffer_status != BufferStatus::PreallocationNone) + { + tree.query( + space, permuted_predicates, + InsertGenerator{callback, out, counts, + permuted_offset}, + ArborX::Experimental::TraversalPolicy().setPredicateSorting(false)); + + // Detecting overflow is a local operation that needs to be done for every + // index. We allow individual buffer sizes to differ, so it's not as easy + // as computing max counts. 
+ int overflow_int = 0; + Kokkos::parallel_reduce( + "ArborX::CrsGraphWrapper::compute_overflow", + Kokkos::RangePolicy(space, 0, n_queries), + KOKKOS_LAMBDA(int i, int &update) { + auto const *const offset_ptr = &permuted_offset(i); + if (counts(i) > *(offset_ptr + 1) - *offset_ptr) + update = 1; + }, + overflow_int); + overflow = (overflow_int > 0); + + if (!overflow) + { + int n_results = 0; + Kokkos::parallel_reduce( + "ArborX::CrsGraphWrapper::compute_underflow", + Kokkos::RangePolicy(space, 0, n_queries), + KOKKOS_LAMBDA(int i, int &update) { update += counts(i); }, + n_results); + underflow = (n_results < out.extent_int(0)); + } + } + else + { + tree.query( + space, permuted_predicates, + InsertGenerator{ + callback, out, counts, permuted_offset}, + ArborX::Experimental::TraversalPolicy().setPredicateSorting(false)); + // This may not be true, but it does not matter. As long as we have + // (n_results == 0) check before second pass, this value is not used. + // Otherwise, we know it's overflowed as there is no allocation. + overflow = true; + } + + Kokkos::Profiling::popRegion(); + Kokkos::Profiling::pushRegion( + "ArborX::CrsGraphWrapper::first_pass_postprocess"); + + OffsetView preallocated_offset("ArborX::CrsGraphWrapper::offset_copy", 0); + if (underflow) + { + // Store a copy of the original offset. We'll need it for compression. + preallocated_offset = clone(space, offset); + } + + Kokkos::parallel_for( + "ArborX::CrsGraphWrapper::copy_counts_to_offsets", + Kokkos::RangePolicy(space, 0, n_queries), + KOKKOS_LAMBDA(int const i) { permuted_offset(i) = counts(i); }); + exclusivePrefixSum(space, offset); + + int const n_results = lastElement(offset); + + Kokkos::Profiling::popRegion(); + + if (n_results == 0) + { + // Exit early if either no results were found for any of the queries, or + // nothing was inserted inside a callback for found results. This check + // guarantees that the second pass will not be executed. 
+ Kokkos::resize(out, 0); + // FIXME: do we need to reset offset if it was preallocated here? + Kokkos::Profiling::popRegion(); + return; + } + + if (overflow || buffer_status == BufferStatus::PreallocationNone) + { + // Not enough (individual) storage for results + + // If it was hard preallocation, we simply throw + ARBORX_ASSERT(buffer_status != BufferStatus::PreallocationHard); + + // Otherwise, do the second pass + Kokkos::Profiling::pushRegion( + "ArborX::CrsGraphWrapper::two_pass:second_pass"); + + Kokkos::parallel_for( + "ArborX::CrsGraphWrapper::copy_offsets_to_counts", + Kokkos::RangePolicy(space, 0, n_queries), + KOKKOS_LAMBDA(int const i) { counts(i) = permuted_offset(i); }); + + reallocWithoutInitializing(out, n_results); + + tree.query( + space, permuted_predicates, + InsertGenerator{callback, out, counts, + permuted_offset}, + ArborX::Experimental::TraversalPolicy().setPredicateSorting(false)); + + Kokkos::Profiling::popRegion(); + } + else if (underflow) + { + // More than enough storage for results, need compression + Kokkos::Profiling::pushRegion( + "ArborX::CrsGraphWrapper::two_pass:copy_values"); + + OutputView tmp_out(Kokkos::ViewAllocateWithoutInitializing(out.label()), + n_results); + + Kokkos::parallel_for( + "ArborX::CrsGraphWrapper::copy_valid_values", + Kokkos::RangePolicy(space, 0, n_queries), + KOKKOS_LAMBDA(int i) { + int count = offset(i + 1) - offset(i); + for (int j = 0; j < count; ++j) + { + tmp_out(offset(i) + j) = out(preallocated_offset(i) + j); + } + }); + out = tmp_out; + + Kokkos::Profiling::popRegion(); + } + else + { + // The allocated storage was exactly enough for results, do nothing + } + Kokkos::Profiling::popRegion(); +} + +struct Iota +{ + KOKKOS_FUNCTION unsigned int operator()(int const i) const { return i; } +}; + +// Views are passed by reference here because internally Kokkos::realloc() +// is called. 
+template +std::enable_if_t{} && + Kokkos::is_view{} && Kokkos::is_view{}> +queryDispatch(SpatialPredicateTag, Tree const &tree, + ExecutionSpace const &space, Predicates const &predicates, + Callback const &callback, OutputView &out, OffsetView &offset, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + using MemorySpace = typename Tree::memory_space; + using DeviceType = Kokkos::Device; + + check_valid_callback(callback, predicates, out); + + Kokkos::Profiling::pushRegion("ArborX::CrsGraphWrapper::query::spatial"); + + using Access = AccessTraits; + auto const n_queries = Access::size(predicates); + + Kokkos::Profiling::pushRegion( + "ArborX::CrsGraphWrapper::query::spatial::init_and_alloc"); + reallocWithoutInitializing(offset, n_queries + 1); + + int const buffer_size = std::abs(policy._buffer_size); + if (buffer_size > 0) + { + Kokkos::deep_copy(space, offset, buffer_size); + exclusivePrefixSum(space, offset); + // Use calculation for the size to avoid calling lastElement(offset) as it + // will launch an extra kernel to copy to host. And there is unnecessary to + // fill with invalid indices. 
+ reallocWithoutInitializing(out, n_queries * buffer_size); + } + else + { + Kokkos::deep_copy(offset, 0); + } + Kokkos::Profiling::popRegion(); + + if (policy._sort_predicates) + { + Kokkos::Profiling::pushRegion( + "ArborX::CrsGraphWrapper::query::spatial::compute_permutation"); + auto permute = + Details::BatchedQueries::sortQueriesAlongZOrderCurve( + space, tree.bounds(), predicates); + Kokkos::Profiling::popRegion(); + + queryImpl(space, tree, predicates, callback, out, offset, permute, + toBufferStatus(policy._buffer_size)); + } + else + { + Iota permute; + queryImpl(space, tree, predicates, callback, out, offset, permute, + toBufferStatus(policy._buffer_size)); + } + + Kokkos::Profiling::popRegion(); +} + +template +inline std::enable_if_t{} && Kokkos::is_view{}> +queryDispatch(SpatialPredicateTag, Tree const &tree, + ExecutionSpace const &space, Predicates const &predicates, + Indices &indices, Offset &offset, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + queryDispatch(SpatialPredicateTag{}, tree, space, predicates, + CallbackDefaultSpatialPredicate{}, indices, offset, policy); +} + +template +inline std::enable_if_t{}> +queryDispatch(SpatialPredicateTag, Tree const &tree, + ExecutionSpace const &space, Predicates const &predicates, + Callback const &callback, OutputView &out, OffsetView &offset, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + using MemorySpace = typename Tree::memory_space; + Kokkos::View indices( + "ArborX::CrsGraphWrapper::query::spatial::indices", 0); + queryDispatch(SpatialPredicateTag{}, tree, space, predicates, indices, offset, + policy); + callback(predicates, offset, indices, out); +} + +template +std::enable_if_t{} && + Kokkos::is_view{} && Kokkos::is_view{}> +queryDispatch(NearestPredicateTag, Tree const &tree, + ExecutionSpace const &space, Predicates const &predicates, + Callback const &callback, OutputView &out, OffsetView &offset, + 
Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + using MemorySpace = typename Tree::memory_space; + using DeviceType = Kokkos::Device; + + check_valid_callback(callback, predicates, out); + + Kokkos::Profiling::pushRegion("ArborX::CrsGraphWrapper::query::nearest"); + + using Access = AccessTraits; + auto const n_queries = Access::size(predicates); + + Kokkos::Profiling::pushRegion( + "ArborX::CrsGraphWrapper::query::nearest::init_and_alloc"); + + reallocWithoutInitializing(offset, n_queries + 1); + Kokkos::parallel_for( + "ArborX::CrsGraphWrapper::query::nearest::" + "scan_queries_for_numbers_of_nearest_neighbors", + Kokkos::RangePolicy(space, 0, n_queries), + KOKKOS_LAMBDA(int i) { offset(i) = getK(Access::get(predicates, i)); }); + exclusivePrefixSum(space, offset); + + int const n_results = lastElement(offset); + reallocWithoutInitializing(out, n_results); + + Kokkos::Profiling::popRegion(); + + if (policy._sort_predicates) + { + Kokkos::Profiling::pushRegion( + "ArborX::CrsGraphWrapper::query::nearest::compute_permutation"); + auto permute = + Details::BatchedQueries::sortQueriesAlongZOrderCurve( + space, tree.bounds(), predicates); + Kokkos::Profiling::popRegion(); + + queryImpl(space, tree, predicates, callback, out, offset, permute, + BufferStatus::PreallocationSoft); + } + else + { + Iota permute; + queryImpl(space, tree, predicates, callback, out, offset, permute, + BufferStatus::PreallocationSoft); + } + + Kokkos::Profiling::popRegion(); +} + +template +inline std::enable_if_t{}> +queryDispatch(NearestPredicateTag, Tree const &tree, + ExecutionSpace const &space, Predicates const &predicates, + Callback const &callback, OutputView &out, OffsetView &offset, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + using MemorySpace = typename Tree::memory_space; + Kokkos::View *, MemorySpace> pairs( + "ArborX::CrsGraphWrapper::query::nearest::pairs_index_distance", 0); + 
queryDispatch(NearestPredicateTag{}, tree, space, predicates, + CallbackDefaultNearestPredicateWithDistance{}, pairs, offset, + policy); + callback(predicates, offset, pairs, out); +} + +template +inline std::enable_if_t{} && Kokkos::is_view{}> +queryDispatch(NearestPredicateTag, Tree const &tree, + ExecutionSpace const &space, Predicates const &predicates, + Indices &indices, Offset &offset, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + queryDispatch(NearestPredicateTag{}, tree, space, predicates, + CallbackDefaultNearestPredicate{}, indices, offset, policy); +} + +template +inline std::enable_if_t{} && + Kokkos::is_view{} && + Kokkos::is_view{}> +queryDispatch(NearestPredicateTag, Tree const &tree, + ExecutionSpace const &space, Predicates const &predicates, + Indices &indices, Offset &offset, Distances &distances, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + using MemorySpace = typename Tree::memory_space; + Kokkos::View *, MemorySpace> out( + "ArborX::CrsGraphWrapper::query::nearest::pairs_index_distance", 0); + queryDispatch(NearestPredicateTag{}, tree, space, predicates, + CallbackDefaultNearestPredicateWithDistance{}, out, offset, + policy); + auto const n = out.extent(0); + reallocWithoutInitializing(indices, n); + reallocWithoutInitializing(distances, n); + Kokkos::parallel_for("ArborX::CrsGraphWrapper::query::nearest::split_pairs", + Kokkos::RangePolicy(space, 0, n), + KOKKOS_LAMBDA(int i) { + indices(i) = out(i).first; + distances(i) = out(i).second; + }); +} + +template +std::enable_if_t{} && + !is_tagged_post_callback{}> +check_valid_callback_if_first_argument_is_not_a_view( + Callback const &callback, Predicates const &predicates, + OutputView const &out) +{ + check_valid_callback(callback, predicates, out); +} + +template +std::enable_if_t{} && + is_tagged_post_callback{}> +check_valid_callback_if_first_argument_is_not_a_view(Callback const &, + Predicates const &, + 
OutputView const &) +{ + // TODO +} + +template +std::enable_if_t{}> +check_valid_callback_if_first_argument_is_not_a_view(View const &, + Predicates const &, + OutputView const &) +{ + // do nothing +} + +template +inline std::enable_if_t>{}> +query(ExecutionSpace const &space, Tree const &tree, + Predicates const &predicates, CallbackOrView &&callback_or_view, + View &&view, Args &&... args) +{ + check_valid_callback_if_first_argument_is_not_a_view(callback_or_view, + predicates, view); + + using Access = AccessTraits; + using Tag = typename AccessTraitsHelper::tag; + + queryDispatch(Tag{}, tree, space, predicates, + std::forward(callback_or_view), + std::forward(view), std::forward(args)...); +} + +} // namespace CrsGraphWrapperImpl + +} // namespace Details +} // namespace ArborX + +#endif diff --git a/src/details/ArborX_DetailsPermutedData.hpp b/src/details/ArborX_DetailsPermutedData.hpp new file mode 100644 index 000000000..10313b5e1 --- /dev/null +++ b/src/details/ArborX_DetailsPermutedData.hpp @@ -0,0 +1,67 @@ +/**************************************************************************** + * Copyright (c) 2012-2020 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef ARBORX_DETAILS_PERMUTED_DATA_HPP +#define ARBORX_DETAILS_PERMUTED_DATA_HPP + +#include + +namespace ArborX +{ + +namespace Details +{ + +template +struct PermutedData +{ + Data _data; + Permute _permute; + KOKKOS_FUNCTION auto &operator()(int i) const { return _data(_permute(i)); } +}; + +} // namespace Details + +template +struct AccessTraits, + PredicatesTag> +{ + using PermutedPredicates = + Details::PermutedData; + using NativeAccess = AccessTraits; + + static std::size_t size(PermutedPredicates const &permuted_predicates) + { + return NativeAccess::size(permuted_predicates._data); + } + + template + KOKKOS_FUNCTION static auto get(PermutedPredicates const &permuted_predicates, + std::enable_if_t<_Attach, std::size_t> index) + { + auto const permuted_index = permuted_predicates._permute(index); + return attach(NativeAccess::get(permuted_predicates._data, permuted_index), + (int)index); + } + + template + KOKKOS_FUNCTION static auto get(PermutedPredicates const &permuted_predicates, + std::enable_if_t index) + { + auto const permuted_index = permuted_predicates._permute(index); + return NativeAccess::get(permuted_predicates._data, permuted_index); + } + using memory_space = typename NativeAccess::memory_space; +}; + +} // namespace ArborX + +#endif diff --git a/src/details/ArborX_TraversalPolicy.hpp b/src/details/ArborX_TraversalPolicy.hpp new file mode 100644 index 000000000..4d859bfba --- /dev/null +++ b/src/details/ArborX_TraversalPolicy.hpp @@ -0,0 +1,52 @@ +/**************************************************************************** + * Copyright (c) 2012-2020 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef ARBORX_TRAVERSAL_POLICY_HPP +#define ARBORX_TRAVERSAL_POLICY_HPP + +namespace ArborX +{ +namespace Experimental +{ +struct TraversalPolicy +{ + // Buffer size lets a user provide an upper bound for the number of results + // per query. If the guess is accurate, it avoids performing the tree + // traversals twice (the first one to count the number of results per query, + // the second to actually write down the results at the right location in + // the flattened array) + // + // The default value zero disables the buffer optimization. The sign of the + // integer is used to specify the policy in the case the size insufficient. + // If it is positive, the code falls back to the default behavior and + // performs a second pass. If it is negative, it throws an exception. + int _buffer_size = 0; + + // Sort predicates allows disabling predicate sorting. 
+ bool _sort_predicates = true; + + TraversalPolicy &setBufferSize(int buffer_size) + { + _buffer_size = buffer_size; + return *this; + } + + TraversalPolicy &setPredicateSorting(bool sort_predicates) + { + _sort_predicates = sort_predicates; + return *this; + } +}; + +} // namespace Experimental +} // namespace ArborX + +#endif diff --git a/test/tstDetailsBufferOptimization.cpp b/test/tstDetailsBufferOptimization.cpp index 47e720cd9..98878bfc1 100644 --- a/test/tstDetailsBufferOptimization.cpp +++ b/test/tstDetailsBufferOptimization.cpp @@ -11,10 +11,9 @@ #include "ArborX_EnableDeviceTypes.hpp" // ARBORX_DEVICE_TYPES #include "ArborX_EnableViewComparison.hpp" -#include -//#include -#include // FIXME +#include #include +#include #include @@ -26,11 +25,15 @@ struct Test1 { template - void launch(ExecutionSpace const &space, Predicates const &predicates, - InsertGenerator const &insert_generator) const + void query(ExecutionSpace const &space, Predicates const &predicates, + InsertGenerator const &insert_generator, + ArborX::Experimental::TraversalPolicy const &policy = + ArborX::Experimental::TraversalPolicy()) const { using Access = ArborX::AccessTraits; + std::ignore = policy; + Kokkos::parallel_for( Kokkos::RangePolicy(space, 0, Access::size(predicates)), KOKKOS_LAMBDA(int predicate_index) { @@ -69,10 +72,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(query_impl, DeviceType, ARBORX_DEVICE_TYPES) ArborX::exclusivePrefixSum(ExecutionSpace{}, offset); ArborX::reallocWithoutInitializing(indices, ArborX::lastElement(offset)); - ArborX::Details::queryImpl(ExecutionSpace{}, Test1{}, predicates, - ArborX::Details::CallbackDefaultSpatialPredicate{}, - indices, offset, permute, - ArborX::Details::BufferStatus::PreallocationHard); + ArborX::Details::CrsGraphWrapperImpl::queryImpl( + ExecutionSpace{}, Test1{}, predicates, + ArborX::Details::CallbackDefaultSpatialPredicate{}, indices, offset, + permute, ArborX::Details::BufferStatus::PreallocationHard); auto indices_host = 
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, indices); diff --git a/test/tstKokkosToolsAnnotations.cpp b/test/tstKokkosToolsAnnotations.cpp index 9e4d72a4e..abe369050 100644 --- a/test/tstKokkosToolsAnnotations.cpp +++ b/test/tstKokkosToolsAnnotations.cpp @@ -100,7 +100,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(bvh_query_allocations_prefixed, DeviceType, (isPrefixedWith(label, "ArborX::BVH::query::") || isPrefixedWith(label, "ArborX::TreeTraversal::spatial::") || isPrefixedWith(label, "ArborX::TreeTraversal::nearest::") || - isPrefixedWith(label, "ArborX::BufferOptimization::") || + isPrefixedWith(label, "ArborX::CrsGraphWrapper::") || isPrefixedWith(label, "ArborX::Sorting::") || isPrefixedWith(label, "Kokkos::SortImpl::BinSortFunctor::") || isPrefixedWith(label, "Testing::"))); diff --git a/test/tstKokkosToolsDistributedAnnotations.cpp b/test/tstKokkosToolsDistributedAnnotations.cpp index afb056785..6e0824c57 100644 --- a/test/tstKokkosToolsDistributedAnnotations.cpp +++ b/test/tstKokkosToolsDistributedAnnotations.cpp @@ -88,7 +88,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE( isPrefixedWith(label, "ArborX::BVH::query::") || isPrefixedWith(label, "ArborX::TreeTraversal::spatial::") || isPrefixedWith(label, "ArborX::TreeTraversal::nearest::") || - isPrefixedWith(label, "ArborX::BufferOptimization::") || + isPrefixedWith(label, "ArborX::CrsGraphWrapper::") || isPrefixedWith(label, "ArborX::Sorting::") || isPrefixedWith(label, "Kokkos::SortImpl::") || isPrefixedWith(label, "Testing::"))); From 827fcfe81e9ecdeb60640599cd1740b833bb8462 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Sat, 5 Dec 2020 22:01:35 -0500 Subject: [PATCH 02/19] Fix all crs calls (ignoring MPI) --- benchmarks/bvh_driver/bvh_driver.cpp | 22 ++-- examples/callback/example_callback.cpp | 35 +++--- examples/dbscan/ArborX_DBSCAN.hpp | 2 +- src/ArborX.hpp | 1 + test/Search_UnitTestHelpers.hpp | 31 +++++ test/tstLinearBVH.cpp | 161 +++++++++++++------------ 6 files changed, 146 insertions(+), 106 
deletions(-) diff --git a/benchmarks/bvh_driver/bvh_driver.cpp b/benchmarks/bvh_driver/bvh_driver.cpp index bd87717be..392e15ee9 100644 --- a/benchmarks/bvh_driver/bvh_driver.cpp +++ b/benchmarks/bvh_driver/bvh_driver.cpp @@ -10,6 +10,7 @@ ****************************************************************************/ #include +#include #include #include @@ -225,9 +226,10 @@ void BM_knn_search(benchmark::State &state, Spec const &spec) Kokkos::View offset("offset", 0); Kokkos::View indices("indices", 0); auto const start = std::chrono::high_resolution_clock::now(); - index.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setPredicateSorting( - spec.sort_predicates)); + ArborX::query_crs( + ExecutionSpace{}, index, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setPredicateSorting( + spec.sort_predicates)); auto const end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; state.SetIterationTime(elapsed_seconds.count()); @@ -295,10 +297,10 @@ void BM_radius_search(benchmark::State &state, Spec const &spec) Kokkos::View offset("offset", 0); Kokkos::View indices("indices", 0); auto const start = std::chrono::high_resolution_clock::now(); - index.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy() - .setPredicateSorting(spec.sort_predicates) - .setBufferSize(spec.buffer_size)); + ArborX::query_crs(ExecutionSpace{}, index, queries, indices, offset, + ArborX::Experimental::TraversalPolicy() + .setPredicateSorting(spec.sort_predicates) + .setBufferSize(spec.buffer_size)); auto const end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; state.SetIterationTime(elapsed_seconds.count()); @@ -613,9 +615,9 @@ int main(int argc, char *argv[]) #endif #ifdef KOKKOS_ENABLE_SERIAL - if (spec.backends == "all" || spec.backends == "rtree") - register_benchmark>( - "BoostRTree", spec); + // if 
(spec.backends == "all" || spec.backends == "rtree") + // register_benchmark>( + // "BoostRTree", spec); #endif } diff --git a/examples/callback/example_callback.cpp b/examples/callback/example_callback.cpp index 95a8f2fe5..117dd6acb 100644 --- a/examples/callback/example_callback.cpp +++ b/examples/callback/example_callback.cpp @@ -105,17 +105,17 @@ int main(int argc, char *argv[]) { Kokkos::View values("values", 0); Kokkos::View offsets("offsets", 0); - bvh.query(ExecutionSpace{}, FirstOctant{}, PrintfCallback{}, values, - offsets); + ArborX::query_crs(ExecutionSpace{}, bvh, FirstOctant{}, PrintfCallback{}, + values, offsets); #ifndef __NVCC__ - bvh.query(ExecutionSpace{}, FirstOctant{}, - KOKKOS_LAMBDA(auto /*predicate*/, int primitive, - auto /*output_functor*/) { + ArborX::query_crs(ExecutionSpace{}, bvh, FirstOctant{}, + KOKKOS_LAMBDA(auto /*predicate*/, int primitive, + auto /*output_functor*/) { #ifndef KOKKOS_ENABLE_SYCL - printf("Found %d from generic lambda\n", primitive); + printf("Found %d from generic lambda\n", primitive); #endif - }, - values, offsets); + }, + values, offsets); #endif } @@ -123,18 +123,19 @@ int main(int argc, char *argv[]) int const k = 10; Kokkos::View values("values", 0); Kokkos::View offsets("offsets", 0); - bvh.query(ExecutionSpace{}, NearestToOrigin{k}, PrintfCallback{}, values, - offsets); + ArborX::query_crs(ExecutionSpace{}, bvh, NearestToOrigin{k}, + PrintfCallback{}, values, offsets); #ifndef __NVCC__ - bvh.query(ExecutionSpace{}, NearestToOrigin{k}, - KOKKOS_LAMBDA(auto /*predicate*/, int primitive, float distance, - auto /*output_functor*/) { + ArborX::query_crs(ExecutionSpace{}, bvh, NearestToOrigin{k}, + KOKKOS_LAMBDA(auto /*predicate*/, int primitive, + float distance, auto /*output_functor*/) { #ifndef KOKKOS_ENABLE_SYCL - printf("Found %d with distance %.3f from generic lambda\n", - primitive, distance); + printf( + "Found %d with distance %.3f from generic lambda\n", + primitive, distance); #endif - }, - values, 
offsets); + }, + values, offsets); #endif } diff --git a/examples/dbscan/ArborX_DBSCAN.hpp b/examples/dbscan/ArborX_DBSCAN.hpp index 04d901383..82bacf588 100644 --- a/examples/dbscan/ArborX_DBSCAN.hpp +++ b/examples/dbscan/ArborX_DBSCAN.hpp @@ -221,7 +221,7 @@ void dbscan(ExecutionSpace exec_space, Primitives const &primitives, Kokkos::View indices("indices", 0); Kokkos::View offset("offset", 0); - bvh.query(exec_space, predicates, indices, offset); + ArborX::query_crs(exec_space, bvh, predicates, indices, offset); auto passed = Details::verifyClusters(exec_space, indices, offset, clusters, core_min_size); diff --git a/src/ArborX.hpp b/src/ArborX.hpp index 376b15aab..594679dad 100644 --- a/src/ArborX.hpp +++ b/src/ArborX.hpp @@ -18,6 +18,7 @@ #ifdef ARBORX_ENABLE_MPI #include #endif +#include #include #include #include diff --git a/test/Search_UnitTestHelpers.hpp b/test/Search_UnitTestHelpers.hpp index 704a968c7..55f563960 100644 --- a/test/Search_UnitTestHelpers.hpp +++ b/test/Search_UnitTestHelpers.hpp @@ -103,6 +103,19 @@ auto query(ExecutionSpace const &exec_space, Tree const &tree, Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); } +template +auto query(ExecutionSpace const &exec_space, + ArborX::BVH const &tree, Queries const &queries) +{ + using memory_space = MemorySpace; + Kokkos::View values("Testing::values", 0); + Kokkos::View offsets("Testing::offsets", 0); + ArborX::query_crs(exec_space, tree, queries, values, offsets); + return make_compressed_storage( + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, offsets), + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); +} + #define ARBORX_TEST_QUERY_TREE(exec_space, tree, queries, reference) \ BOOST_TEST(query(exec_space, tree, queries) == (reference), \ boost::test_tools::per_element()); @@ -124,6 +137,24 @@ auto query_with_distance(ExecutionSpace const &exec_space, Tree const &tree, Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); } +template 
+auto query_with_distance(ExecutionSpace const &exec_space, + ArborX::BVH const &tree, + Queries const &queries) +{ + using memory_space = MemorySpace; + Kokkos::View *, memory_space> values( + "Testing::values", 0); + Kokkos::View offsets("Testing::offsets", 0); + ArborX::query_crs( + exec_space, tree, queries, + ArborX::Details::CallbackDefaultNearestPredicateWithDistance{}, values, + offsets); + return make_compressed_storage( + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, offsets), + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); +} + // Workaround for NVCC that complains that the enclosing parent function // (query_with_distance) for an extended __host__ __device__ lambda must not // have deduced return type diff --git a/test/tstLinearBVH.cpp b/test/tstLinearBVH.cpp index a288a1088..bbd27d1bd 100644 --- a/test/tstLinearBVH.cpp +++ b/test/tstLinearBVH.cpp @@ -309,7 +309,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, tt::per_element()); }; - BOOST_CHECK_NO_THROW(bvh.query(ExecutionSpace{}, queries, indices, offset)); + BOOST_CHECK_NO_THROW( + ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset)); checkResultsAreFine(); // compute number of results per query @@ -321,37 +322,38 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, // optimal size BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize( - -max_results_per_query))); + ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize( + -max_results_per_query))); checkResultsAreFine(); // buffer size insufficient BOOST_TEST(max_results_per_query > 1); - BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(+1))); + BOOST_CHECK_NO_THROW(ArborX::query_crs( + ExecutionSpace{}, bvh, queries, indices, offset, + 
ArborX::Experimental::TraversalPolicy().setBufferSize(+1))); checkResultsAreFine(); BOOST_CHECK_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(-1)), + ArborX::query_crs( + ExecutionSpace{}, bvh, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(-1)), ArborX::SearchException); // adequate buffer size BOOST_TEST(max_results_per_query < 5); - BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(+5))); + BOOST_CHECK_NO_THROW(ArborX::query_crs( + ExecutionSpace{}, bvh, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(+5))); checkResultsAreFine(); - BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(-5))); + BOOST_CHECK_NO_THROW(ArborX::query_crs( + ExecutionSpace{}, bvh, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(-5))); checkResultsAreFine(); // passing null size skips the buffer optimization and never throws - BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(0))); + BOOST_CHECK_NO_THROW(ArborX::query_crs( + ExecutionSpace{}, bvh, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(0))); checkResultsAreFine(); } @@ -391,13 +393,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(unsorted_predicates, DeviceType, {{{0., 0., 0.}}, {{1., 1., 1.}}}, }); - BOOST_CHECK_NO_THROW(bvh.query( - ExecutionSpace{}, queries, indices, offset, + BOOST_CHECK_NO_THROW(ArborX::query_crs( + ExecutionSpace{}, bvh, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(true))); checkResultsAreFine(); - BOOST_CHECK_NO_THROW(bvh.query( - ExecutionSpace{}, queries, indices, offset, + BOOST_CHECK_NO_THROW(ArborX::query_crs( + 
ExecutionSpace{}, bvh, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(false))); checkResultsAreFine(); } @@ -409,13 +411,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(unsorted_predicates, DeviceType, {{{0.5, 0.5, 0.5}}, 2}, }); - BOOST_CHECK_NO_THROW(bvh.query( - ExecutionSpace{}, queries, indices, offset, + BOOST_CHECK_NO_THROW(ArborX::query_crs( + ExecutionSpace{}, bvh, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(true))); checkResultsAreFine(); - BOOST_CHECK_NO_THROW(bvh.query( - ExecutionSpace{}, queries, indices, offset, + BOOST_CHECK_NO_THROW(ArborX::query_crs( + ExecutionSpace{}, bvh, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(false))); checkResultsAreFine(); } @@ -442,20 +444,20 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(not_exceeding_stack_capacity, DeviceType, Kokkos::View offset("offset", 0); // query number of nearest neighbors that exceed capacity of the stack is // not a problem - BOOST_CHECK_NO_THROW(bvh.query(ExecutionSpace{}, - makeNearestQueries({ - {{{0., 0., 0.}}, n}, - }), - indices, offset)); + BOOST_CHECK_NO_THROW(ArborX::query_crs(ExecutionSpace{}, bvh, + makeNearestQueries({ + {{{0., 0., 0.}}, n}, + }), + indices, offset)); BOOST_TEST(ArborX::lastElement(offset) == n); // spatial query that find all indexable in the tree is also fine - BOOST_CHECK_NO_THROW(bvh.query(ExecutionSpace{}, - makeIntersectsBoxQueries({ - {}, - {{{0., 0., 0.}}, {{n, n, n}}}, - }), - indices, offset)); + BOOST_CHECK_NO_THROW(ArborX::query_crs(ExecutionSpace{}, bvh, + makeIntersectsBoxQueries({ + {}, + {{{0., 0., 0.}}, {{n, n, n}}}, + }), + indices, offset)); BOOST_TEST(ArborX::lastElement(offset) == n); } @@ -556,12 +558,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeIntersectsBoxQueries({ - bvh.bounds(), - }), - 
CustomInlineCallbackSpatialPredicate{points}, custom, - offset); + ArborX::query_crs(ExecutionSpace{}, bvh, + makeIntersectsBoxQueries({ + bvh.bounds(), + }), + CustomInlineCallbackSpatialPredicate{points}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -576,12 +578,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeIntersectsBoxQueries({ - bvh.bounds(), - }), - CustomPostCallbackSpatialPredicate{points}, custom, - offset); + ArborX::query_crs(ExecutionSpace{}, bvh, + makeIntersectsBoxQueries({ + bvh.bounds(), + }), + CustomPostCallbackSpatialPredicate{points}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -596,12 +598,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeNearestQueries({ - {origin, n}, - }), - CustomInlineCallbackNearestPredicate{}, custom, - offset); + ArborX::query_crs(ExecutionSpace{}, bvh, + makeNearestQueries({ + {origin, n}, + }), + CustomInlineCallbackNearestPredicate{}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -612,11 +614,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeNearestQueries({ - {origin, n}, - }), - CustomPostCallbackNearestPredicate{}, custom, offset); + ArborX::query_crs(ExecutionSpace{}, bvh, + makeNearestQueries({ + {origin, n}, + }), + CustomPostCallbackNearestPredicate{}, custom, + offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -783,8 +786,8 @@ 
BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query( - ExecutionSpace{}, + ArborX::query_crs( + ExecutionSpace{}, bvh, makeIntersectsBoxWithAttachmentQueries( {bvh.bounds()}, {delta}), CustomInlineCallbackAttachmentSpatialPredicate{points}, @@ -803,12 +806,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeIntersectsBoxWithAttachmentQueries>( - {bvh.bounds()}, {{0., delta}}), - CustomPostCallbackAttachmentSpatialPredicate{points}, - custom, offset); + ArborX::query_crs( + ExecutionSpace{}, bvh, + makeIntersectsBoxWithAttachmentQueries>( + {bvh.bounds()}, {{0., delta}}), + CustomPostCallbackAttachmentSpatialPredicate{points}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -823,11 +827,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeNearestWithAttachmentQueries({{origin, n}}, - {delta}), - CustomInlineCallbackAttachmentNearestPredicate{}, - custom, offset); + ArborX::query_crs( + ExecutionSpace{}, bvh, + makeNearestWithAttachmentQueries({{origin, n}}, + {delta}), + CustomInlineCallbackAttachmentNearestPredicate{}, custom, + offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -838,8 +843,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query( - ExecutionSpace{}, + ArborX::query_crs( + ExecutionSpace{}, bvh, makeNearestWithAttachmentQueries>( {{origin, n}}, {{0, delta}}), CustomPostCallbackAttachmentNearestPredicate{}, custom, @@ -904,7 +909,7 @@ 
BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) Kokkos::View indices("indices", n); Kokkos::View offset("offset", n); - bvh.query(ExecutionSpace{}, queries, indices, offset); + ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset); auto indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); @@ -1014,7 +1019,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) KOKKOS_LAMBDA(int i) { queries[i] = ArborX::intersects(bounding_boxes[i]); }); - bvh.query(ExecutionSpace{}, queries, indices, offset); + ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset); indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); offset_host = Kokkos::create_mirror_view(offset); @@ -1073,7 +1078,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) KOKKOS_LAMBDA(int i) { queries[i] = ArborX::intersects(bounding_boxes[i]); }); - bvh.query(ExecutionSpace{}, queries, indices, offset); + ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset); indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); offset_host = Kokkos::create_mirror_view(offset); From eefb7373685964f4a33354b2c3d33b8148d73feb Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Sat, 5 Dec 2020 22:43:57 -0500 Subject: [PATCH 03/19] Fix DistributedTree to call query_crs --- src/details/ArborX_DetailsDistributedTreeImpl.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/details/ArborX_DetailsDistributedTreeImpl.hpp b/src/details/ArborX_DetailsDistributedTreeImpl.hpp index 38edaf1dc..4ea763fb7 100644 --- a/src/details/ArborX_DetailsDistributedTreeImpl.hpp +++ b/src/details/ArborX_DetailsDistributedTreeImpl.hpp @@ -288,7 +288,7 @@ void DistributedTreeImpl::deviseStrategy( auto const &bottom_tree_sizes = tree._bottom_tree_sizes; // Find the k nearest local 
trees. - top_tree.query(space, queries, indices, offset); + query_crs(space, top_tree, queries, indices, offset); // Accumulate total leave count in the local trees until it reaches k which // is the number of neighbors queried for. Stop if local trees get @@ -380,7 +380,7 @@ void DistributedTreeImpl::reassessStrategy( getGeometry(Access::get(queries, i)), farthest_distances(i)}); }); - top_tree.query(space, radius_searches, indices, offset); + query_crs(space, top_tree, radius_searches, indices, offset); // NOTE: in principle, we could perform radius searches on the bottom_tree // rather than nearest queries. @@ -448,7 +448,7 @@ DistributedTreeImpl::queryDispatchImpl( ranks); // Perform queries that have been received - bottom_tree.query(space, fwd_queries, indices, offset, distances); + query_crs(space, bottom_tree, fwd_queries, indices, offset, distances); // Communicate results back communicateResultsBack(comm, space, indices, offset, ranks, ids, @@ -490,7 +490,7 @@ DistributedTreeImpl::queryDispatch( "ArborX::DistributedTree::query::spatial::indices", 0); Kokkos::View ranks( "ArborX::DistributedTree::query::spatial::ranks", 0); - top_tree.query(space, queries, indices, offset); + query_crs(space, top_tree, queries, indices, offset); { // NOTE_COMM_SPATIAL: The communication pattern here for the spatial search @@ -511,7 +511,7 @@ DistributedTreeImpl::queryDispatch( ranks); // Perform queries that have been received - bottom_tree.query(space, fwd_queries, callback, out, offset); + query_crs(space, bottom_tree, fwd_queries, callback, out, offset); // Communicate results back communicateResultsBack(comm, space, out, offset, ranks, ids); From 20743b622a80ee06ad2afb3e1531233518ba927e Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 8 Dec 2020 15:24:51 -0500 Subject: [PATCH 04/19] Remove old files --- ...orX_DetailsBoundingVolumeHierarchyImpl.hpp | 381 ---------------- .../ArborX_DetailsBufferOptimization.hpp | 409 ------------------ 2 files changed, 790 
deletions(-) delete mode 100644 src/details/ArborX_DetailsBoundingVolumeHierarchyImpl.hpp delete mode 100644 src/details/ArborX_DetailsBufferOptimization.hpp diff --git a/src/details/ArborX_DetailsBoundingVolumeHierarchyImpl.hpp b/src/details/ArborX_DetailsBoundingVolumeHierarchyImpl.hpp deleted file mode 100644 index 94ebbb905..000000000 --- a/src/details/ArborX_DetailsBoundingVolumeHierarchyImpl.hpp +++ /dev/null @@ -1,381 +0,0 @@ -/**************************************************************************** - * Copyright (c) 2012-2020 by the ArborX authors * - * All rights reserved. * - * * - * This file is part of the ArborX library. ArborX is * - * distributed under a BSD 3-clause license. For the licensing terms see * - * the LICENSE file in the top-level directory. * - * * - * SPDX-License-Identifier: BSD-3-Clause * - ****************************************************************************/ - -#ifndef ARBORX_DETAILS_BOUNDING_VOLUME_HIERARCHY_IMPL_HPP -#define ARBORX_DETAILS_BOUNDING_VOLUME_HIERARCHY_IMPL_HPP - -#include -#include -#include -#include -#include // ArithmeticTraits -#include -#include -#include - -#include - -namespace ArborX -{ - -namespace Experimental -{ -struct TraversalPolicy -{ - // Buffer size lets a user provide an upper bound for the number of results - // per query. If the guess is accurate, it avoids performing the tree - // traversals twice (the first one to count the number of results per query, - // the second to actually write down the results at the right location in - // the flattened array) - // - // The default value zero disables the buffer optimization. The sign of the - // integer is used to specify the policy in the case the size insufficient. - // If it is positive, the code falls back to the default behavior and - // performs a second pass. If it is negative, it throws an exception. - int _buffer_size = 0; - - // Sort predicates allows disabling predicate sorting. 
- bool _sort_predicates = true; - - TraversalPolicy &setBufferSize(int buffer_size) - { - _buffer_size = buffer_size; - return *this; - } - - TraversalPolicy &setPredicateSorting(bool sort_predicates) - { - _sort_predicates = sort_predicates; - return *this; - } -}; - -} // namespace Experimental - -namespace Details -{ - -// This class is the top level query distribution and search algorithm. It is -// implementation specific tree traversal. -// NOTE: There is nothing specific here about spatial, thus one should be able -// to rewrite nearest using the same structure, with a benefit of potentially -// adding threading. -template -struct BVHParallelTreeTraversal -{ - BVH _bvh; - - template - void launch(ExecutionSpace const &space, Predicates const predicates, - InsertGenerator const &insert_generator) const - { - traverse(space, _bvh, predicates, insert_generator); - } -}; - -struct Iota -{ - KOKKOS_FUNCTION unsigned int operator()(int const i) const { return i; } -}; - -namespace BoundingVolumeHierarchyImpl -{ -// Views are passed by reference here because internally Kokkos::realloc() -// is called. 
-template -std::enable_if_t{} && - Kokkos::is_view{} && Kokkos::is_view{}> -queryDispatch(SpatialPredicateTag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Callback const &callback, - OutputView &out, OffsetView &offset, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - using MemorySpace = typename BVH::memory_space; - using DeviceType = Kokkos::Device; - - check_valid_callback(callback, predicates, out); - - Kokkos::Profiling::pushRegion("ArborX::BVH::query::spatial"); - - using Access = AccessTraits; - auto const n_queries = Access::size(predicates); - - Kokkos::Profiling::pushRegion("ArborX::BVH::query::spatial::init_and_alloc"); - reallocWithoutInitializing(offset, n_queries + 1); - - int const buffer_size = std::abs(policy._buffer_size); - if (buffer_size > 0) - { - Kokkos::deep_copy(space, offset, buffer_size); - exclusivePrefixSum(space, offset); - // Use calculation for the size to avoid calling lastElement(offset) as it - // will launch an extra kernel to copy to host. And there is unnecessary to - // fill with invalid indices. 
- reallocWithoutInitializing(out, n_queries * buffer_size); - } - else - { - Kokkos::deep_copy(offset, 0); - } - Kokkos::Profiling::popRegion(); - - if (policy._sort_predicates) - { - Kokkos::Profiling::pushRegion( - "ArborX::BVH::query::spatial::compute_permutation"); - auto permute = - Details::BatchedQueries::sortQueriesAlongZOrderCurve( - space, bvh.bounds(), predicates); - Kokkos::Profiling::popRegion(); - - queryImpl(space, BVHParallelTreeTraversal{bvh}, predicates, callback, - out, offset, permute, toBufferStatus(policy._buffer_size)); - } - else - { - Iota permute; - queryImpl(space, BVHParallelTreeTraversal{bvh}, predicates, callback, - out, offset, permute, toBufferStatus(policy._buffer_size)); - } - - Kokkos::Profiling::popRegion(); -} - -template -inline std::enable_if_t{} && Kokkos::is_view{}> -queryDispatch(SpatialPredicateTag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Indices &indices, Offset &offset, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - queryDispatch(SpatialPredicateTag{}, bvh, space, predicates, - CallbackDefaultSpatialPredicate{}, indices, offset, policy); -} - -template -inline std::enable_if_t{}> -queryDispatch(SpatialPredicateTag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Callback const &callback, - OutputView &out, OffsetView &offset, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - using MemorySpace = typename BVH::memory_space; - Kokkos::View indices( - "ArborX::BVH::query::spatial::indices", 0); - queryDispatch(SpatialPredicateTag{}, bvh, space, predicates, indices, offset, - policy); - callback(predicates, offset, indices, out); -} - -template -std::enable_if_t{} && - Kokkos::is_view{} && Kokkos::is_view{}> -queryDispatch(NearestPredicateTag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Callback const &callback, - OutputView &out, OffsetView &offset, 
- Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - using MemorySpace = typename BVH::memory_space; - using DeviceType = Kokkos::Device; - - check_valid_callback(callback, predicates, out); - - Kokkos::Profiling::pushRegion("ArborX::BVH::query::nearest"); - - using Access = AccessTraits; - auto const n_queries = Access::size(predicates); - - Kokkos::Profiling::pushRegion("ArborX::BVH::query::nearest::init_and_alloc"); - - reallocWithoutInitializing(offset, n_queries + 1); - Kokkos::parallel_for( - "ArborX::BVH::query::nearest::" - "scan_queries_for_numbers_of_nearest_neighbors", - Kokkos::RangePolicy(space, 0, n_queries), - KOKKOS_LAMBDA(int i) { offset(i) = getK(Access::get(predicates, i)); }); - exclusivePrefixSum(space, offset); - - int const n_results = lastElement(offset); - reallocWithoutInitializing(out, n_results); - - Kokkos::Profiling::popRegion(); - - if (policy._sort_predicates) - { - Kokkos::Profiling::pushRegion( - "ArborX::BVH::query::nearest::compute_permutation"); - auto permute = - Details::BatchedQueries::sortQueriesAlongZOrderCurve( - space, bvh.bounds(), predicates); - Kokkos::Profiling::popRegion(); - - queryImpl(space, BVHParallelTreeTraversal{bvh}, predicates, callback, - out, offset, permute, BufferStatus::PreallocationSoft); - } - else - { - Iota permute; - queryImpl(space, BVHParallelTreeTraversal{bvh}, predicates, callback, - out, offset, permute, BufferStatus::PreallocationSoft); - } - - Kokkos::Profiling::popRegion(); -} - -template -inline std::enable_if_t{}> -queryDispatch(NearestPredicateTag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Callback const &callback, - OutputView &out, OffsetView &offset, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - using MemorySpace = typename BVH::memory_space; - Kokkos::View *, MemorySpace> pairs( - "ArborX::BVH::query::nearest::pairs_index_distance", 0); - 
queryDispatch(NearestPredicateTag{}, bvh, space, predicates, - CallbackDefaultNearestPredicateWithDistance{}, pairs, offset, - policy); - callback(predicates, offset, pairs, out); -} - -template -inline std::enable_if_t{} && Kokkos::is_view{}> -queryDispatch(NearestPredicateTag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Indices &indices, Offset &offset, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - queryDispatch(NearestPredicateTag{}, bvh, space, predicates, - CallbackDefaultNearestPredicate{}, indices, offset, policy); -} - -template -inline std::enable_if_t{} && - Kokkos::is_view{} && - Kokkos::is_view{}> -queryDispatch(NearestPredicateTag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Indices &indices, Offset &offset, - Distances &distances, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - using MemorySpace = typename BVH::memory_space; - Kokkos::View *, MemorySpace> out( - "ArborX::BVH::query::nearest::pairs_index_distance", 0); - queryDispatch(NearestPredicateTag{}, bvh, space, predicates, - CallbackDefaultNearestPredicateWithDistance{}, out, offset, - policy); - auto const n = out.extent(0); - reallocWithoutInitializing(indices, n); - reallocWithoutInitializing(distances, n); - Kokkos::parallel_for("ArborX::BVH::query::nearest::split_pairs", - Kokkos::RangePolicy(space, 0, n), - KOKKOS_LAMBDA(int i) { - indices(i) = out(i).first; - distances(i) = out(i).second; - }); -} - -template -std::enable_if_t{} && - !is_tagged_post_callback{}> -check_valid_callback_if_first_argument_is_not_a_view( - Callback const &callback, Predicates const &predicates, - OutputView const &out) -{ - check_valid_callback(callback, predicates, out); -} - -template -std::enable_if_t{} && - is_tagged_post_callback{}> -check_valid_callback_if_first_argument_is_not_a_view(Callback const &, - Predicates const &, - OutputView const &) -{ - // TODO 
-} - -template -std::enable_if_t{}> -check_valid_callback_if_first_argument_is_not_a_view(View const &, - Predicates const &, - OutputView const &) -{ - // do nothing -} - -template -inline std::enable_if_t>{}> -query(ExecutionSpace const &space, BVH const &bvh, Predicates const &predicates, - CallbackOrView &&callback_or_view, View &&view, Args &&... args) -{ - check_valid_callback_if_first_argument_is_not_a_view(callback_or_view, - predicates, view); - - using Access = AccessTraits; - using Tag = typename AccessTraitsHelper::tag; - - queryDispatch(Tag{}, bvh, space, predicates, - std::forward(callback_or_view), - std::forward(view), std::forward(args)...); -} - -template -inline void query(ExecutionSpace const &space, BVH const &bvh, - Predicates const &predicates, Callback const &callback, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - check_valid_callback(callback, predicates); - - Kokkos::Profiling::pushRegion("ArborX::BVH::query"); - - if (policy._sort_predicates) - { - Kokkos::Profiling::pushRegion("ArborX::BVH::query::compute_permutation"); - using MemorySpace = typename BVH::memory_space; - using DeviceType = Kokkos::Device; - auto permute = - Details::BatchedQueries::sortQueriesAlongZOrderCurve( - space, bvh.bounds(), predicates); - Kokkos::Profiling::popRegion(); - - using PermutedPredicates = PermutedData; - traverse(space, bvh, PermutedPredicates{predicates, permute}, callback); - } - else - { - traverse(space, bvh, predicates, callback); - } - - Kokkos::Profiling::popRegion(); -} - -} // namespace BoundingVolumeHierarchyImpl -} // namespace Details -} // namespace ArborX - -#endif diff --git a/src/details/ArborX_DetailsBufferOptimization.hpp b/src/details/ArborX_DetailsBufferOptimization.hpp deleted file mode 100644 index 30516d13d..000000000 --- a/src/details/ArborX_DetailsBufferOptimization.hpp +++ /dev/null @@ -1,409 +0,0 @@ -/**************************************************************************** - * 
Copyright (c) 2012-2020 by the ArborX authors * - * All rights reserved. * - * * - * This file is part of the ArborX library. ArborX is * - * distributed under a BSD 3-clause license. For the licensing terms see * - * the LICENSE file in the top-level directory. * - * * - * SPDX-License-Identifier: BSD-3-Clause * - ****************************************************************************/ -#ifndef ARBORX_DETAILS_BUFFER_OPTIMIZATON_HPP -#define ARBORX_DETAILS_BUFFER_OPTIMIZATON_HPP - -#include -#include -#include - -#include - -namespace ArborX -{ -namespace Details -{ - -enum BufferStatus -{ - PreallocationNone = 0, - PreallocationHard = -1, - PreallocationSoft = 1 -}; - -inline BufferStatus toBufferStatus(int buffer_size) -{ - if (buffer_size == 0) - return BufferStatus::PreallocationNone; - if (buffer_size > 0) - return BufferStatus::PreallocationSoft; - return BufferStatus::PreallocationHard; -} - -struct FirstPassTag -{ -}; -struct FirstPassNoBufferOptimizationTag -{ -}; -struct SecondPassTag -{ -}; - -template -struct InsertGenerator -{ - Callback _callback; - OutputView _out; - CountView _counts; - PermutedOffset _permuted_offset; - - using ValueType = typename OutputView::value_type; - using Access = AccessTraits; - using Tag = typename AccessTraitsHelper::tag; - using PredicateType = typename AccessTraitsHelper::type; - - template < - typename U = PassTag, typename V = Tag, - std::enable_if_t{} && - std::is_same{}> * = nullptr> - KOKKOS_FUNCTION auto operator()(PredicateType const &predicate, - int primitive_index) const - { - auto const predicate_index = getData(predicate); - auto const &raw_predicate = getPredicate(predicate); - // With permutation, we access offset in random manner, and - // _offset(permutated_predicate_index+1) may be in a completely different - // place. Instead, use pointers to get the correct value for the buffer - // size. For this reason, also take a reference for offset. 
- auto const &offset = _permuted_offset(predicate_index); - auto const buffer_size = *(&offset + 1) - offset; - auto &count = _counts(predicate_index); - - return _callback(raw_predicate, primitive_index, - [&](ValueType const &value) { - int count_old = Kokkos::atomic_fetch_add(&count, 1); - if (count_old < buffer_size) - _out(offset + count_old) = value; - }); - } - template - KOKKOS_FUNCTION std::enable_if_t{} && - std::is_same{}> - operator()(PredicateType const &predicate, int primitive_index, - float distance) const - { - auto const predicate_index = getData(predicate); - auto const &raw_predicate = getPredicate(predicate); - // With permutation, we access offset in random manner, and - // _offset(permutated_predicate_index+1) may be in a completely different - // place. Instead, use pointers to get the correct value for the buffer - // size. For this reason, also take a reference for offset. - auto const &offset = _permuted_offset(predicate_index); - auto const buffer_size = *(&offset + 1) - offset; - auto &count = _counts(predicate_index); - - _callback(raw_predicate, primitive_index, distance, - [&](ValueType const &value) { - int count_old = Kokkos::atomic_fetch_add(&count, 1); - if (count_old < buffer_size) - _out(offset + count_old) = value; - }); - } - - template < - typename U = PassTag, typename V = Tag, - std::enable_if_t{} && - std::is_same{}> * = nullptr> - KOKKOS_FUNCTION auto operator()(PredicateType const &predicate, - int primitive_index) const - { - auto const predicate_index = getData(predicate); - auto const &raw_predicate = getPredicate(predicate); - - auto &count = _counts(predicate_index); - - return _callback(raw_predicate, primitive_index, [&](ValueType const &) { - Kokkos::atomic_fetch_add(&count, 1); - }); - } - - template - KOKKOS_FUNCTION - std::enable_if_t{} && - std::is_same{}> - operator()(PredicateType const &predicate, int primitive_index, - float distance) const - { - auto const predicate_index = getData(predicate); - auto 
const &raw_predicate = getPredicate(predicate); - - auto &count = _counts(predicate_index); - - _callback(raw_predicate, primitive_index, distance, - [&](ValueType const &) { Kokkos::atomic_fetch_add(&count, 1); }); - } - - template < - typename U = PassTag, typename V = Tag, - std::enable_if_t{} && - std::is_same{}> * = nullptr> - KOKKOS_FUNCTION auto operator()(PredicateType const &predicate, - int primitive_index) const - { - auto const predicate_index = getData(predicate); - auto const &raw_predicate = getPredicate(predicate); - - // we store offsets in counts, and offset(permute(i)) = counts(i) - auto &offset = _counts(predicate_index); - - // TODO: there is a tradeoff here between skipping computation offset + - // count, and atomic increment of count. I think atomically incrementing - // offset is problematic for OpenMP as you potentially constantly steal - // cache lines. - return _callback(raw_predicate, primitive_index, - [&](ValueType const &value) { - _out(Kokkos::atomic_fetch_add(&offset, 1)) = value; - }); - } - - template - KOKKOS_FUNCTION std::enable_if_t{} && - std::is_same{}> - operator()(PredicateType const &predicate, int primitive_index, - float distance) const - { - auto const predicate_index = getData(predicate); - auto const &raw_predicate = getPredicate(predicate); - - // we store offsets in counts, and offset(permute(i)) = counts(i) - auto &offset = _counts(predicate_index); - - // TODO: there is a tradeoff here between skipping computation offset + - // count, and atomic increment of count. I think atomically incrementing - // offset is problematic for OpenMP as you potentially constantly steal - // cache lines. 
- _callback(raw_predicate, primitive_index, distance, - [&](ValueType const &value) { - _out(Kokkos::atomic_fetch_add(&offset, 1)) = value; - }); - } -}; - -template -struct PermutedData -{ - Data _data; - Permute _permute; - KOKKOS_FUNCTION auto &operator()(int i) const { return _data(_permute(i)); } -}; - -} // namespace Details - -template -struct AccessTraits, - PredicatesTag> -{ - using PermutedPredicates = - Details::PermutedData; - using NativeAccess = AccessTraits; - - static std::size_t size(PermutedPredicates const &permuted_predicates) - { - return NativeAccess::size(permuted_predicates._data); - } - - template - KOKKOS_FUNCTION static auto get(PermutedPredicates const &permuted_predicates, - std::enable_if_t<_Attach, std::size_t> index) - { - auto const permuted_index = permuted_predicates._permute(index); - return attach(NativeAccess::get(permuted_predicates._data, permuted_index), - (int)index); - } - - template - KOKKOS_FUNCTION static auto get(PermutedPredicates const &permuted_predicates, - std::enable_if_t index) - { - auto const permuted_index = permuted_predicates._permute(index); - return NativeAccess::get(permuted_predicates._data, permuted_index); - } - using memory_space = typename NativeAccess::memory_space; -}; - -namespace Details -{ - -template -void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, - Predicates const &predicates, Callback const &callback, - OutputView &out, OffsetView &offset, PermuteType permute, - BufferStatus buffer_status) -{ - // pre-condition: offset and out are preallocated. 
If buffer_size > 0, offset - // is pre-initialized - - static_assert(Kokkos::is_execution_space{}, ""); - - using Access = AccessTraits; - auto const n_queries = Access::size(predicates); - - Kokkos::Profiling::pushRegion("ArborX::BufferOptimization::two_pass"); - - using CountView = OffsetView; - CountView counts( - Kokkos::view_alloc("ArborX::BufferOptimization::counts", space), - n_queries); - - using PermutedPredicates = - PermutedData; - PermutedPredicates permuted_predicates = {predicates, permute}; - - using PermutedOffset = PermutedData; - PermutedOffset permuted_offset = {offset, permute}; - - Kokkos::Profiling::pushRegion( - "ArborX::BufferOptimization::two_pass::first_pass"); - bool underflow = false; - bool overflow = false; - if (buffer_status != BufferStatus::PreallocationNone) - { - tree_traversal.launch( - space, permuted_predicates, - InsertGenerator{callback, out, counts, - permuted_offset}); - - // Detecting overflow is a local operation that needs to be done for every - // index. We allow individual buffer sizes to differ, so it's not as easy - // as computing max counts. - int overflow_int = 0; - Kokkos::parallel_reduce( - "ArborX::BufferOptimization::compute_overflow", - Kokkos::RangePolicy(space, 0, n_queries), - KOKKOS_LAMBDA(int i, int &update) { - auto const *const offset_ptr = &permuted_offset(i); - if (counts(i) > *(offset_ptr + 1) - *offset_ptr) - update = 1; - }, - overflow_int); - overflow = (overflow_int > 0); - - if (!overflow) - { - int n_results = 0; - Kokkos::parallel_reduce( - "ArborX::BufferOptimization::compute_underflow", - Kokkos::RangePolicy(space, 0, n_queries), - KOKKOS_LAMBDA(int i, int &update) { update += counts(i); }, - n_results); - underflow = (n_results < out.extent_int(0)); - } - } - else - { - tree_traversal.launch( - space, permuted_predicates, - InsertGenerator{ - callback, out, counts, permuted_offset}); - // This may not be true, but it does not matter. 
As long as we have - // (n_results == 0) check before second pass, this value is not used. - // Otherwise, we know it's overflowed as there is no allocation. - overflow = true; - } - - Kokkos::Profiling::popRegion(); - Kokkos::Profiling::pushRegion( - "ArborX::BufferOptimization::first_pass_postprocess"); - - OffsetView preallocated_offset("ArborX::BufferOptimization::offset_copy", 0); - if (underflow) - { - // Store a copy of the original offset. We'll need it for compression. - preallocated_offset = clone(space, offset); - } - - Kokkos::parallel_for( - "ArborX::BufferOptimization::copy_counts_to_offsets", - Kokkos::RangePolicy(space, 0, n_queries), - KOKKOS_LAMBDA(int const i) { permuted_offset(i) = counts(i); }); - exclusivePrefixSum(space, offset); - - int const n_results = lastElement(offset); - - Kokkos::Profiling::popRegion(); - - if (n_results == 0) - { - // Exit early if either no results were found for any of the queries, or - // nothing was inserted inside a callback for found results. This check - // guarantees that the second pass will not be executed. - Kokkos::resize(out, 0); - // FIXME: do we need to reset offset if it was preallocated here? 
- Kokkos::Profiling::popRegion(); - return; - } - - if (overflow || buffer_status == BufferStatus::PreallocationNone) - { - // Not enough (individual) storage for results - - // If it was hard preallocation, we simply throw - ARBORX_ASSERT(buffer_status != BufferStatus::PreallocationHard); - - // Otherwise, do the second pass - Kokkos::Profiling::pushRegion( - "ArborX::BufferOptimization::two_pass:second_pass"); - - Kokkos::parallel_for( - "ArborX::BufferOptimization::copy_offsets_to_counts", - Kokkos::RangePolicy(space, 0, n_queries), - KOKKOS_LAMBDA(int const i) { counts(i) = permuted_offset(i); }); - - reallocWithoutInitializing(out, n_results); - - tree_traversal.launch( - space, permuted_predicates, - InsertGenerator{callback, out, counts, - permuted_offset}); - - Kokkos::Profiling::popRegion(); - } - else if (underflow) - { - // More than enough storage for results, need compression - Kokkos::Profiling::pushRegion( - "ArborX::BufferOptimization::two_pass:copy_values"); - - OutputView tmp_out(Kokkos::ViewAllocateWithoutInitializing(out.label()), - n_results); - - Kokkos::parallel_for( - "ArborX::BufferOptimization::copy_valid_values", - Kokkos::RangePolicy(space, 0, n_queries), - KOKKOS_LAMBDA(int i) { - int count = offset(i + 1) - offset(i); - for (int j = 0; j < count; ++j) - { - tmp_out(offset(i) + j) = out(preallocated_offset(i) + j); - } - }); - out = tmp_out; - - Kokkos::Profiling::popRegion(); - } - else - { - // The allocated storage was exactly enough for results, do nothing - } - Kokkos::Profiling::popRegion(); -} - -} // namespace Details -} // namespace ArborX - -#endif From a8b47783ccdeccf9baf8f9a37e65cee2b58d876f Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 8 Dec 2020 17:02:02 -0500 Subject: [PATCH 05/19] Remove unnecessary argument in the test Co-authored-by: Daniel Arndt --- test/tstDetailsBufferOptimization.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/tstDetailsBufferOptimization.cpp 
b/test/tstDetailsBufferOptimization.cpp index 98878bfc1..af4597766 100644 --- a/test/tstDetailsBufferOptimization.cpp +++ b/test/tstDetailsBufferOptimization.cpp @@ -27,13 +27,11 @@ struct Test1 typename InsertGenerator> void query(ExecutionSpace const &space, Predicates const &predicates, InsertGenerator const &insert_generator, - ArborX::Experimental::TraversalPolicy const &policy = + ArborX::Experimental::TraversalPolicy const & = ArborX::Experimental::TraversalPolicy()) const { using Access = ArborX::AccessTraits; - std::ignore = policy; - Kokkos::parallel_for( Kokkos::RangePolicy(space, 0, Access::size(predicates)), KOKKOS_LAMBDA(int predicate_index) { From 39297d76fc1677eace23b095269d79fbf1dd3828 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 8 Dec 2020 18:55:02 -0500 Subject: [PATCH 06/19] Allow query in BVH to take optional execution space argument --- src/ArborX_LinearBVH.hpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 9056438b6..572c16fdd 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -170,11 +170,20 @@ class BoundingVolumeHierarchy< { } // clang-format on - template - void query(Args &&... args) const + template + std::enable_if_t::value> + query(FirstArgumentType const &space, Args &&... args) const { BoundingVolumeHierarchy::query( - std::forward(args)...); + space, std::forward(args)...); + } + template + std::enable_if_t::value> + query(FirstArgumentType &&arg1, Args &&... 
args) const + { + BoundingVolumeHierarchy::query( + typename DeviceType::execution_space{}, + std::forward(arg1), std::forward(args)...); } }; From 8fd65388ad371f8f9f94b0ded0af8e12eaa14278 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 8 Dec 2020 18:55:39 -0500 Subject: [PATCH 07/19] Do not alias *this to bvh --- src/ArborX_LinearBVH.hpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 572c16fdd..8b41afebc 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -76,8 +76,7 @@ class BoundingVolumeHierarchy query(ExecutionSpace const &space, Predicates const &predicates, CallbackOrView &&callback_or_view, View &&view, Args &&... args) const { - auto const &bvh = *this; - ArborX::query_crs(space, bvh, predicates, + ArborX::query_crs(space, *this, predicates, std::forward(callback_or_view), std::forward(view), std::forward(args)...); } @@ -273,7 +272,6 @@ void BoundingVolumeHierarchy::query( Kokkos::Profiling::pushRegion("ArborX::BVH::query"); - auto const &bvh = *this; if (policy._sort_predicates) { Kokkos::Profiling::pushRegion("ArborX::BVH::query::compute_permutation"); @@ -285,12 +283,12 @@ void BoundingVolumeHierarchy::query( using PermutedPredicates = Details::PermutedData; - Details::traverse(space, bvh, PermutedPredicates{predicates, permute}, + Details::traverse(space, *this, PermutedPredicates{predicates, permute}, callback); } else { - Details::traverse(space, bvh, predicates, callback); + Details::traverse(space, *this, predicates, callback); } Kokkos::Profiling::popRegion(); From 1799f746a4eb459b88e06fda5d2b1852d9ebd069 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 8 Dec 2020 19:44:06 -0500 Subject: [PATCH 08/19] Swap order of tree and execution space in query_crs --- benchmarks/bvh_driver/bvh_driver.cpp | 4 +- .../example_cuda_access_traits.cpp | 2 +- examples/callback/example_callback.cpp | 8 ++-- examples/dbscan/ArborX_DBSCAN.hpp
| 2 +- src/ArborX_CrsGraphWrapper.hpp | 6 +-- src/ArborX_LinearBVH.hpp | 2 +- .../ArborX_DetailsCrsGraphWrapperImpl.hpp | 4 +- .../ArborX_DetailsDistributedTreeImpl.hpp | 10 ++-- test/Search_UnitTestHelpers.hpp | 4 +- test/tstLinearBVH.cpp | 48 +++++++++---------- 10 files changed, 45 insertions(+), 45 deletions(-) diff --git a/benchmarks/bvh_driver/bvh_driver.cpp b/benchmarks/bvh_driver/bvh_driver.cpp index 392e15ee9..0f52ec8b5 100644 --- a/benchmarks/bvh_driver/bvh_driver.cpp +++ b/benchmarks/bvh_driver/bvh_driver.cpp @@ -227,7 +227,7 @@ void BM_knn_search(benchmark::State &state, Spec const &spec) Kokkos::View indices("indices", 0); auto const start = std::chrono::high_resolution_clock::now(); ArborX::query_crs( - ExecutionSpace{}, index, queries, indices, offset, + index, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting( spec.sort_predicates)); auto const end = std::chrono::high_resolution_clock::now(); @@ -297,7 +297,7 @@ void BM_radius_search(benchmark::State &state, Spec const &spec) Kokkos::View offset("offset", 0); Kokkos::View indices("indices", 0); auto const start = std::chrono::high_resolution_clock::now(); - ArborX::query_crs(ExecutionSpace{}, index, queries, indices, offset, + ArborX::query_crs(index, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy() .setPredicateSorting(spec.sort_predicates) .setBufferSize(spec.buffer_size)); diff --git a/examples/access_traits/example_cuda_access_traits.cpp b/examples/access_traits/example_cuda_access_traits.cpp index 0123a6565..a4dd10f9f 100644 --- a/examples/access_traits/example_cuda_access_traits.cpp +++ b/examples/access_traits/example_cuda_access_traits.cpp @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) Kokkos::View indices("indices", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(cuda, bvh, Spheres{d_a, d_a, d_a, d_a, N}, indices, offset); + ArborX::query_crs(bvh, cuda, Spheres{d_a, d_a, d_a, d_a, N}, 
indices, offset); Kokkos::parallel_for(Kokkos::RangePolicy(cuda, 0, N), KOKKOS_LAMBDA(int i) { diff --git a/examples/callback/example_callback.cpp b/examples/callback/example_callback.cpp index 117dd6acb..0949db43a 100644 --- a/examples/callback/example_callback.cpp +++ b/examples/callback/example_callback.cpp @@ -105,10 +105,10 @@ int main(int argc, char *argv[]) { Kokkos::View values("values", 0); Kokkos::View offsets("offsets", 0); - ArborX::query_crs(ExecutionSpace{}, bvh, FirstOctant{}, PrintfCallback{}, + ArborX::query_crs(bvh, ExecutionSpace{}, FirstOctant{}, PrintfCallback{}, values, offsets); #ifndef __NVCC__ - ArborX::query_crs(ExecutionSpace{}, bvh, FirstOctant{}, + ArborX::query_crs(bvh, ExecutionSpace{}, FirstOctant{}, KOKKOS_LAMBDA(auto /*predicate*/, int primitive, auto /*output_functor*/) { #ifndef KOKKOS_ENABLE_SYCL @@ -123,10 +123,10 @@ int main(int argc, char *argv[]) int const k = 10; Kokkos::View values("values", 0); Kokkos::View offsets("offsets", 0); - ArborX::query_crs(ExecutionSpace{}, bvh, NearestToOrigin{k}, + ArborX::query_crs(bvh, ExecutionSpace{}, NearestToOrigin{k}, PrintfCallback{}, values, offsets); #ifndef __NVCC__ - ArborX::query_crs(ExecutionSpace{}, bvh, NearestToOrigin{k}, + ArborX::query_crs(bvh, ExecutionSpace{}, NearestToOrigin{k}, KOKKOS_LAMBDA(auto /*predicate*/, int primitive, float distance, auto /*output_functor*/) { #ifndef KOKKOS_ENABLE_SYCL diff --git a/examples/dbscan/ArborX_DBSCAN.hpp b/examples/dbscan/ArborX_DBSCAN.hpp index 82bacf588..ec0c1eeaa 100644 --- a/examples/dbscan/ArborX_DBSCAN.hpp +++ b/examples/dbscan/ArborX_DBSCAN.hpp @@ -221,7 +221,7 @@ void dbscan(ExecutionSpace exec_space, Primitives const &primitives, Kokkos::View indices("indices", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(exec_space, bvh, predicates, indices, offset); + ArborX::query_crs(bvh, exec_space, predicates, indices, offset); auto passed = Details::verifyClusters(exec_space, indices, offset, clusters, core_min_size); diff 
--git a/src/ArborX_CrsGraphWrapper.hpp b/src/ArborX_CrsGraphWrapper.hpp index e91d70a04..7e065e687 100644 --- a/src/ArborX_CrsGraphWrapper.hpp +++ b/src/ArborX_CrsGraphWrapper.hpp @@ -17,15 +17,15 @@ namespace ArborX { -template -inline void query_crs(ExecutionSpace const &space, Tree const &tree, +inline void query_crs(Tree const &tree, ExecutionSpace const &space, Predicates const &predicates, CallbackOrView &&callback_or_view, View &&view, Args &&... args) { Details::CrsGraphWrapperImpl::query( - space, tree, predicates, std::forward(callback_or_view), + tree, space, predicates, std::forward(callback_or_view), std::forward(view), std::forward(args)...); } diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 8b41afebc..8dd3dc5ff 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -76,7 +76,7 @@ class BoundingVolumeHierarchy query(ExecutionSpace const &space, Predicates const &predicates, CallbackOrView &&callback_or_view, View &&view, Args &&... args) const { - ArborX::query_crs(space, *this, predicates, + ArborX::query_crs(*this, space, predicates, std::forward(callback_or_view), std::forward(view), std::forward(args)...); } diff --git a/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp index fe51cf42a..24c29a567 100644 --- a/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp +++ b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp @@ -610,10 +610,10 @@ check_valid_callback_if_first_argument_is_not_a_view(View const &, // do nothing } -template inline std::enable_if_t>{}> -query(ExecutionSpace const &space, Tree const &tree, +query(Tree const &tree, ExecutionSpace const &space, Predicates const &predicates, CallbackOrView &&callback_or_view, View &&view, Args &&... 
args) { diff --git a/src/details/ArborX_DetailsDistributedTreeImpl.hpp b/src/details/ArborX_DetailsDistributedTreeImpl.hpp index 4ea763fb7..b5a924d27 100644 --- a/src/details/ArborX_DetailsDistributedTreeImpl.hpp +++ b/src/details/ArborX_DetailsDistributedTreeImpl.hpp @@ -288,7 +288,7 @@ void DistributedTreeImpl::deviseStrategy( auto const &bottom_tree_sizes = tree._bottom_tree_sizes; // Find the k nearest local trees. - query_crs(space, top_tree, queries, indices, offset); + query_crs(top_tree, space, queries, indices, offset); // Accumulate total leave count in the local trees until it reaches k which // is the number of neighbors queried for. Stop if local trees get @@ -380,7 +380,7 @@ void DistributedTreeImpl::reassessStrategy( getGeometry(Access::get(queries, i)), farthest_distances(i)}); }); - query_crs(space, top_tree, radius_searches, indices, offset); + query_crs(top_tree, space, radius_searches, indices, offset); // NOTE: in principle, we could perform radius searches on the bottom_tree // rather than nearest queries. 
@@ -448,7 +448,7 @@ DistributedTreeImpl::queryDispatchImpl( ranks); // Perform queries that have been received - query_crs(space, bottom_tree, fwd_queries, indices, offset, distances); + query_crs(bottom_tree, space, fwd_queries, indices, offset, distances); // Communicate results back communicateResultsBack(comm, space, indices, offset, ranks, ids, @@ -490,7 +490,7 @@ DistributedTreeImpl::queryDispatch( "ArborX::DistributedTree::query::spatial::indices", 0); Kokkos::View ranks( "ArborX::DistributedTree::query::spatial::ranks", 0); - query_crs(space, top_tree, queries, indices, offset); + query_crs(top_tree, space, queries, indices, offset); { // NOTE_COMM_SPATIAL: The communication pattern here for the spatial search @@ -511,7 +511,7 @@ DistributedTreeImpl::queryDispatch( ranks); // Perform queries that have been received - query_crs(space, bottom_tree, fwd_queries, callback, out, offset); + query_crs(bottom_tree, space, fwd_queries, callback, out, offset); // Communicate results back communicateResultsBack(comm, space, out, offset, ranks, ids); diff --git a/test/Search_UnitTestHelpers.hpp b/test/Search_UnitTestHelpers.hpp index 55f563960..4b353dead 100644 --- a/test/Search_UnitTestHelpers.hpp +++ b/test/Search_UnitTestHelpers.hpp @@ -110,7 +110,7 @@ auto query(ExecutionSpace const &exec_space, using memory_space = MemorySpace; Kokkos::View values("Testing::values", 0); Kokkos::View offsets("Testing::offsets", 0); - ArborX::query_crs(exec_space, tree, queries, values, offsets); + ArborX::query_crs(tree, exec_space, queries, values, offsets); return make_compressed_storage( Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, offsets), Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); @@ -147,7 +147,7 @@ auto query_with_distance(ExecutionSpace const &exec_space, "Testing::values", 0); Kokkos::View offsets("Testing::offsets", 0); ArborX::query_crs( - exec_space, tree, queries, + tree, exec_space, queries, 
ArborX::Details::CallbackDefaultNearestPredicateWithDistance{}, values, offsets); return make_compressed_storage( diff --git a/test/tstLinearBVH.cpp b/test/tstLinearBVH.cpp index bbd27d1bd..e56db4295 100644 --- a/test/tstLinearBVH.cpp +++ b/test/tstLinearBVH.cpp @@ -310,7 +310,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, }; BOOST_CHECK_NO_THROW( - ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset)); + ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset)); checkResultsAreFine(); // compute number of results per query @@ -322,7 +322,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, // optimal size BOOST_CHECK_NO_THROW( - ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset, + ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setBufferSize( -max_results_per_query))); checkResultsAreFine(); @@ -330,29 +330,29 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, // buffer size insufficient BOOST_TEST(max_results_per_query > 1); BOOST_CHECK_NO_THROW(ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setBufferSize(+1))); checkResultsAreFine(); BOOST_CHECK_THROW( ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setBufferSize(-1)), ArborX::SearchException); // adequate buffer size BOOST_TEST(max_results_per_query < 5); BOOST_CHECK_NO_THROW(ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setBufferSize(+5))); checkResultsAreFine(); BOOST_CHECK_NO_THROW(ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, 
ArborX::Experimental::TraversalPolicy().setBufferSize(-5))); checkResultsAreFine(); // passing null size skips the buffer optimization and never throws BOOST_CHECK_NO_THROW(ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setBufferSize(0))); checkResultsAreFine(); } @@ -394,12 +394,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(unsorted_predicates, DeviceType, }); BOOST_CHECK_NO_THROW(ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(true))); checkResultsAreFine(); BOOST_CHECK_NO_THROW(ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(false))); checkResultsAreFine(); } @@ -412,12 +412,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(unsorted_predicates, DeviceType, }); BOOST_CHECK_NO_THROW(ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(true))); checkResultsAreFine(); BOOST_CHECK_NO_THROW(ArborX::query_crs( - ExecutionSpace{}, bvh, queries, indices, offset, + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(false))); checkResultsAreFine(); } @@ -444,7 +444,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(not_exceeding_stack_capacity, DeviceType, Kokkos::View offset("offset", 0); // query number of nearest neighbors that exceed capacity of the stack is // not a problem - BOOST_CHECK_NO_THROW(ArborX::query_crs(ExecutionSpace{}, bvh, + BOOST_CHECK_NO_THROW(ArborX::query_crs(bvh, ExecutionSpace{}, makeNearestQueries({ {{{0., 0., 0.}}, n}, }), @@ -452,7 +452,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(not_exceeding_stack_capacity, DeviceType, 
BOOST_TEST(ArborX::lastElement(offset) == n); // spatial query that find all indexable in the tree is also fine - BOOST_CHECK_NO_THROW(ArborX::query_crs(ExecutionSpace{}, bvh, + BOOST_CHECK_NO_THROW(ArborX::query_crs(bvh, ExecutionSpace{}, makeIntersectsBoxQueries({ {}, {{{0., 0., 0.}}, {{n, n, n}}}, @@ -558,7 +558,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(ExecutionSpace{}, bvh, + ArborX::query_crs(bvh, ExecutionSpace{}, makeIntersectsBoxQueries({ bvh.bounds(), }), @@ -578,7 +578,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(ExecutionSpace{}, bvh, + ArborX::query_crs(bvh, ExecutionSpace{}, makeIntersectsBoxQueries({ bvh.bounds(), }), @@ -598,7 +598,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(ExecutionSpace{}, bvh, + ArborX::query_crs(bvh, ExecutionSpace{}, makeNearestQueries({ {origin, n}, }), @@ -614,7 +614,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(ExecutionSpace{}, bvh, + ArborX::query_crs(bvh, ExecutionSpace{}, makeNearestQueries({ {origin, n}, }), @@ -787,7 +787,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); ArborX::query_crs( - ExecutionSpace{}, bvh, + bvh, ExecutionSpace{}, makeIntersectsBoxWithAttachmentQueries( {bvh.bounds()}, {delta}), CustomInlineCallbackAttachmentSpatialPredicate{points}, @@ -807,7 +807,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, Kokkos::View *, 
DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); ArborX::query_crs( - ExecutionSpace{}, bvh, + bvh, ExecutionSpace{}, makeIntersectsBoxWithAttachmentQueries>( {bvh.bounds()}, {{0., delta}}), @@ -828,7 +828,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); ArborX::query_crs( - ExecutionSpace{}, bvh, + bvh, ExecutionSpace{}, makeNearestWithAttachmentQueries({{origin, n}}, {delta}), CustomInlineCallbackAttachmentNearestPredicate{}, custom, @@ -844,7 +844,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); ArborX::query_crs( - ExecutionSpace{}, bvh, + bvh, ExecutionSpace{}, makeNearestWithAttachmentQueries>( {{origin, n}}, {{0, delta}}), CustomPostCallbackAttachmentNearestPredicate{}, custom, @@ -909,7 +909,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) Kokkos::View indices("indices", n); Kokkos::View offset("offset", n); - ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset); + ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset); auto indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); @@ -1019,7 +1019,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) KOKKOS_LAMBDA(int i) { queries[i] = ArborX::intersects(bounding_boxes[i]); }); - ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset); + ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset); indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); offset_host = Kokkos::create_mirror_view(offset); @@ -1078,7 +1078,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) KOKKOS_LAMBDA(int i) { queries[i] = ArborX::intersects(bounding_boxes[i]); }); - 
ArborX::query_crs(ExecutionSpace{}, bvh, queries, indices, offset); + ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset); indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); offset_host = Kokkos::create_mirror_view(offset); From 455a266844d9baace020b15f9836e7d7fc068775 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 8 Dec 2020 19:58:57 -0500 Subject: [PATCH 09/19] Rename query_crs back to query --- benchmarks/bvh_driver/bvh_driver.cpp | 15 +- .../example_cuda_access_traits.cpp | 2 +- examples/callback/example_callback.cpp | 35 +++-- examples/dbscan/ArborX_DBSCAN.hpp | 2 +- src/ArborX_CrsGraphWrapper.hpp | 8 +- src/ArborX_LinearBVH.hpp | 8 +- .../ArborX_DetailsDistributedTreeImpl.hpp | 10 +- test/Search_UnitTestHelpers.hpp | 9 +- test/tstLinearBVH.cpp | 138 +++++++++--------- 9 files changed, 111 insertions(+), 116 deletions(-) diff --git a/benchmarks/bvh_driver/bvh_driver.cpp b/benchmarks/bvh_driver/bvh_driver.cpp index 0f52ec8b5..49b9b6c32 100644 --- a/benchmarks/bvh_driver/bvh_driver.cpp +++ b/benchmarks/bvh_driver/bvh_driver.cpp @@ -226,10 +226,9 @@ void BM_knn_search(benchmark::State &state, Spec const &spec) Kokkos::View offset("offset", 0); Kokkos::View indices("indices", 0); auto const start = std::chrono::high_resolution_clock::now(); - ArborX::query_crs( - index, ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setPredicateSorting( - spec.sort_predicates)); + ArborX::query(index, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setPredicateSorting( + spec.sort_predicates)); auto const end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; state.SetIterationTime(elapsed_seconds.count()); @@ -297,10 +296,10 @@ void BM_radius_search(benchmark::State &state, Spec const &spec) Kokkos::View offset("offset", 0); Kokkos::View indices("indices", 0); auto const start = 
std::chrono::high_resolution_clock::now(); - ArborX::query_crs(index, ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy() - .setPredicateSorting(spec.sort_predicates) - .setBufferSize(spec.buffer_size)); + ArborX::query(index, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy() + .setPredicateSorting(spec.sort_predicates) + .setBufferSize(spec.buffer_size)); auto const end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; state.SetIterationTime(elapsed_seconds.count()); diff --git a/examples/access_traits/example_cuda_access_traits.cpp b/examples/access_traits/example_cuda_access_traits.cpp index a4dd10f9f..05a4c61f8 100644 --- a/examples/access_traits/example_cuda_access_traits.cpp +++ b/examples/access_traits/example_cuda_access_traits.cpp @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) Kokkos::View indices("indices", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(bvh, cuda, Spheres{d_a, d_a, d_a, d_a, N}, indices, offset); + ArborX::query(bvh, cuda, Spheres{d_a, d_a, d_a, d_a, N}, indices, offset); Kokkos::parallel_for(Kokkos::RangePolicy(cuda, 0, N), KOKKOS_LAMBDA(int i) { diff --git a/examples/callback/example_callback.cpp b/examples/callback/example_callback.cpp index 0949db43a..931acf812 100644 --- a/examples/callback/example_callback.cpp +++ b/examples/callback/example_callback.cpp @@ -105,17 +105,17 @@ int main(int argc, char *argv[]) { Kokkos::View values("values", 0); Kokkos::View offsets("offsets", 0); - ArborX::query_crs(bvh, ExecutionSpace{}, FirstOctant{}, PrintfCallback{}, - values, offsets); + ArborX::query(bvh, ExecutionSpace{}, FirstOctant{}, PrintfCallback{}, + values, offsets); #ifndef __NVCC__ - ArborX::query_crs(bvh, ExecutionSpace{}, FirstOctant{}, - KOKKOS_LAMBDA(auto /*predicate*/, int primitive, - auto /*output_functor*/) { + ArborX::query(bvh, ExecutionSpace{}, FirstOctant{}, + KOKKOS_LAMBDA(auto 
/*predicate*/, int primitive, + auto /*output_functor*/) { #ifndef KOKKOS_ENABLE_SYCL - printf("Found %d from generic lambda\n", primitive); + printf("Found %d from generic lambda\n", primitive); #endif - }, - values, offsets); + }, + values, offsets); #endif } @@ -123,19 +123,18 @@ int main(int argc, char *argv[]) int const k = 10; Kokkos::View values("values", 0); Kokkos::View offsets("offsets", 0); - ArborX::query_crs(bvh, ExecutionSpace{}, NearestToOrigin{k}, - PrintfCallback{}, values, offsets); + ArborX::query(bvh, ExecutionSpace{}, NearestToOrigin{k}, PrintfCallback{}, + values, offsets); #ifndef __NVCC__ - ArborX::query_crs(bvh, ExecutionSpace{}, NearestToOrigin{k}, - KOKKOS_LAMBDA(auto /*predicate*/, int primitive, - float distance, auto /*output_functor*/) { + ArborX::query(bvh, ExecutionSpace{}, NearestToOrigin{k}, + KOKKOS_LAMBDA(auto /*predicate*/, int primitive, + float distance, auto /*output_functor*/) { #ifndef KOKKOS_ENABLE_SYCL - printf( - "Found %d with distance %.3f from generic lambda\n", - primitive, distance); + printf("Found %d with distance %.3f from generic lambda\n", + primitive, distance); #endif - }, - values, offsets); + }, + values, offsets); #endif } diff --git a/examples/dbscan/ArborX_DBSCAN.hpp b/examples/dbscan/ArborX_DBSCAN.hpp index ec0c1eeaa..ba5154bfc 100644 --- a/examples/dbscan/ArborX_DBSCAN.hpp +++ b/examples/dbscan/ArborX_DBSCAN.hpp @@ -221,7 +221,7 @@ void dbscan(ExecutionSpace exec_space, Primitives const &primitives, Kokkos::View indices("indices", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(bvh, exec_space, predicates, indices, offset); + ArborX::query(bvh, exec_space, predicates, indices, offset); auto passed = Details::verifyClusters(exec_space, indices, offset, clusters, core_min_size); diff --git a/src/ArborX_CrsGraphWrapper.hpp b/src/ArborX_CrsGraphWrapper.hpp index 7e065e687..703c7080a 100644 --- a/src/ArborX_CrsGraphWrapper.hpp +++ b/src/ArborX_CrsGraphWrapper.hpp @@ -19,10 +19,10 @@ namespace 
ArborX template -inline void query_crs(Tree const &tree, ExecutionSpace const &space, - Predicates const &predicates, - CallbackOrView &&callback_or_view, View &&view, - Args &&... args) +inline void query(Tree const &tree, ExecutionSpace const &space, + Predicates const &predicates, + CallbackOrView &&callback_or_view, View &&view, + Args &&... args) { Details::CrsGraphWrapperImpl::query( tree, space, predicates, std::forward(callback_or_view), diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 8dd3dc5ff..24ddac5ae 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -70,15 +70,15 @@ class BoundingVolumeHierarchy template // clang-format off - [[deprecated( "For crs format output, use standalone ArborX::query_crs instead.")]] + [[deprecated( "For crs format output, use standalone ArborX::query instead.")]] // clang-format on std::enable_if_t>{}> query(ExecutionSpace const &space, Predicates const &predicates, CallbackOrView &&callback_or_view, View &&view, Args &&... args) const { - ArborX::query_crs(*this, space, predicates, - std::forward(callback_or_view), - std::forward(view), std::forward(args)...); + ArborX::query(*this, space, predicates, + std::forward(callback_or_view), + std::forward(view), std::forward(args)...); } private: diff --git a/src/details/ArborX_DetailsDistributedTreeImpl.hpp b/src/details/ArborX_DetailsDistributedTreeImpl.hpp index b5a924d27..77fefa5d2 100644 --- a/src/details/ArborX_DetailsDistributedTreeImpl.hpp +++ b/src/details/ArborX_DetailsDistributedTreeImpl.hpp @@ -288,7 +288,7 @@ void DistributedTreeImpl::deviseStrategy( auto const &bottom_tree_sizes = tree._bottom_tree_sizes; // Find the k nearest local trees. - query_crs(top_tree, space, queries, indices, offset); + query(top_tree, space, queries, indices, offset); // Accumulate total leave count in the local trees until it reaches k which // is the number of neighbors queried for. 
Stop if local trees get @@ -380,7 +380,7 @@ void DistributedTreeImpl::reassessStrategy( getGeometry(Access::get(queries, i)), farthest_distances(i)}); }); - query_crs(top_tree, space, radius_searches, indices, offset); + query(top_tree, space, radius_searches, indices, offset); // NOTE: in principle, we could perform radius searches on the bottom_tree // rather than nearest queries. @@ -448,7 +448,7 @@ DistributedTreeImpl::queryDispatchImpl( ranks); // Perform queries that have been received - query_crs(bottom_tree, space, fwd_queries, indices, offset, distances); + query(bottom_tree, space, fwd_queries, indices, offset, distances); // Communicate results back communicateResultsBack(comm, space, indices, offset, ranks, ids, @@ -490,7 +490,7 @@ DistributedTreeImpl::queryDispatch( "ArborX::DistributedTree::query::spatial::indices", 0); Kokkos::View ranks( "ArborX::DistributedTree::query::spatial::ranks", 0); - query_crs(top_tree, space, queries, indices, offset); + query(top_tree, space, queries, indices, offset); { // NOTE_COMM_SPATIAL: The communication pattern here for the spatial search @@ -511,7 +511,7 @@ DistributedTreeImpl::queryDispatch( ranks); // Perform queries that have been received - query_crs(bottom_tree, space, fwd_queries, callback, out, offset); + query(bottom_tree, space, fwd_queries, callback, out, offset); // Communicate results back communicateResultsBack(comm, space, out, offset, ranks, ids); diff --git a/test/Search_UnitTestHelpers.hpp b/test/Search_UnitTestHelpers.hpp index 4b353dead..b898b0da0 100644 --- a/test/Search_UnitTestHelpers.hpp +++ b/test/Search_UnitTestHelpers.hpp @@ -110,7 +110,7 @@ auto query(ExecutionSpace const &exec_space, using memory_space = MemorySpace; Kokkos::View values("Testing::values", 0); Kokkos::View offsets("Testing::offsets", 0); - ArborX::query_crs(tree, exec_space, queries, values, offsets); + ArborX::query(tree, exec_space, queries, values, offsets); return make_compressed_storage( 
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, offsets), Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); @@ -146,10 +146,9 @@ auto query_with_distance(ExecutionSpace const &exec_space, Kokkos::View *, memory_space> values( "Testing::values", 0); Kokkos::View offsets("Testing::offsets", 0); - ArborX::query_crs( - tree, exec_space, queries, - ArborX::Details::CallbackDefaultNearestPredicateWithDistance{}, values, - offsets); + ArborX::query(tree, exec_space, queries, + ArborX::Details::CallbackDefaultNearestPredicateWithDistance{}, + values, offsets); return make_compressed_storage( Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, offsets), Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); diff --git a/test/tstLinearBVH.cpp b/test/tstLinearBVH.cpp index e56db4295..6de5c78a6 100644 --- a/test/tstLinearBVH.cpp +++ b/test/tstLinearBVH.cpp @@ -310,7 +310,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, }; BOOST_CHECK_NO_THROW( - ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset)); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset)); checkResultsAreFine(); // compute number of results per query @@ -322,38 +322,37 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, // optimal size BOOST_CHECK_NO_THROW( - ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize( - -max_results_per_query))); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize( + -max_results_per_query))); checkResultsAreFine(); // buffer size insufficient BOOST_TEST(max_results_per_query > 1); - BOOST_CHECK_NO_THROW(ArborX::query_crs( - bvh, ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(+1))); + BOOST_CHECK_NO_THROW( + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + 
ArborX::Experimental::TraversalPolicy().setBufferSize(+1))); checkResultsAreFine(); BOOST_CHECK_THROW( - ArborX::query_crs( - bvh, ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(-1)), + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(-1)), ArborX::SearchException); // adequate buffer size BOOST_TEST(max_results_per_query < 5); - BOOST_CHECK_NO_THROW(ArborX::query_crs( - bvh, ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(+5))); + BOOST_CHECK_NO_THROW( + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(+5))); checkResultsAreFine(); - BOOST_CHECK_NO_THROW(ArborX::query_crs( - bvh, ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(-5))); + BOOST_CHECK_NO_THROW( + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(-5))); checkResultsAreFine(); // passing null size skips the buffer optimization and never throws - BOOST_CHECK_NO_THROW(ArborX::query_crs( - bvh, ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(0))); + BOOST_CHECK_NO_THROW( + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(0))); checkResultsAreFine(); } @@ -393,12 +392,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(unsorted_predicates, DeviceType, {{{0., 0., 0.}}, {{1., 1., 1.}}}, }); - BOOST_CHECK_NO_THROW(ArborX::query_crs( + BOOST_CHECK_NO_THROW(ArborX::query( bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(true))); checkResultsAreFine(); - BOOST_CHECK_NO_THROW(ArborX::query_crs( + BOOST_CHECK_NO_THROW(ArborX::query( bvh, ExecutionSpace{}, queries, indices, offset, 
ArborX::Experimental::TraversalPolicy().setPredicateSorting(false))); checkResultsAreFine(); @@ -411,12 +410,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(unsorted_predicates, DeviceType, {{{0.5, 0.5, 0.5}}, 2}, }); - BOOST_CHECK_NO_THROW(ArborX::query_crs( + BOOST_CHECK_NO_THROW(ArborX::query( bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(true))); checkResultsAreFine(); - BOOST_CHECK_NO_THROW(ArborX::query_crs( + BOOST_CHECK_NO_THROW(ArborX::query( bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(false))); checkResultsAreFine(); @@ -444,20 +443,20 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(not_exceeding_stack_capacity, DeviceType, Kokkos::View offset("offset", 0); // query number of nearest neighbors that exceed capacity of the stack is // not a problem - BOOST_CHECK_NO_THROW(ArborX::query_crs(bvh, ExecutionSpace{}, - makeNearestQueries({ - {{{0., 0., 0.}}, n}, - }), - indices, offset)); + BOOST_CHECK_NO_THROW(ArborX::query(bvh, ExecutionSpace{}, + makeNearestQueries({ + {{{0., 0., 0.}}, n}, + }), + indices, offset)); BOOST_TEST(ArborX::lastElement(offset) == n); // spatial query that find all indexable in the tree is also fine - BOOST_CHECK_NO_THROW(ArborX::query_crs(bvh, ExecutionSpace{}, - makeIntersectsBoxQueries({ - {}, - {{{0., 0., 0.}}, {{n, n, n}}}, - }), - indices, offset)); + BOOST_CHECK_NO_THROW(ArborX::query(bvh, ExecutionSpace{}, + makeIntersectsBoxQueries({ + {}, + {{{0., 0., 0.}}, {{n, n, n}}}, + }), + indices, offset)); BOOST_TEST(ArborX::lastElement(offset) == n); } @@ -558,12 +557,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(bvh, ExecutionSpace{}, - makeIntersectsBoxQueries({ - bvh.bounds(), - }), - CustomInlineCallbackSpatialPredicate{points}, - custom, offset); + ArborX::query(bvh, ExecutionSpace{}, + 
makeIntersectsBoxQueries({ + bvh.bounds(), + }), + CustomInlineCallbackSpatialPredicate{points}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -578,12 +577,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(bvh, ExecutionSpace{}, - makeIntersectsBoxQueries({ - bvh.bounds(), - }), - CustomPostCallbackSpatialPredicate{points}, - custom, offset); + ArborX::query(bvh, ExecutionSpace{}, + makeIntersectsBoxQueries({ + bvh.bounds(), + }), + CustomPostCallbackSpatialPredicate{points}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -598,12 +597,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(bvh, ExecutionSpace{}, - makeNearestQueries({ - {origin, n}, - }), - CustomInlineCallbackNearestPredicate{}, - custom, offset); + ArborX::query(bvh, ExecutionSpace{}, + makeNearestQueries({ + {origin, n}, + }), + CustomInlineCallbackNearestPredicate{}, custom, + offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -614,12 +613,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs(bvh, ExecutionSpace{}, - makeNearestQueries({ - {origin, n}, - }), - CustomPostCallbackNearestPredicate{}, custom, - offset); + ArborX::query(bvh, ExecutionSpace{}, + makeNearestQueries({ + {origin, n}, + }), + CustomPostCallbackNearestPredicate{}, custom, + offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -786,7 +785,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, 
DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs( + ArborX::query( bvh, ExecutionSpace{}, makeIntersectsBoxWithAttachmentQueries( {bvh.bounds()}, {delta}), @@ -806,7 +805,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs( + ArborX::query( bvh, ExecutionSpace{}, makeIntersectsBoxWithAttachmentQueries>( @@ -827,12 +826,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs( - bvh, ExecutionSpace{}, - makeNearestWithAttachmentQueries({{origin, n}}, - {delta}), - CustomInlineCallbackAttachmentNearestPredicate{}, custom, - offset); + ArborX::query(bvh, ExecutionSpace{}, + makeNearestWithAttachmentQueries( + {{origin, n}}, {delta}), + CustomInlineCallbackAttachmentNearestPredicate{}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -843,7 +841,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - ArborX::query_crs( + ArborX::query( bvh, ExecutionSpace{}, makeNearestWithAttachmentQueries>( {{origin, n}}, {{0, delta}}), @@ -909,7 +907,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) Kokkos::View indices("indices", n); Kokkos::View offset("offset", n); - ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset); auto indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); @@ -1019,7 +1017,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) KOKKOS_LAMBDA(int i) { queries[i] = ArborX::intersects(bounding_boxes[i]); }); - ArborX::query_crs(bvh, 
ExecutionSpace{}, queries, indices, offset); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset); indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); offset_host = Kokkos::create_mirror_view(offset); @@ -1078,7 +1076,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) KOKKOS_LAMBDA(int i) { queries[i] = ArborX::intersects(bounding_boxes[i]); }); - ArborX::query_crs(bvh, ExecutionSpace{}, queries, indices, offset); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset); indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); offset_host = Kokkos::create_mirror_view(offset); From 8f63ec299b536bb44bfc30b5bde2af2a7c400a3d Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Wed, 9 Dec 2020 12:18:26 -0500 Subject: [PATCH 10/19] Remove unnecessary indirection level in ArborX::query Co-authored-by: Daniel Arndt --- src/ArborX_CrsGraphWrapper.hpp | 14 +++++++++++--- .../ArborX_DetailsCrsGraphWrapperImpl.hpp | 18 ------------------ 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/ArborX_CrsGraphWrapper.hpp b/src/ArborX_CrsGraphWrapper.hpp index 703c7080a..129abb9d7 100644 --- a/src/ArborX_CrsGraphWrapper.hpp +++ b/src/ArborX_CrsGraphWrapper.hpp @@ -24,9 +24,17 @@ inline void query(Tree const &tree, ExecutionSpace const &space, CallbackOrView &&callback_or_view, View &&view, Args &&... 
args) { - Details::CrsGraphWrapperImpl::query( - tree, space, predicates, std::forward(callback_or_view), - std::forward(view), std::forward(args)...); + Details::CrsGraphWrapperImpl:: + check_valid_callback_if_first_argument_is_not_a_view(callback_or_view, + predicates, view); + + using Access = AccessTraits; + using Tag = typename Details::AccessTraitsHelper::tag; + + ArborX::Details::CrsGraphWrapperImpl::queryDispatch( + Tag{}, tree, space, predicates, + std::forward(callback_or_view), std::forward(view), + std::forward(args)...); } } // namespace ArborX diff --git a/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp index 24c29a567..dbadd47a5 100644 --- a/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp +++ b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp @@ -610,24 +610,6 @@ check_valid_callback_if_first_argument_is_not_a_view(View const &, // do nothing } -template -inline std::enable_if_t>{}> -query(Tree const &tree, ExecutionSpace const &space, - Predicates const &predicates, CallbackOrView &&callback_or_view, - View &&view, Args &&... 
args) -{ - check_valid_callback_if_first_argument_is_not_a_view(callback_or_view, - predicates, view); - - using Access = AccessTraits; - using Tag = typename AccessTraitsHelper::tag; - - queryDispatch(Tag{}, tree, space, predicates, - std::forward(callback_or_view), - std::forward(view), std::forward(args)...); -} - } // namespace CrsGraphWrapperImpl } // namespace Details From 4d82132d890f99deac3004f283215d75f2abb44e Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Sun, 13 Dec 2020 09:46:24 -0500 Subject: [PATCH 11/19] Added missing Kokkos profiling regions --- src/ArborX_CrsGraphWrapper.hpp | 4 ++++ src/ArborX_LinearBVH.hpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/ArborX_CrsGraphWrapper.hpp b/src/ArborX_CrsGraphWrapper.hpp index 129abb9d7..826e6bbb2 100644 --- a/src/ArborX_CrsGraphWrapper.hpp +++ b/src/ArborX_CrsGraphWrapper.hpp @@ -24,6 +24,8 @@ inline void query(Tree const &tree, ExecutionSpace const &space, CallbackOrView &&callback_or_view, View &&view, Args &&... args) { + Kokkos::Profiling::pushRegion("ArborX::query"); + Details::CrsGraphWrapperImpl:: check_valid_callback_if_first_argument_is_not_a_view(callback_or_view, predicates, view); @@ -35,6 +37,8 @@ inline void query(Tree const &tree, ExecutionSpace const &space, Tag{}, tree, space, predicates, std::forward(callback_or_view), std::forward(view), std::forward(args)...); + + Kokkos::Profiling::popRegion(); } } // namespace ArborX diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 24ddac5ae..d15e26db8 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -76,9 +76,13 @@ class BoundingVolumeHierarchy query(ExecutionSpace const &space, Predicates const &predicates, CallbackOrView &&callback_or_view, View &&view, Args &&... 
args) const { + Kokkos::Profiling::pushRegion("ArborX::BVH::query"); + ArborX::query(*this, space, predicates, std::forward(callback_or_view), std::forward(view), std::forward(args)...); + + Kokkos::Profiling::popRegion(); } private: From fb5b33996198a0228aceb800bdef1d90b93b24bd Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Sat, 19 Dec 2020 22:27:41 -0500 Subject: [PATCH 12/19] Restore boost rtree option in the benchmark --- benchmarks/bvh_driver/bvh_driver.cpp | 6 +++--- test/ArborX_BoostRTreeHelpers.hpp | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/benchmarks/bvh_driver/bvh_driver.cpp b/benchmarks/bvh_driver/bvh_driver.cpp index a4d02ce9e..3535abb7c 100644 --- a/benchmarks/bvh_driver/bvh_driver.cpp +++ b/benchmarks/bvh_driver/bvh_driver.cpp @@ -601,9 +601,9 @@ int main(int argc, char *argv[]) #endif #ifdef KOKKOS_ENABLE_SERIAL - // if (spec.backends == "all" || spec.backends == "rtree") - // register_benchmark>( - // "BoostRTree", spec); + if (spec.backends == "all" || spec.backends == "rtree") + register_benchmark>( + "BoostRTree", spec); #endif } diff --git a/test/ArborX_BoostRTreeHelpers.hpp b/test/ArborX_BoostRTreeHelpers.hpp index 29d64f5ca..97d15a79d 100644 --- a/test/ArborX_BoostRTreeHelpers.hpp +++ b/test/ArborX_BoostRTreeHelpers.hpp @@ -312,4 +312,19 @@ class ParallelRTree } // namespace BoostExt +namespace ArborX +{ +// Specialization of ArborX::query +template +inline void query(BoostExt::RTree const &rtree, + ExecutionSpace const &space, Predicates const &predicates, + InputView &indices, InputView &offset, + TrailingArgs &&... 
args) +{ + rtree.query(space, predicates, indices, offset, + std::forward(args)...); +} +} // namespace ArborX + #endif From ee0400dda593a30dbc2f5499d67a0a0eee3579a9 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Mon, 21 Dec 2020 11:22:19 -0500 Subject: [PATCH 13/19] Remove profiling regions from a wrapper function --- src/ArborX_LinearBVH.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 46c7135b1..45ffa905d 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -78,13 +78,9 @@ class BoundingVolumeHierarchy query(ExecutionSpace const &space, Predicates const &predicates, CallbackOrView &&callback_or_view, View &&view, Args &&... args) const { - Kokkos::Profiling::pushRegion("ArborX::BVH::query"); - ArborX::query(*this, space, predicates, std::forward(callback_or_view), std::forward(view), std::forward(args)...); - - Kokkos::Profiling::popRegion(); } private: From 6a8ed3756e6e16a637a2ae0cdc15f52a7249b78b Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Mon, 21 Dec 2020 11:27:57 -0500 Subject: [PATCH 14/19] Rename BufferOptimization test to CrsGraphWrapper --- test/CMakeLists.txt | 10 +++++----- ...imization.cpp => tstDetailsCrsGraphWrapperImpl.cpp} | 2 +- test/tstLinearBVH.cpp | 3 +-- 3 files changed, 7 insertions(+), 8 deletions(-) rename test/{tstDetailsBufferOptimization.cpp => tstDetailsCrsGraphWrapperImpl.cpp} (98%) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 148ae3a7d..26af93827 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -97,11 +97,11 @@ target_compile_definitions(ArborX_DetailsBatchedQueries.exe PRIVATE BOOST_TEST_D target_include_directories(ArborX_DetailsBatchedQueries.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) add_test(NAME ArborX_DetailsBatchedQueries_Test COMMAND ./ArborX_DetailsBatchedQueries.exe) -add_executable(ArborX_DetailsBufferOptimization.exe tstDetailsBufferOptimization.cpp utf_main.cpp) 
-target_link_libraries(ArborX_DetailsBufferOptimization.exe PRIVATE ArborX Boost::unit_test_framework) -target_compile_definitions(ArborX_DetailsBufferOptimization.exe PRIVATE BOOST_TEST_DYN_LINK) -target_include_directories(ArborX_DetailsBufferOptimization.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -add_test(NAME ArborX_DetailsBufferOptimization_Test COMMAND ./ArborX_DetailsBufferOptimization.exe) +add_executable(ArborX_DetailsCrsGraphWrapperImpl.exe tstDetailsCrsGraphWrapperImpl.cpp utf_main.cpp) +target_link_libraries(ArborX_DetailsCrsGraphWrapperImpl.exe PRIVATE ArborX Boost::unit_test_framework) +target_compile_definitions(ArborX_DetailsCrsGraphWrapperImpl.exe PRIVATE BOOST_TEST_DYN_LINK) +target_include_directories(ArborX_DetailsCrsGraphWrapperImpl.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +add_test(NAME ArborX_DetailsCrsGraphWrapperImpl_Test COMMAND ./ArborX_DetailsCrsGraphWrapperImpl.exe) if(ARBORX_ENABLE_MPI) add_executable(ArborX_DistributedTree.exe tstDistributedTree.cpp tstKokkosToolsDistributedAnnotations.cpp utf_main.cpp) diff --git a/test/tstDetailsBufferOptimization.cpp b/test/tstDetailsCrsGraphWrapperImpl.cpp similarity index 98% rename from test/tstDetailsBufferOptimization.cpp rename to test/tstDetailsCrsGraphWrapperImpl.cpp index d636e1500..325149fee 100644 --- a/test/tstDetailsBufferOptimization.cpp +++ b/test/tstDetailsCrsGraphWrapperImpl.cpp @@ -17,7 +17,7 @@ #include -#define BOOST_TEST_MODULE DetailsBufferOptiization +#define BOOST_TEST_MODULE DetailsCrsGraphWrapperImpl namespace tt = boost::test_tools; diff --git a/test/tstLinearBVH.cpp b/test/tstLinearBVH.cpp index 7f70bddea..39c88126a 100644 --- a/test/tstLinearBVH.cpp +++ b/test/tstLinearBVH.cpp @@ -273,8 +273,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, tt::per_element()); }; - BOOST_CHECK_NO_THROW( - ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset)); + BOOST_CHECK_NO_THROW(query(bvh, ExecutionSpace{}, queries, indices, offset)); checkResultsAreFine(); 
// compute number of results per query From a7fd787daf2d07aa9b7f6c7064cc85307e7d048b Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Mon, 21 Dec 2020 11:28:06 -0500 Subject: [PATCH 15/19] Fix terminology: standalone function -> free function --- src/ArborX_LinearBVH.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 45ffa905d..4ea5c81fe 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -72,7 +72,7 @@ class BoundingVolumeHierarchy template // clang-format off - [[deprecated( "For crs format output, use standalone ArborX::query instead.")]] + [[deprecated( "For crs format output, use a free function ArborX::query instead.")]] // clang-format on std::enable_if_t>{}> query(ExecutionSpace const &space, Predicates const &predicates, From f0f26075599e5aa4f9fedfd6e25a0b96aa49d356 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 22 Dec 2020 13:44:16 -0500 Subject: [PATCH 16/19] Restore original interface of BVH Do not expose a `query` with an execution space in it. --- src/ArborX_LinearBVH.hpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 4ea5c81fe..73a9d5d1a 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -180,13 +180,6 @@ class BoundingVolumeHierarchy< } // clang-format on template - std::enable_if_t::value> - query(FirstArgumentType const &space, Args &&... args) const - { - BoundingVolumeHierarchy::query( - space, std::forward(args)...); - } - template std::enable_if_t::value> query(FirstArgumentType &&arg1, Args &&... 
args) const { @@ -194,6 +187,22 @@ class BoundingVolumeHierarchy< typename DeviceType::execution_space{}, std::forward(arg1), std::forward(args)...); } + +private: + template + friend void ArborX::query(Tree const &tree, ExecutionSpace const &space, + Predicates const &predicates, + CallbackOrView &&callback_or_view, View &&view, + Args &&... args); + + template + std::enable_if_t::value> + query(FirstArgumentType const &space, Args &&... args) const + { + BoundingVolumeHierarchy::query( + space, std::forward(args)...); + } }; template From a65f312ce3b1c91eb3a54dc6936a6400ed3fe11b Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 22 Dec 2020 18:49:59 -0500 Subject: [PATCH 17/19] Fix last missed instance of unqualified query() in the test --- test/tstLinearBVH.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/tstLinearBVH.cpp b/test/tstLinearBVH.cpp index 39c88126a..7f70bddea 100644 --- a/test/tstLinearBVH.cpp +++ b/test/tstLinearBVH.cpp @@ -273,7 +273,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, tt::per_element()); }; - BOOST_CHECK_NO_THROW(query(bvh, ExecutionSpace{}, queries, indices, offset)); + BOOST_CHECK_NO_THROW( + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset)); checkResultsAreFine(); // compute number of results per query From dd3d45eed24e17df096414a238014ad16ddf17bd Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 22 Dec 2020 18:50:24 -0500 Subject: [PATCH 18/19] Be conservative about deprecating BVH::query(..., offset, out) call Because Damien said so. 
--- src/ArborX_LinearBVH.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 73a9d5d1a..0a4449290 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -71,9 +71,6 @@ class BoundingVolumeHierarchy template - // clang-format off - [[deprecated( "For crs format output, use a free function ArborX::query instead.")]] - // clang-format on std::enable_if_t>{}> query(ExecutionSpace const &space, Predicates const &predicates, CallbackOrView &&callback_or_view, View &&view, Args &&... args) const From 2f9e76dbe6b182791c963373726780cf5a2350fe Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Tue, 22 Dec 2020 19:04:17 -0500 Subject: [PATCH 19/19] Add comment why we kept a second version of query in the tests --- test/Search_UnitTestHelpers.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/Search_UnitTestHelpers.hpp b/test/Search_UnitTestHelpers.hpp index 105c17ce0..4755b1d9a 100644 --- a/test/Search_UnitTestHelpers.hpp +++ b/test/Search_UnitTestHelpers.hpp @@ -103,6 +103,8 @@ auto query(ExecutionSpace const &exec_space, Tree const &tree, Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); } +// This is a temporary workaround until we reconcile interfaces of +// DistributedTree and BVH template auto query(ExecutionSpace const &exec_space, ArborX::BVH const &tree, Queries const &queries)