diff --git a/benchmarks/bvh_driver/bvh_driver.cpp b/benchmarks/bvh_driver/bvh_driver.cpp index 2c31d13c3..b64ad7590 100644 --- a/benchmarks/bvh_driver/bvh_driver.cpp +++ b/benchmarks/bvh_driver/bvh_driver.cpp @@ -10,6 +10,7 @@ ****************************************************************************/ #include +#include #include #include @@ -225,9 +226,9 @@ void BM_knn_search(benchmark::State &state, Spec const &spec) Kokkos::View offset("offset", 0); Kokkos::View indices("indices", 0); auto const start = std::chrono::high_resolution_clock::now(); - index.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setPredicateSorting( - spec.sort_predicates)); + ArborX::query(index, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setPredicateSorting( + spec.sort_predicates)); auto const end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; state.SetIterationTime(elapsed_seconds.count()); @@ -295,10 +296,10 @@ void BM_radius_search(benchmark::State &state, Spec const &spec) Kokkos::View offset("offset", 0); Kokkos::View indices("indices", 0); auto const start = std::chrono::high_resolution_clock::now(); - index.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy() - .setPredicateSorting(spec.sort_predicates) - .setBufferSize(spec.buffer_size)); + ArborX::query(index, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy() + .setPredicateSorting(spec.sort_predicates) + .setBufferSize(spec.buffer_size)); auto const end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; state.SetIterationTime(elapsed_seconds.count()); diff --git a/examples/access_traits/example_cuda_access_traits.cpp b/examples/access_traits/example_cuda_access_traits.cpp index 6626aeb69..05a4c61f8 100644 --- a/examples/access_traits/example_cuda_access_traits.cpp +++ b/examples/access_traits/example_cuda_access_traits.cpp @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) Kokkos::View indices("indices", 0); Kokkos::View offset("offset", 0); - bvh.query(cuda, Spheres{d_a, d_a, d_a, d_a, N}, indices, offset); + ArborX::query(bvh, cuda, Spheres{d_a, d_a, d_a, d_a, N}, indices, offset); Kokkos::parallel_for(Kokkos::RangePolicy(cuda, 0, N), KOKKOS_LAMBDA(int i) { diff --git a/examples/callback/example_callback.cpp b/examples/callback/example_callback.cpp index b1e0bf487..89b9a79ef 100644 --- a/examples/callback/example_callback.cpp +++ b/examples/callback/example_callback.cpp @@ -53,12 +53,6 @@ struct AccessTraits }; } // namespace ArborX -struct PairIndexDistance -{ - int index; - float distance; -}; - struct PrintfCallback { template @@ -96,19 +90,19 @@ int main(int argc, char *argv[]) { Kokkos::View values("values", 0); Kokkos::View offsets("offsets", 0); - bvh.query(ExecutionSpace{}, FirstOctant{}, PrintfCallback{}, values, - offsets); + ArborX::query(bvh, ExecutionSpace{}, FirstOctant{}, PrintfCallback{}, + values, offsets); #ifndef __NVCC__ - bvh.query(ExecutionSpace{}, FirstOctant{}, - KOKKOS_LAMBDA(auto /*predicate*/, int primitive, - auto /*output_functor*/) { + ArborX::query(bvh, ExecutionSpace{}, FirstOctant{}, + KOKKOS_LAMBDA(auto /*predicate*/, int primitive, + auto /*output_functor*/) { #ifndef __SYCL_DEVICE_ONLY__ - printf("Found %d from generic lambda\n", primitive); + printf("Found %d from generic lambda\n", primitive); #else - (void)primitive; + (void)primitive; #endif - }, - values, offsets); + }, + values, offsets); #endif } @@ -116,19 +110,19 @@ int main(int argc, char *argv[]) int const k = 10; Kokkos::View values("values", 0); Kokkos::View offsets("offsets", 0); - bvh.query(ExecutionSpace{}, NearestToOrigin{k}, PrintfCallback{}, values, - offsets); + ArborX::query(bvh, ExecutionSpace{}, NearestToOrigin{k}, PrintfCallback{}, + values, offsets); #ifndef __NVCC__ - bvh.query(ExecutionSpace{}, NearestToOrigin{k}, - KOKKOS_LAMBDA(auto /*predicate*/, int primitive, - auto /*output_functor*/) { + ArborX::query(bvh, ExecutionSpace{}, NearestToOrigin{k}, + KOKKOS_LAMBDA(auto /*predicate*/, int primitive, + auto /*output_functor*/) { #ifndef __SYCL_DEVICE_ONLY__ - printf("Found %d from generic lambda\n", primitive); + printf("Found %d from generic lambda\n", primitive); #else - (void)primitive; + (void)primitive; #endif - }, - values, offsets); + }, + values, offsets); #endif } diff --git a/examples/dbscan/ArborX_DBSCAN.hpp b/examples/dbscan/ArborX_DBSCAN.hpp index af3c18134..44600ec62 100644 --- a/examples/dbscan/ArborX_DBSCAN.hpp +++ b/examples/dbscan/ArborX_DBSCAN.hpp @@ -221,7 +221,7 @@ void dbscan(ExecutionSpace exec_space, Primitives const &primitives, Kokkos::View indices("indices", 0); Kokkos::View offset("offset", 0); - bvh.query(exec_space, predicates, indices, offset); + ArborX::query(bvh, exec_space, predicates, indices, offset); auto passed = Details::verifyClusters(exec_space, indices, offset, clusters, core_min_size); diff --git a/src/ArborX.hpp b/src/ArborX.hpp index 376b15aab..594679dad 100644 --- a/src/ArborX.hpp +++ b/src/ArborX.hpp @@ -18,6 +18,7 @@ #ifdef ARBORX_ENABLE_MPI #include #endif +#include #include #include #include diff --git a/src/ArborX_CrsGraphWrapper.hpp b/src/ArborX_CrsGraphWrapper.hpp new file mode 100644 index 000000000..826e6bbb2 --- /dev/null +++ b/src/ArborX_CrsGraphWrapper.hpp @@ -0,0 +1,46 @@ +/**************************************************************************** + * Copyright (c) 2012-2020 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef ARBORX_CRS_GRAPH_WRAPPER_HPP +#define ARBORX_CRS_GRAPH_WRAPPER_HPP + +#include "ArborX_DetailsCrsGraphWrapperImpl.hpp" + +namespace ArborX +{ + +template +inline void query(Tree const &tree, ExecutionSpace const &space, + Predicates const &predicates, + CallbackOrView &&callback_or_view, View &&view, + Args &&... args) +{ + Kokkos::Profiling::pushRegion("ArborX::query"); + + Details::CrsGraphWrapperImpl:: + check_valid_callback_if_first_argument_is_not_a_view(callback_or_view, + predicates, view); + + using Access = AccessTraits; + using Tag = typename Details::AccessTraitsHelper::tag; + + ArborX::Details::CrsGraphWrapperImpl::queryDispatch( + Tag{}, tree, space, predicates, + std::forward(callback_or_view), std::forward(view), + std::forward(args)...); + + Kokkos::Profiling::popRegion(); +} + +} // namespace ArborX + +#endif diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 2272dbf8d..0a4449290 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -14,17 +14,23 @@ #include #include -#include +#include +#include +#include #include #include #include +#include #include #include +#include +#include #include namespace ArborX { + namespace Details { template @@ -57,18 +63,21 @@ class BoundingVolumeHierarchy KOKKOS_FUNCTION bounding_volume_type bounds() const noexcept { return _bounds; } - template + template void query(ExecutionSpace const &space, Predicates const &predicates, - Args &&... args) const + Callback const &callback, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) const; + + template + std::enable_if_t>{}> + query(ExecutionSpace const &space, Predicates const &predicates, + CallbackOrView &&callback_or_view, View &&view, Args &&... args) const { - Details::check_valid_access_traits(PredicatesTag{}, predicates); - using Access = AccessTraits; - static_assert(KokkosExt::is_accessible_from::value, - "Predicates must be accessible from the execution space"); - - Details::BoundingVolumeHierarchyImpl::query(space, *this, predicates, - std::forward(args)...); + ArborX::query(*this, space, predicates, + std::forward(callback_or_view), + std::forward(view), std::forward(args)...); } private: @@ -167,11 +176,29 @@ class BoundingVolumeHierarchy< { } // clang-format on - template - void query(Args &&... args) const + template + std::enable_if_t::value> + query(FirstArgumentType &&arg1, Args &&... args) const + { + BoundingVolumeHierarchy::query( + typename DeviceType::execution_space{}, + std::forward(arg1), std::forward(args)...); + } + +private: + template + friend void ArborX::query(Tree const &tree, ExecutionSpace const &space, + Predicates const &predicates, + CallbackOrView &&callback_or_view, View &&view, + Args &&... args); + + template + std::enable_if_t::value> + query(FirstArgumentType const &space, Args &&... args) const { BoundingVolumeHierarchy::query( - typename DeviceType::execution_space{}, std::forward(args)...); + space, std::forward(args)...); } }; @@ -245,6 +272,46 @@ BoundingVolumeHierarchy::BoundingVolumeHierarchy( Kokkos::Profiling::popRegion(); } +template +template +void BoundingVolumeHierarchy::query( + ExecutionSpace const &space, Predicates const &predicates, + Callback const &callback, Experimental::TraversalPolicy const &policy) const +{ + Details::check_valid_access_traits(PredicatesTag{}, predicates); + + using Access = AccessTraits; + using Tag = typename Details::AccessTraitsHelper::tag; + + auto profiling_prefix = + std::string("ArborX::BVH::query::") + + (std::is_same{} ? "spatial" + : "nearest"); + + Kokkos::Profiling::pushRegion(profiling_prefix); + + if (policy._sort_predicates) + { + Kokkos::Profiling::pushRegion(profiling_prefix + "::compute_permutation"); + using DeviceType = Kokkos::Device; + auto permute = + Details::BatchedQueries::sortQueriesAlongZOrderCurve( + space, bounds(), predicates); + Kokkos::Profiling::popRegion(); + + using PermutedPredicates = + Details::PermutedData; + Details::traverse(space, *this, PermutedPredicates{predicates, permute}, + callback); + } + else + { + Details::traverse(space, *this, predicates, callback); + } + + Kokkos::Profiling::popRegion(); +} + } // namespace ArborX #endif diff --git a/src/details/ArborX_DetailsBoundingVolumeHierarchyImpl.hpp b/src/details/ArborX_DetailsBoundingVolumeHierarchyImpl.hpp deleted file mode 100644 index 067e5856a..000000000 --- a/src/details/ArborX_DetailsBoundingVolumeHierarchyImpl.hpp +++ /dev/null @@ -1,313 +0,0 @@ -/**************************************************************************** - * Copyright (c) 2012-2020 by the ArborX authors * - * All rights reserved. * - * * - * This file is part of the ArborX library. ArborX is * - * distributed under a BSD 3-clause license. For the licensing terms see * - * the LICENSE file in the top-level directory. * - * * - * SPDX-License-Identifier: BSD-3-Clause * - ****************************************************************************/ - -#ifndef ARBORX_DETAILS_BOUNDING_VOLUME_HIERARCHY_IMPL_HPP -#define ARBORX_DETAILS_BOUNDING_VOLUME_HIERARCHY_IMPL_HPP - -#include -#include -#include -#include -#include // ArithmeticTraits -#include -#include -#include - -#include - -namespace ArborX -{ - -namespace Experimental -{ -struct TraversalPolicy -{ - // Buffer size lets a user provide an upper bound for the number of results - // per query. If the guess is accurate, it avoids performing the tree - // traversals twice (the first one to count the number of results per query, - // the second to actually write down the results at the right location in - // the flattened array) - // - // The default value zero disables the buffer optimization. The sign of the - // integer is used to specify the policy in the case the size insufficient. - // If it is positive, the code falls back to the default behavior and - // performs a second pass. If it is negative, it throws an exception. - int _buffer_size = 0; - - // Sort predicates allows disabling predicate sorting. - bool _sort_predicates = true; - - TraversalPolicy &setBufferSize(int buffer_size) - { - _buffer_size = buffer_size; - return *this; - } - - TraversalPolicy &setPredicateSorting(bool sort_predicates) - { - _sort_predicates = sort_predicates; - return *this; - } -}; - -} // namespace Experimental - -namespace Details -{ - -// This class is the top level query distribution and search algorithm. It is -// implementation specific tree traversal. -// NOTE: There is nothing specific here about spatial, thus one should be able -// to rewrite nearest using the same structure, with a benefit of potentially -// adding threading. -template -struct BVHParallelTreeTraversal -{ - BVH _bvh; - - template - void launch(ExecutionSpace const &space, Predicates const predicates, - InsertGenerator const &insert_generator) const - { - traverse(space, _bvh, predicates, insert_generator); - } -}; - -struct Iota -{ - KOKKOS_FUNCTION unsigned int operator()(int const i) const { return i; } -}; - -namespace BoundingVolumeHierarchyImpl -{ -template -std::enable_if_t{}> -allocateAndInititalizeStorage(Tag, ExecutionSpace const &space, - Predicates const &predicates, OffsetView &offset, - OutView &out, int buffer_size) -{ - using Access = AccessTraits; - - auto const n_queries = Access::size(predicates); - reallocWithoutInitializing(offset, n_queries + 1); - - buffer_size = std::abs(buffer_size); - - Kokkos::deep_copy(space, offset, buffer_size); - - if (buffer_size != 0) - { - exclusivePrefixSum(space, offset); - - // Use calculation for the size to avoid calling lastElement(offset) as it - // will launch an extra kernel to copy to host. - reallocWithoutInitializing(out, n_queries * buffer_size); - } -} - -template -std::enable_if_t{}> -allocateAndInititalizeStorage(Tag, ExecutionSpace const &space, - Predicates const &predicates, OffsetView &offset, - OutView &out, int /*buffer_size*/) -{ - using Access = AccessTraits; - - auto const n_queries = Access::size(predicates); - reallocWithoutInitializing(offset, n_queries + 1); - - Kokkos::parallel_for( - "ArborX::BVH::query::nearest::" - "scan_queries_for_numbers_of_nearest_neighbors", - Kokkos::RangePolicy(space, 0, n_queries), - KOKKOS_LAMBDA(int i) { offset(i) = getK(Access::get(predicates, i)); }); - exclusivePrefixSum(space, offset); - - reallocWithoutInitializing(out, lastElement(offset)); -} - -// Views are passed by reference here because internally Kokkos::realloc() -// is called. -template -std::enable_if_t{} && - Kokkos::is_view{} && Kokkos::is_view{}> -queryDispatch(Tag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Callback const &callback, - OutputView &out, OffsetView &offset, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - using MemorySpace = typename BVH::memory_space; - using DeviceType = Kokkos::Device; - - check_valid_callback(callback, predicates, out); - - auto profiling_prefix = - std::string("ArborX::BVH::query::") + - (std::is_same{} ? "spatial" : "nearest"); - - Kokkos::Profiling::pushRegion(profiling_prefix); - - Kokkos::Profiling::pushRegion(profiling_prefix + "::init_and_alloc"); - - allocateAndInititalizeStorage(Tag{}, space, predicates, offset, out, - policy._buffer_size); - - Kokkos::Profiling::popRegion(); - - auto buffer_status = (std::is_same{} - ? toBufferStatus(policy._buffer_size) - : BufferStatus::PreallocationSoft); - - if (policy._sort_predicates) - { - Kokkos::Profiling::pushRegion(profiling_prefix + "::compute_permutation"); - auto permute = - Details::BatchedQueries::sortQueriesAlongZOrderCurve( - space, bvh.bounds(), predicates); - Kokkos::Profiling::popRegion(); - - queryImpl(space, BVHParallelTreeTraversal{bvh}, predicates, callback, - out, offset, permute, buffer_status); - } - else - { - Iota permute; - queryImpl(space, BVHParallelTreeTraversal{bvh}, predicates, callback, - out, offset, permute, buffer_status); - } - - Kokkos::Profiling::popRegion(); -} - -template -inline std::enable_if_t{} && Kokkos::is_view{}> -queryDispatch(Tag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Indices &indices, Offset &offset, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - queryDispatch(Tag{}, bvh, space, predicates, DefaultCallback{}, indices, - offset, policy); -} - -template -inline std::enable_if_t{}> -queryDispatch(Tag, BVH const &bvh, ExecutionSpace const &space, - Predicates const &predicates, Callback const &callback, - OutputView &out, OffsetView &offset, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - using MemorySpace = typename BVH::memory_space; - Kokkos::View indices("ArborX::BVH::query::indices", 0); - queryDispatch(Tag{}, bvh, space, predicates, indices, offset, policy); - callback(predicates, offset, indices, out); -} - -template -std::enable_if_t{} && - !is_tagged_post_callback{}> -check_valid_callback_if_first_argument_is_not_a_view( - Callback const &callback, Predicates const &predicates, - OutputView const &out) -{ - check_valid_callback(callback, predicates, out); -} - -template -std::enable_if_t{} && - is_tagged_post_callback{}> -check_valid_callback_if_first_argument_is_not_a_view(Callback const &, - Predicates const &, - OutputView const &) -{ - // TODO -} - -template -std::enable_if_t{}> -check_valid_callback_if_first_argument_is_not_a_view(View const &, - Predicates const &, - OutputView const &) -{ - // do nothing -} - -template -inline std::enable_if_t>{}> -query(ExecutionSpace const &space, BVH const &bvh, Predicates const &predicates, - CallbackOrView &&callback_or_view, View &&view, Args &&... args) -{ - check_valid_callback_if_first_argument_is_not_a_view(callback_or_view, - predicates, view); - - using Access = AccessTraits; - using Tag = typename AccessTraitsHelper::tag; - - queryDispatch(Tag{}, bvh, space, predicates, - std::forward(callback_or_view), - std::forward(view), std::forward(args)...); -} - -template -inline void query(ExecutionSpace const &space, BVH const &bvh, - Predicates const &predicates, Callback const &callback, - Experimental::TraversalPolicy const &policy = - Experimental::TraversalPolicy()) -{ - check_valid_callback(callback, predicates); - - using Access = AccessTraits; - using Tag = typename AccessTraitsHelper::tag; - - auto profiling_prefix = - std::string("ArborX::BVH::query::") + - (std::is_same{} ? "spatial" : "nearest"); - - Kokkos::Profiling::pushRegion(profiling_prefix); - - if (policy._sort_predicates) - { - Kokkos::Profiling::pushRegion(profiling_prefix + "::compute_permutation"); - using MemorySpace = typename BVH::memory_space; - using DeviceType = Kokkos::Device; - auto permute = - Details::BatchedQueries::sortQueriesAlongZOrderCurve( - space, bvh.bounds(), predicates); - Kokkos::Profiling::popRegion(); - - using PermutedPredicates = PermutedData; - traverse(space, bvh, PermutedPredicates{predicates, permute}, callback); - } - else - { - traverse(space, bvh, predicates, callback); - } - - Kokkos::Profiling::popRegion(); -} - -} // namespace BoundingVolumeHierarchyImpl -} // namespace Details -} // namespace ArborX - -#endif diff --git a/src/details/ArborX_DetailsBufferOptimization.hpp b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp similarity index 54% rename from src/details/ArborX_DetailsBufferOptimization.hpp rename to src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp index e4b18d10c..7569ff915 100644 --- a/src/details/ArborX_DetailsBufferOptimization.hpp +++ b/src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp @@ -8,14 +8,16 @@ * * * SPDX-License-Identifier: BSD-3-Clause * ****************************************************************************/ -#ifndef ARBORX_DETAILS_BUFFER_OPTIMIZATON_HPP -#define ARBORX_DETAILS_BUFFER_OPTIMIZATON_HPP -#include -#include -#include +#ifndef ARBORX_DETAIL_CRS_GRAPH_WRAPPER_IMPL_HPP +#define ARBORX_DETAIL_CRS_GRAPH_WRAPPER_IMPL_HPP -#include +#include +#include +#include +#include +#include +#include namespace ArborX { @@ -123,55 +125,13 @@ struct InsertGenerator } }; -template -struct PermutedData -{ - Data _data; - Permute _permute; - KOKKOS_FUNCTION auto &operator()(int i) const { return _data(_permute(i)); } -}; - -} // namespace Details - -template -struct AccessTraits, - PredicatesTag> -{ - using PermutedPredicates = - Details::PermutedData; - using NativeAccess = AccessTraits; - - static std::size_t size(PermutedPredicates const &permuted_predicates) - { - return NativeAccess::size(permuted_predicates._data); - } - - template - KOKKOS_FUNCTION static auto get(PermutedPredicates const &permuted_predicates, - std::enable_if_t<_Attach, std::size_t> index) - { - auto const permuted_index = permuted_predicates._permute(index); - return attach(NativeAccess::get(permuted_predicates._data, permuted_index), - (int)index); - } - - template - KOKKOS_FUNCTION static auto get(PermutedPredicates const &permuted_predicates, - std::enable_if_t index) - { - auto const permuted_index = permuted_predicates._permute(index); - return NativeAccess::get(permuted_predicates._data, permuted_index); - } - using memory_space = typename NativeAccess::memory_space; -}; - -namespace Details +namespace CrsGraphWrapperImpl { -template -void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, +void queryImpl(ExecutionSpace const &space, Tree const &tree, Predicates const &predicates, Callback const &callback, OutputView &out, OffsetView &offset, PermuteType permute, BufferStatus buffer_status) @@ -184,12 +144,11 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, using Access = AccessTraits; auto const n_queries = Access::size(predicates); - Kokkos::Profiling::pushRegion("ArborX::BufferOptimization::two_pass"); + Kokkos::Profiling::pushRegion("ArborX::CrsGraphWrapper::two_pass"); using CountView = OffsetView; - CountView counts( - Kokkos::view_alloc("ArborX::BufferOptimization::counts", space), - n_queries); + CountView counts(Kokkos::view_alloc("ArborX::CrsGraphWrapper::counts", space), + n_queries); using PermutedPredicates = PermutedData; @@ -199,23 +158,24 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, PermutedOffset permuted_offset = {offset, permute}; Kokkos::Profiling::pushRegion( - "ArborX::BufferOptimization::two_pass::first_pass"); + "ArborX::CrsGraphWrapper::two_pass::first_pass"); bool underflow = false; bool overflow = false; if (buffer_status != BufferStatus::PreallocationNone) { - tree_traversal.launch( + tree.query( space, permuted_predicates, InsertGenerator{callback, out, counts, - permuted_offset}); + permuted_offset}, + ArborX::Experimental::TraversalPolicy().setPredicateSorting(false)); // Detecting overflow is a local operation that needs to be done for every // index. We allow individual buffer sizes to differ, so it's not as easy // as computing max counts. int overflow_int = 0; Kokkos::parallel_reduce( - "ArborX::BufferOptimization::compute_overflow", + "ArborX::CrsGraphWrapper::compute_overflow", Kokkos::RangePolicy(space, 0, n_queries), KOKKOS_LAMBDA(int i, int &update) { auto const *const offset_ptr = &permuted_offset(i); @@ -229,7 +189,7 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, { int n_results = 0; Kokkos::parallel_reduce( - "ArborX::BufferOptimization::compute_underflow", + "ArborX::CrsGraphWrapper::compute_underflow", Kokkos::RangePolicy(space, 0, n_queries), KOKKOS_LAMBDA(int i, int &update) { update += counts(i); }, n_results); @@ -238,11 +198,12 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, } else { - tree_traversal.launch( + tree.query( space, permuted_predicates, InsertGenerator{ - callback, out, counts, permuted_offset}); + callback, out, counts, permuted_offset}, + ArborX::Experimental::TraversalPolicy().setPredicateSorting(false)); // This may not be true, but it does not matter. As long as we have // (n_results == 0) check before second pass, this value is not used. // Otherwise, we know it's overflowed as there is no allocation. @@ -251,9 +212,9 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion( - "ArborX::BufferOptimization::first_pass_postprocess"); + "ArborX::CrsGraphWrapper::first_pass_postprocess"); - OffsetView preallocated_offset("ArborX::BufferOptimization::offset_copy", 0); + OffsetView preallocated_offset("ArborX::CrsGraphWrapper::offset_copy", 0); if (underflow) { // Store a copy of the original offset. We'll need it for compression. @@ -261,7 +222,7 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, } Kokkos::parallel_for( - "ArborX::BufferOptimization::copy_counts_to_offsets", + "ArborX::CrsGraphWrapper::copy_counts_to_offsets", Kokkos::RangePolicy(space, 0, n_queries), KOKKOS_LAMBDA(int const i) { permuted_offset(i) = counts(i); }); exclusivePrefixSum(space, offset); @@ -290,20 +251,21 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, // Otherwise, do the second pass Kokkos::Profiling::pushRegion( - "ArborX::BufferOptimization::two_pass:second_pass"); + "ArborX::CrsGraphWrapper::two_pass:second_pass"); Kokkos::parallel_for( - "ArborX::BufferOptimization::copy_offsets_to_counts", + "ArborX::CrsGraphWrapper::copy_offsets_to_counts", Kokkos::RangePolicy(space, 0, n_queries), KOKKOS_LAMBDA(int const i) { counts(i) = permuted_offset(i); }); reallocWithoutInitializing(out, n_results); - tree_traversal.launch( + tree.query( space, permuted_predicates, InsertGenerator{callback, out, counts, - permuted_offset}); + permuted_offset}, + ArborX::Experimental::TraversalPolicy().setPredicateSorting(false)); Kokkos::Profiling::popRegion(); } @@ -311,13 +273,13 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, { // More than enough storage for results, need compression Kokkos::Profiling::pushRegion( - "ArborX::BufferOptimization::two_pass:copy_values"); + "ArborX::CrsGraphWrapper::two_pass:copy_values"); OutputView tmp_out(Kokkos::ViewAllocateWithoutInitializing(out.label()), n_results); Kokkos::parallel_for( - "ArborX::BufferOptimization::copy_valid_values", + "ArborX::CrsGraphWrapper::copy_valid_values", Kokkos::RangePolicy(space, 0, n_queries), KOKKOS_LAMBDA(int i) { int count = offset(i + 1) - offset(i); @@ -337,6 +299,175 @@ void queryImpl(ExecutionSpace const &space, TreeTraversal const &tree_traversal, Kokkos::Profiling::popRegion(); } +struct Iota +{ + KOKKOS_FUNCTION unsigned int operator()(int const i) const { return i; } +}; + +template +std::enable_if_t{}> +allocateAndInititalizeStorage(Tag, ExecutionSpace const &space, + Predicates const &predicates, OffsetView &offset, + OutView &out, int buffer_size) +{ + using Access = AccessTraits; + + auto const n_queries = Access::size(predicates); + reallocWithoutInitializing(offset, n_queries + 1); + + buffer_size = std::abs(buffer_size); + + Kokkos::deep_copy(space, offset, buffer_size); + + if (buffer_size != 0) + { + exclusivePrefixSum(space, offset); + + // Use calculation for the size to avoid calling lastElement(offset) as it + // will launch an extra kernel to copy to host. + reallocWithoutInitializing(out, n_queries * buffer_size); + } +} + +template +std::enable_if_t{}> +allocateAndInititalizeStorage(Tag, ExecutionSpace const &space, + Predicates const &predicates, OffsetView &offset, + OutView &out, int /*buffer_size*/) +{ + using Access = AccessTraits; + + auto const n_queries = Access::size(predicates); + reallocWithoutInitializing(offset, n_queries + 1); + + Kokkos::parallel_for( + "ArborX::CrsGraphWrapper::query::nearest::" + "scan_queries_for_numbers_of_nearest_neighbors", + Kokkos::RangePolicy(space, 0, n_queries), + KOKKOS_LAMBDA(int i) { offset(i) = getK(Access::get(predicates, i)); }); + exclusivePrefixSum(space, offset); + + reallocWithoutInitializing(out, lastElement(offset)); +} + +// Views are passed by reference here because internally Kokkos::realloc() +// is called. +template +std::enable_if_t{} && + Kokkos::is_view{} && Kokkos::is_view{}> +queryDispatch(Tag, Tree const &tree, ExecutionSpace const &space, + Predicates const &predicates, Callback const &callback, + OutputView &out, OffsetView &offset, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + using MemorySpace = typename Tree::memory_space; + using DeviceType = Kokkos::Device; + + check_valid_callback(callback, predicates, out); + + auto profiling_prefix = + std::string("ArborX::CrsGraphWrapper::query::") + + (std::is_same{} ? "spatial" : "nearest"); + + Kokkos::Profiling::pushRegion(profiling_prefix); + + Kokkos::Profiling::pushRegion(profiling_prefix + "::init_and_alloc"); + + allocateAndInititalizeStorage(Tag{}, space, predicates, offset, out, + policy._buffer_size); + + Kokkos::Profiling::popRegion(); + + auto buffer_status = (std::is_same{} + ? toBufferStatus(policy._buffer_size) + : BufferStatus::PreallocationSoft); + + if (policy._sort_predicates) + { + Kokkos::Profiling::pushRegion(profiling_prefix + "::compute_permutation"); + auto permute = + Details::BatchedQueries::sortQueriesAlongZOrderCurve( + space, tree.bounds(), predicates); + Kokkos::Profiling::popRegion(); + + queryImpl(space, tree, predicates, callback, out, offset, permute, + buffer_status); + } + else + { + Iota permute; + queryImpl(space, tree, predicates, callback, out, offset, permute, + buffer_status); + } + + Kokkos::Profiling::popRegion(); +} + +template +inline std::enable_if_t{} && Kokkos::is_view{}> +queryDispatch(Tag, Tree const &tree, ExecutionSpace const &space, + Predicates const &predicates, Indices &indices, Offset &offset, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + queryDispatch(Tag{}, tree, space, predicates, DefaultCallback{}, indices, + offset, policy); +} + +template +inline std::enable_if_t{}> +queryDispatch(Tag, Tree const &tree, ExecutionSpace const &space, + Predicates const &predicates, Callback const &callback, + OutputView &out, OffsetView &offset, + Experimental::TraversalPolicy const &policy = + Experimental::TraversalPolicy()) +{ + using MemorySpace = typename Tree::memory_space; + Kokkos::View indices( + "ArborX::CrsGraphWrapper::query::indices", 0); + queryDispatch(Tag{}, tree, space, predicates, indices, offset, policy); + callback(predicates, offset, indices, out); +} + +template +std::enable_if_t{} && + !is_tagged_post_callback{}> +check_valid_callback_if_first_argument_is_not_a_view( + Callback const &callback, Predicates const &predicates, + OutputView const &out) +{ + check_valid_callback(callback, predicates, out); +} + +template +std::enable_if_t{} && + is_tagged_post_callback{}> +check_valid_callback_if_first_argument_is_not_a_view(Callback const &, + Predicates const &, + OutputView const &) +{ + // TODO +} + +template +std::enable_if_t{}> +check_valid_callback_if_first_argument_is_not_a_view(View const &, + Predicates const &, + OutputView const &) +{ + // do nothing +} + +} // namespace CrsGraphWrapperImpl + } // namespace Details } // namespace ArborX diff --git a/src/details/ArborX_DetailsDistributedTreeImpl.hpp b/src/details/ArborX_DetailsDistributedTreeImpl.hpp index 66662eb50..317dfa685 100644 --- a/src/details/ArborX_DetailsDistributedTreeImpl.hpp +++ b/src/details/ArborX_DetailsDistributedTreeImpl.hpp @@ -327,7 +327,7 @@ void DistributedTreeImpl::deviseStrategy( auto const &bottom_tree_sizes = tree._bottom_tree_sizes; // Find the k nearest local trees. - top_tree.query(space, queries, indices, offset); + query(top_tree, space, queries, indices, offset); // Accumulate total leave count in the local trees until it reaches k which // is the number of neighbors queried for. Stop if local trees get @@ -419,7 +419,7 @@ void DistributedTreeImpl::reassessStrategy( getGeometry(Access::get(queries, i)), farthest_distances(i)}); }); - top_tree.query(space, radius_searches, indices, offset); + query(top_tree, space, radius_searches, indices, offset); // NOTE: in principle, we could perform radius searches on the bottom_tree // rather than nearest queries. @@ -530,8 +530,8 @@ DistributedTreeImpl::queryDispatchImpl( // Perform queries that have been received Kokkos::View out( "ArborX::DistributedTree::query::pairs_index_distance", 0); - bottom_tree.query(space, fwd_queries, callback_with_distance, out, - offset); + query(bottom_tree, space, fwd_queries, callback_with_distance, out, + offset); // Unzip auto const n = out.extent(0); @@ -585,7 +585,7 @@ DistributedTreeImpl::queryDispatch( "ArborX::DistributedTree::query::spatial::indices", 0); Kokkos::View ranks( "ArborX::DistributedTree::query::spatial::ranks", 0); - top_tree.query(space, queries, indices, offset); + query(top_tree, space, queries, indices, offset); { // NOTE_COMM_SPATIAL: The communication pattern here for the spatial search @@ -606,7 +606,7 @@ DistributedTreeImpl::queryDispatch( ranks); // Perform queries that have been received - bottom_tree.query(space, fwd_queries, callback, out, offset); + query(bottom_tree, space, fwd_queries, callback, out, offset); // Communicate results back communicateResultsBack(comm, space, out, offset, ranks, ids); diff --git a/src/details/ArborX_DetailsPermutedData.hpp b/src/details/ArborX_DetailsPermutedData.hpp new file mode 100644 index 000000000..10313b5e1 --- /dev/null +++ b/src/details/ArborX_DetailsPermutedData.hpp @@ -0,0 +1,67 @@ +/**************************************************************************** + * Copyright (c) 2012-2020 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef ARBORX_DETAILS_PERMUTED_DATA_HPP +#define ARBORX_DETAILS_PERMUTED_DATA_HPP + +#include + +namespace ArborX +{ + +namespace Details +{ + +template +struct PermutedData +{ + Data _data; + Permute _permute; + KOKKOS_FUNCTION auto &operator()(int i) const { return _data(_permute(i)); } +}; + +} // namespace Details + +template +struct AccessTraits, + PredicatesTag> +{ + using PermutedPredicates = + Details::PermutedData; + using NativeAccess = AccessTraits; + + static std::size_t size(PermutedPredicates const &permuted_predicates) + { + return NativeAccess::size(permuted_predicates._data); + } + + template + KOKKOS_FUNCTION static auto get(PermutedPredicates const &permuted_predicates, + std::enable_if_t<_Attach, std::size_t> index) + { + auto const permuted_index = permuted_predicates._permute(index); + return attach(NativeAccess::get(permuted_predicates._data, permuted_index), + (int)index); + } + + template + KOKKOS_FUNCTION static auto get(PermutedPredicates const &permuted_predicates, + std::enable_if_t index) + { + auto const permuted_index = permuted_predicates._permute(index); + return NativeAccess::get(permuted_predicates._data, permuted_index); + } + using memory_space = typename NativeAccess::memory_space; +}; + +} // namespace ArborX + +#endif diff --git a/src/details/ArborX_TraversalPolicy.hpp b/src/details/ArborX_TraversalPolicy.hpp new file mode 100644 index 000000000..4d859bfba --- /dev/null +++ b/src/details/ArborX_TraversalPolicy.hpp @@ -0,0 +1,52 @@ +/**************************************************************************** + * Copyright (c) 2012-2020 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef ARBORX_TRAVERSAL_POLICY_HPP +#define ARBORX_TRAVERSAL_POLICY_HPP + +namespace ArborX +{ +namespace Experimental +{ +struct TraversalPolicy +{ + // Buffer size lets a user provide an upper bound for the number of results + // per query. If the guess is accurate, it avoids performing the tree + // traversals twice (the first one to count the number of results per query, + // the second to actually write down the results at the right location in + // the flattened array) + // + // The default value zero disables the buffer optimization. The sign of the + // integer is used to specify the policy in the case the size insufficient. + // If it is positive, the code falls back to the default behavior and + // performs a second pass. If it is negative, it throws an exception. + int _buffer_size = 0; + + // Sort predicates allows disabling predicate sorting. + bool _sort_predicates = true; + + TraversalPolicy &setBufferSize(int buffer_size) + { + _buffer_size = buffer_size; + return *this; + } + + TraversalPolicy &setPredicateSorting(bool sort_predicates) + { + _sort_predicates = sort_predicates; + return *this; + } +}; + +} // namespace Experimental +} // namespace ArborX + +#endif diff --git a/test/ArborX_BoostRTreeHelpers.hpp b/test/ArborX_BoostRTreeHelpers.hpp index 29d64f5ca..97d15a79d 100644 --- a/test/ArborX_BoostRTreeHelpers.hpp +++ b/test/ArborX_BoostRTreeHelpers.hpp @@ -312,4 +312,19 @@ class ParallelRTree } // namespace BoostExt +namespace ArborX +{ +// Specialization of ArborX::query +template +inline void query(BoostExt::RTree const &rtree, + ExecutionSpace const &space, Predicates const &predicates, + InputView &indices, InputView &offset, + TrailingArgs &&... args) +{ + rtree.query(space, predicates, indices, offset, + std::forward(args)...); +} +} // namespace ArborX + #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 148ae3a7d..26af93827 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -97,11 +97,11 @@ target_compile_definitions(ArborX_DetailsBatchedQueries.exe PRIVATE BOOST_TEST_D target_include_directories(ArborX_DetailsBatchedQueries.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) add_test(NAME ArborX_DetailsBatchedQueries_Test COMMAND ./ArborX_DetailsBatchedQueries.exe) -add_executable(ArborX_DetailsBufferOptimization.exe tstDetailsBufferOptimization.cpp utf_main.cpp) -target_link_libraries(ArborX_DetailsBufferOptimization.exe PRIVATE ArborX Boost::unit_test_framework) -target_compile_definitions(ArborX_DetailsBufferOptimization.exe PRIVATE BOOST_TEST_DYN_LINK) -target_include_directories(ArborX_DetailsBufferOptimization.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -add_test(NAME ArborX_DetailsBufferOptimization_Test COMMAND ./ArborX_DetailsBufferOptimization.exe) +add_executable(ArborX_DetailsCrsGraphWrapperImpl.exe tstDetailsCrsGraphWrapperImpl.cpp utf_main.cpp) +target_link_libraries(ArborX_DetailsCrsGraphWrapperImpl.exe PRIVATE ArborX Boost::unit_test_framework) +target_compile_definitions(ArborX_DetailsCrsGraphWrapperImpl.exe PRIVATE BOOST_TEST_DYN_LINK) +target_include_directories(ArborX_DetailsCrsGraphWrapperImpl.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +add_test(NAME ArborX_DetailsCrsGraphWrapperImpl_Test COMMAND ./ArborX_DetailsCrsGraphWrapperImpl.exe) if(ARBORX_ENABLE_MPI) add_executable(ArborX_DistributedTree.exe tstDistributedTree.cpp tstKokkosToolsDistributedAnnotations.cpp utf_main.cpp) diff --git a/test/Search_UnitTestHelpers.hpp b/test/Search_UnitTestHelpers.hpp index 658394031..4755b1d9a 100644 --- a/test/Search_UnitTestHelpers.hpp +++ b/test/Search_UnitTestHelpers.hpp @@ -103,6 +103,21 @@ auto query(ExecutionSpace const &exec_space, Tree const &tree, Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); } +// This is a temporary workaround until we reconcile interfaces of +// DistributedTree and BVH +template +auto query(ExecutionSpace const &exec_space, + ArborX::BVH const &tree, Queries const &queries) +{ + using memory_space = MemorySpace; + Kokkos::View values("Testing::values", 0); + Kokkos::View offsets("Testing::offsets", 0); + ArborX::query(tree, exec_space, queries, values, offsets); + return make_compressed_storage( + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, offsets), + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, values)); +} + #define ARBORX_TEST_QUERY_TREE(exec_space, tree, queries, reference) \ BOOST_TEST(query(exec_space, tree, queries) == (reference), \ boost::test_tools::per_element()); diff --git a/test/tstDetailsBufferOptimization.cpp b/test/tstDetailsCrsGraphWrapperImpl.cpp similarity index 84% rename from test/tstDetailsBufferOptimization.cpp rename to test/tstDetailsCrsGraphWrapperImpl.cpp index 17f60830e..325149fee 100644 --- a/test/tstDetailsBufferOptimization.cpp +++ b/test/tstDetailsCrsGraphWrapperImpl.cpp @@ -11,14 +11,13 @@ #include "ArborX_EnableDeviceTypes.hpp" // ARBORX_DEVICE_TYPES #include "ArborX_EnableViewComparison.hpp" -#include -//#include -#include // FIXME +#include #include +#include #include -#define BOOST_TEST_MODULE DetailsBufferOptiization +#define BOOST_TEST_MODULE DetailsCrsGraphWrapperImpl namespace tt = boost::test_tools; @@ -26,8 +25,10 @@ struct Test1 { template - void launch(ExecutionSpace const &space, Predicates const &predicates, - InsertGenerator const &insert_generator) const + void query(ExecutionSpace const &space, Predicates const &predicates, + InsertGenerator const &insert_generator, + ArborX::Experimental::TraversalPolicy const & = + ArborX::Experimental::TraversalPolicy()) const { using Access = ArborX::AccessTraits; @@ -69,10 +70,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(query_impl, DeviceType, ARBORX_DEVICE_TYPES) ArborX::exclusivePrefixSum(ExecutionSpace{}, offset); ArborX::reallocWithoutInitializing(indices, ArborX::lastElement(offset)); - ArborX::Details::queryImpl(ExecutionSpace{}, Test1{}, predicates, - ArborX::Details::DefaultCallback{}, indices, - offset, permute, - ArborX::Details::BufferStatus::PreallocationHard); + ArborX::Details::CrsGraphWrapperImpl::queryImpl( + ExecutionSpace{}, Test1{}, predicates, ArborX::Details::DefaultCallback{}, + indices, offset, permute, + ArborX::Details::BufferStatus::PreallocationHard); auto indices_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, indices); diff --git a/test/tstKokkosToolsAnnotations.cpp b/test/tstKokkosToolsAnnotations.cpp index 9e4d72a4e..abe369050 100644 --- a/test/tstKokkosToolsAnnotations.cpp +++ b/test/tstKokkosToolsAnnotations.cpp @@ -100,7 +100,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(bvh_query_allocations_prefixed, DeviceType, (isPrefixedWith(label, "ArborX::BVH::query::") || isPrefixedWith(label, "ArborX::TreeTraversal::spatial::") || isPrefixedWith(label, "ArborX::TreeTraversal::nearest::") || - isPrefixedWith(label, "ArborX::BufferOptimization::") || + isPrefixedWith(label, "ArborX::CrsGraphWrapper::") || isPrefixedWith(label, "ArborX::Sorting::") || isPrefixedWith(label, "Kokkos::SortImpl::BinSortFunctor::") || isPrefixedWith(label, "Testing::"))); diff --git a/test/tstKokkosToolsDistributedAnnotations.cpp b/test/tstKokkosToolsDistributedAnnotations.cpp index afb056785..6e0824c57 100644 --- a/test/tstKokkosToolsDistributedAnnotations.cpp +++ b/test/tstKokkosToolsDistributedAnnotations.cpp @@ -88,7 +88,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE( isPrefixedWith(label, "ArborX::BVH::query::") || isPrefixedWith(label, "ArborX::TreeTraversal::spatial::") || isPrefixedWith(label, "ArborX::TreeTraversal::nearest::") || - isPrefixedWith(label, "ArborX::BufferOptimization::") || + isPrefixedWith(label, "ArborX::CrsGraphWrapper::") || isPrefixedWith(label, "ArborX::Sorting::") || isPrefixedWith(label, "Kokkos::SortImpl::") || isPrefixedWith(label, "Testing::"))); diff --git a/test/tstLinearBVH.cpp b/test/tstLinearBVH.cpp index a2a235526..7f70bddea 100644 --- a/test/tstLinearBVH.cpp +++ b/test/tstLinearBVH.cpp @@ -273,7 +273,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, tt::per_element()); }; - BOOST_CHECK_NO_THROW(bvh.query(ExecutionSpace{}, queries, indices, offset)); + BOOST_CHECK_NO_THROW( + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset)); checkResultsAreFine(); // compute number of results per query @@ -285,37 +286,37 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(buffer_optimization, DeviceType, // optimal size BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize( - -max_results_per_query))); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize( + -max_results_per_query))); checkResultsAreFine(); // buffer size insufficient BOOST_TEST(max_results_per_query > 1); BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(+1))); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(+1))); checkResultsAreFine(); BOOST_CHECK_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(-1)), + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(-1)), ArborX::SearchException); // adequate buffer size BOOST_TEST(max_results_per_query < 5); BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(+5))); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(+5))); checkResultsAreFine(); BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(-5))); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(-5))); checkResultsAreFine(); // passing null size skips the buffer optimization and never throws BOOST_CHECK_NO_THROW( - bvh.query(ExecutionSpace{}, queries, indices, offset, - ArborX::Experimental::TraversalPolicy().setBufferSize(0))); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset, + ArborX::Experimental::TraversalPolicy().setBufferSize(0))); checkResultsAreFine(); } @@ -355,13 +356,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(unsorted_predicates, DeviceType, {{{0., 0., 0.}}, {{1., 1., 1.}}}, }); - BOOST_CHECK_NO_THROW(bvh.query( - ExecutionSpace{}, queries, indices, offset, + BOOST_CHECK_NO_THROW(ArborX::query( + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(true))); checkResultsAreFine(); - BOOST_CHECK_NO_THROW(bvh.query( - ExecutionSpace{}, queries, indices, offset, + BOOST_CHECK_NO_THROW(ArborX::query( + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(false))); checkResultsAreFine(); } @@ -373,13 +374,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(unsorted_predicates, DeviceType, {{{0.5, 0.5, 0.5}}, 2}, }); - BOOST_CHECK_NO_THROW(bvh.query( - ExecutionSpace{}, queries, indices, offset, + BOOST_CHECK_NO_THROW(ArborX::query( + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(true))); checkResultsAreFine(); - BOOST_CHECK_NO_THROW(bvh.query( - ExecutionSpace{}, queries, indices, offset, + BOOST_CHECK_NO_THROW(ArborX::query( + bvh, ExecutionSpace{}, queries, indices, offset, ArborX::Experimental::TraversalPolicy().setPredicateSorting(false))); checkResultsAreFine(); } @@ -406,20 +407,20 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(not_exceeding_stack_capacity, DeviceType, Kokkos::View offset("offset", 0); // query number of nearest neighbors that exceed capacity of the stack is // not a problem - BOOST_CHECK_NO_THROW(bvh.query(ExecutionSpace{}, - makeNearestQueries({ - {{{0., 0., 0.}}, n}, - }), - indices, offset)); + BOOST_CHECK_NO_THROW(ArborX::query(bvh, ExecutionSpace{}, + makeNearestQueries({ + {{{0., 0., 0.}}, n}, + }), + indices, offset)); BOOST_TEST(ArborX::lastElement(offset) == n); // spatial query that find all indexable in the tree is also fine - BOOST_CHECK_NO_THROW(bvh.query(ExecutionSpace{}, - makeIntersectsBoxQueries({ - {}, - {{{0., 0., 0.}}, {{n, n, n}}}, - }), - indices, offset)); + BOOST_CHECK_NO_THROW(ArborX::query(bvh, ExecutionSpace{}, + makeIntersectsBoxQueries({ + {}, + {{{0., 0., 0.}}, {{n, n, n}}}, + }), + indices, offset)); BOOST_TEST(ArborX::lastElement(offset) == n); } @@ -486,11 +487,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeIntersectsBoxQueries({ - bvh.bounds(), - }), - CustomInlineCallback{points}, custom, offset); + ArborX::query(bvh, ExecutionSpace{}, + makeIntersectsBoxQueries({ + bvh.bounds(), + }), + CustomInlineCallback{points}, custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -505,11 +506,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeIntersectsBoxQueries({ - bvh.bounds(), - }), - CustomPostCallback{points}, custom, offset); + ArborX::query(bvh, ExecutionSpace{}, + makeIntersectsBoxQueries({ + bvh.bounds(), + }), + CustomPostCallback{points}, custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -524,11 +525,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeNearestQueries({ - {origin, n}, - }), - CustomInlineCallback{points}, custom, offset); + ArborX::query(bvh, ExecutionSpace{}, + makeNearestQueries({ + {origin, n}, + }), + CustomInlineCallback{points}, custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -539,11 +540,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback, DeviceType, ARBORX_DEVICE_TYPES) { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeNearestQueries({ - {origin, n}, - }), - CustomPostCallback{points}, custom, offset); + ArborX::query(bvh, ExecutionSpace{}, + makeNearestQueries({ + {origin, n}, + }), + CustomPostCallback{points}, custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -672,11 +673,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeIntersectsBoxWithAttachmentQueries( - {bvh.bounds()}, {delta}), - CustomInlineCallbackWithAttachment{points}, custom, - offset); + ArborX::query(bvh, ExecutionSpace{}, + makeIntersectsBoxWithAttachmentQueries( + {bvh.bounds()}, {delta}), + CustomInlineCallbackWithAttachment{points}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -691,12 +692,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeIntersectsBoxWithAttachmentQueries>( - {bvh.bounds()}, {{0., delta}}), - CustomPostCallbackWithAttachment{points}, custom, - offset); + ArborX::query( + bvh, ExecutionSpace{}, + makeIntersectsBoxWithAttachmentQueries>( + {bvh.bounds()}, {{0., delta}}), + CustomPostCallbackWithAttachment{points}, custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -711,11 +712,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query(ExecutionSpace{}, - makeNearestWithAttachmentQueries({{origin, n}}, - {delta}), - CustomInlineCallbackWithAttachment{points}, custom, - offset); + ArborX::query(bvh, ExecutionSpace{}, + makeNearestWithAttachmentQueries( + {{origin, n}}, {delta}), + CustomInlineCallbackWithAttachment{points}, + custom, offset); auto custom_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, custom); @@ -726,8 +727,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(callback_with_attachment, DeviceType, { Kokkos::View *, DeviceType> custom("custom", 0); Kokkos::View offset("offset", 0); - bvh.query( - ExecutionSpace{}, + ArborX::query( + bvh, ExecutionSpace{}, makeNearestWithAttachmentQueries>( {{origin, n}}, {{0, delta}}), CustomPostCallbackWithAttachment{points}, custom, offset); @@ -791,7 +792,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) Kokkos::View indices("indices", n); Kokkos::View offset("offset", n); - bvh.query(ExecutionSpace{}, queries, indices, offset); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset); auto indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); @@ -901,7 +902,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) KOKKOS_LAMBDA(int i) { queries[i] = ArborX::intersects(bounding_boxes[i]); }); - bvh.query(ExecutionSpace{}, queries, indices, offset); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset); indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); offset_host = Kokkos::create_mirror_view(offset); @@ -960,7 +961,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(structured_grid, DeviceType, ARBORX_DEVICE_TYPES) KOKKOS_LAMBDA(int i) { queries[i] = ArborX::intersects(bounding_boxes[i]); }); - bvh.query(ExecutionSpace{}, queries, indices, offset); + ArborX::query(bvh, ExecutionSpace{}, queries, indices, offset); indices_host = Kokkos::create_mirror_view(indices); Kokkos::deep_copy(indices_host, indices); offset_host = Kokkos::create_mirror_view(offset);