NOTE(review): this copy of the patch appears to have had every "<...>" span
(template parameter and argument lists) stripped before it reached review.
Those spans cannot be recovered from the patch text, so they are left exactly
as found; restore them from the original commit before applying.

diff --git a/benchmarks/bvh_driver/bvh_driver.cpp b/benchmarks/bvh_driver/bvh_driver.cpp
index 34bd6d4a56..fc64d219e8 100644
--- a/benchmarks/bvh_driver/bvh_driver.cpp
+++ b/benchmarks/bvh_driver/bvh_driver.cpp
@@ -142,6 +142,62 @@ void BM_knn_search(benchmark::State &state)
   }
 }
 
+// Callback handed to the nearest query in BM_knn_type1_search. Each call
+// atomically adds 1 to stat_(i), where i is the first argument; the other two
+// arguments are ignored. The benchmark sizes stat_ by the number of queries.
+template
+struct Type1NearestCallback
+{
+  Kokkos::View stat_;
+
+  template
+  KOKKOS_FUNCTION void operator()(int i, int, double) const
+  {
+    Kokkos::atomic_fetch_add(&stat_(i), 1);
+  }
+};
+
+// Times index.query() for nearest queries with a Type1NearestCallback.
+// state.range(0) through state.range(5) supply n_values, n_queries,
+// n_neighbors, sort_predicates_int and the source/target point cloud types.
+// The tree, the queries and the counter view are built once; only the query
+// call is inside the timed region, and the counters are never reset, so they
+// accumulate across iterations.
+// NOTE(review): nothing here fences between query() and the end timestamp;
+// confirm query() has completed on device backends before it returns.
+template
+void BM_knn_type1_search(benchmark::State &state)
+{
+  using DeviceType = typename TreeType::device_type;
+  int const n_values = state.range(0);
+  int const n_queries = state.range(1);
+  int const n_neighbors = state.range(2);
+  bool const sort_predicates_int = state.range(3);
+  auto const source_point_cloud_type =
+      static_cast(state.range(4));
+  auto const target_point_cloud_type =
+      static_cast(state.range(5));
+
+  TreeType index(
+      constructPoints(n_values, source_point_cloud_type));
+  auto const queries = makeNearestQueries(
+      n_values, n_queries, n_neighbors, target_point_cloud_type);
+
+  Kokkos::View stat("stat", n_queries);
+  Type1NearestCallback callback{stat};
+
+  for (auto _ : state)
+  {
+    auto const start = std::chrono::high_resolution_clock::now();
+    index.query(queries, callback,
+                ArborX::Experimental::TraversalPolicy().setPredicateSorting(
+                    sort_predicates_int));
+    auto const end = std::chrono::high_resolution_clock::now();
+    std::chrono::duration elapsed_seconds = end - start;
+    state.SetIterationTime(elapsed_seconds.count());
+  }
+}
+
 template
 void BM_radius_search(benchmark::State &state)
 {
@@ -176,6 +232,62 @@ void BM_radius_search(benchmark::State &state)
   }
 }
 
+// Callback handed to the spatial query in BM_radius_type1_search. Each call
+// atomically adds 1 to stat_(i), where i is the first argument; the second
+// argument is ignored. The benchmark sizes stat_ by the number of queries.
+template
+struct Type1SpatialCallback
+{
+  Kokkos::View stat_;
+
+  template
+  KOKKOS_FUNCTION void operator()(int i, int) const
+  {
+    Kokkos::atomic_fetch_add(&stat_(i), 1);
+  }
+};
+
+// Times index.query() for spatial queries with a Type1SpatialCallback.
+// state.range(0) through state.range(5) supply n_values, n_queries,
+// n_neighbors, sort_predicates_int and the source/target point cloud types.
+// The tree, the queries and the counter view are built once; only the query
+// call is inside the timed region, and the counters are never reset, so they
+// accumulate across iterations.
+// NOTE(review): nothing here fences between query() and the end timestamp;
+// confirm query() has completed on device backends before it returns.
+template
+void BM_radius_type1_search(benchmark::State &state)
+{
+  using DeviceType = typename TreeType::device_type;
+  int const n_values = state.range(0);
+  int const n_queries = state.range(1);
+  int const n_neighbors = state.range(2);
+  int const sort_predicates_int = state.range(3);
+  auto const source_point_cloud_type =
+      static_cast(state.range(4));
+  auto const target_point_cloud_type =
+      static_cast(state.range(5));
+
+  TreeType index(
+      constructPoints(n_values, source_point_cloud_type));
+  auto const queries = makeSpatialQueries(
+      n_values, n_queries, n_neighbors, target_point_cloud_type);
+
+  Kokkos::View stat("stat", n_queries);
+  Type1SpatialCallback callback{stat};
+
+  for (auto _ : state)
+  {
+    auto const start = std::chrono::high_resolution_clock::now();
+    index.query(queries, callback,
+                ArborX::Experimental::TraversalPolicy().setPredicateSorting(
+                    sort_predicates_int));
+    auto const end = std::chrono::high_resolution_clock::now();
+    std::chrono::duration elapsed_seconds = end - start;
+    state.SetIterationTime(elapsed_seconds.count());
+  }
+}
+
 class KokkosScopeGuard
 {
 public:
@@ -200,6 +312,23 @@ class KokkosScopeGuard
       ->UseManualTime()                                                        \
       ->Unit(benchmark::kMicrosecond);
 
+// Registers BM_knn_type1_search and BM_radius_type1_search for TreeType.
+// r1, r2 and r3 are accepted but not used: the benchmark arguments are the
+// variables n_values, n_queries, n_neighbors, sort_predicates_int,
+// source_point_cloud_type and target_point_cloud_type, which must be in
+// scope where the macro is expanded.
+#define REGISTER_TYPE1_BENCHMARK(TreeType, r1, r2, r3)                         \
+  BENCHMARK_TEMPLATE(BM_knn_type1_search, TreeType)                            \
+      ->Args({n_values, n_queries, n_neighbors, sort_predicates_int,           \
+              source_point_cloud_type, target_point_cloud_type})               \
+      ->UseManualTime()                                                        \
+      ->Unit(benchmark::kMicrosecond);                                         \
+  BENCHMARK_TEMPLATE(BM_radius_type1_search, TreeType)                         \
+      ->Args({n_values, n_queries, n_neighbors, sort_predicates_int,           \
+              source_point_cloud_type, target_point_cloud_type})               \
+      ->UseManualTime()                                                        \
+      ->Unit(benchmark::kMicrosecond);
+
 #else
 #define REGISTER_BENCHMARK(TreeType, r1, r2, r3)                               \
   BENCHMARK_TEMPLATE(BM_construction, TreeType)                                \
@@ -229,6 +358,10 @@ class KokkosScopeGuard
       ->Args({(int)r3, (int)r3, 10, 1, 0, 1, 3})                               \
       ->UseManualTime()                                                        \
       ->Unit(benchmark::kMicrosecond);
+
+// In this configuration the type1 benchmarks are not registered, so the
+// macro expands to nothing.
+#define REGISTER_TYPE1_BENCHMARK(TreeType, r1, r2, r3)
 #endif
 
 // NOTE Motivation for this class that stores the argument count and values is
@@ -237,9 +370,9 @@ class KokkosScopeGuard
 // Benchmark removes its own arguments from the command line arguments. This
 // means, that by virtue of returning references to internal data members in
 // argc() and argv() function, it will necessarily modify the members. It will
-// decrease _argc, and "reduce" _argv data. Hence, we must keep a copy of _argv
-// that is not modified from the outside to release memory in the destructor
-// correctly.
+// decrease _argc, and "reduce" _argv data. Hence, we must keep a copy of
+// _argv that is not modified from the outside to release memory in the
+// destructor correctly.
 class CmdLineArgs
 {
 private:
@@ -359,21 +492,25 @@ int main(int argc, char *argv[])
 #ifdef KOKKOS_ENABLE_SERIAL
   using Serial = Kokkos::Serial::device_type;
   REGISTER_BENCHMARK(ArborX::BVH, 1e3, 1e4, 1e5);
+  REGISTER_TYPE1_BENCHMARK(ArborX::BVH, 1e3, 1e4, 1e5);
 #endif
 
 #ifdef KOKKOS_ENABLE_OPENMP
   using OpenMP = Kokkos::OpenMP::device_type;
   REGISTER_BENCHMARK(ArborX::BVH, 1e3, 1e4, 1e5);
+  REGISTER_TYPE1_BENCHMARK(ArborX::BVH, 1e3, 1e4, 1e5);
 #endif
 
 #ifdef KOKKOS_ENABLE_THREADS
   using Threads = Kokkos::Threads::device_type;
   REGISTER_BENCHMARK(ArborX::BVH, 1e3, 1e4, 1e5);
+  REGISTER_TYPE1_BENCHMARK(ArborX::BVH, 1e3, 1e4, 1e5);
 #endif
 
 #ifdef KOKKOS_ENABLE_CUDA
   using Cuda = Kokkos::Cuda::device_type;
   REGISTER_BENCHMARK(ArborX::BVH, 1e4, 1e5, 1e6);
+  REGISTER_TYPE1_BENCHMARK(ArborX::BVH, 1e4, 1e5, 1e6);
 #endif
 
 #ifndef ARBORX_PERFORMANCE_TESTING