Skip to content

Commit

Permalink
Add a benchmark for pure callback interface
Browse files Browse the repository at this point in the history
  • Loading branch information
aprokop committed Jun 2, 2020
1 parent 5366adf commit b5ad61e
Showing 1 changed file with 111 additions and 3 deletions.
114 changes: 111 additions & 3 deletions benchmarks/bvh_driver/bvh_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,51 @@ void BM_knn_search(benchmark::State &state)
}
}

// Callback used by the pure-callback nearest-neighbor benchmark. Every
// invocation atomically increments one entry of a counter view.
//
// stat_ : view of integer counters; entry `i` is incremented by one per call.
template <typename DeviceType>
struct Type1NearestCallback
{
  Kokkos::View<int *, DeviceType> stat_;

  // Increments stat_(i) by one. The second (int) and third (double) arguments
  // are accepted but unused.
  //
  // NOTE(review): neither Query nor Insert appears in the parameter list, so
  // they cannot be deduced from a plain call such as `callback(i, j, d)`.
  // Confirm against the callback signature that the tree's query() expects.
  template <typename Query, typename Insert>
  KOKKOS_FUNCTION void operator()(int i, int, double) const
  {
    // The counter view is the member stat_ (trailing underscore); an
    // unqualified `stat` does not name it.
    Kokkos::atomic_fetch_add(&stat_(i), 1);
  }
};

// Benchmark of nearest-neighbor queries issued through the query() overload
// that takes a user callback.
//
// Benchmark arguments (state.range):
//   0: number of indexed values          3: predicate sorting switch
//   1: number of queries                 4: source point cloud type
//   2: number of neighbors per query     5: target point cloud type
//
// Tree construction and query generation happen once, outside the measured
// loop. Each iteration times a single query() call by hand and reports it
// through SetIterationTime().
template <class TreeType>
void BM_knn_type1_search(benchmark::State &state)
{
  using DeviceType = typename TreeType::device_type;
  using Clock = std::chrono::high_resolution_clock;

  int const num_values = state.range(0);
  int const num_queries = state.range(1);
  int const num_neighbors = state.range(2);
  bool const sort_predicates = state.range(3);
  auto const source_cloud = static_cast<PointCloudType>(state.range(4));
  auto const target_cloud = static_cast<PointCloudType>(state.range(5));

  // Search tree built over the source point cloud.
  TreeType tree(constructPoints<DeviceType>(num_values, source_cloud));

  auto const predicates = makeNearestQueries<DeviceType>(
      num_values, num_queries, num_neighbors, target_cloud);

  // One counter per query, handed to the callback.
  Kokkos::View<int *, DeviceType> counters("stat", num_queries);
  Type1NearestCallback<DeviceType> count_callback{counters};

  for (auto _ : state)
  {
    auto const tic = Clock::now();
    tree.query(predicates, count_callback,
               ArborX::Experimental::TraversalPolicy().setPredicateSorting(
                   sort_predicates));
    auto const toc = Clock::now();

    // Manual timing: report the elapsed wall time in seconds.
    state.SetIterationTime(std::chrono::duration<double>(toc - tic).count());
  }
}

template <class TreeType>
void BM_radius_search(benchmark::State &state)
{
Expand Down Expand Up @@ -176,6 +221,51 @@ void BM_radius_search(benchmark::State &state)
}
}

// Callback used by the pure-callback radius (spatial) search benchmark. Every
// invocation atomically increments one entry of a counter view.
//
// stat_ : view of integer counters; entry `i` is incremented by one per call.
template <typename MemorySpace>
struct Type1SpatialCallback
{
  Kokkos::View<int *, MemorySpace> stat_;

  // Increments stat_(i) by one. The second (int) argument is accepted but
  // unused.
  //
  // NOTE(review): neither Query nor Insert appears in the parameter list, so
  // they cannot be deduced from a plain call such as `callback(i, j)`.
  // Confirm against the callback signature that the tree's query() expects.
  template <typename Query, typename Insert>
  KOKKOS_FUNCTION void operator()(int i, int) const
  {
    // The counter view is the member stat_ (trailing underscore); an
    // unqualified `stat` does not name it.
    Kokkos::atomic_fetch_add(&stat_(i), 1);
  }
};

// Benchmark of spatial (radius) queries issued through the query() overload
// that takes a user callback.
//
// Benchmark arguments (state.range):
//   0: number of indexed values          3: predicate sorting switch
//   1: number of queries                 4: source point cloud type
//   2: number of neighbors               5: target point cloud type
//      (forwarded to makeSpatialQueries)
//
// Tree construction and query generation happen once, outside the measured
// loop. Each iteration times a single query() call by hand and reports it
// through SetIterationTime().
template <class TreeType>
void BM_radius_type1_search(benchmark::State &state)
{
  using DeviceType = typename TreeType::device_type;
  using Clock = std::chrono::high_resolution_clock;

  int const num_values = state.range(0);
  int const num_queries = state.range(1);
  int const num_neighbors = state.range(2);
  int const sort_predicates = state.range(3);
  auto const source_cloud = static_cast<PointCloudType>(state.range(4));
  auto const target_cloud = static_cast<PointCloudType>(state.range(5));

  // Search tree built over the source point cloud.
  TreeType tree(constructPoints<DeviceType>(num_values, source_cloud));

  auto const predicates = makeSpatialQueries<DeviceType>(
      num_values, num_queries, num_neighbors, target_cloud);

  // One counter per query, handed to the callback.
  Kokkos::View<int *, DeviceType> counters("stat", num_queries);
  Type1SpatialCallback<DeviceType> count_callback{counters};

  for (auto _ : state)
  {
    auto const tic = Clock::now();
    tree.query(predicates, count_callback,
               ArborX::Experimental::TraversalPolicy().setPredicateSorting(
                   sort_predicates));
    auto const toc = Clock::now();

    // Manual timing: report the elapsed wall time in seconds.
    state.SetIterationTime(std::chrono::duration<double>(toc - tic).count());
  }
}

class KokkosScopeGuard
{
public:
Expand All @@ -200,6 +290,18 @@ class KokkosScopeGuard
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

// Registers the callback-based ("type1") k-NN and radius search benchmarks for
// TreeType, both with manual timing and results reported in microseconds.
// NOTE(review): r1, r2 and r3 are not used by this definition. The argument
// list is built from n_values, n_queries, n_neighbors, sort_predicates_int,
// source_point_cloud_type and target_point_cloud_type, which must be visible
// at the point where the macro is expanded.
#define REGISTER_TYPE1_BENCHMARK(TreeType, r1, r2, r3) \
BENCHMARK_TEMPLATE(BM_knn_type1_search, TreeType) \
->Args({n_values, n_queries, n_neighbors, sort_predicates_int, \
source_point_cloud_type, target_point_cloud_type}) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond); \
BENCHMARK_TEMPLATE(BM_radius_type1_search, TreeType) \
->Args({n_values, n_queries, n_neighbors, sort_predicates_int, \
source_point_cloud_type, target_point_cloud_type}) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

#else
#define REGISTER_BENCHMARK(TreeType, r1, r2, r3) \
BENCHMARK_TEMPLATE(BM_construction, TreeType) \
Expand Down Expand Up @@ -229,6 +331,8 @@ class KokkosScopeGuard
->Args({(int)r3, (int)r3, 10, 1, 0, 1, 3}) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

// Empty variant: expands to nothing, so the callback-based ("type1")
// benchmarks are not registered in this preprocessor branch.
#define REGISTER_TYPE1_BENCHMARK(TreeType, r1, r2, r3)
#endif

// NOTE Motivation for this class that stores the argument count and values is
Expand All @@ -237,9 +341,9 @@ class KokkosScopeGuard
// Benchmark removes its own arguments from the command line arguments. This
// means that, by virtue of returning references to internal data members in
// argc() and argv() functions, it will necessarily modify the members. It will
// decrease _argc, and "reduce" _argv data. Hence, we must keep a copy of _argv
// that is not modified from the outside to release memory in the destructor
// correctly.
// decrease _argc, and "reduce" _argv data. Hence, we must keep a copy of
// _argv that is not modified from the outside to release memory in the
// destructor correctly.
class CmdLineArgs
{
private:
Expand Down Expand Up @@ -359,21 +463,25 @@ int main(int argc, char *argv[])
#ifdef KOKKOS_ENABLE_SERIAL
using Serial = Kokkos::Serial::device_type;
REGISTER_BENCHMARK(ArborX::BVH<Serial>, 1e3, 1e4, 1e5);
REGISTER_TYPE1_BENCHMARK(ArborX::BVH<Serial>, 1e3, 1e4, 1e5);
#endif

#ifdef KOKKOS_ENABLE_OPENMP
using OpenMP = Kokkos::OpenMP::device_type;
REGISTER_BENCHMARK(ArborX::BVH<OpenMP>, 1e3, 1e4, 1e5);
REGISTER_TYPE1_BENCHMARK(ArborX::BVH<OpenMP>, 1e3, 1e4, 1e5);
#endif

#ifdef KOKKOS_ENABLE_THREADS
using Threads = Kokkos::Threads::device_type;
REGISTER_BENCHMARK(ArborX::BVH<Threads>, 1e3, 1e4, 1e5);
REGISTER_TYPE1_BENCHMARK(ArborX::BVH<Threads>, 1e3, 1e4, 1e5);
#endif

#ifdef KOKKOS_ENABLE_CUDA
using Cuda = Kokkos::Cuda::device_type;
REGISTER_BENCHMARK(ArborX::BVH<Cuda>, 1e4, 1e5, 1e6);
REGISTER_TYPE1_BENCHMARK(ArborX::BVH<Cuda>, 1e3, 1e4, 1e5);
#endif

#ifndef ARBORX_PERFORMANCE_TESTING
Expand Down

0 comments on commit b5ad61e

Please sign in to comment.