Skip to content

Commit

Permalink
Merge pull request #962 from aprokop/brute_force_value_reb
Browse files Browse the repository at this point in the history
Implement APIv2 BruteForce
  • Loading branch information
aprokop committed Oct 30, 2023
2 parents 5c1501e + fab0039 commit 48e13e7
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 84 deletions.
10 changes: 6 additions & 4 deletions benchmarks/brute_force_vs_bvh/brute_force_vs_bvh_timpl.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (c) 2017-2022 by the ArborX authors *
* Copyright (c) 2017-2023 by the ArborX authors *
* All rights reserved. *
* *
* This file is part of the ArborX library. ArborX is *
Expand All @@ -10,7 +10,6 @@
****************************************************************************/

#include <ArborX_BruteForce.hpp>
#include <ArborX_HyperBox.hpp>
#include <ArborX_HyperPoint.hpp>
#include <ArborX_HyperSphere.hpp>
#include <ArborX_LinearBVH.hpp>
Expand Down Expand Up @@ -84,7 +83,6 @@ static void run_fp(int nprimitives, int nqueries, int nrepeats)
Placeholder<DIM, FloatingPoint> predicates{nqueries};

using Point = ArborX::ExperimentalHyperGeometry::Point<DIM, FloatingPoint>;
using Box = ArborX::ExperimentalHyperGeometry::Box<DIM, FloatingPoint>;

for (int i = 0; i < nrepeats; i++)
{
Expand All @@ -111,7 +109,11 @@ static void run_fp(int nprimitives, int nqueries, int nrepeats)

{
Kokkos::Timer timer;
ArborX::BruteForce<MemorySpace, Box> brute{space, primitives};
ArborX::BasicBruteForce<MemorySpace,
ArborX::Details::PairIndexVolume<Point>>
brute{space,
ArborX::Details::LegacyValues<decltype(primitives), Point>{
primitives}};

Kokkos::View<int *, ExecutionSpace> indices("Benchmark::indices", 0);
Kokkos::View<int *, ExecutionSpace> offset("Benchmark::offset", 0);
Expand Down
135 changes: 104 additions & 31 deletions src/ArborX_BruteForce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,36 @@
#include <ArborX_DetailsBruteForceImpl.hpp>
#include <ArborX_DetailsCrsGraphWrapperImpl.hpp>
#include <ArborX_DetailsKokkosExtAccessibilityTraits.hpp>
#include <ArborX_DetailsKokkosExtScopedProfileRegion.hpp>
#include <ArborX_DetailsLegacy.hpp>
#include <ArborX_IndexableGetter.hpp>

#include <Kokkos_Core.hpp>

namespace ArborX
{

template <typename MemorySpace, typename BoundingVolume = Box>
class BruteForce
template <typename MemorySpace, typename Value,
typename IndexableGetter = Details::DefaultIndexableGetter,
typename BoundingVolume = ExperimentalHyperGeometry::Box<
GeometryTraits::dimension_v<
std::decay_t<std::invoke_result_t<IndexableGetter, Value>>>,
typename GeometryTraits::coordinate_type<std::decay_t<
std::invoke_result_t<IndexableGetter, Value>>>::type>>
class BasicBruteForce
{
public:
using memory_space = MemorySpace;
static_assert(Kokkos::is_memory_space<MemorySpace>::value);
using size_type = typename MemorySpace::size_type;
using bounding_volume_type = BoundingVolume;
using value_type = Value;

BruteForce() = default;
BasicBruteForce() = default;

template <typename ExecutionSpace, typename Primitives>
BruteForce(ExecutionSpace const &space, Primitives const &primitives);
template <typename ExecutionSpace, typename Values>
BasicBruteForce(ExecutionSpace const &space, Values const &values,
IndexableGetter const &indexable_getter = IndexableGetter());

KOKKOS_FUNCTION
size_type size() const noexcept { return _size; }
Expand Down Expand Up @@ -76,43 +87,103 @@ class BruteForce
private:
size_type _size{0};
bounding_volume_type _bounds;
Kokkos::View<bounding_volume_type *, memory_space> _bounding_volumes;
Kokkos::View<value_type *, memory_space> _values;
IndexableGetter _indexable_getter;
};

// Legacy (APIv1-style) brute-force index, implemented as a thin adapter over
// BasicBruteForce.
//
// The base class is instantiated with Details::PairIndexVolume<Box> as the
// stored value type, Details::DefaultIndexableGetter as the indexable getter,
// and BoundingVolume as the bounding volume type.
//
// NOTE(review): the stored value type is PairIndexVolume<Box> regardless of
// the BoundingVolume template argument, while the constructor below builds
// LegacyValues<Primitives, bounding_volume_type>. Confirm that this is
// intended (and compiles) when BoundingVolume is not Box.
template <typename MemorySpace, typename BoundingVolume = Box>
class BruteForce
: public BasicBruteForce<MemorySpace, Details::PairIndexVolume<Box>,
Details::DefaultIndexableGetter, BoundingVolume>
{
// Shorthand for the BasicBruteForce instantiation this class derives from.
using base_type =
BasicBruteForce<MemorySpace, Details::PairIndexVolume<Box>,
Details::DefaultIndexableGetter, BoundingVolume>;

public:
// Marker typedef; presumably detected elsewhere to recognize legacy trees
// (its consumers are not visible here -- TODO confirm).
using legacy_tree = void;

// Re-export the bounding volume type of the base class.
using bounding_volume_type = typename base_type::bounding_volume_type;

// Default construction; delegates to the defaulted base constructor.
BruteForce() = default;

// Builds the index from user primitives.
//
// space      - execution space instance forwarded to the base constructor.
// primitives - user primitives; checked with check_valid_access_traits using
//              PrimitivesTag, then wrapped in
//              LegacyValues<Primitives, bounding_volume_type> before being
//              handed to the base constructor.
template <typename ExecutionSpace, typename Primitives>
BruteForce(ExecutionSpace const &space, Primitives const &primitives)
: base_type(
space,
// Validate the primitives before calling the base constructor.
// The comma operator evaluates the check first and discards its result;
// the LegacyValues wrapper is the value actually passed as the argument.
(Details::check_valid_access_traits(PrimitivesTag{}, primitives),
Details::LegacyValues<Primitives, bounding_volume_type>{
primitives}),
Details::DefaultIndexableGetter())
{}

// Callback-based query.
//
// Wraps the user callback in
// LegacyCallbackWrapper<Callback, base_type::value_type> and forwards to
// base_type::query. The trailing Ignore argument is accepted but not
// forwarded to the base class.
template <typename ExecutionSpace, typename Predicates, typename Callback,
typename Ignore = int>
void query(ExecutionSpace const &space, Predicates const &predicates,
Callback const &callback, Ignore = Ignore()) const
{
base_type::query(space, predicates,
Details::LegacyCallbackWrapper<
Callback, typename base_type::value_type>{callback});
}

// View-based query.
//
// Participates in overload resolution only when the decayed type of the
// fourth argument is a Kokkos view (per Kokkos::is_view_v); returns void.
// All arguments are perfectly forwarded, unchanged, to base_type::query.
template <typename ExecutionSpace, typename Predicates,
typename CallbackOrView, typename View, typename... Args>
std::enable_if_t<Kokkos::is_view_v<std::decay_t<View>>>
query(ExecutionSpace const &space, Predicates const &predicates,
CallbackOrView &&callback_or_view, View &&view, Args &&...args) const
{
base_type::query(space, predicates,
std::forward<CallbackOrView>(callback_or_view),
std::forward<View>(view), std::forward<Args>(args)...);
}
};

template <typename MemorySpace, typename BoundingVolume>
template <typename ExecutionSpace, typename Primitives>
BruteForce<MemorySpace, BoundingVolume>::BruteForce(
ExecutionSpace const &space, Primitives const &primitives)
: _size(AccessTraits<Primitives, PrimitivesTag>::size(primitives))
, _bounding_volumes(
Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
"ArborX::BruteForce::bounding_volumes"),
_size)
template <typename MemorySpace, typename Value, typename IndexableGetter,
typename BoundingVolume>
template <typename ExecutionSpace, typename Values>
BasicBruteForce<MemorySpace, Value, IndexableGetter, BoundingVolume>::
BasicBruteForce(ExecutionSpace const &space, Values const &user_values,
IndexableGetter const &indexable_getter)
: _size(AccessTraits<Values, PrimitivesTag>::size(user_values))
, _values(Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
"ArborX::BruteForce::values"),
_size)
, _indexable_getter(indexable_getter)
{
static_assert(
KokkosExt::is_accessible_from<MemorySpace, ExecutionSpace>::value);
// FIXME for now, do not check the return type of get()
Details::check_valid_access_traits<Primitives>(
PrimitivesTag{}, primitives, Details::DoNotCheckGetReturnType());
using Access = AccessTraits<Primitives, PrimitivesTag>;
// FIXME redo with RangeTraits
Details::check_valid_access_traits<Values>(
PrimitivesTag{}, user_values, Details::DoNotCheckGetReturnType());
using Access = AccessTraits<Values, PrimitivesTag>;
static_assert(KokkosExt::is_accessible_from<typename Access::memory_space,
ExecutionSpace>::value,
"Primitives must be accessible from the execution space");
"Values must be accessible from the execution space");

Kokkos::Profiling::pushRegion("ArborX::BruteForce::BruteForce");
KokkosExt::ScopedProfileRegion guard("ArborX::BruteForce::BruteForce");

Details::BruteForceImpl::initializeBoundingVolumesAndReduceBoundsOfTheScene(
space, primitives, _bounding_volumes, _bounds);
if (empty())
{
return;
}

Kokkos::Profiling::popRegion();
Details::AccessValues<Values> values{user_values};

Details::BruteForceImpl::initializeBoundingVolumesAndReduceBoundsOfTheScene(
space, values, _indexable_getter, _values, _bounds);
}

template <typename MemorySpace, typename BoundingVolume>
template <typename MemorySpace, typename Value, typename IndexableGetter,
typename BoundingVolume>
template <typename ExecutionSpace, typename Predicates, typename Callback,
typename Ignore>
void BruteForce<MemorySpace, BoundingVolume>::query(
ExecutionSpace const &space, Predicates const &predicates,
Callback const &callback, Ignore) const
void BasicBruteForce<MemorySpace, Value, IndexableGetter,
BoundingVolume>::query(ExecutionSpace const &space,
Predicates const &predicates,
Callback const &callback,
Ignore) const
{
static_assert(
KokkosExt::is_accessible_from<MemorySpace, ExecutionSpace>::value);
Expand All @@ -124,13 +195,15 @@ void BruteForce<MemorySpace, BoundingVolume>::query(
using Tag = typename Details::AccessTraitsHelper<Access>::tag;
static_assert(std::is_same<Tag, Details::SpatialPredicateTag>{},
"nearest query not implemented yet");
using Value = int;
Details::check_valid_callback<Value>(callback, predicates);

Kokkos::Profiling::pushRegion("ArborX::BruteForce::query::spatial");

Details::BruteForceImpl::query(space, _bounding_volumes, predicates,
callback);
Details::BruteForceImpl::query(
space, predicates, _values,
Details::Indexables<decltype(_values), IndexableGetter>{
_values, _indexable_getter},
callback);

Kokkos::Profiling::popRegion();
}
Expand Down
91 changes: 44 additions & 47 deletions src/details/ArborX_DetailsBruteForceImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,118 +24,115 @@ namespace Details
{
struct BruteForceImpl
{
template <class ExecutionSpace, class Primitives, class BoundingVolumes,
class Bounds>
template <class ExecutionSpace, class Values, class IndexableGetter,
class Nodes, class Bounds>
static void initializeBoundingVolumesAndReduceBoundsOfTheScene(
ExecutionSpace const &space, Primitives const &primitives,
BoundingVolumes const &bounding_volumes, Bounds &bounds)
ExecutionSpace const &space, Values const &values,
IndexableGetter const &indexable_getter, Nodes const &nodes,
Bounds &bounds)
{
using Access = AccessTraits<Primitives, PrimitivesTag>;

int const n = Access::size(primitives);

Kokkos::parallel_reduce(
"ArborX::BruteForce::BruteForce::"
"initialize_bounding_volumes_and_reduce_bounds",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
"initialize_values_and_reduce_bounds",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, values.size()),
KOKKOS_LAMBDA(int i, Bounds &update) {
nodes(i) = values(i);

using Details::expand;
Bounds bounding_volume{};
expand(bounding_volume, Access::get(primitives, i));
bounding_volumes(i) = bounding_volume;
expand(bounding_volume, indexable_getter(nodes(i)));
update += bounding_volume;
},
Kokkos::Sum<Bounds>{bounds});
}

template <class ExecutionSpace, class Primitives, class Predicates,
class Callback>
static void query(ExecutionSpace const &space, Primitives const &primitives,
Predicates const &predicates, Callback const &callback)
template <class ExecutionSpace, class Predicates, class Values,
class Indexables, class Callback>
static void query(ExecutionSpace const &space, Predicates const &predicates,
Values const &values, Indexables const &indexables,
Callback const &callback)
{
using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
using AccessPrimitives = AccessTraits<Primitives, PrimitivesTag>;
using AccessPredicates = AccessTraits<Predicates, PredicatesTag>;
using PredicateType = typename AccessTraitsHelper<AccessPredicates>::type;
using PrimitiveType = typename AccessTraitsHelper<AccessPrimitives>::type;
using IndexableType = std::decay_t<decltype(indexables(0))>;

int const n_primitives = AccessPrimitives::size(primitives);
int const n_indexables = values.size();
int const n_predicates = AccessPredicates::size(predicates);
int max_scratch_size = TeamPolicy::scratch_size_max(0);
// half of the scratch memory used by predicates and half for primitives
// half of the scratch memory used by predicates and half for indexables
int const predicates_per_team =
max_scratch_size / 2 / sizeof(PredicateType);
int const primitives_per_team =
max_scratch_size / 2 / sizeof(PrimitiveType);
int const indexables_per_team =
max_scratch_size / 2 / sizeof(IndexableType);
ARBORX_ASSERT(predicates_per_team > 0);
ARBORX_ASSERT(primitives_per_team > 0);
ARBORX_ASSERT(indexables_per_team > 0);

int const n_primitive_tiles =
std::ceil((float)n_primitives / primitives_per_team);
int const n_indexable_tiles =
std::ceil((float)n_indexables / indexables_per_team);
int const n_predicate_tiles =
std::ceil((float)n_predicates / predicates_per_team);
int const n_teams = n_primitive_tiles * n_predicate_tiles;
int const n_teams = n_indexable_tiles * n_predicate_tiles;

using ScratchPredicateType =
Kokkos::View<PredicateType *,
typename ExecutionSpace::scratch_memory_space,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
using ScratchPrimitiveType =
Kokkos::View<PrimitiveType *,
using ScratchIndexableType =
Kokkos::View<IndexableType *,
typename ExecutionSpace::scratch_memory_space,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
int scratch_size = ScratchPredicateType::shmem_size(predicates_per_team) +
ScratchPrimitiveType::shmem_size(primitives_per_team);
ScratchIndexableType::shmem_size(indexables_per_team);

Kokkos::parallel_for(
"ArborX::BruteForce::query::spatial::"
"check_all_predicates_against_all_primitives",
"check_all_predicates_against_all_indexables",
TeamPolicy(space, n_teams, Kokkos::AUTO, 1)
.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
KOKKOS_LAMBDA(typename TeamPolicy::member_type const &teamMember) {
// select the tiles of predicates/primitives checked by each team
// select the tiles of predicates/indexables checked by each team
int predicate_start = predicates_per_team *
(teamMember.league_rank() / n_primitive_tiles);
int primitive_start = primitives_per_team *
(teamMember.league_rank() % n_primitive_tiles);
(teamMember.league_rank() / n_indexable_tiles);
int indexable_start = indexables_per_team *
(teamMember.league_rank() % n_indexable_tiles);

int predicates_in_this_team = KokkosExt::min(
predicates_per_team, n_predicates - predicate_start);
int primitives_in_this_team = KokkosExt::min(
primitives_per_team, n_primitives - primitive_start);
int indexables_in_this_team = KokkosExt::min(
indexables_per_team, n_indexables - indexable_start);

ScratchPredicateType scratch_predicates(teamMember.team_scratch(0),
predicates_per_team);
ScratchPrimitiveType scratch_primitives(teamMember.team_scratch(0),
primitives_per_team);
// fill the scratch space with the predicates / primitives in the tile
ScratchIndexableType scratch_indexables(teamMember.team_scratch(0),
indexables_per_team);
// fill the scratch space with the predicates / indexables in the tile
Kokkos::parallel_for(
Kokkos::TeamVectorRange(teamMember, predicates_in_this_team),
[&](const int q) {
scratch_predicates(q) =
AccessPredicates::get(predicates, predicate_start + q);
});
Kokkos::parallel_for(
Kokkos::TeamVectorRange(teamMember, primitives_in_this_team),
Kokkos::TeamVectorRange(teamMember, indexables_in_this_team),
[&](const int j) {
scratch_primitives(j) =
AccessPrimitives::get(primitives, primitive_start + j);
scratch_indexables(j) = indexables(indexable_start + j);
});
teamMember.team_barrier();

// start threads for every predicate / primitive combination
// start threads for every predicate / indexable combination
Kokkos::parallel_for(
Kokkos::TeamThreadRange(teamMember, primitives_in_this_team),
Kokkos::TeamThreadRange(teamMember, indexables_in_this_team),
[&](int j) {
Kokkos::parallel_for(
Kokkos::ThreadVectorRange(teamMember,
predicates_in_this_team),
[&](const int q) {
auto const &predicate = scratch_predicates(q);
auto const &primitive = scratch_primitives(j);
if (predicate(primitive))
auto const &indexable = scratch_indexables(j);
if (predicate(indexable))
{
callback(predicate, j + primitive_start);
callback(predicate, values(indexable_start + j));
}
});
});
Expand Down
Loading

0 comments on commit 48e13e7

Please sign in to comment.