Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement APIv2 BruteForce #962

Merged
merged 2 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions benchmarks/brute_force_vs_bvh/brute_force_vs_bvh_timpl.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (c) 2017-2022 by the ArborX authors *
* Copyright (c) 2017-2023 by the ArborX authors *
* All rights reserved. *
* *
* This file is part of the ArborX library. ArborX is *
Expand All @@ -10,7 +10,6 @@
****************************************************************************/

#include <ArborX_BruteForce.hpp>
#include <ArborX_HyperBox.hpp>
#include <ArborX_HyperPoint.hpp>
#include <ArborX_HyperSphere.hpp>
#include <ArborX_LinearBVH.hpp>
Expand Down Expand Up @@ -84,7 +83,6 @@ static void run_fp(int nprimitives, int nqueries, int nrepeats)
Placeholder<DIM, FloatingPoint> predicates{nqueries};

using Point = ArborX::ExperimentalHyperGeometry::Point<DIM, FloatingPoint>;
using Box = ArborX::ExperimentalHyperGeometry::Box<DIM, FloatingPoint>;

for (int i = 0; i < nrepeats; i++)
{
Expand All @@ -111,7 +109,11 @@ static void run_fp(int nprimitives, int nqueries, int nrepeats)

{
Kokkos::Timer timer;
ArborX::BruteForce<MemorySpace, Box> brute{space, primitives};
ArborX::BasicBruteForce<MemorySpace,
ArborX::Details::PairIndexVolume<Point>>
brute{space,
ArborX::Details::LegacyValues<decltype(primitives), Point>{
primitives}};

Kokkos::View<int *, ExecutionSpace> indices("Benchmark::indices", 0);
Kokkos::View<int *, ExecutionSpace> offset("Benchmark::offset", 0);
Expand Down
135 changes: 104 additions & 31 deletions src/ArborX_BruteForce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,36 @@
#include <ArborX_DetailsBruteForceImpl.hpp>
#include <ArborX_DetailsCrsGraphWrapperImpl.hpp>
#include <ArborX_DetailsKokkosExtAccessibilityTraits.hpp>
#include <ArborX_DetailsKokkosExtScopedProfileRegion.hpp>
#include <ArborX_DetailsLegacy.hpp>
#include <ArborX_IndexableGetter.hpp>

#include <Kokkos_Core.hpp>

namespace ArborX
{

template <typename MemorySpace, typename BoundingVolume = Box>
class BruteForce
template <typename MemorySpace, typename Value,
typename IndexableGetter = Details::DefaultIndexableGetter,
typename BoundingVolume = ExperimentalHyperGeometry::Box<
GeometryTraits::dimension_v<
std::decay_t<std::invoke_result_t<IndexableGetter, Value>>>,
typename GeometryTraits::coordinate_type<std::decay_t<
std::invoke_result_t<IndexableGetter, Value>>>::type>>
class BasicBruteForce
{
public:
using memory_space = MemorySpace;
static_assert(Kokkos::is_memory_space<MemorySpace>::value);
using size_type = typename MemorySpace::size_type;
using bounding_volume_type = BoundingVolume;
using value_type = Value;

BruteForce() = default;
BasicBruteForce() = default;

template <typename ExecutionSpace, typename Primitives>
BruteForce(ExecutionSpace const &space, Primitives const &primitives);
template <typename ExecutionSpace, typename Values>
BasicBruteForce(ExecutionSpace const &space, Values const &values,
IndexableGetter const &indexable_getter = IndexableGetter());

KOKKOS_FUNCTION
size_type size() const noexcept { return _size; }
Expand Down Expand Up @@ -76,43 +87,103 @@ class BruteForce
private:
size_type _size{0};
bounding_volume_type _bounds;
Kokkos::View<bounding_volume_type *, memory_space> _bounding_volumes;
Kokkos::View<value_type *, memory_space> _values;
IndexableGetter _indexable_getter;
};

template <typename MemorySpace, typename BoundingVolume = Box>
class BruteForce
: public BasicBruteForce<MemorySpace, Details::PairIndexVolume<Box>,
Details::DefaultIndexableGetter, BoundingVolume>
{
using base_type =
BasicBruteForce<MemorySpace, Details::PairIndexVolume<Box>,
Details::DefaultIndexableGetter, BoundingVolume>;

public:
using legacy_tree = void;

using bounding_volume_type = typename base_type::bounding_volume_type;

BruteForce() = default;

// Legacy constructor taking user-provided primitives.
// Forwards to the base_type constructor with `space`, the primitives wrapped
// in Details::LegacyValues<Primitives, bounding_volume_type>, and a
// default-constructed Details::DefaultIndexableGetter.
template <typename ExecutionSpace, typename Primitives>
BruteForce(ExecutionSpace const &space, Primitives const &primitives)
: base_type(
space,
// Validate the primitives before calling the base constructor.
// The comma operator evaluates the check first and discards its result;
// the LegacyValues object is what is passed as the second argument.
(Details::check_valid_access_traits(PrimitivesTag{}, primitives),
Details::LegacyValues<Primitives, bounding_volume_type>{
primitives}),
Details::DefaultIndexableGetter())
{}

template <typename ExecutionSpace, typename Predicates, typename Callback,
typename Ignore = int>
void query(ExecutionSpace const &space, Predicates const &predicates,
Callback const &callback, Ignore = Ignore()) const
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would just omit the last template parameter for now or, if we want compatibility with BasicBoundingVolumeHierarchy, actually allow passing in TraversalPolicy but enforce that it is default-constructed.

Copy link
Contributor Author

@aprokop aprokop Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be ok to do that in a different PR. Though, there is a question of backwards compatibility if one just removes the last argument.

{
base_type::query(space, predicates,
Details::LegacyCallbackWrapper<
Callback, typename base_type::value_type>{callback});
}

// Query overload that participates in overload resolution only when the
// decayed type of the fourth argument (`view`) is a Kokkos view; the
// enable_if_t return type is void in that case.
// Every argument after `space` and `predicates` is perfectly forwarded to
// base_type::query as-is.
template <typename ExecutionSpace, typename Predicates,
typename CallbackOrView, typename View, typename... Args>
std::enable_if_t<Kokkos::is_view_v<std::decay_t<View>>>
query(ExecutionSpace const &space, Predicates const &predicates,
CallbackOrView &&callback_or_view, View &&view, Args &&...args) const
{
// Preserve the value category of each forwarded argument.
base_type::query(space, predicates,
std::forward<CallbackOrView>(callback_or_view),
std::forward<View>(view), std::forward<Args>(args)...);
}
};

template <typename MemorySpace, typename BoundingVolume>
template <typename ExecutionSpace, typename Primitives>
BruteForce<MemorySpace, BoundingVolume>::BruteForce(
ExecutionSpace const &space, Primitives const &primitives)
: _size(AccessTraits<Primitives, PrimitivesTag>::size(primitives))
, _bounding_volumes(
Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
"ArborX::BruteForce::bounding_volumes"),
_size)
template <typename MemorySpace, typename Value, typename IndexableGetter,
typename BoundingVolume>
template <typename ExecutionSpace, typename Values>
BasicBruteForce<MemorySpace, Value, IndexableGetter, BoundingVolume>::
BasicBruteForce(ExecutionSpace const &space, Values const &user_values,
IndexableGetter const &indexable_getter)
: _size(AccessTraits<Values, PrimitivesTag>::size(user_values))
, _values(Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
"ArborX::BruteForce::values"),
_size)
, _indexable_getter(indexable_getter)
{
static_assert(
KokkosExt::is_accessible_from<MemorySpace, ExecutionSpace>::value);
// FIXME for now, do not check the return type of get()
Details::check_valid_access_traits<Primitives>(
PrimitivesTag{}, primitives, Details::DoNotCheckGetReturnType());
using Access = AccessTraits<Primitives, PrimitivesTag>;
// FIXME redo with RangeTraits
Details::check_valid_access_traits<Values>(
PrimitivesTag{}, user_values, Details::DoNotCheckGetReturnType());
using Access = AccessTraits<Values, PrimitivesTag>;
static_assert(KokkosExt::is_accessible_from<typename Access::memory_space,
ExecutionSpace>::value,
"Primitives must be accessible from the execution space");
"Values must be accessible from the execution space");

Kokkos::Profiling::pushRegion("ArborX::BruteForce::BruteForce");
KokkosExt::ScopedProfileRegion guard("ArborX::BruteForce::BruteForce");
aprokop marked this conversation as resolved.
Show resolved Hide resolved

Details::BruteForceImpl::initializeBoundingVolumesAndReduceBoundsOfTheScene(
space, primitives, _bounding_volumes, _bounds);
if (empty())
{
return;
}

Kokkos::Profiling::popRegion();
Details::AccessValues<Values> values{user_values};

Details::BruteForceImpl::initializeBoundingVolumesAndReduceBoundsOfTheScene(
space, values, _indexable_getter, _values, _bounds);
}

template <typename MemorySpace, typename BoundingVolume>
template <typename MemorySpace, typename Value, typename IndexableGetter,
typename BoundingVolume>
template <typename ExecutionSpace, typename Predicates, typename Callback,
typename Ignore>
void BruteForce<MemorySpace, BoundingVolume>::query(
ExecutionSpace const &space, Predicates const &predicates,
Callback const &callback, Ignore) const
void BasicBruteForce<MemorySpace, Value, IndexableGetter,
BoundingVolume>::query(ExecutionSpace const &space,
Predicates const &predicates,
Callback const &callback,
Ignore) const
{
static_assert(
KokkosExt::is_accessible_from<MemorySpace, ExecutionSpace>::value);
Expand All @@ -124,13 +195,15 @@ void BruteForce<MemorySpace, BoundingVolume>::query(
using Tag = typename Details::AccessTraitsHelper<Access>::tag;
static_assert(std::is_same<Tag, Details::SpatialPredicateTag>{},
"nearest query not implemented yet");
using Value = int;
Details::check_valid_callback<Value>(callback, predicates);

Kokkos::Profiling::pushRegion("ArborX::BruteForce::query::spatial");

Details::BruteForceImpl::query(space, _bounding_volumes, predicates,
callback);
Details::BruteForceImpl::query(
space, predicates, _values,
Details::Indexables<decltype(_values), IndexableGetter>{
_values, _indexable_getter},
callback);

Kokkos::Profiling::popRegion();
}
Expand Down
91 changes: 44 additions & 47 deletions src/details/ArborX_DetailsBruteForceImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,118 +24,115 @@ namespace Details
{
struct BruteForceImpl
{
template <class ExecutionSpace, class Primitives, class BoundingVolumes,
class Bounds>
template <class ExecutionSpace, class Values, class IndexableGetter,
class Nodes, class Bounds>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not a big fan of that name "Nodes" but I understand we need to distinguish from the user-provided input and I don't have a better idea at this time.

static void initializeBoundingVolumesAndReduceBoundsOfTheScene(
ExecutionSpace const &space, Primitives const &primitives,
BoundingVolumes const &bounding_volumes, Bounds &bounds)
ExecutionSpace const &space, Values const &values,
IndexableGetter const &indexable_getter, Nodes const &nodes,
Bounds &bounds)
{
using Access = AccessTraits<Primitives, PrimitivesTag>;

int const n = Access::size(primitives);

Kokkos::parallel_reduce(
"ArborX::BruteForce::BruteForce::"
"initialize_bounding_volumes_and_reduce_bounds",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
"initialize_values_and_reduce_bounds",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, values.size()),
KOKKOS_LAMBDA(int i, Bounds &update) {
nodes(i) = values(i);

using Details::expand;
Bounds bounding_volume{};
expand(bounding_volume, Access::get(primitives, i));
bounding_volumes(i) = bounding_volume;
expand(bounding_volume, indexable_getter(nodes(i)));
update += bounding_volume;
},
Kokkos::Sum<Bounds>{bounds});
}

template <class ExecutionSpace, class Primitives, class Predicates,
class Callback>
static void query(ExecutionSpace const &space, Primitives const &primitives,
Predicates const &predicates, Callback const &callback)
template <class ExecutionSpace, class Predicates, class Values,
class Indexables, class Callback>
static void query(ExecutionSpace const &space, Predicates const &predicates,
Values const &values, Indexables const &indexables,
Callback const &callback)
{
using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
using AccessPrimitives = AccessTraits<Primitives, PrimitivesTag>;
using AccessPredicates = AccessTraits<Predicates, PredicatesTag>;
using PredicateType = typename AccessTraitsHelper<AccessPredicates>::type;
using PrimitiveType = typename AccessTraitsHelper<AccessPrimitives>::type;
using IndexableType = std::decay_t<decltype(indexables(0))>;

int const n_primitives = AccessPrimitives::size(primitives);
int const n_indexables = values.size();
int const n_predicates = AccessPredicates::size(predicates);
int max_scratch_size = TeamPolicy::scratch_size_max(0);
// half of the scratch memory used by predicates and half for primitives
// half of the scratch memory used by predicates and half for indexables
int const predicates_per_team =
max_scratch_size / 2 / sizeof(PredicateType);
int const primitives_per_team =
max_scratch_size / 2 / sizeof(PrimitiveType);
int const indexables_per_team =
max_scratch_size / 2 / sizeof(IndexableType);
ARBORX_ASSERT(predicates_per_team > 0);
ARBORX_ASSERT(primitives_per_team > 0);
ARBORX_ASSERT(indexables_per_team > 0);

int const n_primitive_tiles =
std::ceil((float)n_primitives / primitives_per_team);
int const n_indexable_tiles =
std::ceil((float)n_indexables / indexables_per_team);
int const n_predicate_tiles =
std::ceil((float)n_predicates / predicates_per_team);
int const n_teams = n_primitive_tiles * n_predicate_tiles;
int const n_teams = n_indexable_tiles * n_predicate_tiles;

using ScratchPredicateType =
Kokkos::View<PredicateType *,
typename ExecutionSpace::scratch_memory_space,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
using ScratchPrimitiveType =
Kokkos::View<PrimitiveType *,
using ScratchIndexableType =
Kokkos::View<IndexableType *,
typename ExecutionSpace::scratch_memory_space,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
int scratch_size = ScratchPredicateType::shmem_size(predicates_per_team) +
ScratchPrimitiveType::shmem_size(primitives_per_team);
ScratchIndexableType::shmem_size(indexables_per_team);

Kokkos::parallel_for(
"ArborX::BruteForce::query::spatial::"
"check_all_predicates_against_all_primitives",
"check_all_predicates_against_all_indexables",
TeamPolicy(space, n_teams, Kokkos::AUTO, 1)
.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
KOKKOS_LAMBDA(typename TeamPolicy::member_type const &teamMember) {
// select the tiles of predicates/primitives checked by each team
// select the tiles of predicates/indexables checked by each team
int predicate_start = predicates_per_team *
(teamMember.league_rank() / n_primitive_tiles);
int primitive_start = primitives_per_team *
(teamMember.league_rank() % n_primitive_tiles);
(teamMember.league_rank() / n_indexable_tiles);
int indexable_start = indexables_per_team *
(teamMember.league_rank() % n_indexable_tiles);

int predicates_in_this_team = KokkosExt::min(
predicates_per_team, n_predicates - predicate_start);
int primitives_in_this_team = KokkosExt::min(
primitives_per_team, n_primitives - primitive_start);
int indexables_in_this_team = KokkosExt::min(
indexables_per_team, n_indexables - indexable_start);

ScratchPredicateType scratch_predicates(teamMember.team_scratch(0),
predicates_per_team);
ScratchPrimitiveType scratch_primitives(teamMember.team_scratch(0),
primitives_per_team);
// fill the scratch space with the predicates / primitives in the tile
ScratchIndexableType scratch_indexables(teamMember.team_scratch(0),
indexables_per_team);
// fill the scratch space with the predicates / indexables in the tile
Kokkos::parallel_for(
Kokkos::TeamVectorRange(teamMember, predicates_in_this_team),
[&](const int q) {
scratch_predicates(q) =
AccessPredicates::get(predicates, predicate_start + q);
});
Kokkos::parallel_for(
Kokkos::TeamVectorRange(teamMember, primitives_in_this_team),
Kokkos::TeamVectorRange(teamMember, indexables_in_this_team),
[&](const int j) {
scratch_primitives(j) =
AccessPrimitives::get(primitives, primitive_start + j);
scratch_indexables(j) = indexables(indexable_start + j);
});
teamMember.team_barrier();

// start threads for every predicate / primitive combination
// start threads for every predicate / indexable combination
Kokkos::parallel_for(
Kokkos::TeamThreadRange(teamMember, primitives_in_this_team),
Kokkos::TeamThreadRange(teamMember, indexables_in_this_team),
[&](int j) {
Kokkos::parallel_for(
Kokkos::ThreadVectorRange(teamMember,
predicates_in_this_team),
[&](const int q) {
auto const &predicate = scratch_predicates(q);
auto const &primitive = scratch_primitives(j);
if (predicate(primitive))
auto const &indexable = scratch_indexables(j);
if (predicate(indexable))
{
callback(predicate, j + primitive_start);
callback(predicate, values(indexable_start + j));
}
});
});
Expand Down
Loading