Skip to content

Commit

Permalink
Migrate ViewHelpers
Browse files: browse the repository at this point in the history
  • Loading branch information
aprokop committed Dec 26, 2023
1 parent 73a9e90 commit 73c8d03
Show file tree
Hide file tree
Showing 21 changed files with 94 additions and 85 deletions.
2 changes: 1 addition & 1 deletion benchmarks/dbscan/ArborX_DBSCANVerification.hpp
Expand Up @@ -273,7 +273,7 @@ bool verifyClusters(ExecutionSpace const &exec_space, IndicesView indices,
{
int n = labels.size();
if ((int)offset.size() != n + 1 ||
KokkosBlah::lastElement(exec_space, offset) != (int)indices.size())
KokkosExt::lastElement(exec_space, offset) != (int)indices.size())
return false;

using Verify = bool (*)(ExecutionSpace const &, IndicesView, OffsetView,
Expand Down
10 changes: 6 additions & 4 deletions benchmarks/dbscan/dbscan_timpl.hpp
Expand Up @@ -50,6 +50,8 @@ void sortAndFilterClusters(ExecutionSpace const &exec_space,
{
Kokkos::Profiling::pushRegion("ArborX::DBSCAN::sortAndFilterClusters");

namespace KokkosExt = ArborX::Details::KokkosExt;

static_assert(Kokkos::is_view<LabelsView>{});
static_assert(Kokkos::is_view<ClusterIndicesView>{});
static_assert(Kokkos::is_view<ClusterOffsetView>{});
Expand Down Expand Up @@ -93,7 +95,7 @@ void sortAndFilterClusters(ExecutionSpace const &exec_space,
auto &map_cluster_to_offset_position = cluster_sizes;
constexpr int IGNORED_CLUSTER = -1;
int num_clusters;
KokkosBlah::reallocWithoutInitializing(exec_space, cluster_offset, n + 1);
KokkosExt::reallocWithoutInitializing(exec_space, cluster_offset, n + 1);
Kokkos::parallel_scan(
"ArborX::DBSCAN::compute_cluster_offset_with_filter",
Kokkos::RangePolicy<ExecutionSpace>(exec_space, 0, n),
Expand All @@ -118,10 +120,10 @@ void sortAndFilterClusters(ExecutionSpace const &exec_space,
Kokkos::resize(Kokkos::WithoutInitializing, cluster_offset, num_clusters + 1);
ArborX::exclusivePrefixSum(exec_space, cluster_offset);

auto cluster_starts = KokkosBlah::clone(exec_space, cluster_offset);
KokkosBlah::reallocWithoutInitializing(
auto cluster_starts = KokkosExt::clone(exec_space, cluster_offset);
KokkosExt::reallocWithoutInitializing(
exec_space, cluster_indices,
KokkosBlah::lastElement(exec_space, cluster_offset));
KokkosExt::lastElement(exec_space, cluster_offset));
Kokkos::parallel_for(
"ArborX::DBSCAN::compute_cluster_indices",
Kokkos::RangePolicy<ExecutionSpace>(exec_space, 0, n),
Expand Down
13 changes: 7 additions & 6 deletions src/details/ArborX_Dendrogram.hpp
Expand Up @@ -49,13 +49,15 @@ struct Dendrogram
{
Kokkos::Profiling::pushRegion("ArborX::Dendrogram::Dendrogram");

namespace KokkosExt = ArborX::Details::KokkosExt;

auto const num_edges = edges.size();
auto const num_vertices = num_edges + 1;

KokkosBlah::reallocWithoutInitializing(exec_space, _parents,
num_edges + num_vertices);
KokkosBlah::reallocWithoutInitializing(exec_space, _parent_heights,
num_edges);
KokkosExt::reallocWithoutInitializing(exec_space, _parents,
num_edges + num_vertices);
KokkosExt::reallocWithoutInitializing(exec_space, _parent_heights,
num_edges);

Kokkos::View<Details::UnweightedEdge *, MemorySpace> unweighted_edges(
Kokkos::view_alloc(exec_space, Kokkos::WithoutInitializing,
Expand All @@ -64,8 +66,7 @@ struct Dendrogram
splitEdges(exec_space, edges, unweighted_edges, _parent_heights);

Kokkos::Profiling::pushRegion("ArborX::Dendrogram::sort_edges");
Details::KokkosExt::sortByKey(exec_space, _parent_heights,
unweighted_edges);
KokkosExt::sortByKey(exec_space, _parent_heights, unweighted_edges);
Kokkos::Profiling::popRegion();

using ConstEdges =
Expand Down
16 changes: 8 additions & 8 deletions src/details/ArborX_DetailsCrsGraphWrapperImpl.hpp
Expand Up @@ -198,7 +198,7 @@ void queryImpl(ExecutionSpace const &space, Tree const &tree,
if (underflow)
{
// Store a copy of the original offset. We'll need it for compression.
preallocated_offset = KokkosBlah::clone(space, offset);
preallocated_offset = KokkosExt::clone(space, offset);
}

Kokkos::parallel_for(
Expand All @@ -207,7 +207,7 @@ void queryImpl(ExecutionSpace const &space, Tree const &tree,
KOKKOS_LAMBDA(int const i) { permuted_offset(i) = counts(i); });
exclusivePrefixSum(space, offset);

int const n_results = KokkosBlah::lastElement(space, offset);
int const n_results = KokkosExt::lastElement(space, offset);

Kokkos::Profiling::popRegion();

Expand Down Expand Up @@ -238,7 +238,7 @@ void queryImpl(ExecutionSpace const &space, Tree const &tree,
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n_queries),
KOKKOS_LAMBDA(int const i) { counts(i) = permuted_offset(i); });

KokkosBlah::reallocWithoutInitializing(space, out, n_results);
KokkosExt::reallocWithoutInitializing(space, out, n_results);

tree.query(
space, permuted_predicates,
Expand Down Expand Up @@ -293,7 +293,7 @@ allocateAndInitializeStorage(Tag, ExecutionSpace const &space,
OutView &out, int buffer_size)
{
auto const n_queries = predicates.size();
KokkosBlah::reallocWithoutInitializing(space, offset, n_queries + 1);
KokkosExt::reallocWithoutInitializing(space, offset, n_queries + 1);

buffer_size = std::abs(buffer_size);

Expand All @@ -305,7 +305,7 @@ allocateAndInitializeStorage(Tag, ExecutionSpace const &space,

// Use calculation for the size to avoid calling lastElement(space, offset)
// as it will launch an extra kernel to copy to host.
KokkosBlah::reallocWithoutInitializing(space, out, n_queries * buffer_size);
KokkosExt::reallocWithoutInitializing(space, out, n_queries * buffer_size);
}
}

Expand All @@ -317,7 +317,7 @@ allocateAndInitializeStorage(Tag, ExecutionSpace const &space,
OutView &out, int /*buffer_size*/)
{
auto const n_queries = predicates.size();
KokkosBlah::reallocWithoutInitializing(space, offset, n_queries + 1);
KokkosExt::reallocWithoutInitializing(space, offset, n_queries + 1);

Kokkos::parallel_for(
"ArborX::CrsGraphWrapper::query::nearest::"
Expand All @@ -326,8 +326,8 @@ allocateAndInitializeStorage(Tag, ExecutionSpace const &space,
KOKKOS_LAMBDA(int i) { offset(i) = getK(predicates(i)); });
exclusivePrefixSum(space, offset);

KokkosBlah::reallocWithoutInitializing(
space, out, KokkosBlah::lastElement(space, offset));
KokkosExt::reallocWithoutInitializing(space, out,
KokkosExt::lastElement(space, offset));
}

// Views are passed by reference here because internally Kokkos::realloc()
Expand Down
16 changes: 8 additions & 8 deletions src/details/ArborX_DetailsDistributedTreeImpl.hpp
Expand Up @@ -176,7 +176,7 @@ struct DistributedTreeImpl
"ArborX::DistributedTree::query::nearest::ranks", 0);
queryDispatchImpl(tag, tree, space, queries, indices, offset, ranks);
auto const n = indices.extent(0);
KokkosBlah::reallocWithoutInitializing(space, values, n);
KokkosExt::reallocWithoutInitializing(space, values, n);
Kokkos::parallel_for(
"ArborX::DistributedTree::query::zip_indices_and_ranks",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n), KOKKOS_LAMBDA(int i) {
Expand Down Expand Up @@ -358,7 +358,7 @@ void DistributedTreeImpl<DeviceType>::deviseStrategy(
// trees as necessary to find k neighbors.
Kokkos::View<int *, DeviceType> new_indices(
Kokkos::view_alloc(space, indices.label()),
KokkosBlah::lastElement(space, new_offset));
KokkosExt::lastElement(space, new_offset));
Kokkos::parallel_for(
"ArborX::DistributedTree::query::truncate_before_forwarding",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n_queries),
Expand Down Expand Up @@ -548,8 +548,8 @@ DistributedTreeImpl<DeviceType>::queryDispatchImpl(

// Unzip
auto const n = out.extent(0);
KokkosBlah::reallocWithoutInitializing(space, indices, n);
KokkosBlah::reallocWithoutInitializing(space, distances, n);
KokkosExt::reallocWithoutInitializing(space, indices, n);
KokkosExt::reallocWithoutInitializing(space, distances, n);
Kokkos::parallel_for(
"ArborX::DistributedTree::query::nearest::split_"
"index_distance_pairs",
Expand Down Expand Up @@ -706,7 +706,7 @@ void DistributedTreeImpl<DeviceType>::forwardQueries(
Distributor<DeviceType> distributor(comm);

int const n_queries = queries.size();
int const n_exports = KokkosBlah::lastElement(space, offset);
int const n_exports = KokkosExt::lastElement(space, offset);
int const n_imports = distributor.createFromSends(space, indices);

static_assert(std::is_same_v<Query, typename Predicates::value_type>);
Expand Down Expand Up @@ -795,7 +795,7 @@ void DistributedTreeImpl<DeviceType>::communicateResultsBack(
MPI_Comm_rank(comm, &comm_rank);

int const n_fwd_queries = offset.extent_int(0) - 1;
int const n_exports = KokkosBlah::lastElement(space, offset);
int const n_exports = KokkosExt::lastElement(space, offset);

// We are assuming here that if the same rank is related to multiple batches
// these batches appear consecutively. Hence, no reordering is necessary.
Expand Down Expand Up @@ -894,7 +894,7 @@ void DistributedTreeImpl<DeviceType>::filterResults(

exclusivePrefixSum(space, new_offset);

int const n_truncated_results = KokkosBlah::lastElement(space, new_offset);
int const n_truncated_results = KokkosExt::lastElement(space, new_offset);
Kokkos::View<int *, DeviceType> new_indices(
Kokkos::view_alloc(space, indices.label()), n_truncated_results);
Kokkos::View<int *, DeviceType> new_ranks(
Expand All @@ -911,7 +911,7 @@ void DistributedTreeImpl<DeviceType>::filterResults(
}
};

int const n_results = KokkosBlah::lastElement(space, offset);
int const n_results = KokkosExt::lastElement(space, offset);
Kokkos::View<PairIndexDistance *, DeviceType> buffer(
Kokkos::view_alloc(
space, Kokkos::WithoutInitializing,
Expand Down
6 changes: 3 additions & 3 deletions src/details/ArborX_DetailsDistributor.hpp
Expand Up @@ -59,7 +59,7 @@ determineBufferLayout(ExecutionSpace const &space, InputView batched_ranks,

auto const n_batched_ranks = batched_ranks.size();
if (n_batched_ranks == 0 ||
KokkosBlah::lastElement(space, batched_offsets) == 0)
KokkosExt::lastElement(space, batched_offsets) == 0)
return;

using DeviceType = typename InputView::traits::device_type;
Expand Down Expand Up @@ -242,8 +242,8 @@ class Distributor

// The next two function calls are the only difference to the other
// overload.
KokkosBlah::reallocWithoutInitializing(space, _permute,
destination_ranks.size());
KokkosExt::reallocWithoutInitializing(space, _permute,
destination_ranks.size());
sortAndDetermineBufferLayout(space, destination_ranks, _permute,
_destinations, _dest_counts, _dest_offsets);

Expand Down
10 changes: 5 additions & 5 deletions src/details/ArborX_DetailsExpandHalfToFull.hpp
Expand Up @@ -29,7 +29,7 @@ void expandHalfToFull(ExecutionSpace const &space, Offsets &offsets,
typename Indices::const_type const indices_orig = indices;

auto const n = offsets.extent(0) - 1;
offsets = KokkosBlah::cloneWithoutInitializingNorCopying(space, offsets_orig);
offsets = KokkosExt::cloneWithoutInitializingNorCopying(space, offsets_orig);
Kokkos::deep_copy(space, offsets, 0);
Kokkos::parallel_for(
"ArborX::Experimental::HalfToFull::count",
Expand All @@ -43,11 +43,11 @@ void expandHalfToFull(ExecutionSpace const &space, Offsets &offsets,
});
exclusivePrefixSum(space, offsets);

auto const m = KokkosBlah::lastElement(space, offsets);
KokkosBlah::reallocWithoutInitializing(space, indices, m);
auto const m = KokkosExt::lastElement(space, offsets);
KokkosExt::reallocWithoutInitializing(space, indices, m);

auto counts = KokkosBlah::clone(space, offsets,
"ArborX::Experimental::HalfToFull::counts");
auto counts = KokkosExt::clone(space, offsets,
"ArborX::Experimental::HalfToFull::counts");
Kokkos::parallel_for(
"ArborX::Experimental::HalfToFull::rewrite",
Kokkos::TeamPolicy<ExecutionSpace>(space, n, Kokkos::AUTO, 1),
Expand Down
6 changes: 3 additions & 3 deletions src/details/ArborX_DetailsFDBSCANDenseBox.hpp
Expand Up @@ -118,7 +118,7 @@ struct FDBSCANDenseBoxCallback
, _dense_cell_offsets(dense_cell_offsets)
, _num_dense_cells(dense_cell_offsets.size() - 1)
, _num_points_in_dense_cells(
KokkosBlah::lastElement(exec_space, _dense_cell_offsets))
KokkosExt::lastElement(exec_space, _dense_cell_offsets))
, _permute(permute)
, eps(eps_in)
{}
Expand Down Expand Up @@ -244,8 +244,8 @@ int reorderDenseAndSparseCells(ExecutionSpace const &exec_space,
Kokkos::deep_copy(exec_space, sparse_offset, num_points_in_dense_cells);

auto reordered_permute =
KokkosBlah::cloneWithoutInitializingNorCopying(exec_space, permute);
auto reordered_cell_indices = KokkosBlah::cloneWithoutInitializingNorCopying(
KokkosExt::cloneWithoutInitializingNorCopying(exec_space, permute);
auto reordered_cell_indices = KokkosExt::cloneWithoutInitializingNorCopying(
exec_space, sorted_cell_indices);
Kokkos::parallel_for(
"ArborX::DBSCAN::reorder_cell_indices_and_permutation",
Expand Down
2 changes: 1 addition & 1 deletion src/details/ArborX_DetailsSortUtils.hpp
Expand Up @@ -131,7 +131,7 @@ void applyPermutation(ExecutionSpace const &space,
PermutationView const &permutation, View &view)
{
static_assert(std::is_integral<typename PermutationView::value_type>::value);
auto scratch_view = KokkosBlah::clone(space, view);
auto scratch_view = KokkosExt::clone(space, view);
applyPermutation(space, permutation, scratch_view, view);
}

Expand Down
2 changes: 1 addition & 1 deletion src/details/ArborX_DetailsTreeTraversal.hpp
Expand Up @@ -159,7 +159,7 @@ struct TreeTraversal<BVH, Predicates, Callback, NearestPredicateTag>
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n_queries),
KOKKOS_CLASS_LAMBDA(int i) { offset(i) = getK(_predicates(i)); });
exclusivePrefixSum(space, offset);
int const buffer_size = KokkosBlah::lastElement(space, offset);
int const buffer_size = KokkosExt::lastElement(space, offset);
// Allocate buffer over which to perform heap operations in
// TreeTraversal::nearestQuery() to store nearest leaf nodes found so far.
// It is not possible to anticipate how much memory to allocate since the
Expand Down
17 changes: 9 additions & 8 deletions src/details/ArborX_DetailsUtils.hpp
Expand Up @@ -268,7 +268,7 @@ template <typename T, typename... P>
lastElement(Kokkos::View<T, P...> const &v)
{
using ExecutionSpace = typename Kokkos::View<T, P...>::execution_space;
return KokkosBlah::lastElement(ExecutionSpace{}, v);
return Details::KokkosExt::lastElement(ExecutionSpace{}, v);
}

/** \brief Fills the view with a sequence of numbers
Expand Down Expand Up @@ -540,38 +540,39 @@ reallocWithoutInitializing(View &v, size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
{
using ExecutionSpace = typename View::execution_space;
KokkosBlah::reallocWithoutInitializing(ExecutionSpace{}, v, n0, n1, n2, n3,
n4, n5, n6, n7);
Details::KokkosExt::reallocWithoutInitializing(ExecutionSpace{}, v, n0, n1,
n2, n3, n4, n5, n6, n7);
}

template <typename View>
[[deprecated]] void
reallocWithoutInitializing(View &v, const typename View::array_layout &layout)
{
using ExecutionSpace = typename View::execution_space;
KokkosBlah::reallocWithoutInitializing(ExecutionSpace{}, v, layout);
Details::KokkosExt::reallocWithoutInitializing(ExecutionSpace{}, v, layout);
}

template <typename View>
[[deprecated]] typename View::non_const_type
cloneWithoutInitializingNorCopying(View &v)
{
using ExecutionSpace = typename View::execution_space;
return KokkosBlah::cloneWithoutInitializingNorCopying(ExecutionSpace{}, v);
return Details::KokkosExt::cloneWithoutInitializingNorCopying(
ExecutionSpace{}, v);
}

template <typename ExecutionSpace, typename View>
[[deprecated]] typename View::non_const_type clone(ExecutionSpace const &space,
View &v)
{
return KokkosBlah::clone(space, v);
return Details::KokkosExt::clone(space, v);
}

template <typename View>
[[deprecated]] inline typename View::non_const_type clone(View &v)
{
using ExecutionSpace = typename View::execution_space;
return KokkosBlah::clone(ExecutionSpace{}, v);
return Details::KokkosExt::clone(ExecutionSpace{}, v);
}

namespace Details
Expand All @@ -589,7 +590,7 @@ void computeOffsetsInOrderedView(ExecutionSpace const &exec_space, View view,
auto const n = view.extent_int(0);

int num_offsets;
KokkosBlah::reallocWithoutInitializing(exec_space, offsets, n + 1);
KokkosExt::reallocWithoutInitializing(exec_space, offsets, n + 1);
Kokkos::parallel_scan(
"ArborX::Algorithms::compute_offsets_in_sorted_view",
Kokkos::RangePolicy<ExecutionSpace>(exec_space, 0, n + 1),
Expand Down
14 changes: 7 additions & 7 deletions src/details/ArborX_MinimumSpanningTree.hpp
Expand Up @@ -153,7 +153,7 @@ struct MinimumSpanningTree

if constexpr (use_lower_bounds)
{
KokkosBlah::reallocWithoutInitializing(space, lower_bounds, n);
KokkosExt::reallocWithoutInitializing(space, lower_bounds, n);
Kokkos::deep_copy(space, lower_bounds, 0);
}

Expand All @@ -168,10 +168,10 @@ struct MinimumSpanningTree
0);
if constexpr (Mode == BoruvkaMode::HDBSCAN)
{
KokkosBlah::reallocWithoutInitializing(space, edges_mapping, n - 1);
KokkosBlah::reallocWithoutInitializing(space, sided_parents, n - 1);
KokkosBlah::reallocWithoutInitializing(space, dendrogram_parents,
2 * n - 1);
KokkosExt::reallocWithoutInitializing(space, edges_mapping, n - 1);
KokkosExt::reallocWithoutInitializing(space, sided_parents, n - 1);
KokkosExt::reallocWithoutInitializing(space, dendrogram_parents,
2 * n - 1);
}

// Boruvka iterations
Expand Down Expand Up @@ -276,8 +276,8 @@ struct MinimumSpanningTree

computeParents(space, edges, sided_parents, dendrogram_parents);

KokkosBlah::reallocWithoutInitializing(space, dendrogram_parent_heights,
n - 1);
KokkosExt::reallocWithoutInitializing(space, dendrogram_parent_heights,
n - 1);
Kokkos::parallel_for(
"ArborX::MST::assign_dendrogram_parent_heights",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n - 1),
Expand Down

0 comments on commit 73c8d03

Please sign in to comment.