/
ArborX_DetailsExpandHalfToFull.hpp
72 lines (62 loc) · 2.94 KB
/
ArborX_DetailsExpandHalfToFull.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/****************************************************************************
* Copyright (c) 2017-2023 by the ArborX authors *
* All rights reserved. *
* *
* This file is part of the ArborX library. ArborX is *
* distributed under a BSD 3-clause license. For the licensing terms see *
* the LICENSE file in the top-level directory. *
* *
* SPDX-License-Identifier: BSD-3-Clause *
****************************************************************************/
#ifndef ARBORX_DETAILS_EXPAND_HALF_TO_FULL_HPP
#define ARBORX_DETAILS_EXPAND_HALF_TO_FULL_HPP
#include <ArborX_DetailsKokkosExtViewHelpers.hpp>
#include <ArborX_DetailsUtils.hpp>
#include <Kokkos_Core.hpp>
namespace ArborX::Details
{
template <class ExecutionSpace, class Offsets, class Indices>
void expandHalfToFull(ExecutionSpace const &space, Offsets &offsets,
Indices &indices)
{
Kokkos::Profiling::pushRegion("ArborX::Experimental::HalfToFull");
typename Offsets::const_type const offsets_orig = offsets;
typename Indices::const_type const indices_orig = indices;
auto const n = offsets.extent(0) - 1;
offsets = KokkosBlah::cloneWithoutInitializingNorCopying(space, offsets_orig);
Kokkos::deep_copy(space, offsets, 0);
Kokkos::parallel_for(
"ArborX::Experimental::HalfToFull::count",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n), KOKKOS_LAMBDA(int i) {
for (int j = offsets_orig(i); j < offsets_orig(i + 1); ++j)
{
int const k = indices_orig(j);
Kokkos::atomic_increment(&offsets(i));
Kokkos::atomic_increment(&offsets(k));
}
});
exclusivePrefixSum(space, offsets);
auto const m = KokkosBlah::lastElement(space, offsets);
KokkosBlah::reallocWithoutInitializing(space, indices, m);
auto counts = KokkosBlah::clone(space, offsets,
"ArborX::Experimental::HalfToFull::counts");
Kokkos::parallel_for(
"ArborX::Experimental::HalfToFull::rewrite",
Kokkos::TeamPolicy<ExecutionSpace>(space, n, Kokkos::AUTO, 1),
KOKKOS_LAMBDA(
typename Kokkos::TeamPolicy<ExecutionSpace>::member_type const
&member) {
auto const i = member.league_rank();
auto const first = offsets_orig(i);
auto const last = offsets_orig(i + 1);
Kokkos::parallel_for(
Kokkos::TeamVectorRange(member, last - first), [&](int j) {
int const k = indices_orig(first + j);
indices(Kokkos::atomic_fetch_inc(&counts(i))) = k;
indices(Kokkos::atomic_fetch_inc(&counts(k))) = i;
});
});
Kokkos::Profiling::popRegion();
}
} // namespace ArborX::Details
#endif