forked from arborx/ArborX
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ArborX_DetailsDistributedTreeNearest.hpp
244 lines (210 loc) · 10.4 KB
/
ArborX_DetailsDistributedTreeNearest.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/****************************************************************************
* Copyright (c) 2017-2023 by the ArborX authors *
* All rights reserved. *
* *
* This file is part of the ArborX library. ArborX is *
* distributed under a BSD 3-clause license. For the licensing terms see *
* the LICENSE file in the top-level directory. *
* *
* SPDX-License-Identifier: BSD-3-Clause *
****************************************************************************/
#ifndef ARBORX_DETAILS_DISTRIBUTED_TREE_NEAREST_HPP
#define ARBORX_DETAILS_DISTRIBUTED_TREE_NEAREST_HPP
#include <ArborX_DetailsDistributedTreeImpl.hpp>
#include <ArborX_DetailsDistributedTreeNearestHelpers.hpp>
#include <ArborX_DetailsDistributedTreeUtils.hpp>
#include <ArborX_DetailsKokkosExtKernelStdAlgorithms.hpp>
#include <ArborX_DetailsKokkosExtMinMaxOperations.hpp>
#include <ArborX_DetailsKokkosExtStdAlgorithms.hpp>
#include <ArborX_DetailsKokkosExtViewHelpers.hpp>
#include <ArborX_Predicates.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_Profiling_ScopedRegion.hpp>
// Don't really need it, but our self containment tests rely on its presence
#include <mpi.h>
namespace ArborX::Details
{
template <typename Value>
struct PairValueDistance
{
Value value;
float distance;
};
template <typename ExecutionSpace, typename Tree, typename Predicates,
typename Distances>
void DistributedTreeImpl::phaseI(ExecutionSpace const &space, Tree const &tree,
Predicates const &predicates,
Distances &farthest_distances)
{
std::string prefix = "ArborX::DistributedTree::query::nearest::phaseI";
Kokkos::Profiling::ScopedRegion guard(prefix);
using namespace DistributedTree;
using MemorySpace = typename Tree::memory_space;
auto comm = tree.getComm();
// Find the k nearest local trees.
Kokkos::View<int *, MemorySpace> offset(prefix + "::offset", 0);
Kokkos::View<int *, MemorySpace> nearest_ranks(prefix + "::nearest_ranks", 0);
tree._top_tree.query(space, predicates, nearest_ranks, offset);
// Accumulate total leave count in the local trees until it reaches k which
// is the number of neighbors queried for. Stop if local trees get
// empty because it means that there are no more leaves and there is no point
// in forwarding predicates to leafless trees.
auto const n_predicates = predicates.size();
auto const &bottom_tree_sizes = tree._bottom_tree_sizes;
Kokkos::View<int *, MemorySpace> new_offset(
Kokkos::view_alloc(space, offset.label()), n_predicates + 1);
Kokkos::parallel_for(
"ArborX::DistributedTree::query::nearest::"
"bottom_trees_with_required_cumulated_leaves_count",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n_predicates),
KOKKOS_LAMBDA(int i) {
int leaves_count = 0;
int const n_nearest_neighbors = getK(predicates(i));
for (int j = offset(i); j < offset(i + 1); ++j)
{
int const bottom_tree_size = bottom_tree_sizes(nearest_ranks(j));
if ((bottom_tree_size == 0) || (leaves_count >= n_nearest_neighbors))
break;
leaves_count += bottom_tree_size;
++new_offset(i);
}
});
KokkosExt::exclusive_scan(space, new_offset, new_offset, 0);
// Truncate results so that predicates will only be forwarded to as many local
// trees as necessary to find k neighbors.
Kokkos::View<int *, MemorySpace> new_nearest_ranks(
Kokkos::view_alloc(space, nearest_ranks.label()),
KokkosExt::lastElement(space, new_offset));
Kokkos::parallel_for(
prefix + "::truncate_before_forwarding",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n_predicates),
KOKKOS_LAMBDA(int i) {
for (int j = 0; j < new_offset(i + 1) - new_offset(i); ++j)
new_nearest_ranks(new_offset(i) + j) = nearest_ranks(offset(i) + j);
});
offset = new_offset;
nearest_ranks = new_nearest_ranks;
auto const &bottom_tree = tree._bottom_tree;
using BottomTree = std::decay_t<decltype(bottom_tree)>;
// Gather distances from every identified rank
Kokkos::View<float *, MemorySpace> distances(prefix + "::distances", 0);
forwardQueriesAndCommunicateResults(
comm, space, bottom_tree, predicates,
CallbackWithDistance<BottomTree, DefaultCallback, float, false>(
space, bottom_tree, DefaultCallback{}),
nearest_ranks, offset, distances);
// Postprocess distances to find the k-th farthest
Kokkos::parallel_for(
prefix + "::compute_farthest_distances",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, predicates.size()),
KOKKOS_LAMBDA(int i) {
auto const num_distances = offset(i + 1) - offset(i);
if (num_distances == 0)
return;
auto const k = KokkosExt::min(getK(predicates(i)), num_distances) - 1;
auto *begin = distances.data() + offset(i);
KokkosExt::nth_element(begin, begin + k, begin + num_distances);
farthest_distances(i) = *(begin + k);
});
}
template <typename ExecutionSpace, typename Tree, typename Predicates,
typename Callback, typename Distances, typename Offset,
typename Values>
void DistributedTreeImpl::phaseII(ExecutionSpace const &space, Tree const &tree,
Predicates const &predicates,
Callback const &callback,
Distances &distances, Offset &offset,
Values &values)
{
std::string prefix = "ArborX::DistributedTree::query::nearest::phaseII";
Kokkos::Profiling::ScopedRegion guard(prefix);
using MemorySpace = typename Tree::memory_space;
Kokkos::View<int *, MemorySpace> nearest_ranks(prefix + "::nearest_ranks", 0);
tree._top_tree.query(space,
WithinDistanceFromPredicates<Predicates, Distances>{
predicates, distances},
nearest_ranks, offset);
auto const &bottom_tree = tree._bottom_tree;
using BottomTree = std::decay_t<decltype(bottom_tree)>;
// NOTE: in principle, we could perform radius searches on the bottom_tree
// rather than nearest predicates.
Kokkos::View<PairValueDistance<typename Values::value_type> *, MemorySpace>
out(prefix + "::pairs_value_distance", 0);
Kokkos::View<int *, MemorySpace> ranks(prefix + "::ranks", 0);
DistributedTree::forwardQueriesAndCommunicateResults(
tree.getComm(), space, bottom_tree, predicates,
CallbackWithDistance<BottomTree, Callback, typename Values::value_type,
true>(space, bottom_tree, callback),
nearest_ranks, offset, out, &ranks);
// Unzip
auto n = out.extent(0);
KokkosExt::reallocWithoutInitializing(space, values, n);
KokkosExt::reallocWithoutInitializing(space, distances, n);
Kokkos::parallel_for(
prefix + "::split_index_distance_pairs",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n), KOKKOS_LAMBDA(int i) {
values(i) = out(i).value;
distances(i) = out(i).distance;
});
DistributedTree::filterResults(space, predicates, distances, values, offset);
}
template <typename Tree, typename ExecutionSpace, typename Predicates,
typename Callback, typename Values, typename Offset>
std::enable_if_t<Kokkos::is_view_v<Values> && Kokkos::is_view_v<Offset>>
DistributedTreeImpl::queryDispatchImpl(NearestPredicateTag, Tree const &tree,
ExecutionSpace const &space,
Predicates const &predicates,
Callback const &callback, Values &values,
Offset &offset)
{
std::string prefix = "ArborX::DistributedTree::query::nearest";
Kokkos::Profiling::ScopedRegion guard(prefix);
if (tree.empty())
{
KokkosExt::reallocWithoutInitializing(space, values, 0);
KokkosExt::reallocWithoutInitializing(space, offset, predicates.size() + 1);
Kokkos::deep_copy(space, offset, 0);
return;
}
Kokkos::View<float *, typename Tree::memory_space> farthest_distances(
Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
prefix + "::farthest_distances"),
predicates.size());
// In the first phase, the predicates are sent to as many ranks as necessary
// to guarantee that all k neighbors queried for are found. The farthest
// distances are determined to reduce the communication in the second phase.
phaseI(space, tree, predicates, farthest_distances);
// In the second phase, predicates are sent again to all ranks that may have a
// neighbor closer to the farthest neighbor identified in the first pass. It
// is guaranteed that the nearest k neighbors are within that distance.
//
// The current implementation discards the results after the first phase.
// Everything is recomputed from scratch instead of just searching for
// potential better neighbors and updating the list.
phaseII(space, tree, predicates, callback, farthest_distances, offset,
values);
}
template <typename Tree, typename ExecutionSpace, typename Predicates,
typename Values, typename Offset>
std::enable_if_t<Kokkos::is_view_v<Values> && Kokkos::is_view_v<Offset>>
DistributedTreeImpl::queryDispatch(NearestPredicateTag tag, Tree const &tree,
ExecutionSpace const &space,
Predicates const &predicates, Values &values,
Offset &offset)
{
queryDispatchImpl(tag, tree, space, predicates, DefaultCallback{}, values,
offset);
}
template <typename Tree, typename ExecutionSpace, typename Predicates,
typename Callback, typename Values, typename Offset>
std::enable_if_t<Kokkos::is_view_v<Values> && Kokkos::is_view_v<Offset>>
DistributedTreeImpl::queryDispatch(NearestPredicateTag tag, Tree const &tree,
ExecutionSpace const &space,
Predicates const &predicates,
Callback const &callback, Values &values,
Offset &offset)
{
queryDispatchImpl(tag, tree, space, predicates, callback, values, offset);
}
} // namespace ArborX::Details
#endif