Skip to content

Commit 1124d89

Browse files
committed
v1.9.1 reduce memory usage for optimizing paths in a graph
1 parent dca53e0 commit 1124d89

File tree

5 files changed

+158
-34
lines changed

5 files changed

+158
-34
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.9.0
1+
1.9.1

bin/ngt/ngt.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ main(int argc, char **argv)
9494
NGT::Optimizer::evaluate(args);
9595
} else if (command == "optimize-search-parameters") {
9696
ngt.optimizeSearchParameters(args);
97+
} else if (command == "refine-anng") {
98+
ngt.refineANNG(args);
9799
#ifndef NGT_SHARED_MEMORY_ALLOCATOR
98100
} else if (command == "extract-query") {
99101
NGT::Optimizer::extractQueries(args);

lib/NGT/Command.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,90 @@ using namespace std;
855855
}
856856
}
857857

858+
void
859+
NGT::Command::refineANNG(Args &args)
860+
{
861+
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
862+
std::cerr << "refineANNG. Not implemented." << std::endl;
863+
abort();
864+
#else
865+
const string usage = "Usage: ngt refine-anng anng-index";
866+
867+
string inIndexPath;
868+
try {
869+
inIndexPath = args.get("#1");
870+
} catch (...) {
871+
cerr << "Input index is not specified" << endl;
872+
cerr << usage << endl;
873+
return;
874+
}
875+
876+
string outIndexPath;
877+
try {
878+
outIndexPath = args.get("#2");
879+
} catch (...) {
880+
cerr << "Output index is not specified" << endl;
881+
cerr << usage << endl;
882+
return;
883+
}
884+
885+
NGT::Index index(inIndexPath);
886+
auto prop = static_cast<GraphIndex&>(index.getIndex()).getGraphProperty();
887+
NGT::ObjectRepository &objectRepository = index.getObjectSpace().getRepository();
888+
NGT::GraphIndex &graphIndex = static_cast<GraphIndex&>(index.getIndex());
889+
size_t nOfObjects = objectRepository.size();
890+
size_t batchSize = 10000;
891+
for (size_t bid = 1; bid < nOfObjects; bid += batchSize) {
892+
NGT::ObjectDistances results[batchSize];
893+
// search
894+
#pragma omp parallel for
895+
for (size_t idx = 0; idx < batchSize; idx++) {
896+
size_t id = bid + idx;
897+
if (objectRepository.isEmpty(id)) {
898+
continue;
899+
}
900+
NGT::SearchContainer searchContainer(*objectRepository.get(id));
901+
searchContainer.setResults(&results[idx]);
902+
searchContainer.setSize(prop.edgeSizeForCreation);
903+
searchContainer.setEpsilon(0.1); // epsilon should be adjusted.
904+
searchContainer.setEdgeSize(0); // use all of the existing edges to obtain high accuracy.
905+
index.search(searchContainer);
906+
}
907+
// outgoing edges
908+
#pragma omp parallel for
909+
for (size_t idx = 0; idx < batchSize; idx++) {
910+
size_t id = bid + idx;
911+
NGT::GraphNode &node = *graphIndex.getNode(id);
912+
for (auto i = results[idx].begin(); i != results[idx].end(); ++i) {
913+
node.push_back(*i);
914+
}
915+
std::sort(node.begin(), node.end());
916+
ObjectID prev = 0;
917+
for (GraphNode::iterator ni = node.begin(); ni != node.end();) {
918+
if (prev == (*ni).id) {
919+
ni = node.erase(ni);
920+
continue;
921+
}
922+
prev = (*ni).id;
923+
ni++;
924+
}
925+
}
926+
// incomming edges
927+
for (size_t idx = 0; idx < batchSize; idx++) {
928+
size_t id = bid + idx;
929+
if (id % 10000 == 0) {
930+
std::cerr << "# of processed objects=" << id << std::endl;
931+
}
932+
for (auto i = results[idx].begin(); i != results[idx].end(); ++i) {
933+
NGT::GraphNode &node = *graphIndex.getNode((*i).id);
934+
graphIndex.addEdge(node, id, (*i).distance, false);
935+
}
936+
}
937+
}
938+
939+
index.saveIndex(outIndexPath);
940+
#endif
941+
}
858942

859943

860944

lib/NGT/Command.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ class Command {
108108
void prune(Args &args);
109109
void reconstructGraph(Args &args);
110110
void optimizeSearchParameters(Args &args);
111+
void refineANNG(Args &args);
111112

112113
void info(Args &args);
113114
void setDebugLevel(int level) { debugLevel = level; }

lib/NGT/GraphReconstructor.h

Lines changed: 70 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ class GraphReconstructor {
9898
std::cerr << id << ":" << rank << ":" << node[rank - 1].id << ":" << node[rank].id << std::endl;
9999
}
100100
NGT::GraphNode &tn = *outGraph.getNode(id);
101-
//////////////////
102101
volatile bool found = false;
103102
if (rank < 1000) {
104103
for (size_t tni = 0; tni < tn.size() && !found; tni++) {
@@ -163,23 +162,53 @@ class GraphReconstructor {
163162
adjustPathsEffectively(outGraph);
164163
}
165164

165+
static bool edgeComp(NGT::ObjectDistance a, NGT::ObjectDistance b) {
166+
return a.id < b.id;
167+
}
168+
169+
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
170+
static void insert(NGT::GraphNode &node, size_t edgeID, NGT::Distance edgeDistance, NGT::GraphIndex &graph) {
171+
NGT::ObjectDistance edge(edgeID, edgeDistance);
172+
GraphNode::iterator ni = std::lower_bound(node.begin(graph.repository.allocator), node.end(graph.repository.allocator), edge, edgeComp);
173+
node.insert(ni, edge, graph.repository.allocator);
174+
}
175+
176+
static bool hasEdge(NGT::GraphIndex &graph, size_t srcNodeID, size_t dstNodeID)
177+
{
178+
NGT::GraphNode &srcNode = *graph.getNode(srcNodeID);
179+
GraphNode::iterator ni = std::lower_bound(srcNode.begin(graph.repository.allocator), srcNode.end(graph.repository.allocator), ObjectDistance(dstNodeID, 0.0), edgeComp);
180+
return (ni != srcNode.end(graph.repository.allocator)) && ((*ni).id == dstNodeID);
181+
}
182+
#else
183+
static void insert(NGT::GraphNode &node, size_t edgeID, NGT::Distance edgeDistance) {
184+
NGT::ObjectDistance edge(edgeID, edgeDistance);
185+
GraphNode::iterator ni = std::lower_bound(node.begin(), node.end(), edge, edgeComp);
186+
node.insert(ni, edge);
187+
}
188+
189+
static bool hasEdge(NGT::GraphIndex &graph, size_t srcNodeID, size_t dstNodeID)
190+
{
191+
NGT::GraphNode &srcNode = *graph.getNode(srcNodeID);
192+
GraphNode::iterator ni = std::lower_bound(srcNode.begin(), srcNode.end(), ObjectDistance(dstNodeID, 0.0), edgeComp);
193+
return (ni != srcNode.end()) && ((*ni).id == dstNodeID);
194+
}
195+
#endif
196+
197+
166198
static void
167199
adjustPathsEffectively(NGT::GraphIndex &outGraph)
168200
{
169201
Timer timer;
170202
timer.start();
171-
size_t rStartRank = 0;
172-
std::vector<std::pair<size_t, NGT::GraphNode> > tmpGraph;
203+
std::vector<NGT::GraphNode> tmpGraph;
173204
for (size_t id = 1; id < outGraph.repository.size(); id++) {
174205
NGT::GraphNode &node = *outGraph.getNode(id);
175-
tmpGraph.push_back(std::pair<size_t, NGT::GraphNode>(id, node));
176-
if (node.size() > rStartRank) {
206+
tmpGraph.push_back(node);
177207
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
178-
node.resize(rStartRank, outGraph.repository.allocator);
208+
node.clear(outGraph.repository.allocator);
179209
#else
180-
node.resize(rStartRank);
210+
node.clear();
181211
#endif
182-
}
183212
}
184213
timer.stop();
185214
std::cerr << "GraphReconstructor::adjustPaths: graph preparing time=" << timer << std::endl;
@@ -193,9 +222,9 @@ class GraphReconstructor {
193222
#endif
194223
for (size_t idx = 0; idx < tmpGraph.size(); ++idx) {
195224
auto it = tmpGraph.begin() + idx;
196-
size_t id = (*it).first;
225+
size_t id = idx + 1;
197226
try {
198-
NGT::GraphNode &srcNode = (*it).second;
227+
NGT::GraphNode &srcNode = *it;
199228
std::unordered_map<uint32_t, std::pair<size_t, double> > neighbors;
200229
for (size_t sni = 0; sni < srcNode.size(); ++sni) {
201230
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
@@ -208,11 +237,9 @@ class GraphReconstructor {
208237
std::vector<std::pair<int, std::pair<uint32_t, uint32_t> > > candidates;
209238
for (size_t sni = 0; sni < srcNode.size(); sni++) {
210239
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
211-
assert(srcNode.at(sni, outGraph.repository.allocator).id == tmpGraph[srcNode.at(sni, outGraph.repository.allocator).id - 1].first);
212-
NGT::GraphNode &pathNode = tmpGraph[srcNode.at(sni, outGraph.repository.allocator).id - 1].second;
240+
NGT::GraphNode &pathNode = tmpGraph[srcNode.at(sni, outGraph.repository.allocator).id - 1];
213241
#else
214-
assert(srcNode[sni].id == tmpGraph[srcNode[sni].id - 1].first);
215-
NGT::GraphNode &pathNode = tmpGraph[srcNode[sni].id - 1].second;
242+
NGT::GraphNode &pathNode = tmpGraph[srcNode[sni].id - 1];
216243
#endif
217244
for (size_t pni = 0; pni < pathNode.size(); pni++) {
218245
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
@@ -242,8 +269,9 @@ class GraphReconstructor {
242269
}
243270
}
244271
sort(candidates.begin(), candidates.end(), std::greater<std::pair<int, std::pair<uint32_t, uint32_t>>>());
272+
removeCandidates[id - 1].reserve(candidates.size());
245273
for (size_t i = 0; i < candidates.size(); i++) {
246-
removeCandidates[idx].push_back(candidates[i].second);
274+
removeCandidates[id - 1].push_back(candidates[i].second);
247275
}
248276
} catch(NGT::Exception &err) {
249277
std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl;
@@ -256,25 +284,27 @@ class GraphReconstructor {
256284
timer.start();
257285

258286
std::list<size_t> ids;
259-
for (auto it = tmpGraph.begin(); it != tmpGraph.end(); ++it) {
260-
size_t id = (*it).first;
261-
ids.push_back(id);
287+
for (size_t idx = 0; idx < tmpGraph.size(); ++idx) {
288+
ids.push_back(idx + 1);
262289
}
263290

264291
int removeCount = 0;
265292
removeCandidateCount = 0;
266-
std::vector<std::unordered_set<uint32_t> > edges(tmpGraph.size());
267293
for (size_t rank = 0; ids.size() != 0; rank++) {
268294
for (auto it = ids.begin(); it != ids.end(); ) {
269295
size_t id = *it;
270296
size_t idx = id - 1;
271297
try {
272-
NGT::GraphNode &srcNode = tmpGraph[idx].second;
298+
NGT::GraphNode &srcNode = tmpGraph[idx];
273299
if (rank >= srcNode.size()) {
274300
if (!removeCandidates[idx].empty()) {
275301
std::cerr << "Something wrong! ID=" << id << " # of remaining candidates=" << removeCandidates[idx].size() << std::endl;
276302
abort();
277303
}
304+
#if !defined(NGT_SHARED_MEMORY_ALLOCATOR)
305+
NGT::GraphNode empty;
306+
tmpGraph[idx] = empty;
307+
#endif
278308
it = ids.erase(it);
279309
continue;
280310
}
@@ -289,14 +319,22 @@ class GraphReconstructor {
289319
size_t path = removeCandidates[idx].back().first;
290320
size_t dst = removeCandidates[idx].back().second;
291321
removeCandidates[idx].pop_back();
292-
if ((edges[idx].find(path) != edges[idx].end()) && (edges[path - 1].find(dst) != edges[path - 1].end())) {
322+
if (removeCandidates[idx].empty()) {
323+
std::vector<std::pair<uint32_t, uint32_t>> empty;
324+
removeCandidates[idx] = empty;
325+
}
326+
if ((hasEdge(outGraph, id, path)) && (hasEdge(outGraph, path, dst))) {
293327
pathExist = true;
294328
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
295329
while (!removeCandidates[idx].empty() && (removeCandidates[idx].back().second == srcNode.at(rank, outGraph.repository.allocator).id)) {
296330
#else
297331
while (!removeCandidates[idx].empty() && (removeCandidates[idx].back().second == srcNode[rank].id)) {
298332
#endif
299333
removeCandidates[idx].pop_back();
334+
if (removeCandidates[idx].empty()) {
335+
std::vector<std::pair<uint32_t, uint32_t>> empty;
336+
removeCandidates[idx] = empty;
337+
}
300338
}
301339
break;
302340
}
@@ -307,21 +345,11 @@ class GraphReconstructor {
307345
continue;
308346
}
309347
}
310-
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
311-
edges[idx].insert(srcNode.at(rank, outGraph.repository.allocator).id);
312-
#else
313-
edges[idx].insert(srcNode[rank].id);
314-
#endif
315348
NGT::GraphNode &outSrcNode = *outGraph.getNode(id);
316349
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
317-
outSrcNode.push_back(NGT::ObjectDistance(srcNode.at(rank, outGraph.repository.allocator).id, srcNode.at(rank, outGraph.repository.allocator).distance), outGraph.repository.allocator);
350+
insert(outSrcNode, srcNode.at(rank, outGraph.repository.allocator).id, srcNode.at(rank, outGraph.repository.allocator).distance, outGraph);
318351
#else
319-
size_t r = outSrcNode.capacity();
320-
size_t s = outSrcNode.size();
321-
outSrcNode.push_back(NGT::ObjectDistance(srcNode[rank].id, srcNode[rank].distance));
322-
if (r != outSrcNode.capacity()) {
323-
std::cerr << id << "-" << rank << " " << s << ":" << r << ":" << outSrcNode.capacity() << std::endl;
324-
}
352+
insert(outSrcNode, srcNode[rank].id, srcNode[rank].distance);
325353
#endif
326354
} catch(NGT::Exception &err) {
327355
std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl;
@@ -331,8 +359,17 @@ class GraphReconstructor {
331359
it++;
332360
}
333361
}
362+
for (size_t id = 1; id < outGraph.repository.size(); id++) {
363+
NGT::GraphNode &node = *outGraph.getNode(id);
364+
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
365+
std::sort(node.begin(outGraph.repository.allocator), node.end(outGraph.repository.allocator));
366+
#else
367+
std::sort(node.begin(), node.end());
368+
#endif
369+
}
334370
}
335371

372+
336373
static
337374
void convertToANNG(std::vector<NGT::ObjectDistances> &graph)
338375
{

0 commit comments

Comments
 (0)