Skip to content

Commit

Permalink
cluster wip
Browse files Browse the repository at this point in the history
  • Loading branch information
dbaston committed Jun 14, 2020
1 parent 099e74b commit 50717e0
Show file tree
Hide file tree
Showing 3 changed files with 408 additions and 0 deletions.
91 changes: 91 additions & 0 deletions include/geos/operation/cluster/Cluster.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#include <geos/geom/Geometry.h>
#include <geos/geom/GeometryFactory.h>
#include <geos/index/strtree/STRtree.h>
#include <geos/operation/cluster/UnionFind.h>

namespace geos {
namespace operation {
namespace cluster {

class Cluster {

public:
Cluster(std::function<bool(const geom::Geometry*, const geom::Geometry*)> pred,
double expansionDistance) :
m_pred(std::move(pred)),
m_expansionDistance(expansionDistance)
{}

static Cluster intersecting() {
return Cluster([](const geom::Geometry* a, const geom::Geometry* b) {
return a->intersects(b);
}, 0);
}

static Cluster intersectingEnvelopes() {
return Cluster([](const geom::Geometry* a, const geom::Geometry* b) {
return a->getEnvelopeInternal()->intersects(b->getEnvelopeInternal());
}, 0);
}

static Cluster withinDistance(double distance) {
return Cluster([distance](const geom::Geometry* a, const geom::Geometry* b) {
return a->isWithinDistance(b, distance);
}, distance);
}

std::vector<std::vector<size_t>> getComponents(geom::Geometry & g) {
index::strtree::STRtree tree;
auto n = g.getNumGeometries();

// The STRtree is currently incapable of storing values, so we need
// to create a vector of integers so that we can store pointers to
// those integers :(
std::vector<size_t> ids(n);
std::iota(ids.begin(), ids.end(), 0);

for (size_t i = 0; i < n; i++) {
const auto& gi = g.getGeometryN(i);
tree.insert(gi->getEnvelopeInternal(), &ids[n]);
}

UnionFind uf(n);
std::vector<void*> hits;
for (size_t i = 0; i < n; i++) {
hits.clear();

const auto& gi = g.getGeometryN(i);

if (m_expansionDistance > 0) {
geom::Envelope queryEnvelope(*gi->getEnvelopeInternal());
queryEnvelope.expandBy(m_expansionDistance);
tree.query(&queryEnvelope, hits);
} else {
tree.query(gi->getEnvelopeInternal(), hits);
}

for (auto& hit : hits) {
size_t j = *static_cast<size_t*>(hit);

if (i == j || uf.find(i) == uf.find(j)) {
continue;
}

const auto& gj = g.getGeometryN(j);
if (m_pred(gi, gj)) {
uf.join(i, j);
}
}
}

return uf.getClusters();
}

private:
std::function<bool(const geom::Geometry*, const geom::Geometry*)> m_pred;
double m_expansionDistance;
};

}
}
}
93 changes: 93 additions & 0 deletions include/geos/operation/cluster/UnionFind.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#include <algorithm>
#include <numeric>
#include <vector>

namespace geos {
namespace operation {
namespace cluster {

class UnionFind {

public:
explicit UnionFind(size_t n) :
clusters(n),
sizes(n),
num_clusters(n) {
std::iota(clusters.begin(), clusters.end(), 0);
std::fill(sizes.begin(), sizes.end(), 1);
}

size_t find(size_t i) {
size_t root = i;

while(clusters[root] != root) {
root = clusters[root];
}

while (i != root) {
size_t next = clusters[i];
clusters[i] = root;
i = next;
}

return i;
}

void join(size_t i, size_t j) {
auto a = find(i);
auto b = find(j);

if (a == b) {
return;
}

if ((sizes[b] > sizes[a]) || (sizes[a] == sizes[b] && b <= a)) {
std::swap(a, b);
}

clusters[a] = clusters[b];
sizes[b] += sizes[a];
sizes[a] = 0;

num_clusters--;
}

std::vector<size_t> getComponentsOrderedByCluster() {
std::vector<size_t> elems;
std::iota(elems.begin(), elems.end(), 1);

std::sort(elems.begin(), elems.end(), [this](size_t a, size_t b) {
return find(a) < find(b);
});

return elems;
}

std::vector<std::vector<size_t>> getClusters() {
auto ordered = getComponentsOrderedByCluster();

std::vector<size_t> components;
std::vector<std::vector<size_t>> clustered;
for (size_t i = 0; i < ordered.size(); i++) {
// Last geometry in the cluster?
if ((i == ordered.size() - 1) || find(ordered[i]) != find(ordered[i+1])) {
clustered.push_back(std::move(components));
components = std::vector<size_t>();
}

components.push_back(ordered[i]);
}

return clustered;
}

private:
std::vector<size_t> clusters;
std::vector<size_t> sizes;
size_t num_clusters;
};

}
}
}

Loading

0 comments on commit 50717e0

Please sign in to comment.