Skip to content

Commit

Permalink
Refactor bounded geogrid aggregations (#86120)
Browse files Browse the repository at this point in the history
This commit refactors the implementation of bounded GeoGrid aggregation into a GeoHashBoundedPredicate and a GeoTileBoundedPredicate to compute bin validity.
  • Loading branch information
iverase committed Apr 27, 2022
1 parent 42e8bd8 commit 1d9c5d7
Show file tree
Hide file tree
Showing 10 changed files with 378 additions and 179 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.elasticsearch.common.geo.GeoBoundingBox;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.Geohash;

/**
 * Decides whether a geohash cell overlaps a {@link GeoBoundingBox} and exposes an upper bound on the number of
 * cells, at the precision given to the constructor, that can overlap that box.
 */
public class GeoHashBoundedPredicate {

    private final boolean crossesDateline;
    private final long maxHashes;
    private final GeoBoundingBox bbox;

    /**
     * @param precision geohash precision used to size the cells when computing {@link #getMaxHashes()}
     * @param bbox      bounds to test cells against; treated as crossing the dateline when its right edge is
     *                  smaller than its left edge
     */
    public GeoHashBoundedPredicate(int precision, GeoBoundingBox bbox) {
        this.crossesDateline = bbox.right() < bbox.left();
        this.bbox = bbox;
        final double cellHeight = Geohash.latHeightInDegrees(precision);
        final double cellWidth = Geohash.lonWidthInDegrees(precision);
        final long rows = upperBoundOnCells(bbox.top() - bbox.bottom(), cellHeight);
        final long columns;
        if (crossesDateline) {
            // the box is made of two pieces: [left, 180] and [-180, right]; bound each piece separately
            final long westOfDateline = upperBoundOnCells(180 - bbox.left(), cellWidth);
            final long eastOfDateline = upperBoundOnCells(bbox.right() + 180, cellWidth);
            columns = westOfDateline + eastOfDateline;
        } else {
            columns = upperBoundOnCells(bbox.right() - bbox.left(), cellWidth);
        }
        this.maxHashes = columns * rows;
    }

    /** Number of cells of size {@code cellSize} needed along {@code extent}: the ratio plus one, rounded up. */
    private static long upperBoundOnCells(double extent, double cellSize) {
        return (long) Math.ceil((extent / cellSize) + 1);
    }

    /**
     * Check if the provided geohash intersects with the bounds given to the constructor.
     *
     * @param geohash the geohash whose bounding box is tested
     * @return {@code true} if the cell and the bounds overlap; cells that merely touch the bounds are rejected
     */
    public boolean validHash(String geohash) {
        final Rectangle cell = Geohash.toBoundingBox(geohash);
        final double south = cell.getMinY();
        final double north = cell.getMaxY();
        // In theory a hash never crosses the dateline, but precision errors and normalization while computing
        // the hash can produce a cell whose max longitude is smaller than its min longitude. Such a cell is
        // tested as two pieces, one on each side of the dateline.
        if (cell.getMaxX() < cell.getMinX()) {
            return intersects(-180, cell.getMaxX(), south, north) || intersects(cell.getMinX(), 180, south, north);
        }
        return intersects(cell.getMinX(), cell.getMaxX(), south, north);
    }

    /** Strict overlap test between the bounds and the rectangle {@code [minX, maxX] x [minY, maxY]}. */
    private boolean intersects(double minX, double maxX, double minY, double maxY) {
        // strict comparisons: touching hashes are excluded
        final boolean overlapsInLatitude = bbox.top() > minY && bbox.bottom() < maxY;
        if (overlapsInLatitude == false) {
            return false;
        }
        final boolean boundsStartBeforeCellEnds = bbox.left() < maxX;
        final boolean boundsEndAfterCellStarts = bbox.right() > minX;
        // when the bounds wrap around the dateline, being on either side of the gap is enough
        return crossesDateline
            ? (boundsStartBeforeCellEnds || boundsEndAfterCellStarts)
            : (boundsStartBeforeCellEnds && boundsEndAfterCellStarts);
    }

    /**
     * Upper bound on the count of geohashes intersecting the bounds at the precision provided in the constructor.
     */
    public long getMaxHashes() {
        return maxHashes;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.elasticsearch.common.geo.GeoBoundingBox;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.Geohash;
import org.elasticsearch.index.fielddata.MultiGeoPointValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
Expand Down Expand Up @@ -73,50 +72,25 @@ protected int advanceValue(org.elasticsearch.common.geo.GeoPoint target, int val

private static class BoundedCellValues extends CellValues {

private final GeoHashBoundedPredicate predicate;
private final GeoBoundingBox bbox;
private final boolean crossesDateline;

BoundedCellValues(MultiGeoPointValues geoValues, int precision, GeoBoundingBox bbox) {
super(geoValues, precision);
this.predicate = new GeoHashBoundedPredicate(precision, bbox);
this.bbox = bbox;
this.crossesDateline = bbox.right() < bbox.left();
}

@Override
protected int advanceValue(org.elasticsearch.common.geo.GeoPoint target, int valuesIdx) {
final String hash = Geohash.stringEncode(target.getLon(), target.getLat(), precision);
if (validPoint(target.getLon(), target.getLat()) || validHash(hash)) {
if (validPoint(target.getLon(), target.getLat()) || predicate.validHash(hash)) {
values[valuesIdx] = Geohash.longEncode(hash);
return valuesIdx + 1;
}
return valuesIdx;
}

private boolean validHash(String hash) {
final Rectangle rect = Geohash.toBoundingBox(hash);
// hashes should not cross in theory the dateline but due to precision
// errors and normalization computing the hash, it might happen that they actually
// crosses the dateline.
if (rect.getMaxX() < rect.getMinX()) {
return intersects(-180, rect.getMaxX(), rect.getMinY(), rect.getMaxY())
|| intersects(rect.getMinX(), 180, rect.getMinY(), rect.getMaxY());
} else {
return intersects(rect.getMinX(), rect.getMaxX(), rect.getMinY(), rect.getMaxY());
}
}

private boolean intersects(double minX, double maxX, double minY, double maxY) {
// touching hashes are excluded
if (bbox.top() > minY && bbox.bottom() < maxY) {
if (crossesDateline) {
return bbox.left() < maxX || bbox.right() > minX;
} else {
return bbox.left() < maxX && bbox.right() > minX;
}
}
return false;
}

private boolean validPoint(double x, double y) {
if (bbox.top() > y && bbox.bottom() < y) {
boolean crossesDateline = bbox.left() > bbox.right();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.elasticsearch.common.geo.GeoBoundingBox;
import org.elasticsearch.geometry.Rectangle;

/**
 * Decides whether a geotile overlaps a {@link GeoBoundingBox} at a given precision. For efficiency the bounds are
 * converted once, in the constructor, to X/Y coordinates of the geotile scheme and all checks are done on integers.
 */
public class GeoTileBoundedPredicate {

    private final boolean crossesDateline;
    private final long maxTiles;
    private final int precision;
    private final int leftX;
    private final int rightX;
    private final int minY;
    private final int maxY;

    /**
     * @param precision geotile precision at which the bounds are converted to tile coordinates
     * @param bbox      bounds to test tiles against; treated as crossing the dateline when its right edge is
     *                  smaller than its left edge
     */
    public GeoTileBoundedPredicate(int precision, GeoBoundingBox bbox) {
        this.crossesDateline = bbox.right() < bbox.left();
        this.precision = precision;
        if (bbox.bottom() > GeoTileUtils.NORMALIZED_LATITUDE_MASK || bbox.top() < GeoTileUtils.NORMALIZED_NEGATIVE_LATITUDE_MASK) {
            // bounds lie entirely outside the latitude limits above: -1 everywhere makes validTile() always return false
            leftX = rightX = minY = maxY = -1;
            maxTiles = 0;
        } else {
            final long tiles = 1L << precision;

            // tile containing the top-left corner of the bounds
            final int cornerLeft = GeoTileUtils.getXTile(bbox.left(), tiles);
            final int cornerTop = GeoTileUtils.getYTile(bbox.top(), tiles);
            final Rectangle topLeftTile = GeoTileUtils.toBoundingBox(cornerLeft, cornerTop, precision);
            // touching tiles are excluded, they need to share at least one interior point
            this.leftX = topLeftTile.getMaxX() == bbox.left() ? cornerLeft + 1 : cornerLeft;
            this.minY = topLeftTile.getMinY() == bbox.top() ? cornerTop + 1 : cornerTop;

            // tile containing the bottom-right corner of the bounds
            final int cornerRight = GeoTileUtils.getXTile(bbox.right(), tiles);
            final int cornerBottom = GeoTileUtils.getYTile(bbox.bottom(), tiles);
            final Rectangle bottomRightTile = GeoTileUtils.toBoundingBox(cornerRight, cornerBottom, precision);
            // touching tiles are excluded, they need to share at least one interior point;
            // the stored upper bounds are exclusive, hence the + 1 when the corner tile is kept
            this.rightX = bottomRightTile.getMinX() == bbox.right() ? cornerRight : cornerRight + 1;
            this.maxY = bottomRightTile.getMaxY() == bbox.bottom() ? cornerBottom : cornerBottom + 1;

            // when crossing the dateline the columns wrap around, so a full row of tiles is added to the difference
            final long columns = crossesDateline ? tiles + this.rightX - this.leftX : this.rightX - this.leftX;
            this.maxTiles = columns * (this.maxY - this.minY);
        }
    }

    /** Whether the bounds provided in the constructor cross the dateline. */
    public boolean crossesDateline() {
        return crossesDateline;
    }

    /** The lower (inclusive) bound on the geotile X coordinate, derived from the left edge of the bounds. */
    public int leftX() {
        return leftX;
    }

    /** The upper (exclusive) bound on the geotile X coordinate, derived from the right edge of the bounds. */
    public int rightX() {
        return rightX;
    }

    /** The lower (inclusive) bound on the geotile Y coordinate, derived from the top edge of the bounds. */
    public int minY() {
        return minY;
    }

    /** The upper (exclusive) bound on the geotile Y coordinate, derived from the bottom edge of the bounds. */
    public int maxY() {
        return maxY;
    }

    /**
     * Check if the provided tile at the provided level intersects with the bounds given to the constructor.
     *
     * @param x         tile X coordinate at {@code precision}
     * @param y         tile Y coordinate at {@code precision}
     * @param precision precision of the tile; must be lower or equal to the precision provided in the constructor
     * @return {@code true} if the tile overlaps the bounds
     */
    public boolean validTile(int x, int y, int precision) {
        assert this.precision >= precision : "input precision bigger than this predicate precision";
        // number of tiles, per axis, that the input tile covers at the precision of this predicate
        final int span = 1 << (this.precision - precision);
        final int firstRow = y * span;
        if (maxY <= firstRow || minY >= firstRow + span) {
            return false;
        }
        final int firstColumn = x * span;
        final boolean startsBeforeRightBound = rightX > firstColumn;
        final boolean endsAfterLeftBound = leftX < firstColumn + span;
        // when the bounds wrap around the dateline, being on either side of the gap is enough
        return crossesDateline
            ? (startsBeforeRightBound || endsAfterLeftBound)
            : (startsBeforeRightBound && endsAfterLeftBound);
    }

    /**
     * Total number of tiles intersecting the bounds at the precision provided in the constructor.
     */
    public long getMaxTiles() {
        return maxTiles;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.elasticsearch.common.geo.GeoBoundingBox;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.index.fielddata.MultiGeoPointValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
Expand Down Expand Up @@ -72,50 +71,24 @@ protected int advanceValue(org.elasticsearch.common.geo.GeoPoint target, int val

private static class BoundedCellValues extends CellValues {

private final boolean crossesDateline;
private final long tiles;
private final int minX, maxX, minY, maxY;
private final GeoTileBoundedPredicate predicate;

protected BoundedCellValues(MultiGeoPointValues geoValues, int precision, GeoBoundingBox bbox) {
super(geoValues, precision);
this.crossesDateline = bbox.right() < bbox.left();
this.predicate = new GeoTileBoundedPredicate(precision, bbox);
this.tiles = 1L << precision;
// compute minX, minY
final int minX = GeoTileUtils.getXTile(bbox.left(), this.tiles);
final int minY = GeoTileUtils.getYTile(bbox.top(), this.tiles);
final Rectangle minTile = GeoTileUtils.toBoundingBox(minX, minY, precision);
// touching tiles are excluded, they need to share at least one interior point
this.minX = minTile.getMaxX() == bbox.left() ? minX + 1 : minX;
this.minY = minTile.getMinY() == bbox.top() ? minY + 1 : minY;
// compute maxX, maxY
final int maxX = GeoTileUtils.getXTile(bbox.right(), this.tiles);
final int maxY = GeoTileUtils.getYTile(bbox.bottom(), this.tiles);
final Rectangle maxTile = GeoTileUtils.toBoundingBox(maxX, maxY, precision);
// touching tiles are excluded, they need to share at least one interior point
this.maxX = maxTile.getMinX() == bbox.right() ? maxX - 1 : maxX;
this.maxY = maxTile.getMaxY() == bbox.bottom() ? maxY - 1 : maxY;
}

@Override
protected int advanceValue(org.elasticsearch.common.geo.GeoPoint target, int valuesIdx) {
final int x = GeoTileUtils.getXTile(target.getLon(), this.tiles);
final int y = GeoTileUtils.getYTile(target.getLat(), this.tiles);
if (validTile(x, y)) {
if (predicate.validTile(x, y, precision)) {
values[valuesIdx] = GeoTileUtils.longEncodeTiles(precision, x, y);
return valuesIdx + 1;
}
return valuesIdx;
}

private boolean validTile(int x, int y) {
if (maxY >= y && minY <= y) {
if (crossesDateline) {
return maxX >= x || minX <= x;
} else {
return maxX >= x && minX <= x;
}
}
return false;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.apache.lucene.tests.geo.GeoTestUtil;
import org.elasticsearch.common.geo.GeoBoundingBox;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.Geohash;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.Matchers;

public class GeoHashBoundedPredicateTests extends ESTestCase {

    /** A box equal to one geohash cell accepts that cell and rejects each of the neighbouring hashes listed below. */
    public void testValidTile() {
        final String hash = "bcd";
        final int precision = 3;
        final GeoHashBoundedPredicate predicate = new GeoHashBoundedPredicate(precision, boundsOfCell(hash));
        // the same tile should be valid
        assertEquals(true, predicate.validHash(hash));
        // neighbour tiles only touching should not be valid
        assertEquals(false, predicate.validHash("bcc"));
        assertEquals(false, predicate.validHash("bcf"));
        assertEquals(false, predicate.validHash("bcg"));
        assertEquals(false, predicate.validHash("bc9"));
        assertEquals(false, predicate.validHash("bce"));
        assertEquals(false, predicate.validHash("bc3"));
        assertEquals(false, predicate.validHash("bc6"));
        assertEquals(false, predicate.validHash("bc7"));
    }

    /** For a box equal to a random cell, a hash is accepted exactly when it is that cell. */
    public void testRandomValidTile() {
        final int precision = randomIntBetween(1, Geohash.PRECISION);
        final double seedLon = GeoTestUtil.nextLongitude();
        final double seedLat = GeoTestUtil.nextLatitude();
        final String hash = Geohash.stringEncode(seedLon, seedLat, precision);
        final GeoBoundingBox bbox = boundsOfCell(hash);
        final GeoHashBoundedPredicate predicate = new GeoHashBoundedPredicate(precision, bbox);

        // the four corners of the cell
        final double west = bbox.left();
        final double east = bbox.right();
        final double north = bbox.top();
        final double south = bbox.bottom();
        assertPredicates(hash, predicate, west, south);
        assertPredicates(hash, predicate, west, north);
        assertPredicates(hash, predicate, east, north);
        assertPredicates(hash, predicate, east, south);

        // random points anywhere on the globe
        for (int round = 0; round < 10000; round++) {
            final double lon = GeoTestUtil.nextLongitude();
            final double lat = GeoTestUtil.nextLatitude();
            assertPredicates(hash, predicate, lon, lat);
        }
    }

    /** For a box equal to a cell, the reported maximum at a finer precision is at least 32 per extra level. */
    public void testMaxHash() {
        final int precision = randomIntBetween(1, Geohash.PRECISION);
        final double seedLon = GeoTestUtil.nextLongitude();
        final double seedLat = GeoTestUtil.nextLatitude();
        final GeoBoundingBox bbox = boundsOfCell(Geohash.stringEncode(seedLon, seedLat, precision));
        for (int finer = precision; finer < Geohash.PRECISION; finer++) {
            final GeoHashBoundedPredicate predicate = new GeoHashBoundedPredicate(finer, bbox);
            final long expectedAtLeast = (long) Math.pow(32, (finer - precision));
            // not exact due to numerical errors
            assertThat(predicate.getMaxHashes(), Matchers.greaterThanOrEqualTo(expectedAtLeast));
        }
    }

    /** Encodes the point at the precision of {@code hash} and checks it is valid exactly when it equals {@code hash}. */
    private void assertPredicates(String hash, GeoHashBoundedPredicate predicate, double lon, double lat) {
        final String encoded = Geohash.stringEncode(lon, lat, hash.length());
        final boolean sameCell = encoded.equals(hash);
        assertEquals(sameCell, predicate.validHash(encoded));
    }

    /** Builds the bounding box that coincides with the cell of the given geohash. */
    private static GeoBoundingBox boundsOfCell(String hash) {
        final Rectangle cell = Geohash.toBoundingBox(hash);
        final GeoPoint topLeft = new GeoPoint(cell.getMaxLat(), cell.getMinLon());
        final GeoPoint bottomRight = new GeoPoint(cell.getMinLat(), cell.getMaxLon());
        return new GeoBoundingBox(topLeft, bottomRight);
    }
}

0 comments on commit 1d9c5d7

Please sign in to comment.