Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-8620: LatLonShape contains #872

Merged
merged 17 commits into from
Dec 11, 2019
4 changes: 2 additions & 2 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ API Changes
(Jack Conradson via Adrien Grand)

New Features
---------------------
(No changes)

* LUCENE-8620: Add CONTAINS support for LatLonShape and XYShape. (Ignacio Vera)

Improvements

Expand Down
27 changes: 27 additions & 0 deletions lucene/core/src/java/org/apache/lucene/geo/Component2D.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,33 @@ default PointValues.Relation relateTriangle(double aX, double aY, double bX, dou
return relateTriangle(minX, maxX, minY, maxY, aX, aY, bX, bY, cX, cY);
}

/** Used by withinTriangle to check the within relationship between a triangle and the query shape
* (e.g. if the query shape is within the triangle). */
enum WithinRelation {
/** The shape is a candidate for within. Typically this is returned if the query shape is fully inside
* the triangle or if the query shape intersects only edges that do not belong to the original shape. */
CANDIDATE,
/** The query shape intersects an edge that does belong to the original shape or any point of
* the triangle is inside the shape. */
NOTWITHIN,
/** The query shape is disjoint with the triangle. */
DISJOINT
}

/** Compute the within relation of this component2D with a triangle. The bounding box of the
 * triangle is derived from its three vertices and then handed to the bounding-box overload. **/
default WithinRelation withinTriangle(double aX, double aY, boolean ab, double bX, double bY, boolean bc, double cX, double cY, boolean ca) {
  // extremes over the three vertices on the x axis
  final double lowX = StrictMath.min(StrictMath.min(aX, bX), cX);
  final double highX = StrictMath.max(StrictMath.max(aX, bX), cX);
  // extremes over the three vertices on the y axis
  final double lowY = StrictMath.min(StrictMath.min(aY, bY), cY);
  final double highY = StrictMath.max(StrictMath.max(aY, bY), cY);
  return withinTriangle(lowX, highX, lowY, highY, aX, aY, ab, bX, bY, bc, cX, cY, ca);
}

/**
* Compute the within relation of this component2D with a triangle.
* @param minX minimum x value of the triangle's bounding box
* @param maxX maximum x value of the triangle's bounding box
* @param minY minimum y value of the triangle's bounding box
* @param maxY maximum y value of the triangle's bounding box
* @param aX x value of the first vertex
* @param aY y value of the first vertex
* @param ab true if the edge between the first and second vertex belongs to the original shape
* @param bX x value of the second vertex
* @param bY y value of the second vertex
* @param bc true if the edge between the second and third vertex belongs to the original shape
* @param cX x value of the third vertex
* @param cY y value of the third vertex
* @param ca true if the edge between the third and first vertex belongs to the original shape
* @return the {@link WithinRelation} between this component and the triangle
**/
WithinRelation withinTriangle(double minX, double maxX, double minY, double maxY,
double aX, double aY, boolean ab, double bX, double bY, boolean bc, double cX, double cY, boolean ca);


/** Compute whether the bounding boxes are disjoint **/
static boolean disjoint(double minX1, double maxX1, double minY1, double maxY1, double minX2, double maxX2, double minY2, double maxY2) {
return (maxY1 < minY2 || minY1 > maxY2 || maxX1 < minX2 || minX1 > maxX2);
Expand Down
9 changes: 9 additions & 0 deletions lucene/core/src/java/org/apache/lucene/geo/ComponentTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ public Relation relateTriangle(double minX, double maxX, double minY, double max
return Relation.CELL_OUTSIDE_QUERY;
}

@Override
public WithinRelation withinTriangle(double minX, double maxX, double minY, double maxY,
                                     double aX, double aY, boolean ab, double bX, double bY, boolean bc, double cX, double cY, boolean ca) {
  // the relation can only be answered when this node has no children,
  // i.e. the tree holds exactly one component
  final boolean singleComponent = left == null && right == null;
  if (singleComponent == false) {
    throw new IllegalArgumentException("withinTriangle is not supported for shapes with more than one component");
  }
  // delegate to the only component
  return component.withinTriangle(minX, maxX, minY, maxY, aX, aY, ab, bX, bY, bc, cX, cY, ca);
}

/** Returns relation to the provided rectangle */
@Override
public Relation relate(double minX, double maxX, double minY, double maxY) {
Expand Down
58 changes: 58 additions & 0 deletions lucene/core/src/java/org/apache/lucene/geo/Polygon2D.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,64 @@ public Relation relateTriangle(double minX, double maxX, double minY, double max
return relateIndexedTriangle(minX, maxX, minY, maxY, ax, ay, bx, by, cx, cy);
}

@Override
public WithinRelation withinTriangle(double minX, double maxX, double minY, double maxY,
                                     double ax, double ay, boolean ab, double bx, double by, boolean bc, double cx, double cy, boolean ca) {
  // A triangle with two coincident vertices is a line or a point; such a shape
  // cannot contain a polygon, so there is nothing to report.
  final boolean aIsB = ax == bx && ay == by;
  final boolean aIsC = ax == cx && ay == cy;
  final boolean bIsC = bx == cx && by == cy;
  if (aIsB || aIsC || bIsC) {
    return WithinRelation.DISJOINT;
  }

  // Bounding boxes that do not touch rule out any interaction.
  if (Component2D.disjoint(this.minX, this.maxX, this.minY, this.maxY, minX, maxX, minY, maxY)) {
    return WithinRelation.DISJOINT;
  }

  // The vertices belong to the original indexed shape: if the polygon covers any of
  // them, the polygon cannot be within that shape.
  if (contains(ax, ay) || contains(bx, by) || contains(cx, cy)) {
    return WithinRelation.NOTWITHIN;
  }

  // Test each triangle edge against the polygon edges. Crossing an edge that belongs to
  // the original shape means the polygon cannot be within it; crossing only edges that do
  // not belong to the shape keeps the triangle as a candidate.
  // We skip edges at the dateline to support shapes crossing it.
  boolean crossesInternalEdge = false;
  if (tree.crossesLine(minX, maxX, minY, maxY, ax, ay, bx, by)) {
    if (ab) {
      return WithinRelation.NOTWITHIN;
    }
    crossesInternalEdge = true;
  }
  if (tree.crossesLine(minX, maxX, minY, maxY, bx, by, cx, cy)) {
    if (bc) {
      return WithinRelation.NOTWITHIN;
    }
    crossesInternalEdge = true;
  }
  if (tree.crossesLine(minX, maxX, minY, maxY, cx, cy, ax, ay)) {
    if (ca) {
      return WithinRelation.NOTWITHIN;
    }
    crossesInternalEdge = true;
  }

  // Candidate when an edge not belonging to the shape is crossed, or, failing that,
  // when the polygon lies inside the triangle (checked through one of its points).
  if (crossesInternalEdge
      || Component2D.pointInTriangle(minX, maxX, minY, maxY, tree.x1, tree.y1, ax, ay, bx, by, cx, cy)) {
    return WithinRelation.CANDIDATE;
  }
  return WithinRelation.DISJOINT;
}

/** relates an indexed line segment (a "flat triangle") with the polygon */
private Relation relateIndexedLineSegment(double minX, double maxX, double minY, double maxY,
double a2x, double a2y, double b2x, double b2y) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@

import org.apache.lucene.document.ShapeField.QueryRelation; // javadoc
import org.apache.lucene.document.ShapeField.Triangle;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.Line;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.Tessellator;
import org.apache.lucene.index.PointValues; // javadoc
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;

import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
Expand Down Expand Up @@ -93,20 +96,40 @@ public static Field[] createIndexableFields(String fieldName, double lat, double

/**
 * create a query to find all indexed geo shapes that relate to a defined bounding box
 * in the way requested by {@code queryRelation}.
 * <p>
 * For {@link QueryRelation#CONTAINS} with {@code minLongitude > maxLongitude} the box is split
 * into two boxes, one ending at {@link GeoUtils#MAX_LON_INCL} and one starting at
 * {@link GeoUtils#MIN_LON_INCL}; both are added as {@code MUST} clauses.
 **/
public static Query newBoxQuery(String field, QueryRelation queryRelation, double minLatitude, double maxLatitude, double minLongitude, double maxLongitude) {
  if (queryRelation == QueryRelation.CONTAINS && minLongitude > maxLongitude) {
    // Build both halves directly rather than re-entering this method: a longitude outside
    // [MIN_LON_INCL, MAX_LON_INCL] would otherwise satisfy the condition above on every
    // recursive call and never terminate.
    // NOTE(review): handling of out-of-range values is left to the query constructor - confirm it rejects them.
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new LatLonShapeBoundingBoxQuery(field, queryRelation, minLatitude, maxLatitude, minLongitude, GeoUtils.MAX_LON_INCL), BooleanClause.Occur.MUST);
    builder.add(new LatLonShapeBoundingBoxQuery(field, queryRelation, minLatitude, maxLatitude, GeoUtils.MIN_LON_INCL, maxLongitude), BooleanClause.Occur.MUST);
    return builder.build();
  }
  return new LatLonShapeBoundingBoxQuery(field, queryRelation, minLatitude, maxLatitude, minLongitude, maxLongitude);
}

/** create a query to find all indexed geo shapes that relate to a provided linestring (or array of linestrings)
 * as requested by the query relation. For CONTAINS with several linestrings, one required clause is built per linestring.
 * note: does not support dateline crossing
 **/
public static Query newLineQuery(String field, QueryRelation queryRelation, Line... lines) {
  final boolean oneClausePerLine = queryRelation == QueryRelation.CONTAINS && lines.length > 1;
  if (oneClausePerLine == false) {
    // single query handles every other relation, and CONTAINS with at most one line
    return new LatLonShapeLineQuery(field, queryRelation, lines);
  }
  // every line must be matched individually
  final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
  for (Line line : lines) {
    conjunction.add(newLineQuery(field, queryRelation, line), BooleanClause.Occur.MUST);
  }
  return conjunction.build();
}

/** create a query to find all indexed geo shapes that relate to a provided polygon (or array of polygons)
 * as requested by the query relation. For CONTAINS with several polygons, one required clause is built per polygon.
 * note: does not support dateline crossing
 **/
public static Query newPolygonQuery(String field, QueryRelation queryRelation, Polygon... polygons) {
  final boolean oneClausePerPolygon = queryRelation == QueryRelation.CONTAINS && polygons.length > 1;
  if (oneClausePerPolygon == false) {
    // single query handles every other relation, and CONTAINS with at most one polygon
    return new LatLonShapePolygonQuery(field, queryRelation, polygons);
  }
  // every polygon must be matched individually
  final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
  for (Polygon polygon : polygons) {
    conjunction.add(newPolygonQuery(field, queryRelation, polygon), BooleanClause.Occur.MUST);
  }
  return conjunction.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.document;

import org.apache.lucene.document.ShapeField.QueryRelation;
import org.apache.lucene.geo.Component2D;
import org.apache.lucene.geo.Rectangle;
import org.apache.lucene.geo.Rectangle2D;
import org.apache.lucene.index.PointValues.Relation;
Expand Down Expand Up @@ -69,6 +70,16 @@ protected boolean queryMatches(byte[] t, ShapeField.DecodedTriangle scratchTrian
}
}

@Override
protected Component2D.WithinRelation queryWithin(byte[] t, ShapeField.DecodedTriangle scratchTriangle) {
  // fill the scratch holder from the encoded indexed triangle
  ShapeField.decodeTriangle(t, scratchTriangle);

  // the decoded triangle fields are handed to the rectangle unchanged
  final Component2D.WithinRelation relation = rectangle2D.withinTriangle(
      scratchTriangle.aX, scratchTriangle.aY, scratchTriangle.ab,
      scratchTriangle.bX, scratchTriangle.bY, scratchTriangle.bc,
      scratchTriangle.cX, scratchTriangle.cY, scratchTriangle.ca);
  return relation;
}

@Override
public boolean equals(Object o) {
return sameClassAs(o) && equalsTo(getClass().cast(o));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public LatLonShapeLineQuery(String field, QueryRelation queryRelation, Line... l

@Override
protected Relation relateRangeBBoxToQuery(int minXOffset, int minYOffset, byte[] minTriangle,
int maxXOffset, int maxYOffset, byte[] maxTriangle) {
int maxXOffset, int maxYOffset, byte[] maxTriangle) {
double minLat = GeoEncodingUtils.decodeLatitude(NumericUtils.sortableBytesToInt(minTriangle, minYOffset));
double minLon = GeoEncodingUtils.decodeLongitude(NumericUtils.sortableBytesToInt(minTriangle, minXOffset));
double maxLat = GeoEncodingUtils.decodeLatitude(NumericUtils.sortableBytesToInt(maxTriangle, maxYOffset));
Expand Down Expand Up @@ -103,6 +103,20 @@ protected boolean queryMatches(byte[] t, ShapeField.DecodedTriangle scratchTrian
}
}

@Override
protected Component2D.WithinRelation queryWithin(byte[] t, ShapeField.DecodedTriangle scratchTriangle) {
  // fill the scratch holder from the encoded indexed triangle
  ShapeField.decodeTriangle(t, scratchTriangle);

  // convert each vertex to latitude / longitude values
  final double latA = GeoEncodingUtils.decodeLatitude(scratchTriangle.aY);
  final double lonA = GeoEncodingUtils.decodeLongitude(scratchTriangle.aX);
  final double latB = GeoEncodingUtils.decodeLatitude(scratchTriangle.bY);
  final double lonB = GeoEncodingUtils.decodeLongitude(scratchTriangle.bX);
  final double latC = GeoEncodingUtils.decodeLatitude(scratchTriangle.cY);
  final double lonC = GeoEncodingUtils.decodeLongitude(scratchTriangle.cX);

  // x is longitude, y is latitude; edge flags are forwarded untouched
  return line2D.withinTriangle(
      lonA, latA, scratchTriangle.ab,
      lonB, latB, scratchTriangle.bc,
      lonC, latC, scratchTriangle.ca);
}

@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,20 @@ protected boolean queryMatches(byte[] t, ShapeField.DecodedTriangle scratchTrian
}
}

@Override
protected Component2D.WithinRelation queryWithin(byte[] t, ShapeField.DecodedTriangle scratchTriangle) {
  // fill the scratch holder from the encoded indexed triangle
  ShapeField.decodeTriangle(t, scratchTriangle);

  // convert each vertex to latitude / longitude values
  final double latA = GeoEncodingUtils.decodeLatitude(scratchTriangle.aY);
  final double lonA = GeoEncodingUtils.decodeLongitude(scratchTriangle.aX);
  final double latB = GeoEncodingUtils.decodeLatitude(scratchTriangle.bY);
  final double lonB = GeoEncodingUtils.decodeLongitude(scratchTriangle.bX);
  final double latC = GeoEncodingUtils.decodeLatitude(scratchTriangle.cY);
  final double lonC = GeoEncodingUtils.decodeLongitude(scratchTriangle.cX);

  // x is longitude, y is latitude; edge flags are forwarded untouched
  return poly2D.withinTriangle(
      lonA, latA, scratchTriangle.ab,
      lonB, latB, scratchTriangle.bc,
      lonC, latC, scratchTriangle.ca);
}

@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ protected void setTriangleValue(int aX, int aY, boolean abFromShape, int bX, int

/** Query Relation Types **/
public enum QueryRelation {
INTERSECTS, WITHIN, DISJOINT
INTERSECTS, WITHIN, DISJOINT, CONTAINS
}

private static final int MINY_MINX_MAXY_MAXX_Y_X = 0;
Expand Down
72 changes: 66 additions & 6 deletions lucene/sandbox/src/java/org/apache/lucene/document/ShapeQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.Objects;

import org.apache.lucene.document.ShapeField.QueryRelation;
import org.apache.lucene.geo.Component2D;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
Expand Down Expand Up @@ -62,9 +63,11 @@ abstract class ShapeQuery extends Query {
/** field name */
final String field;
/** query relation
* disjoint: {@code CELL_OUTSIDE_QUERY}
* intersects: {@code CELL_CROSSES_QUERY},
* within: {@code CELL_WITHIN_QUERY} */
* disjoint: {@link QueryRelation#DISJOINT},
* intersects: {@link QueryRelation#INTERSECTS},
* within: {@link QueryRelation#WITHIN},
* contains: {@link QueryRelation#CONTAINS}
* */
final QueryRelation queryRelation;

protected ShapeQuery(String field, final QueryRelation queryType) {
Expand All @@ -86,6 +89,9 @@ protected abstract Relation relateRangeBBoxToQuery(int minXOffset, int minYOffse
/** returns true if the provided triangle matches the query */
protected abstract boolean queryMatches(byte[] triangle, ShapeField.DecodedTriangle scratchTriangle, ShapeField.QueryRelation queryRelation);

/**
* Return the within relationship between the query and the indexed shape.
* @param triangle the encoded indexed triangle to test
* @param scratchTriangle reusable holder the encoded triangle can be decoded into
* @return the {@link Component2D.WithinRelation} of the query with respect to the triangle
*/
protected abstract Component2D.WithinRelation queryWithin(byte[] triangle, ShapeField.DecodedTriangle scratchTriangle);

/** relates a range of triangles (internal node) to the query */
protected Relation relateRangeToQuery(byte[] minTriangle, byte[] maxTriangle, QueryRelation queryRelation) {
// compute bounding box of internal node
Expand Down Expand Up @@ -133,11 +139,10 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti

final Weight weight = this;
final Relation rel = relateRangeToQuery(values.getMinPackedValue(), values.getMaxPackedValue(), queryRelation);
if (rel == Relation.CELL_OUTSIDE_QUERY) {
if (rel == Relation.CELL_OUTSIDE_QUERY || (rel == Relation.CELL_INSIDE_QUERY && queryRelation == QueryRelation.CONTAINS)) {
// no documents match the query
return null;
}
else if (values.getDocCount() == reader.maxDoc() && rel == Relation.CELL_INSIDE_QUERY) {
} else if (values.getDocCount() == reader.maxDoc() && rel == Relation.CELL_INSIDE_QUERY) {
// all documents match the query
return new ScorerSupplier() {
@Override
Expand All @@ -152,6 +157,7 @@ public long cost() {
};
} else {
if (queryRelation != QueryRelation.INTERSECTS
&& queryRelation != QueryRelation.CONTAINS
&& hasAnyHits(query, values) == false) {
// First we check if we have any hits so we are fast in the adversarial case where
// the shape does not match any documents and we are in the dense case
Expand Down Expand Up @@ -227,6 +233,7 @@ protected Scorer getScorer(final LeafReader reader, final Weight weight, final f
case INTERSECTS: return getSparseScorer(reader, weight, boost, scoreMode);
case WITHIN:
case DISJOINT: return getDenseScorer(reader, weight, boost, scoreMode);
case CONTAINS: return getContainsDenseScorer(reader, weight, boost, scoreMode);
default: throw new IllegalArgumentException("Unsupported query type :[" + query.getQueryRelation() + "]");
}
}
Expand Down Expand Up @@ -279,6 +286,17 @@ private Scorer getDenseScorer(LeafReader reader, Weight weight, final float boos
return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
}

/** Builds the scorer for CONTAINS: documents flagged as candidates by the visitor, minus the excluded ones. */
private Scorer getContainsDenseScorer(LeafReader reader, Weight weight, final float boost, ScoreMode scoreMode) throws IOException {
  final int maxDoc = reader.maxDoc();
  // documents with at least one candidate triangle
  final FixedBitSet candidates = new FixedBitSet(maxDoc);
  // documents the visitor marked as excluded
  final FixedBitSet rejected = new FixedBitSet(maxDoc);
  // single-slot counter the visitor increments for every candidate it records
  final long[] cost = new long[]{0};

  values.intersect(getContainsDenseVisitor(query, candidates, rejected, cost));

  // an excluded document is dropped even if it was also recorded as a candidate
  candidates.andNot(rejected);
  return new ConstantScoreScorer(weight, boost, scoreMode, new BitSetIterator(candidates, cost[0]));
}

@Override
public long cost() {
if (cost == -1) {
Expand Down Expand Up @@ -390,6 +408,48 @@ public Relation compare(byte[] minTriangle, byte[] maxTriangle) {
};
}

/** create a visitor that records candidate and excluded documents for the query using dense bitsets; used with CONTAINS */
private static IntersectVisitor getContainsDenseVisitor(final ShapeQuery query, final FixedBitSet result, final FixedBitSet excluded, final long[] cost) {
  return new IntersectVisitor() {
    final ShapeField.DecodedTriangle scratchTriangle = new ShapeField.DecodedTriangle();

    /** Files a document under the bitset that corresponds to the relation of one of its triangles. */
    private void collect(int docID, Component2D.WithinRelation within) {
      if (within == Component2D.WithinRelation.CANDIDATE) {
        cost[0]++;
        result.set(docID);
      } else if (within == Component2D.WithinRelation.NOTWITHIN) {
        excluded.set(docID);
      }
      // any other relation leaves both bitsets untouched
    }

    @Override
    public void visit(int docID) {
      // visited without a triangle value: the document is excluded
      excluded.set(docID);
    }

    @Override
    public void visit(int docID, byte[] t) {
      collect(docID, query.queryWithin(t, scratchTriangle));
    }

    @Override
    public void visit(DocIdSetIterator iterator, byte[] t) throws IOException {
      // the relation is computed once and applied to every document of the iterator
      final Component2D.WithinRelation within = query.queryWithin(t, scratchTriangle);
      for (int docID = iterator.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = iterator.nextDoc()) {
        collect(docID, within);
      }
    }

    @Override
    public Relation compare(byte[] minTriangle, byte[] maxTriangle) {
      return query.relateRangeToQuery(minTriangle, maxTriangle, query.getQueryRelation());
    }
  };
}

/** create a visitor that clears documents that do not match the polygon query using a dense bitset; used with WITHIN & DISJOINT */
private static IntersectVisitor getInverseDenseVisitor(final ShapeQuery query, final FixedBitSet result, final long[] cost) {
return new IntersectVisitor() {
Expand Down
Loading