Skip to content

Commit

Permalink
LUCENE-8620: Add CONTAINS support for LatLonShape and XYShape (#872)
Browse files Browse the repository at this point in the history
  • Loading branch information
iverase committed Dec 11, 2019
1 parent dda88f7 commit a06a2ea
Show file tree
Hide file tree
Showing 37 changed files with 1,287 additions and 160 deletions.
4 changes: 2 additions & 2 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ API Changes
(Jack Conradson via Adrien Grand)

New Features
---------------------
(No changes)

* LUCENE-8620: Add CONTAINS support for LatLonShape and XYShape. (Ignacio Vera)

Improvements

Expand Down
27 changes: 27 additions & 0 deletions lucene/core/src/java/org/apache/lucene/geo/Component2D.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,33 @@ default PointValues.Relation relateTriangle(double aX, double aY, double bX, dou
return relateTriangle(minX, maxX, minY, maxY, aX, aY, bX, bY, cX, cY);
}

/** Used by withinTriangle to describe the within relationship between a triangle and the query shape
* (e.g. whether the query shape is within the triangle). Each value describes a single triangle only;
* a caller has to combine the values of all triangles of an indexed shape to decide whether that
* shape contains the query shape. */
enum WithinRelation {
/** The triangle makes the shape a candidate for within. Typically this is returned if the query shape
* is fully inside the triangle or if the query shape intersects only edges that do not belong to the
* original shape. */
CANDIDATE,
/** The query shape intersects an edge that does belong to the original shape or any point of
* the triangle is inside the query shape, so the query shape cannot be within the original shape. */
NOTWITHIN,
/** The query shape is disjoint with the triangle. */
DISJOINT
}

/**
 * Compute the within relation of this component2D with a triangle.
 * <p>
 * Convenience variant: the bounding box of the triangle is derived from its three vertices and the
 * call is forwarded to the variant that takes the bounding box explicitly. The flags {@code ab},
 * {@code bc} and {@code ca} are forwarded unchanged.
 **/
default WithinRelation withinTriangle(double aX, double aY, boolean ab, double bX, double bY, boolean bc, double cX, double cY, boolean ca) {
  // horizontal extent of the triangle
  final double lowX = StrictMath.min(StrictMath.min(aX, bX), cX);
  final double highX = StrictMath.max(StrictMath.max(aX, bX), cX);
  // vertical extent of the triangle
  final double lowY = StrictMath.min(StrictMath.min(aY, bY), cY);
  final double highY = StrictMath.max(StrictMath.max(aY, bY), cY);
  return withinTriangle(lowX, highX, lowY, highY, aX, aY, ab, bX, bY, bc, cX, cY, ca);
}

/**
* Compute the within relation of this component2D with a triangle whose bounding box is already known.
* <p>
* {@code minX}, {@code maxX}, {@code minY} and {@code maxY} are the bounding box of the triangle
* {@code (aX, aY)}, {@code (bX, bY)}, {@code (cX, cY)}. The flags {@code ab}, {@code bc} and
* {@code ca} tell whether the corresponding triangle edge belongs to the original shape.
*
* @return the {@link WithinRelation} between this component (the query shape) and the triangle
**/
WithinRelation withinTriangle(double minX, double maxX, double minY, double maxY,
double aX, double aY, boolean ab, double bX, double bY, boolean bc, double cX, double cY, boolean ca);


/** Compute whether the bounding boxes are disjoint **/
static boolean disjoint(double minX1, double maxX1, double minY1, double maxY1, double minX2, double maxX2, double minY2, double maxY2) {
return (maxY1 < minY2 || minY1 > maxY2 || maxX1 < minX2 || minX1 > maxX2);
Expand Down
9 changes: 9 additions & 0 deletions lucene/core/src/java/org/apache/lucene/geo/ComponentTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ public Relation relateTriangle(double minX, double maxX, double minY, double max
return Relation.CELL_OUTSIDE_QUERY;
}

/**
 * Computes the within relation by delegating to the component held by this node.
 *
 * @throws IllegalArgumentException if this node has a left or right child, i.e. the shape is made
 *         of more than one component
 */
@Override
public WithinRelation withinTriangle(double minX, double maxX, double minY, double maxY,
                                     double aX, double aY, boolean ab, double bX, double bY, boolean bc, double cX, double cY, boolean ca) {
  final boolean singleComponent = left == null && right == null;
  if (singleComponent) {
    return component.withinTriangle(minX, maxX, minY, maxY, aX, aY, ab, bX, bY, bc, cX, cY, ca);
  }
  throw new IllegalArgumentException("withinTriangle is not supported for shapes with more than one component");
}

/** Returns relation to the provided rectangle */
@Override
public Relation relate(double minX, double maxX, double minY, double maxY) {
Expand Down
58 changes: 58 additions & 0 deletions lucene/core/src/java/org/apache/lucene/geo/Polygon2D.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,64 @@ public Relation relateTriangle(double minX, double maxX, double minY, double max
return relateIndexedTriangle(minX, maxX, minY, maxY, ax, ay, bx, by, cx, cy);
}

/**
 * Computes the within relation between this polygon (the query shape) and a triangle of an
 * indexed shape.
 * <p>
 * {@code minX}, {@code maxX}, {@code minY}, {@code maxY} describe the bounding box of the triangle;
 * {@code ab}, {@code bc} and {@code ca} tell whether the corresponding triangle edge belongs to the
 * original indexed shape.
 *
 * @return {@link WithinRelation#NOTWITHIN} if a triangle vertex is inside the polygon or the polygon
 *         crosses an edge that belongs to the shape; {@link WithinRelation#CANDIDATE} if the polygon
 *         only crosses edges that do not belong to the shape, or crosses no edge and lies inside the
 *         triangle; {@link WithinRelation#DISJOINT} otherwise
 */
@Override
public WithinRelation withinTriangle(double minX, double maxX, double minY, double maxY,
                                     double ax, double ay, boolean ab, double bx, double by, boolean bc, double cx, double cy, boolean ca) {
  // short cut: a triangle with two coincident vertices is a line or a point,
  // and lines and points cannot contain this type of shape
  final boolean degenerate = (ax == bx && ay == by) || (ax == cx && ay == cy) || (bx == cx && by == cy);
  if (degenerate || Component2D.disjoint(this.minX, this.maxX, this.minY, this.maxY, minX, maxX, minY, maxY)) {
    return WithinRelation.DISJOINT;
  }

  // the vertices belong to the original indexed shape, so if any of them is inside the polygon,
  // the polygon cannot be within that shape
  if (contains(ax, ay) || contains(bx, by) || contains(cx, cy)) {
    return WithinRelation.NOTWITHIN;
  }

  // Crossing an edge that belongs to the shape rules out within immediately. Crossing an edge
  // that does not belong to the shape only makes the triangle a candidate; per the original note,
  // ignoring such edges is what allows shapes crossing the dateline to be supported.
  boolean crossesNonShapeEdge = false;

  if (tree.crossesLine(minX, maxX, minY, maxY, ax, ay, bx, by)) {
    if (ab) {
      return WithinRelation.NOTWITHIN;
    }
    crossesNonShapeEdge = true;
  }

  if (tree.crossesLine(minX, maxX, minY, maxY, bx, by, cx, cy)) {
    if (bc) {
      return WithinRelation.NOTWITHIN;
    }
    crossesNonShapeEdge = true;
  }

  if (tree.crossesLine(minX, maxX, minY, maxY, cx, cy, ax, ay)) {
    if (ca) {
      return WithinRelation.NOTWITHIN;
    }
    crossesNonShapeEdge = true;
  }

  // only edges that do not belong to the shape were crossed: candidate for within
  if (crossesNonShapeEdge) {
    return WithinRelation.CANDIDATE;
  }

  // no edge is crossed: test one point of the polygon to see whether the polygon is within the triangle
  if (Component2D.pointInTriangle(minX, maxX, minY, maxY, tree.x1, tree.y1, ax, ay, bx, by, cx, cy)) {
    return WithinRelation.CANDIDATE;
  }
  return WithinRelation.DISJOINT;
}

/** relates an indexed line segment (a "flat triangle") with the polygon */
private Relation relateIndexedLineSegment(double minX, double maxX, double minY, double maxY,
double a2x, double a2y, double b2x, double b2y) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@

import org.apache.lucene.document.ShapeField.QueryRelation; // javadoc
import org.apache.lucene.document.ShapeField.Triangle;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.Line;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.Tessellator;
import org.apache.lucene.index.PointValues; // javadoc
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;

import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
Expand Down Expand Up @@ -93,20 +96,40 @@ public static Field[] createIndexableFields(String fieldName, double lat, double

/**
 * create a query to find all indexed geo shapes that satisfy the given {@code queryRelation}
 * with a defined bounding box.
 * <p>
 * For {@link QueryRelation#CONTAINS} with {@code minLongitude > maxLongitude} (a box crossing the
 * dateline) the box is split into two boxes, one ending at {@link GeoUtils#MAX_LON_INCL} and one
 * starting at {@link GeoUtils#MIN_LON_INCL}, and a shape has to match both of them.
 **/
public static Query newBoxQuery(String field, QueryRelation queryRelation, double minLatitude, double maxLatitude, double minLongitude, double maxLongitude) {
  if (queryRelation == QueryRelation.CONTAINS && minLongitude > maxLongitude) {
    // Build the two halves directly instead of calling newBoxQuery again: with an out-of-range
    // longitude (minLongitude above MAX_LON_INCL or maxLongitude below MIN_LON_INCL) the recursive
    // call would satisfy this same condition with the same arguments and never terminate.
    // NOTE(review): rejecting such values is left to the query constructor - confirm it validates.
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new LatLonShapeBoundingBoxQuery(field, queryRelation, minLatitude, maxLatitude, minLongitude, GeoUtils.MAX_LON_INCL), BooleanClause.Occur.MUST);
    builder.add(new LatLonShapeBoundingBoxQuery(field, queryRelation, minLatitude, maxLatitude, GeoUtils.MIN_LON_INCL, maxLongitude), BooleanClause.Occur.MUST);
    return builder.build();
  }
  return new LatLonShapeBoundingBoxQuery(field, queryRelation, minLatitude, maxLatitude, minLongitude, maxLongitude);
}

/**
 * create a query to find all indexed geo shapes that intersect a provided linestring (or array of linestrings)
 * note: does not support dateline crossing
 * <p>
 * With {@link QueryRelation#CONTAINS} and more than one line, one query per line is built and a
 * shape has to match all of them.
 **/
public static Query newLineQuery(String field, QueryRelation queryRelation, Line... lines) {
  final boolean onePerLine = queryRelation == QueryRelation.CONTAINS && lines.length > 1;
  if (onePerLine == false) {
    return new LatLonShapeLineQuery(field, queryRelation, lines);
  }
  final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
  for (Line line : lines) {
    conjunction.add(newLineQuery(field, queryRelation, line), BooleanClause.Occur.MUST);
  }
  return conjunction.build();
}

/**
 * create a query to find all indexed geo shapes that intersect a provided polygon (or array of polygons)
 * note: does not support dateline crossing
 * <p>
 * With {@link QueryRelation#CONTAINS} and more than one polygon, one query per polygon is built
 * and a shape has to match all of them.
 **/
public static Query newPolygonQuery(String field, QueryRelation queryRelation, Polygon... polygons) {
  final boolean onePerPolygon = queryRelation == QueryRelation.CONTAINS && polygons.length > 1;
  if (onePerPolygon == false) {
    return new LatLonShapePolygonQuery(field, queryRelation, polygons);
  }
  final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
  for (Polygon polygon : polygons) {
    conjunction.add(newPolygonQuery(field, queryRelation, polygon), BooleanClause.Occur.MUST);
  }
  return conjunction.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.document;

import org.apache.lucene.document.ShapeField.QueryRelation;
import org.apache.lucene.geo.Component2D;
import org.apache.lucene.geo.Rectangle;
import org.apache.lucene.geo.Rectangle2D;
import org.apache.lucene.index.PointValues.Relation;
Expand Down Expand Up @@ -69,6 +70,16 @@ protected boolean queryMatches(byte[] t, ShapeField.DecodedTriangle scratchTrian
}
}

/**
 * Decodes the indexed triangle {@code t} into {@code scratchTriangle} and returns its within
 * relation with the query rectangle. The vertex coordinates are handed to the rectangle exactly as
 * they come out of the decoded triangle.
 */
@Override
protected Component2D.WithinRelation queryWithin(byte[] t, ShapeField.DecodedTriangle scratchTriangle) {
  ShapeField.decodeTriangle(t, scratchTriangle);

  final ShapeField.DecodedTriangle tri = scratchTriangle;
  return rectangle2D.withinTriangle(
      tri.aX, tri.aY, tri.ab,
      tri.bX, tri.bY, tri.bc,
      tri.cX, tri.cY, tri.ca);
}

@Override
public boolean equals(Object o) {
return sameClassAs(o) && equalsTo(getClass().cast(o));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public LatLonShapeLineQuery(String field, QueryRelation queryRelation, Line... l

@Override
protected Relation relateRangeBBoxToQuery(int minXOffset, int minYOffset, byte[] minTriangle,
int maxXOffset, int maxYOffset, byte[] maxTriangle) {
int maxXOffset, int maxYOffset, byte[] maxTriangle) {
double minLat = GeoEncodingUtils.decodeLatitude(NumericUtils.sortableBytesToInt(minTriangle, minYOffset));
double minLon = GeoEncodingUtils.decodeLongitude(NumericUtils.sortableBytesToInt(minTriangle, minXOffset));
double maxLat = GeoEncodingUtils.decodeLatitude(NumericUtils.sortableBytesToInt(maxTriangle, maxYOffset));
Expand Down Expand Up @@ -103,6 +103,20 @@ protected boolean queryMatches(byte[] t, ShapeField.DecodedTriangle scratchTrian
}
}

/**
 * Decodes the indexed triangle {@code t} into {@code scratchTriangle}, converts its encoded vertex
 * coordinates to latitude/longitude values and returns the within relation of the triangle with
 * the query line.
 */
@Override
protected Component2D.WithinRelation queryWithin(byte[] t, ShapeField.DecodedTriangle scratchTriangle) {
  ShapeField.decodeTriangle(t, scratchTriangle);

  // vertex a
  final double aLon = GeoEncodingUtils.decodeLongitude(scratchTriangle.aX);
  final double aLat = GeoEncodingUtils.decodeLatitude(scratchTriangle.aY);
  // vertex b
  final double bLon = GeoEncodingUtils.decodeLongitude(scratchTriangle.bX);
  final double bLat = GeoEncodingUtils.decodeLatitude(scratchTriangle.bY);
  // vertex c
  final double cLon = GeoEncodingUtils.decodeLongitude(scratchTriangle.cX);
  final double cLat = GeoEncodingUtils.decodeLatitude(scratchTriangle.cY);

  return line2D.withinTriangle(
      aLon, aLat, scratchTriangle.ab,
      bLon, bLat, scratchTriangle.bc,
      cLon, cLat, scratchTriangle.ca);
}

@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,20 @@ protected boolean queryMatches(byte[] t, ShapeField.DecodedTriangle scratchTrian
}
}

/**
 * Decodes the indexed triangle {@code t} into {@code scratchTriangle}, converts its encoded vertex
 * coordinates to latitude/longitude values and returns the within relation of the triangle with
 * the query polygon.
 */
@Override
protected Component2D.WithinRelation queryWithin(byte[] t, ShapeField.DecodedTriangle scratchTriangle) {
  ShapeField.decodeTriangle(t, scratchTriangle);

  // vertex a
  final double aLon = GeoEncodingUtils.decodeLongitude(scratchTriangle.aX);
  final double aLat = GeoEncodingUtils.decodeLatitude(scratchTriangle.aY);
  // vertex b
  final double bLon = GeoEncodingUtils.decodeLongitude(scratchTriangle.bX);
  final double bLat = GeoEncodingUtils.decodeLatitude(scratchTriangle.bY);
  // vertex c
  final double cLon = GeoEncodingUtils.decodeLongitude(scratchTriangle.cX);
  final double cLat = GeoEncodingUtils.decodeLatitude(scratchTriangle.cY);

  return poly2D.withinTriangle(
      aLon, aLat, scratchTriangle.ab,
      bLon, bLat, scratchTriangle.bc,
      cLon, cLat, scratchTriangle.ca);
}

@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ protected void setTriangleValue(int aX, int aY, boolean abFromShape, int bX, int

/** Query Relation Types **/
public enum QueryRelation {
INTERSECTS, WITHIN, DISJOINT
INTERSECTS, WITHIN, DISJOINT, CONTAINS
}

private static final int MINY_MINX_MAXY_MAXX_Y_X = 0;
Expand Down
72 changes: 66 additions & 6 deletions lucene/sandbox/src/java/org/apache/lucene/document/ShapeQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.Objects;

import org.apache.lucene.document.ShapeField.QueryRelation;
import org.apache.lucene.geo.Component2D;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
Expand Down Expand Up @@ -61,9 +62,11 @@ abstract class ShapeQuery extends Query {
/** field name */
final String field;
/** query relation
* disjoint: {@code CELL_OUTSIDE_QUERY}
* intersects: {@code CELL_CROSSES_QUERY},
* within: {@code CELL_WITHIN_QUERY} */
* disjoint: {@link QueryRelation#DISJOINT},
* intersects: {@link QueryRelation#INTERSECTS},
* within: {@link QueryRelation#WITHIN},
* contains: {@link QueryRelation#CONTAINS}
* */
final QueryRelation queryRelation;

protected ShapeQuery(String field, final QueryRelation queryType) {
Expand All @@ -85,6 +88,9 @@ protected abstract Relation relateRangeBBoxToQuery(int minXOffset, int minYOffse
/** returns true if the provided triangle matches the query */
protected abstract boolean queryMatches(byte[] triangle, ShapeField.DecodedTriangle scratchTriangle, ShapeField.QueryRelation queryRelation);

/**
* Return the within relationship between the query and one encoded triangle of an indexed shape.
*
* @param triangle the encoded triangle
* @param scratchTriangle reusable object the encoded triangle is decoded into
* @return the {@link Component2D.WithinRelation} between the query shape and the triangle
*/
protected abstract Component2D.WithinRelation queryWithin(byte[] triangle, ShapeField.DecodedTriangle scratchTriangle);

/** relates a range of triangles (internal node) to the query */
protected Relation relateRangeToQuery(byte[] minTriangle, byte[] maxTriangle, QueryRelation queryRelation) {
// compute bounding box of internal node
Expand Down Expand Up @@ -132,11 +138,10 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti

final Weight weight = this;
final Relation rel = relateRangeToQuery(values.getMinPackedValue(), values.getMaxPackedValue(), queryRelation);
if (rel == Relation.CELL_OUTSIDE_QUERY) {
if (rel == Relation.CELL_OUTSIDE_QUERY || (rel == Relation.CELL_INSIDE_QUERY && queryRelation == QueryRelation.CONTAINS)) {
// no documents match the query
return null;
}
else if (values.getDocCount() == reader.maxDoc() && rel == Relation.CELL_INSIDE_QUERY) {
} else if (values.getDocCount() == reader.maxDoc() && rel == Relation.CELL_INSIDE_QUERY) {
// all documents match the query
return new ScorerSupplier() {
@Override
Expand All @@ -151,6 +156,7 @@ public long cost() {
};
} else {
if (queryRelation != QueryRelation.INTERSECTS
&& queryRelation != QueryRelation.CONTAINS
&& hasAnyHits(query, values) == false) {
// First we check if we have any hits so we are fast in the adversarial case where
// the shape does not match any documents and we are in the dense case
Expand Down Expand Up @@ -226,6 +232,7 @@ protected Scorer getScorer(final LeafReader reader, final Weight weight, final f
case INTERSECTS: return getSparseScorer(reader, weight, boost, scoreMode);
case WITHIN:
case DISJOINT: return getDenseScorer(reader, weight, boost, scoreMode);
case CONTAINS: return getContainsDenseScorer(reader, weight, boost, scoreMode);
default: throw new IllegalArgumentException("Unsupported query type :[" + query.getQueryRelation() + "]");
}
}
Expand Down Expand Up @@ -278,6 +285,17 @@ private Scorer getDenseScorer(LeafReader reader, Weight weight, final float boos
return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
}

/**
 * Builds the scorer used for CONTAINS. The visitor fills two bitsets: the documents that are
 * candidates and the documents that must be excluded. The excluded documents are then removed
 * from the candidates and the remainder is exposed through a constant-score scorer.
 */
private Scorer getContainsDenseScorer(LeafReader reader, Weight weight, final float boost, ScoreMode scoreMode) throws IOException {
  final int maxDoc = reader.maxDoc();
  final FixedBitSet candidates = new FixedBitSet(maxDoc);
  final FixedBitSet rejected = new FixedBitSet(maxDoc);
  // single-element array so the visitor can increment the cost
  final long[] candidateCost = new long[1];

  // collect potential and excluded documents
  values.intersect(getContainsDenseVisitor(query, candidates, rejected, candidateCost));

  // a document with any excluding triangle cannot match
  candidates.andNot(rejected);
  return new ConstantScoreScorer(weight, boost, scoreMode, new BitSetIterator(candidates, candidateCost[0]));
}

@Override
public long cost() {
if (cost == -1) {
Expand Down Expand Up @@ -389,6 +407,48 @@ public Relation compare(byte[] minTriangle, byte[] maxTriangle) {
};
}

/**
 * create a visitor that adds documents that match the query using a dense bitset; used with CONTAINS.
 * Documents with a CANDIDATE triangle are set in {@code result} (and counted in {@code cost[0]}),
 * documents with a NOTWITHIN triangle, or visited without a value, are set in {@code excluded}.
 */
private static IntersectVisitor getContainsDenseVisitor(final ShapeQuery query, final FixedBitSet result, final FixedBitSet excluded, final long[] cost) {
  return new IntersectVisitor() {
    final ShapeField.DecodedTriangle scratchTriangle = new ShapeField.DecodedTriangle();

    /** Records the relation of one triangle for a document; any other relation leaves both bitsets untouched. */
    private void record(int docID, Component2D.WithinRelation within) {
      if (within == Component2D.WithinRelation.NOTWITHIN) {
        excluded.set(docID);
      } else if (within == Component2D.WithinRelation.CANDIDATE) {
        cost[0]++;
        result.set(docID);
      }
    }

    @Override
    public void visit(int docID) {
      // visited without a triangle value: the document is excluded
      excluded.set(docID);
    }

    @Override
    public void visit(int docID, byte[] t) {
      record(docID, query.queryWithin(t, scratchTriangle));
    }

    @Override
    public void visit(DocIdSetIterator iterator, byte[] t) throws IOException {
      // all documents of the iterator share the triangle, so the relation is computed once
      final Component2D.WithinRelation within = query.queryWithin(t, scratchTriangle);
      for (int docID = iterator.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = iterator.nextDoc()) {
        record(docID, within);
      }
    }

    @Override
    public Relation compare(byte[] minTriangle, byte[] maxTriangle) {
      return query.relateRangeToQuery(minTriangle, maxTriangle, query.getQueryRelation());
    }
  };
}

/** create a visitor that clears documents that do not match the polygon query using a dense bitset; used with WITHIN & DISJOINT */
private static IntersectVisitor getInverseDenseVisitor(final ShapeQuery query, final FixedBitSet result, final long[] cost) {
return new IntersectVisitor() {
Expand Down
Loading

0 comments on commit a06a2ea

Please sign in to comment.