Skip to content

Commit

Permalink
LUCENE-8443: Fix InverseIntersectVisitor logic for LatLonShape querie…
Browse files Browse the repository at this point in the history
…s, add adversarial test for same shape many times
  • Loading branch information
nknize committed Aug 3, 2018
1 parent 17a02c1 commit 2a41cbd
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 22 deletions.
Expand Up @@ -273,7 +273,7 @@ public void visit(int docID) {

@Override
public void visit(int docID, byte[] packedTriangle) {
if (queryCrossesTriangle(packedTriangle)) {
if (queryCrossesTriangle(packedTriangle) == false) {
result.clear(docID);
cost[0]--;
}
Expand All @@ -284,7 +284,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
Relation r = relateRangeToQuery(minPackedValue, maxPackedValue);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return Relation.CELL_INSIDE_QUERY;
} else if (r == Relation.CELL_INSIDE_QUERY || r == Relation.CELL_CROSSES_QUERY) {
} else if (r == Relation.CELL_INSIDE_QUERY) {
return Relation.CELL_OUTSIDE_QUERY;
}
return r;
Expand Down
Expand Up @@ -39,7 +39,9 @@
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.FutureArrays;
import org.apache.lucene.util.NumericUtils;

Expand Down Expand Up @@ -179,6 +181,39 @@ public Relation compare(byte[] minTriangle, byte[] maxTriangle) {
};
}

/**
* Create a visitor that clears documents that do NOT match the polygon query.
*/
private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, int[] cost) {
return new IntersectVisitor() {

@Override
public void visit(int docID) {
result.clear(docID);
cost[0]--;
}

@Override
public void visit(int docID, byte[] packedTriangle) {
if (queryCrossesTriangle(packedTriangle) == false) {
result.clear(docID);
cost[0]--;
}
}

@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
Relation r = relateRangeToQuery(minPackedValue, maxPackedValue);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return Relation.CELL_INSIDE_QUERY;
} else if (r == Relation.CELL_INSIDE_QUERY) {
return Relation.CELL_OUTSIDE_QUERY;
}
return r;
}
};
}

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
Expand All @@ -193,29 +228,64 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
return null;
}

boolean allDocsMatch = true;
if (values.getDocCount() != reader.maxDoc() ||
relateRangeToQuery(values.getMinPackedValue(), values.getMaxPackedValue()) != Relation.CELL_INSIDE_QUERY) {
allDocsMatch = false;
}

final Weight weight = this;
return new ScorerSupplier() {
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final PointValues.IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;
if (allDocsMatch) {
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
return new ConstantScoreScorer(weight, score(),
DocIdSetIterator.all(reader.maxDoc()));
}

@Override
public Scorer get(long leadCost) throws IOException {
values.intersect(visitor);
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(weight, score(), iterator);
}
@Override
public long cost() {
return reader.maxDoc();
}
};
} else {
return new ScorerSupplier() {
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;

@Override
public long cost() {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
cost = values.estimatePointCount(visitor);
assert cost >= 0;
@Override
public Scorer get(long leadCost) throws IOException {
if (values.getDocCount() == reader.maxDoc()
&& values.getDocCount() == values.size()
&& cost() > reader.maxDoc() / 2) {
// If all docs have exactly one value and the cost is greater
// than half the leaf size then maybe we can make things faster
// by computing the set of documents that do NOT match the query
final FixedBitSet result = new FixedBitSet(reader.maxDoc());
result.set(0, reader.maxDoc());
int[] cost = new int[]{reader.maxDoc()};
values.intersect(getInverseIntersectVisitor(result, cost));
final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, score(), iterator);
}

values.intersect(visitor);
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(weight, score(), iterator);
}
return cost;
}
};

@Override
public long cost() {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
cost = values.estimatePointCount(visitor);
assert cost >= 0;
}
return cost;
}
};
}
}

@Override
Expand Down
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.document;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

Expand Down Expand Up @@ -110,6 +111,19 @@ protected Query newPolygonQuery(String field, Polygon... polygons) {
return LatLonShape.newPolygonQuery(field, polygons);
}

// A particularly tricky adversary for BKD tree:
public void testSameShapeManyTimes() throws Exception {
int numShapes = atLeast(1000);

// Every doc has 2 points:
Object theShape = nextShape();

Object[] shapes = new Object[numShapes];
Arrays.fill(shapes, theShape);

verify(shapes);
}

public void testRandomTiny() throws Exception {
// Make sure single-leaf-node case is OK:
doTestRandom(10);
Expand Down Expand Up @@ -398,6 +412,7 @@ public String toString() {
sb.append(lon);
sb.append(',');
sb.append(lat);
sb.append(')');
return sb.toString();
}
}
Expand Down
Expand Up @@ -64,7 +64,6 @@ public boolean testPolygonQuery(Polygon2D poly2d, Object shape) {
}
}

@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8443")
@Override
public void testRandomTiny() throws Exception {
}
Expand Down

0 comments on commit 2a41cbd

Please sign in to comment.