Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery #635

Merged
merged 5 commits into from Feb 3, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -156,20 +156,9 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);

if (numericValues != null) {
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
&& indexSort.getSort().length > 0
&& indexSort.getSort()[0].getField().equals(field)) {

SortField sortField = indexSort.getSort()[0];
DocIdSetIterator disi = getDocIdSetIterator(sortField, context, numericValues);
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
DocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null) {
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
return fallbackWeight.scorer(context);
}
Expand All @@ -180,9 +169,36 @@ public boolean isCacheable(LeafReaderContext ctx) {
// if the fallback query is cacheable.
return fallbackWeight.isCacheable(ctx);
}

@Override
public int count(LeafReaderContext context) throws IOException {
BoundedDocSetIdIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null) {
return disi.lastDoc - disi.firstDoc;
}
return fallbackWeight.count(context);
}
};
}

private BoundedDocSetIdIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
throws IOException {
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
if (numericValues != null) {
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
&& indexSort.getSort().length > 0
&& indexSort.getSort()[0].getField().equals(field)) {

SortField sortField = indexSort.getSort()[0];
return getDocIdSetIterator(sortField, context, numericValues);
}
}
return null;
}

/**
* Computes the document IDs that lie within the range [lowerValue, upperValue] by performing
* binary search on the field's doc values.
Expand All @@ -195,7 +211,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
* no value.
*/
private DocIdSetIterator getDocIdSetIterator(
private BoundedDocSetIdIterator getDocIdSetIterator(
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this a typo?
BoundedDocSetIdIterator → BoundedDocIdSetIterator

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'm pretty sure it is. If you open a PR to rename, I'll merge it.

SortField sortField, LeafReaderContext context, DocIdSetIterator delegate)
throws IOException {
long lower = sortField.getReverse() ? upperValue : lowerValue;
Expand Down
Expand Up @@ -38,6 +38,7 @@
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollectorManager;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
Expand Down Expand Up @@ -95,7 +96,7 @@ public void testSameHitsAsPointRangeQuery() throws IOException {
}
}

private void assertSameHits(IndexSearcher searcher, Query q1, Query q2, boolean scores)
private static void assertSameHits(IndexSearcher searcher, Query q1, Query q2, boolean scores)
throws IOException {
final int maxDoc = searcher.getIndexReader().maxDoc();
final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
Expand Down Expand Up @@ -167,43 +168,50 @@ public void testIndexSortDocValuesWithEvenLength(boolean reverse) throws Excepti
IndexSearcher searcher = newSearcher(reader);

// Test ranges consisting of one value.
assertEquals(1, searcher.count(createQuery("field", -80, -80)));
assertEquals(1, searcher.count(createQuery("field", -5, -5)));
assertEquals(2, searcher.count(createQuery("field", 0, 0)));
assertEquals(1, searcher.count(createQuery("field", 30, 30)));
assertEquals(1, searcher.count(createQuery("field", 35, 35)));
assertNumberOfHits(searcher, createQuery("field", -80, -80), 1);
assertNumberOfHits(searcher, createQuery("field", -5, -5), 1);
assertNumberOfHits(searcher, createQuery("field", 0, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 30), 1);
assertNumberOfHits(searcher, createQuery("field", 35, 35), 1);

assertEquals(0, searcher.count(createQuery("field", -90, -90)));
assertEquals(0, searcher.count(createQuery("field", 5, 5)));
assertEquals(0, searcher.count(createQuery("field", 40, 40)));
assertNumberOfHits(searcher, createQuery("field", -90, -90), 0);
assertNumberOfHits(searcher, createQuery("field", 5, 5), 0);
assertNumberOfHits(searcher, createQuery("field", 40, 40), 0);

// Test the lower end of the document value range.
assertEquals(2, searcher.count(createQuery("field", -90, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -4)));
assertEquals(1, searcher.count(createQuery("field", -70, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -5)));
assertNumberOfHits(searcher, createQuery("field", -90, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -80, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -70, -4), 1);
assertNumberOfHits(searcher, createQuery("field", -80, -5), 2);

// Test the upper end of the document value range.
assertEquals(1, searcher.count(createQuery("field", 25, 34)));
assertEquals(2, searcher.count(createQuery("field", 25, 35)));
assertEquals(2, searcher.count(createQuery("field", 25, 36)));
assertEquals(2, searcher.count(createQuery("field", 30, 35)));
assertNumberOfHits(searcher, createQuery("field", 25, 34), 1);
assertNumberOfHits(searcher, createQuery("field", 25, 35), 2);
assertNumberOfHits(searcher, createQuery("field", 25, 36), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 35), 2);

// Test multiple occurrences of the same value.
assertEquals(2, searcher.count(createQuery("field", -4, 4)));
assertEquals(2, searcher.count(createQuery("field", -4, 0)));
assertEquals(2, searcher.count(createQuery("field", 0, 4)));
assertEquals(3, searcher.count(createQuery("field", 0, 30)));
assertNumberOfHits(searcher, createQuery("field", -4, 4), 2);
assertNumberOfHits(searcher, createQuery("field", -4, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 4), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 30), 3);

// Test ranges that span all documents.
assertEquals(6, searcher.count(createQuery("field", -80, 35)));
assertEquals(6, searcher.count(createQuery("field", -90, 40)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 6);
assertNumberOfHits(searcher, createQuery("field", -90, 40), 6);

writer.close();
reader.close();
dir.close();
}

private static void assertNumberOfHits(IndexSearcher searcher, Query query, int numberOfHits)
throws IOException {
assertEquals(
numberOfHits, searcher.search(query, new TotalHitCountCollectorManager()).intValue());
assertEquals(numberOfHits, searcher.count(query));
}

public void testIndexSortDocValuesWithOddLength() throws Exception {
testIndexSortDocValuesWithOddLength(false);
testIndexSortDocValuesWithOddLength(true);
Expand All @@ -229,38 +237,38 @@ public void testIndexSortDocValuesWithOddLength(boolean reverse) throws Exceptio
IndexSearcher searcher = newSearcher(reader);

// Test ranges consisting of one value.
assertEquals(1, searcher.count(createQuery("field", -80, -80)));
assertEquals(1, searcher.count(createQuery("field", -5, -5)));
assertEquals(2, searcher.count(createQuery("field", 0, 0)));
assertEquals(1, searcher.count(createQuery("field", 5, 5)));
assertEquals(1, searcher.count(createQuery("field", 30, 30)));
assertEquals(1, searcher.count(createQuery("field", 35, 35)));
assertNumberOfHits(searcher, createQuery("field", -80, -80), 1);
assertNumberOfHits(searcher, createQuery("field", -5, -5), 1);
assertNumberOfHits(searcher, createQuery("field", 0, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 5, 5), 1);
assertNumberOfHits(searcher, createQuery("field", 30, 30), 1);
assertNumberOfHits(searcher, createQuery("field", 35, 35), 1);

assertEquals(0, searcher.count(createQuery("field", -90, -90)));
assertEquals(0, searcher.count(createQuery("field", 6, 6)));
assertEquals(0, searcher.count(createQuery("field", 40, 40)));
assertNumberOfHits(searcher, createQuery("field", -90, -90), 0);
assertNumberOfHits(searcher, createQuery("field", 6, 6), 0);
assertNumberOfHits(searcher, createQuery("field", 40, 40), 0);

// Test the lower end of the document value range.
assertEquals(2, searcher.count(createQuery("field", -90, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -4)));
assertEquals(1, searcher.count(createQuery("field", -70, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -5)));
assertNumberOfHits(searcher, createQuery("field", -90, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -80, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -70, -4), 1);
assertNumberOfHits(searcher, createQuery("field", -80, -5), 2);

// Test the upper end of the document value range.
assertEquals(1, searcher.count(createQuery("field", 25, 34)));
assertEquals(2, searcher.count(createQuery("field", 25, 35)));
assertEquals(2, searcher.count(createQuery("field", 25, 36)));
assertEquals(2, searcher.count(createQuery("field", 30, 35)));
assertNumberOfHits(searcher, createQuery("field", 25, 34), 1);
assertNumberOfHits(searcher, createQuery("field", 25, 35), 2);
assertNumberOfHits(searcher, createQuery("field", 25, 36), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 35), 2);

// Test multiple occurrences of the same value.
assertEquals(2, searcher.count(createQuery("field", -4, 4)));
assertEquals(2, searcher.count(createQuery("field", -4, 0)));
assertEquals(2, searcher.count(createQuery("field", 0, 4)));
assertEquals(4, searcher.count(createQuery("field", 0, 30)));
assertNumberOfHits(searcher, createQuery("field", -4, 4), 2);
assertNumberOfHits(searcher, createQuery("field", -4, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 4), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 30), 4);

// Test ranges that span all documents.
assertEquals(7, searcher.count(createQuery("field", -80, 35)));
assertEquals(7, searcher.count(createQuery("field", -90, 40)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 7);
assertNumberOfHits(searcher, createQuery("field", -90, 40), 7);

writer.close();
reader.close();
Expand All @@ -285,10 +293,10 @@ private void testIndexSortDocValuesWithSingleValue(boolean reverse) throws IOExc
DirectoryReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

assertEquals(1, searcher.count(createQuery("field", 42, 43)));
assertEquals(1, searcher.count(createQuery("field", 42, 42)));
assertEquals(0, searcher.count(createQuery("field", 41, 41)));
assertEquals(0, searcher.count(createQuery("field", 43, 43)));
assertNumberOfHits(searcher, createQuery("field", 42, 43), 1);
assertNumberOfHits(searcher, createQuery("field", 42, 42), 1);
assertNumberOfHits(searcher, createQuery("field", 41, 41), 0);
assertNumberOfHits(searcher, createQuery("field", 43, 43), 0);

writer.close();
reader.close();
Expand Down Expand Up @@ -316,11 +324,11 @@ public void testIndexSortMissingValues() throws Exception {
DirectoryReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

assertEquals(2, searcher.count(createQuery("field", -70, 0)));
assertEquals(2, searcher.count(createQuery("field", -2, 35)));
assertNumberOfHits(searcher, createQuery("field", -70, 0), 2);
assertNumberOfHits(searcher, createQuery("field", -2, 35), 2);

assertEquals(4, searcher.count(createQuery("field", -80, 35)));
assertEquals(4, searcher.count(createQuery("field", Long.MIN_VALUE, Long.MAX_VALUE)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 4);
assertNumberOfHits(searcher, createQuery("field", Long.MIN_VALUE, Long.MAX_VALUE), 4);

writer.close();
reader.close();
Expand Down Expand Up @@ -450,6 +458,56 @@ public void testIndexSortOptimizationDeactivated(RandomIndexWriter writer) throw
reader.close();
}

public void testCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("field", 10));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

Query fallbackQuery = LongPoint.newRangeQuery("field", 1, 42);
Query query = new IndexSortSortedNumericDocValuesRangeQuery("field", 1, 42, fallbackQuery);
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
for (LeafReaderContext context : searcher.getLeafContexts()) {
assertNotEquals(-1, weight.count(context));
}

writer.close();
reader.close();
dir.close();
}

public void testFallbackCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("field", 10));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

// we use an unrealistic query that exposes its own Weight#count
Query fallbackQuery = new MatchNoDocsQuery();
// the index is not sorted on this field, the fallback query is used
Query query = new IndexSortSortedNumericDocValuesRangeQuery("another", 1, 42, fallbackQuery);
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
for (LeafReaderContext context : searcher.getLeafContexts()) {
assertNotEquals(-1, weight.count(context));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, thanks for adding a test for this!
Minor: Would be good to actually check the fallback weight count here, and in general, have a different assertion here than the one in testCount()..
Maybe, assertEquals(0, weight.count(context)); here, and assertEquals(1, weight.count(context)); in testCount() ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're right. I was initially hesitant on this because I was maybe planning to index more documents, then I could end up with more segments hence asserting on exact count could get more complicated. But if we keep a single doc we should be good and it's good to have a more precise check that also differs for the two scenarios. Thanks for your suggestion!

}

writer.close();
reader.close();
dir.close();
}

private Document createDocument(String field, long value) {
Document doc = new Document();
doc.add(new SortedNumericDocValuesField(field, value));
Expand Down