Skip to content

Commit

Permalink
LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValu…
Browse files Browse the repository at this point in the history
…esRangeQuery (#635)

IndexSortSortedNumericDocValuesRangeQuery can implement its count method and coompute count through a binary search, the same binary search that is used to execute the query itself, whenever all the required conditions are met.
  • Loading branch information
javanna committed Feb 3, 2022
1 parent ee7a8d6 commit bade484
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 69 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Expand Up @@ -128,6 +128,9 @@ New Features
based on TotalHitCountCollector that allows users to parallelize counting the
number of hits. (Luca Cavanna, Adrien Grand)

* LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery
to speed up computing the number of hits when possible. (Luca Cavanna, Adrien Grand)

Improvements
---------------------

Expand Down
Expand Up @@ -156,20 +156,9 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);

if (numericValues != null) {
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
&& indexSort.getSort().length > 0
&& indexSort.getSort()[0].getField().equals(field)) {

SortField sortField = indexSort.getSort()[0];
DocIdSetIterator disi = getDocIdSetIterator(sortField, context, numericValues);
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
DocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null) {
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
return fallbackWeight.scorer(context);
}
Expand All @@ -180,9 +169,36 @@ public boolean isCacheable(LeafReaderContext ctx) {
// if the fallback query is cacheable.
return fallbackWeight.isCacheable(ctx);
}

@Override
public int count(LeafReaderContext context) throws IOException {
BoundedDocSetIdIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null) {
return disi.lastDoc - disi.firstDoc;
}
return fallbackWeight.count(context);
}
};
}

private BoundedDocSetIdIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
throws IOException {
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
if (numericValues != null) {
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
&& indexSort.getSort().length > 0
&& indexSort.getSort()[0].getField().equals(field)) {

SortField sortField = indexSort.getSort()[0];
return getDocIdSetIterator(sortField, context, numericValues);
}
}
return null;
}

/**
* Computes the document IDs that lie within the range [lowerValue, upperValue] by performing
* binary search on the field's doc values.
Expand All @@ -195,7 +211,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
* no value.
*/
private DocIdSetIterator getDocIdSetIterator(
private BoundedDocSetIdIterator getDocIdSetIterator(
SortField sortField, LeafReaderContext context, DocIdSetIterator delegate)
throws IOException {
long lower = sortField.getReverse() ? upperValue : lowerValue;
Expand Down
Expand Up @@ -38,6 +38,7 @@
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollectorManager;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
Expand Down Expand Up @@ -95,7 +96,7 @@ public void testSameHitsAsPointRangeQuery() throws IOException {
}
}

private void assertSameHits(IndexSearcher searcher, Query q1, Query q2, boolean scores)
private static void assertSameHits(IndexSearcher searcher, Query q1, Query q2, boolean scores)
throws IOException {
final int maxDoc = searcher.getIndexReader().maxDoc();
final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
Expand Down Expand Up @@ -167,43 +168,50 @@ public void testIndexSortDocValuesWithEvenLength(boolean reverse) throws Excepti
IndexSearcher searcher = newSearcher(reader);

// Test ranges consisting of one value.
assertEquals(1, searcher.count(createQuery("field", -80, -80)));
assertEquals(1, searcher.count(createQuery("field", -5, -5)));
assertEquals(2, searcher.count(createQuery("field", 0, 0)));
assertEquals(1, searcher.count(createQuery("field", 30, 30)));
assertEquals(1, searcher.count(createQuery("field", 35, 35)));
assertNumberOfHits(searcher, createQuery("field", -80, -80), 1);
assertNumberOfHits(searcher, createQuery("field", -5, -5), 1);
assertNumberOfHits(searcher, createQuery("field", 0, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 30), 1);
assertNumberOfHits(searcher, createQuery("field", 35, 35), 1);

assertEquals(0, searcher.count(createQuery("field", -90, -90)));
assertEquals(0, searcher.count(createQuery("field", 5, 5)));
assertEquals(0, searcher.count(createQuery("field", 40, 40)));
assertNumberOfHits(searcher, createQuery("field", -90, -90), 0);
assertNumberOfHits(searcher, createQuery("field", 5, 5), 0);
assertNumberOfHits(searcher, createQuery("field", 40, 40), 0);

// Test the lower end of the document value range.
assertEquals(2, searcher.count(createQuery("field", -90, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -4)));
assertEquals(1, searcher.count(createQuery("field", -70, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -5)));
assertNumberOfHits(searcher, createQuery("field", -90, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -80, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -70, -4), 1);
assertNumberOfHits(searcher, createQuery("field", -80, -5), 2);

// Test the upper end of the document value range.
assertEquals(1, searcher.count(createQuery("field", 25, 34)));
assertEquals(2, searcher.count(createQuery("field", 25, 35)));
assertEquals(2, searcher.count(createQuery("field", 25, 36)));
assertEquals(2, searcher.count(createQuery("field", 30, 35)));
assertNumberOfHits(searcher, createQuery("field", 25, 34), 1);
assertNumberOfHits(searcher, createQuery("field", 25, 35), 2);
assertNumberOfHits(searcher, createQuery("field", 25, 36), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 35), 2);

// Test multiple occurrences of the same value.
assertEquals(2, searcher.count(createQuery("field", -4, 4)));
assertEquals(2, searcher.count(createQuery("field", -4, 0)));
assertEquals(2, searcher.count(createQuery("field", 0, 4)));
assertEquals(3, searcher.count(createQuery("field", 0, 30)));
assertNumberOfHits(searcher, createQuery("field", -4, 4), 2);
assertNumberOfHits(searcher, createQuery("field", -4, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 4), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 30), 3);

// Test ranges that span all documents.
assertEquals(6, searcher.count(createQuery("field", -80, 35)));
assertEquals(6, searcher.count(createQuery("field", -90, 40)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 6);
assertNumberOfHits(searcher, createQuery("field", -90, 40), 6);

writer.close();
reader.close();
dir.close();
}

private static void assertNumberOfHits(IndexSearcher searcher, Query query, int numberOfHits)
throws IOException {
assertEquals(
numberOfHits, searcher.search(query, new TotalHitCountCollectorManager()).intValue());
assertEquals(numberOfHits, searcher.count(query));
}

public void testIndexSortDocValuesWithOddLength() throws Exception {
testIndexSortDocValuesWithOddLength(false);
testIndexSortDocValuesWithOddLength(true);
Expand All @@ -229,38 +237,38 @@ public void testIndexSortDocValuesWithOddLength(boolean reverse) throws Exceptio
IndexSearcher searcher = newSearcher(reader);

// Test ranges consisting of one value.
assertEquals(1, searcher.count(createQuery("field", -80, -80)));
assertEquals(1, searcher.count(createQuery("field", -5, -5)));
assertEquals(2, searcher.count(createQuery("field", 0, 0)));
assertEquals(1, searcher.count(createQuery("field", 5, 5)));
assertEquals(1, searcher.count(createQuery("field", 30, 30)));
assertEquals(1, searcher.count(createQuery("field", 35, 35)));
assertNumberOfHits(searcher, createQuery("field", -80, -80), 1);
assertNumberOfHits(searcher, createQuery("field", -5, -5), 1);
assertNumberOfHits(searcher, createQuery("field", 0, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 5, 5), 1);
assertNumberOfHits(searcher, createQuery("field", 30, 30), 1);
assertNumberOfHits(searcher, createQuery("field", 35, 35), 1);

assertEquals(0, searcher.count(createQuery("field", -90, -90)));
assertEquals(0, searcher.count(createQuery("field", 6, 6)));
assertEquals(0, searcher.count(createQuery("field", 40, 40)));
assertNumberOfHits(searcher, createQuery("field", -90, -90), 0);
assertNumberOfHits(searcher, createQuery("field", 6, 6), 0);
assertNumberOfHits(searcher, createQuery("field", 40, 40), 0);

// Test the lower end of the document value range.
assertEquals(2, searcher.count(createQuery("field", -90, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -4)));
assertEquals(1, searcher.count(createQuery("field", -70, -4)));
assertEquals(2, searcher.count(createQuery("field", -80, -5)));
assertNumberOfHits(searcher, createQuery("field", -90, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -80, -4), 2);
assertNumberOfHits(searcher, createQuery("field", -70, -4), 1);
assertNumberOfHits(searcher, createQuery("field", -80, -5), 2);

// Test the upper end of the document value range.
assertEquals(1, searcher.count(createQuery("field", 25, 34)));
assertEquals(2, searcher.count(createQuery("field", 25, 35)));
assertEquals(2, searcher.count(createQuery("field", 25, 36)));
assertEquals(2, searcher.count(createQuery("field", 30, 35)));
assertNumberOfHits(searcher, createQuery("field", 25, 34), 1);
assertNumberOfHits(searcher, createQuery("field", 25, 35), 2);
assertNumberOfHits(searcher, createQuery("field", 25, 36), 2);
assertNumberOfHits(searcher, createQuery("field", 30, 35), 2);

// Test multiple occurrences of the same value.
assertEquals(2, searcher.count(createQuery("field", -4, 4)));
assertEquals(2, searcher.count(createQuery("field", -4, 0)));
assertEquals(2, searcher.count(createQuery("field", 0, 4)));
assertEquals(4, searcher.count(createQuery("field", 0, 30)));
assertNumberOfHits(searcher, createQuery("field", -4, 4), 2);
assertNumberOfHits(searcher, createQuery("field", -4, 0), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 4), 2);
assertNumberOfHits(searcher, createQuery("field", 0, 30), 4);

// Test ranges that span all documents.
assertEquals(7, searcher.count(createQuery("field", -80, 35)));
assertEquals(7, searcher.count(createQuery("field", -90, 40)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 7);
assertNumberOfHits(searcher, createQuery("field", -90, 40), 7);

writer.close();
reader.close();
Expand All @@ -285,10 +293,10 @@ private void testIndexSortDocValuesWithSingleValue(boolean reverse) throws IOExc
DirectoryReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

assertEquals(1, searcher.count(createQuery("field", 42, 43)));
assertEquals(1, searcher.count(createQuery("field", 42, 42)));
assertEquals(0, searcher.count(createQuery("field", 41, 41)));
assertEquals(0, searcher.count(createQuery("field", 43, 43)));
assertNumberOfHits(searcher, createQuery("field", 42, 43), 1);
assertNumberOfHits(searcher, createQuery("field", 42, 42), 1);
assertNumberOfHits(searcher, createQuery("field", 41, 41), 0);
assertNumberOfHits(searcher, createQuery("field", 43, 43), 0);

writer.close();
reader.close();
Expand Down Expand Up @@ -316,11 +324,11 @@ public void testIndexSortMissingValues() throws Exception {
DirectoryReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

assertEquals(2, searcher.count(createQuery("field", -70, 0)));
assertEquals(2, searcher.count(createQuery("field", -2, 35)));
assertNumberOfHits(searcher, createQuery("field", -70, 0), 2);
assertNumberOfHits(searcher, createQuery("field", -2, 35), 2);

assertEquals(4, searcher.count(createQuery("field", -80, 35)));
assertEquals(4, searcher.count(createQuery("field", Long.MIN_VALUE, Long.MAX_VALUE)));
assertNumberOfHits(searcher, createQuery("field", -80, 35), 4);
assertNumberOfHits(searcher, createQuery("field", Long.MIN_VALUE, Long.MAX_VALUE), 4);

writer.close();
reader.close();
Expand Down Expand Up @@ -450,6 +458,56 @@ public void testIndexSortOptimizationDeactivated(RandomIndexWriter writer) throw
reader.close();
}

public void testCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("field", 10));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

Query fallbackQuery = LongPoint.newRangeQuery("field", 1, 42);
Query query = new IndexSortSortedNumericDocValuesRangeQuery("field", 1, 42, fallbackQuery);
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
for (LeafReaderContext context : searcher.getLeafContexts()) {
assertEquals(1, weight.count(context));
}

writer.close();
reader.close();
dir.close();
}

public void testFallbackCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("field", 10));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

// we use an unrealistic query that exposes its own Weight#count
Query fallbackQuery = new MatchNoDocsQuery();
// the index is not sorted on this field, the fallback query is used
Query query = new IndexSortSortedNumericDocValuesRangeQuery("another", 1, 42, fallbackQuery);
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
for (LeafReaderContext context : searcher.getLeafContexts()) {
assertEquals(0, weight.count(context));
}

writer.close();
reader.close();
dir.close();
}

private Document createDocument(String field, long value) {
Document doc = new Document();
doc.add(new SortedNumericDocValuesField(field, value));
Expand Down

0 comments on commit bade484

Please sign in to comment.