diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index bf8610d216ad..0e9ef4298df2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -85,6 +85,8 @@ Optimizations * GITHUB#14963: Bypass HNSW graph building for tiny segments. (Shubham Chaudhary, Ben Trent) +* GITHUB#15397: NumericComparator: immediately check whether a segment is competitive with the recorded bottom (Martijn van Groningen) + Bug Fixes --------------------- * GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java index 37efe32ec7d3..e39ecbbd6661 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java @@ -320,7 +320,8 @@ private class PointsCompetitiveDISIBuilder extends CompetitiveDISIBuilder { // helps to be conservative about increasing the sampling interval private int tryUpdateFailCount = 0; - public PointsCompetitiveDISIBuilder(PointValues pointValues, NumericLeafComparator comparator) { + PointsCompetitiveDISIBuilder(PointValues pointValues, NumericLeafComparator comparator) + throws IOException { super(comparator); LeafReaderContext context = comparator.context; FieldInfo info = context.reader().getFieldInfos().fieldInfo(field); @@ -344,6 +345,7 @@ public PointsCompetitiveDISIBuilder(PointValues pointValues, NumericLeafComparat + bytesCount); } this.pointValues = pointValues; + postInitializeCompetitiveIterator(); } @Override @@ -364,6 +366,30 @@ int docCount() { return pointValues.getDocCount(); } + /** + * If queue is full and global min/max point values are not competitive with bottom then set an + * empty iterator as competitive iterator. + * + * @throws IOException i/o exception while fetching min and max values from point values + */ + void postInitializeCompetitiveIterator() throws IOException { + if (queueFull && hitsThresholdReached) { + // if some documents have missing points, then check that missing values prohibits + // optimization + if (docCount() < maxDoc && isMissingValueCompetitive()) { + return; + } + long bottom = leafComparator.bottomAsComparableLong(); + long minValue = sortableBytesToLong(pointValues.getMinPackedValue()); + long maxValue = sortableBytesToLong(pointValues.getMaxPackedValue()); + if (reverse == false && bottom < minValue) { + competitiveIterator.update(DocIdSetIterator.empty()); + } else if (reverse && bottom > maxValue) { + competitiveIterator.update(DocIdSetIterator.empty()); + } + } + } + @Override void doUpdateCompetitiveIterator() throws IOException { DocIdSetBuilder result = new DocIdSetBuilder(maxDoc); @@ -478,8 +504,8 @@ private class DVSkipperCompetitiveDISIBuilder extends CompetitiveDISIBuilder { private final DocValuesSkipper skipper; private final TwoPhaseIterator innerTwoPhase; - public DVSkipperCompetitiveDISIBuilder( - DocValuesSkipper skipper, NumericLeafComparator leafComparator) throws IOException { + DVSkipperCompetitiveDISIBuilder(DocValuesSkipper skipper, NumericLeafComparator leafComparator) + throws IOException { super(leafComparator); this.skipper = skipper; NumericDocValues docValues = @@ -497,6 +523,7 @@ public float matchCost() { return 2; // 2 comparisons } }; + postInitializeCompetitiveIterator(); } @Override @@ -504,6 +531,26 @@ int docCount() { return skipper.docCount(); } + /** + * If queue is full and global min/max skipper are not competitive with bottom then set an empty + * iterator as competitive iterator. + */ + void postInitializeCompetitiveIterator() { + if (queueFull && hitsThresholdReached) { + // if some documents have missing doc values, check that missing values prohibits + // optimization + if (docCount() < maxDoc && isMissingValueCompetitive()) { + return; + } + long bottom = leafComparator.bottomAsComparableLong(); + if (reverse == false && bottom < skipper.minValue()) { + competitiveIterator.update(DocIdSetIterator.empty()); + } else if (reverse && bottom > skipper.maxValue()) { + competitiveIterator.update(DocIdSetIterator.empty()); + } + } + } + @Override void doUpdateCompetitiveIterator() { TwoPhaseIterator twoPhaseIterator = diff --git a/lucene/core/src/test/org/apache/lucene/search/comparators/TestNumericComparator.java b/lucene/core/src/test/org/apache/lucene/search/comparators/TestNumericComparator.java new file mode 100644 index 000000000000..417a76c62311 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/comparators/TestNumericComparator.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.comparators; + +import java.io.IOException; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Pruning; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; + +public class TestNumericComparator extends LuceneTestCase { + + public void testEmptyCompetitiveIteratorOptimization() throws Exception { + final int numDocs = atLeast(1000); + try (var dir = newDirectory()) { + try (var writer = + new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()))) { + for (int i = 0; i < numDocs; i++) { + var doc = new Document(); + + doc.add(NumericDocValuesField.indexedField("long_field_1", i)); + doc.add(new LongPoint("long_field_2", i)); + doc.add(new NumericDocValuesField("long_field_2", i)); + + doc.add(NumericDocValuesField.indexedField("int_field_1", i)); + doc.add(new IntPoint("int_field_2", i)); + doc.add(new NumericDocValuesField("int_field_2", i)); + + doc.add(NumericDocValuesField.indexedField("float_field_1", i)); + doc.add(new FloatPoint("float_field_2", i)); + doc.add(new NumericDocValuesField("float_field_2", i)); + + doc.add(NumericDocValuesField.indexedField("double_field_1", i)); + doc.add(new DoublePoint("double_field_2", i)); + doc.add(new NumericDocValuesField("double_field_2", i)); + + writer.addDocument(doc); + writer.forceMerge(1); + } + try (var reader = DirectoryReader.open(writer)) { + assertEquals(1, reader.leaves().size()); + var leafContext = reader.leaves().get(0); + + // long field 1 ascending: + assertLongField("long_field_1", false, -1, leafContext); + // long field 1 descending: + assertLongField("long_field_1", true, numDocs + 1, leafContext); + // long field 2 ascending: + assertLongField("long_field_2", false, -1, leafContext); + // long field 2 descending: + assertLongField("long_field_2", true, numDocs + 1, leafContext); + + // int field 1 ascending: + assertIntField("int_field_1", false, -1, leafContext); + // int field 1 descending: + assertIntField("int_field_1", true, numDocs + 1, leafContext); + // int field 2 ascending: + assertIntField("int_field_2", false, -1, leafContext); + // int field 2 descending: + assertIntField("int_field_2", true, numDocs + 1, leafContext); + + // float field 1 ascending: + assertFloatField("float_field_1", false, -1, leafContext); + // float field 1 descending: + assertFloatField("float_field_1", true, numDocs + 1, leafContext); + // float field 2 ascending: + assertFloatField("float_field_2", false, -1, leafContext); + // float field 2 descending: + assertFloatField("float_field_2", true, numDocs + 1, leafContext); + + // double field 1 ascending: + assertDoubleField("double_field_1", false, -1, leafContext); + // double field 1 descending: + assertDoubleField("double_field_1", true, numDocs + 1, leafContext); + // double field 2 ascending: + assertDoubleField("double_field_2", false, -1, leafContext); + // double field 2 descending: + assertDoubleField("double_field_2", true, numDocs + 1, leafContext); + } + } + } + } + + public void testEmptyCompetitiveIteratorOptimizationWithMissingValue() throws Exception { + final int numDocs = atLeast(1000); + try (var dir = newDirectory()) { + try (var writer = + new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()))) { + // index docs without missing values: + for (int i = 0; i < numDocs; i++) { + var doc = new Document(); + + doc.add(NumericDocValuesField.indexedField("long_field_1", i)); + doc.add(new LongPoint("long_field_2", i)); + doc.add(new NumericDocValuesField("long_field_2", i)); + + writer.addDocument(doc); + if (i % 100 == 0) { + writer.flush(); + } + } + + // Index one doc with without long_field_1 and long_field_2 fields + var doc = new Document(); + doc.add(new NumericDocValuesField("another_field", numDocs)); + writer.addDocument(doc); + writer.flush(); + + try (var reader = DirectoryReader.open(writer)) { + var indexSearcher = newSearcher(reader); + indexSearcher.setQueryCache(null); + { + var sortField = new SortField("long_field_1", SortField.Type.LONG, false); + sortField.setMissingValue(Long.MIN_VALUE); + var topDocs = indexSearcher.search(new MatchAllDocsQuery(), 3, new Sort(sortField)); + assertEquals(numDocs, topDocs.scoreDocs[0].doc); + assertEquals(Long.MIN_VALUE, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]); + assertEquals(0, topDocs.scoreDocs[1].doc); + assertEquals(0L, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]); + assertEquals(1, topDocs.scoreDocs[2].doc); + assertEquals(1L, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]); + } + { + var sortField = new SortField("long_field_2", SortField.Type.LONG, false); + sortField.setMissingValue(Long.MIN_VALUE); + var topDocs = indexSearcher.search(new MatchAllDocsQuery(), 3, new Sort(sortField)); + assertEquals(numDocs, topDocs.scoreDocs[0].doc); + assertEquals(Long.MIN_VALUE, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]); + assertEquals(0, topDocs.scoreDocs[1].doc); + assertEquals(0L, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]); + assertEquals(1, topDocs.scoreDocs[2].doc); + assertEquals(1L, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]); + } + } + } + } + } + + public void testEmptyCompetitiveIteratorOptimizationAndHitsThresholdReached() throws Exception { + final int numDocs = + TestUtil.nextInt(random(), 128, 512); // Below IndexSearcher.DEFAULT_HITS_THRESHOLD + try (var dir = newDirectory()) { + try (var writer = + new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()))) { + for (int i = 0; i < numDocs; i++) { + var doc = new Document(); + + doc.add(NumericDocValuesField.indexedField("field_1", i)); + doc.add(new LongPoint("field_2", i)); + doc.add(new NumericDocValuesField("field_2", i)); + + writer.addDocument(doc); + if (i % 100 == 0) { + writer.flush(); + } + } + + try (var reader = DirectoryReader.open(writer)) { + var indexSearcher = newSearcher(reader); + indexSearcher.setQueryCache(null); + for (String field : new String[] {"field_1", "field_2"}) { + var sortField = new SortField(field, SortField.Type.LONG, false); + var topDocs = indexSearcher.search(new MatchAllDocsQuery(), 3, new Sort(sortField)); + assertEquals(TotalHits.Relation.EQUAL_TO, topDocs.totalHits.relation()); + assertEquals(numDocs, topDocs.totalHits.value()); + assertEquals(0, topDocs.scoreDocs[0].doc); + assertEquals(0L, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]); + assertEquals(1, topDocs.scoreDocs[1].doc); + assertEquals(1L, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]); + assertEquals(2, topDocs.scoreDocs[2].doc); + assertEquals(2L, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]); + } + } + } + } + } + + private static void assertLongField( + String fieldName, boolean reverse, int bottom, LeafReaderContext leafContext) + throws IOException { + var comparator1 = + (LongComparator) + new SortField(fieldName, SortField.Type.LONG, reverse) + .getComparator(1, Pruning.GREATER_THAN_OR_EQUAL_TO); + comparator1.queueFull = true; + comparator1.hitsThresholdReached = true; + comparator1.bottom = bottom; + var leafComparator = comparator1.getLeafComparator(leafContext); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, leafComparator.competitiveIterator().nextDoc()); + } + + private static void assertIntField( + String fieldName, boolean reverse, int bottom, LeafReaderContext leafContext) + throws IOException { + var comparator1 = + (IntComparator) + new SortField(fieldName, SortField.Type.INT, reverse) + .getComparator(1, Pruning.GREATER_THAN_OR_EQUAL_TO); + comparator1.queueFull = true; + comparator1.hitsThresholdReached = true; + comparator1.bottom = bottom; + var leafComparator = comparator1.getLeafComparator(leafContext); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, leafComparator.competitiveIterator().nextDoc()); + } + + private static void assertFloatField( + String fieldName, boolean reverse, int bottom, LeafReaderContext leafContext) + throws IOException { + var comparator1 = + (FloatComparator) + new SortField(fieldName, SortField.Type.FLOAT, reverse) + .getComparator(1, Pruning.GREATER_THAN_OR_EQUAL_TO); + comparator1.queueFull = true; + comparator1.hitsThresholdReached = true; + comparator1.bottom = bottom; + var leafComparator = comparator1.getLeafComparator(leafContext); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, leafComparator.competitiveIterator().nextDoc()); + } + + private static void assertDoubleField( + String fieldName, boolean reverse, int bottom, LeafReaderContext leafContext) + throws IOException { + var comparator1 = + (DoubleComparator) + new SortField(fieldName, SortField.Type.DOUBLE, reverse) + .getComparator(1, Pruning.GREATER_THAN_OR_EQUAL_TO); + comparator1.queueFull = true; + comparator1.hitsThresholdReached = true; + comparator1.bottom = bottom; + var leafComparator = comparator1.getLeafComparator(leafContext); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, leafComparator.competitiveIterator().nextDoc()); + } +}