Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ Optimizations

* GITHUB#14963: Bypass HNSW graph building for tiny segments. (Shubham Chaudhary, Ben Trent)

* GITHUB#15397: NumericComparator: immediately check whether a segment is competitive with the recorded bottom (Martijn van Groningen)

Bug Fixes
---------------------
* GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,8 @@ private class PointsCompetitiveDISIBuilder extends CompetitiveDISIBuilder {
// helps to be conservative about increasing the sampling interval
private int tryUpdateFailCount = 0;

public PointsCompetitiveDISIBuilder(PointValues pointValues, NumericLeafComparator comparator) {
PointsCompetitiveDISIBuilder(PointValues pointValues, NumericLeafComparator comparator)
throws IOException {
super(comparator);
LeafReaderContext context = comparator.context;
FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
Expand All @@ -344,6 +345,7 @@ public PointsCompetitiveDISIBuilder(PointValues pointValues, NumericLeafComparat
+ bytesCount);
}
this.pointValues = pointValues;
postInitializeCompetitiveIterator();
}

@Override
Expand All @@ -364,6 +366,30 @@ int docCount() {
return pointValues.getDocCount();
}

/**
 * Runs right after construction: when the queue is already full, compares the recorded bottom
 * against the segment-wide min/max packed values and, if no document in this segment can be
 * competitive, installs an empty competitive iterator so the segment is skipped outright.
 *
 * @throws IOException i/o exception while fetching min and max values from point values
 */
void postInitializeCompetitiveIterator() throws IOException {
  if (queueFull == false || hitsThresholdReached == false) {
    return;
  }
  // Documents lacking a point value may still be competitive via the missing value; in that
  // case this optimization must not fire.
  if (docCount() < maxDoc && isMissingValueCompetitive()) {
    return;
  }
  final long bottom = leafComparator.bottomAsComparableLong();
  final boolean segmentNotCompetitive;
  if (reverse) {
    // descending: competitive docs must exceed bottom, impossible if the max is below it
    segmentNotCompetitive = bottom > sortableBytesToLong(pointValues.getMaxPackedValue());
  } else {
    // ascending: competitive docs must be below bottom, impossible if the min is above it
    segmentNotCompetitive = bottom < sortableBytesToLong(pointValues.getMinPackedValue());
  }
  if (segmentNotCompetitive) {
    competitiveIterator.update(DocIdSetIterator.empty());
  }
}

@Override
void doUpdateCompetitiveIterator() throws IOException {
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
Expand Down Expand Up @@ -478,8 +504,8 @@ private class DVSkipperCompetitiveDISIBuilder extends CompetitiveDISIBuilder {
private final DocValuesSkipper skipper;
private final TwoPhaseIterator innerTwoPhase;

public DVSkipperCompetitiveDISIBuilder(
DocValuesSkipper skipper, NumericLeafComparator leafComparator) throws IOException {
DVSkipperCompetitiveDISIBuilder(DocValuesSkipper skipper, NumericLeafComparator leafComparator)
throws IOException {
super(leafComparator);
this.skipper = skipper;
NumericDocValues docValues =
Expand All @@ -497,13 +523,34 @@ public float matchCost() {
return 2; // 2 comparisons
}
};
postInitializeCompetitiveIterator();
}

@Override
int docCount() {
// Delegates to the skipper's per-segment doc count.
return skipper.docCount();
}

/**
 * Runs right after construction: when the queue is already full, compares the recorded bottom
 * against the skipper's segment-wide min/max values and, if no document in this segment can be
 * competitive, installs an empty competitive iterator so the segment is skipped outright.
 */
void postInitializeCompetitiveIterator() {
  if (queueFull == false || hitsThresholdReached == false) {
    return;
  }
  // Documents lacking a doc value may still be competitive via the missing value; in that
  // case this optimization must not fire.
  if (docCount() < maxDoc && isMissingValueCompetitive()) {
    return;
  }
  final long bottom = leafComparator.bottomAsComparableLong();
  final boolean segmentNotCompetitive;
  if (reverse) {
    // descending: competitive docs must exceed bottom, impossible if the max is below it
    segmentNotCompetitive = bottom > skipper.maxValue();
  } else {
    // ascending: competitive docs must be below bottom, impossible if the min is above it
    segmentNotCompetitive = bottom < skipper.minValue();
  }
  if (segmentNotCompetitive) {
    competitiveIterator.update(DocIdSetIterator.empty());
  }
}

@Override
void doUpdateCompetitiveIterator() {
TwoPhaseIterator twoPhaseIterator =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;

/** Tests for the empty-competitive-iterator optimization in {@code NumericComparator}. */
public class TestNumericComparator extends LuceneTestCase {

  /**
   * Indexes docs with monotonically increasing values for doc-values-only fields ("*_1") and
   * point+doc-values fields ("*_2"), then checks that a leaf comparator whose bottom is outside
   * the segment's [min, max] range immediately exposes an empty competitive iterator.
   */
  public void testEmptyCompetitiveIteratorOptimization() throws Exception {
    final int numDocs = atLeast(1000);
    try (var dir = newDirectory()) {
      try (var writer =
          new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
        for (int i = 0; i < numDocs; i++) {
          var doc = new Document();

          doc.add(NumericDocValuesField.indexedField("long_field_1", i));
          doc.add(new LongPoint("long_field_2", i));
          doc.add(new NumericDocValuesField("long_field_2", i));

          doc.add(NumericDocValuesField.indexedField("int_field_1", i));
          doc.add(new IntPoint("int_field_2", i));
          doc.add(new NumericDocValuesField("int_field_2", i));

          doc.add(NumericDocValuesField.indexedField("float_field_1", i));
          doc.add(new FloatPoint("float_field_2", i));
          doc.add(new NumericDocValuesField("float_field_2", i));

          doc.add(NumericDocValuesField.indexedField("double_field_1", i));
          doc.add(new DoublePoint("double_field_2", i));
          doc.add(new NumericDocValuesField("double_field_2", i));

          writer.addDocument(doc);
        }
        // Merge once after all documents are indexed (not once per document) so the reader
        // below sees a single segment.
        writer.forceMerge(1);
        try (var reader = DirectoryReader.open(writer)) {
          assertEquals(1, reader.leaves().size());
          var leafContext = reader.leaves().get(0);

          // long field 1 ascending:
          assertLongField("long_field_1", false, -1, leafContext);
          // long field 1 descending:
          assertLongField("long_field_1", true, numDocs + 1, leafContext);
          // long field 2 ascending:
          assertLongField("long_field_2", false, -1, leafContext);
          // long field 2 descending:
          assertLongField("long_field_2", true, numDocs + 1, leafContext);

          // int field 1 ascending:
          assertIntField("int_field_1", false, -1, leafContext);
          // int field 1 descending:
          assertIntField("int_field_1", true, numDocs + 1, leafContext);
          // int field 2 ascending:
          assertIntField("int_field_2", false, -1, leafContext);
          // int field 2 descending:
          assertIntField("int_field_2", true, numDocs + 1, leafContext);

          // float field 1 ascending:
          assertFloatField("float_field_1", false, -1, leafContext);
          // float field 1 descending:
          assertFloatField("float_field_1", true, numDocs + 1, leafContext);
          // float field 2 ascending:
          assertFloatField("float_field_2", false, -1, leafContext);
          // float field 2 descending:
          assertFloatField("float_field_2", true, numDocs + 1, leafContext);

          // double field 1 ascending:
          assertDoubleField("double_field_1", false, -1, leafContext);
          // double field 1 descending:
          assertDoubleField("double_field_1", true, numDocs + 1, leafContext);
          // double field 2 ascending:
          assertDoubleField("double_field_2", false, -1, leafContext);
          // double field 2 descending:
          assertDoubleField("double_field_2", true, numDocs + 1, leafContext);
        }
      }
    }
  }

  /**
   * Verifies that the optimization does not prune a segment whose only hit is a document with a
   * missing value, when the configured missing value is itself competitive.
   */
  public void testEmptyCompetitiveIteratorOptimizationWithMissingValue() throws Exception {
    final int numDocs = atLeast(1000);
    try (var dir = newDirectory()) {
      try (var writer =
          new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
        // index docs without missing values:
        for (int i = 0; i < numDocs; i++) {
          var doc = new Document();

          doc.add(NumericDocValuesField.indexedField("long_field_1", i));
          doc.add(new LongPoint("long_field_2", i));
          doc.add(new NumericDocValuesField("long_field_2", i));

          writer.addDocument(doc);
          if (i % 100 == 0) {
            writer.flush();
          }
        }

        // Index one doc without the long_field_1 and long_field_2 fields
        var doc = new Document();
        doc.add(new NumericDocValuesField("another_field", numDocs));
        writer.addDocument(doc);
        writer.flush();

        try (var reader = DirectoryReader.open(writer)) {
          var indexSearcher = newSearcher(reader);
          indexSearcher.setQueryCache(null);
          {
            var sortField = new SortField("long_field_1", SortField.Type.LONG, false);
            sortField.setMissingValue(Long.MIN_VALUE);
            var topDocs = indexSearcher.search(new MatchAllDocsQuery(), 3, new Sort(sortField));
            assertEquals(numDocs, topDocs.scoreDocs[0].doc);
            assertEquals(Long.MIN_VALUE, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]);
            assertEquals(0, topDocs.scoreDocs[1].doc);
            assertEquals(0L, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]);
            assertEquals(1, topDocs.scoreDocs[2].doc);
            assertEquals(1L, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]);
          }
          {
            var sortField = new SortField("long_field_2", SortField.Type.LONG, false);
            sortField.setMissingValue(Long.MIN_VALUE);
            var topDocs = indexSearcher.search(new MatchAllDocsQuery(), 3, new Sort(sortField));
            assertEquals(numDocs, topDocs.scoreDocs[0].doc);
            assertEquals(Long.MIN_VALUE, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]);
            assertEquals(0, topDocs.scoreDocs[1].doc);
            assertEquals(0L, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]);
            assertEquals(1, topDocs.scoreDocs[2].doc);
            assertEquals(1L, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]);
          }
        }
      }
    }
  }

  /**
   * With fewer docs than the default hits threshold, the optimization must not kick in early and
   * the total hit count must remain exact.
   */
  public void testEmptyCompetitiveIteratorOptimizationAndHitsThresholdReached() throws Exception {
    final int numDocs =
        TestUtil.nextInt(random(), 128, 512); // Below IndexSearcher.DEFAULT_HITS_THRESHOLD
    try (var dir = newDirectory()) {
      try (var writer =
          new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
        for (int i = 0; i < numDocs; i++) {
          var doc = new Document();

          doc.add(NumericDocValuesField.indexedField("field_1", i));
          doc.add(new LongPoint("field_2", i));
          doc.add(new NumericDocValuesField("field_2", i));

          writer.addDocument(doc);
          if (i % 100 == 0) {
            writer.flush();
          }
        }

        try (var reader = DirectoryReader.open(writer)) {
          var indexSearcher = newSearcher(reader);
          indexSearcher.setQueryCache(null);
          for (String field : new String[] {"field_1", "field_2"}) {
            var sortField = new SortField(field, SortField.Type.LONG, false);
            var topDocs = indexSearcher.search(new MatchAllDocsQuery(), 3, new Sort(sortField));
            assertEquals(TotalHits.Relation.EQUAL_TO, topDocs.totalHits.relation());
            assertEquals(numDocs, topDocs.totalHits.value());
            assertEquals(0, topDocs.scoreDocs[0].doc);
            assertEquals(0L, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]);
            assertEquals(1, topDocs.scoreDocs[1].doc);
            assertEquals(1L, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]);
            assertEquals(2, topDocs.scoreDocs[2].doc);
            assertEquals(2L, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]);
          }
        }
      }
    }
  }

  /**
   * Builds a LONG comparator whose queue is full with the given non-competitive bottom and
   * asserts that its leaf comparator's competitive iterator is immediately exhausted.
   */
  private static void assertLongField(
      String fieldName, boolean reverse, int bottom, LeafReaderContext leafContext)
      throws IOException {
    var comparator1 =
        (LongComparator)
            new SortField(fieldName, SortField.Type.LONG, reverse)
                .getComparator(1, Pruning.GREATER_THAN_OR_EQUAL_TO);
    comparator1.queueFull = true;
    comparator1.hitsThresholdReached = true;
    comparator1.bottom = bottom;
    var leafComparator = comparator1.getLeafComparator(leafContext);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, leafComparator.competitiveIterator().nextDoc());
  }

  /** Same as {@link #assertLongField} but for an INT sort field. */
  private static void assertIntField(
      String fieldName, boolean reverse, int bottom, LeafReaderContext leafContext)
      throws IOException {
    var comparator1 =
        (IntComparator)
            new SortField(fieldName, SortField.Type.INT, reverse)
                .getComparator(1, Pruning.GREATER_THAN_OR_EQUAL_TO);
    comparator1.queueFull = true;
    comparator1.hitsThresholdReached = true;
    comparator1.bottom = bottom;
    var leafComparator = comparator1.getLeafComparator(leafContext);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, leafComparator.competitiveIterator().nextDoc());
  }

  /** Same as {@link #assertLongField} but for a FLOAT sort field. */
  private static void assertFloatField(
      String fieldName, boolean reverse, int bottom, LeafReaderContext leafContext)
      throws IOException {
    var comparator1 =
        (FloatComparator)
            new SortField(fieldName, SortField.Type.FLOAT, reverse)
                .getComparator(1, Pruning.GREATER_THAN_OR_EQUAL_TO);
    comparator1.queueFull = true;
    comparator1.hitsThresholdReached = true;
    comparator1.bottom = bottom;
    var leafComparator = comparator1.getLeafComparator(leafContext);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, leafComparator.competitiveIterator().nextDoc());
  }

  /** Same as {@link #assertLongField} but for a DOUBLE sort field. */
  private static void assertDoubleField(
      String fieldName, boolean reverse, int bottom, LeafReaderContext leafContext)
      throws IOException {
    var comparator1 =
        (DoubleComparator)
            new SortField(fieldName, SortField.Type.DOUBLE, reverse)
                .getComparator(1, Pruning.GREATER_THAN_OR_EQUAL_TO);
    comparator1.queueFull = true;
    comparator1.hitsThresholdReached = true;
    comparator1.bottom = bottom;
    var leafComparator = comparator1.getLeafComparator(leafContext);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, leafComparator.competitiveIterator().nextDoc());
  }
}