Skip to content

Commit

Permalink
LUCENE-9599 Disable sort optim on index sort (#2075)
Browse files Browse the repository at this point in the history
Disable sort optimization in comparators on index sort.

Currently, if search sort is equal or a part of the index sort, we have
an early termination in TopFieldCollector.
But comparators are not aware of the index sort, and may run
sort optimization even if the search sort is congruent with
the index sort.

This patch:
- adds `disableSkipping` method to `FieldComparator`,
 This method is called by `TopFieldCollector`, and currently called 
  when  the search sort is congruent with the index sort,
  but more conditions can be added. 
- disables sort optimization in comparators in this case.
- removes a private  `MultiComparatorLeafCollector` class, because the only
  class that extends `MultiComparatorLeafCollector` was `TopFieldLeafCollector`.
  The logic of the deleted `TopFieldLeafCollector` is added to `TopFieldLeafCollector`.

Relates to #1351
  • Loading branch information
mayya-sharipova committed Dec 3, 2020
1 parent f24b497 commit 69de1a4
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 39 deletions.
11 changes: 11 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,17 @@ public int compareValues(T first, T second) {
public void setSingleSort() {
}

/**
* Informs the comparator that the skipping of documents should be disabled.
* This function is called by TopFieldCollector in cases when the skipping functionality
* should not be applied or not necessary. An example could be when
* search sort is a part of the index sort, and can be already efficiently
* handled by TopFieldCollector, and doing extra work for skipping in the comparator
* is redundant.
*/
public void disableSkipping() {
}

/** Sorts by descending relevance. NOTE: if you are
* sorting only by descending relevance and then
* secondarily by ascending docID, performance is faster
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,5 +131,5 @@ default DocIdSetIterator competitiveIterator() throws IOException {
*/
default void setHitsThresholdReached() throws IOException{
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -46,40 +46,33 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
// always compare lower than a real hit; this would
// save having to check queueFull on each insert

private static abstract class MultiComparatorLeafCollector implements LeafCollector {
private abstract class TopFieldLeafCollector implements LeafCollector {

final LeafFieldComparator comparator;
final int reverseMul;
Scorable scorer;
boolean collectedAllCompetitiveHits = false;

MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, Sort sort, LeafReaderContext context) throws IOException {
// as all segments are sorted in the same way, enough to check only the 1st segment for indexSort
if (searchSortPartOfIndexSort == null) {
final Sort indexSort = context.reader().getMetaData().getSort();
searchSortPartOfIndexSort = canEarlyTerminate(sort, indexSort);
if (searchSortPartOfIndexSort) {
firstComparator.disableSkipping();
}
}
LeafFieldComparator[] comparators = queue.getComparators(context);
int[] reverseMuls = queue.getReverseMul();
if (comparators.length == 1) {
this.reverseMul = reverseMul[0];
this.reverseMul = reverseMuls[0];
this.comparator = comparators[0];
} else {
this.reverseMul = 1;
this.comparator = new MultiLeafFieldComparator(comparators, reverseMul);
this.comparator = new MultiLeafFieldComparator(comparators, reverseMuls);
}
}

@Override
public void setScorer(Scorable scorer) throws IOException {
comparator.setScorer(scorer);
this.scorer = scorer;
}
}

private abstract class TopFieldLeafCollector extends MultiComparatorLeafCollector {

final boolean canEarlyTerminate;
boolean collectedAllCompetitiveHits = false;

TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, Sort sort, LeafReaderContext context) throws IOException {
super(queue.getComparators(context), queue.getReverseMul());
final Sort indexSort = context.reader().getMetaData().getSort();
canEarlyTerminate = canEarlyTerminate(sort, indexSort);
}

void countHit(int doc) throws IOException {
++totalHits;
hitsThresholdChecker.incrementHitCount();
Expand All @@ -100,7 +93,7 @@ boolean thresholdCheck(int doc) throws IOException {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is largest than anything else in the queue, and
// therefore not competitive.
if (canEarlyTerminate) {
if (searchSortPartOfIndexSort) {
if (hitsThresholdChecker.isThresholdReached()) {
totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
throw new CollectionTerminatedException();
Expand Down Expand Up @@ -139,7 +132,8 @@ void collectAnyHit(int doc, int hitsCollected) throws IOException {

@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
this.scorer = scorer;
comparator.setScorer(scorer);
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer);
if (minScoreAcc != null) {
Expand All @@ -154,8 +148,6 @@ public DocIdSetIterator competitiveIterator() throws IOException {

}

// TODO: remove this code when all bulk scores similar to {@code DefaultBulkScorer} use collectors' iterator,
// as early termination should be implemented in their respective comparators and removed from a collector
static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
return canEarlyTerminateOnDocId(searchSort) ||
canEarlyTerminateOnPrefix(searchSort, indexSort);
Expand Down Expand Up @@ -286,9 +278,11 @@ public void collect(int doc) throws IOException {

final int numHits;
final HitsThresholdChecker hitsThresholdChecker;
final FieldComparator.RelevanceComparator relevanceComparator;
final FieldComparator<?> firstComparator;
final boolean canSetMinScore;

Boolean searchSortPartOfIndexSort = null; // shows if Search Sort if a part of the Index Sort

// an accumulator that maintains the maximum of the segment's minimum competitive scores
final MaxScoreAccumulator minScoreAcc;
// the current local minimum competitive score already propagated to the underlying scorer
Expand All @@ -314,17 +308,15 @@ private TopFieldCollector(FieldValueHitQueue<Entry> pq, int numHits,
this.numHits = numHits;
this.hitsThresholdChecker = hitsThresholdChecker;
this.numComparators = pq.getComparators().length;
FieldComparator<?> firstComparator = pq.getComparators()[0];
this.firstComparator = pq.getComparators()[0];
int reverseMul = pq.reverseMul[0];

if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
&& reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
relevanceComparator = (FieldComparator.RelevanceComparator) firstComparator;
scoreMode = ScoreMode.TOP_SCORES;
canSetMinScore = true;
} else {
relevanceComparator = null;
canSetMinScore = false;
if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
Expand Down Expand Up @@ -360,8 +352,8 @@ protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (canSetMinScore
&& queueFull
&& hitsThresholdChecker.isThresholdReached()) {
assert bottom != null && relevanceComparator != null;
float minScore = relevanceComparator.value(bottom.slot);
assert bottom != null;
float minScore = (float) firstComparator.value(bottom.slot);
if (minScore > minCompetitiveScore) {
scorer.setMinCompetitiveScore(minScore);
minCompetitiveScore = minScore;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,19 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
protected final T missingValue;
protected final String field;
protected final boolean reverse;
protected final boolean primarySort;
private final int bytesCount; // how many bytes are used to encode this number

protected boolean topValueSet;
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
protected boolean hitsThresholdReached;
protected boolean queueFull;
private boolean canSkipDocuments;

protected NumericComparator(String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
this.field = field;
this.missingValue = missingValue;
this.reverse = reverse;
this.primarySort = (sortPos == 0);
this.canSkipDocuments = (sortPos == 0); // skipping functionality is only relevant for primary sort
this.bytesCount = bytesCount;
}

Expand All @@ -65,27 +65,32 @@ public void setSingleSort() {
singleSort = true;
}

@Override
public void disableSkipping() {
canSkipDocuments = false;
}

/**
* Leaf comparator for {@link NumericComparator} that provides skipping functionality
*/
public abstract class NumericLeafComparator implements LeafFieldComparator {
protected final NumericDocValues docValues;
private final PointValues pointValues;
private final boolean enableSkipping; // if skipping functionality should be enabled
private final boolean enableSkipping; // if skipping functionality should be enabled on this segment
private final int maxDoc;
private final byte[] minValueAsBytes;
private final byte[] maxValueAsBytes;

private DocIdSetIterator competitiveIterator;
private long iteratorCost;
private int maxDocVisited = 0;
private int updateCounter = 0;

public NumericLeafComparator(LeafReaderContext context) throws IOException {
this.docValues = getNumericDocValues(context, field);
this.pointValues = primarySort ? context.reader().getPointValues(field) : null;
this.pointValues = canSkipDocuments ? context.reader().getPointValues(field) : null;
if (pointValues != null) {
this.enableSkipping = true; // skipping is enabled on primarySort and when points are available
this.enableSkipping = true; // skipping is enabled when points are available
this.maxDoc = context.reader().maxDoc();
this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
this.minValueAsBytes = reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
Expand Down

0 comments on commit 69de1a4

Please sign in to comment.