LUCENE-9599 Disable sort optim on index sort (#2075)

Disable sort optimization in comparators on index sort. Currently, if search sort is equal or a part of the index sort, we have an early termination in TopFieldCollector. But comparators are not aware of the index sort, and may run sort optimization even if the search sort is congruent with the index sort. This patch: - adds `disableSkipping` method to `FieldComparator`, This method is called by `TopFieldCollector`, and currently called when the search sort is congruent with the index sort, but more conditions can be added. - disables sort optimization in comparators in this case. - removes a private `MultiComparatorLeafCollector` class, because the only class that extends `MultiComparatorLeafCollector` was `TopFieldLeafCollector`. The logic of the deleted `TopFieldLeafCollector` is added to `TopFieldLeafCollector`. Relates to #1351
apache · Dec 3, 2020 · 69de1a4 · 69de1a4
1 parent f24b497
commit 69de1a4
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 39 deletions.
diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
@@ -142,6 +142,17 @@ public int compareValues(T first, T second) {
   public void setSingleSort() {
   }
 
+  /**
+   * Informs the comparator that the skipping of documents should be disabled.
+   * This function is called by TopFieldCollector in cases when the skipping functionality
+   * should not be applied or not necessary. An example could be when
+   * search sort is a part of the index sort, and can be already efficiently
+   * handled by TopFieldCollector, and doing extra work for skipping in the comparator
+   * is redundant.
+   */
+  public void disableSkipping() {
+  }
+
   /** Sorts by descending relevance.  NOTE: if you are
    *  sorting only by descending relevance and then
    *  secondarily by ascending docID, performance is faster

diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java
@@ -131,5 +131,5 @@ default DocIdSetIterator competitiveIterator() throws IOException {
    */
   default void setHitsThresholdReached() throws IOException{
   }
-
+  
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -46,40 +46,33 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
   // always compare lower than a real hit; this would
   // save having to check queueFull on each insert
 
-  private static abstract class MultiComparatorLeafCollector implements LeafCollector {
+  private abstract class TopFieldLeafCollector implements LeafCollector  {
 
     final LeafFieldComparator comparator;
     final int reverseMul;
     Scorable scorer;
+    boolean collectedAllCompetitiveHits = false;
 
-    MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
+    TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, Sort sort, LeafReaderContext context) throws IOException {
+      // as all segments are sorted in the same way, enough to check only the 1st segment for indexSort
+      if (searchSortPartOfIndexSort == null) {
+        final Sort indexSort = context.reader().getMetaData().getSort();
+        searchSortPartOfIndexSort = canEarlyTerminate(sort, indexSort);
+        if (searchSortPartOfIndexSort) {
+          firstComparator.disableSkipping();
+        }
+      }
+      LeafFieldComparator[] comparators = queue.getComparators(context);
+      int[] reverseMuls = queue.getReverseMul();
       if (comparators.length == 1) {
-        this.reverseMul = reverseMul[0];
+        this.reverseMul = reverseMuls[0];
         this.comparator = comparators[0];
       } else {
         this.reverseMul = 1;
-        this.comparator = new MultiLeafFieldComparator(comparators, reverseMul);
+        this.comparator = new MultiLeafFieldComparator(comparators, reverseMuls);
       }
     }
 
-    @Override
-    public void setScorer(Scorable scorer) throws IOException {
-      comparator.setScorer(scorer);
-      this.scorer = scorer;
-    }
-  }
-
-  private abstract class TopFieldLeafCollector extends MultiComparatorLeafCollector {
-
-    final boolean canEarlyTerminate;
-    boolean collectedAllCompetitiveHits = false;
-
-    TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, Sort sort, LeafReaderContext context) throws IOException {
-      super(queue.getComparators(context), queue.getReverseMul());
-      final Sort indexSort = context.reader().getMetaData().getSort();
-      canEarlyTerminate = canEarlyTerminate(sort, indexSort);
-    }
-
     void countHit(int doc) throws IOException {
       ++totalHits;
       hitsThresholdChecker.incrementHitCount();
@@ -100,7 +93,7 @@ boolean thresholdCheck(int doc) throws IOException {
         // since docs are visited in doc Id order, if compare is 0, it means
         // this document is largest than anything else in the queue, and
         // therefore not competitive.
-        if (canEarlyTerminate) {
+        if (searchSortPartOfIndexSort) {
           if (hitsThresholdChecker.isThresholdReached()) {
             totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
             throw new CollectionTerminatedException();
@@ -139,7 +132,8 @@ void collectAnyHit(int doc, int hitsCollected) throws IOException {
 
     @Override
     public void setScorer(Scorable scorer) throws IOException {
-      super.setScorer(scorer);
+      this.scorer = scorer;
+      comparator.setScorer(scorer);
       minCompetitiveScore = 0f;
       updateMinCompetitiveScore(scorer);
       if (minScoreAcc != null) {
@@ -154,8 +148,6 @@ public DocIdSetIterator competitiveIterator() throws IOException {
 
   }
 
-  // TODO: remove this code when all bulk scores similar to {@code DefaultBulkScorer} use collectors' iterator,
-  // as early termination should be implemented in their respective comparators and removed from a collector
   static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
     return canEarlyTerminateOnDocId(searchSort) ||
            canEarlyTerminateOnPrefix(searchSort, indexSort);
@@ -286,9 +278,11 @@ public void collect(int doc) throws IOException {
 
   final int numHits;
   final HitsThresholdChecker hitsThresholdChecker;
-  final FieldComparator.RelevanceComparator relevanceComparator;
+  final FieldComparator<?> firstComparator;
   final boolean canSetMinScore;
 
+  Boolean searchSortPartOfIndexSort = null; // shows if Search Sort if a part of the Index Sort
+
   // an accumulator that maintains the maximum of the segment's minimum competitive scores
   final MaxScoreAccumulator minScoreAcc;
   // the current local minimum competitive score already propagated to the underlying scorer
@@ -314,17 +308,15 @@ private TopFieldCollector(FieldValueHitQueue<Entry> pq, int numHits,
     this.numHits = numHits;
     this.hitsThresholdChecker = hitsThresholdChecker;
     this.numComparators = pq.getComparators().length;
-    FieldComparator<?> firstComparator = pq.getComparators()[0];
+    this.firstComparator = pq.getComparators()[0];
     int reverseMul = pq.reverseMul[0];
 
     if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
             && reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
             && hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
-      relevanceComparator = (FieldComparator.RelevanceComparator) firstComparator;
       scoreMode = ScoreMode.TOP_SCORES;
       canSetMinScore = true;
     } else {
-      relevanceComparator = null;
       canSetMinScore = false;
       if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
         scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
@@ -360,8 +352,8 @@ protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
     if (canSetMinScore
           && queueFull
           && hitsThresholdChecker.isThresholdReached()) {
-      assert bottom != null && relevanceComparator != null;
-      float minScore = relevanceComparator.value(bottom.slot);
+      assert bottom != null;
+      float minScore = (float) firstComparator.value(bottom.slot);
       if (minScore > minCompetitiveScore) {
         scorer.setMinCompetitiveScore(minScore);
         minCompetitiveScore = minScore;

diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
@@ -39,19 +39,19 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
     protected final T missingValue;
     protected final String field;
     protected final boolean reverse;
-    protected final boolean primarySort;
     private final int bytesCount; // how many bytes are used to encode this number
 
     protected boolean topValueSet;
     protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
     protected boolean hitsThresholdReached;
     protected boolean queueFull;
+    private boolean canSkipDocuments;
 
     protected NumericComparator(String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
         this.field = field;
         this.missingValue = missingValue;
         this.reverse = reverse;
-        this.primarySort = (sortPos == 0);
+        this.canSkipDocuments = (sortPos == 0); // skipping functionality is only relevant for primary sort
         this.bytesCount = bytesCount;
     }
 
@@ -65,27 +65,32 @@ public void setSingleSort() {
         singleSort = true;
     }
 
+    @Override
+    public void disableSkipping() {
+        canSkipDocuments = false;
+    }
+
     /**
      * Leaf comparator for {@link NumericComparator} that provides skipping functionality
      */
     public abstract class NumericLeafComparator implements LeafFieldComparator {
         protected final NumericDocValues docValues;
         private final PointValues pointValues;
-        private final boolean enableSkipping; // if skipping functionality should be enabled
+        private final boolean enableSkipping; // if skipping functionality should be enabled on this segment
         private final int maxDoc;
         private final byte[] minValueAsBytes;
         private final byte[] maxValueAsBytes;
-
+        
         private DocIdSetIterator competitiveIterator;
         private long iteratorCost;
         private int maxDocVisited = 0;
         private int updateCounter = 0;
 
         public NumericLeafComparator(LeafReaderContext context) throws IOException {
             this.docValues = getNumericDocValues(context, field);
-            this.pointValues = primarySort ? context.reader().getPointValues(field) : null;
+            this.pointValues = canSkipDocuments ? context.reader().getPointValues(field) : null;
             if (pointValues != null) {
-                this.enableSkipping = true; // skipping is enabled on primarySort and when points are available
+                this.enableSkipping = true; // skipping is enabled when points are available
                 this.maxDoc = context.reader().maxDoc();
                 this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
                 this.minValueAsBytes = reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;