apache · mayya-sharipova · Sep 8, 2020 · Aug 7, 2020 · Aug 12, 2020 · Aug 20, 2020
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -114,6 +114,12 @@ Improvements
   with doc values and points. In this case, there is an assumption that the same data is
   stored in these points and doc values (Mayya Sharipova, Jim Ferenczi, Adrien Grand)
 
+  LUCENE-9449: Enhance DocComparator to provide an iterator over competitive
+  documents when searching with "after". This iterator can quickly position
+  on the desired "after" document skipping all documents and segments before
+  "after".
+
+
 Bug fixes
 
 * LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while 

diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
@@ -491,7 +491,7 @@ public static final class DocComparator extends FieldComparator<Integer> impleme
     private final int[] docIDs;
     private int docBase;
     private int bottom;
-    private int topValue;
+    int topValue;
 
     /** Creates a new comparator based on document ids for {@code numHits} */
     public DocComparator(int numHits) {

diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java b/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java
@@ -58,8 +58,8 @@ private static final class OneComparatorFieldValueHitQueue<T extends FieldValueH
     private final int oneReverseMul;
     private final FieldComparator<?> oneComparator;
 
-    public OneComparatorFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
-      super(fields, size, filterNonCompetitiveDocs);
+    public OneComparatorFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs, boolean hasAfter) {
+      super(fields, size, filterNonCompetitiveDocs, hasAfter);
 
       assert fields.length == 1;
       oneComparator = comparators[0];
@@ -95,8 +95,8 @@ protected boolean lessThan(final Entry hitA, final Entry hitB) {
    */
   private static final class MultiComparatorsFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {
 
-    public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
-      super(fields, size, filterNonCompetitiveDocs);
+    public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs, boolean hasAfter) {
+      super(fields, size, filterNonCompetitiveDocs, hasAfter);
     }
 
     @Override
@@ -121,7 +121,7 @@ protected boolean lessThan(final Entry hitA, final Entry hitB) {
   }
 
   // prevent instantiation and extension.
-  private FieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
+  private FieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs, boolean hasAfter) {
     super(size);
     // When we get here, fields.length is guaranteed to be > 0, therefore no
     // need to check it again.
@@ -140,7 +140,7 @@ private FieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompet
         // try to rewrite the 1st comparator to the comparator that can skip non-competitive documents
         // skipping functionality is beneficial only for the 1st comparator
         comparators[i] = FilteringFieldComparator.wrapToFilteringComparator(field.getComparator(size, i),
-            field.reverse, numComparators == 1);
+            field.reverse, numComparators == 1, hasAfter);
       } else {
         comparators[i] = field.getComparator(size, i);
       }
@@ -160,18 +160,20 @@ private FieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompet
    *          The number of hits to retain. Must be greater than zero.
    * @param filterNonCompetitiveDocs
    *    {@code true} If comparators should be allowed to filter non-competitive documents, {@code false} otherwise
+   * @param hasAfter
+   *    {@code true} If this sort has "after" FieldDoc
    */
   public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size,
-      boolean filterNonCompetitiveDocs) {
+      boolean filterNonCompetitiveDocs, boolean hasAfter) {
 
     if (fields.length == 0) {
       throw new IllegalArgumentException("Sort must contain at least one field");
     }
 
     if (fields.length == 1) {
-      return new OneComparatorFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs);
+      return new OneComparatorFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs, hasAfter);
     } else {
-      return new MultiComparatorsFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs);
+      return new MultiComparatorsFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs, hasAfter);
     }
   }
 

diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteringDocComparator.java b/lucene/core/src/java/org/apache/lucene/search/FilteringDocComparator.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import org.apache.lucene.index.LeafReaderContext;
+
+import java.io.IOException;
+
+/**
+ * A wrapper over {@code DocComparator} that has "after" FieldDoc.
+ * This comparator can quickly skip to the desired "after" document.
+ */
+class FilteringDocComparator<Integer> extends FilteringFieldComparator<Integer> {
+  public FilteringDocComparator(FieldComparator<Integer> in, boolean reverse, boolean singleSort) {
+    super(in, reverse, singleSort);
+  }
+
+  @Override
+  public final FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
+    LeafFieldComparator inLeafComparator = in.getLeafComparator(context);
+    return new FilteringDocLeafComparator(inLeafComparator, context);
+  }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteringDocLeafComparator.java b/lucene/core/src/java/org/apache/lucene/search/FilteringDocLeafComparator.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import org.apache.lucene.index.LeafReaderContext;
+
+import java.io.IOException;
+
+/**
+ * This comparator is used when there is sort by _doc asc together with "after" FieldDoc.
+ * The comparator provides an iterator that can quickly skip to the desired "after" document.
+ */
+public class FilteringDocLeafComparator implements FilteringLeafFieldComparator {
+    private final FieldComparator.DocComparator in;
+    private DocIdSetIterator topValueIterator; // iterator that starts from topValue if possible
+    private final int minDoc;
+    private final int maxDoc;
+    private final int docBase;
+    private boolean iteratorUpdated = false;
+
+    public FilteringDocLeafComparator(LeafFieldComparator in, LeafReaderContext context) {
+        this.in = (FieldComparator.DocComparator) in;
+        this.minDoc = this.in.topValue + 1;
+        this.maxDoc = context.reader().maxDoc();
+        this.docBase = context.docBase;
+        this.topValueIterator = DocIdSetIterator.all(maxDoc);
+    }
+
+    @Override
+    public DocIdSetIterator competitiveIterator() {
+        return new DocIdSetIterator() {
+            private int doc;
+
+            @Override
+            public int nextDoc() throws IOException {
+                return doc = topValueIterator.nextDoc();
+            }
+
+            @Override
+            public int docID() {
+                return doc;
+            }
+
+            @Override
+            public long cost() {
+                return topValueIterator.cost();
+            }
+
+            @Override
+            public int advance(int target) throws IOException {
+                return doc = topValueIterator.advance(target);
+            }
+        };
+
+    }
+
+    @Override
+    public boolean iteratorUpdated() {
+        return iteratorUpdated;
+    }
+
+    @Override
+    public void setQueueFull() {
+    }
+
+    @Override
+    public void setHitsThresholdReached() {
+        if (iteratorUpdated) return;
+        if (docBase + maxDoc <= minDoc) {
+            topValueIterator = DocIdSetIterator.empty(); // skip this segment
+        }
+        final int segmentMinDoc = Math.max(0, minDoc - docBase);
+        topValueIterator = new MinDocIterator(segmentMinDoc, maxDoc);
+        iteratorUpdated = true;
+    }
+
+    @Override
+    public void setBottom(int slot) {
+        in.setBottom(slot);
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+        return in.compareBottom(doc);
+    }
+
+    @Override
+    public int compareTop(int doc) {
+        return in.compareTop(doc);
+    }
+
+    @Override
+    public void copy(int slot, int doc) throws IOException {
+        in.copy(slot, doc);
+    }
+
+    @Override
+    public void setScorer(Scorable scorer) throws IOException {
+        in.setScorer(scorer);
+    }
+
+
+    static class MinDocIterator extends DocIdSetIterator {
+        final int segmentMinDoc;
+        final int maxDoc;
+        int doc = -1;
+
+        MinDocIterator(int segmentMinDoc, int maxDoc) {
+            this.segmentMinDoc = segmentMinDoc;
+            this.maxDoc = maxDoc;
+        }
+
+        @Override
+        public int docID() {
+            return doc;
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+            return advance(doc + 1);
+        }
+
+        @Override
+        public int advance(int target) throws IOException {
+            assert target > doc;
+            if (doc == -1) {
+                // skip directly to minDoc
+                doc = Math.max(target, segmentMinDoc);
+            } else {
+                doc = target;
+            }
+            if (doc >= maxDoc) {
+                doc = NO_MORE_DOCS;
+            }
+            return doc;
+        }
+
+        @Override
+        public long cost() {
+            return maxDoc - segmentMinDoc;
+        }
+    }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteringFieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FilteringFieldComparator.java
@@ -68,10 +68,12 @@ public int compareValues(T first, T second) {
    * @param comparator – comparator to wrap
    * @param reverse – if this sort is reverse
    * @param singleSort – true if this sort is based on a single field and there are no other sort fields for tie breaking
+   * @param hasAfter – true if this sort has after FieldDoc
    * @return comparator wrapped as a filtering comparator or the original comparator if the filtering functionality
    * is not implemented for it
    */
-  public static FieldComparator<?> wrapToFilteringComparator(FieldComparator<?> comparator, boolean reverse, boolean singleSort) {
+  public static FieldComparator<?> wrapToFilteringComparator(FieldComparator<?> comparator, boolean reverse, boolean singleSort,
+      boolean hasAfter) {
     Class<?> comparatorClass = comparator.getClass();
     if (comparatorClass == FieldComparator.LongComparator.class){
       return new FilteringNumericComparator<>((FieldComparator.LongComparator) comparator, reverse, singleSort);
@@ -85,6 +87,9 @@ public static FieldComparator<?> wrapToFilteringComparator(FieldComparator<?> co
     if (comparatorClass == FieldComparator.FloatComparator.class){
       return new FilteringNumericComparator<>((FieldComparator.FloatComparator) comparator, reverse, singleSort);
     }
+    if (comparatorClass == FieldComparator.DocComparator.class && hasAfter && reverse == false) { // if SortField.DOC with after
+      return new FilteringDocComparator<>((FieldComparator.DocComparator) comparator, reverse, singleSort);
+    }
     return comparator;
   }
 

diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteringLeafFieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FilteringLeafFieldComparator.java
@@ -20,8 +20,9 @@
 
 /**
  * Decorates a wrapped LeafFieldComparator to add a functionality to skip over non-competitive docs.
- * FilteringLeafFieldComparator provides two additional functions to a LeafFieldComparator:
- *  {@code competitiveIterator()} and {@code setCanUpdateIterator()}.
+ * {code competitiveIterator()} provides an iterator over competitive documents.
+ * Collectors must inform this comparator when hits threshold is reached and when queue becomes full,
+ * which are signals for the comparator to start updating its competitive iterator.
  */
 public interface FilteringLeafFieldComparator extends LeafFieldComparator {
   /**
@@ -32,8 +33,22 @@ public interface FilteringLeafFieldComparator extends LeafFieldComparator {
   DocIdSetIterator competitiveIterator() throws IOException;
 
   /**
-   * Informs this leaf comparator that it is allowed to start updating its competitive iterator.
-   * This method is called from a collector when queue becomes full and threshold is reached.
+   * Informs this leaf comparator that hits threshold is reached.
+   * This method is called from a collector when hits threshold is reached.
+   * For some filtering comparators (e.g. {@code FilteringDocLeafComparator} reaching
+   * hits threshold is enough to start updating their iterators, even when queue is not yet full.
    */
-  void setCanUpdateIterator() throws IOException;
+  void setHitsThresholdReached() throws IOException;
+
+  /**
+   * Informs this leaf comparator that queue has become full.
+   * This method is called from a collector when queue becomes full.
+   */
+  void setQueueFull() throws IOException;
+
+  /**
+   * Returns {@code true} if the competitive iterator is updated.
+   * This tells the calling collector that it can update the {@code TotalHits.Relation} to {GREATER_THAN_OR_EQUAL_TO}
+   */
+  boolean iteratorUpdated() throws IOException;
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteringNumericLeafComparator.java b/lucene/core/src/java/org/apache/lucene/search/FilteringNumericLeafComparator.java
@@ -47,6 +47,8 @@ abstract class FilteringNumericLeafComparator implements FilteringLeafFieldCompa
   private int updateCounter = 0;
   private boolean canUpdateIterator = false; // set to true when queue becomes full and hitsThreshold is reached
   private DocIdSetIterator competitiveIterator;
+  private boolean queueFull = false;
+  private boolean hitsThresholdReached = false;
 
   public FilteringNumericLeafComparator(LeafFieldComparator in, LeafReaderContext context, String field,
         boolean reverse, boolean singleSort, boolean hasTopValue, int bytesCount) throws IOException {
@@ -97,9 +99,26 @@ public void setScorer(Scorable scorer) throws IOException {
   }
 
   @Override
-  public void setCanUpdateIterator() throws IOException {
-    this.canUpdateIterator = true;
-    updateCompetitiveIterator();
+  public boolean iteratorUpdated() {
+    return canUpdateIterator && (updateCounter > 0);
+  }
+
+  @Override
+  public void setQueueFull() throws IOException {
+    queueFull = true;
+    if (hitsThresholdReached && canUpdateIterator == false) { // for the 1st time queue becomes full and hitsThreshold is reached
+      canUpdateIterator = true;
+      updateCompetitiveIterator();
+    }
+  }
+
+  @Override
+  public void setHitsThresholdReached() throws IOException {
+    hitsThresholdReached = true;
+    if (queueFull && canUpdateIterator == false) { // for the 1st time queue becomes full and hitsThreshold is reached
+      canUpdateIterator = true;
+      updateCompetitiveIterator();
+    }
   }
 
   @Override