Revert 74559 (Avoid global ordinals in composite) (#78848)

* Revert "Update docs that composite agg no longer uses global ords (#74754)". This reverts commit ec799ab.
* Revert "Avoid global ordinals in composite aggregation (#74559)". This reverts commit 5cfcb2f.

Conflicts:
  server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java
  server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/OrdinalValuesSource.java
elastic · Oct 7, 2021 · 83c34f4 · 83c34f4
1 parent e9933ea
commit 83c34f4
Show file tree

Hide file tree

Showing 8 changed files with 222 additions and 425 deletions.
diff --git a/docs/reference/mapping/params/eager-global-ordinals.asciidoc b/docs/reference/mapping/params/eager-global-ordinals.asciidoc
@@ -27,8 +27,8 @@ ordinal for each segment.
 Global ordinals are used if a search contains any of the following components:
 
 * Certain bucket aggregations on `keyword`, `ip`, and `flattened` fields. This
-includes `terms` aggregations as mentioned above, as well as
-`diversified_sampler` and `significant_terms`.
+includes `terms` aggregations as mentioned above, as well as `composite`,
+`diversified_sampler`, and `significant_terms`.
 * Bucket aggregations on `text` fields that require <<fielddata, `fielddata`>>
 to be enabled.
 * Operations on parent and child documents from a `join` field, including

diff --git a/...org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java b/...org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java
@@ -18,9 +18,7 @@
 import org.elasticsearch.search.aggregations.LeafBucketCollector;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import static org.elasticsearch.core.Types.forciblyCast;
@@ -60,7 +58,6 @@ public int hashCode() {
 
     private LongArray docCounts;
     private boolean afterKeyIsSet = false;
-    private int leafReaderOrd = -1; // current LeafReaderContext ordinal
 
     /**
      * Constructs a composite queue with the specified size and sources.
@@ -235,26 +232,14 @@ LeafBucketCollector getLeafCollector(Comparable<?> forceLeadSourceValue,
                                          LeafReaderContext context, LeafBucketCollector in) throws IOException {
         int last = arrays.length - 1;
         LeafBucketCollector collector = in;
-        boolean requiresRehashingWhenSwitchingLeafReaders = false;
         while (last > 0) {
-            SingleDimensionValuesSource<?> valuesSource = arrays[last--];
-            requiresRehashingWhenSwitchingLeafReaders |= valuesSource.requiresRehashingWhenSwitchingLeafReaders();
-            collector = valuesSource.getLeafCollector(context, collector);
+            collector = arrays[last--].getLeafCollector(context, collector);
         }
-        SingleDimensionValuesSource<?> valuesSource = arrays[last];
-        requiresRehashingWhenSwitchingLeafReaders |= valuesSource.requiresRehashingWhenSwitchingLeafReaders();
         if (forceLeadSourceValue != null) {
-            collector = valuesSource.getLeafCollector(forciblyCast(forceLeadSourceValue), context, collector);
+            collector = arrays[last].getLeafCollector(forciblyCast(forceLeadSourceValue), context, collector);
         } else {
-            collector = valuesSource.getLeafCollector(context, collector);
+            collector = arrays[last].getLeafCollector(context, collector);
         }
-        boolean switchedLeafReaders = context.ord != leafReaderOrd;
-        if (map.isEmpty() == false && requiresRehashingWhenSwitchingLeafReaders && switchedLeafReaders) {
-            List<Map.Entry<Slot, Integer>> entries = new ArrayList<>(map.entrySet());
-            map.clear();
-            entries.forEach(e -> map.put(e.getKey(), e.getValue()));
-        }
-        leafReaderOrd = context.ord;
         return collector;
     }
 

diff --git a/...ava/org/elasticsearch/search/aggregations/bucket/composite/GlobalOrdinalValuesSource.java b/...ava/org/elasticsearch/search/aggregations/bucket/composite/GlobalOrdinalValuesSource.java
@@ -0,0 +1,207 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.composite;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.core.CheckedFunction;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.StringFieldType;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.LeafBucketCollector;
+
+import java.io.IOException;
+
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
+/**
+ * A {@link SingleDimensionValuesSource} for global ordinals.
+ */
+class GlobalOrdinalValuesSource extends SingleDimensionValuesSource<BytesRef> {
+    private final CheckedFunction<LeafReaderContext, SortedSetDocValues, IOException> docValuesFunc;
+    private LongArray values;
+    private SortedSetDocValues lookup;
+    private long currentValue;
+    private Long afterValueGlobalOrd;
+    private boolean isTopValueInsertionPoint;
+
+    private long lastLookupOrd = -1;
+    private BytesRef lastLookupValue;
+
+    GlobalOrdinalValuesSource(
+        BigArrays bigArrays,
+        MappedFieldType type,
+        CheckedFunction<LeafReaderContext, SortedSetDocValues, IOException> docValuesFunc,
+        DocValueFormat format,
+        boolean missingBucket,
+        int size,
+        int reverseMul
+    ) {
+        super(bigArrays, format, type, missingBucket, size, reverseMul);
+        this.docValuesFunc = docValuesFunc;
+        this.values = bigArrays.newLongArray(Math.min(size, 100), false);
+    }
+
+    @Override
+    void copyCurrent(int slot) {
+        values = bigArrays.grow(values, slot + 1);
+        values.set(slot, currentValue);
+    }
+
+    @Override
+    int compare(int from, int to) {
+        return Long.compare(values.get(from), values.get(to)) * reverseMul;
+    }
+
+    @Override
+    int compareCurrent(int slot) {
+        return Long.compare(currentValue, values.get(slot)) * reverseMul;
+    }
+
+    @Override
+    int compareCurrentWithAfter() {
+        int cmp = Long.compare(currentValue, afterValueGlobalOrd);
+        if (cmp == 0 && isTopValueInsertionPoint) {
+            // the top value is missing in this shard, the comparison is against
+            // the insertion point of the top value so equality means that the value
+            // is "after" the insertion point.
+            return reverseMul;
+        }
+        return cmp * reverseMul;
+    }
+
+    @Override
+    int hashCode(int slot) {
+        return Long.hashCode(values.get(slot));
+    }
+
+    @Override
+    int hashCodeCurrent() {
+        return Long.hashCode(currentValue);
+    }
+
+    @Override
+    void setAfter(Comparable<?> value) {
+        if (missingBucket && value == null) {
+            afterValue = null;
+            afterValueGlobalOrd = -1L;
+        } else if (value.getClass() == String.class || (missingBucket && fieldType == null)) {
+            // the value might be not string if this field is missing in this shard but present in other shards
+            // and doesn't have a string type
+            afterValue = format.parseBytesRef(value.toString());
+        } else {
+            throw new IllegalArgumentException("invalid value, expected string, got " + value.getClass().getSimpleName());
+        }
+    }
+
+    @Override
+    BytesRef toComparable(int slot) throws IOException {
+        long globalOrd = values.get(slot);
+        if (missingBucket && globalOrd == -1) {
+            return null;
+        } else if (globalOrd == lastLookupOrd) {
+            return lastLookupValue;
+        } else {
+            lastLookupOrd = globalOrd;
+            lastLookupValue = BytesRef.deepCopyOf(lookup.lookupOrd(values.get(slot)));
+            return lastLookupValue;
+        }
+    }
+
+    @Override
+    LeafBucketCollector getLeafCollector(LeafReaderContext context, LeafBucketCollector next) throws IOException {
+        final SortedSetDocValues dvs = docValuesFunc.apply(context);
+        if (lookup == null) {
+            initLookup(dvs);
+        }
+        return new LeafBucketCollector() {
+            @Override
+            public void collect(int doc, long bucket) throws IOException {
+                if (dvs.advanceExact(doc)) {
+                    long ord;
+                    while ((ord = dvs.nextOrd()) != NO_MORE_ORDS) {
+                        currentValue = ord;
+                        next.collect(doc, bucket);
+                    }
+                } else if (missingBucket) {
+                    currentValue = -1;
+                    next.collect(doc, bucket);
+                }
+            }
+        };
+    }
+
+    @Override
+    LeafBucketCollector getLeafCollector(Comparable<BytesRef> value, LeafReaderContext context, LeafBucketCollector next)
+        throws IOException {
+        if (value.getClass() != BytesRef.class) {
+            throw new IllegalArgumentException("Expected BytesRef, got " + value.getClass());
+        }
+        BytesRef term = (BytesRef) value;
+        final SortedSetDocValues dvs = docValuesFunc.apply(context);
+        if (lookup == null) {
+            initLookup(dvs);
+        }
+        return new LeafBucketCollector() {
+            boolean currentValueIsSet = false;
+
+            @Override
+            public void collect(int doc, long bucket) throws IOException {
+                if (currentValueIsSet == false) {
+                    if (dvs.advanceExact(doc)) {
+                        long ord;
+                        while ((ord = dvs.nextOrd()) != NO_MORE_ORDS) {
+                            if (term.equals(lookup.lookupOrd(ord))) {
+                                currentValueIsSet = true;
+                                currentValue = ord;
+                                break;
+                            }
+                        }
+                    }
+                }
+                assert currentValueIsSet;
+                next.collect(doc, bucket);
+            }
+        };
+    }
+
+    @Override
+    SortedDocsProducer createSortedDocsProducerOrNull(IndexReader reader, Query query) {
+        if (checkIfSortedDocsIsApplicable(reader, fieldType) == false
+            || fieldType instanceof StringFieldType == false
+            || (query != null && query.getClass() != MatchAllDocsQuery.class)) {
+            return null;
+        }
+        return new TermsSortedDocsProducer(fieldType.name());
+    }
+
+    @Override
+    public void close() {
+        Releasables.close(values);
+    }
+
+    private void initLookup(SortedSetDocValues dvs) throws IOException {
+        lookup = dvs;
+        if (afterValue != null && afterValueGlobalOrd == null) {
+            afterValueGlobalOrd = lookup.lookupTerm(afterValue);
+            if (afterValueGlobalOrd < 0) {
+                // convert negative insert position
+                afterValueGlobalOrd = -afterValueGlobalOrd - 1;
+                isTopValueInsertionPoint = true;
+            }
+        }
+    }
+}