Skip to content

Commit

Permalink
Revert 74559 (Avoid global ordinals in composite) (#78848)
Browse files Browse the repository at this point in the history
* Revert "Update docs that composite agg no longer uses global ords (#74754)"

This reverts commit ec799ab.

* Revert "Avoid global ordinals in composite aggregation (#74559)"

This reverts commit 5cfcb2f.

 Conflicts:
	server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java
	server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/OrdinalValuesSource.java
  • Loading branch information
not-napoleon committed Oct 7, 2021
1 parent e9933ea commit 83c34f4
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 425 deletions.
4 changes: 2 additions & 2 deletions docs/reference/mapping/params/eager-global-ordinals.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ ordinal for each segment.
Global ordinals are used if a search contains any of the following components:

* Certain bucket aggregations on `keyword`, `ip`, and `flattened` fields. This
includes `terms` aggregations as mentioned above, as well as
`diversified_sampler` and `significant_terms`.
includes `terms` aggregations as mentioned above, as well as `composite`,
`diversified_sampler`, and `significant_terms`.
* Bucket aggregations on `text` fields that require <<fielddata, `fielddata`>>
to be enabled.
* Operations on parent and child documents from a `join` field, including
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@
import org.elasticsearch.search.aggregations.LeafBucketCollector;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.core.Types.forciblyCast;
Expand Down Expand Up @@ -60,7 +58,6 @@ public int hashCode() {

private LongArray docCounts;
private boolean afterKeyIsSet = false;
private int leafReaderOrd = -1; // current LeafReaderContext ordinal

/**
* Constructs a composite queue with the specified size and sources.
Expand Down Expand Up @@ -235,26 +232,14 @@ LeafBucketCollector getLeafCollector(Comparable<?> forceLeadSourceValue,
LeafReaderContext context, LeafBucketCollector in) throws IOException {
int last = arrays.length - 1;
LeafBucketCollector collector = in;
boolean requiresRehashingWhenSwitchingLeafReaders = false;
while (last > 0) {
SingleDimensionValuesSource<?> valuesSource = arrays[last--];
requiresRehashingWhenSwitchingLeafReaders |= valuesSource.requiresRehashingWhenSwitchingLeafReaders();
collector = valuesSource.getLeafCollector(context, collector);
collector = arrays[last--].getLeafCollector(context, collector);
}
SingleDimensionValuesSource<?> valuesSource = arrays[last];
requiresRehashingWhenSwitchingLeafReaders |= valuesSource.requiresRehashingWhenSwitchingLeafReaders();
if (forceLeadSourceValue != null) {
collector = valuesSource.getLeafCollector(forciblyCast(forceLeadSourceValue), context, collector);
collector = arrays[last].getLeafCollector(forciblyCast(forceLeadSourceValue), context, collector);
} else {
collector = valuesSource.getLeafCollector(context, collector);
collector = arrays[last].getLeafCollector(context, collector);
}
boolean switchedLeafReaders = context.ord != leafReaderOrd;
if (map.isEmpty() == false && requiresRehashingWhenSwitchingLeafReaders && switchedLeafReaders) {
List<Map.Entry<Slot, Integer>> entries = new ArrayList<>(map.entrySet());
map.clear();
entries.forEach(e -> map.put(e.getKey(), e.getValue()));
}
leafReaderOrd = context.ord;
return collector;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket.composite;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.LeafBucketCollector;

import java.io.IOException;

import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;

/**
 * A {@link SingleDimensionValuesSource} for global ordinals.
 *
 * <p>Every slot stores the ordinal of its value in a {@link LongArray}; the ordinal {@code -1}
 * is used for documents collected into the missing bucket. Ordinals are resolved back to terms
 * through the first {@link SortedSetDocValues} instance returned by {@code docValuesFunc}.
 */
class GlobalOrdinalValuesSource extends SingleDimensionValuesSource<BytesRef> {
    /** Supplies the doc values to iterate for a given leaf. */
    private final CheckedFunction<LeafReaderContext, SortedSetDocValues, IOException> docValuesFunc;
    /** Ordinal stored for each slot; replaced whenever the array has to grow. */
    private LongArray values;
    /** Doc values used for ord-to-term and term-to-ord lookups; assigned once, lazily. */
    private SortedSetDocValues lookup;
    /** Ordinal of the value currently being collected ({@code -1} for the missing bucket). */
    private long currentValue;
    /** Ordinal (or insertion point) of the after value; {@code null} until it has been resolved. */
    private Long afterValueGlobalOrd;
    /** {@code true} when the after value is absent and {@link #afterValueGlobalOrd} is its insertion point. */
    private boolean isTopValueInsertionPoint;

    // one-entry cache for toComparable: the last resolved ordinal and a private copy of its term
    private long lastLookupOrd = -1;
    private BytesRef lastLookupValue;

    GlobalOrdinalValuesSource(
        BigArrays bigArrays,
        MappedFieldType type,
        CheckedFunction<LeafReaderContext, SortedSetDocValues, IOException> docValuesFunc,
        DocValueFormat format,
        boolean missingBucket,
        int size,
        int reverseMul
    ) {
        super(bigArrays, format, type, missingBucket, size, reverseMul);
        this.docValuesFunc = docValuesFunc;
        // start with at most 100 entries; copyCurrent grows the array on demand
        final int initialCapacity = Math.min(size, 100);
        this.values = bigArrays.newLongArray(initialCapacity, false);
    }

    /** Stores the current ordinal in {@code slot}, growing the backing array when required. */
    @Override
    void copyCurrent(int slot) {
        values = bigArrays.grow(values, slot + 1);
        values.set(slot, currentValue);
    }

    /** Compares the ordinals stored in two slots, applying {@code reverseMul}. */
    @Override
    int compare(int from, int to) {
        final long fromOrd = values.get(from);
        final long toOrd = values.get(to);
        return Long.compare(fromOrd, toOrd) * reverseMul;
    }

    /** Compares the current ordinal with the one stored in {@code slot}, applying {@code reverseMul}. */
    @Override
    int compareCurrent(int slot) {
        final long slotOrd = values.get(slot);
        return Long.compare(currentValue, slotOrd) * reverseMul;
    }

    /** Compares the current ordinal with the resolved ordinal of the after value. */
    @Override
    int compareCurrentWithAfter() {
        final int order = Long.compare(currentValue, afterValueGlobalOrd);
        // When the top value is missing in this shard the comparison is made against its
        // insertion point, so a tie means the current value sits "after" that point.
        final boolean tiedWithInsertionPoint = order == 0 && isTopValueInsertionPoint;
        return tiedWithInsertionPoint ? reverseMul : order * reverseMul;
    }

    /** Hash code of the ordinal stored in {@code slot}. */
    @Override
    int hashCode(int slot) {
        final long slotOrd = values.get(slot);
        return Long.hashCode(slotOrd);
    }

    /** Hash code of the current ordinal. */
    @Override
    int hashCodeCurrent() {
        return Long.hashCode(currentValue);
    }

    /**
     * Records the after key for this source.
     *
     * @param value the after value; {@code null} is accepted only when the missing bucket is enabled
     * @throws IllegalArgumentException if the value is not a string and the lenient
     *         "missing bucket on an unmapped field" case does not apply
     */
    @Override
    void setAfter(Comparable<?> value) {
        if (missingBucket && value == null) {
            afterValue = null;
            afterValueGlobalOrd = -1L;
            return;
        }
        final boolean isString = value.getClass() == String.class;
        // A non-string value is tolerated when the field is missing in this shard: it may be
        // present in other shards with a type that is not a string.
        final boolean accepted = isString || (missingBucket && fieldType == null);
        if (accepted == false) {
            throw new IllegalArgumentException("invalid value, expected string, got " + value.getClass().getSimpleName());
        }
        afterValue = format.parseBytesRef(value.toString());
    }

    /**
     * Resolves the ordinal stored in {@code slot} to its term.
     *
     * @return {@code null} for the missing bucket, otherwise a copy of the term; the most
     *         recently resolved term is cached and returned again for the same ordinal
     */
    @Override
    BytesRef toComparable(int slot) throws IOException {
        final long globalOrd = values.get(slot);
        if (missingBucket && globalOrd == -1) {
            return null;
        }
        if (globalOrd != lastLookupOrd) {
            lastLookupOrd = globalOrd;
            // deep copy: the cached value is kept beyond this call
            lastLookupValue = BytesRef.deepCopyOf(lookup.lookupOrd(globalOrd));
        }
        return lastLookupValue;
    }

    /**
     * Builds a collector that forwards each document to {@code next} once per ordinal of the
     * document, or once with ordinal {@code -1} when the document has no value and the missing
     * bucket is enabled.
     */
    @Override
    LeafBucketCollector getLeafCollector(LeafReaderContext context, LeafBucketCollector next) throws IOException {
        final SortedSetDocValues dvs = docValuesFunc.apply(context);
        if (lookup == null) {
            initLookup(dvs);
        }
        return new LeafBucketCollector() {
            @Override
            public void collect(int doc, long bucket) throws IOException {
                if (dvs.advanceExact(doc) == false) {
                    if (missingBucket) {
                        currentValue = -1;
                        next.collect(doc, bucket);
                    }
                    return;
                }
                for (long ord = dvs.nextOrd(); ord != NO_MORE_ORDS; ord = dvs.nextOrd()) {
                    currentValue = ord;
                    next.collect(doc, bucket);
                }
            }
        };
    }

    /**
     * Builds a collector for a forced lead value: the ordinal of {@code value} is resolved from
     * the documents being collected, after which every document is forwarded to {@code next}
     * with that ordinal as the current value.
     *
     * @throws IllegalArgumentException if {@code value} is not a {@link BytesRef}
     */
    @Override
    LeafBucketCollector getLeafCollector(Comparable<BytesRef> value, LeafReaderContext context, LeafBucketCollector next)
        throws IOException {
        if (value.getClass() != BytesRef.class) {
            throw new IllegalArgumentException("Expected BytesRef, got " + value.getClass());
        }
        final BytesRef term = (BytesRef) value;
        final SortedSetDocValues dvs = docValuesFunc.apply(context);
        if (lookup == null) {
            initLookup(dvs);
        }
        return new LeafBucketCollector() {
            // flips to true once the ordinal of the forced term has been found
            boolean leadOrdResolved = false;

            @Override
            public void collect(int doc, long bucket) throws IOException {
                if (leadOrdResolved == false && dvs.advanceExact(doc)) {
                    for (long ord = dvs.nextOrd(); ord != NO_MORE_ORDS; ord = dvs.nextOrd()) {
                        if (term.equals(lookup.lookupOrd(ord))) {
                            leadOrdResolved = true;
                            currentValue = ord;
                            break;
                        }
                    }
                }
                assert leadOrdResolved;
                next.collect(doc, bucket);
            }
        };
    }

    /**
     * Returns a {@link TermsSortedDocsProducer} for the field, or {@code null} when sorted docs
     * are not applicable, the field is not a {@link StringFieldType}, or the query is neither
     * {@code null} nor a {@link MatchAllDocsQuery}.
     */
    @Override
    SortedDocsProducer createSortedDocsProducerOrNull(IndexReader reader, Query query) {
        if (checkIfSortedDocsIsApplicable(reader, fieldType) == false) {
            return null;
        }
        if ((fieldType instanceof StringFieldType) == false) {
            return null;
        }
        final boolean matchesEverything = query == null || query.getClass() == MatchAllDocsQuery.class;
        if (matchesEverything == false) {
            return null;
        }
        return new TermsSortedDocsProducer(fieldType.name());
    }

    /** Releases the array holding the per-slot ordinals. */
    @Override
    public void close() {
        Releasables.close(values);
    }

    /**
     * Installs {@code dvs} as the lookup and, when an after value is set but not yet resolved,
     * resolves it to an ordinal or, if the term is absent, to its insertion point.
     */
    private void initLookup(SortedSetDocValues dvs) throws IOException {
        lookup = dvs;
        if (afterValue == null || afterValueGlobalOrd != null) {
            return;
        }
        final long found = dvs.lookupTerm(afterValue);
        if (found < 0) {
            // convert negative insert position
            afterValueGlobalOrd = -found - 1;
            isTopValueInsertionPoint = true;
        } else {
            afterValueGlobalOrd = found;
        }
    }
}

0 comments on commit 83c34f4

Please sign in to comment.