Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve reduction of terms aggregations #61779

Merged
merged 17 commits into from
Sep 4, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,20 @@ public static boolean isKeyDesc(BucketOrder order) {
return isOrder(order, KEY_DESC);
}

/**
* Return the primary {@link BucketOrder} if the provided <code>order</code>
* is a {@link CompoundOrder}.
*/
public static BucketOrder unwrap(BucketOrder order) {
jimczi marked this conversation as resolved.
Show resolved Hide resolved
if (order instanceof CompoundOrder) {
// check if its a compound order with the first element that matches
List<BucketOrder> orders = ((CompoundOrder) order).orderElements;
return orders.get(0);
} else {
return order;
}
}

/**
* Determine if the ordering strategy matches the expected one.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ abstract class AbstractStringTermsAggregator extends TermsAggregator {
}

/**
 * Builds a {@link StringTerms} result that carries no buckets: the bucket list is
 * {@code emptyList()} and the two remaining numeric arguments are {@code 0}.
 * Sizes, minimum doc count, format and the doc-count-error flag are taken from this
 * aggregator's own configuration.
 *
 * NOTE(review): the two consecutive {@code return new StringTerms(} lines appear to be the
 * removed and the added side of the diff hunk; the second one passes {@code order} twice,
 * presumably as both the reduce order and the requested order - confirm against the
 * {@code StringTerms} constructor.
 */
protected StringTerms buildEmptyTermsAggregation() {
return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
return new StringTerms(name, order, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, 0, emptyList(), 0);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ public int hashCode() {
}
}

/**
 * Creates a {@code DoubleTerms} by handing every argument, unchanged and in the same
 * order, to the superclass constructor.
 *
 * NOTE(review): the signature line and the {@code super(...)} line each appear twice
 * because this is a diff hunk showing the removed and the added version; the added
 * version introduces a leading {@code reduceOrder} argument ahead of {@code order}.
 * Presumably {@code reduceOrder} is the order the supplied buckets are already sorted in
 * - confirm against the superclass.
 */
public DoubleTerms(String name, BucketOrder order, int requiredSize, long minDocCount,
public DoubleTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount,
Map<String, Object> metadata, DocValueFormat format, int shardSize, boolean showTermDocCountError, long otherDocCount,
List<Bucket> buckets, long docCountError) {
super(name, order, requiredSize, minDocCount, metadata, format, shardSize, showTermDocCountError,
super(name, reduceOrder, order, requiredSize, minDocCount, metadata, format, shardSize, showTermDocCountError,
otherDocCount, buckets, docCountError);
}

Expand All @@ -121,7 +121,7 @@ public String getWriteableName() {

/**
 * Creates a new {@code DoubleTerms} that holds the given buckets and takes every other
 * constructor argument (name, orders, sizes, metadata, format, error and other-doc counts)
 * from this instance's fields.
 *
 * NOTE(review): the two {@code return new DoubleTerms(} lines appear to be the removed and
 * the added side of the diff hunk; the added one additionally forwards {@code reduceOrder}.
 *
 * @param buckets the buckets the new instance should contain
 * @return the newly constructed {@code DoubleTerms}
 */
@Override
public DoubleTerms create(List<Bucket> buckets) {
return new DoubleTerms(name, order, requiredSize, minDocCount, metadata, format, shardSize,
return new DoubleTerms(name, reduceOrder, order, requiredSize, minDocCount, metadata, format, shardSize,
showTermDocCountError, otherDocCount, buckets, docCountError);
}

Expand All @@ -132,8 +132,8 @@ public Bucket createBucket(InternalAggregations aggregations, Bucket prototype)
}

/**
 * Creates a new {@code DoubleTerms} from the supplied name, buckets, doc-count error and
 * other-doc count, while {@code order}, {@code requiredSize}, {@code minDocCount},
 * {@code format}, {@code shardSize} and {@code showTermDocCountError} come from this
 * instance and the metadata from {@code getMetadata()}.
 *
 * NOTE(review): the signature line and the {@code return} line each appear twice because
 * this is a diff hunk showing the removed and the added version; the added version takes
 * an extra {@code reduceOrder} parameter and forwards it as the second constructor argument.
 */
@Override
protected DoubleTerms create(String name, List<Bucket> buckets, long docCountError, long otherDocCount) {
return new DoubleTerms(name, order, requiredSize, minDocCount, getMetadata(), format,
protected DoubleTerms create(String name, List<Bucket> buckets, BucketOrder reduceOrder, long docCountError, long otherDocCount) {
return new DoubleTerms(name, reduceOrder, order, requiredSize, minDocCount, getMetadata(), format,
shardSize, showTermDocCountError, otherDocCount, buckets, docCountError);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.InternalOrder;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.terms.SignificanceLookup.BackgroundFrequencyForBytes;
Expand All @@ -58,6 +59,7 @@
import java.util.function.LongUnaryOperator;

import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder;

/**
* An aggregator of string values that relies on global ordinals in order to build buckets.
Expand Down Expand Up @@ -275,6 +277,7 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
LowCardinality(
String name,
AggregatorFactories factories,
Function<GlobalOrdinalsStringTermsAggregator, ResultStrategy<?, ?, ?>> resultStrategy,
ValuesSource.Bytes.WithOrdinals valuesSource,
BucketOrder order,
DocValueFormat format,
Expand All @@ -286,8 +289,8 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
boolean showTermDocCountError,
Map<String, Object> metadata
) throws IOException {
super(name, factories, a -> a.new StandardTermsResults(), valuesSource, order, format, bucketCountThresholds, null,
context, parent, remapGlobalOrds, collectionMode, showTermDocCountError, CardinalityUpperBound.ONE, metadata);
super(name, factories, resultStrategy, valuesSource, order, format, bucketCountThresholds, null, context,
parent, remapGlobalOrds, collectionMode, showTermDocCountError, CardinalityUpperBound.ONE, metadata);
assert factories == null || factories.countAggregators() == 0;
this.segmentDocCounts = context.bigArrays().newIntArray(1, true);
}
Expand Down Expand Up @@ -724,8 +727,15 @@ void buildSubAggs(StringTerms.Bucket[][] topBucketsPreOrd) throws IOException {

@Override
StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bucket[] topBuckets) {
return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
final BucketOrder reduceOrder;
if (isKeyOrder(order) == false) {
reduceOrder = isKeyOrder(order) ? InternalOrder.unwrap(order) : InternalOrder.key(true);
jimczi marked this conversation as resolved.
Show resolved Hide resolved
Arrays.sort(topBuckets, reduceOrder.comparator());
} else {
reduceOrder = order;
}
return new StringTerms(name, reduceOrder, order, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
otherDocCount, Arrays.asList(topBuckets), 0);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ public abstract class InternalMappedTerms<A extends InternalTerms<A, B>, B exten

protected long docCountError;

protected InternalMappedTerms(String name, BucketOrder order, int requiredSize, long minDocCount,
protected InternalMappedTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount,
Map<String, Object> metadata, DocValueFormat format, int shardSize,
boolean showTermDocCountError, long otherDocCount, List<B> buckets, long docCountError) {
super(name, order, requiredSize, minDocCount, metadata);
super(name, reduceOrder, order, requiredSize, minDocCount, metadata);
this.format = format;
this.shardSize = shardSize;
this.showTermDocCountError = showTermDocCountError;
Expand Down
Loading