From cb9548f81147e8fb0540fcd54f276406cf11165c Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 17 Jun 2014 10:53:43 +0200 Subject: [PATCH] Changed the type of field docCounts to IntArray instead of LongArray, because a shard can't hold more than Integer.MAX_VALUE documents, so a LongArray just takes unnecessary space. Closes #6529 --- .../aggregations/bucket/BucketsAggregator.java | 14 +++++++------- .../GlobalOrdinalsSignificantTermsAggregator.java | 2 +- .../terms/GlobalOrdinalsStringTermsAggregator.java | 12 ++++++------ .../aggregations/bucket/terms/InternalOrder.java | 8 ++++---- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 2660c9831e9fa..d7370ae6d6cac 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.aggregations.bucket; import org.elasticsearch.common.lease.Releasable; -import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.IntArray; import org.elasticsearch.search.aggregations.*; import org.elasticsearch.search.aggregations.support.AggregationContext; @@ -32,12 +32,12 @@ */ public abstract class BucketsAggregator extends Aggregator { - private LongArray docCounts; + private IntArray docCounts; public BucketsAggregator(String name, BucketAggregationMode bucketAggregationMode, AggregatorFactories factories, long estimatedBucketsCount, AggregationContext context, Aggregator parent) { super(name, bucketAggregationMode, factories, estimatedBucketsCount, context, parent); - docCounts = bigArrays.newLongArray(estimatedBucketsCount, true); + docCounts = bigArrays.newIntArray(estimatedBucketsCount, true); } /** @@ -63,7 +63,7 @@ protected final void 
collectExistingBucket(int doc, long bucketOrd) throws IOExc collectBucketNoCounts(doc, bucketOrd); } - public LongArray getDocCounts() { + public IntArray getDocCounts() { return docCounts; } @@ -77,7 +77,7 @@ protected final void collectBucketNoCounts(int doc, long bucketOrd) throws IOExc /** * Utility method to increment the doc counts of the given bucket (identified by the bucket ordinal) */ - protected final void incrementBucketDocCount(long inc, long bucketOrd) throws IOException { + protected final void incrementBucketDocCount(int inc, long bucketOrd) throws IOException { docCounts = bigArrays.grow(docCounts, bucketOrd + 1); docCounts.increment(bucketOrd, inc); } @@ -85,13 +85,13 @@ protected final void incrementBucketDocCount(long inc, long bucketOrd) throws IO /** * Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal) */ - public final long bucketDocCount(long bucketOrd) { + public final int bucketDocCount(long bucketOrd) { if (bucketOrd >= docCounts.size()) { // This may happen eg. if no document in the highest buckets is accepted by a sub aggregator. // For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value // matches the filter, then the filter will never collect bucket ord 3. However, the long terms agg will call bucketAggregations(3) // on the filter aggregator anyway to build sub-aggregations. 
- return 0L; + return 0; } else { return docCounts.get(bucketOrd); } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java index 07b435042ea32..c6685a50b114c 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java @@ -82,7 +82,7 @@ public SignificantStringTerms buildAggregation(long owningBucketOrdinal) { continue; } final long bucketOrd = getBucketOrd(globalTermOrd); - final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd); + final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd); if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) { continue; } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 068fad9e99368..1352b19f51d76 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -27,7 +27,7 @@ import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.text.Text; -import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.IntArray; import org.elasticsearch.common.util.LongHash; import org.elasticsearch.index.fielddata.BytesValues; import org.elasticsearch.index.fielddata.ordinals.InternalGlobalOrdinalsBuilder.GlobalOrdinalMapping; @@ -131,7 +131,7 @@ public InternalAggregation 
buildAggregation(long owningBucketOrdinal) { continue; } final long bucketOrd = getBucketOrd(globalTermOrd); - final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd); + final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd); if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) { continue; } @@ -260,15 +260,15 @@ protected void doClose() { */ public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { - private final LongArray segmentDocCounts; + private final IntArray segmentDocCounts; private Ordinals.Docs segmentOrdinals; - private LongArray current; + private IntArray current; public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode) { super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, null, aggregationContext, parent, collectionMode); - this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true); + this.segmentDocCounts = bigArrays.newIntArray(maxOrd, true); } @Override @@ -315,7 +315,7 @@ private void mapSegmentCountsToGlobalCounts() { // This is the cleanest way I can think of so far GlobalOrdinalMapping mapping = (GlobalOrdinalMapping) globalOrdinals; for (int i = 0; i < segmentDocCounts.size(); i++) { - final long inc = segmentDocCounts.set(i, 0); + final int inc = segmentDocCounts.set(i, 0); if (inc == 0) { continue; } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java index 124696a9329a2..485c51c65f60d 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java +++ 
b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.search.aggregations.bucket.terms; -import com.google.common.primitives.Longs; import org.elasticsearch.Version; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -161,9 +160,10 @@ protected Comparator comparator(Aggregator termsAggregator) { return new Comparator() { @Override public int compare(Terms.Bucket o1, Terms.Bucket o2) { - long v1 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o1).bucketOrd); - long v2 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o2).bucketOrd); - return asc ? Long.compare(v1, v2) : Long.compare(v2, v1); + int v1 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o1).bucketOrd); + int v2 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o2).bucketOrd); + /* Compare explicitly instead of subtracting: same shape as the removed Long.compare form, and correct for any int values rather than only for non-negative counts. */ + return asc ? Integer.compare(v1, v2) : Integer.compare(v2, v1); } }; }