Adjust Histogram's bucket accounting to be iterative #102172

Merged 2 commits on Nov 15, 2023
docs/changelog/102172.yaml (5 changes: 5 additions & 0 deletions)
@@ -0,0 +1,5 @@
+ pr: 102172
+ summary: Adjust Histogram's bucket accounting to be iterative
+ area: Aggregations
+ type: bug
+ issues: []
InternalMultiBucketAggregation.java
@@ -26,6 +26,18 @@ public abstract class InternalMultiBucketAggregation<
A extends InternalMultiBucketAggregation,
B extends InternalMultiBucketAggregation.InternalBucket> extends InternalAggregation implements MultiBucketsAggregation {

+ /**
+  * When we pre-count the empty buckets we report them periodically
+  * because you can configure the date_histogram to create an astounding
+  * number of buckets. It'd take a while to count that high only to abort.
+  * So we report every couple thousand buckets. It'd be simpler to report
+  * every single bucket we plan to allocate one at a time but that'd cause
+  * needless overhead on the circuit breakers. Counting a couple thousand
+  * buckets is plenty fast to fail this quickly in pathological cases and
+  * plenty large to keep the overhead minimal.
+  */
+ protected static final int REPORT_EMPTY_EVERY = 10_000;
+
public InternalMultiBucketAggregation(String name, Map<String, Object> metadata) {
super(name, metadata);
}
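For context, the batching pattern this comment describes looks roughly like the sketch below. This is a hedged illustration, not Elasticsearch code: `breaker` stands in for `reduceContext.consumeBucketsAndMaybeBreak`, which in Elasticsearch may throw a `CircuitBreakingException` to abort the reduction early.

```java
import java.util.function.IntConsumer;

class BucketAccountingSketch {
    static final int REPORT_EMPTY_EVERY = 10_000;

    // Count `total` planned buckets, reporting to the breaker in chunks of
    // 10_000: large enough to keep per-bucket overhead negligible, small
    // enough that a pathological request fails after counting at most a few
    // thousand buckets past the limit rather than after counting all of them.
    static void countBuckets(long total, IntConsumer breaker) {
        int pending = 0;
        for (long i = 0; i < total; i++) {
            if (++pending >= REPORT_EMPTY_EVERY) {
                breaker.accept(pending); // may throw and abort early
                pending = 0;
            }
        }
        breaker.accept(pending); // flush the remainder
    }
}
```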
InternalDateHistogram.java
@@ -373,18 +373,6 @@ protected Bucket reduceBucket(List<Bucket> buckets, AggregationReduceContext con
return createBucket(buckets.get(0).key, docCount, aggs);
}

- /**
-  * When we pre-count the empty buckets we report them periodically
-  * because you can configure the date_histogram to create an astounding
-  * number of buckets. It'd take a while to count that high only to abort.
-  * So we report every couple thousand buckets. It'd be simpler to report
-  * every single bucket we plan to allocate one at a time but that'd cause
-  * needless overhead on the circuit breakers. Counting a couple thousand
-  * buckets is plenty fast to fail this quickly in pathological cases and
-  * plenty large to keep the overhead minimal.
-  */
- private static final int REPORT_EMPTY_EVERY = 10_000;
-
private void addEmptyBuckets(List<Bucket> list, AggregationReduceContext reduceContext) {
/*
* Make sure we have space for the empty buckets we're going to add by
InternalHistogram.java
@@ -291,10 +291,11 @@ protected boolean lessThan(IteratorAndCurrent<Bucket> a, IteratorAndCurrent<Buck
for (InternalAggregation aggregation : aggregations) {
InternalHistogram histogram = (InternalHistogram) aggregation;
if (histogram.buckets.isEmpty() == false) {
- pq.add(new IteratorAndCurrent<Bucket>(histogram.buckets.iterator()));
+ pq.add(new IteratorAndCurrent<>(histogram.buckets.iterator()));
}
}

+ int consumeBucketCount = 0;
List<Bucket> reducedBuckets = new ArrayList<>();
if (pq.size() > 0) {
// list of buckets coming from different shards that have the same key
@@ -310,6 +311,10 @@ protected boolean lessThan(IteratorAndCurrent<Bucket> a, IteratorAndCurrent<Buck
final Bucket reduced = reduceBucket(currentBuckets, reduceContext);
if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
reducedBuckets.add(reduced);
+ if (consumeBucketCount++ >= REPORT_EMPTY_EVERY) {
+     reduceContext.consumeBucketsAndMaybeBreak(consumeBucketCount);
+     consumeBucketCount = 0;
+ }
}
currentBuckets.clear();
key = top.current().key;
@@ -330,10 +335,15 @@ protected boolean lessThan(IteratorAndCurrent<Bucket> a, IteratorAndCurrent<Buck
final Bucket reduced = reduceBucket(currentBuckets, reduceContext);
if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
reducedBuckets.add(reduced);
+ if (consumeBucketCount++ >= REPORT_EMPTY_EVERY) {
+     reduceContext.consumeBucketsAndMaybeBreak(consumeBucketCount);
+     consumeBucketCount = 0;
+ }
}
}
}

+ reduceContext.consumeBucketsAndMaybeBreak(consumeBucketCount);
return reducedBuckets;
}

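Note the mechanics of the idiom above: because of the post-increment, each report fires once the counter exceeds REPORT_EMPTY_EVERY, and the trailing call flushes whatever remains, so every reduced bucket is reported to the breaker exactly once. A small standalone trace (hypothetical numbers, nothing assumed beyond the constant's value):

```java
public class ConsumeBucketTrace {
    public static void main(String[] args) {
        final int REPORT_EMPTY_EVERY = 10_000;
        int consumeBucketCount = 0;
        long reported = 0;
        for (int bucket = 0; bucket < 25_000; bucket++) { // pretend we reduced 25k buckets
            if (consumeBucketCount++ >= REPORT_EMPTY_EVERY) {
                reported += consumeBucketCount;           // two batches of 10_001
                consumeBucketCount = 0;
            }
        }
        reported += consumeBucketCount;                   // final flush of 4_998
        System.out.println(reported);                     // prints 25000
    }
}
```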
@@ -358,26 +368,14 @@ private double round(double key) {
return Math.floor((key - emptyBucketInfo.offset) / emptyBucketInfo.interval) * emptyBucketInfo.interval + emptyBucketInfo.offset;
}

- /**
-  * When we pre-count the empty buckets we report them periodically
-  * because you can configure the histogram to create more buckets than
-  * there are atoms in the universe. It'd take a while to count that high
-  * only to abort. So we report every couple thousand buckets. It'd be
-  * simpler to report every single bucket we plan to allocate one at a time
-  * but that'd cause needless overhead on the circuit breakers. Counting a
-  * couple thousand buckets is plenty fast to fail this quickly in
-  * pathological cases and plenty large to keep the overhead minimal.
-  */
- private static final int REPORT_EMPTY_EVERY = 10_000;
-
private void addEmptyBuckets(List<Bucket> list, AggregationReduceContext reduceContext) {
/*
* Make sure we have space for the empty buckets we're going to add by
* counting all of the empties we plan to add and firing them into
* consumeBucketsAndMaybeBreak.
*/
class Counter implements DoubleConsumer {
- private int size = list.size();
+ private int size = 0;
Contributor:
Why is this changed?

Member (author):
This method only adds empty buckets. Previously, non-empty buckets were accounted for either here or, when no empty buckets were requested, in the calling method. Now non-empty buckets are accounted for before this method is invoked, so size can be initialized to 0 instead of list.size() (which would include any existing non-empty buckets).
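A condensed, hypothetical sketch of that point (not the full Elasticsearch class; `breaker` again stands in for `reduceContext.consumeBucketsAndMaybeBreak`):

```java
import java.util.function.DoubleConsumer;
import java.util.function.IntConsumer;

// Counts only the empty buckets this pass will add. Starting at 0 is correct
// now that non-empty buckets are reported during reduceBuckets(); starting at
// list.size() would report them to the breaker a second time.
final class EmptyBucketCounter implements DoubleConsumer {
    private static final int REPORT_EMPTY_EVERY = 10_000;
    private final IntConsumer breaker;
    private int size = 0;

    EmptyBucketCounter(IntConsumer breaker) {
        this.breaker = breaker;
    }

    @Override
    public void accept(double key) {
        if (++size >= REPORT_EMPTY_EVERY) {
            breaker.accept(size);
            size = 0;
        }
    }
}
```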
@Override
public void accept(double key) {
@@ -456,11 +454,9 @@ private void iterateEmptyBuckets(List<Bucket> list, ListIterator<Bucket> iter, D
@Override
public InternalAggregation reduce(List<InternalAggregation> aggregations, AggregationReduceContext reduceContext) {
List<Bucket> reducedBuckets = reduceBuckets(aggregations, reduceContext);
- boolean alreadyAccountedForBuckets = false;
if (reduceContext.isFinalReduce()) {
if (minDocCount == 0) {
addEmptyBuckets(reducedBuckets, reduceContext);
- alreadyAccountedForBuckets = true;
}
if (InternalOrder.isKeyDesc(order)) {
// we just need to reverse here...
@@ -474,9 +470,6 @@ public InternalAggregation reduce(List<InternalAggregation> aggregations, Aggreg
CollectionUtil.introSort(reducedBuckets, order.comparator());
}
}
- if (false == alreadyAccountedForBuckets) {
-     reduceContext.consumeBucketsAndMaybeBreak(reducedBuckets.size());
- }
return new InternalHistogram(getName(), reducedBuckets, order, minDocCount, emptyBucketInfo, format, keyed, getMetadata());
}
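Taken together with the earlier hunks: reduceBuckets(...) now reports non-empty buckets to the circuit breaker iteratively as they are produced, and addEmptyBuckets(...) reports the empties it adds, so the final reduce() no longer needs the alreadyAccountedForBuckets flag or a trailing bulk call to consumeBucketsAndMaybeBreak.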
