Pass resolved extended bounds to unmapped histogram aggregator
Prior to this change, the unresolved extended bounds were passed into the histogram aggregator, which meant extendedBounds.min and extendedBounds.max came through as null. This had two effects on the histogram aggregator:

1. If the histogram aggregator was unmapped across all shards, the reduce phase would not add buckets for the extended bounds, and the response would contain zero buckets.
2. If the histogram aggregator was mapped on some shards but not others, the reduce phase might sometimes choose to reduce based on an unmapped shard's response, and the extended bounds would therefore be ignored.

This change resolves the extended bounds in the unmapped case and solves the above two issues.
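For illustration, a minimal request that hits the first failure mode might look like the sketch below. It mirrors the builder calls used in the test added by this commit; the index name "idx" and the unmapped field name "value" are hypothetical:

    // "value" is not mapped on any shard of "idx", so every shard creates an
    // unmapped histogram aggregator.
    SearchResponse response = client().prepareSearch("idx")
            .addAggregation(histogram("histo")
                    .field("value")               // unmapped field
                    .interval(10)
                    .minDocCount(0)
                    .extendedBounds(0L, 100L))
            .execute().actionGet();

    // Before this change: zero buckets, because the unmapped aggregators carried
    // null extended bounds into the reduce phase. After this change: eleven empty
    // buckets with keys 0, 10, ..., 100.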

Closes #19009
colings86 committed Jun 27, 2016
1 parent ba3020e commit 376a14d
Showing 2 changed files with 83 additions and 2 deletions.
@@ -173,7 +173,16 @@ public long minDocCount() {
@Override
protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
-        return new HistogramAggregator(name, factories, rounding, order, keyed, minDocCount, extendedBounds, null, config.formatter(),
+        // we need to round the bounds given by the user and we have to do it for every aggregator we create
+        // as the rounding is not necessarily an idempotent operation.
+        // TODO: we need to think of a better structure for the factory/aggregator code so we won't need to do this
+        ExtendedBounds roundedBounds = null;
+        if (extendedBounds != null) {
+            // we need to process & validate here using the parser
+            extendedBounds.processAndValidate(name, aggregationContext.searchContext(), config.parser());
+            roundedBounds = extendedBounds.round(rounding);
+        }
+        return new HistogramAggregator(name, factories, rounding, order, keyed, minDocCount, roundedBounds, null, config.formatter(),
histogramFactory, aggregationContext, parent, pipelineAggregators, metaData);
}
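The key step above is extendedBounds.round(rounding), which snaps the user-supplied min and max onto bucket keys so the reduce phase can emit empty buckets across the whole range. Here is a simplified model of what that rounding achieves, assuming a plain numeric interval (the real Rounding implementations also cover date intervals and time zones):

    long interval = 10;
    long min = 7, max = 33;

    // Snap each bound down to the key of the bucket that contains it.
    long roundedMin = Math.floorDiv(min, interval) * interval;   // 0
    long roundedMax = Math.floorDiv(max, interval) * interval;   // 30

    // With min_doc_count == 0, reduce can now emit empty buckets at keys
    // 0, 10, 20 and 30 even when no shard produced any real buckets.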

@@ -23,6 +23,7 @@
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.groovy.GroovyPlugin;
@@ -40,7 +41,6 @@

import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Iterator;
import java.util.List;

@@ -970,6 +970,78 @@ public void testEmptyAggregation() throws Exception {
assertThat(histo.getBuckets().isEmpty(), is(true));
}

+    public void testEmptyWithExtendedBounds() throws Exception {
+        int lastDataBucketKey = (numValueBuckets - 1) * interval;
+
+        // randomizing the number of buckets on the min bound
+        // (can sometimes fall within the data range, but more frequently will fall before the data range)
+        int addedBucketsLeft = randomIntBetween(0, numValueBuckets);
+        long boundsMinKey = addedBucketsLeft * interval;
+        if (frequently()) {
+            boundsMinKey = -boundsMinKey;
+        } else {
+            addedBucketsLeft = 0;
+        }
+        long boundsMin = boundsMinKey + randomIntBetween(0, interval - 1);
+
+        // randomizing the number of buckets on the max bound
+        // (can sometimes fall within the data range, but more frequently will fall after the data range)
+        int addedBucketsRight = randomIntBetween(0, numValueBuckets);
+        long boundsMaxKeyDelta = addedBucketsRight * interval;
+        if (rarely()) {
+            addedBucketsRight = 0;
+            boundsMaxKeyDelta = -boundsMaxKeyDelta;
+        }
+        long boundsMaxKey = lastDataBucketKey + boundsMaxKeyDelta;
+        long boundsMax = boundsMaxKey + randomIntBetween(0, interval - 1);
+
+        // it could be that the random bounds.min we chose ended up greater than bounds.max - this should cause an
+        // error
+        boolean invalidBoundsError = boundsMin > boundsMax;
+
+        // constructing the newly expected bucket list
+        int bucketsCount = numValueBuckets + addedBucketsLeft + addedBucketsRight;
+        long[] extendedValueCounts = new long[bucketsCount];
+        System.arraycopy(valueCounts, 0, extendedValueCounts, addedBucketsLeft, valueCounts.length);
+
+        SearchResponse response = null;
+        try {
+            response = client().prepareSearch("idx")
+                    .setQuery(QueryBuilders.termQuery("foo", "bar")).addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME)
+                            .interval(interval).minDocCount(0).extendedBounds(boundsMin, boundsMax))
+                    .execute().actionGet();
+
+            if (invalidBoundsError) {
+                fail("Expected an exception to be thrown when bounds.min is greater than bounds.max");
+                return;
+            }
+
+        } catch (Exception e) {
+            if (invalidBoundsError) {
+                // expected
+                return;
+            } else {
+                throw e;
+            }
+        }
+        assertSearchResponse(response);
+
+        Histogram histo = response.getAggregations().get("histo");
+        assertThat(histo, notNullValue());
+        assertThat(histo.getName(), equalTo("histo"));
+        List<? extends Bucket> buckets = histo.getBuckets();
+        assertThat(buckets.size(), equalTo(bucketsCount));
+
+        long key = Math.min(boundsMinKey, 0);
+        for (int i = 0; i < bucketsCount; i++) {
+            Histogram.Bucket bucket = buckets.get(i);
+            assertThat(bucket, notNullValue());
+            assertThat(((Number) bucket.getKey()).longValue(), equalTo(key));
+            assertThat(bucket.getDocCount(), equalTo(0L));
+            key += interval;
+        }
+    }

@Test
public void singleValuedField_WithExtendedBounds() throws Exception {
int lastDataBucketKey = (numValueBuckets - 1) * interval;
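To see why resolving the bounds matters at reduce time, here is a rough, self-contained model of the empty-bucket filling that happens during reduction; the class and method names below are illustrative, not Elasticsearch API:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;

    class ReduceSketch {
        // With min_doc_count == 0 and resolved bounds, every key from roundedMin to
        // roundedMax gets a bucket, empty if no shard reported one. If the bounds
        // arrive as null (the pre-fix unmapped case), nothing is added here, and an
        // all-empty reduction yields zero buckets.
        static List<Map.Entry<Long, Long>> fillEmptyBuckets(Map<Long, Long> reducedCounts,
                Long roundedMin, Long roundedMax, long interval) {
            TreeMap<Long, Long> buckets = new TreeMap<>(reducedCounts);
            if (roundedMin != null && roundedMax != null) {
                for (long k = roundedMin; k <= roundedMax; k += interval) {
                    buckets.putIfAbsent(k, 0L);   // empty bucket from the bounds
                }
            }
            return new ArrayList<>(buckets.entrySet());
        }
    }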
