From 0898df4aaca2149744e272f10764a63561f7b3c9 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Thu, 13 Feb 2020 08:59:44 -0500 Subject: [PATCH] Add histogram field type support to boxplot aggs (#52265) Add support for the histogram field type to boxplot aggs. Closes #52233 Relates to #33112 --- docs/reference/aggregations/metrics.asciidoc | 1 + .../metrics/boxplot-aggregation.asciidoc | 3 +- .../metrics/percentile-aggregation.asciidoc | 4 +- .../mapping/types/histogram.asciidoc | 1 + .../boxplot/BoxplotAggregationBuilder.java | 6 +-- .../analytics/boxplot/BoxplotAggregator.java | 45 +++++++++++++------ .../boxplot/BoxplotAggregatorFactory.java | 6 +-- .../HistogramPercentileAggregationTests.java | 37 +++++++++++++-- 8 files changed, 76 insertions(+), 27 deletions(-) diff --git a/docs/reference/aggregations/metrics.asciidoc b/docs/reference/aggregations/metrics.asciidoc index 5bcc96d9ae8fc..691c938a3c9b6 100644 --- a/docs/reference/aggregations/metrics.asciidoc +++ b/docs/reference/aggregations/metrics.asciidoc @@ -45,6 +45,7 @@ include::metrics/valuecount-aggregation.asciidoc[] include::metrics/median-absolute-deviation-aggregation.asciidoc[] +include::metrics/boxplot-aggregation.asciidoc[] diff --git a/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc b/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc index 74c20e805fbfa..51317ab125e1a 100644 --- a/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc @@ -4,7 +4,8 @@ === Boxplot Aggregation A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents. -These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script. +These values can be generated by a provided script or extracted from specific numeric or +<<histogram,histogram fields>> in the documents.
The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum median, first quartile (25th percentile) and third quartile (75th percentile) values. diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc index 7e9869a003948..10439c25a26d0 100644 --- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc @@ -285,7 +285,7 @@ GET latency/_search <1> Compression controls memory usage and approximation error -// tag::[t-digest] +// tag::t-digest[] The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the more nodes available, the higher the accuracy (and large memory footprint) proportional to the volume of data. The `compression` parameter limits the maximum number of @@ -301,7 +301,7 @@ A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large a of data which arrives sorted and in-order) the default settings will produce a TDigest roughly 64KB in size. In practice data tends to be more random and the TDigest will use less memory. 
-// tag::[t-digest] +// end::t-digest[] ==== HDR Histogram diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index fe4209c52b772..440530b110247 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -37,6 +37,7 @@ following aggregations and queries: * <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation * <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation +* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation * <<query-dsl-exists-query,exists>> query [[mapping-types-histogram-building-histogram]] diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregationBuilder.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregationBuilder.java index 4d0fdfa25ecfa..8ce98ab91f950 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregationBuilder.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregationBuilder.java @@ -29,7 +29,7 @@ import static org.elasticsearch.search.aggregations.metrics.PercentilesMethod.COMPRESSION_FIELD; -public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly { public static final String NAME = "boxplot"; @@ -37,7 +37,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le static { PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME); - ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false); + ValuesSourceParserHelper.declareAnyFields(PARSER, true, true); PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD); } @@ -98,7 +98,7 @@ public double compression() { @Override protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext, - ValuesSourceConfig config, + ValuesSourceConfig config, AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder) throws IOException { return new BoxplotAggregatorFactory(name, config, compression, 
queryShardContext, parent, subFactoriesBuilder, metaData); diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregator.java index 1c3a01b773d4f..dec42ddbd6159 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregator.java @@ -11,6 +11,8 @@ import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.ObjectArray; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; @@ -29,12 +31,12 @@ public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue { - private final ValuesSource.Numeric valuesSource; + private final ValuesSource valuesSource; private final DocValueFormat format; protected ObjectArray states; protected final double compression; - BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression, + BoxplotAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, double compression, SearchContext context, Aggregator parent, List pipelineAggregators, Map metaData) throws IOException { super(name, context, parent, pipelineAggregators, metaData); @@ -58,23 +60,38 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); - final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); - return new LeafBucketCollectorBase(sub, values) { - @Override - 
public void collect(int doc, long bucket) throws IOException { - states = bigArrays.grow(states, bucket + 1); - - if (values.advanceExact(doc)) { + if (valuesSource instanceof ValuesSource.Histogram) { + final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { TDigestState state = getExistingOrNewHistogram(bigArrays, bucket); if (values.advanceExact(doc)) { - final int valueCount = values.docValueCount(); - for (int i = 0; i < valueCount; i++) { - state.add(values.nextValue()); + final HistogramValue sketch = values.histogram(); + while(sketch.next()) { + state.add(sketch.value(), sketch.count()); } } } - } - }; + }; + } else { + final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + states = bigArrays.grow(states, bucket + 1); + if (values.advanceExact(doc)) { + TDigestState state = getExistingOrNewHistogram(bigArrays, bucket); + if (values.advanceExact(doc)) { + final int valueCount = values.docValueCount(); + for (int i = 0; i < valueCount; i++) { + state.add(values.nextValue()); + } + } + } + } + }; + } } private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) { diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregatorFactory.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregatorFactory.java index 190f65137f4be..ac34667a9e173 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregatorFactory.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregatorFactory.java @@ -20,12 +20,12 @@ import 
java.util.List; import java.util.Map; -public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory { +public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory { private final double compression; BoxplotAggregatorFactory(String name, - ValuesSourceConfig config, + ValuesSourceConfig config, double compression, QueryShardContext queryShardContext, AggregatorFactory parent, @@ -46,7 +46,7 @@ protected Aggregator createUnmapped(SearchContext searchContext, } @Override - protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource, + protected Aggregator doCreateInternal(ValuesSource valuesSource, SearchContext searchContext, Aggregator parent, boolean collectsFromSingleBucket, diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java index 6d95f74e4428a..097ebc3bc5693 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java @@ -27,6 +27,8 @@ import org.elasticsearch.search.aggregations.metrics.TDigestState; import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.xpack.analytics.AnalyticsPlugin; +import org.elasticsearch.xpack.analytics.boxplot.Boxplot; +import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; import java.util.ArrayList; @@ -131,8 +133,7 @@ public void testHDRHistogram() throws Exception { } } - public void testTDigestHistogram() throws Exception { - + private void setupTDigestHistogram(int compression) throws Exception { XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() .startObject() 
.startObject("_doc") @@ -170,8 +171,6 @@ public void testTDigestHistogram() throws Exception { PutMappingRequest request2 = new PutMappingRequest("pre_agg").source(xContentBuilder2); client().admin().indices().putMapping(request2).actionGet(); - - int compression = TestUtil.nextInt(random(), 200, 300); TDigestState histogram = new TDigestState(compression); BulkRequest bulkRequest = new BulkRequest(); @@ -218,6 +217,11 @@ public void testTDigestHistogram() throws Exception { response = client().prepareSearch("pre_agg").get(); assertEquals(numDocs / frq, response.getHits().getTotalHits().value); + } + + public void testTDigestHistogram() throws Exception { + int compression = TestUtil.nextInt(random(), 200, 300); + setupTDigestHistogram(compression); PercentilesAggregationBuilder builder = AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST) @@ -236,6 +240,31 @@ public void testTDigestHistogram() throws Exception { } } + public void testBoxplotHistogram() throws Exception { + int compression = TestUtil.nextInt(random(), 200, 300); + setupTDigestHistogram(compression); + BoxplotAggregationBuilder bpBuilder = new BoxplotAggregationBuilder("agg").field("inner.data").compression(compression); + + SearchResponse bpResponseRaw = client().prepareSearch("raw").addAggregation(bpBuilder).get(); + SearchResponse bpResponsePreAgg = client().prepareSearch("pre_agg").addAggregation(bpBuilder).get(); + SearchResponse bpResponseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(bpBuilder).get(); + + Boxplot bpRaw = bpResponseRaw.getAggregations().get("agg"); + Boxplot bpPreAgg = bpResponsePreAgg.getAggregations().get("agg"); + Boxplot bpBoth = bpResponseBoth.getAggregations().get("agg"); + assertEquals(bpRaw.getMax(), bpPreAgg.getMax(), 0.0); + assertEquals(bpRaw.getMax(), bpBoth.getMax(), 0.0); + assertEquals(bpRaw.getMin(), bpPreAgg.getMin(), 0.0); + assertEquals(bpRaw.getMin(), bpBoth.getMin(), 0.0); + + 
assertEquals(bpRaw.getQ1(), bpPreAgg.getQ1(), 1.0); + assertEquals(bpRaw.getQ1(), bpBoth.getQ1(), 1.0); + assertEquals(bpRaw.getQ2(), bpPreAgg.getQ2(), 1.0); + assertEquals(bpRaw.getQ2(), bpBoth.getQ2(), 1.0); + assertEquals(bpRaw.getQ3(), bpPreAgg.getQ3(), 1.0); + assertEquals(bpRaw.getQ3(), bpBoth.getQ3(), 1.0); + } + @Override protected Collection> getPlugins() { List> plugins = new ArrayList<>(super.getPlugins());