From 4e9b15350c96d02f2fb23eca15166baae684d75e Mon Sep 17 00:00:00 2001 From: Steve Gordon Date: Mon, 8 Mar 2021 13:12:09 +0000 Subject: [PATCH] Add variable width histogram aggregation --- docs/aggregations.asciidoc | 4 + .../multi-terms-aggregation-usage.asciidoc | 2 +- .../variable-width-histogram-usage.asciidoc | 89 +++++++++++++++++++ src/Nest/Aggregations/AggregateDictionary.cs | 2 + src/Nest/Aggregations/AggregateFormatter.cs | 43 ++++++++- src/Nest/Aggregations/AggregationContainer.cs | 12 +++ .../VariableWidthHistogramAggregation.cs | 72 +++++++++++++++ .../VariableWidthHistogramBucket.cs | 21 +++++ .../Visitor/AggregationVisitor.cs | 4 + .../Aggregations/Visitor/AggregationWalker.cs | 5 ++ .../MultiTermsAggregationUsageTests.cs | 2 +- .../VariableWidthHistogramUsageTests.cs | 86 ++++++++++++++++++ 12 files changed, 338 insertions(+), 4 deletions(-) create mode 100644 docs/aggregations/bucket/variable-width-histogram/variable-width-histogram-usage.asciidoc create mode 100644 src/Nest/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramAggregation.cs create mode 100644 src/Nest/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramBucket.cs create mode 100644 tests/Tests/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramUsageTests.cs diff --git a/docs/aggregations.asciidoc b/docs/aggregations.asciidoc index efa1b67f30b..4c536fc7b45 100644 --- a/docs/aggregations.asciidoc +++ b/docs/aggregations.asciidoc @@ -188,6 +188,8 @@ In addition to the buckets themselves, the bucket aggregations also compute and * <> +* <> + [NOTE] -- Bucketing aggregations can have sub-aggregations (bucketing or metric). 
The sub-aggregations will be computed @@ -253,6 +255,8 @@ include::aggregations/bucket/significant-text/significant-text-aggregation-usage include::aggregations/bucket/terms/terms-aggregation-usage.asciidoc[] +include::aggregations/bucket/variable-width-histogram/variable-width-histogram-usage.asciidoc[] + [[pipeline-aggregations]] == Pipeline Aggregations diff --git a/docs/aggregations/bucket/multi-terms/multi-terms-aggregation-usage.asciidoc b/docs/aggregations/bucket/multi-terms/multi-terms-aggregation-usage.asciidoc index d64c8f43b3d..86f0b594987 100644 --- a/docs/aggregations/bucket/multi-terms/multi-terms-aggregation-usage.asciidoc +++ b/docs/aggregations/bucket/multi-terms/multi-terms-aggregation-usage.asciidoc @@ -17,7 +17,7 @@ please modify the original csharp file found at the link and submit the PR with A multi-bucket value source based aggregation where buckets are dynamically built - one per unique set of values. -See the Elasticsearch documentation on {ref_current}//search-aggregations-bucket-multi-terms-aggregation.html[multi terms aggregation] for more detail. +See the Elasticsearch documentation on {ref_current}/search-aggregations-bucket-multi-terms-aggregation.html[multi terms aggregation] for more detail. 
==== Fluent DSL example diff --git a/docs/aggregations/bucket/variable-width-histogram/variable-width-histogram-usage.asciidoc b/docs/aggregations/bucket/variable-width-histogram/variable-width-histogram-usage.asciidoc new file mode 100644 index 00000000000..8d2dea65485 --- /dev/null +++ b/docs/aggregations/bucket/variable-width-histogram/variable-width-histogram-usage.asciidoc @@ -0,0 +1,89 @@ +:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/7.x + +:github: https://github.com/elastic/elasticsearch-net + +:nuget: https://www.nuget.org/packages + +//// +IMPORTANT NOTE +============== +This file has been generated from https://github.com/elastic/elasticsearch-net/tree/7.x/src/Tests/Tests/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramUsageTests.cs. +If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file, +please modify the original csharp file found at the link and submit the PR with that change. Thanks! +//// + +[[variable-width-histogram-usage]] +=== Variable Width Histogram Usage + +A multi-bucket aggregation similar to Histogram. However, the width of each bucket is not specified. Rather, a target number of buckets is provided +and bucket intervals are dynamically determined based on the document distribution. + +See the Elasticsearch documentation on {ref_current}/search-aggregations-bucket-variablewidthhistogram-aggregation.html[variable width histogram aggregation] for more detail. 
+ +==== Fluent DSL example + +[source,csharp] +---- +a => a +.VariableWidthHistogram("commits", v => v + .Field(f => f.NumberOfCommits) + .Buckets(2) + .InitialBuffer(2) + .ShardSize(100) + .Meta(m => m + .Add("foo", "bar") + )) +---- + +==== Object Initializer syntax example + +[source,csharp] +---- +new VariableWidthHistogramAggregation("commits") +{ + Field = Field(f => f.NumberOfCommits), + Buckets = 2, + InitialBuffer = 2, + ShardSize = 100, + Meta = new Dictionary + { + { "foo", "bar" } + } +} +---- + +[source,javascript] +.Example json output +---- +{ + "commits": { + "meta": { + "foo": "bar" + }, + "variable_width_histogram": { + "field": "numberOfCommits", + "buckets": 2, + "initial_buffer": 2, + "shard_size": 100 + } + } +} +---- + +==== Handling Responses + +[source,csharp] +---- +response.ShouldBeValid(); +var counts = response.Aggregations.VariableWidthHistogram("commits"); +counts.Should().NotBeNull(); +counts.Buckets.Should().HaveCountGreaterThan(0); +var firstBucket = counts.Buckets.First(); +firstBucket.Key.Should().BeGreaterOrEqualTo(0); +firstBucket.Minimum.Should().BeGreaterOrEqualTo(0); +firstBucket.Maximum.Should().BeGreaterOrEqualTo(0); +firstBucket.DocCount.Should().BeGreaterOrEqualTo(1); +counts.Meta.Should().NotBeNull().And.HaveCount(1); +counts.Meta["foo"].Should().Be("bar"); +---- + diff --git a/src/Nest/Aggregations/AggregateDictionary.cs b/src/Nest/Aggregations/AggregateDictionary.cs index 43520407fe9..956ee5dc1ba 100644 --- a/src/Nest/Aggregations/AggregateDictionary.cs +++ b/src/Nest/Aggregations/AggregateDictionary.cs @@ -222,6 +222,8 @@ public MultiBucketAggregate> RareTerms(string key) public MultiBucketAggregate DateHistogram(string key) => GetMultiBucketAggregate(key); + public MultiBucketAggregate VariableWidthHistogram(string key) => GetMultiBucketAggregate(key); + public MultiTermsAggregate MultiTerms(string key) => MultiTerms(key); public MultiTermsAggregate MultiTerms(string key) diff --git 
a/src/Nest/Aggregations/AggregateFormatter.cs b/src/Nest/Aggregations/AggregateFormatter.cs index c6f0b894fc6..ed3648e2c36 100644 --- a/src/Nest/Aggregations/AggregateFormatter.cs +++ b/src/Nest/Aggregations/AggregateFormatter.cs @@ -25,7 +25,8 @@ internal class AggregateFormatter : IJsonFormatter { Parser.From, 1 }, { Parser.To, 2 }, { Parser.KeyAsString, 3 }, - { Parser.DocCount, 4 } + { Parser.DocCount, 4 }, + { Parser.Min, 5 } }; private static readonly byte[] BucketsField = JsonWriter.GetEncodedPropertyNameWithoutQuotation(Parser.Buckets); @@ -219,6 +220,9 @@ private IBucket ReadBucket(ref JsonReader reader, IJsonFormatterResolver formatt case 4: item = GetFiltersBucket(ref reader, formatterResolver); break; + case 5: + item = GetVariableWidthHistogramBucket(ref reader, formatterResolver); + break; } } else @@ -939,7 +943,42 @@ private IBucket GetDateHistogramBucket(ref JsonReader reader, IJsonFormatterReso return dateHistogram; } - + + private IBucket GetVariableWidthHistogramBucket(ref JsonReader reader, IJsonFormatterResolver formatterResolver) + { + var min = reader.ReadDouble(); + reader.ReadNext(); // , + reader.ReadNext(); // "key" + reader.ReadNext(); // : + var key = reader.ReadDouble(); + reader.ReadNext(); // , + reader.ReadNext(); // "max" + reader.ReadNext(); // : + var max = reader.ReadDouble(); + reader.ReadNext(); // , + reader.ReadNext(); // "doc_count" + reader.ReadNext(); // : + var docCount = reader.ReadInt64(); + + Dictionary subAggregates = null; + if (reader.GetCurrentJsonToken() == JsonToken.ValueSeparator) + { + reader.ReadNext(); // , + var propertyName = reader.ReadPropertyName(); + subAggregates = GetSubAggregates(ref reader, propertyName, formatterResolver); + } + + var variableWidthHistogram = new VariableWidthHistogramBucket(subAggregates) + { + Key = key, + Minimum = min, + Maximum = max, + DocCount = docCount, + }; + + return variableWidthHistogram; + } + private IBucket GetKeyedBucket(ref JsonReader reader, 
IJsonFormatterResolver formatterResolver) { var token = reader.GetCurrentJsonToken(); diff --git a/src/Nest/Aggregations/AggregationContainer.cs b/src/Nest/Aggregations/AggregationContainer.cs index eb866ebc850..2e49ce0f0ed 100644 --- a/src/Nest/Aggregations/AggregationContainer.cs +++ b/src/Nest/Aggregations/AggregationContainer.cs @@ -295,6 +295,9 @@ public interface IAggregationContainer [DataMember(Name = "multi_terms")] IMultiTermsAggregation MultiTerms { get; set; } + + [DataMember(Name = "variable_width_histogram")] + IVariableWidthHistogramAggregation VariableWidthHistogram { get; set; } void Accept(IAggregationVisitor visitor); } @@ -446,6 +449,8 @@ public class AggregationContainer : IAggregationContainer public IMultiTermsAggregation MultiTerms { get; set; } + public IVariableWidthHistogramAggregation VariableWidthHistogram { get; set; } + public void Accept(IAggregationVisitor visitor) { if (visitor.Scope == AggregationVisitorScope.Unknown) visitor.Scope = AggregationVisitorScope.Aggregation; @@ -615,6 +620,8 @@ public class AggregationContainerDescriptor : DescriptorBase, ITopMetricsAggregation> selector ) => _SetInnerAggregation(name, selector, (a, d) => a.TopMetrics = d); + public AggregationContainerDescriptor VariableWidthHistogram(string name, + Func, IVariableWidthHistogramAggregation> selector + ) => + _SetInnerAggregation(name, selector, (a, d) => a.VariableWidthHistogram = d); + /// /// Fluent methods do not assign to properties on `this` directly but on IAggregationContainers inside /// `this.Aggregations[string, IContainer] diff --git a/src/Nest/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramAggregation.cs b/src/Nest/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramAggregation.cs new file mode 100644 index 00000000000..ded9df3c434 --- /dev/null +++ b/src/Nest/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramAggregation.cs @@ -0,0 +1,72 @@ +// Licensed to Elasticsearch B.V under one or more 
agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System; +using System.Linq.Expressions; +using System.Runtime.Serialization; +using Elasticsearch.Net.Utf8Json; + +namespace Nest +{ + [InterfaceDataContract] + [ReadAs(typeof(VariableWidthHistogramAggregation))] + public interface IVariableWidthHistogramAggregation : IBucketAggregation + { + /// + /// The field to target. + /// + [DataMember(Name = "field")] + Field Field { get; set; } + + [DataMember(Name = "buckets")] + int? Buckets { get; set; } + + [DataMember(Name = "initial_buffer")] + int? InitialBuffer { get; set; } + + [DataMember(Name = "shard_size")] + int? ShardSize { get; set; } + } + + public class VariableWidthHistogramAggregation : BucketAggregationBase, IVariableWidthHistogramAggregation + { + public VariableWidthHistogramAggregation(string name) : base(name) { } + + /// + public Field Field { get; set; } + /// + public int? Buckets { get; set; } + /// + public int? InitialBuffer { get; set; } + /// + public int? ShardSize { get; set; } + + internal override void WrapInContainer(AggregationContainer c) => c.VariableWidthHistogram = this; + } + + public class VariableWidthHistogramAggregationDescriptor + : BucketAggregationDescriptorBase, IVariableWidthHistogramAggregation, T>, IVariableWidthHistogramAggregation + where T : class + { + Field IVariableWidthHistogramAggregation.Field { get; set; } + int? IVariableWidthHistogramAggregation.Buckets { get; set; } + int? IVariableWidthHistogramAggregation.InitialBuffer { get; set; } + int? 
IVariableWidthHistogramAggregation.ShardSize { get; set; } + + /// + public VariableWidthHistogramAggregationDescriptor Field(Field field) => Assign(field, (a, v) => a.Field = v); + + /// + public VariableWidthHistogramAggregationDescriptor Field(Expression> field) => Assign(field, (a, v) => a.Field = v); + + /// + public VariableWidthHistogramAggregationDescriptor Buckets(int? buckets) => Assign(buckets, (a, v) => a.Buckets = v); + + /// + public VariableWidthHistogramAggregationDescriptor InitialBuffer(int? initialBuffer) => Assign(initialBuffer, (a, v) => a.InitialBuffer = v); + + /// + public VariableWidthHistogramAggregationDescriptor ShardSize(int? shardSize) => Assign(shardSize, (a, v) => a.ShardSize = v); + } +} diff --git a/src/Nest/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramBucket.cs b/src/Nest/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramBucket.cs new file mode 100644 index 00000000000..3ab6691a232 --- /dev/null +++ b/src/Nest/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramBucket.cs @@ -0,0 +1,21 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using System.Collections.Generic; + +namespace Nest +{ + public class VariableWidthHistogramBucket : BucketBase + { + public VariableWidthHistogramBucket(IReadOnlyDictionary dict) : base(dict) { } + + public double Key { get; set; } + + public double Minimum { get; set; } + + public double Maximum { get; set; } + + public long DocCount { get; set; } + } +} diff --git a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs index bd833fda9da..df7bf32ee8d 100644 --- a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs +++ b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs @@ -161,6 +161,8 @@ public interface IAggregationVisitor void Visit(ITTestAggregation aggregation); void Visit(IMultiTermsAggregation aggregation); + + void Visit(IVariableWidthHistogramAggregation aggregation); } public class AggregationVisitor : IAggregationVisitor @@ -307,5 +309,7 @@ public virtual void Visit(IAggregation aggregation) { } public virtual void Visit(IAggregationContainer aggregationContainer) { } + public virtual void Visit(IVariableWidthHistogramAggregation aggregationContainer) { } + } } diff --git a/src/Nest/Aggregations/Visitor/AggregationWalker.cs b/src/Nest/Aggregations/Visitor/AggregationWalker.cs index f51c2a79b9e..e0bcea0551f 100644 --- a/src/Nest/Aggregations/Visitor/AggregationWalker.cs +++ b/src/Nest/Aggregations/Visitor/AggregationWalker.cs @@ -190,6 +190,11 @@ public void Walk(IAggregationContainer aggregation, IAggregationVisitor visitor) AcceptAggregation(aggregation.Composite, visitor, (v, d) => v.Visit(d)); AcceptAggregation(aggregation.MedianAbsoluteDeviation, visitor, (v, d) => v.Visit(d)); AcceptAggregation(aggregation.TTest, visitor, (v, d) => v.Visit(d)); + AcceptAggregation(aggregation.VariableWidthHistogram, visitor, (v, d) => + { + v.Visit(d); + Accept(v, d.Aggregations); + }); } } } diff --git 
a/tests/Tests/Aggregations/Bucket/MultiTerms/MultiTermsAggregationUsageTests.cs b/tests/Tests/Aggregations/Bucket/MultiTerms/MultiTermsAggregationUsageTests.cs index 6b5203cb961..e392a25455a 100644 --- a/tests/Tests/Aggregations/Bucket/MultiTerms/MultiTermsAggregationUsageTests.cs +++ b/tests/Tests/Aggregations/Bucket/MultiTerms/MultiTermsAggregationUsageTests.cs @@ -18,7 +18,7 @@ namespace Tests.Aggregations.Bucket.MultiTerms /** * A multi-bucket value source based aggregation where buckets are dynamically built - one per unique set of values. * - * See the Elasticsearch documentation on {ref_current}//search-aggregations-bucket-multi-terms-aggregation.html[multi terms aggregation] for more detail. + * See the Elasticsearch documentation on {ref_current}/search-aggregations-bucket-multi-terms-aggregation.html[multi terms aggregation] for more detail. */ [SkipVersion("<7.12.0", "Multi terms aggregation added in 7.12.0")] public class MultiTermsAggregationUsageTests : AggregationUsageTestBase diff --git a/tests/Tests/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramUsageTests.cs b/tests/Tests/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramUsageTests.cs new file mode 100644 index 00000000000..c58b8de3760 --- /dev/null +++ b/tests/Tests/Aggregations/Bucket/VariableWidthHistogram/VariableWidthHistogramUsageTests.cs @@ -0,0 +1,86 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using System; +using System.Collections.Generic; +using System.Linq; +using Elastic.Elasticsearch.Xunit.XunitPlumbing; +using FluentAssertions; +using Nest; +using Tests.Core.Extensions; +using Tests.Core.ManagedElasticsearch.Clusters; +using Tests.Domain; +using Tests.Framework.EndpointTests.TestState; +using static Nest.Infer; + +namespace Tests.Aggregations.Bucket.VariableWidthHistogram +{ + /** + * A multi-bucket aggregation similar to Histogram. However, the width of each bucket is not specified. Rather, a target number of buckets is provided + * and bucket intervals are dynamically determined based on the document distribution. + * + * See the Elasticsearch documentation on {ref_current}/search-aggregations-bucket-variablewidthhistogram-aggregation.html[variable width histogram aggregation] for more detail. + */ + [SkipVersion("<7.11.0", "Variable width aggregation added in 7.11.0")] + public class VariableWidthHistogramUsageTests : AggregationUsageTestBase + { + public VariableWidthHistogramUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { } + + protected override object AggregationJson => new + { + commits = new + { + meta = new + { + foo = "bar" + }, + variable_width_histogram = new + { + field = "numberOfCommits", + buckets = 2, + initial_buffer = 2, + shard_size = 100 + } + } + }; + + protected override Func, IAggregationContainer> FluentAggs => a => a + .VariableWidthHistogram("commits", v => v + .Field(f => f.NumberOfCommits) + .Buckets(2) + .InitialBuffer(2) + .ShardSize(100) + .Meta(m => m + .Add("foo", "bar") + )); + + protected override AggregationDictionary InitializerAggs => + new VariableWidthHistogramAggregation("commits") + { + Field = Field(f => f.NumberOfCommits), + Buckets = 2, + InitialBuffer = 2, + ShardSize = 100, + Meta = new Dictionary + { + { "foo", "bar" } + } + }; + + protected override void ExpectResponse(ISearchResponse response) + { + 
response.ShouldBeValid(); + var counts = response.Aggregations.VariableWidthHistogram("commits"); + counts.Should().NotBeNull(); + counts.Buckets.Should().HaveCountGreaterThan(0); + var firstBucket = counts.Buckets.First(); + firstBucket.Key.Should().BeGreaterOrEqualTo(0); + firstBucket.Minimum.Should().BeGreaterOrEqualTo(0); + firstBucket.Maximum.Should().BeGreaterOrEqualTo(0); + firstBucket.DocCount.Should().BeGreaterOrEqualTo(1); + counts.Meta.Should().NotBeNull().And.HaveCount(1); + counts.Meta["foo"].Should().Be("bar"); + } + } +}