From f65dfddabc5074534c4ae0a7e965c75ac3244d59 Mon Sep 17 00:00:00 2001 From: Russ Cam Date: Wed, 15 Apr 2020 14:37:33 +1000 Subject: [PATCH 1/2] Implement Boxplot aggregation Relates: elastic/elasticsearch#51948 This commit implements the boxplot aggregation. Integration tests run against XPackCluster because it requires a license to use. --- docs/aggregations.asciidoc | 4 + .../boxplot-aggregation-usage.asciidoc | 88 +++++++++++++++++++ src/Nest/Aggregations/AggregateDictionary.cs | 2 + src/Nest/Aggregations/AggregateFormatter.cs | 31 +++++++ src/Nest/Aggregations/AggregationContainer.cs | 14 +++ .../Metric/Boxplot/BoxplotAggregate.cs | 14 +++ .../Metric/Boxplot/BoxplotAggregation.cs | 51 +++++++++++ .../Visitor/AggregationVisitor.cs | 4 + .../Aggregations/Visitor/AggregationWalker.cs | 1 + .../Aggregations/AggregationUsageTestBase.cs | 12 ++- .../Boxplot/BoxplotAggregationUsageTests.cs | 80 +++++++++++++++++ 11 files changed, 298 insertions(+), 3 deletions(-) create mode 100644 docs/aggregations/metric/boxplot/boxplot-aggregation-usage.asciidoc create mode 100644 src/Nest/Aggregations/Metric/Boxplot/BoxplotAggregate.cs create mode 100644 src/Nest/Aggregations/Metric/Boxplot/BoxplotAggregation.cs create mode 100644 tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs diff --git a/docs/aggregations.asciidoc b/docs/aggregations.asciidoc index ddf442fb218..707ef0491e6 100644 --- a/docs/aggregations.asciidoc +++ b/docs/aggregations.asciidoc @@ -36,6 +36,8 @@ The values are typically extracted from the fields of the document (using the fi * <> +* <> + * <> * <> @@ -74,6 +76,8 @@ See the Elasticsearch documentation on {ref_current}/search-aggregations-metrics include::aggregations/metric/average/average-aggregation-usage.asciidoc[] +include::aggregations/metric/boxplot/boxplot-aggregation-usage.asciidoc[] + include::aggregations/metric/cardinality/cardinality-aggregation-usage.asciidoc[] 
include::aggregations/metric/extended-stats/extended-stats-aggregation-usage.asciidoc[] diff --git a/docs/aggregations/metric/boxplot/boxplot-aggregation-usage.asciidoc b/docs/aggregations/metric/boxplot/boxplot-aggregation-usage.asciidoc new file mode 100644 index 00000000000..5012fafa324 --- /dev/null +++ b/docs/aggregations/metric/boxplot/boxplot-aggregation-usage.asciidoc @@ -0,0 +1,88 @@ +:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master + +:github: https://github.com/elastic/elasticsearch-net + +:nuget: https://www.nuget.org/packages + +//// +IMPORTANT NOTE +============== +This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/src/Tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs. +If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file, +please modify the original csharp file found at the link and submit the PR with that change. Thanks! +//// + +[[boxplot-aggregation-usage]] +=== Boxplot Aggregation Usage + +A boxplot metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents. +These values can be generated by a provided script or extracted from specific numeric or histogram fields in the documents. + +The boxplot aggregation returns essential information for making a box plot: minimum, maximum, median, first quartile (25th percentile) +and third quartile (75th percentile) values. 
+ +Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-boxplot-aggregation.html[Boxplot Aggregation] + +==== Fluent DSL example + +[source,csharp] +---- +a => a +.Boxplot("boxplot_commits", plot => plot + .Meta(m => m + .Add("foo", "bar") + ) + .Field(p => p.NumberOfCommits) + .Missing(10) + .Compression(100) +) +---- + +==== Object Initializer syntax example + +[source,csharp] +---- +new BoxplotAggregation("boxplot_commits", Field(p => p.NumberOfCommits)) +{ + Meta = new Dictionary + { + { "foo", "bar" } + }, + Missing = 10, + Compression = 100 +} +---- + +[source,javascript] +.Example json output +---- +{ + "boxplot_commits": { + "meta": { + "foo": "bar" + }, + "boxplot": { + "field": "numberOfCommits", + "missing": 10.0, + "compression": 100.0 + } + } +} +---- + +==== Handling Responses + +[source,csharp] +---- +response.ShouldBeValid(); +var boxplot = response.Aggregations.Boxplot("boxplot_commits"); +boxplot.Should().NotBeNull(); +boxplot.Min.Should().BeGreaterOrEqualTo(0); +boxplot.Max.Should().BeGreaterOrEqualTo(0); +boxplot.Q1.Should().BeGreaterOrEqualTo(0); +boxplot.Q2.Should().BeGreaterOrEqualTo(0); +boxplot.Q3.Should().BeGreaterOrEqualTo(0); +boxplot.Meta.Should().NotBeNull().And.HaveCount(1); +boxplot.Meta["foo"].Should().Be("bar"); +---- + diff --git a/src/Nest/Aggregations/AggregateDictionary.cs b/src/Nest/Aggregations/AggregateDictionary.cs index defa6e149c8..9ea077d2341 100644 --- a/src/Nest/Aggregations/AggregateDictionary.cs +++ b/src/Nest/Aggregations/AggregateDictionary.cs @@ -236,6 +236,8 @@ public CompositeBucketAggregate Composite(string key) public ValueAggregate MedianAbsoluteDeviation(string key) => TryGet(key); + public BoxplotAggregate Boxplot(string key) => TryGet(key); + private TAggregate TryGet(string key) where TAggregate : class, IAggregate => BackingDictionary.TryGetValue(key, out var agg) ? 
agg as TAggregate : null; diff --git a/src/Nest/Aggregations/AggregateFormatter.cs b/src/Nest/Aggregations/AggregateFormatter.cs index 406802df356..d73357e0d3a 100644 --- a/src/Nest/Aggregations/AggregateFormatter.cs +++ b/src/Nest/Aggregations/AggregateFormatter.cs @@ -51,6 +51,7 @@ internal class AggregateFormatter : IJsonFormatter { Parser.Hits, 8 }, { Parser.Location, 9 }, { Parser.Fields, 10 }, + { Parser.Min, 11 } }; private static readonly byte[] SumOtherDocCount = JsonWriter.GetEncodedPropertyNameWithoutQuotation(Parser.SumOtherDocCount); @@ -151,6 +152,9 @@ private IAggregate ReadAggregate(ref JsonReader reader, IJsonFormatterResolver f case 10: aggregate = GetMatrixStatsAggregate(ref reader, formatterResolver, meta); break; + case 11: + aggregate = GetBoxplotAggregate(ref reader, formatterResolver, meta); + break; } } else @@ -212,6 +216,32 @@ private IAggregate GetMatrixStatsAggregate(ref JsonReader reader, IJsonFormatter return matrixStats; } + private IAggregate GetBoxplotAggregate(ref JsonReader reader, IJsonFormatterResolver formatterResolver, IReadOnlyDictionary meta) + { + var boxplot = new BoxplotAggregate + { + Min = reader.ReadDouble(), + Meta = meta + }; + reader.ReadNext(); // , + reader.ReadNext(); // "max" + reader.ReadNext(); // : + boxplot.Max = reader.ReadDouble(); + reader.ReadNext(); // , + reader.ReadNext(); // "q1" + reader.ReadNext(); // : + boxplot.Q1 = reader.ReadDouble(); + reader.ReadNext(); // , + reader.ReadNext(); // "q2" + reader.ReadNext(); // : + boxplot.Q2 = reader.ReadDouble(); + reader.ReadNext(); // , + reader.ReadNext(); // "q3" + reader.ReadNext(); // : + boxplot.Q3 = reader.ReadDouble(); + return boxplot; + } + private IAggregate GetTopHitsAggregate(ref JsonReader reader, IJsonFormatterResolver formatterResolver, IReadOnlyDictionary meta) { var count = 0; @@ -982,6 +1012,7 @@ private static class Parser public const string Location = "location"; public const string MaxScore = "max_score"; public const string Meta = 
"meta"; + public const string Min = "min"; public const string MinLength = "min_length"; public const string Score = "score"; diff --git a/src/Nest/Aggregations/AggregationContainer.cs b/src/Nest/Aggregations/AggregationContainer.cs index e342a76c42a..1bf3ddb6f8b 100644 --- a/src/Nest/Aggregations/AggregationContainer.cs +++ b/src/Nest/Aggregations/AggregationContainer.cs @@ -92,6 +92,9 @@ public interface IAggregationContainer [DataMember(Name = "avg_bucket")] IAverageBucketAggregation AverageBucket { get; set; } + [DataMember(Name = "boxplot")] + IBoxplotAggregation Boxplot { get; set; } + [DataMember(Name = "bucket_script")] IBucketScriptAggregation BucketScript { get; set; } @@ -274,6 +277,9 @@ public class AggregationContainer : IAggregationContainer public IAverageBucketAggregation AverageBucket { get; set; } + /// + public IBoxplotAggregation Boxplot { get; set; } + public IBucketScriptAggregation BucketScript { get; set; } public IBucketSelectorAggregation BucketSelector { get; set; } @@ -422,6 +428,8 @@ public class AggregationContainerDescriptor : DescriptorBase, IStringStatsAggregation> selector ) => _SetInnerAggregation(name, selector, (a, d) => a.StringStats = d); + /// + public AggregationContainerDescriptor Boxplot(string name, + Func, IBoxplotAggregation> selector + ) => + _SetInnerAggregation(name, selector, (a, d) => a.Boxplot = d); + /// /// Fluent methods do not assign to properties on `this` directly but on IAggregationContainers inside /// `this.Aggregations[string, IContainer] diff --git a/src/Nest/Aggregations/Metric/Boxplot/BoxplotAggregate.cs b/src/Nest/Aggregations/Metric/Boxplot/BoxplotAggregate.cs new file mode 100644 index 00000000000..9fb8e596f69 --- /dev/null +++ b/src/Nest/Aggregations/Metric/Boxplot/BoxplotAggregate.cs @@ -0,0 +1,14 @@ +namespace Nest { + public class BoxplotAggregate : MetricAggregateBase + { + public double Min { get; set; } + + public double Max { get; set; } + + public double Q1 { get; set; } + + public double 
Q2 { get; set; } + + public double Q3 { get; set; } + } +} diff --git a/src/Nest/Aggregations/Metric/Boxplot/BoxplotAggregation.cs b/src/Nest/Aggregations/Metric/Boxplot/BoxplotAggregation.cs new file mode 100644 index 00000000000..eb0b4faacfb --- /dev/null +++ b/src/Nest/Aggregations/Metric/Boxplot/BoxplotAggregation.cs @@ -0,0 +1,51 @@ +using System.Runtime.Serialization; +using Elasticsearch.Net.Utf8Json; + +namespace Nest +{ + /// + /// A metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents. + /// These values can be generated by a provided script or extracted from specific numeric or histogram fields in the documents. + /// + /// Available in Elasticsearch 7.7.0+ with at least basic license level + /// + [InterfaceDataContract] + [ReadAs(typeof(BoxplotAggregation))] + public interface IBoxplotAggregation : IMetricAggregation + { + /// + /// Balances memory utilization with estimation accuracy. + /// Increasing compression increases the accuracy of percentiles at the cost + /// of more memory. Larger compression values also make the algorithm slower since the underlying tree data structure grows in size, + /// resulting in more expensive operations. + /// + [DataMember(Name = "compression")] + double? Compression { get; set; } + } + + /// + public class BoxplotAggregation : MetricAggregationBase, IBoxplotAggregation + { + internal BoxplotAggregation() { } + + public BoxplotAggregation(string name, Field field) : base(name, field) { } + + internal override void WrapInContainer(AggregationContainer c) => c.Boxplot = this; + + /// + public double? Compression { get; set; } + } + + /// + public class BoxplotAggregationDescriptor + : MetricAggregationDescriptorBase, IBoxplotAggregation, T> + , IBoxplotAggregation + where T : class + { + double? IBoxplotAggregation.Compression { get; set; } + + /// + public BoxplotAggregationDescriptor Compression(double? 
compression) => + Assign(compression, (a, v) => a.Compression = v); + } +} diff --git a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs index 3620ac20a94..25c0a3f0563 100644 --- a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs +++ b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs @@ -139,6 +139,8 @@ public interface IAggregationVisitor void Visit(IMovingFunctionAggregation aggregation); void Visit(IStringStatsAggregation aggregation); + + void Visit(IBoxplotAggregation aggregation); } public class AggregationVisitor : IAggregationVisitor @@ -263,6 +265,8 @@ public virtual void Visit(IMovingFunctionAggregation aggregation) { } public virtual void Visit(IStringStatsAggregation aggregation) { } + public virtual void Visit(IBoxplotAggregation aggregation) { } + public virtual void Visit(IAggregation aggregation) { } public virtual void Visit(IAggregationContainer aggregationContainer) { } diff --git a/src/Nest/Aggregations/Visitor/AggregationWalker.cs b/src/Nest/Aggregations/Visitor/AggregationWalker.cs index aea1e1a7862..fa4bc8e4381 100644 --- a/src/Nest/Aggregations/Visitor/AggregationWalker.cs +++ b/src/Nest/Aggregations/Visitor/AggregationWalker.cs @@ -38,6 +38,7 @@ public void Walk(IAggregationContainer aggregation, IAggregationVisitor visitor) visitor.Visit(aggregation); AcceptAggregation(aggregation.Average, visitor, (v, d) => v.Visit(d)); AcceptAggregation(aggregation.AverageBucket, visitor, (v, d) => v.Visit(d)); + AcceptAggregation(aggregation.Boxplot, visitor, (v, d) => v.Visit(d)); AcceptAggregation(aggregation.BucketScript, visitor, (v, d) => v.Visit(d)); AcceptAggregation(aggregation.BucketSort, visitor, (v, d) => v.Visit(d)); AcceptAggregation(aggregation.BucketSelector, visitor, (v, d) => v.Visit(d)); diff --git a/tests/Tests/Aggregations/AggregationUsageTestBase.cs b/tests/Tests/Aggregations/AggregationUsageTestBase.cs index 24959619b7a..4307a230107 100644 --- 
a/tests/Tests/Aggregations/AggregationUsageTestBase.cs +++ b/tests/Tests/Aggregations/AggregationUsageTestBase.cs @@ -1,5 +1,6 @@ using System; using System.Threading.Tasks; +using Elastic.Managed.Ephemeral; using Elastic.Xunit.XunitPlumbing; using Elasticsearch.Net; using Nest; @@ -13,10 +14,15 @@ namespace Tests.Aggregations { - public abstract class AggregationUsageTestBase - : ApiIntegrationTestBase, ISearchRequest, SearchDescriptor, SearchRequest> - { + public abstract class AggregationUsageTestBase : AggregationUsageTestBase { protected AggregationUsageTestBase(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { } + } + + public abstract class AggregationUsageTestBase + : ApiIntegrationTestBase, ISearchRequest, SearchDescriptor, SearchRequest> + where TCluster : IEphemeralCluster, INestTestCluster, new() + { + protected AggregationUsageTestBase(TCluster cluster, EndpointUsage usage) : base(cluster, usage) { } protected virtual Nest.Indices AgainstIndex { get; } = Index(); diff --git a/tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs b/tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs new file mode 100644 index 00000000000..a9b3d7c64d3 --- /dev/null +++ b/tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs @@ -0,0 +1,80 @@ +using System; +using System.Collections.Generic; +using Elastic.Xunit.XunitPlumbing; +using FluentAssertions; +using Nest; +using Tests.Core.Extensions; +using Tests.Core.ManagedElasticsearch.Clusters; +using Tests.Domain; +using Tests.Framework.EndpointTests.TestState; +using static Nest.Infer; + +namespace Tests.Aggregations.Metric.Boxplot +{ + /** + * A boxplot metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents. + * These values can be generated by a provided script or extracted from specific numeric or histogram fields in the documents. 
+ * + * The boxplot aggregation returns essential information for making a box plot: minimum, maximum, median, first quartile (25th percentile) + * and third quartile (75th percentile) values. + * + * Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-boxplot-aggregation.html[Boxplot Aggregation] + */ + [SkipVersion("<7.7.0", "introduced in 7.7.0")] + public class BoxplotAggregationUsageTests : AggregationUsageTestBase + { + public BoxplotAggregationUsageTests(XPackCluster i, EndpointUsage usage) : base(i, usage) { } + + protected override object AggregationJson => new + { + boxplot_commits = new + { + meta = new + { + foo = "bar" + }, + boxplot = new + { + field = "numberOfCommits", + missing = 10.0, + compression = 100.0 + } + } + }; + + protected override Func, IAggregationContainer> FluentAggs => a => a + .Boxplot("boxplot_commits", plot => plot + .Meta(m => m + .Add("foo", "bar") + ) + .Field(p => p.NumberOfCommits) + .Missing(10) + .Compression(100) + ); + + protected override AggregationDictionary InitializerAggs => + new BoxplotAggregation("boxplot_commits", Field(p => p.NumberOfCommits)) + { + Meta = new Dictionary + { + { "foo", "bar" } + }, + Missing = 10, + Compression = 100 + }; + + protected override void ExpectResponse(ISearchResponse response) + { + response.ShouldBeValid(); + var boxplot = response.Aggregations.Boxplot("boxplot_commits"); + boxplot.Should().NotBeNull(); + boxplot.Min.Should().BeGreaterOrEqualTo(0); + boxplot.Max.Should().BeGreaterOrEqualTo(0); + boxplot.Q1.Should().BeGreaterOrEqualTo(0); + boxplot.Q2.Should().BeGreaterOrEqualTo(0); + boxplot.Q3.Should().BeGreaterOrEqualTo(0); + boxplot.Meta.Should().NotBeNull().And.HaveCount(1); + boxplot.Meta["foo"].Should().Be("bar"); + } + } +} From eb19f5eaa75b108c0f097ea63cf3b31191f05b93 Mon Sep 17 00:00:00 2001 From: Russ Cam Date: Fri, 17 Apr 2020 15:19:02 +1000 Subject: [PATCH 2/2] Run boxplot against ReadOnlyCluster which has basic license --- 
tests/Tests/Aggregations/AggregationUsageTestBase.cs | 11 +++-------- .../Metric/Boxplot/BoxplotAggregationUsageTests.cs | 4 ++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/Tests/Aggregations/AggregationUsageTestBase.cs b/tests/Tests/Aggregations/AggregationUsageTestBase.cs index 4307a230107..87114fc0300 100644 --- a/tests/Tests/Aggregations/AggregationUsageTestBase.cs +++ b/tests/Tests/Aggregations/AggregationUsageTestBase.cs @@ -14,15 +14,10 @@ namespace Tests.Aggregations { - public abstract class AggregationUsageTestBase : AggregationUsageTestBase { - protected AggregationUsageTestBase(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { } - } - - public abstract class AggregationUsageTestBase - : ApiIntegrationTestBase, ISearchRequest, SearchDescriptor, SearchRequest> - where TCluster : IEphemeralCluster, INestTestCluster, new() + public abstract class AggregationUsageTestBase + : ApiIntegrationTestBase, ISearchRequest, SearchDescriptor, SearchRequest> { - protected AggregationUsageTestBase(TCluster cluster, EndpointUsage usage) : base(cluster, usage) { } + protected AggregationUsageTestBase(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { } protected virtual Nest.Indices AgainstIndex { get; } = Index(); diff --git a/tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs b/tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs index a9b3d7c64d3..9a51e57a016 100644 --- a/tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs +++ b/tests/Tests/Aggregations/Metric/Boxplot/BoxplotAggregationUsageTests.cs @@ -21,9 +21,9 @@ namespace Tests.Aggregations.Metric.Boxplot * Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-boxplot-aggregation.html[Boxplot Aggregation] */ [SkipVersion("<7.7.0", "introduced in 7.7.0")] - public class BoxplotAggregationUsageTests : AggregationUsageTestBase + public class 
BoxplotAggregationUsageTests : AggregationUsageTestBase { - public BoxplotAggregationUsageTests(XPackCluster i, EndpointUsage usage) : base(i, usage) { } + public BoxplotAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { } protected override object AggregationJson => new {