From f9a9588ed764d336af22792ba945bed163e74e4a Mon Sep 17 00:00:00 2001 From: mandis Date: Thu, 24 Sep 2020 16:54:43 +0200 Subject: [PATCH 1/2] Added diversified sampler aggregation --- src/Nest/Aggregations/AggregateDictionary.cs | 2 + src/Nest/Aggregations/AggregationContainer.cs | 12 ++++ .../DiversifiedSamplerAggregation.cs | 69 +++++++++++++++++++ ...ersifiedSamplerAggregationExecutionHint.cs | 18 +++++ .../Visitor/AggregationVisitor.cs | 4 ++ .../DiversifiedSamplerAggregationPage.cs | 68 +++++++++++++++--- ...DiversifiedSamplerAggregationUsageTests.cs | 67 ++++++++++++++++++ 7 files changed, 232 insertions(+), 8 deletions(-) create mode 100644 src/Nest/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregation.cs create mode 100644 src/Nest/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationExecutionHint.cs create mode 100644 tests/Tests/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationUsageTests.cs diff --git a/src/Nest/Aggregations/AggregateDictionary.cs b/src/Nest/Aggregations/AggregateDictionary.cs index 84f9fbc2024..e03af4a7b8e 100644 --- a/src/Nest/Aggregations/AggregateDictionary.cs +++ b/src/Nest/Aggregations/AggregateDictionary.cs @@ -134,6 +134,8 @@ public FiltersAggregate Filters(string key) public SingleBucketAggregate Sampler(string key) => TryGet(key); + public SingleBucketAggregate DiversifiedSampler(string key) => TryGet(key); + public GeoCentroidAggregate GeoCentroid(string key) => TryGet(key); public SignificantTermsAggregate SignificantTerms(string key) diff --git a/src/Nest/Aggregations/AggregationContainer.cs b/src/Nest/Aggregations/AggregationContainer.cs index 9c984e38359..3bcbd3e6333 100644 --- a/src/Nest/Aggregations/AggregationContainer.cs +++ b/src/Nest/Aggregations/AggregationContainer.cs @@ -135,6 +135,9 @@ public interface IAggregationContainer [DataMember(Name = "derivative")] IDerivativeAggregation Derivative { get; set; } + [DataMember(Name = "diversified_sampler")] + IDiversifiedSamplerAggregation DiversifiedSampler { get; set; } + [DataMember(Name = "extended_stats")] IExtendedStatsAggregation ExtendedStats { get; set; } @@ -323,6 +326,8 @@ public class AggregationContainer : IAggregationContainer public IDerivativeAggregation Derivative { get; set; } + public IDiversifiedSamplerAggregation DiversifiedSampler { get; set; } + public IExtendedStatsAggregation ExtendedStats { get; set; } public IExtendedStatsBucketAggregation ExtendedStatsBucket { get; set; } @@ -484,6 +489,8 @@ public class AggregationContainerDescriptor : DescriptorBase, ISamplerAggregation> selector ) => _SetInnerAggregation(name, selector, (a, d) => a.Sampler = d); + public AggregationContainerDescriptor DiversifiedSampler(string name, + Func, IDiversifiedSamplerAggregation> selector + ) => + _SetInnerAggregation(name, selector, (a, d) => a.DiversifiedSampler = d); + public AggregationContainerDescriptor GeoCentroid(string name, Func, IGeoCentroidAggregation> selector ) => diff --git a/src/Nest/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregation.cs b/src/Nest/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregation.cs new file mode 100644 index 00000000000..15b426a9f14 --- /dev/null +++ b/src/Nest/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregation.cs @@ -0,0 +1,69 @@ +using System; +using System.Linq.Expressions; +using System.Runtime.Serialization; +using Elasticsearch.Net.Utf8Json; + +namespace Nest +{ + [InterfaceDataContract] + [ReadAs(typeof(DiversifiedSamplerAggregation))] + public interface IDiversifiedSamplerAggregation : IBucketAggregation + { + [DataMember(Name ="execution_hint")] + DiversifiedSamplerAggregationExecutionHint? ExecutionHint { get; set; } + + [DataMember(Name = "field")] + Field Field { get; set; } + + [DataMember(Name ="max_docs_per_value")] + int? MaxDocsPerValue { get; set; } + + [DataMember(Name ="script")] + IScript Script { get; set; } + + [DataMember(Name ="shard_size")] + int? ShardSize { get; set; } + } + + public class DiversifiedSamplerAggregation : BucketAggregationBase, IDiversifiedSamplerAggregation + { + internal DiversifiedSamplerAggregation() { } + + public DiversifiedSamplerAggregation(string name) : base(name) { } + + public DiversifiedSamplerAggregationExecutionHint? ExecutionHint { get; set; } + public Field Field { get; set; } + public int? MaxDocsPerValue { get; set; } + public IScript Script { get; set; } + public int? ShardSize { get; set; } + + internal override void WrapInContainer(AggregationContainer c) => c.DiversifiedSampler = this; + } + + public class DiversifiedSamplerAggregationDescriptor + : BucketAggregationDescriptorBase, IDiversifiedSamplerAggregation, T>, IDiversifiedSamplerAggregation + where T : class + { + DiversifiedSamplerAggregationExecutionHint? IDiversifiedSamplerAggregation.ExecutionHint { get; set; } + Field IDiversifiedSamplerAggregation.Field { get; set; } + int? IDiversifiedSamplerAggregation.MaxDocsPerValue { get; set; } + IScript IDiversifiedSamplerAggregation.Script { get; set; } + int? IDiversifiedSamplerAggregation.ShardSize { get; set; } + + public DiversifiedSamplerAggregationDescriptor ExecutionHint(DiversifiedSamplerAggregationExecutionHint? executionHint) => + Assign(executionHint, (a, v) => a.ExecutionHint = v); + + public DiversifiedSamplerAggregationDescriptor Field(Field field) => Assign(field, (a, v) => a.Field = v); + + public DiversifiedSamplerAggregationDescriptor Field(Expression> field) => Assign(field, (a, v) => a.Field = v); + + public DiversifiedSamplerAggregationDescriptor MaxDocsPerValue(int? maxDocs) => Assign(maxDocs, (a, v) => a.MaxDocsPerValue = v); + + public DiversifiedSamplerAggregationDescriptor Script(string script) => Assign((InlineScript)script, (a, v) => a.Script = v); + + public DiversifiedSamplerAggregationDescriptor Script(Func scriptSelector) => + Assign(scriptSelector, (a, v) => a.Script = v?.Invoke(new ScriptDescriptor())); + + public DiversifiedSamplerAggregationDescriptor ShardSize(int? shardSize) => Assign(shardSize, (a, v) => a.ShardSize = v); + } +} diff --git a/src/Nest/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationExecutionHint.cs b/src/Nest/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationExecutionHint.cs new file mode 100644 index 00000000000..a962cd94553 --- /dev/null +++ b/src/Nest/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationExecutionHint.cs @@ -0,0 +1,18 @@ +using System.Runtime.Serialization; +using Elasticsearch.Net; + +namespace Nest +{ + [StringEnum] + public enum DiversifiedSamplerAggregationExecutionHint + { + [EnumMember(Value = "map")] + Map, + + [EnumMember(Value = "global_ordinals")] + GlobalOrdinals, + + [EnumMember(Value = "bytes_hash")] + BytesHash + } +} diff --git a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs index 91feb16704c..4d1c16da9be 100644 --- a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs +++ b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs @@ -130,6 +130,8 @@ public interface IAggregationVisitor void Visit(ISamplerAggregation aggregation); + void Visit(IDiversifiedSamplerAggregation aggregation); + void Visit(IGeoCentroidAggregation aggregation); void Visit(ICompositeAggregation aggregation); @@ -213,6 +215,8 @@ public virtual void Visit(IBucketScriptAggregation aggregation) { } public virtual void Visit(ISamplerAggregation aggregation) { } + public virtual void Visit(IDiversifiedSamplerAggregation aggregation) { } + public virtual void Visit(IBucketSelectorAggregation aggregation) { } public virtual void Visit(IBucketSortAggregation aggregation) { } diff --git a/tests/Examples/Aggregations/Bucket/DiversifiedSamplerAggregationPage.cs b/tests/Examples/Aggregations/Bucket/DiversifiedSamplerAggregationPage.cs index 53046b546e9..842e6927b22 100644 --- a/tests/Examples/Aggregations/Bucket/DiversifiedSamplerAggregationPage.cs +++ b/tests/Examples/Aggregations/Bucket/DiversifiedSamplerAggregationPage.cs @@ -10,15 +10,35 @@ namespace Examples.Aggregations.Bucket { public class DiversifiedSamplerAggregationPage : ExampleBase { - [U(Skip = "Example not implemented")] + [U] [Description("aggregations/bucket/diversified-sampler-aggregation.asciidoc:30")] public void Line30() { // tag::3344c3478f1e8bbbef683757638a34f4[] - var response0 = new SearchResponse(); + var searchResponse = client.Search(s => s + .Index("stackoverflow") + .Query(q => q + .QueryString(qs => qs + .Query("tags:elasticsearch") + ) + ) + .Aggregations(a => a + .DiversifiedSampler("my_unbiased_sample", s => s + .ShardSize(200) + .Field("author") + .Aggregations(agg => agg + .SignificantTerms("keywords", k => k + .Field("tags") + .Exclude(new string[] { "elasticsearch" }) + ) + ) + ) + ) + .Size(0) + ); // end::3344c3478f1e8bbbef683757638a34f4[] - response0.MatchesExample(@"POST /stackoverflow/_search?size=0 + searchResponse.MatchesExample(@"POST /stackoverflow/_search?size=0 { ""query"": { ""query_string"": { @@ -41,18 +61,46 @@ public void Line30() } } } - }"); + }", (e, b) => + { + e.Uri.Query = e.Uri.Query.Replace("size=0", string.Empty); + b["size"] = 0; + }); } - [U(Skip = "Example not implemented")] + [U] [Description("aggregations/bucket/diversified-sampler-aggregation.asciidoc:95")] public void Line95() { // tag::07afce825c09de17a3d73a02b17a0a97[] - var response0 = new SearchResponse(); + var searchResponse = client.Search(s => s + .Index("stackoverflow") + .Query(q => q + .QueryString(qs => qs + .Query("tags:kibana") + ) + ) + .Aggregations(a => a + .DiversifiedSampler("my_unbiased_sample", s => s + .ShardSize(200) + .MaxDocsPerValue(3) + .Script(sc => sc + .Source("doc['tags'].hashCode()") + .Lang("painless") + ) + .Aggregations(agg => agg + .SignificantTerms("keywords", k => k + .Field("tags") + .Exclude(new string[] { "kibana" }) + ) + ) + ) + ) + .Size(0) + ); // end::07afce825c09de17a3d73a02b17a0a97[] - response0.MatchesExample(@"POST /stackoverflow/_search?size=0 + searchResponse.MatchesExample(@"POST /stackoverflow/_search?size=0 { ""query"": { ""query_string"": { @@ -79,7 +127,11 @@ public void Line95() } } } - }"); + }", (e, b) => + { + e.Uri.Query = e.Uri.Query.Replace("size=0", string.Empty); + b["size"] = 0; + }); } } } diff --git a/tests/Tests/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationUsageTests.cs b/tests/Tests/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationUsageTests.cs new file mode 100644 index 00000000000..87947a35dfd --- /dev/null +++ b/tests/Tests/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationUsageTests.cs @@ -0,0 +1,67 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System; +using Nest; +using Tests.Core.ManagedElasticsearch.Clusters; +using Tests.Domain; +using Tests.Framework.EndpointTests.TestState; + +namespace Tests.Aggregations.Bucket.DiversifiedSampler +{ + public class DiversifiedSamplerAggregationUsageTests : AggregationUsageTestBase + { + public DiversifiedSamplerAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { } + + protected override object AggregationJson => new + { + diversified_sample = new + { + diversified_sampler = new + { + execution_hint = "global_ordinals", + field = "type", + max_docs_per_value = 10, + shard_size = 200 + }, + aggs = new + { + significant_names = new + { + significant_terms = new + { + field = "name" + } + } + } + } + }; + + protected override Func, IAggregationContainer> FluentAggs => a => a + .DiversifiedSampler("diversified_sample", sm => sm + .ExecutionHint(DiversifiedSamplerAggregationExecutionHint.GlobalOrdinals) + .Field(doc => doc.Type) + .MaxDocsPerValue(10) + .ShardSize(200) + .Aggregations(aa => aa + .SignificantTerms("significant_names", st => st + .Field(p => p.Name) + ) + ) + ); + + protected override AggregationDictionary InitializerAggs => + new DiversifiedSamplerAggregation("diversified_sample") + { + ExecutionHint = DiversifiedSamplerAggregationExecutionHint.GlobalOrdinals, + Field = new Field("type"), + MaxDocsPerValue = 10, + ShardSize = 200, + Aggregations = new SignificantTermsAggregation("significant_names") + { + Field = "name" + } + }; + } +} From 19a956e26e00da311cc16f0c4148a680fdebc92b Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 20 Oct 2020 10:14:54 +0200 Subject: [PATCH 2/2] add skip version for usage test when testing against a version below 7.9.0 --- .../DiversifiedSamplerAggregationUsageTests.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/Tests/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationUsageTests.cs b/tests/Tests/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationUsageTests.cs index 87947a35dfd..43940b71f67 100644 --- a/tests/Tests/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationUsageTests.cs +++ b/tests/Tests/Aggregations/Bucket/DiversifiedSampler/DiversifiedSamplerAggregationUsageTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information using System; +using Elastic.Elasticsearch.Xunit.XunitPlumbing; using Nest; using Tests.Core.ManagedElasticsearch.Clusters; using Tests.Domain; @@ -10,6 +11,7 @@ namespace Tests.Aggregations.Bucket.DiversifiedSampler { + [SkipVersion("<7.9.0", "introduced in 7.9.0")] public class DiversifiedSamplerAggregationUsageTests : AggregationUsageTestBase { public DiversifiedSamplerAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { }