-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit adds support for the string_stats aggregation introduced in Elasticsearch 7.6.0. It is a metric aggregation but does not implement IMetricAggregation because the type of the Missing field is a string and not a double value. Missing is implemented as object because the hierarchy of metric aggregations will be changed as part of #4332, after which Missing will be an object. The documentation for StringStats indicates that the distribution is returned in descending probability order, but it is modelled as a JSON object. Following internal discussion, it is modelled as a dictionary on the response, as it is considered that this modelling will not diminish functionality. Closes #4369 (cherry picked from commit cc45fb1)
- Loading branch information
Showing
7 changed files
with
312 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
src/Nest/Aggregations/Metric/StringStats/StringStatsAggregate.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
using System.Collections.Generic; | ||
using Elasticsearch.Net; | ||
|
||
namespace Nest | ||
{ | ||
/// <summary> | ||
/// Response-side model for the string stats metric aggregation: term length statistics, | ||
/// a count, the Shannon entropy and a per-character probability distribution. | ||
/// </summary> | ||
public class StringStatsAggregate : MetricAggregateBase | ||
{ | ||
/// <summary> | ||
/// The average length computed over all terms. | ||
/// </summary> | ||
public double AverageLength { get; set; } | ||
|
||
/// <summary> | ||
/// The number of non-empty fields counted. | ||
/// </summary> | ||
public long Count { get; set; } | ||
|
||
/// <summary> | ||
/// The length of the longest term. | ||
/// </summary> | ||
public int MaxLength { get; set; } | ||
|
||
/// <summary> | ||
/// The length of the shortest term. | ||
/// </summary> | ||
public int MinLength { get; set; } | ||
|
||
/// <summary> | ||
/// The Shannon Entropy value computed over all terms collected by the aggregation. | ||
/// Shannon entropy quantifies the amount of information contained in the field. | ||
/// It is a very useful metric for measuring a wide range of properties of a data set, such as diversity, similarity, randomness etc. | ||
/// </summary> | ||
public double Entropy { get; set; } | ||
|
||
/// <summary> | ||
/// The probability of each character appearing in all terms, exposed as a read-only dictionary | ||
/// of string keys (presumably one key per character) to probability values. | ||
/// Initialised to <c>EmptyReadOnly&lt;string, double&gt;.Dictionary</c>, so it is not null by default. | ||
/// </summary> | ||
public IReadOnlyDictionary<string, double> Distribution { get; set; } = EmptyReadOnly<string, double>.Dictionary; | ||
} | ||
} |
104 changes: 104 additions & 0 deletions
104
src/Nest/Aggregations/Metric/StringStats/StringStatsAggregation.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq.Expressions; | ||
using System.Runtime.Serialization; | ||
using Elasticsearch.Net.Utf8Json; | ||
|
||
namespace Nest | ||
{ | ||
/// <summary> | ||
/// A multi-value metrics aggregation that computes statistics over string values extracted from the aggregated documents. | ||
/// These values can be retrieved either from specific keyword fields in the documents or can be generated by a provided script. | ||
/// <para /> | ||
/// Available in Elasticsearch 7.6.0+ with at least basic license level. | ||
/// <para /> | ||
/// Read back as <see cref="StringStatsAggregation"/> (see the ReadAs attribute below). | ||
/// </summary> | ||
[InterfaceDataContract] | ||
[ReadAs(typeof(StringStatsAggregation))] | ||
public interface IStringStatsAggregation : IAggregation | ||
{ | ||
/// <summary> | ||
/// The field to perform the aggregation on. Mapped to the <c>field</c> member. | ||
/// </summary> | ||
[DataMember(Name = "field")] | ||
Field Field { get; set; } | ||
|
||
/// <summary> | ||
/// A value to use for documents missing a value for the field. Mapped to the <c>missing</c> member. | ||
/// Declared as <see cref="object"/>; for this aggregation the substitute is presumably a string value. | ||
/// </summary> | ||
[DataMember(Name = "missing")] | ||
object Missing { get; set; } | ||
|
||
/// <summary> | ||
/// Compute the string stats based on a script. Mapped to the <c>script</c> member. | ||
/// </summary> | ||
[DataMember(Name = "script")] | ||
IScript Script { get; set; } | ||
|
||
/// <summary> | ||
/// Include the probability distribution for all characters in the response. | ||
/// Mapped to the <c>show_distribution</c> member; nullable, so it can be left unset. | ||
/// </summary> | ||
[DataMember(Name = "show_distribution")] | ||
bool? ShowDistribution { get; set; } | ||
} | ||
|
||
/// <summary> | ||
/// Object-initializer form of the string stats aggregation request. | ||
/// </summary> | ||
/// <inheritdoc cref="IStringStatsAggregation"/> | ||
public class StringStatsAggregation : AggregationBase, IStringStatsAggregation | ||
{ | ||
    /// <summary> | ||
    /// Parameterless constructor, internal to the assembly. | ||
    /// </summary> | ||
    internal StringStatsAggregation() | ||
    { | ||
    } | ||
|
||
    /// <summary> | ||
    /// Creates a string stats aggregation with the given name over the given field. | ||
    /// </summary> | ||
    /// <param name="name">The aggregation name, passed to the base constructor.</param> | ||
    /// <param name="field">The field to perform the aggregation on.</param> | ||
    public StringStatsAggregation(string name, Field field) | ||
        : base(name) | ||
    { | ||
        Field = field; | ||
    } | ||
|
||
    /// <inheritdoc /> | ||
    public Field Field { get; set; } | ||
|
||
    /// <inheritdoc /> | ||
    public object Missing { get; set; } | ||
|
||
    /// <inheritdoc /> | ||
    public IScript Script { get; set; } | ||
|
||
    /// <inheritdoc /> | ||
    public bool? ShowDistribution { get; set; } | ||
|
||
    /// <summary> | ||
    /// Stores this instance in the <c>StringStats</c> slot of the supplied container. | ||
    /// </summary> | ||
    internal override void WrapInContainer(AggregationContainer c) | ||
    { | ||
        c.StringStats = this; | ||
    } | ||
} | ||
|
||
/// <summary> | ||
/// Fluent builder for the string stats aggregation request. Each method records a value | ||
/// on the explicitly implemented <see cref="IStringStatsAggregation"/> state via <c>Assign</c> | ||
/// and returns the result of that call so that calls can be chained. | ||
/// </summary> | ||
/// <inheritdoc cref="IStringStatsAggregation"/> | ||
public class StringStatsAggregationDescriptor<T> | ||
    : DescriptorBase<StringStatsAggregationDescriptor<T>, IStringStatsAggregation>, IStringStatsAggregation | ||
    where T : class | ||
{ | ||
    // Explicit interface state; only written through the fluent methods below. | ||
    Field IStringStatsAggregation.Field { get; set; } | ||
    IDictionary<string, object> IAggregation.Meta { get; set; } | ||
    object IStringStatsAggregation.Missing { get; set; } | ||
    string IAggregation.Name { get; set; } | ||
    IScript IStringStatsAggregation.Script { get; set; } | ||
    bool? IStringStatsAggregation.ShowDistribution { get; set; } | ||
|
||
    /// <summary>Sets the field to perform the aggregation on.</summary> | ||
    /// <inheritdoc cref="IStringStatsAggregation.Field"/> | ||
    public StringStatsAggregationDescriptor<T> Field(Field field) | ||
    { | ||
        return Assign(field, (target, value) => { target.Field = value; }); | ||
    } | ||
|
||
    /// <summary>Sets the field to perform the aggregation on, from a member expression on <typeparamref name="T"/>.</summary> | ||
    /// <inheritdoc cref="IStringStatsAggregation.Field"/> | ||
    public StringStatsAggregationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field) | ||
    { | ||
        return Assign(field, (target, value) => { target.Field = value; }); | ||
    } | ||
|
||
    /// <summary>Sets the script from its source text, converted to an <c>InlineScript</c>.</summary> | ||
    /// <inheritdoc cref="IStringStatsAggregation.Script"/> | ||
    public StringStatsAggregationDescriptor<T> Script(string script) | ||
    { | ||
        var inline = (InlineScript)script; | ||
        return Assign(inline, (target, value) => { target.Script = value; }); | ||
    } | ||
|
||
    /// <summary> | ||
    /// Sets the script from a selector invoked with a new <c>ScriptDescriptor</c>; | ||
    /// a null selector results in a null script. | ||
    /// </summary> | ||
    /// <inheritdoc cref="IStringStatsAggregation.Script"/> | ||
    public StringStatsAggregationDescriptor<T> Script(Func<ScriptDescriptor, IScript> scriptSelector) | ||
    { | ||
        return Assign(scriptSelector, (target, value) => | ||
        { | ||
            // The descriptor is only constructed when a selector was actually supplied. | ||
            target.Script = value != null ? value(new ScriptDescriptor()) : null; | ||
        }); | ||
    } | ||
|
||
    /// <summary>Sets the value to use for documents missing a value for the field.</summary> | ||
    /// <inheritdoc cref="IStringStatsAggregation.Missing"/> | ||
    public StringStatsAggregationDescriptor<T> Missing(object missing) | ||
    { | ||
        return Assign(missing, (target, value) => { target.Missing = value; }); | ||
    } | ||
|
||
    /// <summary> | ||
    /// Sets the aggregation metadata from a selector invoked with a new | ||
    /// <c>FluentDictionary&lt;string, object&gt;</c>; a null selector results in null metadata. | ||
    /// </summary> | ||
    /// <inheritdoc cref="IAggregation.Meta"/> | ||
    public StringStatsAggregationDescriptor<T> Meta(Func<FluentDictionary<string, object>, FluentDictionary<string, object>> selector) | ||
    { | ||
        return Assign(selector, (target, value) => | ||
        { | ||
            // The dictionary is only constructed when a selector was actually supplied. | ||
            target.Meta = value != null ? value(new FluentDictionary<string, object>()) : null; | ||
        }); | ||
    } | ||
|
||
    /// <summary> | ||
    /// Sets whether the character probability distribution is included in the response. | ||
    /// Calling this with no argument passes <c>true</c>. | ||
    /// </summary> | ||
    /// <inheritdoc cref="IStringStatsAggregation.ShowDistribution"/> | ||
    public StringStatsAggregationDescriptor<T> ShowDistribution(bool? showDistribution = true) | ||
    { | ||
        return Assign(showDistribution, (target, value) => { target.ShowDistribution = value; }); | ||
    } | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.