Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add StringStats aggregation #4370

Merged
merged 1 commit into from
Feb 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Nest/Aggregations/AggregateDictionary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ public ScriptedMetricAggregate ScriptedMetric(string key)

/// <summary>
/// Looks up the entry stored under <paramref name="key"/> as a <see cref="StatsAggregate"/>
/// by delegating to <c>TryGet</c>.
/// </summary>
/// <param name="key">The key to look up.</param>
public StatsAggregate Stats(string key)
{
	return TryGet<StatsAggregate>(key);
}

/// <summary>
/// Looks up the entry stored under <paramref name="key"/> as a <see cref="StringStatsAggregate"/>
/// by delegating to <c>TryGet</c>.
/// </summary>
/// <param name="key">The key to look up.</param>
public StringStatsAggregate StringStats(string key)
{
	return TryGet<StringStatsAggregate>(key);
}

/// <summary>
/// Looks up the entry stored under <paramref name="key"/> as a <see cref="StatsAggregate"/>
/// by delegating to <c>TryGet</c>. Uses the same aggregate type as <c>Stats</c>.
/// </summary>
/// <param name="key">The key to look up.</param>
public StatsAggregate StatsBucket(string key)
{
	return TryGet<StatsAggregate>(key);
}

/// <summary>
/// Looks up the entry stored under <paramref name="key"/> as an <see cref="ExtendedStatsAggregate"/>
/// by delegating to <c>TryGet</c>.
/// </summary>
/// <param name="key">The key to look up.</param>
public ExtendedStatsAggregate ExtendedStats(string key)
{
	return TryGet<ExtendedStatsAggregate>(key);
}
Expand Down
61 changes: 57 additions & 4 deletions src/Nest/Aggregations/AggregateFormatter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ internal class AggregateFormatter : IJsonFormatter<IAggregate>

private static readonly byte[] KeysField = JsonWriter.GetEncodedPropertyNameWithoutQuotation(Parser.Keys);
private static readonly byte[] MetaField = JsonWriter.GetEncodedPropertyNameWithoutQuotation(Parser.Meta);
private static readonly byte[] MinLengthField = JsonWriter.GetEncodedPropertyNameWithoutQuotation(Parser.MinLength);

private static readonly AutomataDictionary RootFields = new AutomataDictionary
{
Expand Down Expand Up @@ -133,7 +134,7 @@ private IAggregate ReadAggregate(ref JsonReader reader, IJsonFormatterResolver f
aggregate = GetMultiBucketAggregate(ref reader, formatterResolver, ref propertyName, meta);
break;
case 5:
aggregate = GetStatsAggregate(ref reader, meta);
aggregate = GetStatsAggregate(ref reader, formatterResolver, meta);
break;
case 6:
aggregate = GetSingleBucketAggregate(ref reader, formatterResolver, meta);
Expand Down Expand Up @@ -397,16 +398,67 @@ private IAggregate GetSingleBucketAggregate(ref JsonReader reader, IJsonFormatte
return new SingleBucketAggregate(subAggregates) { DocCount = docCount, Meta = meta };
}

private IAggregate GetStatsAggregate(ref JsonReader reader, IReadOnlyDictionary<string, object> meta)
/// <summary>
/// Builds a <see cref="StringStatsAggregate"/> by reading the remaining fields of a string stats
/// response from <paramref name="reader"/> in a fixed order: the <c>min_length</c> value, then
/// <c>max_length</c>, <c>avg_length</c>, <c>entropy</c> and, when another value separator follows,
/// <c>distribution</c>.
/// </summary>
/// <param name="reader">
/// The JSON reader, expected to be positioned on the <c>min_length</c> value (GetStatsAggregate
/// calls this right after matching the <c>min_length</c> property name).
/// </param>
/// <param name="formatterResolver">Resolver used to obtain the formatter for the distribution dictionary.</param>
/// <param name="meta">Metadata assigned to the resulting aggregate's <c>Meta</c>.</param>
/// <param name="count">The count value already read by the caller; assigned to <c>Count</c>.</param>
/// <returns>The populated <see cref="StringStatsAggregate"/>.</returns>
private IAggregate GetStringStatsAggregate(ref JsonReader reader, IJsonFormatterResolver formatterResolver,
IReadOnlyDictionary<string, object> meta, long count
)
{
// string stats aggregation
// NOTE(review): the length/average/entropy values are read with the non-nullable ReadInt32/ReadDouble,
// whereas GetStatsAggregate uses ReadNullableLong/ReadNullableDouble for its values.
// Presumably a JSON null here (e.g. when count is 0) would not be handled - TODO confirm against the server response.
var minLength = reader.ReadInt32();
reader.ReadNext(); // ,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comments 👍

reader.ReadNext(); // "max_length"
reader.ReadNext(); // :
var maxLength = reader.ReadInt32();
reader.ReadNext(); // ,
reader.ReadNext(); // "avg_length"
reader.ReadNext(); // :
var avgLength = reader.ReadDouble();
reader.ReadNext(); // ,
reader.ReadNext(); // "entropy"
reader.ReadNext(); // :
var entropy = reader.ReadDouble();

// Distribution is not set here, so it keeps the default declared on StringStatsAggregate
// unless the optional "distribution" property follows.
var aggregate = new StringStatsAggregate
{
Meta = meta,
Count = count,
MinLength = minLength,
MaxLength = maxLength,
AverageLength = avgLength,
Entropy = entropy
};

// A further value separator means one more property follows entropy; it is read as the distribution.
if (reader.ReadIsValueSeparator())
{
reader.ReadNext(); // "distribution"
reader.ReadNext(); // :
var distribution = formatterResolver
.GetFormatter<IReadOnlyDictionary<string, double>>()
.Deserialize(ref reader, formatterResolver);

// only set distribution if present, leaving empty dictionary when absent
aggregate.Distribution = distribution;
}

return aggregate;
}

private IAggregate GetStatsAggregate(ref JsonReader reader, IJsonFormatterResolver formatterResolver, IReadOnlyDictionary<string, object> meta
)
{
var count = reader.ReadNullableLong().GetValueOrDefault(0);

if (reader.GetCurrentJsonToken() == JsonToken.EndObject)
return new GeoCentroidAggregate { Count = count, Meta = meta };

reader.ReadNext(); // ,
reader.ReadNext(); // "min"
reader.ReadNext(); // :

var property = reader.ReadPropertyNameSegmentRaw();

// string stats aggregation
if (property.EqualsBytes(MinLengthField))
return GetStringStatsAggregate(ref reader, formatterResolver, meta, count);

// stats or extended stats aggregation
var min = reader.ReadNullableDouble();
reader.ReadNext(); // ,
reader.ReadNext(); // "max"
Expand Down Expand Up @@ -930,6 +982,7 @@ private static class Parser
public const string Location = "location";
public const string MaxScore = "max_score";
public const string Meta = "meta";
public const string MinLength = "min_length";

public const string Score = "score";

Expand Down
13 changes: 13 additions & 0 deletions src/Nest/Aggregations/AggregationContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,9 @@ public interface IAggregationContainer
[DataMember(Name = "median_absolute_deviation")]
IMedianAbsoluteDeviationAggregation MedianAbsoluteDeviation { get; set; }

[DataMember(Name = "string_stats")]
IStringStatsAggregation StringStats { get; set; }

void Accept(IAggregationVisitor visitor);
}

Expand Down Expand Up @@ -377,6 +380,8 @@ public class AggregationContainer : IAggregationContainer

public IMedianAbsoluteDeviationAggregation MedianAbsoluteDeviation { get; set; }

public IStringStatsAggregation StringStats { get; set; }

public void Accept(IAggregationVisitor visitor)
{
if (visitor.Scope == AggregationVisitorScope.Unknown) visitor.Scope = AggregationVisitorScope.Aggregation;
Expand Down Expand Up @@ -526,6 +531,8 @@ public class AggregationContainerDescriptor<T> : DescriptorBase<AggregationConta

IMedianAbsoluteDeviationAggregation IAggregationContainer.MedianAbsoluteDeviation { get; set; }

IStringStatsAggregation IAggregationContainer.StringStats { get; set; }

public void Accept(IAggregationVisitor visitor)
{
if (visitor.Scope == AggregationVisitorScope.Unknown) visitor.Scope = AggregationVisitorScope.Aggregation;
Expand Down Expand Up @@ -818,6 +825,12 @@ public void Accept(IAggregationVisitor visitor)
) =>
_SetInnerAggregation(name, selector, (a, d) => a.MedianAbsoluteDeviation = d);

/// <inheritdoc cref="IStringStatsAggregation"/>
/// <param name="name">The name passed on to <c>_SetInnerAggregation</c> for this aggregation.</param>
/// <param name="selector">Builds the string stats aggregation from a fresh descriptor.</param>
public AggregationContainerDescriptor<T> StringStats(string name,
	Func<StringStatsAggregationDescriptor<T>, IStringStatsAggregation> selector
)
{
	// The setter callback stores the built aggregation on the container's StringStats slot.
	return _SetInnerAggregation(name, selector, (container, aggregation) => container.StringStats = aggregation);
}

/// <summary>
/// Fluent methods do not assign to properties on `this` directly but on IAggregationContainers inside
/// `this.Aggregations[string, IContainer]
Expand Down
40 changes: 40 additions & 0 deletions src/Nest/Aggregations/Metric/StringStats/StringStatsAggregate.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
using System.Collections.Generic;
using Elasticsearch.Net;

namespace Nest
{
/// <summary>
/// Result of a string stats aggregation: count, length statistics, entropy and, optionally,
/// the per-character distribution.
/// </summary>
public class StringStatsAggregate : MetricAggregateBase
{
	/// <summary>
	/// How many non-empty fields were counted.
	/// </summary>
	public long Count { get; set; }

	/// <summary>
	/// Length of the shortest term.
	/// </summary>
	public int MinLength { get; set; }

	/// <summary>
	/// Length of the longest term.
	/// </summary>
	public int MaxLength { get; set; }

	/// <summary>
	/// Average length, computed over all terms.
	/// </summary>
	public double AverageLength { get; set; }

	/// <summary>
	/// Shannon entropy computed over all terms collected by the aggregation.
	/// Shannon entropy quantifies the amount of information contained in the field, and is useful
	/// for measuring properties of a data set such as diversity, similarity and randomness.
	/// </summary>
	public double Entropy { get; set; }

	/// <summary>
	/// Probability of each character appearing in all terms.
	/// Defaults to an empty read-only dictionary.
	/// </summary>
	public IReadOnlyDictionary<string, double> Distribution { get; set; } = EmptyReadOnly<string, double>.Dictionary;
}
}
104 changes: 104 additions & 0 deletions src/Nest/Aggregations/Metric/StringStats/StringStatsAggregation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
using System;
using System.Collections.Generic;
using System.Linq.Expressions;
using System.Runtime.Serialization;
using Elasticsearch.Net.Utf8Json;

namespace Nest
{
/// <summary>
/// A multi-value metrics aggregation that computes statistics over string values extracted from the aggregated documents.
/// These values can be retrieved either from specific keyword fields in the documents or can be generated by a provided script.
/// <para />
/// Available in Elasticsearch 7.6.0+ with at least basic license level.
/// </summary>
[InterfaceDataContract]
[ReadAs(typeof(StringStatsAggregation))]
public interface IStringStatsAggregation : IAggregation
{
/// <summary>
/// The field to perform the aggregation on. Mapped to the <c>field</c> property.
/// </summary>
[DataMember(Name = "field")]
Field Field { get; set; }

/// <summary>
/// A value to use for documents missing a value for the field. Mapped to the <c>missing</c> property.
/// </summary>
[DataMember(Name = "missing")]
object Missing { get; set; }

/// <summary>
/// Compute the string stats based on a script. Mapped to the <c>script</c> property.
/// </summary>
[DataMember(Name = "script")]
IScript Script { get; set; }

/// <summary>
/// Include the probability distribution for all characters in the response.
/// Mapped to the <c>show_distribution</c> property.
/// </summary>
[DataMember(Name = "show_distribution")]
bool? ShowDistribution { get; set; }
}

/// <inheritdoc cref="IStringStatsAggregation"/>
public class StringStatsAggregation : AggregationBase, IStringStatsAggregation
{
	/// <summary>
	/// Parameterless constructor, visible only within the assembly.
	/// </summary>
	internal StringStatsAggregation() { }

	/// <summary>
	/// Creates a string stats aggregation called <paramref name="name"/> over <paramref name="field"/>.
	/// </summary>
	/// <param name="name">The aggregation name, passed to the base constructor.</param>
	/// <param name="field">The field assigned to <see cref="Field"/>.</param>
	public StringStatsAggregation(string name, Field field) : base(name)
	{
		Field = field;
	}

	/// <summary>
	/// Registers this instance as the string stats aggregation of the container <paramref name="c"/>.
	/// </summary>
	internal override void WrapInContainer(AggregationContainer c)
	{
		c.StringStats = this;
	}

	/// <inheritdoc />
	public Field Field { get; set; }

	/// <inheritdoc />
	public object Missing { get; set; }

	/// <inheritdoc />
	public IScript Script { get; set; }

	/// <inheritdoc />
	public bool? ShowDistribution { get; set; }
}

/// <inheritdoc cref="IStringStatsAggregation"/>
public class StringStatsAggregationDescriptor<T>
	: DescriptorBase<StringStatsAggregationDescriptor<T>, IStringStatsAggregation>, IStringStatsAggregation
	where T : class
{
	// Explicit interface state; the fluent methods below write to these through Assign.
	Field IStringStatsAggregation.Field { get; set; }
	IDictionary<string, object> IAggregation.Meta { get; set; }
	object IStringStatsAggregation.Missing { get; set; }
	string IAggregation.Name { get; set; }

	IScript IStringStatsAggregation.Script { get; set; }

	bool? IStringStatsAggregation.ShowDistribution { get; set; }

	/// <inheritdoc cref="IStringStatsAggregation.Field"/>
	public StringStatsAggregationDescriptor<T> Field(Field field)
	{
		return Assign(field, (target, value) => target.Field = value);
	}

	/// <inheritdoc cref="IStringStatsAggregation.Field"/>
	public StringStatsAggregationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field)
	{
		return Assign(field, (target, value) => target.Field = value);
	}

	/// <inheritdoc cref="IStringStatsAggregation.Script"/>
	public StringStatsAggregationDescriptor<T> Script(string script)
	{
		// The script text is converted to an InlineScript before being assigned.
		return Assign((InlineScript)script, (target, value) => target.Script = value);
	}

	/// <inheritdoc cref="IStringStatsAggregation.Script"/>
	public StringStatsAggregationDescriptor<T> Script(Func<ScriptDescriptor, IScript> scriptSelector)
	{
		// A null selector yields a null script; otherwise it is invoked with a fresh ScriptDescriptor.
		return Assign(scriptSelector, (target, build) => target.Script = build?.Invoke(new ScriptDescriptor()));
	}

	/// <inheritdoc cref="IStringStatsAggregation.Missing"/>
	public StringStatsAggregationDescriptor<T> Missing(object missing)
	{
		return Assign(missing, (target, value) => target.Missing = value);
	}

	/// <inheritdoc cref="IAggregation.Meta"/>
	public StringStatsAggregationDescriptor<T> Meta(Func<FluentDictionary<string, object>, FluentDictionary<string, object>> selector)
	{
		// A null selector yields null metadata; otherwise it is invoked with a fresh FluentDictionary.
		return Assign(selector, (target, build) => target.Meta = build?.Invoke(new FluentDictionary<string, object>()));
	}

	/// <inheritdoc cref="IStringStatsAggregation.ShowDistribution"/>
	public StringStatsAggregationDescriptor<T> ShowDistribution(bool? showDistribution = true)
	{
		return Assign(showDistribution, (target, value) => target.ShowDistribution = value);
	}
}
}
4 changes: 4 additions & 0 deletions src/Nest/Aggregations/Visitor/AggregationVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ public interface IAggregationVisitor
void Visit(ICompositeAggregation aggregation);

void Visit(IMedianAbsoluteDeviationAggregation aggregation);

void Visit(IStringStatsAggregation aggregation);
}

public class AggregationVisitor : IAggregationVisitor
Expand Down Expand Up @@ -239,6 +241,8 @@ public class AggregationVisitor : IAggregationVisitor

/// <summary>
/// Visits a median absolute deviation aggregation. The base implementation is empty; override to add behavior.
/// </summary>
public virtual void Visit(IMedianAbsoluteDeviationAggregation aggregation)
{
}

/// <summary>
/// Visits a string stats aggregation. The base implementation is empty; override to add behavior.
/// </summary>
public virtual void Visit(IStringStatsAggregation aggregation)
{
}

/// <summary>
/// Visits an aggregation through the general <c>IAggregation</c> overload. The base implementation is empty; override to add behavior.
/// </summary>
public virtual void Visit(IAggregation aggregation)
{
}

/// <summary>
/// Visits an aggregation container. The base implementation is empty; override to add behavior.
/// </summary>
public virtual void Visit(IAggregationContainer aggregationContainer)
{
}
Expand Down
Loading