Skip to content

Commit

Permalink
Aggregations fix: support include/exclude strings formatted for IP an…
Browse files Browse the repository at this point in the history
…d date fields in terms and significant_terms aggregations.

Closes #17705
  • Loading branch information
markharwood committed May 18, 2016
1 parent db4809d commit a846ff9
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,14 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
}
}
assert execution != null;
return execution.create(name, factories, valuesSource, config.format(), bucketCountThresholds, includeExclude, context, parent,

DocValueFormat format = config.format();
if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
+ "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
}

return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent,
significanceHeuristic, this, pipelineAggregators, metaData);
}

Expand All @@ -227,7 +234,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
}
IncludeExclude.LongFilter longFilter = null;
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter();
longFilter = includeExclude.convertToLongFilter(config.format());
}
return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(),
bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter, pipelineAggregators,
Expand All @@ -248,7 +255,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
}
Expand All @@ -262,7 +269,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
Expand All @@ -277,7 +284,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,13 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
}
}
}
DocValueFormat format = config.format();
if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
+ "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
}

return execution.create(name, factories, valuesSource, order, config.format(), bucketCountThresholds, includeExclude, context, parent,
return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent,
collectMode, showTermDocCountError, pipelineAggregators, metaData);
}

Expand All @@ -171,7 +176,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
pipelineAggregators, metaData);
}
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter();
longFilter = includeExclude.convertToLongFilter(config.format());
}
return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order,
bucketCountThresholds, context, parent, collectMode, showTermDocCountError, longFilter, pipelineAggregators,
Expand All @@ -192,7 +197,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter,
aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
}
Expand All @@ -211,7 +216,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError,
pipelineAggregators, metaData);
Expand All @@ -231,7 +236,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
order, format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError,
pipelineAggregators, metaData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals;

Expand Down Expand Up @@ -135,7 +136,8 @@ public boolean accept(BytesRef value) {
}

public static abstract class OrdinalsFilter {
public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException;
public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource)
throws IOException;

}

Expand All @@ -152,7 +154,8 @@ private AutomatonBackedOrdinalsFilter(Automaton automaton) {
*
*/
@Override
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException {
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource)
throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
TermsEnum globalTermsEnum;
Terms globalTerms = new DocValuesTerms(globalOrdinals);
Expand All @@ -179,7 +182,7 @@ public TermListBackedOrdinalsFilter(SortedSet<BytesRef> includeValues, SortedSet
@Override
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, WithOrdinals valueSource) throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
if(includeValues!=null){
if (includeValues != null) {
for (BytesRef term : includeValues) {
long ord = globalOrdinals.lookupTerm(term);
if (ord >= 0) {
Expand Down Expand Up @@ -534,33 +537,46 @@ private Automaton toAutomaton() {
return a;
}

public StringFilter convertToStringFilter() {
public StringFilter convertToStringFilter(DocValueFormat format) {
if (isRegexBased()) {
return new AutomatonBackedStringFilter(toAutomaton());
}
return new TermListBackedStringFilter(includeValues, excludeValues);
return new TermListBackedStringFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format));
}

public OrdinalsFilter convertToOrdinalsFilter() {
private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUserFormattedValues, DocValueFormat format) {
SortedSet<BytesRef> result = endUserFormattedValues;
if (endUserFormattedValues != null) {
if (format != DocValueFormat.RAW) {
result = new TreeSet<>();
for (BytesRef formattedVal : endUserFormattedValues) {
result.add(format.parseBytesRef(formattedVal.utf8ToString()));
}
}
}
return result;
}

public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {

if (isRegexBased()) {
return new AutomatonBackedOrdinalsFilter(toAutomaton());
}
return new TermListBackedOrdinalsFilter(includeValues, excludeValues);
return new TermListBackedOrdinalsFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format));
}

public LongFilter convertToLongFilter() {
public LongFilter convertToLongFilter(DocValueFormat format) {
int numValids = includeValues == null ? 0 : includeValues.size();
int numInvalids = excludeValues == null ? 0 : excludeValues.size();
LongFilter result = new LongFilter(numValids, numInvalids);
if (includeValues != null) {
for (BytesRef val : includeValues) {
result.addAccept(Long.parseLong(val.utf8ToString()));
result.addAccept(format.parseLong(val.utf8ToString(), false, null));
}
}
if (excludeValues != null) {
for (BytesRef val : excludeValues) {
result.addReject(Long.parseLong(val.utf8ToString()));
result.addReject(format.parseLong(val.utf8ToString(), false, null));
}
}
return result;
Expand All @@ -572,13 +588,13 @@ public LongFilter convertToDoubleFilter() {
LongFilter result = new LongFilter(numValids, numInvalids);
if (includeValues != null) {
for (BytesRef val : includeValues) {
double dval=Double.parseDouble(val.utf8ToString());
double dval = Double.parseDouble(val.utf8ToString());
result.addAccept(NumericUtils.doubleToSortableLong(dval));
}
}
if (excludeValues != null) {
for (BytesRef val : excludeValues) {
double dval=Double.parseDouble(val.utf8ToString());
double dval = Double.parseDouble(val.utf8ToString());
result.addReject(NumericUtils.doubleToSortableLong(dval));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,33 @@ setup:

- match: { aggregations.ip_terms.buckets.1.doc_count: 1 }

- do:
search:
body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "include" : [ "127.0.0.1" ] } } } }

- match: { hits.total: 3 }

- length: { aggregations.ip_terms.buckets: 1 }

- match: { aggregations.ip_terms.buckets.0.key: "127.0.0.1" }

- do:
search:
body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" : [ "127.0.0.1" ] } } } }

- match: { hits.total: 3 }

- length: { aggregations.ip_terms.buckets: 1 }

- match: { aggregations.ip_terms.buckets.0.key: "::1" }

- do:
catch: request
search:
body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" : "127.*" } } } }



---
"Boolean test":
- do:
Expand Down Expand Up @@ -300,4 +327,27 @@ setup:
- match: { aggregations.date_terms.buckets.1.key_as_string: "2014-09-01T00:00:00.000Z" }

- match: { aggregations.date_terms.buckets.1.doc_count: 1 }

- do:
search:
body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "date", "include" : [ "2016-05-03" ] } } } }

- match: { hits.total: 3 }

- length: { aggregations.date_terms.buckets: 1 }

- match: { aggregations.date_terms.buckets.0.key_as_string: "2016-05-03T00:00:00.000Z" }

- match: { aggregations.date_terms.buckets.0.doc_count: 2 }

- do:
search:
body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "date", "exclude" : [ "2016-05-03" ] } } } }

- match: { hits.total: 3 }

- length: { aggregations.date_terms.buckets: 1 }

- match: { aggregations.date_terms.buckets.0.key_as_string: "2014-09-01T00:00:00.000Z" }

- match: { aggregations.date_terms.buckets.0.doc_count: 1 }
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,28 @@
- is_false: aggregations.ip_terms.buckets.0.key_as_string

- match: { aggregations.ip_terms.buckets.0.doc_count: 1 }

- do:
search:
body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "include" : [ "::1" ] } } } }

- match: { hits.total: 1 }

- length: { aggregations.ip_terms.buckets: 1 }

- match: { aggregations.ip_terms.buckets.0.key: "::1" }

- do:
search:
body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "exclude" : [ "::1" ] } } } }

- match: { hits.total: 1 }

- length: { aggregations.ip_terms.buckets: 0 }

- do:
catch: request
search:
body: { "size" : 0, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "exclude" : "127.*" } } } }


0 comments on commit a846ff9

Please sign in to comment.