Skip to content

Commit

Permalink
Aggs - support for arrays of numeric values in include/exclude clauses
Browse files Browse the repository at this point in the history
Closes #7714
  • Loading branch information
markharwood committed Sep 25, 2014
1 parent daac73f commit 4f69a98
Show file tree
Hide file tree
Showing 11 changed files with 368 additions and 27 deletions.
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.terms.LongTermsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.format.ValueFormat;
Expand All @@ -40,9 +41,9 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {

/**
 * Creates a significant-terms aggregator over long values.
 * <p>
 * Everything except {@code termsAggFactory} is handed to the {@link LongTermsAggregator}
 * constructor, with {@code null} in the order position, {@code SubAggCollectionMode.DEPTH_FIRST}
 * as the collection mode and {@code false} for the doc-count-error flag. The factory is kept in
 * {@code termsAggFactory}.
 * <p>
 * NOTE(review): this hunk is a diff rendered without +/- markers, so the tail of the parameter
 * list and the {@code super(...)} call each appear twice. Presumably the second line of each pair
 * (the one carrying {@code includeExclude}) is the post-commit version; confirm against the file.
 */
public SignificantLongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format,
long estimatedBucketCount, BucketCountThresholds bucketCountThresholds,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, IncludeExclude.LongFilter includeExclude) {

super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false);
super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, includeExclude);
this.termsAggFactory = termsAggFactory;
}

Expand Down
Expand Up @@ -194,17 +194,22 @@ protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount
return execution.create(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, this);
}

if (includeExclude != null) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support the include/exclude " +
"settings as it can only be applied to string values");

if ((includeExclude != null) && (includeExclude.isRegexBased())) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " +
"settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
}

if (valuesSource instanceof ValuesSource.Numeric) {

if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
throw new UnsupportedOperationException("No support for examining floating point numerics");
}
return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this);
IncludeExclude.LongFilter longFilter = null;
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter();
}
return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this, longFilter);
}

throw new AggregationExecutionException("sigfnificant_terms aggregation cannot be applied to field [" + config.fieldContext().field() +
Expand Down
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.search.aggregations.bucket.significant;

import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
Expand All @@ -45,6 +46,8 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
private int includeFlags;
private String excludePattern;
private int excludeFlags;
private String[] includeTerms = null;
private String[] excludeTerms = null;
private FilterBuilder filterBuilder;
private SignificanceHeuristicBuilder significanceHeuristicBuilder;

Expand Down Expand Up @@ -129,10 +132,44 @@ public SignificantTermsBuilder include(String regex) {
* @see java.util.regex.Pattern#compile(String, int)
*/
public SignificantTermsBuilder include(String regex, int flags) {
    // An include clause is either a regex or a list of exact values; refuse to combine the two.
    if (includeTerms != null) {
        // The message must name the include clause and use the same wording as the
        // array-based include overloads, which accept strings as well as longs.
        throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
    }
    this.includePattern = regex;
    this.includeFlags = flags;
    return this;
}

/**
 * Restricts the aggregation to an explicit list of terms.
 *
 * @param terms the exact term values to keep; the array is stored as given
 * @return this builder, for chaining
 * @throws ElasticsearchIllegalArgumentException if an include regex has already been set
 */
public SignificantTermsBuilder include(String [] terms) {
    if (includePattern == null) {
        this.includeTerms = terms;
        return this;
    }
    throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}

/**
 * Restricts the aggregation to an explicit list of numeric terms. The values are kept in
 * their decimal string form.
 *
 * @param terms the exact long values to keep
 * @return this builder, for chaining
 * @throws ElasticsearchIllegalArgumentException if an include regex has already been set
 */
public SignificantTermsBuilder include(long [] terms) {
    if (includePattern == null) {
        final String[] asStrings = longsArrToStringArr(terms);
        this.includeTerms = asStrings;
        return this;
    }
    throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}

/**
 * Renders each long as its decimal string, preserving order and length.
 *
 * @param terms the values to convert
 * @return a new array holding one string per input value
 */
private String[] longsArrToStringArr(long[] terms) {
    final String[] rendered = new String[terms.length];
    int slot = 0;
    for (long term : terms) {
        rendered[slot++] = String.valueOf(term);
    }
    return rendered;
}


/**
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
Expand All @@ -151,10 +188,36 @@ public SignificantTermsBuilder exclude(String regex) {
* @see java.util.regex.Pattern#compile(String, int)
*/
public SignificantTermsBuilder exclude(String regex, int flags) {
    // An exclude clause is either a regex or a list of exact values; refuse to combine the two.
    if (excludeTerms != null) {
        // "exact values" rather than "strings": the term list may also have been supplied as
        // longs, and this matches the wording used by the include overloads.
        throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
    }
    this.excludePattern = regex;
    this.excludeFlags = flags;
    return this;
}

/**
 * Define a set of terms that should not be aggregated.
 *
 * @param terms the exact term values to drop; the array is stored as given
 * @return this builder, for chaining
 * @throws ElasticsearchIllegalArgumentException if an exclude regex has already been set
 */
public SignificantTermsBuilder exclude(String [] terms) {
    if (excludePattern != null) {
        // Same wording as the include overloads, so every regex/array conflict reads alike.
        throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
    }
    this.excludeTerms = terms;
    return this;
}


/**
 * Define a set of numeric terms that should not be aggregated. The values are kept in their
 * decimal string form.
 *
 * @param terms the exact long values to drop
 * @return this builder, for chaining
 * @throws ElasticsearchIllegalArgumentException if an exclude regex has already been set
 */
public SignificantTermsBuilder exclude(long [] terms) {
    if (excludePattern != null) {
        // Same wording as the include overloads, so every regex/array conflict reads alike.
        throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
    }
    this.excludeTerms = longsArrToStringArr(terms);
    return this;
}

@Override
protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException {
Expand All @@ -176,6 +239,10 @@ protected XContentBuilder internalXContent(XContentBuilder builder, Params param
.endObject();
}
}
if (includeTerms != null) {
builder.array("include", includeTerms);
}

if (excludePattern != null) {
if (excludeFlags == 0) {
builder.field("exclude", excludePattern);
Expand All @@ -186,6 +253,9 @@ protected XContentBuilder internalXContent(XContentBuilder builder, Params param
.endObject();
}
}
if (excludeTerms != null) {
builder.array("exclude", excludeTerms);
}

if (filterBuilder != null) {
builder.field(SignificantTermsParametersParser.BACKGROUND_FILTER.getPreferredName());
Expand Down
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;
Expand All @@ -37,8 +38,8 @@
public class DoubleTermsAggregator extends LongTermsAggregator {

/**
 * Creates a terms aggregator for floating-point fields by passing every argument straight
 * through to the {@link LongTermsAggregator} constructor; no additional state is set here.
 * <p>
 * NOTE(review): this hunk is a diff rendered without +/- markers, so the tail of the parameter
 * list and the {@code super(...)} call each appear twice. Presumably the second line of each pair
 * (the one carrying {@code longFilter}) is the post-commit version; confirm against the file.
 */
public DoubleTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount,
Terms.Order order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError) {
super(name, factories, valuesSource, format, estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, collectionMode, showTermDocCountError);
Terms.Order order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, IncludeExclude.LongFilter longFilter) {
super(name, factories, valuesSource, format, estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, collectionMode, showTermDocCountError, longFilter);
}

@Override
Expand Down
Expand Up @@ -27,6 +27,8 @@
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.LongFilter;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.format.ValueFormat;
Expand All @@ -46,13 +48,15 @@ public class LongTermsAggregator extends TermsAggregator {
protected final LongHash bucketOrds;
private boolean showTermDocCountError;
private SortedNumericDocValues values;
private LongFilter longFilter;

/**
 * Creates a terms aggregator over long values.
 * <p>
 * The values source, the doc-count-error flag and the optional {@code longFilter} are stored on
 * the instance. The filter may be {@code null}; the collect loop in this class then accepts
 * every value.
 * <p>
 * NOTE(review): this hunk is a diff rendered without +/- markers, so the tail of the parameter
 * list appears twice. Presumably the second line (the one carrying {@code longFilter}) is the
 * post-commit version; confirm against the file.
 */
public LongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount,
Terms.Order order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError) {
Terms.Order order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, IncludeExclude.LongFilter longFilter) {
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent, bucketCountThresholds, order, subAggCollectMode);
this.valuesSource = valuesSource;
this.showTermDocCountError = showTermDocCountError;
// No formatter is kept when the caller supplies no ValueFormat.
this.formatter = format != null ? format.formatter() : null;
this.longFilter = longFilter;
// Built from the estimated bucket count and the aggregation context's BigArrays.
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
}

Expand Down Expand Up @@ -82,13 +86,16 @@ public void collect(int doc, long owningBucketOrdinal) throws IOException {
for (int i = 0; i < valuesCount; ++i) {
final long val = values.valueAt(i);
if (previous != val || i == 0) {
long bucketOrdinal = bucketOrds.add(val);
if (bucketOrdinal < 0) { // already seen
bucketOrdinal = - 1 - bucketOrdinal;
collectExistingBucket(doc, bucketOrdinal);
} else {
collectBucket(doc, bucketOrdinal);
if ((longFilter == null) || (longFilter.accept(val))) {
long bucketOrdinal = bucketOrds.add(val);
if (bucketOrdinal < 0) { // already seen
bucketOrdinal = - 1 - bucketOrdinal;
collectExistingBucket(doc, bucketOrdinal);
} else {
collectBucket(doc, bucketOrdinal);
}
}

previous = val;
}
}
Expand Down
Expand Up @@ -231,16 +231,26 @@ protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount
return execution.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError);
}

if (includeExclude != null) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support the include/exclude " +
"settings as it can only be applied to string values");
if ((includeExclude != null) && (includeExclude.isRegexBased())) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " +
"settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
}

if (valuesSource instanceof ValuesSource.Numeric) {
IncludeExclude.LongFilter longFilter = null;
if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode, showTermDocCountError);
if (includeExclude != null) {
longFilter = includeExclude.convertToDoubleFilter();
}
return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(),
estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode,
showTermDocCountError, longFilter);
}
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter();
}
return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode, showTermDocCountError);
return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount,
order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode, showTermDocCountError, longFilter);
}

throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() +
Expand Down
Expand Up @@ -117,12 +117,51 @@ public TermsBuilder include(String regex, int flags) {
*/
public TermsBuilder include(String [] terms) {
// An include clause is either a regex or a list of exact values; refuse to combine the two.
if (includePattern != null) {
// NOTE(review): the two throw statements below are the same line before and after the commit
// (the diff is rendered without +/- markers). Presumably only the second, "exact values"
// wording remains in the file; confirm.
throw new ElasticsearchIllegalArgumentException("include clause must be an array of strings or a regex, not both");
throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}
// The caller's array is stored as given.
this.includeTerms = terms;
return this;
}

/**
 * Restricts the aggregation to an explicit list of long values. The values are kept in their
 * decimal string form.
 *
 * @param terms the exact long values to keep
 * @return this builder, for chaining
 * @throws ElasticsearchIllegalArgumentException if an include regex has already been set
 */
public TermsBuilder include(long [] terms) {
    if (includePattern == null) {
        final String[] asStrings = longsArrToStringArr(terms);
        this.includeTerms = asStrings;
        return this;
    }
    throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}

/**
 * Converts an array of longs to their decimal string forms, position by position.
 *
 * @param terms the values to convert
 * @return a new array of the same length as {@code terms}
 */
private String[] longsArrToStringArr(long[] terms) {
    final int count = terms.length;
    final String[] converted = new String[count];
    int position = 0;
    while (position < count) {
        converted[position] = String.valueOf(terms[position]);
        position++;
    }
    return converted;
}


/**
 * Restricts the aggregation to an explicit list of double values. The values are kept in
 * their string form.
 *
 * @param terms the exact double values to keep
 * @return this builder, for chaining
 * @throws ElasticsearchIllegalArgumentException if an include regex has already been set
 */
public TermsBuilder include(double [] terms) {
    if (includePattern == null) {
        final String[] asStrings = doubleArrToStringArr(terms);
        this.includeTerms = asStrings;
        return this;
    }
    throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}

/**
 * Renders each double as a string, preserving order and length.
 *
 * @param terms the values to convert
 * @return a new array holding one string per input value
 */
private String[] doubleArrToStringArr(double[] terms) {
    final String[] rendered = new String[terms.length];
    int slot = 0;
    for (double term : terms) {
        rendered[slot++] = String.valueOf(term);
    }
    return rendered;
}

/**
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
* expression is based on the {@link java.util.regex.Pattern} class.
Expand All @@ -141,7 +180,7 @@ public TermsBuilder exclude(String regex) {
*/
public TermsBuilder exclude(String regex, int flags) {
if (excludeTerms != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
}
this.excludePattern = regex;
this.excludeFlags = flags;
Expand All @@ -153,12 +192,36 @@ public TermsBuilder exclude(String regex, int flags) {
*/
public TermsBuilder exclude(String [] terms) {
// An exclude clause is either a regex or a list of exact values; refuse to combine the two.
if (excludePattern != null) {
// NOTE(review): the two throw statements below are the same line before and after the commit
// (the diff is rendered without +/- markers). Presumably only the second, "exact values"
// wording remains in the file; confirm.
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
}
// The caller's array is stored as given.
this.excludeTerms = terms;
return this;
}


/**
 * Drops an explicit list of long values from the aggregation. The values are kept in their
 * decimal string form.
 *
 * @param terms the exact long values to drop
 * @return this builder, for chaining
 * @throws ElasticsearchIllegalArgumentException if an exclude regex has already been set
 */
public TermsBuilder exclude(long [] terms) {
    if (excludePattern == null) {
        final String[] asStrings = longsArrToStringArr(terms);
        this.excludeTerms = asStrings;
        return this;
    }
    throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
}

/**
 * Drops an explicit list of double values from the aggregation. The values are kept in their
 * string form.
 *
 * @param terms the exact double values to drop
 * @return this builder, for chaining
 * @throws ElasticsearchIllegalArgumentException if an exclude regex has already been set
 */
public TermsBuilder exclude(double [] terms) {
    if (excludePattern == null) {
        final String[] asStrings = doubleArrToStringArr(terms);
        this.excludeTerms = asStrings;
        return this;
    }
    throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
}



/**
* When using scripts, the value type indicates the types of the values the script is generating.
Expand Down

0 comments on commit 4f69a98

Please sign in to comment.