Skip to content

Commit

Permalink
Aggs - support for arrays of numeric values in include/exclude clauses
Browse files Browse the repository at this point in the history
Closes #7714
  • Loading branch information
markharwood committed Sep 25, 2014
1 parent a90d7b1 commit e97b8fd
Show file tree
Hide file tree
Showing 11 changed files with 368 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.terms.LongTermsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.format.ValueFormat;
Expand All @@ -40,9 +41,9 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {

public SignificantLongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format,
long estimatedBucketCount, BucketCountThresholds bucketCountThresholds,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, IncludeExclude.LongFilter includeExclude) {

super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false);
super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, includeExclude);
this.termsAggFactory = termsAggFactory;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,17 +194,22 @@ protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount
return execution.create(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, this);
}

if (includeExclude != null) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support the include/exclude " +
"settings as it can only be applied to string values");

if ((includeExclude != null) && (includeExclude.isRegexBased())) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " +
"settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
}

if (valuesSource instanceof ValuesSource.Numeric) {

if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
throw new UnsupportedOperationException("No support for examining floating point numerics");
}
return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this);
IncludeExclude.LongFilter longFilter = null;
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter();
}
return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this, longFilter);
}

throw new AggregationExecutionException("sigfnificant_terms aggregation cannot be applied to field [" + config.fieldContext().field() +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.search.aggregations.bucket.significant;

import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
Expand All @@ -45,6 +46,8 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
private int includeFlags;
private String excludePattern;
private int excludeFlags;
private String[] includeTerms = null;
private String[] excludeTerms = null;
private FilterBuilder filterBuilder;
private SignificanceHeuristicBuilder significanceHeuristicBuilder;

Expand Down Expand Up @@ -129,10 +132,44 @@ public SignificantTermsBuilder include(String regex) {
* @see java.util.regex.Pattern#compile(String, int)
*/
public SignificantTermsBuilder include(String regex, int flags) {
if (includeTerms != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
}
this.includePattern = regex;
this.includeFlags = flags;
return this;
}

/**
* Define a set of terms that should be aggregated.
*/
public SignificantTermsBuilder include(String [] terms) {
if (includePattern != null) {
throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}
this.includeTerms = terms;
return this;
}

/**
* Define a set of terms that should be aggregated.
*/
public SignificantTermsBuilder include(long [] terms) {
if (includePattern != null) {
throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}
this.includeTerms = longsArrToStringArr(terms);
return this;
}

private String[] longsArrToStringArr(long[] terms) {
String[] termsAsString = new String[terms.length];
for (int i = 0; i < terms.length; i++) {
termsAsString[i] = Long.toString(terms[i]);
}
return termsAsString;
}


/**
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
Expand All @@ -151,10 +188,36 @@ public SignificantTermsBuilder exclude(String regex) {
* @see java.util.regex.Pattern#compile(String, int)
*/
public SignificantTermsBuilder exclude(String regex, int flags) {
if (excludeTerms != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
}
this.excludePattern = regex;
this.excludeFlags = flags;
return this;
}

/**
* Define a set of terms that should not be aggregated.
*/
public SignificantTermsBuilder exclude(String [] terms) {
if (excludePattern != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
}
this.excludeTerms = terms;
return this;
}


/**
* Define a set of terms that should not be aggregated.
*/
public SignificantTermsBuilder exclude(long [] terms) {
if (excludePattern != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of longs or a regex, not both");
}
this.excludeTerms = longsArrToStringArr(terms);
return this;
}

@Override
protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException {
Expand All @@ -176,6 +239,10 @@ protected XContentBuilder internalXContent(XContentBuilder builder, Params param
.endObject();
}
}
if (includeTerms != null) {
builder.array("include", includeTerms);
}

if (excludePattern != null) {
if (excludeFlags == 0) {
builder.field("exclude", excludePattern);
Expand All @@ -186,6 +253,9 @@ protected XContentBuilder internalXContent(XContentBuilder builder, Params param
.endObject();
}
}
if (excludeTerms != null) {
builder.array("exclude", excludeTerms);
}

if (filterBuilder != null) {
builder.field(SignificantTermsParametersParser.BACKGROUND_FILTER.getPreferredName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;
Expand All @@ -37,8 +38,8 @@
public class DoubleTermsAggregator extends LongTermsAggregator {

public DoubleTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount,
Terms.Order order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError) {
super(name, factories, valuesSource, format, estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, collectionMode, showTermDocCountError);
Terms.Order order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, IncludeExclude.LongFilter longFilter) {
super(name, factories, valuesSource, format, estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, collectionMode, showTermDocCountError, longFilter);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.LongFilter;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.format.ValueFormat;
Expand All @@ -46,13 +48,15 @@ public class LongTermsAggregator extends TermsAggregator {
protected final LongHash bucketOrds;
private boolean showTermDocCountError;
private SortedNumericDocValues values;
private LongFilter longFilter;

public LongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount,
Terms.Order order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError) {
Terms.Order order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, IncludeExclude.LongFilter longFilter) {
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent, bucketCountThresholds, order, subAggCollectMode);
this.valuesSource = valuesSource;
this.showTermDocCountError = showTermDocCountError;
this.formatter = format != null ? format.formatter() : null;
this.longFilter = longFilter;
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
}

Expand Down Expand Up @@ -82,13 +86,16 @@ public void collect(int doc, long owningBucketOrdinal) throws IOException {
for (int i = 0; i < valuesCount; ++i) {
final long val = values.valueAt(i);
if (previous != val || i == 0) {
long bucketOrdinal = bucketOrds.add(val);
if (bucketOrdinal < 0) { // already seen
bucketOrdinal = - 1 - bucketOrdinal;
collectExistingBucket(doc, bucketOrdinal);
} else {
collectBucket(doc, bucketOrdinal);
if ((longFilter == null) || (longFilter.accept(val))) {
long bucketOrdinal = bucketOrds.add(val);
if (bucketOrdinal < 0) { // already seen
bucketOrdinal = - 1 - bucketOrdinal;
collectExistingBucket(doc, bucketOrdinal);
} else {
collectBucket(doc, bucketOrdinal);
}
}

previous = val;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,16 +231,26 @@ protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount
return execution.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError);
}

if (includeExclude != null) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support the include/exclude " +
"settings as it can only be applied to string values");
if ((includeExclude != null) && (includeExclude.isRegexBased())) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " +
"settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
}

if (valuesSource instanceof ValuesSource.Numeric) {
IncludeExclude.LongFilter longFilter = null;
if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode, showTermDocCountError);
if (includeExclude != null) {
longFilter = includeExclude.convertToDoubleFilter();
}
return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(),
estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode,
showTermDocCountError, longFilter);
}
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter();
}
return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode, showTermDocCountError);
return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount,
order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode, showTermDocCountError, longFilter);
}

throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,51 @@ public TermsBuilder include(String regex, int flags) {
*/
public TermsBuilder include(String [] terms) {
if (includePattern != null) {
throw new ElasticsearchIllegalArgumentException("include clause must be an array of strings or a regex, not both");
throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}
this.includeTerms = terms;
return this;
}

/**
* Define a set of terms that should be aggregated.
*/
public TermsBuilder include(long [] terms) {
if (includePattern != null) {
throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}
this.includeTerms = longsArrToStringArr(terms);
return this;
}

private String[] longsArrToStringArr(long[] terms) {
String[] termsAsString = new String[terms.length];
for (int i = 0; i < terms.length; i++) {
termsAsString[i] = Long.toString(terms[i]);
}
return termsAsString;
}


/**
* Define a set of terms that should be aggregated.
*/
public TermsBuilder include(double [] terms) {
if (includePattern != null) {
throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both");
}
this.includeTerms = doubleArrToStringArr(terms);
return this;
}

private String[] doubleArrToStringArr(double[] terms) {
String[] termsAsString = new String[terms.length];
for (int i = 0; i < terms.length; i++) {
termsAsString[i] = Double.toString(terms[i]);
}
return termsAsString;
}

/**
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
* expression is based on the {@link java.util.regex.Pattern} class.
Expand All @@ -141,7 +180,7 @@ public TermsBuilder exclude(String regex) {
*/
public TermsBuilder exclude(String regex, int flags) {
if (excludeTerms != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
}
this.excludePattern = regex;
this.excludeFlags = flags;
Expand All @@ -153,12 +192,36 @@ public TermsBuilder exclude(String regex, int flags) {
*/
public TermsBuilder exclude(String [] terms) {
if (excludePattern != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
}
this.excludeTerms = terms;
return this;
}


/**
* Define a set of terms that should not be aggregated.
*/
public TermsBuilder exclude(long [] terms) {
if (excludePattern != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
}
this.excludeTerms = longsArrToStringArr(terms);
return this;
}

/**
* Define a set of terms that should not be aggregated.
*/
public TermsBuilder exclude(double [] terms) {
if (excludePattern != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
}
this.excludeTerms = doubleArrToStringArr(terms);
return this;
}



/**
* When using scripts, the value type indicates the types of the values the script is generating.
Expand Down
Loading

4 comments on commit e97b8fd

@roytmana
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this commit related to #7714 be back-ported to 1.x branch?
For me it is a deal breaker for migrating from facets? Majority of my facets are on numeric fields (IDs) and need ability to include/exclude specific values as selected by user so without arrays of terms including numerics I am stuck :-(

thanks
Alex

@markharwood
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The last entry on issue 7714 is a commit of this change to the 1.x branch and the one prior is the commit to master.

I suspect what you might be after is a commit to the 1.4 branch which has changes for the next 1.4 minor release?
The master branch is any changes for a future 2.0 release.
The 1.x branch is any changes for a future 1.5 release.

To help keep track we label the versions a change was pushed to on the closed PR that relates to the issue e.g. #7727 cites versions 1.5 and 2.0

@roytmana
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Mark. I missed the labels. I somehow thought that time-wise (september) 1.4 was cut after this commit and would include it. Guess I will have to wait for 1.5. Do you think it will be a longer then 1 month iteration like 1.4 was?

Do you by any chance push 1.x snapshot builds to any public maven repo?

@cythrawll
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello, I need this patch as well, for the numerical filtering on include clause (which means I can't even use facets for my purposes). It looks like I might have to patch this against 1.4 in order to continue until 1.5 comes out.

Please sign in to comment.