From 45f56ffd3083f91a43a1bd95aa6b3de0c6e2b080 Mon Sep 17 00:00:00 2001 From: markharwood Date: Fri, 12 Sep 2014 20:05:37 +0100 Subject: [PATCH] Aggs - support for arrays of numeric values in include/exclude clauses Closes #7714 --- .../SignificantLongTermsAggregator.java | 5 +- .../SignificantTermsAggregatorFactory.java | 13 +++- .../significant/SignificantTermsBuilder.java | 70 +++++++++++++++++ .../bucket/terms/DoubleTermsAggregator.java | 5 +- .../bucket/terms/LongTermsAggregator.java | 21 ++++-- .../bucket/terms/TermsAggregatorFactory.java | 20 +++-- .../bucket/terms/TermsBuilder.java | 69 ++++++++++++++++- .../bucket/terms/support/IncludeExclude.java | 75 ++++++++++++++++++- .../aggregations/bucket/DoubleTermsTests.java | 32 ++++++++ .../aggregations/bucket/LongTermsTests.java | 31 ++++++++ .../bucket/SignificantTermsTests.java | 54 +++++++++++++ 11 files changed, 368 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java index 68cf81bff152d..be4367caf4ac3 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java @@ -24,6 +24,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.bucket.terms.LongTermsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.format.ValueFormat; @@ -40,9 +41,9 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator { public SignificantLongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount, BucketCountThresholds bucketCountThresholds, - AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { + AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, IncludeExclude.LongFilter includeExclude) { - super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false); + super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, includeExclude); this.termsAggFactory = termsAggFactory; } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 5f74bd020aed3..afc6fa829b90f 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -194,9 +194,10 @@ protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount return execution.create(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, this); } - if (includeExclude != null) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support the include/exclude " + - "settings as it can only be applied to string values"); + + if ((includeExclude != null) && (includeExclude.isRegexBased())) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields"); } if (valuesSource instanceof ValuesSource.Numeric) { @@ -204,7 +205,11 @@ protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { throw new UnsupportedOperationException("No support for examining floating point numerics"); } - return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this); + IncludeExclude.LongFilter longFilter = null; + if (includeExclude != null) { + longFilter = includeExclude.convertToLongFilter(); + } + return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this, longFilter); } throw new AggregationExecutionException("sigfnificant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java index 7a7bcb9cf8fab..48d70c0f9c20a 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.bucket.significant; +import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.FilterBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; @@ -45,6 +46,8 @@ public class SignificantTermsBuilder extends AggregationBuilder 0) { + valids = new LongOpenHashSet(numValids); + } + if (numInvalids > 0) { + invalids = new LongOpenHashSet(numInvalids); + } + } + + public boolean accept(long value) { + return ((valids == null) || (valids.contains(value))) && ((invalids == null) || (!invalids.contains(value))); + } + + private void addAccept(long val) { + valids.add(val); + } + + private void addReject(long val) { + invalids.add(val); + } + } + private final Matcher include; private final Matcher exclude; private final CharsRefBuilder scratch = new CharsRefBuilder(); @@ -281,4 +309,43 @@ public IncludeExclude includeExclude() { } } + public boolean isRegexBased() { + return hasRegexTest; + } + + public LongFilter convertToLongFilter() { + int numValids = includeValues == null ? 0 : includeValues.size(); + int numInvalids = excludeValues == null ? 0 : excludeValues.size(); + LongFilter result = new LongFilter(numValids, numInvalids); + if (includeValues != null) { + for (BytesRef val : includeValues) { + result.addAccept(Long.parseLong(val.utf8ToString())); + } + } + if (excludeValues != null) { + for (BytesRef val : excludeValues) { + result.addReject(Long.parseLong(val.utf8ToString())); + } + } + return result; + } + public LongFilter convertToDoubleFilter() { + int numValids = includeValues == null ? 0 : includeValues.size(); + int numInvalids = excludeValues == null ? 0 : excludeValues.size(); + LongFilter result = new LongFilter(numValids, numInvalids); + if (includeValues != null) { + for (BytesRef val : includeValues) { + double dval=Double.parseDouble(val.utf8ToString()); + result.addAccept( NumericUtils.doubleToSortableLong(dval)); + } + } + if (excludeValues != null) { + for (BytesRef val : excludeValues) { + double dval=Double.parseDouble(val.utf8ToString()); + result.addReject( NumericUtils.doubleToSortableLong(dval)); + } + } + return result; + } + } diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java index 423d741647dd6..ec2399873355d 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java @@ -279,6 +279,38 @@ public void singleValueField_WithMaxSize() throws Exception { assertThat(bucket.getDocCount(), equalTo(1l)); } } + + @Test + public void singleValueFieldWithFiltering() throws Exception { + double includes[] = { 1, 2, 3, 98.2 }; + double excludes[] = { 2, 4, 99 }; + double empty[] = {}; + testIncludeExcludeResults(includes, empty, new double[] { 1, 2, 3 }); + testIncludeExcludeResults(includes, excludes, new double[] { 1, 3 }); + testIncludeExcludeResults(empty, excludes, new double[] { 0, 1, 3 }); + } + + private void testIncludeExcludeResults(double[] includes, double[] excludes, double[] expecteds) { + SearchResponse response = client().prepareSearch("idx").setTypes("type") + .addAggregation(terms("terms") + .field(SINGLE_VALUED_FIELD_NAME) + .include(includes) + .exclude(excludes) + .collectMode(randomFrom(SubAggCollectionMode.values()))) + .execute().actionGet(); + assertSearchResponse(response); + Terms terms = response.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + assertThat(terms.getBuckets().size(), equalTo(expecteds.length)); + + for (int i = 0; i < expecteds.length; i++) { + Terms.Bucket bucket = terms.getBucketByKey("" + expecteds[i]); + assertThat(bucket, notNullValue()); + assertThat(bucket.getDocCount(), equalTo(1l)); + } + } + @Test public void singleValueField_OrderedByTermAsc() throws Exception { diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java index bbb9ffb55dc54..59a7319b99500 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java @@ -255,7 +255,38 @@ public void singleValueField() throws Exception { assertThat(bucket.getDocCount(), equalTo(1l)); } } + + @Test + public void singleValueFieldWithFiltering() throws Exception { + long includes[] = { 1, 2, 3, 98 }; + long excludes[] = { -1, 2, 4 }; + long empty[] = {}; + testIncludeExcludeResults(includes, empty, new long[] { 1, 2, 3 }); + testIncludeExcludeResults(includes, excludes, new long[] { 1, 3 }); + testIncludeExcludeResults(empty, excludes, new long[] { 0, 1, 3 }); + } + + private void testIncludeExcludeResults(long[] includes, long[] excludes, long[] expecteds) { + SearchResponse response = client().prepareSearch("idx").setTypes("type") + .addAggregation(terms("terms") + .field(SINGLE_VALUED_FIELD_NAME) + .include(includes) + .exclude(excludes) + .collectMode(randomFrom(SubAggCollectionMode.values()))) + .execute().actionGet(); + assertSearchResponse(response); + Terms terms = response.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + assertThat(terms.getBuckets().size(), equalTo(expecteds.length)); + for (int i = 0; i < expecteds.length; i++) { + Terms.Bucket bucket = terms.getBucketByKey("" + expecteds[i]); + assertThat(bucket, notNullValue()); + assertThat(bucket.getDocCount(), equalTo(1l)); + } + } + @Test public void singleValueField_WithMaxSize() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type") diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java index d28ca22195fe7..f1718e09cacab 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java @@ -123,6 +123,23 @@ public void structuredAnalysis() throws Exception { Number topCategory = topTerms.getBuckets().iterator().next().getKeyAsNumber(); assertTrue(topCategory.equals(new Long(SNOWBOARDING_CATEGORY))); } + + @Test + public void structuredAnalysisWithIncludeExclude() throws Exception { + long[] excludeTerms = { MUSIC_CATEGORY }; + SearchResponse response = client().prepareSearch("test") + .setSearchType(SearchType.QUERY_AND_FETCH) + .setQuery(new TermQueryBuilder("_all", "paul")) + .setFrom(0).setSize(60).setExplain(true) + .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("fact_category").executionHint(randomExecutionHint()) + .minDocCount(1).exclude(excludeTerms)) + .execute() + .actionGet(); + assertSearchResponse(response); + SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms"); + Number topCategory = topTerms.getBuckets().iterator().next().getKeyAsNumber(); + assertTrue(topCategory.equals(new Long(OTHER_CATEGORY))); + } @Test public void includeExclude() throws Exception { @@ -160,6 +177,43 @@ public void includeExclude() throws Exception { assertThat(terms.contains("weller"), is(true)); } + @Test + public void includeExcludeExactValues() throws Exception { + String []incExcTerms={"weller","nosuchterm"}; + SearchResponse response = client().prepareSearch("test") + .setQuery(new TermQueryBuilder("_all", "weller")) + .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHint()) + .exclude(incExcTerms)) + .get(); + assertSearchResponse(response); + SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms"); + Set terms = new HashSet<>(); + for (Bucket topTerm : topTerms) { + terms.add(topTerm.getKey()); + } + assertThat(terms, hasSize(6)); + assertThat(terms.contains("jam"), is(true)); + assertThat(terms.contains("council"), is(true)); + assertThat(terms.contains("style"), is(true)); + assertThat(terms.contains("paul"), is(true)); + assertThat(terms.contains("of"), is(true)); + assertThat(terms.contains("the"), is(true)); + + response = client().prepareSearch("test") + .setQuery(new TermQueryBuilder("_all", "weller")) + .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHint()) + .include(incExcTerms)) + .get(); + assertSearchResponse(response); + topTerms = response.getAggregations().get("mySignificantTerms"); + terms = new HashSet<>(); + for (Bucket topTerm : topTerms) { + terms.add(topTerm.getKey()); + } + assertThat(terms, hasSize(1)); + assertThat(terms.contains("weller"), is(true)); + } + @Test public void unmapped() throws Exception { SearchResponse response = client().prepareSearch("idx_unmapped")