Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Promote longs to doubles when a terms agg mixes decimal and non-decimal numbers #22449

Merged
merged 4 commits into from Jan 10, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -22,10 +22,12 @@
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -152,4 +154,32 @@ public XContentBuilder doXContentBody(XContentBuilder builder, Params params) th
protected Bucket[] createBucketsArray(int size) {
return new Bucket[size];
}

@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
boolean promoteToDouble = false;
for (InternalAggregation agg : aggregations) {
if (agg instanceof LongTerms && ((LongTerms) agg).format == DocValueFormat.RAW) {
/**
* this terms agg mixes longs and doubles, we must promote longs to doubles to make the internal aggs
* compatible
*/
promoteToDouble = true;
break;
}
}
if (promoteToDouble == false) {
return super.doReduce(aggregations, reduceContext);
}
List<InternalAggregation> newAggs = new ArrayList<>();
for (InternalAggregation agg : aggregations) {
if (agg instanceof LongTerms) {
DoubleTerms dTerms = LongTerms.convertLongTermsToDouble((LongTerms) agg, format);
newAggs.add(dTerms);
} else {
newAggs.add(agg);
}
}
return newAggs.get(0).doReduce(newAggs, reduceContext);
}
}
Expand Up @@ -22,10 +22,12 @@
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -152,4 +154,32 @@ public XContentBuilder doXContentBody(XContentBuilder builder, Params params) th
protected Bucket[] createBucketsArray(int size) {
return new Bucket[size];
}

@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
for (InternalAggregation agg : aggregations) {
if (agg instanceof DoubleTerms) {
return agg.doReduce(aggregations, reduceContext);
}
}
return super.doReduce(aggregations, reduceContext);
}

/**
* Converts a {@link LongTerms} into a {@link DoubleTerms}, returning the value of the specified long terms as doubles.
*/
static DoubleTerms convertLongTermsToDouble(LongTerms longTerms, DocValueFormat decimalFormat) {
List<Terms.Bucket> buckets = longTerms.getBuckets();
List<DoubleTerms.Bucket> newBuckets = new ArrayList<>();
for (Terms.Bucket bucket : buckets) {
newBuckets.add(new DoubleTerms.Bucket(bucket.getKeyAsNumber().doubleValue(),
bucket.getDocCount(), (InternalAggregations) bucket.getAggregations(), longTerms.showTermDocCountError,
longTerms.showTermDocCountError ? bucket.getDocCountError() : 0, decimalFormat));
}
return new DoubleTerms(longTerms.getName(), longTerms.order, longTerms.requiredSize,
longTerms.minDocCount, longTerms.pipelineAggregators(),
longTerms.metaData, longTerms.format, longTerms.shardSize,
longTerms.showTermDocCountError, longTerms.otherDocCount,
newBuckets, longTerms.docCountError);
}
}
Expand Up @@ -19,6 +19,7 @@
package org.elasticsearch.search.aggregations.bucket.terms;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
Expand All @@ -27,11 +28,21 @@
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.support.ValueType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class TermsAggregatorTests extends AggregatorTestCase {

public void testTermsAggregator() throws Exception {
Expand Down Expand Up @@ -83,4 +94,108 @@ public void testTermsAggregator() throws Exception {
directory.close();
}

public void testMixLongAndDouble() throws IOException {
for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) {
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.LONG)
.executionHint(executionMode.toString())
.field("number")
.order(Terms.Order.term(true));
List<InternalAggregation> aggs = new ArrayList<> ();
int numLongs = randomIntBetween(1, 3);
for (int i = 0; i < numLongs; i++) {
final Directory dir;
try (IndexReader reader = createIndexWithLongs()) {
dir = ((DirectoryReader) reader).directory();
IndexSearcher searcher = new IndexSearcher(reader);
MappedFieldType fieldType =
new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
fieldType.setName("number");
fieldType.setHasDocValues(true);
aggs.add(buildInternalAggregation(aggregationBuilder, fieldType, searcher));
}
dir.close();
}
int numDoubles = randomIntBetween(1, 3);
for (int i = 0; i < numDoubles; i++) {
final Directory dir;
try (IndexReader reader = createIndexWithDoubles()) {
dir = ((DirectoryReader) reader).directory();
IndexSearcher searcher = new IndexSearcher(reader);
MappedFieldType fieldType =
new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
fieldType.setName("number");
fieldType.setHasDocValues(true);
aggs.add(buildInternalAggregation(aggregationBuilder, fieldType, searcher));
}
dir.close();
}
InternalAggregation.ReduceContext ctx =
new InternalAggregation.ReduceContext(new MockBigArrays(Settings.EMPTY,
new NoneCircuitBreakerService()), null);
for (InternalAggregation internalAgg : aggs) {
InternalAggregation mergedAggs = internalAgg.doReduce(aggs, ctx);
assertTrue(mergedAggs instanceof DoubleTerms);
long expected = numLongs + numDoubles;
List<Terms.Bucket> buckets = ((DoubleTerms) mergedAggs).getBuckets();
assertEquals(4, buckets.size());
assertEquals("1.0", buckets.get(0).getKeyAsString());
assertEquals(expected, buckets.get(0).getDocCount());
assertEquals("10.0", buckets.get(1).getKeyAsString());
assertEquals(expected * 2, buckets.get(1).getDocCount());
assertEquals("100.0", buckets.get(2).getKeyAsString());
assertEquals(expected * 2, buckets.get(2).getDocCount());
assertEquals("1000.0", buckets.get(3).getKeyAsString());
assertEquals(expected, buckets.get(3).getDocCount());
}
}
}

private IndexReader createIndexWithLongs() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
Document document = new Document();
document.add(new SortedNumericDocValuesField("number", 10));
document.add(new SortedNumericDocValuesField("number", 100));
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedNumericDocValuesField("number", 1));
document.add(new SortedNumericDocValuesField("number", 100));
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedNumericDocValuesField("number", 10));
document.add(new SortedNumericDocValuesField("number", 1000));
indexWriter.addDocument(document);
indexWriter.close();
return DirectoryReader.open(directory);
}

private IndexReader createIndexWithDoubles() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
Document document = new Document();
document.add(new SortedNumericDocValuesField("number", NumericUtils.doubleToSortableLong(10.0d)));
document.add(new SortedNumericDocValuesField("number", NumericUtils.doubleToSortableLong(100.0d)));
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedNumericDocValuesField("number", NumericUtils.doubleToSortableLong(1.0d)));
document.add(new SortedNumericDocValuesField("number", NumericUtils.doubleToSortableLong(100.0d)));
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedNumericDocValuesField("number", NumericUtils.doubleToSortableLong(10.0d)));
document.add(new SortedNumericDocValuesField("number", NumericUtils.doubleToSortableLong(1000.0d)));
indexWriter.addDocument(document);
indexWriter.close();
return DirectoryReader.open(directory);
}

private InternalAggregation buildInternalAggregation(TermsAggregationBuilder builder, MappedFieldType fieldType,
IndexSearcher searcher) throws IOException {
try (TermsAggregator aggregator = createAggregator(builder, fieldType, searcher)) {
aggregator.preCollection();
searcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
return aggregator.buildAggregation(0L);
}
}

}
7 changes: 7 additions & 0 deletions docs/reference/aggregations/bucket/terms-aggregation.asciidoc
Expand Up @@ -794,3 +794,10 @@ had a value.
--------------------------------------------------

<1> Documents without a value in the `tags` field will fall into the same bucket as documents that have the value `N/A`.

==== Mixing field types

WARNING: When aggregating on multiple indices the type of the aggregated field may not be the same in all indices.
Some types are compatible with each other (`integer` and `long` or `float` and `double`) but when the types are a mix
of decimal and non-decimal number the terms aggregation will promote the non-decimal numbers to decimal numbers.
This can result in a loss of precision in the bucket values.
Expand Up @@ -18,12 +18,26 @@ setup:
type: long
double:
type: double
number:
type: long
scaled_float:
type: scaled_float
scaling_factor: 100
date:
type: date

- do:
indices.create:
index: test_2
body:
settings:
number_of_replicas: 0
mappings:
test:
properties:
number:
type: double

- do:
cluster.health:
wait_for_status: green
Expand Down Expand Up @@ -143,6 +157,7 @@ setup:
- do:
catch: request
search:
index: test_1
body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" : "127.*" } } } }


Expand Down Expand Up @@ -667,3 +682,68 @@ setup:
- match: { aggregations.double_terms.buckets.0.key: 3.5 }

- match: { aggregations.double_terms.buckets.0.doc_count: 1 }

---
"Mixing longs and doubles":

- skip:
version: " - 5.99.99"
reason: in 6.0 longs and doubles are compatible within a terms agg (longs are promoted to doubles)

- do:
index:
index: test_1
type: test
id: 1
body: {"number": 100}

- do:
index:
index: test_1
type: test
id: 2
body: {"number": 10}

- do:
index:
index: test_2
type: test
id: 3
body: {"number": 100.0}

- do:
index:
index: test_2
type: test
id: 1
body: {"number": 10.0}

- do:
index:
index: test_2
type: test
id: 2
body: {"number": 14.6}

- do:
indices.refresh: {}

- do:
search:
body: { "size" : 0, "aggs" : { "number_terms" : { "terms" : { "field" : "number" } } } }

- match: { hits.total: 5 }

- length: { aggregations.number_terms.buckets: 3 }

- match: { aggregations.number_terms.buckets.0.key: 10.0 }

- match: { aggregations.number_terms.buckets.0.doc_count: 2 }

- match: { aggregations.number_terms.buckets.1.key: 100.0 }

- match: { aggregations.number_terms.buckets.1.doc_count: 2 }

- match: { aggregations.number_terms.buckets.2.key: 14.6 }

- match: { aggregations.number_terms.buckets.2.doc_count: 1 }