From add3c67628d42220d9e666ac01840dc973502b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 18 Jun 2018 17:21:06 +0200 Subject: [PATCH 1/2] Fix use of timezone in date_histogram rewrite Currently, DateHistogramAggregationBuilder#rewriteTimeZone uses the aggregation date math parser and time zone to check whether all values in a reader have the same timezone to speed up computation. However, the upper and lower bounds to check are retrieved as longs in epoch_millis, so they don't need to get parsed using a time zone or a parser other than "epoch_millis". This changes this behaviour, which was causing problems when the field type mapping was specifying only "epoch_millis" as a format but a different timezone than UTC was used. Closes #31392 --- .../DateHistogramAggregationBuilder.java | 12 ++++--- .../aggregations/bucket/DateHistogramIT.java | 34 ++++++++++++++++++- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java index bb391f21f1e40..2820d52c01d3c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java @@ -25,6 +25,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.joda.DateMathParser; import org.elasticsearch.common.rounding.DateTimeUnit; import org.elasticsearch.common.rounding.Rounding; import org.elasticsearch.common.unit.TimeValue; @@ -33,6 +34,7 @@ import org.elasticsearch.common.xcontent.XContentParser; import 
org.elasticsearch.index.fielddata.AtomicNumericFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType.Relation; import org.elasticsearch.index.query.QueryShardContext; @@ -70,6 +72,7 @@ public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuilder implements MultiBucketAggregationBuilder { public static final String NAME = "date_histogram"; + private static DateMathParser DEFAULT_DATE_PARSER = new DateMathParser(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER); public static final Map DATE_FIELD_UNITS; @@ -380,7 +383,7 @@ DateTimeZone rewriteTimeZone(QueryShardContext context) throws IOException { Long anyInstant = null; final IndexNumericFieldData fieldData = context.getForField(ft); for (LeafReaderContext ctx : reader.leaves()) { - AtomicNumericFieldData leafFD = ((IndexNumericFieldData) fieldData).load(ctx); + AtomicNumericFieldData leafFD = fieldData.load(ctx); SortedNumericDocValues values = leafFD.getLongValues(); if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { anyInstant = values.nextValue(); @@ -406,11 +409,10 @@ DateTimeZone rewriteTimeZone(QueryShardContext context) throws IOException { // rounding rounds down, so 'nextTransition' is a good upper bound final long high = nextTransition; - final DocValueFormat format = ft.docValueFormat(null, null); - final Object formattedLow = format.format(low); - final Object formattedHigh = format.format(high); + final Object formattedLow = DocValueFormat.RAW.format(low); + final Object formattedHigh = DocValueFormat.RAW.format(high); if (ft.isFieldWithinQuery(reader, formattedLow, formattedHigh, - true, false, tz, null, context) == Relation.WITHIN) { + true, false, DateTimeZone.UTC, DEFAULT_DATE_PARSER, context) == Relation.WITHIN) { // All values in this reader have the same offset despite daylight saving 
times. // This is very common for location-based timezones such as Europe/Paris in // combination with time-based indices. diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java index a4a561cfee35f..26e6f4c076553 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java @@ -34,6 +34,7 @@ import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.aggregations.AggregationExecutionException; +import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.elasticsearch.search.aggregations.bucket.histogram.ExtendedBounds; @@ -41,7 +42,6 @@ import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket; import org.elasticsearch.search.aggregations.metrics.avg.Avg; import org.elasticsearch.search.aggregations.metrics.sum.Sum; -import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.test.ESIntegTestCase; import org.hamcrest.Matchers; import org.joda.time.DateTime; @@ -1341,6 +1341,38 @@ public void testExceptionOnNegativeInterval() { } } + /** + * https://github.com/elastic/elasticsearch/issues/31392 demonstrates an edge case where a date field mapping with + * "format" = "epoch_millis" can lead the date histogram aggregation to throw an error if a non-UTC time zone + * with daylight savings time is used. 
This test was added to check this is working now + * @throws ExecutionException + * @throws InterruptedException + */ + public void testRewriteTimeZone_EpochMillisFormat() throws InterruptedException, ExecutionException { + String index = "test31392"; + assertAcked(client().admin().indices().prepareCreate(index).addMapping("type", "d", "type=date,format=epoch_millis").get()); + indexRandom(true, client().prepareIndex(index, "type").setSource("d", "1477954800000")); + ensureSearchable(index); + SearchResponse response = client().prepareSearch(index).addAggregation(dateHistogram("histo").field("d") + .dateHistogramInterval(DateHistogramInterval.MONTH).timeZone(DateTimeZone.forID("Europe/Berlin"))).execute().actionGet(); + assertSearchResponse(response); + Histogram histo = response.getAggregations().get("histo"); + assertThat(histo.getBuckets().size(), equalTo(1)); + assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("1477954800000")); + assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(1L)); + + response = client().prepareSearch(index).addAggregation(dateHistogram("histo").field("d") + .dateHistogramInterval(DateHistogramInterval.MONTH).timeZone(DateTimeZone.forID("Europe/Berlin")).format("yyyy-MM-dd")) + .execute().actionGet(); + assertSearchResponse(response); + histo = response.getAggregations().get("histo"); + assertThat(histo.getBuckets().size(), equalTo(1)); + assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("2016-11-01")); + assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(1L)); + + internalCluster().wipeIndices(index); + } + /** * When DST ends, local time turns back one hour, so between 2am and 4am wall time we should have four buckets: * "2015-10-25T02:00:00.000+02:00", From 057de43853c533878d37a3e77fccde026399b17a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Tue, 19 Jun 2018 16:28:29 +0200 Subject: [PATCH 2/2] iter --- .../histogram/DateHistogramAggregationBuilder.java | 12 
+++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java index 2820d52c01d3c..bb785efde488e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java @@ -26,6 +26,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.joda.DateMathParser; +import org.elasticsearch.common.joda.Joda; import org.elasticsearch.common.rounding.DateTimeUnit; import org.elasticsearch.common.rounding.Rounding; import org.elasticsearch.common.unit.TimeValue; @@ -34,11 +35,9 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.fielddata.AtomicNumericFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; -import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType.Relation; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; import org.elasticsearch.search.aggregations.AggregatorFactory; @@ -61,6 +60,7 @@ import java.io.IOException; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -72,7 +72,7 @@ public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuilder implements MultiBucketAggregationBuilder { public static final String NAME = 
"date_histogram"; - private static DateMathParser DEFAULT_DATE_PARSER = new DateMathParser(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER); + private static DateMathParser EPOCH_MILLIS_PARSER = new DateMathParser(Joda.forPattern("epoch_millis", Locale.ROOT)); public static final Map DATE_FIELD_UNITS; @@ -409,10 +409,8 @@ DateTimeZone rewriteTimeZone(QueryShardContext context) throws IOException { // rounding rounds down, so 'nextTransition' is a good upper bound final long high = nextTransition; - final Object formattedLow = DocValueFormat.RAW.format(low); - final Object formattedHigh = DocValueFormat.RAW.format(high); - if (ft.isFieldWithinQuery(reader, formattedLow, formattedHigh, - true, false, DateTimeZone.UTC, DEFAULT_DATE_PARSER, context) == Relation.WITHIN) { + if (ft.isFieldWithinQuery(reader, low, high, true, false, DateTimeZone.UTC, EPOCH_MILLIS_PARSER, + context) == Relation.WITHIN) { // All values in this reader have the same offset despite daylight saving times. // This is very common for location-based timezones such as Europe/Paris in // combination with time-based indices.