Skip to content

Commit

Permalink
Fix a DST error in date_histogram (backport of #52016) (#52294)
Browse files Browse the repository at this point in the history
When `date_histogram` attempts to optimize itself it for a particular
time zone it checks to see if the entire shard is within the same
"transition". Most time zone transition once every size months or
thereabouts so the optimization can usually kicks in.

*But* it crashes when you attempt feed it a time zone who's last DST
transition was before epoch. The reason for this is a little twisted:
before this patch it'd find the next and previous transitions in
milliseconds since epoch. Then it'd cast them to `Long`s and pass them
into the `DateFieldType` to check if the shard's contents were within
the range. The trouble is they are then converted to `String`s which are
*then* parsed back to `Instant`s which are then convertd to `long`s. And
the parser doesn't like most negative numbers. And everything before
epoch is negative.

This change removes the
`long` -> `Long` -> `String` -> `Instant` -> `long` chain in favor of
passing the `long` -> `Instant` -> `long` which avoids the fairly complex
parsing code and handles a bunch of interesting edge cases around
epoch. And other edge cases around `date_nanos`.

Closes #50265
  • Loading branch information
nik9000 committed Feb 13, 2020
1 parent 238df81 commit f6cc48a
Show file tree
Hide file tree
Showing 7 changed files with 581 additions and 119 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ setup:
settings:
number_of_replicas: 0
mappings:
"properties":
"number":
"type" : "integer"
"date":
"type" : "date"
properties:
number:
type: integer
date:
type: date
- do:
cluster.health:
wait_for_status: green
Expand Down Expand Up @@ -196,3 +196,297 @@ setup:
- match: { aggregations.histo.buckets.2.key_as_string: "2016-01-01T00:00:00.000Z" }

- match: { aggregations.histo.buckets.2.doc_count: 2 }

---
"date_histogram":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
date:
type: date
fields:
nanos:
type: date_nanos

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'
- '{"index": {}}'
- '{"date": "2016-01-02"}'
- '{"index": {}}'
- '{"date": "2016-02-01"}'
- '{"index": {}}'
- '{"date": "2016-03-01"}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
calendar_interval: month
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date.nanos
calendar_interval: month
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

---
"date_histogram with offset":
- skip:
version: "all"
reason: "AwaitsFix https://github.com/elastic/elasticsearch/issues/51525"
# When fixed, reinstate these lines
#version: " - 7.1.99"
#reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
date:
type : date

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'
- '{"index": {}}'
- '{"date": "2016-01-02"}'
- '{"index": {}}'
- '{"date": "2016-02-01"}'
- '{"index": {}}'
- '{"date": "2016-03-01"}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
calendar_interval: month
offset: +1d

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }


---
"date_histogram on range":
- skip:
version: " - 7.4.1"
reason: doc values on ranges implemented in 7.4.1

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
range:
type : date_range

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: range
calendar_interval: month

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

---
"date_histogram on range with offset":
- skip:
version: " - 7.4.1"
reason: doc values on ranges implemented in 7.4.1

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
range:
type : date_range

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: range
calendar_interval: month
offset: +1d

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

---
"date_histogram with pre-epoch daylight savings time transition":
- skip:
version: " - 7.6.1"
reason: bug fixed in 7.6.1
# Add date_nanos to the mapping. We couldn't do it during setup because that
# is run against 6.8 which doesn't have date_nanos
- do:
indices.put_mapping:
index: test_1
body:
properties:
number:
type: integer
date:
type: date
fields:
nanos:
type: date_nanos

- do:
bulk:
index: test_1
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
fixed_interval: 1ms
time_zone: America/Phoenix

- match: { hits.total.value: 1 }
- length: { aggregations.histo.buckets: 1 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date.nanos
fixed_interval: 1ms
time_zone: America/Phoenix

- match: { hits.total.value: 1 }
- length: { aggregations.histo.buckets: 1 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ public static ZoneId of(String zoneId) {
return ZoneId.of(zoneId).normalized();
}

private static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");

static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();

Expand All @@ -231,6 +231,26 @@ public static long toLong(Instant instant) {
return instant.getEpochSecond() * 1_000_000_000 + instant.getNano();
}

/**
* Returns an instant that is with valid nanosecond resolution. If
* the parameter is before the valid nanosecond range then this returns
* the minimum {@linkplain Instant} valid for nanosecond resultion. If
* the parameter is after the valid nanosecond range then this returns
* the maximum {@linkplain Instant} valid for nanosecond resolution.
* <p>
* Useful for checking if all values for the field are within some range,
* even if the range's endpoints are not valid nanosecond resolution.
*/
public static Instant clampToNanosRange(Instant instant) {
if (instant.isBefore(Instant.EPOCH)) {
return Instant.EPOCH;
}
if (instant.isAfter(MAX_NANOSECOND_INSTANT)) {
return MAX_NANOSECOND_INSTANT;
}
return instant;
}

/**
* convert a long value to a java time instant
* the long value resembles the nanoseconds since the epoch
Expand Down

0 comments on commit f6cc48a

Please sign in to comment.