Skip to content

Commit

Permalink
Preserve date_histogram format when aggregating unmapped fields
Browse files Browse the repository at this point in the history
Currently when aggregating on an unmapped date field (e.g. using a
date_histogram) we don't preserve the aggregations `format` setting but instead
use the default format. This can lead to loosing the aggregations `format` when
aggregating over several indices where some of them contain unmapped date fields
and are encountered first in the reduce phase.

Related to #31760
  • Loading branch information
Christoph Büscher committed Nov 5, 2018
1 parent 85f8458 commit 17b275e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.joda.Joda;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
Expand Down Expand Up @@ -53,7 +54,7 @@ public static <VS extends ValuesSource> ValuesSourceConfig<VS> resolve(
if (field == null) {
if (script == null) {
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(ValuesSourceType.ANY);
config.format(resolveFormat(null, valueType));
config.format(resolveFormat(null, valueType, timeZone));
return config;
}
ValuesSourceType valuesSourceType = valueType != null ? valueType.getValuesSourceType() : ValuesSourceType.ANY;
Expand All @@ -67,7 +68,7 @@ public static <VS extends ValuesSource> ValuesSourceConfig<VS> resolve(
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
config.missing(missing);
config.timezone(timeZone);
config.format(resolveFormat(format, valueType));
config.format(resolveFormat(format, valueType, timeZone));
config.script(createScript(script, context));
config.scriptValueType(valueType);
return config;
Expand All @@ -79,7 +80,7 @@ public static <VS extends ValuesSource> ValuesSourceConfig<VS> resolve(
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
config.missing(missing);
config.timezone(timeZone);
config.format(resolveFormat(format, valueType));
config.format(resolveFormat(format, valueType, timeZone));
config.unmapped(true);
if (valueType != null) {
// todo do we really need this for unmapped?
Expand Down Expand Up @@ -120,14 +121,17 @@ private static AggregationScript.LeafFactory createScript(Script script, QuerySh
}
}

private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType) {
private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType, @Nullable DateTimeZone tz) {
if (valueType == null) {
return DocValueFormat.RAW; // we can't figure it out
}
DocValueFormat valueFormat = valueType.defaultFormat;
if (valueFormat instanceof DocValueFormat.Decimal && format != null) {
valueFormat = new DocValueFormat.Decimal(format);
}
if (valueFormat instanceof DocValueFormat.DateTime && format != null) {
valueFormat = new DocValueFormat.DateTime(Joda.forPattern(format), tz != null ? tz : DateTimeZone.UTC);
}
return valueFormat;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,31 @@ public void testExceptionOnNegativeInterval() {
}
}

/**
* https://github.com/elastic/elasticsearch/issues/31760 shows an edge case where an unmapped "date" field in two indices
* that are queried simultaneously can lead to the "format" parameter in the aggregation not being preserved correctly.
*
* The error happens when the bucket from the "unmapped" index is received first in the reduce phase, however the case can
* be recreated when aggregating about a single index with an unmapped date field and also getting "empty" buckets.
*/
public void testFormatIndexUnmapped() throws InterruptedException, ExecutionException {
String indexDateUnmapped = "test31760";
indexRandom(true, client().prepareIndex(indexDateUnmapped, "_doc").setSource("foo", "bar"));
ensureSearchable(indexDateUnmapped);

SearchResponse response = client().prepareSearch(indexDateUnmapped)
.addAggregation(
dateHistogram("histo").field("dateField").dateHistogramInterval(DateHistogramInterval.MONTH).format("YYYY-MM")
.minDocCount(0).extendedBounds(new ExtendedBounds("2018-01", "2018-01")))
.execute().actionGet();
assertSearchResponse(response);
Histogram histo = response.getAggregations().get("histo");
assertThat(histo.getBuckets().size(), equalTo(1));
assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("2018-01"));
assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(0L));
internalCluster().wipeIndices(indexDateUnmapped);
}

/**
* https://github.com/elastic/elasticsearch/issues/31392 demonstrates an edge case where a date field mapping with
* "format" = "epoch_millis" can lead for the date histogram aggregation to throw an error if a non-UTC time zone
Expand Down

0 comments on commit 17b275e

Please sign in to comment.