Skip to content

Commit

Permalink
[7.14] Fix sort on nanosecond date fields with missing values (#74760) (
Browse files Browse the repository at this point in the history
#75064)

For missing values on date fields we use Long.MIN_VALUE by default. This is okay
when the resolution of the field is milliseconds. For nanoseconds though,
negative values can lead to IllegalArgumentExpections when we try to format them
internally.
This change fixes this by explicitely setting the minimum value to 0L (which
corresponds to 1970-01-01T00:00:00.000 for nanosecond resolution) when no other
explicit missing value is defined and the target numeric type is a nanosecond
type (this is true for nanosecond fields and when "numeric_type" is explicitely
set). This way we correct the behaviour for single typed indices and cases where
we are sorting across multiple indices with mixed "date" and "date_nanos" type
where "numeric_type" is set in the sort definition.

Closes #73763
  • Loading branch information
elasticsearchmachine committed Jul 7, 2021
1 parent cdb8448 commit 439bd12
Show file tree
Hide file tree
Showing 9 changed files with 228 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,148 @@ public void testSortMissingStrings() throws IOException {
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("3"));
}

public void testSortMissingDates() throws IOException {
for (String type : new String[]{"date", "date_nanos"}) {
String index = "test_" + type;
assertAcked(
prepareCreate(index).addMapping(
"_doc",
XContentFactory.jsonBuilder()
.startObject()
.startObject("_doc")
.startObject("properties")
.startObject("mydate")
.field("type", type)
.endObject()
.endObject()
.endObject()
.endObject()
)
);
ensureGreen();
client().prepareIndex(index, "_doc").setId("1").setSource("mydate", "2021-01-01").get();
client().prepareIndex(index, "_doc").setId("2").setSource("mydate", "2021-02-01").get();
client().prepareIndex(index, "_doc").setId("3").setSource("other_field", "value").get();

refresh();

for (boolean withFormat : new boolean[] {true, false}) {
String format = null;
if (withFormat) {
format = type.equals("date") ? "strict_date_optional_time" : "strict_date_optional_time_nanos";
}

SearchResponse searchResponse = client().prepareSearch(index)
.addSort(SortBuilders.fieldSort("mydate").order(SortOrder.ASC).setFormat(format))
.get();
assertHitsInOrder(searchResponse, new String[] { "1", "2", "3" });

searchResponse = client().prepareSearch(index)
.addSort(SortBuilders.fieldSort("mydate").order(SortOrder.ASC).missing("_first").setFormat(format))
.get();
assertHitsInOrder(searchResponse, new String[] { "3", "1", "2" });

searchResponse = client().prepareSearch(index)
.addSort(SortBuilders.fieldSort("mydate").order(SortOrder.DESC).setFormat(format))
.get();
assertHitsInOrder(searchResponse, new String[] { "2", "1", "3" });

searchResponse = client().prepareSearch(index)
.addSort(SortBuilders.fieldSort("mydate").order(SortOrder.DESC).missing("_first").setFormat(format))
.get();
assertHitsInOrder(searchResponse, new String[] { "3", "2", "1" });
}
}
}

/**
* Sort across two indices with both "date" and "date_nanos" type using "numeric_type" set to "date_nanos"
*/
public void testSortMissingDatesMixedTypes() throws IOException {
for (String type : new String[] { "date", "date_nanos" }) {
String index = "test_" + type;
assertAcked(
prepareCreate(index).addMapping(
"_doc",
XContentFactory.jsonBuilder()
.startObject()
.startObject("_doc")
.startObject("properties")
.startObject("mydate")
.field("type", type)
.endObject()
.endObject()
.endObject()
.endObject()
)
);

}
ensureGreen();

client().prepareIndex("test_date", "_doc").setId("1").setSource("mydate", "2021-01-01").get();
client().prepareIndex("test_date", "_doc").setId("2").setSource("mydate", "2021-02-01").get();
client().prepareIndex("test_date", "_doc").setId("3").setSource("other_field", 1).get();
client().prepareIndex("test_date_nanos", "_doc").setId("4").setSource("mydate", "2021-03-01").get();
client().prepareIndex("test_date_nanos", "_doc").setId("5").setSource("mydate", "2021-04-01").get();
client().prepareIndex("test_date_nanos", "_doc").setId("6").setSource("other_field", 2).get();
refresh();

for (boolean withFormat : new boolean[] {true, false}) {
String format = null;
if (withFormat) {
format = "strict_date_optional_time_nanos";
}

String index = "test*";
SearchResponse searchResponse = client().prepareSearch(index)
.addSort(SortBuilders.fieldSort("mydate").order(SortOrder.ASC).setFormat(format).setNumericType("date_nanos"))
.addSort(SortBuilders.fieldSort("other_field").order(SortOrder.ASC))
.get();
assertHitsInOrder(searchResponse, new String[] { "1", "2", "4", "5", "3", "6" });

searchResponse = client().prepareSearch(index)
.addSort(
SortBuilders.fieldSort("mydate")
.order(SortOrder.ASC)
.missing("_first")
.setFormat(format)
.setNumericType("date_nanos")
)
.addSort(SortBuilders.fieldSort("other_field").order(SortOrder.ASC))
.get();
assertHitsInOrder(searchResponse, new String[] { "3", "6", "1", "2", "4", "5" });

searchResponse = client().prepareSearch(index)
.addSort(SortBuilders.fieldSort("mydate").order(SortOrder.DESC).setFormat(format).setNumericType("date_nanos"))
.addSort(SortBuilders.fieldSort("other_field").order(SortOrder.ASC))
.get();
assertHitsInOrder(searchResponse, new String[] { "5", "4", "2", "1", "3", "6" });

searchResponse = client().prepareSearch(index)
.addSort(
SortBuilders.fieldSort("mydate")
.order(SortOrder.DESC)
.missing("_first")
.setFormat(format)
.setNumericType("date_nanos")
)
.addSort(SortBuilders.fieldSort("other_field").order(SortOrder.ASC))
.get();
assertHitsInOrder(searchResponse, new String[] { "3", "6", "5", "4", "2", "1" });
}
}

private void assertHitsInOrder(SearchResponse response, String[] expectedIds) {
SearchHit[] hits = response.getHits().getHits();
assertEquals(expectedIds.length, hits.length);
int i = 0;
for (String id : expectedIds) {
assertEquals(id, hits[i].getId());
i++;
}
}

public void testIgnoreUnmapped() throws Exception {
createIndex("test");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ public static ZoneId of(String zoneId) {

static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();

public static final long MAX_NANOSECOND = toLong(MAX_NANOSECOND_INSTANT);

/**
* convert a java time instant to a long value which is stored in lucene
* the long value resembles the nanoseconds since the epoch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.DocValueFormat;
Expand Down Expand Up @@ -142,17 +142,17 @@ public DocIdSetIterator innerDocs(LeafReaderContext ctx) throws IOException {
}

/** Whether missing values should be sorted first. */
public final boolean sortMissingFirst(Object missingValue) {
public static final boolean sortMissingFirst(Object missingValue) {
return "_first".equals(missingValue);
}

/** Whether missing values should be sorted last, this is the default. */
public final boolean sortMissingLast(Object missingValue) {
public static final boolean sortMissingLast(Object missingValue) {
return missingValue == null || "_last".equals(missingValue);
}

/** Return the missing object value according to the reduced type of the comparator. */
public final Object missingObject(Object missingValue, boolean reversed) {
public Object missingObject(Object missingValue, boolean reversed) {
if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
final boolean min = sortMissingFirst(missingValue) ^ reversed;
switch (reducedType()) {
Expand Down Expand Up @@ -199,7 +199,7 @@ public final Object missingObject(Object missingValue, boolean reversed) {
case STRING:
case STRING_VAL:
if (missingValue instanceof BytesRef) {
return (BytesRef) missingValue;
return missingValue;
} else if (missingValue instanceof byte[]) {
return new BytesRef((byte[]) missingValue);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.common.time.DateUtils;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
Expand Down Expand Up @@ -156,16 +156,31 @@ private XFieldComparatorSource comparatorSource(
return dateNanosComparatorSource(missingValue, sortMode, nested);
default:
assert targetNumericType.isFloatingPoint() == false;
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
return new LongValuesComparatorSource(this, missingValue, sortMode, nested, targetNumericType);
}
}

protected XFieldComparatorSource dateComparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
protected XFieldComparatorSource dateComparatorSource(
@Nullable Object missingValue,
MultiValueMode sortMode,
Nested nested
) {
return new LongValuesComparatorSource(this, missingValue, sortMode, nested, NumericType.DATE);
}

protected XFieldComparatorSource dateNanosComparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
return new LongValuesComparatorSource(this, missingValue, sortMode, nested, dvs -> convertNumeric(dvs, DateUtils::toNanoSeconds));
protected XFieldComparatorSource dateNanosComparatorSource(
@Nullable Object missingValue,
MultiValueMode sortMode,
Nested nested
) {
return new LongValuesComparatorSource(
this,
missingValue,
sortMode,
nested,
dvs -> convertNumeric(dvs, DateUtils::toNanoSeconds),
NumericType.DATE_NANOSECONDS
);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.comparators.LongComparator;
import org.apache.lucene.util.BitSet;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.common.time.DateUtils;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.LeafNumericFieldData;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.LeafNumericFieldData;
import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
Expand All @@ -38,18 +40,20 @@ public class LongValuesComparatorSource extends IndexFieldData.XFieldComparatorS

private final IndexNumericFieldData indexFieldData;
private final Function<SortedNumericDocValues, SortedNumericDocValues> converter;
private final NumericType targetNumericType;

public LongValuesComparatorSource(IndexNumericFieldData indexFieldData, @Nullable Object missingValue,
MultiValueMode sortMode, Nested nested) {
this(indexFieldData, missingValue, sortMode, nested, null);
MultiValueMode sortMode, Nested nested, NumericType targetNumericType) {
this(indexFieldData, missingValue, sortMode, nested, null, targetNumericType);
}

public LongValuesComparatorSource(IndexNumericFieldData indexFieldData, @Nullable Object missingValue,
MultiValueMode sortMode, Nested nested,
Function<SortedNumericDocValues, SortedNumericDocValues> converter) {
Function<SortedNumericDocValues, SortedNumericDocValues> converter, NumericType targetNumericType) {
super(missingValue, sortMode, nested);
this.indexFieldData = indexFieldData;
this.converter = converter;
this.targetNumericType = targetNumericType;
}

@Override
Expand Down Expand Up @@ -128,4 +132,16 @@ protected long docValue() {
}
};
}

@Override
public Object missingObject(Object missingValue, boolean reversed) {
if (targetNumericType == NumericType.DATE_NANOSECONDS) {
// special case to prevent negative values that would cause invalid nanosecond ranges
if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
final boolean min = sortMissingFirst(missingValue) ^ reversed;
return min ? 0L : DateUtils.MAX_NANOSECOND;
}
}
return super.missingObject(missingValue, reversed);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,23 +87,37 @@ protected boolean sortRequiresCustomComparator() {
}

@Override
protected XFieldComparatorSource dateComparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
protected XFieldComparatorSource dateComparatorSource(
Object missingValue,
MultiValueMode sortMode,
Nested nested
) {
if (numericType == NumericType.DATE_NANOSECONDS) {
// converts date_nanos values to millisecond resolution
return new LongValuesComparatorSource(this, missingValue,
sortMode, nested, dvs -> convertNumeric(dvs, DateUtils::toMilliSeconds));
sortMode, nested, dvs -> convertNumeric(dvs, DateUtils::toMilliSeconds), NumericType.DATE);
}
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
return new LongValuesComparatorSource(this, missingValue, sortMode, nested, NumericType.DATE);
}

@Override
protected XFieldComparatorSource dateNanosComparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
protected XFieldComparatorSource dateNanosComparatorSource(
Object missingValue,
MultiValueMode sortMode,
Nested nested
) {
if (numericType == NumericType.DATE) {
// converts date values to nanosecond resolution
return new LongValuesComparatorSource(this, missingValue,
sortMode, nested, dvs -> convertNumeric(dvs, DateUtils::toNanoSeconds));
}
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
return new LongValuesComparatorSource(
this,
missingValue,
sortMode,
nested,
dvs -> convertNumeric(dvs, DateUtils::toNanoSeconds),
NumericType.DATE_NANOSECONDS
);
}
return new LongValuesComparatorSource(this, missingValue, sortMode, nested, NumericType.DATE_NANOSECONDS);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
Expand All @@ -31,6 +31,9 @@

import java.util.function.Function;

import static org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.sortMissingFirst;
import static org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.sortMissingLast;

public class SortedSetOrdinalsIndexFieldData extends AbstractIndexOrdinalsFieldData {

public static class Builder implements IndexFieldData.Builder {
Expand Down Expand Up @@ -76,12 +79,12 @@ public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMod
*/
if (nested != null ||
(sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN) ||
(source.sortMissingLast(missingValue) == false && source.sortMissingFirst(missingValue) == false)) {
(sortMissingLast(missingValue) == false && sortMissingFirst(missingValue) == false)) {
return new SortField(getFieldName(), source, reverse);
}
SortField sortField = new SortedSetSortField(getFieldName(), reverse,
sortMode == MultiValueMode.MAX ? SortedSetSelector.Type.MAX : SortedSetSelector.Type.MIN);
sortField.setMissingValue(source.sortMissingLast(missingValue) ^ reverse ?
sortField.setMissingValue(sortMissingLast(missingValue) ^ reverse ?
SortedSetSortField.STRING_LAST : SortedSetSortField.STRING_FIRST);
return sortField;
}
Expand Down

0 comments on commit 439bd12

Please sign in to comment.