Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Coerce decimal strings for whole number types by truncating the decimal part #25835

Merged
merged 1 commit into from Jul 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -133,7 +133,7 @@ public short shortValue(boolean coerce) throws IOException {
Token token = currentToken();
if (token == Token.VALUE_STRING) {
checkCoerceString(coerce, Short.class);
return Short.parseShort(text());
return (short) Double.parseDouble(text());
}
short result = doShortValue();
ensureNumberConversion(coerce, result, Short.class);
Expand All @@ -147,13 +147,12 @@ public int intValue() throws IOException {
return intValue(DEFAULT_NUMBER_COERCE_POLICY);
}


@Override
public int intValue(boolean coerce) throws IOException {
Token token = currentToken();
if (token == Token.VALUE_STRING) {
checkCoerceString(coerce, Integer.class);
return Integer.parseInt(text());
return (int) Double.parseDouble(text());
}
int result = doIntValue();
ensureNumberConversion(coerce, result, Integer.class);
Expand All @@ -172,7 +171,13 @@ public long longValue(boolean coerce) throws IOException {
Token token = currentToken();
if (token == Token.VALUE_STRING) {
checkCoerceString(coerce, Long.class);
return Long.parseLong(text());
// longs need special handling so we don't lose precision while parsing
String stringValue = text();
try {
return Long.parseLong(stringValue);
} catch (NumberFormatException e) {
return (long) Double.parseDouble(stringValue);
}
}
long result = doLongValue();
ensureNumberConversion(coerce, result, Long.class);
Expand Down
Expand Up @@ -312,13 +312,7 @@ public List<Field> createFields(String name, Number value,
DOUBLE("double", NumericType.DOUBLE) {
@Override
Double parse(Object value, boolean coerce) {
if (value instanceof Number) {
return ((Number) value).doubleValue();
}
if (value instanceof BytesRef) {
value = ((BytesRef) value).utf8ToString();
}
return Double.parseDouble(value.toString());
return objectToDouble(value);
}

@Override
Expand Down Expand Up @@ -389,20 +383,20 @@ public List<Field> createFields(String name, Number value,
BYTE("byte", NumericType.BYTE) {
@Override
Byte parse(Object value, boolean coerce) {
double doubleValue = objectToDouble(value);

if (doubleValue < Byte.MIN_VALUE || doubleValue > Byte.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for a byte");
}
if (!coerce && doubleValue % 1 != 0) {
throw new IllegalArgumentException("Value [" + value + "] has a decimal part");
}

if (value instanceof Number) {
double doubleValue = ((Number) value).doubleValue();
if (doubleValue < Byte.MIN_VALUE || doubleValue > Byte.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for a byte");
}
if (!coerce && doubleValue % 1 != 0) {
throw new IllegalArgumentException("Value [" + value + "] has a decimal part");
}
return ((Number) value).byteValue();
}
if (value instanceof BytesRef) {
value = ((BytesRef) value).utf8ToString();
}
return Byte.parseByte(value.toString());

return (byte) doubleValue;
Copy link
Contributor

@jpountz jpountz Jul 24, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I deleted your comment by mistake, so I am adding it back:

This only rejects coerce=false values for numbers with decimals but it won't reject strings, which seems to run contrary to how coercion is supposed to work. I did not change this behaviour. I just wanted to call it out as inconsistent with how the parse methods in AbstractXContentParser work, should this be changed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed it should be changed to be consistent, but let's do it in a separate PR? Thinking more about it, I'm wondering whether we should remove the coerce option instead. I opened #25861.

}

@Override
Expand Down Expand Up @@ -445,29 +439,25 @@ Number valueForSearch(Number value) {
SHORT("short", NumericType.SHORT) {
@Override
Short parse(Object value, boolean coerce) {
double doubleValue = objectToDouble(value);

if (doubleValue < Short.MIN_VALUE || doubleValue > Short.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for a short");
}
if (!coerce && doubleValue % 1 != 0) {
throw new IllegalArgumentException("Value [" + value + "] has a decimal part");
}

if (value instanceof Number) {
double doubleValue = ((Number) value).doubleValue();
if (doubleValue < Short.MIN_VALUE || doubleValue > Short.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for a short");
}
if (!coerce && doubleValue % 1 != 0) {
throw new IllegalArgumentException("Value [" + value + "] has a decimal part");
}
return ((Number) value).shortValue();
}
if (value instanceof BytesRef) {
value = ((BytesRef) value).utf8ToString();
}
return Short.parseShort(value.toString());

return (short) doubleValue;
}

@Override
Short parse(XContentParser parser, boolean coerce) throws IOException {
int value = parser.intValue(coerce);
if (value < Short.MIN_VALUE || value > Short.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for a short");
}
return (short) value;
return parser.shortValue(coerce);
}

@Override
Expand Down Expand Up @@ -501,20 +491,20 @@ Number valueForSearch(Number value) {
INTEGER("integer", NumericType.INT) {
@Override
Integer parse(Object value, boolean coerce) {
double doubleValue = objectToDouble(value);

if (doubleValue < Integer.MIN_VALUE || doubleValue > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for an integer");
}
if (!coerce && doubleValue % 1 != 0) {
throw new IllegalArgumentException("Value [" + value + "] has a decimal part");
}

if (value instanceof Number) {
double doubleValue = ((Number) value).doubleValue();
if (doubleValue < Integer.MIN_VALUE || doubleValue > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for an integer");
}
if (!coerce && doubleValue % 1 != 0) {
throw new IllegalArgumentException("Value [" + value + "] has a decimal part");
}
return ((Number) value).intValue();
}
if (value instanceof BytesRef) {
value = ((BytesRef) value).utf8ToString();
}
return Integer.parseInt(value.toString());

return (int) doubleValue;
}

@Override
Expand Down Expand Up @@ -612,20 +602,27 @@ public List<Field> createFields(String name, Number value,
LONG("long", NumericType.LONG) {
@Override
Long parse(Object value, boolean coerce) {
double doubleValue = objectToDouble(value);

if (doubleValue < Long.MIN_VALUE || doubleValue > Long.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for a long");
}
if (!coerce && doubleValue % 1 != 0) {
throw new IllegalArgumentException("Value [" + value + "] has a decimal part");
}

if (value instanceof Number) {
double doubleValue = ((Number) value).doubleValue();
if (doubleValue < Long.MIN_VALUE || doubleValue > Long.MAX_VALUE) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for a long");
}
if (!coerce && doubleValue % 1 != 0) {
throw new IllegalArgumentException("Value [" + value + "] has a decimal part");
}
return ((Number) value).longValue();
}
if (value instanceof BytesRef) {
value = ((BytesRef) value).utf8ToString();

// longs need special handling so we don't lose precision while parsing
String stringValue = (value instanceof BytesRef) ? ((BytesRef) value).utf8ToString() : value.toString();

try {
return Long.parseLong(stringValue);
} catch (NumberFormatException e) {
return (long) Double.parseDouble(stringValue);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not preserve the precision of large numbers that have a decimal part and exceed the fractional bits of double but are within min/max Long. For example, while both 4115420654264075766 and "4115420654264075766" will be indexed as 4115420654264075766, something like "4115420654264075766.1" will not be indexed as 4115420654264075766 due to String -> Double -> Long conversion.

Do we want to try to do something a bit more clever to handle this edge case?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'd just put a comment that we might not fail in all cases, but I don't think I would try to address it?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean a comment in the code or in the documentation? (i.e. a warning here https://www.elastic.co/guide/en/elasticsearch/reference/current/coerce.html)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean in the code but just noticed there was one already

}
return Long.parseLong(value.toString());
}

@Override
Expand Down Expand Up @@ -781,6 +778,23 @@ boolean hasDecimalPart(Object number) {
return Math.signum(Double.parseDouble(value.toString()));
}

/**
* Converts an Object to a double by checking it against known types first
*/
private static double objectToDouble(Object value) {
double doubleValue;

if (value instanceof Number) {
doubleValue = ((Number) value).doubleValue();
} else if (value instanceof BytesRef) {
doubleValue = Double.parseDouble(((BytesRef) value).utf8ToString());
} else {
doubleValue = Double.parseDouble(value.toString());
}

return doubleValue;
}

}

public static final class NumberFieldType extends MappedFieldType {
Expand Down
Expand Up @@ -34,6 +34,7 @@

public abstract class AbstractNumericFieldMapperTestCase extends ESSingleNodeTestCase {
protected Set<String> TYPES;
protected Set<String> WHOLE_TYPES;
protected IndexService indexService;
protected DocumentMapperParser parser;

Expand Down Expand Up @@ -92,6 +93,14 @@ public void testCoerce() throws Exception {

protected abstract void doTestCoerce(String type) throws IOException;

public void testDecimalCoerce() throws Exception {
for (String type : WHOLE_TYPES) {
doTestDecimalCoerce(type);
}
}

protected abstract void doTestDecimalCoerce(String type) throws IOException;

public void testNullValue() throws IOException {
for (String type : TYPES) {
doTestNullValue(type);
Expand Down
Expand Up @@ -36,6 +36,7 @@ public class NumberFieldMapperTests extends AbstractNumericFieldMapperTestCase {
@Override
protected void setTypeList() {
TYPES = new HashSet<>(Arrays.asList("byte", "short", "integer", "long", "float", "double"));
WHOLE_TYPES = new HashSet<>(Arrays.asList("byte", "short", "integer", "long"));
}

@Override
Expand Down Expand Up @@ -185,6 +186,28 @@ public void doTestCoerce(String type) throws IOException {
assertThat(e.getCause().getMessage(), containsString("passed as String"));
}

@Override
protected void doTestDecimalCoerce(String type) throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", type).endObject().endObject()
.endObject().endObject().string();

DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

assertEquals(mapping, mapper.mappingSource().toString());

ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("field", "7.89")
.endObject()
.bytes(),
XContentType.JSON));

IndexableField[] fields = doc.rootDoc().getFields("field");
IndexableField pointField = fields[0];
assertEquals(7, pointField.numericValue().doubleValue(), 0d);
}

public void testIgnoreMalformed() throws Exception {
for (String type : TYPES) {
doTestIgnoreMalformed(type);
Expand Down Expand Up @@ -301,6 +324,7 @@ protected void doTestNullValue(String type) throws IOException {
assertFalse(dvField.fieldType().stored());
}

@Override
public void testEmptyName() throws IOException {
// after version 5
for (String type : TYPES) {
Expand All @@ -314,4 +338,5 @@ public void testEmptyName() throws IOException {
assertThat(e.getMessage(), containsString("name cannot be empty string"));
}
}

}
Expand Up @@ -20,6 +20,7 @@
package org.elasticsearch.index.mapper;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.HalfFloatPoint;
Expand All @@ -35,6 +36,7 @@
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
Expand All @@ -43,6 +45,7 @@
import org.junit.Before;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.function.Supplier;

Expand Down Expand Up @@ -246,6 +249,37 @@ public void testConversions() {
assertEquals(1.1d, NumberType.DOUBLE.parse(1.1, true));
}

public void testCoercions() {
assertEquals((byte) 5, NumberType.BYTE.parse((short) 5, true));
assertEquals((byte) 5, NumberType.BYTE.parse("5", true));
assertEquals((byte) 5, NumberType.BYTE.parse("5.0", true));
assertEquals((byte) 5, NumberType.BYTE.parse("5.9", true));
assertEquals((byte) 5, NumberType.BYTE.parse(new BytesRef("5.3".getBytes(StandardCharsets.UTF_8)), true));

assertEquals((short) 5, NumberType.SHORT.parse((byte) 5, true));
assertEquals((short) 5, NumberType.SHORT.parse("5", true));
assertEquals((short) 5, NumberType.SHORT.parse("5.0", true));
assertEquals((short) 5, NumberType.SHORT.parse("5.9", true));
assertEquals((short) 5, NumberType.SHORT.parse(new BytesRef("5.3".getBytes(StandardCharsets.UTF_8)), true));

assertEquals(5, NumberType.INTEGER.parse((byte) 5, true));
assertEquals(5, NumberType.INTEGER.parse("5", true));
assertEquals(5, NumberType.INTEGER.parse("5.0", true));
assertEquals(5, NumberType.INTEGER.parse("5.9", true));
assertEquals(5, NumberType.INTEGER.parse(new BytesRef("5.3".getBytes(StandardCharsets.UTF_8)), true));
assertEquals(Integer.MAX_VALUE, NumberType.INTEGER.parse(Integer.MAX_VALUE, true));

assertEquals((long) 5, NumberType.LONG.parse((byte) 5, true));
assertEquals((long) 5, NumberType.LONG.parse("5", true));
assertEquals((long) 5, NumberType.LONG.parse("5.0", true));
assertEquals((long) 5, NumberType.LONG.parse("5.9", true));
assertEquals((long) 5, NumberType.LONG.parse(new BytesRef("5.3".getBytes(StandardCharsets.UTF_8)), true));

// these will lose precision if they get treated as a double
assertEquals(-4115420654264075766L, NumberType.LONG.parse("-4115420654264075766", true));
assertEquals(-4115420654264075766L, NumberType.LONG.parse(-4115420654264075766L, true));
}

public void testHalfFloatRange() throws IOException {
// make sure the accuracy loss of half floats only occurs at index time
// this test checks that searching half floats yields the same results as
Expand Down