Skip to content

Commit

Permalink
Add tests for Integer-Decimal cross-equality and fix these
Browse files Browse the repository at this point in the history
  • Loading branch information
radeusgd committed Oct 12, 2022
1 parent f0c9fde commit 0c10218
Show file tree
Hide file tree
Showing 17 changed files with 212 additions and 115 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package org.enso.base.polyglot;

import java.math.BigDecimal;

/**
* The numeric converter deals with conversions of Java numeric types to the two main types
* supported by Enso - Long for integers and Double for decimals. Any other types are coerced to one
* of these types.
*
* <p>It provides two concepts - coercion - which allows to coerce an integer type to a decimal, but
* will not convert a decimal to an integer even if it has 0 fractional part. Then there is
* conversion which allows to convert a decimal with 0 fractional part to an integer. Conversion
* should be used when we care about the original type of the object (i.e. we want any decimals to
* require decimal storage even if they have 0 fractional part). Conversion is to be used when we
* want to be consistent with Enso's equality semantics where 2 == 2.0.
*/
public class NumericConverter {
/**
* Coerces a number (possibly an integer) to a Double.
*
* <p>Will throw an exception if the object is not a number.
*/
public static double coerceToDouble(Object o) {
return switch (o) {
case Double x -> x;
case BigDecimal x -> x.doubleValue();
case Float x -> x.doubleValue();
default -> (double) coerceToLong(o);
};
}

/**
* Coerces a number to an Integer.
*
* <p>Will throw an exception if the object is not an integer.
*
* <p>Decimal values are not accepted.
*/
public static long coerceToLong(Object o) {
return switch (o) {
case Long x -> x;
case Integer x -> x.longValue();
case Short x -> x.longValue();
case Byte x -> x.longValue();
default -> throw new UnsupportedOperationException();
};
}

/** Returns true if the object is any supported number. */
public static boolean isCoercibleToDouble(Object o) {
return o instanceof Double
|| o instanceof BigDecimal
|| o instanceof Float
|| isCoercibleToLong(o);
}

/**
* Returns true if the object is any supported integer.
*
* <p>Returns false for decimals with 0 fractional part - the type itself must be an integer type.
*/
public static boolean isCoercibleToLong(Object o) {
return o instanceof Long || o instanceof Integer || o instanceof Short || o instanceof Byte;
}

/**
* Tries converting the value to a Double.
*
* <p>It will return null if the object represented a non-numeric value.
*/
public static Double tryConvertingToDouble(Object o) {
return switch (o) {
case Double x -> x;
case BigDecimal x -> x.doubleValue();
case Float x -> x.doubleValue();
case Long x -> x.doubleValue();
case Integer x -> x.doubleValue();
case Short x -> x.doubleValue();
case Byte x -> x.doubleValue();
case null, default -> null;
};
}

/**
* Tries converting the value to a Long.
*
* <p>Decimal number types are accepted, only if their fractional part is 0. It will return null
* if the object represented a non-integer value.
*/
public static Long tryConvertingToLong(Object o) {
return switch (o) {
case Long x -> x;
case Integer x -> x.longValue();
case Short x -> x.longValue();
case Byte x -> x.longValue();
case Double x -> x % 1.0 == 0.0 ? x.longValue() : null;
case Float x -> x % 1.0f == 0.0f ? x.longValue() : null;
case BigDecimal x -> {
try {
yield x.longValueExact();
} catch (ArithmeticException e) {
yield null;
}
}
case null, default -> null;
};
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package org.enso.base;

import org.graalvm.polyglot.Value;
package org.enso.base.polyglot;

import java.time.LocalDate;
import java.time.LocalDateTime;
import org.graalvm.polyglot.Value;

public class Polyglot_Utils {
/**
* Converts a polyglot Value ensuring that various date/time types are converted to the correct
* type.
*/
public static Object convertPolyglotValue(Value item) {
if (item.isDate()) {
LocalDate d = item.asDate();
Expand All @@ -26,12 +29,14 @@ public static Object convertPolyglotValue(Value item) {
return item.as(Object.class);
}

/** A helper functions for situations where we cannot use the Value conversion directly.
* <p>
* Mostly happens due to the issue: https://github.com/oracle/graal/issues/4967
* Once that issue is resolved, we should probably remove this helper.
* <p>
* In that case we take a generic Object, knowing that the values of interest to us will be passed as Value anyway - so we can check that and fire the conversion if needed.
/**
* A helper functions for situations where we cannot use the Value conversion directly.
*
* <p>Mostly happens due to the issue: https://github.com/oracle/graal/issues/4967 Once that issue
* is resolved, we should probably remove this helper.
*
* <p>In that case we take a generic Object, knowing that the values of interest to us will be
* passed as Value anyway - so we can check that and fire the conversion if needed.
*/
public static Object convertPolyglotValue(Object item) {
if (item instanceof Value v) {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.enso.table.data.column.builder.object;

import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.Storage;

import java.math.BigDecimal;
Expand Down Expand Up @@ -81,10 +82,10 @@ private void initBuilderFor(Object o) {
int initialCapacity = Math.max(initialSize, currentSize);
if (o instanceof Boolean) {
currentBuilder = new BoolBuilder();
} else if (o instanceof Double || o instanceof BigDecimal) {
currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity);
} else if (o instanceof Long) {
} else if (NumericConverter.isCoercibleToLong(o)) {
currentBuilder = NumericBuilder.createLongBuilder(initialCapacity);
} else if (NumericConverter.isCoercibleToDouble(o)) {
currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity);
} else if (o instanceof LocalDate) {
currentBuilder = new DateBuilder(initialCapacity);
} else if (o instanceof LocalTime) {
Expand All @@ -106,11 +107,16 @@ private record RetypeInfo(Class<?> clazz, int type) {}
new RetypeInfo(Boolean.class, Storage.Type.BOOL),
new RetypeInfo(Long.class, Storage.Type.LONG),
new RetypeInfo(Double.class, Storage.Type.DOUBLE),
new RetypeInfo(String.class, Storage.Type.STRING),
new RetypeInfo(BigDecimal.class, Storage.Type.DOUBLE),
new RetypeInfo(LocalDate.class, Storage.Type.DATE),
new RetypeInfo(LocalTime.class, Storage.Type.TIME_OF_DAY),
new RetypeInfo(ZonedDateTime.class, Storage.Type.DATE_TIME),
new RetypeInfo(String.class, Storage.Type.STRING));
new RetypeInfo(Float.class, Storage.Type.DOUBLE),
new RetypeInfo(Integer.class, Storage.Type.LONG),
new RetypeInfo(Short.class, Storage.Type.LONG),
new RetypeInfo(Byte.class, Storage.Type.LONG)
);

private void retypeAndAppend(Object o) {
for (RetypeInfo info : retypePairs) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
package org.enso.table.data.column.builder.object;

import org.enso.table.data.NumericConverter;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.DoubleStorage;
import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.column.storage.Storage;

import java.math.BigDecimal;
import java.util.Arrays;
import java.util.BitSet;

Expand Down Expand Up @@ -70,10 +69,10 @@ public void appendNoGrow(Object o) {
if (o == null) {
isMissing.set(currentSize++);
} else if (isDouble) {
double value = NumericConverter.toDouble(o);
double value = NumericConverter.coerceToDouble(o);
data[currentSize++] = Double.doubleToRawLongBits(value);
} else {
data[currentSize++] = NumericConverter.toLong(o);
data[currentSize++] = NumericConverter.coerceToLong(o);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.enso.table.data.column.operation.aggregate;

import org.enso.base.Polyglot_Utils;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.storage.Storage;
import org.graalvm.polyglot.Value;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,37 +1,68 @@
package org.enso.table.data.column.operation.map;

import org.enso.base.Polyglot_Utils;
import org.enso.table.data.NumericConverter;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;
import org.graalvm.polyglot.Value;

import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import java.util.function.Function;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;

/**
* A specialized implementation for the IS_IN operation for builtin types, relying on hashing. Since
* for some columns we know what types of objects can be stored, we can filter out any objects that
* do not match that type and then rely on a consistent definition of hashcode for these builtin
* types (which is not available in general for custom objects).
*/
public class SpecializedIsInOp<T extends Storage> extends MapOperation<T> {
/**
* An optimized representation of the vector of values to match.
*
* <p>It indicates whether the vector contained a null value and contains a hashmap of the vector
* elements for faster contains checks.
*/
public record CompactRepresentation(HashSet<Object> coercedValues, boolean hasNulls) {}

private final Function<List<?>, CompactRepresentation> prepareList;

public static <U extends Storage> SpecializedIsInOp<U> make(Function<List<?>, CompactRepresentation> prepareList) {
/**
* Creates a new operation with a given preprocessing function.
*
* <p>The responsibility of the function is to analyse the list and create a hashmap of relevant
* elements, coerced to a type that is consistent with the storage type of the given column. Any
* elements not fitting the expected type can (and should) be discarded.
*
* <p>It is important to correctly coerce the types, for example in Enso 2 == 2.0, so if we are
* getting a Long for a DoubleColumn, it should be converted to a Double before adding it to the
* hashmap. Similarly, for LongStorage, non-integer Doubles can be ignored, but Doubles with 0
* fractional part need to be converted into a Long. These conversions can be achieved with the
* {@code NumericConverter} class.
*/
public static <U extends Storage> SpecializedIsInOp<U> make(
Function<List<?>, CompactRepresentation> prepareList) {
return new SpecializedIsInOp<>(prepareList);
}

public static <U extends Storage> SpecializedIsInOp<U> makeForTimeColumns() {
return SpecializedIsInOp.make(list -> {
HashSet<Object> set = new HashSet<>();
boolean hasNulls = false;
for (Object o : list) {
hasNulls |= o == null;
Object coerced = Polyglot_Utils.convertPolyglotValue(o);
if (coerced != null) {
set.add(coerced);
}
}
return new SpecializedIsInOp.CompactRepresentation(set, hasNulls);
});
/**
* Creates a new operation which ensures the Enso Date/Time types are correctly coerced.
*
* <p>It uses the provided {@code storageClass} to only keep the elements that are of the same
* type as expected in the storage.
*/
public static <U extends Storage> SpecializedIsInOp<U> makeForTimeColumns(Class<?> storageClass) {
return SpecializedIsInOp.make(
list -> {
HashSet<Object> set = new HashSet<>();
boolean hasNulls = false;
for (Object o : list) {
hasNulls |= o == null;
Object coerced = Polyglot_Utils.convertPolyglotValue(o);
if (storageClass.isInstance(coerced)) {
set.add(coerced);
}
}
return new SpecializedIsInOp.CompactRepresentation(set, hasNulls);
});
}

SpecializedIsInOp(Function<List<?>, CompactRepresentation> prepareList) {
Expand Down
Loading

0 comments on commit 0c10218

Please sign in to comment.