Skip to content

Commit

Permalink
Generify SpecializedIsInOp
Browse files Browse the repository at this point in the history
  • Loading branch information
radeusgd committed Oct 14, 2022
1 parent 10e8896 commit 4c3bdee
Show file tree
Hide file tree
Showing 11 changed files with 61 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,24 @@
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.TypedStorage;

/**
* A specialized implementation for the IS_IN operation for builtin types, relying on hashing. Since
* for some columns we know what types of objects can be stored, we can filter out any objects that
* do not match that type and then rely on a consistent definition of hashcode for these builtin
* types (which is not available in general for custom objects).
*/
public class SpecializedIsInOp<T extends Storage> extends MapOperation<T> {
public class SpecializedIsInOp<T, S extends Storage & TypedStorage<T>> extends MapOperation<S> {
/**
* An optimized representation of the vector of values to match.
*
* <p>It indicates whether the vector contained a null value and holds a hash set of the vector
* elements for faster membership checks.
*/
public record CompactRepresentation(HashSet<Object> coercedValues, boolean hasNulls) {}
public record CompactRepresentation<T>(HashSet<T> coercedValues, boolean hasNulls) {}

private final Function<List<?>, CompactRepresentation> prepareList;
private final Function<List<?>, CompactRepresentation<T>> prepareList;

/**
* Creates a new operation with a given preprocessing function.
Expand All @@ -38,8 +39,8 @@ public record CompactRepresentation(HashSet<Object> coercedValues, boolean hasNu
* fractional part need to be converted into a Long. These conversions can be achieved with the
* {@code NumericConverter} class.
*/
public static <U extends Storage> SpecializedIsInOp<U> make(
Function<List<?>, CompactRepresentation> prepareList) {
public static <T, S extends Storage & TypedStorage<T>> SpecializedIsInOp<T, S> make(
Function<List<?>, CompactRepresentation<T>> prepareList) {
return new SpecializedIsInOp<>(prepareList);
}

Expand All @@ -49,51 +50,51 @@ public static <U extends Storage> SpecializedIsInOp<U> make(
* <p>It uses the provided {@code storageClass} to only keep the elements that are of the same
* type as expected in the storage.
*/
public static <U extends Storage> SpecializedIsInOp<U> makeForTimeColumns(Class<?> storageClass) {
public static <T, S extends Storage & TypedStorage<T>> SpecializedIsInOp<T, S> makeForTimeColumns(Class<T> storageClass) {
return SpecializedIsInOp.make(
list -> {
HashSet<Object> set = new HashSet<>();
HashSet<T> set = new HashSet<>();
boolean hasNulls = false;
for (Object o : list) {
hasNulls |= o == null;
Object coerced = Polyglot_Utils.convertPolyglotValue(o);
if (storageClass.isInstance(coerced)) {
set.add(coerced);
set.add(storageClass.cast(coerced));
}
}
return new SpecializedIsInOp.CompactRepresentation(set, hasNulls);
return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
});
}

SpecializedIsInOp(Function<List<?>, CompactRepresentation> prepareList) {
SpecializedIsInOp(Function<List<?>, CompactRepresentation<T>> prepareList) {
super(Storage.Maps.IS_IN);
this.prepareList = prepareList;
}

@Override
public Storage runMap(T storage, Object arg) {
public Storage runMap(S storage, Object arg) {
if (arg instanceof List) {
return runMap(storage, (List<?>) arg);
} else {
throw new IllegalArgumentException("Argument to `is_in` must be a vector.");
}
}

public Storage runMap(T storage, List<?> arg) {
CompactRepresentation compactRepresentation = prepareList.apply(arg);
public Storage runMap(S storage, List<?> arg) {
CompactRepresentation<T> compactRepresentation = prepareList.apply(arg);
BitSet newVals = new BitSet();
for (int i = 0; i < storage.size(); i++) {
if (storage.isNa(i) && compactRepresentation.hasNulls) {
newVals.set(i);
} else if (compactRepresentation.coercedValues.contains(storage.getItemBoxed(i))) {
} else if (compactRepresentation.coercedValues.contains(storage.getItemTyped(i))) {
newVals.set(i);
}
}
return new BoolStorage(newVals, new BitSet(), storage.size(), false);
}

@Override
public Storage runZip(Storage storage, Storage arg) {
public Storage runZip(S storage, Storage arg) {
throw new IllegalStateException("Zip mode is not supported for this operation.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import org.graalvm.polyglot.Value;

/** A boolean column storage. */
public class BoolStorage extends Storage {
public final class BoolStorage extends Storage implements TypedStorage<Boolean> {
private static final MapOpStorage<BoolStorage> ops = buildOps();
private final BitSet values;
private final BitSet isMissing;
Expand Down Expand Up @@ -47,10 +47,15 @@ public int getType() {
}

@Override
public Object getItemBoxed(int idx) {
public Boolean getItemBoxed(int idx) {
return isMissing.get(idx) ? null : getItem(idx);
}

@Override
public Boolean getItemTyped(int idx) {
return getItemBoxed(idx);
}

public boolean getItem(long idx) {
return negated != values.get((int) idx);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import java.time.LocalDate;

public class DateStorage extends SpecializedStorage<LocalDate> {
public final class DateStorage extends SpecializedStorage<LocalDate> {
/**
* @param data the underlying data
* @param size the number of items stored
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import java.time.ZonedDateTime;

public class DateTimeStorage extends SpecializedStorage<ZonedDateTime> {
public final class DateTimeStorage extends SpecializedStorage<ZonedDateTime> {
/**
* @param data the underlying data
* @param size the number of items stored
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import java.util.List;

/** A column containing floating point numbers. */
public class DoubleStorage extends NumericStorage {
public final class DoubleStorage extends NumericStorage implements TypedStorage<Double> {
private final long[] data;
private final BitSet isMissing;
private final int size;
Expand Down Expand Up @@ -61,7 +61,12 @@ public double getItemDouble(int idx) {
}

@Override
public Object getItemBoxed(int idx) {
public Double getItemTyped(int idx) {
return getItemBoxed(idx);
}

@Override
public Double getItemBoxed(int idx) {
return isMissing.get(idx) ? null : Double.longBitsToDouble(data[idx]);
}

Expand Down Expand Up @@ -259,7 +264,7 @@ public Storage run(DoubleStorage storage) {
.add(
SpecializedIsInOp.make(
list -> {
HashSet<Object> set = new HashSet<>();
HashSet<Double> set = new HashSet<>();
boolean hasNulls = false;
for (Object o : list) {
hasNulls |= o == null;
Expand All @@ -268,7 +273,7 @@ public Storage run(DoubleStorage storage) {
set.add(x);
}
}
return new SpecializedIsInOp.CompactRepresentation(set, hasNulls);
return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
}));
return ops;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import java.util.stream.LongStream;

/** A column storing 64-bit integers. */
public class LongStorage extends NumericStorage {
public final class LongStorage extends NumericStorage implements TypedStorage<Long> {
private final long[] data;
private final BitSet isMissing;
private final int size;
Expand Down Expand Up @@ -69,10 +69,15 @@ public double getItemDouble(int idx) {
}

@Override
public Object getItemBoxed(int idx) {
public Long getItemBoxed(int idx) {
return isMissing.get(idx) ? null : data[idx];
}

@Override
public Long getItemTyped(int idx) {
return getItemBoxed(idx);
}

/** @inheritDoc */
@Override
public int getType() {
Expand Down Expand Up @@ -368,7 +373,7 @@ public Storage run(LongStorage storage) {
}
})
.add(SpecializedIsInOp.make(list -> {
HashSet<Object> set = new HashSet<>();
HashSet<Long> set = new HashSet<>();
boolean hasNulls = false;
for (Object o : list) {
hasNulls |= o == null;
Expand All @@ -377,7 +382,7 @@ public Storage run(LongStorage storage) {
set.add(x);
}
}
return new SpecializedIsInOp.CompactRepresentation(set, hasNulls);
return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
}));
return ops;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import java.util.BitSet;

/** A column storing arbitrary objects. */
public class ObjectStorage extends SpecializedStorage<Object> {
public final class ObjectStorage extends SpecializedStorage<Object> {
/**
* @param data the underlying data
* @param size the number of items stored
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import java.util.BitSet;
import java.util.List;

public abstract class SpecializedStorage<T> extends Storage {
public abstract class SpecializedStorage<T> extends Storage implements TypedStorage<T> {

protected abstract SpecializedStorage<T> newInstance(T[] data, int size);

Expand Down Expand Up @@ -62,6 +62,11 @@ public T getItemBoxed(int idx) {
return data[idx];
}

@Override
public T getItemTyped(int idx) {
return getItemBoxed(idx);
}

/** @inheritDoc */
@Override
public boolean isNa(long idx) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import org.graalvm.polyglot.Value;

/** A column storing strings. */
public class StringStorage extends SpecializedStorage<String> {
public final class StringStorage extends SpecializedStorage<String> {

/**
* @param data the underlying data
Expand Down Expand Up @@ -135,15 +135,15 @@ protected boolean doString(String a, String b) {
});
t.add(new LikeOp());
t.add(SpecializedIsInOp.make(list -> {
HashSet<Object> set = new HashSet<>();
HashSet<String> set = new HashSet<>();
boolean hasNulls = false;
for (Object o : list) {
hasNulls |= o == null;
if (o instanceof String s) {
set.add(s);
}
}
return new SpecializedIsInOp.CompactRepresentation(set, hasNulls);
return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
}));
return t;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import java.time.LocalTime;

public class TimeOfDayStorage extends SpecializedStorage<LocalTime> {
public final class TimeOfDayStorage extends SpecializedStorage<LocalTime> {
/**
* @param data the underlying data
* @param size the number of items stored
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package org.enso.table.data.column.storage;

/**
 * A view of a column storage whose items can be read back as a known Java type.
 *
 * <p>It exposes just the accessors needed to walk a storage entry by entry: the entry count, a
 * per-entry missing-value check and a typed read of the entry itself.
 *
 * @param <T> the Java type of the items returned by {@link #getItemTyped(int)}
 */
public interface TypedStorage<T> {
  /**
   * Reads the item at the given position as a value of type {@code T}.
   *
   * @param idx the position of the item to read
   * @return the item at {@code idx}; NOTE(review): the implementations visible alongside this
   *     interface return {@code null} for a missing entry - confirm that every implementation
   *     follows the same convention
   */
  T getItemTyped(int idx);

  /**
   * Checks the entry at the given position for a missing value.
   *
   * @param idx the position of the entry to check
   * @return presumably {@code true} when the entry at {@code idx} holds no value - verify against
   *     the implementing storages
   */
  boolean isNa(long idx);

  /**
   * @return the number of entries in this storage; valid positions for the accessors above are
   *     assumed to run from {@code 0} up to, but not including, this value
   */
  int size();
}

0 comments on commit 4c3bdee

Please sign in to comment.