Skip to content

Commit

Permalink
Return Object[] instead of List from frame array selectors.
Browse files Browse the repository at this point in the history
Update MSQSelectTest and MSQInsertTest to reflect the fact that null
arrays are possible.

Add a bunch of javadocs to object selectors describing expected behavior,
including the requirement that array selectors return Object[].
  • Loading branch information
gianm committed Jul 27, 2023
1 parent 8c374a8 commit fb2a498
Show file tree
Hide file tree
Showing 19 changed files with 390 additions and 137 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -647,12 +647,12 @@ public void testInsertOnFoo1WithAutoTypeArrayGroupBy()
.setExpectedResultRows(
NullHandling.replaceWithDefault() ?
ImmutableList.of(
new Object[]{0L, new Object[]{null}},
new Object[]{0L, null},
new Object[]{0L, new Object[]{"a", "b"}},
new Object[]{0L, new Object[]{"b", "c"}},
new Object[]{0L, new Object[]{"d"}}
) : ImmutableList.of(
new Object[]{0L, new Object[]{null}},
new Object[]{0L, null},
new Object[]{0L, new Object[]{"a", "b"}},
new Object[]{0L, new Object[]{""}},
new Object[]{0L, new Object[]{"b", "c"}},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1500,9 +1500,9 @@ public void testScanWithMultiValueSelectQuery()
new Object[]{"[\"a\",\"b\"]", ImmutableList.of("a", "b")},
new Object[]{"[\"b\",\"c\"]", ImmutableList.of("b", "c")},
new Object[]{"d", ImmutableList.of("d")},
new Object[]{"", Collections.singletonList(useDefault ? null : "")},
new Object[]{NullHandling.defaultStringValue(), Collections.singletonList(null)},
new Object[]{NullHandling.defaultStringValue(), Collections.singletonList(null)}
new Object[]{"", null},
new Object[]{NullHandling.defaultStringValue(), null},
new Object[]{NullHandling.defaultStringValue(), null}
)).verifyResults();
}

Expand Down Expand Up @@ -1709,7 +1709,7 @@ public void testGroupByArrayWithMultiValueMvToArray()
.build();

ArrayList<Object[]> expected = new ArrayList<>();
expected.add(new Object[]{Collections.singletonList(null), !useDefault ? 2L : 3L});
expected.add(new Object[]{null, !useDefault ? 2L : 3L});
if (!useDefault) {
expected.add(new Object[]{Collections.singletonList(""), 1L});
}
Expand Down Expand Up @@ -2136,7 +2136,7 @@ private List<Object[]> expectedMultiValueFooRowsGroup()
private List<Object[]> expectedMultiValueFooRowsGroupByList()
{
ArrayList<Object[]> expected = new ArrayList<>();
expected.add(new Object[]{Collections.singletonList(null), !useDefault ? 2L : 3L});
expected.add(new Object[]{null, !useDefault ? 2L : 3L});
if (!useDefault) {
expected.add(new Object[]{Collections.singletonList(""), 1L});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import com.google.common.base.Predicate;
import com.google.common.primitives.Ints;
import it.unimi.dsi.fastutil.objects.ObjectArrays;
import org.apache.datasketches.memory.Memory;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.frame.read.FrameReaderUtils;
Expand All @@ -41,7 +42,7 @@
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Arrays;
import java.util.List;

/**
Expand Down Expand Up @@ -175,15 +176,15 @@ public Object getObject()
final int size = currentStrings.size();

if (size == 0) {
return asArray ? Collections.emptyList() : null;
return asArray ? ObjectArrays.EMPTY_ARRAY : null;
} else if (size == 1) {
return asArray ? Collections.singletonList(lookupName(0)) : lookupName(0);
return asArray ? new Object[]{lookupName(0)} : lookupName(0);
} else {
final List<String> strings = new ArrayList<>(size);
final Object[] strings = new Object[size];
for (int i = 0; i < size; i++) {
strings.add(lookupName(i));
strings[i] = lookupName(i);
}
return strings;
return asArray ? strings : Arrays.asList(strings);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ public boolean hasMultipleValues(final RowKey key, final int fieldNumber)
rowReader.fieldReader(fieldNumber)
.makeColumnValueSelector(keyMemory, fieldPointer);

return selector.getObject() instanceof List;
return selector.getObject() instanceof List || selector.getObject() instanceof Object[];
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.primitives.Ints;
import it.unimi.dsi.fastutil.objects.ObjectArrays;
import org.apache.datasketches.memory.Memory;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.error.DruidException;
Expand Down Expand Up @@ -60,7 +61,7 @@

import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
Expand Down Expand Up @@ -475,7 +476,11 @@ private String getString(final int index)
}

/**
* Returns the value at the given physical row number.
* Returns the object at the given physical row number.
*
* When {@link #asArray}, the return value is always of type {@code Object[]}. Otherwise, the return value
* is either an empty list (if the row is empty), a single String (if the row has one value), or a List
* of Strings (if the row has more than one value).
*
* @param physicalRow physical row number
* @param decode if true, return java.lang.String. If false, return UTF-8 ByteBuffer.
Expand All @@ -496,24 +501,24 @@ private Object getRowAsObject(final int physicalRow, final boolean decode)
}

if (rowLength == 0) {
return Collections.emptyList();
return asArray ? ObjectArrays.EMPTY_ARRAY : Collections.emptyList();
} else if (rowLength == 1) {
final int index = cumulativeRowLength - 1;
final Object o = decode ? getString(index) : getStringUtf8(index);
return asArray ? Collections.singletonList(o) : o;
return asArray ? new Object[]{o} : o;
} else {
final List<Object> row = new ArrayList<>(rowLength);
final Object[] row = new Object[rowLength];

for (int i = 0; i < rowLength; i++) {
final int index = cumulativeRowLength - rowLength + i;
row.add(decode ? getString(index) : getStringUtf8(index));
row[i] = decode ? getString(index) : getStringUtf8(index);
}

return row;
return asArray ? row : Arrays.asList(row);
}
} else {
final Object o = decode ? getString(physicalRow) : getStringUtf8(physicalRow);
return asArray ? Collections.singletonList(o) : o;
return asArray ? new Object[]{o} : o;
}
}

Expand All @@ -525,14 +530,18 @@ private Object getRowAsObject(final int physicalRow, final boolean decode)
*/
private List<ByteBuffer> getRowAsListUtf8(final int physicalRow)
{
if (asArray) {
throw DruidException.defensive("Unexpected call for array column");
}

final Object object = getRowAsObject(physicalRow, false);

if (object instanceof List) {
if (object == null) {
return Collections.singletonList(null);
} else if (object instanceof List) {
//noinspection unchecked
return (List<ByteBuffer>) object;
} else {
// currentValues cannot be null when !asArray, because entirely-null rows only exist for STRING_ARRAY,
// not STRING. So it's safe to call .size() directly. (The current code only runs when !asArray.)
return Collections.singletonList((ByteBuffer) object);
}
}
Expand Down Expand Up @@ -617,11 +626,7 @@ public ValueMatcher makeValueMatcher(Predicate<String> predicate)
@Override
public Object getObject()
{
if (asArray) {
return getRowAsObject(frame.physicalRow(offset.getOffset()), true);
} else {
return defaultGetObject();
}
return getRowAsObject(frame.physicalRow(offset.getOffset()), true);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
package org.apache.druid.segment;

import org.apache.druid.guice.annotations.ExtensionPoint;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType;

import javax.annotation.Nullable;

Expand All @@ -29,12 +31,42 @@
* BaseObjectColumnValueSelector to make it impossible to accidently call any method other than {@link #getObject()}.
*
* All implementations of this interface MUST also implement {@link ColumnValueSelector}.
*
* Typically created by {@link ColumnSelectorFactory#makeColumnValueSelector(String)}.
*/
@ExtensionPoint
public interface BaseObjectColumnValueSelector<T>
{
/**
* Returns the currently-selected object.
*
* The behavior of this method depends on the type of selector, which can be determined by calling
* {@link ColumnSelectorFactory#getColumnCapabilities(String)} on the same {@link ColumnSelectorFactory} that
* you got this selector from. If the capabilties are nonnull, the selector type is given by
* {@link ColumnCapabilities#getType()}.
*
* String selectors, where type is {@link ColumnType#STRING}, may return any type of object from this method,
* especially in cases where the selector is casting objects to string at selection time. Callers are encouraged to
* avoid the need to deal with various objects by using {@link ColumnSelectorFactory#makeDimensionSelector} instead.
*
* Numeric selectors, where {@link ColumnType#isNumeric()}, may return any type of {@link Number}. Callers that
* wish to deal with more specific types should treat the original {@link ColumnValueSelector} as a
* {@link BaseLongColumnValueSelector}, {@link BaseDoubleColumnValueSelector}, or
* {@link BaseFloatColumnValueSelector} instead.
*
* Array selectors, where {@link ColumnType#isArray()}, must return {@code Object[]}. The array may contain
* null elements, and the array itself may also be null.
*
* Selectors of unknown type, where {@link ColumnSelectorFactory#getColumnCapabilities(String)} returns null,
* may return any type of object. Callers must be prepared for a wide variety of possible input objects. This case
* is common during ingestion, where selectors are built on top of external data.
*/
@Nullable
T getObject();

/**
* Most-specific class of object returned by {@link #getObject()}, if known in advance. This method returns
* {@link Object} when selectors do not know in advance what class of object they may return.
*/
Class<? extends T> classOfObject();
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ default boolean isNull()

/**
* Converts the current result of {@link #getRow()} into null, if the row is empty, a String, if the row has size 1,
* or a String[] array, if the row has size > 1, using {@link #lookupName(int)}.
* or a {@code List<String>}, if the row has size > 1, using {@link #lookupName(int)}.
*
* This method is not the default implementation of {@link #getObject()} to minimize the chance that implementations
* "forget" to override it with more optimized version.
Expand All @@ -130,6 +130,11 @@ default Object defaultGetObject()
/**
* Converts a particular {@link IndexedInts} to an Object in a standard way, assuming each element in the IndexedInts
* is a dictionary ID that can be resolved with the provided selector.
*
* Specification:
* 1) Empty row ({@link IndexedInts#size()} zero) returns null.
* 2) Single-value row returns a single {@link String}.
* 3) Two+ value rows return {@link List} of {@link String}.
*/
@Nullable
static Object rowToObject(IndexedInts row, DimensionDictionarySelector selector)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,16 @@ T makeMultiValueDimensionProcessor(
*/
T makeLongProcessor(ColumnCapabilities capabilities, VectorValueSelector selector);

/**
* Called when {@link ColumnCapabilities#getType()} is ARRAY.
*/
T makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector);

/**
* Called when {@link ColumnCapabilities#getType()} is COMPLEX. May also be called for STRING typed columns in
* cases where the dictionary does not exist or is not expected to be useful.
*
* @see VectorObjectSelector#getObjectVector() for details on what can appear here when type is STRING
*/
T makeObjectProcessor(@SuppressWarnings("unused") ColumnCapabilities capabilities, VectorObjectSelector selector);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ default int getMaxVectorSize()

/**
* Returns an object selector. Should only be called on columns where {@link #getColumnCapabilities} indicates that
* they return STRING or COMPLEX, or on nonexistent columns.
* they return STRING, ARRAY, or COMPLEX, or on nonexistent columns.
*
* For STRING, this is needed if values are not dictionary encoded, such as computed virtual columns, or can
* optionally be used in place of {@link SingleValueDimensionVectorSelector} when using the dictionary isn't helpful.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,39 @@

package org.apache.druid.segment.vector;

import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionDictionarySelector;
import org.apache.druid.segment.VectorColumnProcessorFactory;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.data.IndexedInts;

/**
* Vectorized object selector, useful for complex columns.
* Vectorized object selector.
*
* Typically created by {@link VectorColumnSelectorFactory#makeObjectSelector(String)}.
*
* @see org.apache.druid.segment.ColumnValueSelector, the non-vectorized version.
* @see ColumnValueSelector, the non-vectorized version.
*/
public interface VectorObjectSelector extends VectorSizeInspector
{
/**
* Get the current vector. Individual elements of the array may be null.
* Get the current vector.
*
* The type of objects in the array depends on the type of the selector. Callers can determine this by calling
* {@link VectorColumnSelectorFactory#getColumnCapabilities(String)} if creating selectors directly. Alternatively,
* callers using {@link VectorColumnProcessorFactory} will receive capabilities as part of the callback to
* {@link VectorColumnProcessorFactory#makeObjectProcessor(ColumnCapabilities, VectorObjectSelector)}.
*
* String selectors, where type is {@link ColumnType#STRING}, must use objects compatible with the spec of
* {@link org.apache.druid.segment.DimensionSelector#rowToObject(IndexedInts, DimensionDictionarySelector)}.
*
* Array selectors, where {@link ColumnType#isArray()}, must use {@code Object[]}. The array may contain
* null elements, and the array itself may also be null.
*
* Complex selectors may use any type of object.
*
* No other type of selector is possible. Vector object selectors are only used for strings, arrays, and complex types.
*/
Object[] getObjectVector();
}
Loading

0 comments on commit fb2a498

Please sign in to comment.