Skip to content

Commit

Permalink
Frames: consider writing singly-valued column when input column hasMu…
Browse files Browse the repository at this point in the history
…ltipleValues is UNKNOWN.

Prior to this patch, columnar frames would always write multi-valued columns if
the input column had hasMultipleValues = UNKNOWN. This had the effect of flipping
UNKNOWN to TRUE when copying data into frames, which is problematic because TRUE
causes expressions to assume that string inputs must be treated as arrays.

We now avoid this by flipping UNKNOWN to FALSE if no multi-valuedness
is encountered, and flipping it to TRUE if multi-valuedness is encountered.
  • Loading branch information
gianm committed Nov 1, 2023
1 parent 2ea7177 commit b178137
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ private static StringFrameColumnWriter makeStringWriter(
return new StringFrameColumnWriterImpl(
selector,
allocator,
capabilities == null || capabilities.hasMultipleValues().isMaybeTrue()
capabilities == null ? ColumnCapabilities.Capable.UNKNOWN : capabilities.hasMultipleValues()
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.column.ColumnCapabilities;

import javax.annotation.Nullable;
import java.nio.ByteBuffer;
Expand All @@ -45,7 +46,8 @@ public abstract class StringFrameColumnWriter<T extends ColumnValueSelector> imp

private final T selector;
private final byte typeCode;
protected final boolean multiValue;
protected final ColumnCapabilities.Capable multiValue;
protected boolean encounteredMultiValueRow;

/**
* Row lengths: one int per row with the number of values contained by that row and all previous rows.
Expand Down Expand Up @@ -73,14 +75,14 @@ public abstract class StringFrameColumnWriter<T extends ColumnValueSelector> imp
final T selector,
final MemoryAllocator allocator,
final byte typeCode,
final boolean multiValue
final ColumnCapabilities.Capable multiValue
)
{
this.selector = selector;
this.typeCode = typeCode;
this.multiValue = multiValue;

if (multiValue) {
if (multiValue.isMaybeTrue()) {
this.cumulativeRowLengths = AppendableMemory.create(allocator, INITIAL_ALLOCATION_SIZE);
} else {
this.cumulativeRowLengths = null;
Expand All @@ -107,7 +109,7 @@ public boolean addSelection()
return false;
}

if (multiValue && !cumulativeRowLengths.reserveAdditional(Integer.BYTES)) {
if (multiValue.isMaybeTrue() && !cumulativeRowLengths.reserveAdditional(Integer.BYTES)) {
return false;
}

Expand All @@ -119,8 +121,16 @@ public boolean addSelection()
return false;
}

if (utf8Count != 1) {
encounteredMultiValueRow = true;

if (multiValue.isFalse()) {
throw new ISE("Encountered unexpected multi-value row, size[%d]", utf8Count);
}
}

// Enough space has been reserved to write what we need to write; let's start.
if (multiValue) {
if (multiValue.isMaybeTrue()) {
final MemoryRange<WritableMemory> rowLengthsCursor = cumulativeRowLengths.cursor();

if (utf8Data == null && typeCode == FrameColumnWriters.TYPE_STRING_ARRAY) {
Expand Down Expand Up @@ -189,7 +199,7 @@ public void undo()
throw new ISE("Cannot undo");
}

if (multiValue) {
if (multiValue.isMaybeTrue()) {
cumulativeRowLengths.rewindCursor(Integer.BYTES);
cumulativeStringLengths.rewindCursor(Integer.BYTES * lastRowLength);
lastCumulativeRowLength -= lastRowLength;
Expand All @@ -207,21 +217,22 @@ public void undo()
public long size()
{
return DATA_OFFSET
+ (multiValue ? cumulativeRowLengths.size() : 0)
+ (isWriteMultiValue() ? cumulativeRowLengths.size() : 0)
+ cumulativeStringLengths.size()
+ stringData.size();
}

@Override
public long writeTo(final WritableMemory memory, final long startPosition)
{
final boolean writeMultiValue = isWriteMultiValue();
long currentPosition = startPosition;

memory.putByte(currentPosition, typeCode);
memory.putByte(currentPosition + 1, multiValue ? (byte) 1 : (byte) 0);
memory.putByte(currentPosition + 1, writeMultiValue ? (byte) 1 : (byte) 0);
currentPosition += 2;

if (multiValue) {
if (writeMultiValue) {
currentPosition += cumulativeRowLengths.writeTo(memory, currentPosition);
}

Expand All @@ -234,7 +245,7 @@ public long writeTo(final WritableMemory memory, final long startPosition)
@Override
public void close()
{
if (multiValue) {
if (multiValue.isMaybeTrue()) {
cumulativeRowLengths.close();
}

Expand All @@ -249,6 +260,15 @@ public void close()
@Nullable
public abstract List<ByteBuffer> getUtf8ByteBuffersFromSelector(T selector);

/**
* Whether, given the current state of the writer, a call to {@link #writeTo(WritableMemory, long)} at this point
* would generate a multi-value column.
*/
private boolean isWriteMultiValue()
{
return multiValue.isTrue() || encounteredMultiValueRow;
}

/**
* Returns the sum of remaining bytes in the provided list of byte buffers.
*/
Expand Down Expand Up @@ -277,7 +297,7 @@ class StringFrameColumnWriterImpl extends StringFrameColumnWriter<DimensionSelec
StringFrameColumnWriterImpl(
DimensionSelector selector,
MemoryAllocator allocator,
boolean multiValue
ColumnCapabilities.Capable multiValue
)
{
super(selector, allocator, FrameColumnWriters.TYPE_STRING, multiValue);
Expand All @@ -286,7 +306,7 @@ class StringFrameColumnWriterImpl extends StringFrameColumnWriter<DimensionSelec
@Override
public List<ByteBuffer> getUtf8ByteBuffersFromSelector(final DimensionSelector selector)
{
return FrameWriterUtils.getUtf8ByteBuffersFromStringSelector(selector, multiValue);
return FrameWriterUtils.getUtf8ByteBuffersFromStringSelector(selector, multiValue.isMaybeTrue());
}
}

Expand All @@ -300,7 +320,7 @@ class StringArrayFrameColumnWriterImpl extends StringFrameColumnWriter<ColumnVal
MemoryAllocator allocator
)
{
super(selector, allocator, FrameColumnWriters.TYPE_STRING_ARRAY, true);
super(selector, allocator, FrameColumnWriters.TYPE_STRING_ARRAY, ColumnCapabilities.Capable.TRUE);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,12 @@ public Capable hasMultipleValues()

public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues)
{
this.hasMultipleValues = Capable.of(hasMultipleValues);
return setHasMultipleValues(Capable.of(hasMultipleValues));
}

public ColumnCapabilitiesImpl setHasMultipleValues(Capable hasMultipleValues)
{
this.hasMultipleValues = hasMultipleValues;
return this;
}

Expand Down

0 comments on commit b178137

Please sign in to comment.