Skip to content

Commit

Permalink
ARROW-7837 [JAVA] copyFromSafe fails due to a bug in handleSafe
Browse files Browse the repository at this point in the history
this fixes [ARROW-7837](https://issues.apache.org/jira/browse/ARROW-7837).
* introduces a small test case showing how `copyFromSafe` fails on index-out-of-bounds when skipping some indices when adding values to a `VarCharVector` (probably happens to `VarBinaryVector` as well).
* root cause is a bug in `handleSafe` which loads a stale write index resulting with a failure to identify the need to resize the value buffer.
* fixed `handleSafe` to consult with the last-set index in order to properly calculate the future end offset of writing requested length to the buffer.

Closes #6461 from eyalfa/ARROW-7837 and squashes the following commits:

0f2c98e <Eyal Farago-Hagag> ARROW-7837: checkstyle
f71b4ee <Eyal Farago-Hagag> ARROW-7837: address comments by @emkornfield
fe9bdea <Eyal Farago-Hagag> ARROW-7837: style
a16c8fa <Eyal Farago-Hagag> ARROW-7837: introduce to crude benchmarks to setSafe.
be2c0b3 <Eyal Farago-Hagag> ARROW-7837: fix typo in comment.
d0d9c3e <Eyal Farago-Hagag> ARROW-7837: fix(?) a code style issue.
34cc68f <Eyal Farago-Hagag> ARROW-7837: address comments by @liafan82
f900d2c <Eyal Farago-Hagag> ARROW-7837: fix all handleSafe related call sites.
4e00b29 <Eyal Farago-Hagag> ARROW-7837: fix the issue by forcing handle safe to check the next write position based on 'lastSet'.
fe7dc17 <Eyal Farago-Hagag> ARROW-7837: reproduce ARROW-7837 in a unit-test.

Authored-by: Eyal Farago-Hagag <efarago@salesforce.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
  • Loading branch information
eyalfa authored and emkornfield committed Mar 6, 2020
1 parent e92416f commit 0f36697
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 18 deletions.
Expand Up @@ -21,6 +21,7 @@

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.holders.NullableVarCharHolder;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
Expand All @@ -34,6 +35,8 @@
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import io.netty.buffer.ArrowBuf;

/**
* Benchmarks for {@link BaseVariableWidthVector}.
*/
Expand All @@ -46,6 +49,9 @@ public class VariableWidthVectorBenchmarks {

private static final int ALLOCATOR_CAPACITY = 1024 * 1024;

private static byte[] bytes = VariableWidthVectorBenchmarks.class.getName().getBytes();
private ArrowBuf arrowBuff;

private BufferAllocator allocator;

private VarCharVector vector;
Expand All @@ -58,13 +64,16 @@ public void prepare() {
allocator = new RootAllocator(ALLOCATOR_CAPACITY);
vector = new VarCharVector("vector", allocator);
vector.allocateNew(VECTOR_CAPACITY, VECTOR_LENGTH);
arrowBuff = allocator.buffer(VECTOR_LENGTH);
arrowBuff.setBytes(0, bytes, 0, bytes.length);
}

/**
* Tear down benchmarks.
*/
@TearDown
public void tearDown() {
arrowBuff.close();
vector.close();
allocator.close();
}
Expand All @@ -80,6 +89,37 @@ public int getValueCapacity() {
return vector.getValueCapacity();
}

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public int setSafeFromArray() {
for (int i = 0; i < 500; ++i) {
vector.setSafe(i * 40, bytes);
}
return vector.getBufferSize();
}

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public int setSafeFromNullableVarcharHolder() {
NullableVarCharHolder nvch = new NullableVarCharHolder();
nvch.buffer = arrowBuff;
nvch.start = 0;
nvch.end = bytes.length;
for (int i = 0; i < 50; ++i) {
nvch.isSet = 0;
for (int j = 0; j < 9; ++j) {
int idx = 10 * i + j;
vector.setSafe(idx, nvch);
}
nvch.isSet = 1;
vector.setSafe(10 * (i + 1), nvch);
}
return vector.getBufferSize();
}


public static void main(String [] args) throws RunnerException {
Options opt = new OptionsBuilder()
.include(VariableWidthVectorBenchmarks.class.getSimpleName())
Expand Down
Expand Up @@ -999,8 +999,8 @@ public void set(int index, byte[] value) {
*/
public void setSafe(int index, byte[] value) {
assert index >= 0;
fillEmpties(index);
handleSafe(index, value.length);
fillHoles(index);
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, value, 0, value.length);
lastSet = index;
Expand Down Expand Up @@ -1035,8 +1035,8 @@ public void set(int index, byte[] value, int start, int length) {
*/
public void setSafe(int index, byte[] value, int start, int length) {
assert index >= 0;
fillEmpties(index);
handleSafe(index, length);
fillHoles(index);
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, value, start, length);
lastSet = index;
Expand Down Expand Up @@ -1073,8 +1073,8 @@ public void set(int index, ByteBuffer value, int start, int length) {
*/
public void setSafe(int index, ByteBuffer value, int start, int length) {
assert index >= 0;
fillEmpties(index);
handleSafe(index, length);
fillHoles(index);
BitVectorHelper.setBit(validityBuffer, index);
final int startOffset = getStartOffset(index);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length);
Expand Down Expand Up @@ -1129,8 +1129,8 @@ public void set(int index, int isSet, int start, int end, ArrowBuf buffer) {
public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) {
assert index >= 0;
final int dataLength = end - start;
fillEmpties(index);
handleSafe(index, dataLength);
fillHoles(index);
BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
final int startOffset = offsetBuffer.getInt(index * OFFSET_WIDTH);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
Expand Down Expand Up @@ -1170,8 +1170,8 @@ public void set(int index, int start, int length, ArrowBuf buffer) {
*/
public void setSafe(int index, int start, int length, ArrowBuf buffer) {
assert index >= 0;
fillEmpties(index);
handleSafe(index, length);
fillHoles(index);
BitVectorHelper.setBit(validityBuffer, index);
final int startOffset = offsetBuffer.getInt(index * OFFSET_WIDTH);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length);
Expand Down Expand Up @@ -1245,7 +1245,7 @@ protected final void handleSafe(int index, int dataLength) {
while (index >= getValueCapacity()) {
reallocValidityAndOffsetBuffers();
}
final int startOffset = getStartOffset(index);
final int startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1);
while (valueBuffer.capacity() < (startOffset + dataLength)) {
reallocDataBuffer();
}
Expand Down
Expand Up @@ -184,8 +184,8 @@ public void set(int index, VarBinaryHolder holder) {
public void setSafe(int index, VarBinaryHolder holder) {
assert index >= 0;
final int dataLength = holder.end - holder.start;
fillEmpties(index);
handleSafe(index, dataLength);
fillHoles(index);
BitVectorHelper.setBit(validityBuffer, index);
final int startOffset = getStartOffset(index);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
Expand Down Expand Up @@ -225,18 +225,17 @@ public void set(int index, NullableVarBinaryHolder holder) {
*/
public void setSafe(int index, NullableVarBinaryHolder holder) {
assert index >= 0;
fillEmpties(index);
BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
final int startOffset = getStartOffset(index);
if (holder.isSet != 0) {
final int dataLength = holder.end - holder.start;
handleSafe(index, dataLength);
fillHoles(index);
final int startOffset = getStartOffset(index);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
} else {
handleSafe(index, 0);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset);
fillEmpties(index + 1);
}
BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
lastSet = index;
}

Expand Down
Expand Up @@ -186,8 +186,9 @@ public void set(int index, VarCharHolder holder) {
public void setSafe(int index, VarCharHolder holder) {
assert index >= 0;
final int dataLength = holder.end - holder.start;
fillEmpties(index);
handleSafe(index, dataLength);
fillHoles(index);

BitVectorHelper.setBit(validityBuffer, index);
final int startOffset = getStartOffset(index);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
Expand Down Expand Up @@ -227,18 +228,17 @@ public void set(int index, NullableVarCharHolder holder) {
*/
public void setSafe(int index, NullableVarCharHolder holder) {
assert index >= 0;
fillEmpties(index);
BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
final int startOffset = getStartOffset(index);
if (holder.isSet != 0) {
final int dataLength = holder.end - holder.start;
handleSafe(index, dataLength);
fillHoles(index);
final int startOffset = getStartOffset(index);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
} else {
handleSafe(index, 0);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset);
fillEmpties(index + 1);
}
BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
lastSet = index;
}

Expand Down
Expand Up @@ -23,6 +23,7 @@
import static org.junit.Assert.assertNull;

import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.time.Duration;
import java.time.Period;

Expand Down Expand Up @@ -1069,4 +1070,35 @@ public void testCopyFromWithNulls14() {
}
}
}

@Test //https://issues.apache.org/jira/browse/ARROW-7837
public void testCopySafeArrow7837() {
// this test exposes a bug in `handleSafe` where
// it reads a stale index and as a result missed a required resize of the value vector.
try (VarCharVector vc1 = new VarCharVector("vc1", allocator);
VarCharVector vc2 = new VarCharVector("vc2", allocator);
) {
//initial size is carefully set in order to force the second 'copyFromSafe' operation
// to trigger a reallocation of the vector.
vc2.setInitialCapacity(/*valueCount*/20, /*density*/0.5);

vc1.setSafe(0, "1234567890".getBytes(Charset.forName("utf-8")));
assertFalse(vc1.isNull(0));
assertEquals(vc1.getObject(0).toString(), "1234567890");

vc2.copyFromSafe(0, 0, vc1);
assertFalse(vc2.isNull(0));
assertEquals(vc2.getObject(0).toString(), "1234567890");

vc2.copyFromSafe(0, 5, vc1);
assertTrue(vc2.isNull(1));
assertTrue(vc2.isNull(2));
assertTrue(vc2.isNull(3));
assertTrue(vc2.isNull(4));
assertFalse(vc2.isNull(5));
assertEquals(vc2.getObject(5).toString(), "1234567890");
}
}


}

0 comments on commit 0f36697

Please sign in to comment.