From 12613bd5e2552c831034377f88962a30ee14dd1b Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Wed, 21 Oct 2015 22:09:13 -0700 Subject: [PATCH 1/3] zero out padding bytes --- .../expressions/codegen/UnsafeRowWriter.java | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriter.java index adbe2621870df..048b7749d8fb4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriter.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriter.java @@ -100,19 +100,27 @@ public void alignToWords(int numBytes) { } public void write(int ordinal, boolean value) { - Platform.putBoolean(holder.buffer, getFieldOffset(ordinal), value); + final long offset = getFieldOffset(ordinal); + Platform.putLong(holder.buffer, offset, 0L); + Platform.putBoolean(holder.buffer, offset, value); } public void write(int ordinal, byte value) { - Platform.putByte(holder.buffer, getFieldOffset(ordinal), value); + final long offset = getFieldOffset(ordinal); + Platform.putLong(holder.buffer, offset, 0L); + Platform.putByte(holder.buffer, offset, value); } public void write(int ordinal, short value) { - Platform.putShort(holder.buffer, getFieldOffset(ordinal), value); + final long offset = getFieldOffset(ordinal); + Platform.putLong(holder.buffer, offset, 0L); + Platform.putShort(holder.buffer, offset, value); } public void write(int ordinal, int value) { - Platform.putInt(holder.buffer, getFieldOffset(ordinal), value); + final long offset = getFieldOffset(ordinal); + Platform.putLong(holder.buffer, offset, 0L); + Platform.putInt(holder.buffer, offset, value); } public void write(int ordinal, long value) { @@ -123,7 +131,9 @@ public void write(int ordinal, float value) { if (Float.isNaN(value)) { value = Float.NaN; } - Platform.putFloat(holder.buffer, getFieldOffset(ordinal), value); + final long offset = getFieldOffset(ordinal); + Platform.putLong(holder.buffer, offset, 0L); + Platform.putFloat(holder.buffer, offset, value); } public void write(int ordinal, double value) { From cf01d6ded3732e4cf677cddc2ad53628a43b2770 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Thu, 22 Oct 2015 22:54:14 -0700 Subject: [PATCH 2/3] add regression test --- .../codegen/GeneratedProjectionSuite.scala | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 098944a9f4fc5..629efca5e4c44 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -98,4 +98,23 @@ class GeneratedProjectionSuite extends SparkFunSuite { val row2 = safeProj(unsafeRow) assert(row2 === row) } + + test("padding bytes should be zeroed out") { + val types = Seq(BooleanType, ByteType, ShortType, IntegerType, FloatType, BinaryType, StringType) + val struct = StructType(types.map(StructField("", _, true))) + val fields = Array[DataType](StringType, struct) + val unsafeProj = UnsafeProjection.create(fields) + + val innerRow = InternalRow(false, 1.toByte, 2.toShort, 3, 4.0f, "".getBytes, + UTF8String.fromString("")) + val row1 = InternalRow(UTF8String.fromString(""), innerRow) + val unsafe1 = unsafeProj(row1).copy() + // create a Row with long String before the inner struct + val row2 = InternalRow(UTF8String.fromString("a_long_string").repeat(10), innerRow) + val unsafe2 = unsafeProj(row2).copy() + assert(unsafe1.getStruct(1, 7) === unsafe2.getStruct(1, 7)) + val unsafe3 = unsafeProj(row1).copy() + assert(unsafe1 === unsafe3) + assert(unsafe1.getStruct(1, 7) === unsafe3.getStruct(1, 7)) + } } From 9ff3f6688df74d25d66dc8cbaf1fea53faa070ec Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Thu, 22 Oct 2015 23:08:11 -0700 Subject: [PATCH 3/3] fix style --- .../expressions/codegen/GeneratedProjectionSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 629efca5e4c44..14b79d2322419 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -100,7 +100,8 @@ class GeneratedProjectionSuite extends SparkFunSuite { } test("padding bytes should be zeroed out") { - val types = Seq(BooleanType, ByteType, ShortType, IntegerType, FloatType, BinaryType, StringType) + val types = Seq(BooleanType, ByteType, ShortType, IntegerType, FloatType, BinaryType, + StringType) val struct = StructType(types.map(StructField("", _, true))) val fields = Array[DataType](StringType, struct) val unsafeProj = UnsafeProjection.create(fields)