-
Notifications
You must be signed in to change notification settings - Fork 28.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-11149] [SQL] Improve cache performance for primitive types #9145
Changes from 6 commits
cea0e33
7ee54a9
8a49887
1ef3e18
9610766
4511781
f9151cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,36 @@ import org.apache.spark.sql.types._ | |
import org.apache.spark.unsafe.Platform | ||
import org.apache.spark.unsafe.types.UTF8String | ||
|
||
|
||
/**
 * A helper class for fast reading of Int/Long/Float/Double values from a [[ByteBuffer]] in native
 * byte order.
 *
 * WARNING: this only works with heap buffers that expose an accessible backing array (i.e.
 * `buffer.hasArray` is true, e.g. `HeapByteBuffer`). Direct buffers and read-only buffers are
 * NOT supported.
 *
 * The values are read straight from the backing array through [[Platform]], so the byte order
 * configured on the buffer itself (`buffer.order()`) is not consulted.
 */
object ByteBufferHelper {

  /** Reads the 4 bytes at the current position as an Int and advances the position by 4. */
  def getInt(buffer: ByteBuffer): Int = {
    val offset = consume(buffer, 4)
    Platform.getInt(buffer.array(), offset)
  }

  /** Reads the 8 bytes at the current position as a Long and advances the position by 8. */
  def getLong(buffer: ByteBuffer): Long = {
    val offset = consume(buffer, 8)
    Platform.getLong(buffer.array(), offset)
  }

  /** Reads the 4 bytes at the current position as a Float and advances the position by 4. */
  def getFloat(buffer: ByteBuffer): Float = {
    val offset = consume(buffer, 4)
    Platform.getFloat(buffer.array(), offset)
  }

  /** Reads the 8 bytes at the current position as a Double and advances the position by 8. */
  def getDouble(buffer: ByteBuffer): Double = {
    val offset = consume(buffer, 8)
    Platform.getDouble(buffer.array(), offset)
  }

  /**
   * Advances `buffer` by `size` bytes and returns the [[Platform]] offset (relative to the
   * backing array object) of the first of the bytes that were skipped over.
   */
  @inline private def consume(buffer: ByteBuffer, size: Int): Long = {
    // Fail with a clear message instead of an UnsupportedOperationException from array().
    assert(buffer.hasArray, "ByteBufferHelper only supports ByteBuffers backed by an array")
    val pos = buffer.position()
    // Moving the position first doubles as the bounds check: position(int) rejects a new
    // position beyond the buffer's limit before any unchecked read takes place.
    buffer.position(pos + size)
    // Index 0 of the buffer lives at arrayOffset() inside the backing array, which is non-zero
    // for sliced or wrapped-with-offset buffers. Sum in Long so the offset cannot overflow.
    Platform.BYTE_ARRAY_OFFSET.toLong + buffer.arrayOffset() + pos
  }
}
|
||
/** | ||
* An abstract class that represents type of a column. Used to append/extract Java objects into/from | ||
* the underlying [[ByteBuffer]] of a column. | ||
|
@@ -134,11 +164,11 @@ private[sql] object INT extends NativeColumnType(IntegerType, 4) { | |
} | ||
|
||
/**
 * Reads the next 4 bytes of `buffer` as an Int and advances the buffer position past them.
 * The read happens exactly once, through [[ByteBufferHelper]].
 */
override def extract(buffer: ByteBuffer): Int = {
  ByteBufferHelper.getInt(buffer)
}
|
||
/**
 * Reads the next 4 bytes of `buffer` as an Int, advancing the buffer position, and stores the
 * value into `row` at `ordinal`. The buffer is consumed exactly once.
 */
override def extract(buffer: ByteBuffer, row: MutableRow, ordinal: Int): Unit = {
  row.setInt(ordinal, ByteBufferHelper.getInt(buffer))
}
|
||
override def setField(row: MutableRow, ordinal: Int, value: Int): Unit = { | ||
|
@@ -163,11 +193,11 @@ private[sql] object LONG extends NativeColumnType(LongType, 8) { | |
} | ||
|
||
/**
 * Reads the next 8 bytes of `buffer` as a Long and advances the buffer position past them.
 * The read happens exactly once, through [[ByteBufferHelper]].
 */
override def extract(buffer: ByteBuffer): Long = {
  ByteBufferHelper.getLong(buffer)
}
|
||
/**
 * Reads the next 8 bytes of `buffer` as a Long, advancing the buffer position, and stores the
 * value into `row` at `ordinal`. The buffer is consumed exactly once.
 */
override def extract(buffer: ByteBuffer, row: MutableRow, ordinal: Int): Unit = {
  row.setLong(ordinal, ByteBufferHelper.getLong(buffer))
}
|
||
override def setField(row: MutableRow, ordinal: Int, value: Long): Unit = { | ||
|
@@ -191,11 +221,11 @@ private[sql] object FLOAT extends NativeColumnType(FloatType, 4) { | |
} | ||
|
||
/**
 * Reads the next 4 bytes of `buffer` as a Float and advances the buffer position past them.
 * The read happens exactly once, through [[ByteBufferHelper]].
 */
override def extract(buffer: ByteBuffer): Float = {
  ByteBufferHelper.getFloat(buffer)
}
|
||
/**
 * Reads the next 4 bytes of `buffer` as a Float, advancing the buffer position, and stores the
 * value into `row` at `ordinal`. The buffer is consumed exactly once.
 */
override def extract(buffer: ByteBuffer, row: MutableRow, ordinal: Int): Unit = {
  row.setFloat(ordinal, ByteBufferHelper.getFloat(buffer))
}
|
||
override def setField(row: MutableRow, ordinal: Int, value: Float): Unit = { | ||
|
@@ -219,11 +249,11 @@ private[sql] object DOUBLE extends NativeColumnType(DoubleType, 8) { | |
} | ||
|
||
/**
 * Reads the next 8 bytes of `buffer` as a Double and advances the buffer position past them.
 * The read happens exactly once, through [[ByteBufferHelper]].
 */
override def extract(buffer: ByteBuffer): Double = {
  ByteBufferHelper.getDouble(buffer)
}
|
||
/**
 * Reads the next 8 bytes of `buffer` as a Double, advancing the buffer position, and stores the
 * value into `row` at `ordinal`. The buffer is consumed exactly once.
 */
override def extract(buffer: ByteBuffer, row: MutableRow, ordinal: Int): Unit = {
  row.setDouble(ordinal, ByteBufferHelper.getDouble(buffer))
}
|
||
override def setField(row: MutableRow, ordinal: Int, value: Double): Unit = { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Around line 332, there is a call to buffer.getShort(). Is it worth adding a corresponding method to ByteBufferHelper? If so, I can send a PR. Thanks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think it is worth it. |
||
|
@@ -330,7 +360,7 @@ private[sql] object STRING extends NativeColumnType(StringType, 8) { | |
} | ||
|
||
override def extract(buffer: ByteBuffer): UTF8String = { | ||
val length = buffer.getInt() | ||
val length = ByteBufferHelper.getInt(buffer) | ||
assert(buffer.hasArray) | ||
val base = buffer.array() | ||
val offset = buffer.arrayOffset() | ||
|
@@ -358,7 +388,7 @@ private[sql] case class COMPACT_DECIMAL(precision: Int, scale: Int) | |
extends NativeColumnType(DecimalType(precision, scale), 8) { | ||
|
||
/**
 * Reads the next 8 bytes of `buffer` as a Long (advancing the buffer position) and builds a
 * [[Decimal]] from it using this column type's `precision` and `scale`. The buffer is consumed
 * exactly once.
 */
override def extract(buffer: ByteBuffer): Decimal = {
  val unscaled = ByteBufferHelper.getLong(buffer)
  Decimal(unscaled, precision, scale)
}
|
||
override def append(v: Decimal, buffer: ByteBuffer): Unit = { | ||
|
@@ -396,7 +426,7 @@ private[sql] sealed abstract class ByteArrayColumnType[JvmType](val defaultSize: | |
} | ||
|
||
override def extract(buffer: ByteBuffer): JvmType = { | ||
val length = buffer.getInt() | ||
val length = ByteBufferHelper.getInt(buffer) | ||
val bytes = new Array[Byte](length) | ||
buffer.get(bytes, 0, length) | ||
deserialize(bytes) | ||
|
@@ -480,7 +510,7 @@ private[sql] case class STRUCT(dataType: StructType) extends ColumnType[UnsafeRo | |
} | ||
|
||
override def extract(buffer: ByteBuffer): UnsafeRow = { | ||
val sizeInBytes = buffer.getInt() | ||
val sizeInBytes = ByteBufferHelper.getInt(buffer) | ||
assert(buffer.hasArray) | ||
val base = buffer.array() | ||
val offset = buffer.arrayOffset() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Put a big warning here that this only works with HeapByteBuffer.