From 3b5ad124c18cf4c87620c8b20272c670acf59509 Mon Sep 17 00:00:00 2001 From: Nong Li Date: Wed, 9 Mar 2016 17:00:13 -0800 Subject: [PATCH] [SPARK-13790] Speed up ColumnVector's getDecimal We should reuse an object similar to the other non-primitive type getters. For a query that computes averages over decimal columns, this shows a 10% speedup on overall query times. TPCDS Snappy: Best/Avg Time(ms) Rate(M/s) Per Row(ns) -------------------------------------------------------------------------------- q27-agg (master) 10627 / 11057 10.8 92.3 q27-agg (this patch) 9722 / 9832 11.8 84.4 --- .../org/apache/spark/sql/types/Decimal.scala | 7 +++++++ .../execution/vectorized/ColumnVector.java | 20 +++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 6a59e9728a9f5..ab4404a8881f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -68,6 +68,13 @@ final class Decimal extends Ordered[Decimal] with Serializable { this } + /** + * Just updates the underlying value to `v`, assuming precision and scale is unchanged. + */ + def setInternal(v: Long): Unit = { + this.longVal = v + } + /** * Set this Decimal to the given unscaled Long, with a given precision and scale. */ diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java index bb0247c2fbedf..3eaa5387ddc24 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java @@ -536,9 +536,13 @@ private Array getByteArray(int rowId) { */ public final Decimal getDecimal(int rowId, int precision, int scale) { if (precision <= Decimal.MAX_INT_DIGITS()) { - return Decimal.apply(getInt(rowId), precision, scale); + assert(resultDecimal != null); + resultDecimal.setInternal(getInt(rowId)); + return resultDecimal; } else if (precision <= Decimal.MAX_LONG_DIGITS()) { - return Decimal.apply(getLong(rowId), precision, scale); + assert (resultDecimal != null); + resultDecimal.setInternal(getLong(rowId)); + return resultDecimal; } else { // TODO: best perf? byte[] bytes = getBinary(rowId); @@ -852,6 +856,11 @@ public final int appendStruct(boolean isNull) { */ protected final ColumnarBatch.Row resultStruct; + /** + * Reusable object for getDecimal() + */ + private Decimal resultDecimal; + /** * The Dictionary for this column. * @@ -927,5 +936,12 @@ protected ColumnVector(int capacity, DataType type, MemoryMode memMode) { this.resultArray = null; this.resultStruct = null; } + + if (type instanceof DecimalType) { + DecimalType dt = (DecimalType)type; + if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) { + resultDecimal = Decimal.apply(0, dt.precision(), dt.scale()); + } + } } }