From 746fcfbfafd3dabd1739a55ea7cbb3be34e2cc7b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 14 Mar 2015 02:31:03 +0800 Subject: [PATCH 1/2] Make castStruct faster. --- .../spark/sql/catalyst/expressions/Cast.scala | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index b1bc858478ee1..6514f81dab06f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -394,10 +394,16 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w val casts = from.fields.zip(to.fields).map { case (fromField, toField) => cast(fromField.dataType, toField.dataType) } - // TODO: This is very slow! - buildCast[Row](_, row => Row(row.toSeq.zip(casts).map { - case (v, cast) => if (v == null) null else cast(v) - }: _*)) + // TODO: Could be faster? + buildCast[Row](_, row => { + var i = 0 + val fields = row.toSeq.map {(v) => + val f = if (v == null) null else casts(i)(v) + i += 1 + f + } + Row(fields: _*) + }) } private[this] def cast(from: DataType, to: DataType): Any => Any = to match { From 385d5b0f0787a093266eb332d057cc89f029b6fe Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 15 Mar 2015 23:45:39 +0800 Subject: [PATCH 2/2] Further improved. --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 6514f81dab06f..25b42abfe7bca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -395,14 +395,15 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w case (fromField, toField) => cast(fromField.dataType, toField.dataType) } // TODO: Could be faster? + val newRow = new GenericMutableRow(from.fields.size) buildCast[Row](_, row => { var i = 0 - val fields = row.toSeq.map {(v) => - val f = if (v == null) null else casts(i)(v) + while (i < row.length) { + val v = row(i) + newRow.update(i, if (v == null) null else casts(i)(v)) i += 1 - f } - Row(fields: _*) + newRow.copy() }) }