From 2d4bb18fcc19d86ed4c3f01bfe10f2af85f8da85 Mon Sep 17 00:00:00 2001 From: Zuo Wang Date: Thu, 3 Mar 2016 14:08:59 +0800 Subject: [PATCH] [SPARK-13531] [SQL] Avoid calling defaultSize of ObjectType Avoid calling defaultSize of ObjectType. Tested manually. --- .../sql/catalyst/plans/logical/LogicalPlan.scala | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 8095083f336e1..e5d3838faa1fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} +import org.apache.spark.sql.types.ObjectType abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { @@ -320,8 +321,14 @@ abstract class UnaryNode extends LogicalPlan { override def statistics: Statistics = { // There should be some overhead in Row object, the size should not be zero when there is // no columns, this help to prevent divide-by-zero error. - val childRowSize = child.output.map(_.dataType.defaultSize).sum + 8 - val outputRowSize = output.map(_.dataType.defaultSize).sum + 8 + val childRowSize = child.output.map(_.dataType).map { + case _: ObjectType => 0 + case dt => dt.defaultSize + }.sum + 8 + val outputRowSize = output.map(_.dataType).map { + case _: ObjectType => 0 + case dt => dt.defaultSize + }.sum + 8 // Assume there will be the same number of rows as child has. var sizeInBytes = (child.statistics.sizeInBytes * outputRowSize) / childRowSize if (sizeInBytes == 0) {