From 1ab2d43d81c987162124f949bbed64c1d17056f7 Mon Sep 17 00:00:00 2001
From: Lianhui Wang
Date: Mon, 30 May 2016 11:32:37 +0800
Subject: [PATCH 1/2] init commit

---
 .../apache/spark/sql/hive/execution/HiveTableScanExec.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala
index e29864f996e2e..923f041cebbe5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala
@@ -152,8 +152,9 @@ case class HiveTableScanExec(
       }
     }
     val numOutputRows = longMetric("numOutputRows")
+    val outputSchema = schema
     rdd.mapPartitionsInternal { iter =>
-      val proj = UnsafeProjection.create(schema)
+      val proj = UnsafeProjection.create(outputSchema)
       iter.map { r =>
         numOutputRows += 1
         proj(r)

From 933b421215f9cbc2e15c39ff0fc62b43e5bdd10f Mon Sep 17 00:00:00 2001
From: Lianhui Wang
Date: Tue, 31 May 2016 16:50:09 +0800
Subject: [PATCH 2/2] address comments

---
 .../org/apache/spark/sql/hive/execution/HiveTableScanExec.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala
index 923f041cebbe5..cc3e74b4e8ccc 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala
@@ -152,6 +152,7 @@ case class HiveTableScanExec(
       }
     }
     val numOutputRows = longMetric("numOutputRows")
+    // Avoid to serialize MetastoreRelation because schema is lazy. (see SPARK-15649)
     val outputSchema = schema
     rdd.mapPartitionsInternal { iter =>
       val proj = UnsafeProjection.create(outputSchema)
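
Editor's note on the pattern this patch uses: referencing the lazy `schema` member directly inside the `mapPartitionsInternal` closure makes the closure capture `this` (the whole `HiveTableScanExec`, including its `MetastoreRelation`), so Spark would have to serialize the relation when shipping the task. Assigning `schema` to a local `val outputSchema` first means the closure captures only that value. The sketch below is a minimal, standalone illustration of that capture behavior; `HeavyRelation`, `ScanLike`, and the helper names are hypothetical and are not part of the patch or of Spark's API.

```scala
import java.io.{ByteArrayOutputStream, ObjectOutputStream}

// Hypothetical stand-in for MetastoreRelation: deliberately NOT Serializable.
class HeavyRelation

// Hypothetical stand-in for HiveTableScanExec: Serializable itself, but it
// holds a non-serializable field and exposes a lazy member.
class ScanLike(val relation: HeavyRelation) extends Serializable {
  lazy val schema: String = "col1 INT, col2 STRING"

  // Capturing style to avoid: using `schema` directly makes the lambda close
  // over `this`, dragging `relation` into serialization.
  def closureCapturingThis: Int => String = i => s"$i -> $schema"

  // Style used by the patch: copy the lazy member into a local val first, so
  // the lambda captures only a String.
  def closureCapturingLocal: Int => String = {
    val outputSchema = schema
    i => s"$i -> $outputSchema"
  }
}

object ClosureCaptureDemo {
  // Returns true if Java serialization of `obj` succeeds.
  private def serializes(obj: AnyRef): Boolean =
    try {
      new ObjectOutputStream(new ByteArrayOutputStream()).writeObject(obj)
      true
    } catch {
      case _: java.io.NotSerializableException => false
    }

  def main(args: Array[String]): Unit = {
    val scan = new ScanLike(new HeavyRelation)
    println(s"captures this:  serializable = ${serializes(scan.closureCapturingThis)}")  // expected: false
    println(s"captures local: serializable = ${serializes(scan.closureCapturingLocal)}") // expected: true
  }
}
```

The same effect could be achieved by making the relation transient or the schema strict, but copying the needed value into a local before the closure is the smallest, lowest-risk change, which is presumably why the patch takes that route.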