From 3a32561eb905b236014cad74472c3a8c359b1aa0 Mon Sep 17 00:00:00 2001
From: Rajesh Balamohan
Date: Tue, 19 Jan 2016 09:57:52 +0530
Subject: [PATCH 1/3] SPARK-12898. Consider having dummyCallSite for
 HiveTableScan

---
 .../spark/sql/hive/execution/HiveTableScan.scala  | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index 1588728bdbaa4..15f4a017af8ff 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.hive.execution
 
+import org.apache.spark.util.Utils
+
 import scala.collection.JavaConverters._
 
 import org.apache.hadoop.hive.conf.HiveConf
@@ -134,10 +136,14 @@ case class HiveTableScan(
 
   protected override def doExecute(): RDD[InternalRow] = {
     val rdd = if (!relation.hiveQlTable.isPartitioned) {
-      hadoopReader.makeRDDForTable(relation.hiveQlTable)
+      Utils.withDummyCallSite(sqlContext.sparkContext) {
+        hadoopReader.makeRDDForTable(relation.hiveQlTable)
+      }
     } else {
-      hadoopReader.makeRDDForPartitionedTable(
-        prunePartitions(relation.getHiveQlPartitions(partitionPruningPred)))
+      Utils.withDummyCallSite(sqlContext.sparkContext) {
+        hadoopReader.makeRDDForPartitionedTable(
+          prunePartitions(relation.getHiveQlPartitions(partitionPruningPred)))
+      }
     }
     rdd.mapPartitionsInternal { iter =>
       val proj = UnsafeProjection.create(schema)

From 51c7aba3e831db5048f0b4185f7017456bd22428 Mon Sep 17 00:00:00 2001
From: Rajesh Balamohan
Date: Wed, 20 Jan 2016 11:41:35 +0530
Subject: [PATCH 2/3] SPARK-12898 [SQL]. Consider having dummyCallSite for
 HiveTableScan

---
 .../org/apache/spark/sql/hive/execution/HiveTableScan.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index 15f4a017af8ff..09a6e0c6db3c0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -135,6 +135,8 @@ case class HiveTableScan(
   }
 
   protected override def doExecute(): RDD[InternalRow] = {
+    // Using dummyCallSite, as getCallSite can turn out to be expensive with
+    // with multiple partitions.
     val rdd = if (!relation.hiveQlTable.isPartitioned) {
       Utils.withDummyCallSite(sqlContext.sparkContext) {
         hadoopReader.makeRDDForTable(relation.hiveQlTable)

From 067dd05c62d9b034fc2691485830db789193fc4f Mon Sep 17 00:00:00 2001
From: Rajesh Balamohan
Date: Wed, 20 Jan 2016 13:05:58 +0530
Subject: [PATCH 3/3] SPARK-12898 [SQL]. Consider having dummyCallSite for
 HiveTableScan

---
 .../org/apache/spark/sql/hive/execution/HiveTableScan.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index 09a6e0c6db3c0..eff8833e9232e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.hive.execution
 
-import org.apache.spark.util.Utils
-
 import scala.collection.JavaConverters._
 
 import org.apache.hadoop.hive.conf.HiveConf
@@ -34,6 +32,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.types.{BooleanType, DataType}
+import org.apache.spark.util.Utils
 
 /**
  * The Hive table scan operator. Column and partition pruning are both handled.
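
For readers outside the Spark codebase, the sketch below illustrates the technique the patches rely on. Utils.withDummyCallSite is private[spark], so this re-creates the same idea with public SparkContext APIs; the helper name here and the property keys "callSite.short" / "callSite.long" are assumptions for illustration, not the exact Spark implementation.

import org.apache.spark.{SparkConf, SparkContext}

object DummyCallSiteSketch {
  // Blank out the call-site local properties around `body` so that
  // SparkContext does not walk the stack to compute a call site for every RDD
  // created inside the block, then restore the previous values.
  def withDummyCallSite[T](sc: SparkContext)(body: => T): T = {
    val oldShort = sc.getLocalProperty("callSite.short")
    val oldLong = sc.getLocalProperty("callSite.long")
    try {
      sc.setLocalProperty("callSite.short", "")
      sc.setLocalProperty("callSite.long", "")
      body
    } finally {
      sc.setLocalProperty("callSite.short", oldShort)
      sc.setLocalProperty("callSite.long", oldLong)
    }
  }

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("dummy-call-site"))
    // Create many small RDDs under a dummy call site, roughly what
    // HiveTableScan does when a partitioned table expands into one RDD per
    // partition.
    val total = withDummyCallSite(sc) {
      (1 to 100).map(i => sc.parallelize(Seq(i)).count()).sum
    }
    println(total)
    sc.stop()
  }
}

The trade-off is that RDDs created inside the block lose their call-site information in the UI and event logs, which is acceptable here because the per-partition RDDs are internal to the scan.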