apache · fuwhu · Jan 15, 2020 · Jan 21, 2020 · Jan 28, 2020 · Jan 28, 2020
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala
@@ -146,7 +146,7 @@ case class HiveTableScanExec(
    * @param partitions All partitions of the relation.
    * @return Partitions that are involved in the query plan.
    */
-  private[hive] def prunePartitions(partitions: Seq[HivePartition]) = {
+  private[hive] def prunePartitions(partitions: Seq[HivePartition]): Seq[HivePartition] = {
     boundPruningPred match {
       case None => partitions
       case Some(shouldKeep) => partitions.filter { part =>
@@ -162,18 +162,36 @@ case class HiveTableScanExec(
     }
   }
 
+  @transient lazy val prunedPartitions: Seq[HivePartition] = {
+    if (relation.prunedPartitions.nonEmpty) {
+      val hivePartitions =
+        relation.prunedPartitions.get.map(HiveClientImpl.toHivePartition(_, hiveQlTable))
+      if (partitionPruningPred.forall(!ExecSubqueryExpression.hasSubquery(_))) {
+        hivePartitions
+      } else {
+        prunePartitions(hivePartitions)
+      }
+    } else {
+      if (sparkSession.sessionState.conf.metastorePartitionPruning &&
+        partitionPruningPred.nonEmpty) {
+        rawPartitions
+      } else {
+        prunePartitions(rawPartitions)
+      }
+    }
+  }
+
   // exposed for tests
-  @transient lazy val rawPartitions = {
+  @transient lazy val rawPartitions: Seq[HivePartition] = {
     val prunedPartitions =
       if (sparkSession.sessionState.conf.metastorePartitionPruning &&
-          partitionPruningPred.nonEmpty) {
+        partitionPruningPred.nonEmpty) {
         // Retrieve the original attributes based on expression ID so that capitalization matches.
         val normalizedFilters = partitionPruningPred.map(_.transform {
           case a: AttributeReference => originalAttributes(a)
         })
-        relation.prunedPartitions.getOrElse(
-          sparkSession.sessionState.catalog
-            .listPartitionsByFilter(relation.tableMeta.identifier, normalizedFilters))
+        sparkSession.sessionState.catalog
+          .listPartitionsByFilter(relation.tableMeta.identifier, normalizedFilters)
       } else {
         sparkSession.sessionState.catalog.listPartitions(relation.tableMeta.identifier)
       }
@@ -189,7 +207,7 @@ case class HiveTableScanExec(
       }
     } else {
       Utils.withDummyCallSite(sqlContext.sparkContext) {
-        hadoopReader.makeRDDForPartitionedTable(prunePartitions(rawPartitions))
+        hadoopReader.makeRDDForPartitionedTable(prunedPartitions)
       }
     }
     val numOutputRows = longMetric("numOutputRows")

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
@@ -161,7 +161,7 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter {
         case p @ HiveTableScanExec(columns, relation, _) =>
           val columnNames = columns.map(_.name)
           val partValues = if (relation.isPartitioned) {
-            p.prunePartitions(p.rawPartitions).map(_.getValues)
+            p.prunedPartitions.map(_.getValues)
           } else {
             Seq.empty
           }