Skip to content

Commit

Permalink
address comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
lianhuiwang committed Jun 11, 2017
1 parent c53a0c7 commit 120662e
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] {
val prunedFileIndex = catalogFileIndex.filterPartitions(partitionKeyFilters.toSeq)
val prunedFsRelation =
fsRelation.copy(location = prunedFileIndex)(sparkSession)
// Change table stats based on the sizeInBytes of pruned files
val withStats = logicalRelation.catalogTable.map(_.copy(
stats = Some(CatalogStatistics(sizeInBytes = BigInt(prunedFileIndex.sizeInBytes)))))
val prunedLogicalRelation = logicalRelation.copy(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
Expand Down Expand Up @@ -81,19 +82,26 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te
""".stripMargin)
}

// Compute table-level statistics first so that the catalog holds a sizeInBytes
// baseline; the per-relation sizes below are compared against it.
val tableName = "partTbl"
sql(s"analyze table $tableName compute STATISTICS")

val tableStats =
  spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).stats
assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost")
// Safe: the assertion above guarantees the statistics are defined.
val tableSizeInBytes = tableStats.get.sizeInBytes

withSQLConf(SQLConf.ENABLE_FALL_BACK_TO_HDFS_FOR_STATS.key -> "true") {
  val df = sql(s"SELECT * FROM $tableName where part = 1")
  val query = df.queryExecution.analyzed.analyze

  // Before Optimize runs, the relation is expected to report the size of the whole table.
  val sizes1 = query.collect {
    case relation: LogicalRelation => relation.computeStats(conf).sizeInBytes
  }
  assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}")
  // NOTE(review): the fixed 5000-byte thresholds look superseded by the
  // stats-relative checks that follow them; confirm whether they can be dropped.
  assert(sizes1(0) > 5000,
    s"expected > 5000 for test table '$tableName', got: ${sizes1(0)}")
  assert(sizes1(0) == tableSizeInBytes,
    s"expected the table size $tableSizeInBytes before optimization, got: ${sizes1(0)}")

  // After Optimize runs, the relation is expected to report less than the full table size.
  val sizes2 = Optimize.execute(query).collect {
    case relation: LogicalRelation => relation.computeStats(conf).sizeInBytes
  }
  assert(sizes2.size === 1, s"Size wrong for:\n ${df.queryExecution}")
  assert(sizes2(0) < 5000,
    s"expected < 5000 for test table '$tableName', got: ${sizes2(0)}")
  assert(sizes2(0) < tableSizeInBytes,
    s"expected less than the table size $tableSizeInBytes after optimization, got: ${sizes2(0)}")
}
}
}
Expand Down

0 comments on commit 120662e

Please sign in to comment.