From 830e1881b4ef4d9bb661d8b6635470e2596d4eaa Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Mon, 17 Sep 2018 00:31:32 +0800
Subject: [PATCH 1/2] Output "dataFilters" in DataSourceScanExec.metadata

---
 .../spark/sql/execution/DataSourceScanExec.scala |  1 +
 .../DataSourceScanExecRedactionSuite.scala       | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index 36ed016773b67..738c0666bc3fd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -284,6 +284,7 @@ case class FileSourceScanExec(
       "Batched" -> supportsBatch.toString,
       "PartitionFilters" -> seqToString(partitionFilters),
       "PushedFilters" -> seqToString(pushedDownFilters),
+      "DataFilters" -> seqToString(dataFilters),
       "Location" -> locationDesc)
 
     val withOptPartitionCount = relation.partitionSchemaOption.map { _ =>

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
index c8d045a32d73c..81f00e5e2f421 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
@@ -83,4 +83,20 @@ class DataSourceScanExecRedactionSuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  test("FileSourceScanExec metadata") {
+    withTempDir { dir =>
+      val basePath = dir.getCanonicalPath
+      spark.range(0, 10).toDF("a").write.parquet(new Path(basePath, "foo=1").toString)
+      val df = spark.read.parquet(basePath).filter("a = 1")
+
+      assert(isIncluded(df.queryExecution, "Format"))
+      assert(isIncluded(df.queryExecution, "ReadSchema"))
+      assert(isIncluded(df.queryExecution, "Batched"))
+      assert(isIncluded(df.queryExecution, "PartitionFilters"))
+      assert(isIncluded(df.queryExecution, "PushedFilters"))
+      assert(isIncluded(df.queryExecution, "DataFilters"))
+      assert(isIncluded(df.queryExecution, "Location"))
+    }
+  }
+
 }

From da868465de9ccdd302699786db30fe4fe90e4cfa Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Mon, 17 Sep 2018 08:51:30 +0800
Subject: [PATCH 2/2] simplify test

---
 .../sql/execution/DataSourceScanExecRedactionSuite.scala | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
index 81f00e5e2f421..11a1c9a1f9b9c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
@@ -84,10 +84,10 @@ class DataSourceScanExecRedactionSuite extends QueryTest with SharedSQLContext {
   }
 
   test("FileSourceScanExec metadata") {
-    withTempDir { dir =>
-      val basePath = dir.getCanonicalPath
-      spark.range(0, 10).toDF("a").write.parquet(new Path(basePath, "foo=1").toString)
-      val df = spark.read.parquet(basePath).filter("a = 1")
+    withTempPath { path =>
+      val dir = path.getCanonicalPath
+      spark.range(0, 10).write.parquet(dir)
+      val df = spark.read.parquet(dir)
 
       assert(isIncluded(df.queryExecution, "Format"))
       assert(isIncluded(df.queryExecution, "ReadSchema"))