From a96a9d3bc0ee10eac9fe78dbf6b86e8952af4912 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 17 Nov 2017 04:34:46 +0000 Subject: [PATCH] SQLTransformer should not unpersist possibly cached input dataset. --- .../org/apache/spark/ml/feature/SQLTransformer.scala | 3 ++- .../spark/ml/feature/SQLTransformerSuite.scala | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index 62c1972aab12c..0fb1d8c5dc579 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -70,7 +70,8 @@ class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) dataset.createOrReplaceTempView(tableName) val realStatement = $(statement).replace(tableIdentifier, tableName) val result = dataset.sparkSession.sql(realStatement) - dataset.sparkSession.catalog.dropTempView(tableName) + // Call SessionCatalog.dropTempView to avoid unpersisting the possibly cached dataset. + dataset.sparkSession.sessionState.catalog.dropTempView(tableName) result } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala index 753f890c48301..673a146e619f2 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.util.DefaultReadWriteTest import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.types.{LongType, StructField, StructType} +import org.apache.spark.storage.StorageLevel class SQLTransformerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { @@ -60,4 +61,15 @@ class SQLTransformerSuite val expected = StructType(Seq(StructField("id1", LongType, nullable = false))) assert(outputSchema === expected) } + + test("SPARK-22538: SQLTransformer should not unpersist given dataset") { + val df = spark.range(10) + df.cache() + df.count() + assert(df.storageLevel != StorageLevel.NONE) + new SQLTransformer() + .setStatement("SELECT id + 1 AS id1 FROM __THIS__") + .transform(df) + assert(df.storageLevel != StorageLevel.NONE) + } }