[SPARK-29392][CORE][SQL][FOLLOWUP] Avoid deprecated (in 2.13) Symbol syntax 'foo in favor of simpler expression, where it generated deprecation warnings

### What changes were proposed in this pull request?

Where it generates a deprecation warning in Scala 2.13, replace Symbol shorthand syntax `'foo` with an equivalent.

### Why are the changes needed?

Symbol syntax `'foo` is deprecated in Scala 2.13. The lines changed below otherwise generate about 440 warnings when building for 2.13.
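For reference, a minimal sketch of the language-level deprecation (the warning text is paraphrased from Scala 2.13's compiler message, not copied from this build):

```scala
// Scala 2.12: compiles cleanly.
// Scala 2.13: warning: symbol literal is deprecated; use Symbol("foo") instead
val s1 = 'foo            // deprecated shorthand for a Symbol
val s2 = Symbol("foo")   // equivalent, warning-free spelling
assert(s1 == s2)         // Symbols are interned, so both name the same Symbol
```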

The previous PR directly replaced many usages with `Symbol("foo")`. But it's also used to specify Columns via implicit conversion (`.select('foo)`) or even where simple Strings are used (`.as('foo)`), as it's kind of an abstraction for interned Strings.
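For example, both of these `Column.as` overloads exist, which is why a Symbol could stand in wherever an alias name was needed (hypothetical sketch; `df` is an arbitrary DataFrame and `import spark.implicits._` is assumed):

```scala
df.select($"foo".as('renamed))   // Column.as(alias: Symbol), via the 'foo shorthand
df.select($"foo".as("renamed"))  // Column.as(alias: String), the simpler equivalent
```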

While I find this syntax confusing and would like to deprecate it, here I just replaced it with `$"foo"` or plain `"foo"` wherever it generates a build warning (it's unclear why not all occurrences do).
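As an illustration of the replacements, a hedged sketch (the DataFrame and session setup here are hypothetical, not taken from the changed tests):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

val spark = SparkSession.builder().master("local[*]").getOrCreate()
import spark.implicits._   // enables both 'foo and $"foo" column syntax

val df = Seq((1, "a"), (2, "b")).toDF("foo", "bar")

// All four forms resolve to the same column:
df.select('foo)        // Symbol shorthand; deprecated under Scala 2.13
df.select($"foo")      // ColumnName interpolator, used as the replacement here
df.select("foo")       // plain String overload of select
df.select(col("foo"))  // explicit org.apache.spark.sql.functions.col
```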

### Does this PR introduce any user-facing change?

Should not change behavior.

### How was this patch tested?

Existing tests.

Closes #26748 from srowen/SPARK-29392.2.

Authored-by: Sean Owen <sean.owen@databricks.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
srowen authored and dongjoon-hyun committed Dec 4, 2019
1 parent a2102c8 commit 2ceed6f
Showing 21 changed files with 594 additions and 589 deletions.
@@ -59,9 +59,9 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest {
// Dataset for testing SparseVector
val toSparse: Vector => SparseVector = _.asInstanceOf[DenseVector].toSparse
val sparse = udf(toSparse)
-smallSparseBinaryDataset = smallBinaryDataset.withColumn("features", sparse('features))
-smallSparseValidationDataset = smallValidationDataset.withColumn("features", sparse('features))
+smallSparseBinaryDataset = smallBinaryDataset.withColumn("features", sparse($"features"))
+smallSparseValidationDataset =
+  smallValidationDataset.withColumn("features", sparse($"features"))
}

/**
@@ -161,14 +161,14 @@ class PowerIterationClusteringSuite extends SparkFunSuite
}

test("test default weight") {
-val dataWithoutWeight = data.sample(0.5, 1L).select('src, 'dst)
+val dataWithoutWeight = data.sample(0.5, 1L).select("src", "dst")

val assignments = new PowerIterationClustering()
.setK(2)
.setMaxIter(40)
.assignClusters(dataWithoutWeight)
val localAssignments = assignments
-.select('id, 'cluster)
+.select("id", "cluster")
.as[(Long, Int)].collect().toSet

val dataWithWeightOne = dataWithoutWeight.withColumn("weight", lit(1.0))
@@ -178,7 +178,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite
.setMaxIter(40)
.assignClusters(dataWithWeightOne)
val localAssignments2 = assignments2
-.select('id, 'cluster)
+.select("id", "cluster")
.as[(Long, Int)].collect().toSet

assert(localAssignments === localAssignments2)
@@ -59,12 +59,12 @@ class DistributionSuite extends SparkFunSuite {
true)

checkSatisfied(
-HashPartitioning(Seq('a), 10),
+HashPartitioning(Seq($"a"), 10),
UnspecifiedDistribution,
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc), 10),
+RangePartitioning(Seq($"a".asc), 10),
UnspecifiedDistribution,
true)

@@ -101,22 +101,22 @@ class DistributionSuite extends SparkFunSuite {
true)

checkSatisfied(
-HashPartitioning(Seq('a), 1),
+HashPartitioning(Seq($"a"), 1),
AllTuples,
true)

checkSatisfied(
-HashPartitioning(Seq('a), 10),
+HashPartitioning(Seq($"a"), 10),
AllTuples,
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc), 1),
+RangePartitioning(Seq($"a".asc), 1),
AllTuples,
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc), 10),
+RangePartitioning(Seq($"a".asc), 10),
AllTuples,
false)

@@ -130,17 +130,17 @@ class DistributionSuite extends SparkFunSuite {
// SinglePartition can satisfy all the distributions except `BroadcastDistribution`
checkSatisfied(
SinglePartition,
-ClusteredDistribution(Seq('a, 'b, 'c)),
+ClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
SinglePartition,
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
SinglePartition,
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
true)

checkSatisfied(
@@ -153,154 +153,154 @@
// HashPartitioning can satisfy ClusteredDistribution iff its hash expressions are a subset of
// the required clustering expressions.
checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-ClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
-HashPartitioning(Seq('b, 'c), 10),
-ClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"b", $"c"), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-ClusteredDistribution(Seq('b, 'c)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+ClusteredDistribution(Seq($"b", $"c")),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-ClusteredDistribution(Seq('d, 'e)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+ClusteredDistribution(Seq($"d", $"e")),
false)

// HashPartitioning can satisfy HashClusteredDistribution iff its hash expressions are exactly
// same with the required hash clustering expressions.
checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
-HashPartitioning(Seq('c, 'b, 'a), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"c", $"b", $"a"), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"a", $"b"), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)

// HashPartitioning cannot satisfy OrderedDistribution
checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 1),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+HashPartitioning(Seq($"a", $"b", $"c"), 1),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false) // TODO: this can be relaxed.

checkSatisfied(
-HashPartitioning(Seq('b, 'c), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+HashPartitioning(Seq($"b", $"c"), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false)
}

test("RangePartitioning is the output partitioning") {
// RangePartitioning can satisfy OrderedDistribution iff its ordering is a prefix
// of the required ordering, or the required ordering is a prefix of its ordering.
checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc)),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc, 'd.desc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc, 'd.desc)),
true)

// TODO: We can have an optimization to first sort the dataset
// by a.asc and then sort b, and c in a partition. This optimization
// should tradeoff the benefit of a less number of Exchange operators
// and the parallelism.
checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.desc, 'c.asc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".desc, $"c".asc)),
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('b.asc, 'a.asc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"b".asc, $"a".asc)),
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'd.desc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, 'd.desc)),
false)

// RangePartitioning can satisfy ClusteredDistribution iff its ordering expressions are a subset
// of the required clustering expressions.
checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('a, 'b, 'c)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('c, 'b, 'a)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"c", $"b", $"a")),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('b, 'c, 'a, 'd)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"b", $"c", $"a", $"d")),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('a, 'b)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"a", $"b")),
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('c, 'd)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"c", $"d")),
false)

// RangePartitioning cannot satisfy HashClusteredDistribution
checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)
}

test("Partitioning.numPartitions must match Distribution.requiredNumPartitions to satisfy it") {
checkSatisfied(
SinglePartition,
-ClusteredDistribution(Seq('a, 'b, 'c), Some(10)),
+ClusteredDistribution(Seq($"a", $"b", $"c"), Some(10)),
false)

checkSatisfied(
SinglePartition,
-HashClusteredDistribution(Seq('a, 'b, 'c), Some(10)),
+HashClusteredDistribution(Seq($"a", $"b", $"c"), Some(10)),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-ClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)
}
}
@@ -96,7 +96,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi

test("cache temp table") {
withTempView("tempTable") {
-testData.select('key).createOrReplaceTempView("tempTable")
+testData.select("key").createOrReplaceTempView("tempTable")
assertCached(sql("SELECT COUNT(*) FROM tempTable"), 0)
spark.catalog.cacheTable("tempTable")
assertCached(sql("SELECT COUNT(*) FROM tempTable"))
@@ -127,8 +127,8 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
}

test("uncaching temp table") {
-testData.select('key).createOrReplaceTempView("tempTable1")
-testData.select('key).createOrReplaceTempView("tempTable2")
+testData.select("key").createOrReplaceTempView("tempTable1")
+testData.select("key").createOrReplaceTempView("tempTable2")
spark.catalog.cacheTable("tempTable1")

assertCached(sql("SELECT COUNT(*) FROM tempTable1"))
@@ -361,15 +361,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
}

test("Drops temporary table") {
-testData.select('key).createOrReplaceTempView("t1")
+testData.select("key").createOrReplaceTempView("t1")
spark.table("t1")
spark.catalog.dropTempView("t1")
intercept[AnalysisException](spark.table("t1"))
}

test("Drops cached temporary table") {
-testData.select('key).createOrReplaceTempView("t1")
-testData.select('key).createOrReplaceTempView("t2")
+testData.select("key").createOrReplaceTempView("t1")
+testData.select("key").createOrReplaceTempView("t2")
spark.catalog.cacheTable("t1")

assert(spark.catalog.isCached("t1"))
@@ -859,7 +859,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi

test("SPARK-23880 table cache should be lazy and don't trigger any jobs") {
val cachedData = checkIfNoJobTriggered {
-spark.range(1002).filter('id > 1000).orderBy('id.desc).cache()
+spark.range(1002).filter($"id" > 1000).orderBy($"id".desc).cache()
}
assert(cachedData.collect === Seq(1001))
}
@@ -891,7 +891,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi

test("SPARK-24596 Non-cascading Cache Invalidation - drop persistent view") {
withTable("t") {
-spark.range(1, 10).toDF("key").withColumn("value", 'key * 2)
+spark.range(1, 10).toDF("key").withColumn("value", $"key" * 2)
.write.format("json").saveAsTable("t")
withView("t1") {
withTempView("t2") {
@@ -911,7 +911,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi

test("SPARK-24596 Non-cascading Cache Invalidation - uncache table") {
withTable("t") {
-spark.range(1, 10).toDF("key").withColumn("value", 'key * 2)
+spark.range(1, 10).toDF("key").withColumn("value", $"key" * 2)
.write.format("json").saveAsTable("t")
withTempView("t1", "t2") {
sql("CACHE TABLE t")
