[SPARK-29392][CORE][SQL][FOLLOWUP] Avoid deprecated (in 2.13) Symbol syntax 'foo in favor of simpler expression, where it generated deprecation warnings

### What changes were proposed in this pull request?

Where it generates a deprecation warning in Scala 2.13, replace Symbol shorthand syntax `'foo` with an equivalent.

### Why are the changes needed?

Symbol syntax `'foo` is deprecated in Scala 2.13. The lines changed below otherwise generate about 440 warnings when building for 2.13.
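For reference, a minimal sketch of the language-level deprecation (the warning text is paraphrased from Scala 2.13's compiler message, not copied from this build):

```scala
// Scala 2.12: compiles cleanly.
// Scala 2.13: warning: symbol literal is deprecated; use Symbol("foo") instead
val s1 = 'foo            // deprecated shorthand for a Symbol
val s2 = Symbol("foo")   // equivalent, warning-free spelling
assert(s1 == s2)         // Symbols are interned, so both name the same Symbol
```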

The previous PR directly replaced many usages with `Symbol("foo")`. But it's also used to specify Columns via implicit conversion (`.select('foo)`) or even where simple Strings are used (`.as('foo)`), as it's kind of an abstraction for interned Strings.
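For example, both of these `Column.as` overloads exist, which is why a Symbol could stand in wherever an alias name was needed (hypothetical sketch; `df` is an arbitrary DataFrame and `import spark.implicits._` is assumed):

```scala
df.select($"foo".as('renamed))   // Column.as(alias: Symbol), via the 'foo shorthand
df.select($"foo".as("renamed"))  // Column.as(alias: String), the simpler equivalent
```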

While I find this syntax confusing and would like to deprecate it, here I just replaced it with `$"foo"` or plain `"foo"` wherever it generates a build warning (it's unclear why not all occurrences do).
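As an illustration of the replacements, a hedged sketch (the DataFrame and session setup here are hypothetical, not taken from the changed tests):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

val spark = SparkSession.builder().master("local[*]").getOrCreate()
import spark.implicits._   // enables both 'foo and $"foo" column syntax

val df = Seq((1, "a"), (2, "b")).toDF("foo", "bar")

// All four forms resolve to the same column:
df.select('foo)        // Symbol shorthand; deprecated under Scala 2.13
df.select($"foo")      // ColumnName interpolator, used as the replacement here
df.select("foo")       // plain String overload of select
df.select(col("foo"))  // explicit org.apache.spark.sql.functions.col
```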

### Does this PR introduce any user-facing change?

Should not change behavior.

### How was this patch tested?

Existing tests.

Closes #26748 from srowen/SPARK-29392.2.

Authored-by: Sean Owen <sean.owen@databricks.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
srowen authored and dongjoon-hyun committed Dec 4, 2019
1 parent a2102c8 commit 2ceed6f
Showing 21 changed files with 594 additions and 589 deletions.
@@ -59,9 +59,9 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest {
// Dataset for testing SparseVector
val toSparse: Vector => SparseVector = _.asInstanceOf[DenseVector].toSparse
val sparse = udf(toSparse)
-smallSparseBinaryDataset = smallBinaryDataset.withColumn("features", sparse('features))
-smallSparseValidationDataset = smallValidationDataset.withColumn("features", sparse('features))
+smallSparseBinaryDataset = smallBinaryDataset.withColumn("features", sparse($"features"))
+smallSparseValidationDataset =
+  smallValidationDataset.withColumn("features", sparse($"features"))
}

/**
@@ -161,14 +161,14 @@ class PowerIterationClusteringSuite extends SparkFunSuite
}

test("test default weight") {
-val dataWithoutWeight = data.sample(0.5, 1L).select('src, 'dst)
+val dataWithoutWeight = data.sample(0.5, 1L).select("src", "dst")

val assignments = new PowerIterationClustering()
.setK(2)
.setMaxIter(40)
.assignClusters(dataWithoutWeight)
val localAssignments = assignments
-.select('id, 'cluster)
+.select("id", "cluster")
.as[(Long, Int)].collect().toSet

val dataWithWeightOne = dataWithoutWeight.withColumn("weight", lit(1.0))
@@ -178,7 +178,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite
.setMaxIter(40)
.assignClusters(dataWithWeightOne)
val localAssignments2 = assignments2
-.select('id, 'cluster)
+.select("id", "cluster")
.as[(Long, Int)].collect().toSet

assert(localAssignments === localAssignments2)
@@ -59,12 +59,12 @@ class DistributionSuite extends SparkFunSuite {
true)

checkSatisfied(
-HashPartitioning(Seq('a), 10),
+HashPartitioning(Seq($"a"), 10),
UnspecifiedDistribution,
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc), 10),
+RangePartitioning(Seq($"a".asc), 10),
UnspecifiedDistribution,
true)

@@ -101,22 +101,22 @@ class DistributionSuite extends SparkFunSuite {
true)

checkSatisfied(
-HashPartitioning(Seq('a), 1),
+HashPartitioning(Seq($"a"), 1),
AllTuples,
true)

checkSatisfied(
-HashPartitioning(Seq('a), 10),
+HashPartitioning(Seq($"a"), 10),
AllTuples,
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc), 1),
+RangePartitioning(Seq($"a".asc), 1),
AllTuples,
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc), 10),
+RangePartitioning(Seq($"a".asc), 10),
AllTuples,
false)

@@ -130,17 +130,17 @@ class DistributionSuite extends SparkFunSuite {
// SinglePartition can satisfy all the distributions except `BroadcastDistribution`
checkSatisfied(
SinglePartition,
-ClusteredDistribution(Seq('a, 'b, 'c)),
+ClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
SinglePartition,
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
SinglePartition,
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
true)

checkSatisfied(
@@ -153,154 +153,154 @@
// HashPartitioning can satisfy ClusteredDistribution iff its hash expressions are a subset of
// the required clustering expressions.
checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-ClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
-HashPartitioning(Seq('b, 'c), 10),
-ClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"b", $"c"), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-ClusteredDistribution(Seq('b, 'c)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+ClusteredDistribution(Seq($"b", $"c")),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-ClusteredDistribution(Seq('d, 'e)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+ClusteredDistribution(Seq($"d", $"e")),
false)

// HashPartitioning can satisfy HashClusteredDistribution iff its hash expressions are exactly
// same with the required hash clustering expressions.
checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
-HashPartitioning(Seq('c, 'b, 'a), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"c", $"b", $"a"), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+HashPartitioning(Seq($"a", $"b"), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)

// HashPartitioning cannot satisfy OrderedDistribution
checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 1),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+HashPartitioning(Seq($"a", $"b", $"c"), 1),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false) // TODO: this can be relaxed.

checkSatisfied(
-HashPartitioning(Seq('b, 'c), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+HashPartitioning(Seq($"b", $"c"), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false)
}

test("RangePartitioning is the output partitioning") {
// RangePartitioning can satisfy OrderedDistribution iff its ordering is a prefix
// of the required ordering, or the required ordering is a prefix of its ordering.
checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc)),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc, 'd.desc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc, 'd.desc)),
true)

// TODO: We can have an optimization to first sort the dataset
// by a.asc and then sort b, and c in a partition. This optimization
// should tradeoff the benefit of a less number of Exchange operators
// and the parallelism.
checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.desc, 'c.asc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".desc, $"c".asc)),
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('b.asc, 'a.asc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"b".asc, $"a".asc)),
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-OrderedDistribution(Seq('a.asc, 'b.asc, 'd.desc)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+OrderedDistribution(Seq($"a".asc, $"b".asc, 'd.desc)),
false)

// RangePartitioning can satisfy ClusteredDistribution iff its ordering expressions are a subset
// of the required clustering expressions.
checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('a, 'b, 'c)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c")),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('c, 'b, 'a)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"c", $"b", $"a")),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('b, 'c, 'a, 'd)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"b", $"c", $"a", $"d")),
true)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('a, 'b)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"a", $"b")),
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('c, 'd)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"c", $"d")),
false)

// RangePartitioning cannot satisfy HashClusteredDistribution
checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)
}

test("Partitioning.numPartitions must match Distribution.requiredNumPartitions to satisfy it") {
checkSatisfied(
SinglePartition,
-ClusteredDistribution(Seq('a, 'b, 'c), Some(10)),
+ClusteredDistribution(Seq($"a", $"b", $"c"), Some(10)),
false)

checkSatisfied(
SinglePartition,
-HashClusteredDistribution(Seq('a, 'b, 'c), Some(10)),
+HashClusteredDistribution(Seq($"a", $"b", $"c"), Some(10)),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-ClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)

checkSatisfied(
-HashPartitioning(Seq('a, 'b, 'c), 10),
-HashClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
+HashPartitioning(Seq($"a", $"b", $"c"), 10),
+HashClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)

checkSatisfied(
-RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
-ClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
+RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
+ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)
}
}
@@ -96,7 +96,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi

test("cache temp table") {
withTempView("tempTable") {
-testData.select('key).createOrReplaceTempView("tempTable")
+testData.select("key").createOrReplaceTempView("tempTable")
assertCached(sql("SELECT COUNT(*) FROM tempTable"), 0)
spark.catalog.cacheTable("tempTable")
assertCached(sql("SELECT COUNT(*) FROM tempTable"))
@@ -127,8 +127,8 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
}

test("uncaching temp table") {
-testData.select('key).createOrReplaceTempView("tempTable1")
-testData.select('key).createOrReplaceTempView("tempTable2")
+testData.select("key").createOrReplaceTempView("tempTable1")
+testData.select("key").createOrReplaceTempView("tempTable2")
spark.catalog.cacheTable("tempTable1")

assertCached(sql("SELECT COUNT(*) FROM tempTable1"))
@@ -361,15 +361,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
}

test("Drops temporary table") {
-testData.select('key).createOrReplaceTempView("t1")
+testData.select("key").createOrReplaceTempView("t1")
spark.table("t1")
spark.catalog.dropTempView("t1")
intercept[AnalysisException](spark.table("t1"))
}

test("Drops cached temporary table") {
-testData.select('key).createOrReplaceTempView("t1")
-testData.select('key).createOrReplaceTempView("t2")
+testData.select("key").createOrReplaceTempView("t1")
+testData.select("key").createOrReplaceTempView("t2")
spark.catalog.cacheTable("t1")

assert(spark.catalog.isCached("t1"))
@@ -859,7 +859,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi

test("SPARK-23880 table cache should be lazy and don't trigger any jobs") {
val cachedData = checkIfNoJobTriggered {
-spark.range(1002).filter('id > 1000).orderBy('id.desc).cache()
+spark.range(1002).filter($"id" > 1000).orderBy($"id".desc).cache()
}
assert(cachedData.collect === Seq(1001))
}
@@ -891,7 +891,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi

test("SPARK-24596 Non-cascading Cache Invalidation - drop persistent view") {
withTable("t") {
-spark.range(1, 10).toDF("key").withColumn("value", 'key * 2)
+spark.range(1, 10).toDF("key").withColumn("value", $"key" * 2)
.write.format("json").saveAsTable("t")
withView("t1") {
withTempView("t2") {
@@ -911,7 +911,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi

test("SPARK-24596 Non-cascading Cache Invalidation - uncache table") {
withTable("t") {
-spark.range(1, 10).toDF("key").withColumn("value", 'key * 2)
+spark.range(1, 10).toDF("key").withColumn("value", $"key" * 2)
.write.format("json").saveAsTable("t")
withTempView("t1", "t2") {
sql("CACHE TABLE t")
