Skip to content

Commit

Permalink
[MINOR][SQL] Rename config name to spark.sql.analyzer.failAmbiguousSelfJoin.enabled
Browse files Browse the repository at this point in the history

### What changes were proposed in this pull request?

Add the `.enabled` suffix to `spark.sql.analyzer.failAmbiguousSelfJoin`.

### Why are the changes needed?

To follow the existing naming style for boolean configurations.

### Does this PR introduce any user-facing change?

no

### How was this patch tested?

not needed

Closes #26694 from cloud-fan/conf.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
cloud-fan committed Dec 2, 2019
1 parent 4e073f3 commit e271664
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 15 deletions.
2 changes: 1 addition & 1 deletion docs/sql-migration-guide.md
Expand Up @@ -111,7 +111,7 @@ license: |

- The result of `java.lang.Math`'s `log`, `log1p`, `exp`, `expm1`, and `pow` may vary across platforms. In Spark 3.0, the result of the equivalent SQL functions (including related SQL functions like `LOG10`) return values consistent with `java.lang.StrictMath`. In virtually all cases this makes no difference in the return value, and the difference is very small, but may not exactly match `java.lang.Math` on x86 platforms in cases like, for example, `log(3.0)`, whose value varies between `Math.log()` and `StrictMath.log()`.

- Since Spark 3.0, Dataset query fails if it contains ambiguous column reference that is caused by self join. A typical example: `val df1 = ...; val df2 = df1.filter(...);`, then `df1.join(df2, df1("a") > df2("a"))` returns an empty result which is quite confusing. This is because Spark cannot resolve Dataset column references that point to tables being self joined, and `df1("a")` is exactly the same as `df2("a")` in Spark. To restore the behavior before Spark 3.0, you can set `spark.sql.analyzer.failAmbiguousSelfJoin` to `false`.
- Since Spark 3.0, Dataset query fails if it contains ambiguous column reference that is caused by self join. A typical example: `val df1 = ...; val df2 = df1.filter(...);`, then `df1.join(df2, df1("a") > df2("a"))` returns an empty result which is quite confusing. This is because Spark cannot resolve Dataset column references that point to tables being self joined, and `df1("a")` is exactly the same as `df2("a")` in Spark. To restore the behavior before Spark 3.0, you can set `spark.sql.analyzer.failAmbiguousSelfJoin.enabled` to `false`.

- Since Spark 3.0, `Cast` function processes string literals such as 'Infinity', '+Infinity', '-Infinity', 'NaN', 'Inf', '+Inf', '-Inf' in case insensitive manner when casting the literals to `Double` or `Float` type to ensure greater compatibility with other database systems. This behaviour change is illustrated in the table below:
<table class="table">
Expand Down
Expand Up @@ -875,8 +875,8 @@ object SQLConf {
.booleanConf
.createWithDefault(true)

val FAIL_AMBIGUOUS_SELF_JOIN =
buildConf("spark.sql.analyzer.failAmbiguousSelfJoin")
val FAIL_AMBIGUOUS_SELF_JOIN_ENABLED =
buildConf("spark.sql.analyzer.failAmbiguousSelfJoin.enabled")
.doc("When true, fail the Dataset query if it contains ambiguous self-join.")
.internal()
.booleanConf
Expand Down
4 changes: 2 additions & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
Expand Up @@ -229,7 +229,7 @@ class Dataset[T] private[sql](
case _ =>
queryExecution.analyzed
}
if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN)) {
if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) {
plan.setTagValue(Dataset.DATASET_ID_TAG, id)
}
plan
Expand Down Expand Up @@ -1337,7 +1337,7 @@ class Dataset[T] private[sql](
private def addDataFrameIdToCol(expr: NamedExpression): NamedExpression = {
val newExpr = expr transform {
case a: AttributeReference
if sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN) =>
if sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED) =>
val metadata = new MetadataBuilder()
.withMetadata(a.metadata)
.putLong(Dataset.DATASET_ID_KEY, id)
Expand Down
Expand Up @@ -71,7 +71,7 @@ class DetectAmbiguousSelfJoin(conf: SQLConf) extends Rule[LogicalPlan] {
}

override def apply(plan: LogicalPlan): LogicalPlan = {
if (!conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN)) return plan
if (!conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) return plan

// We always remove the special metadata from `AttributeReference` at the end of this rule, so
// Dataset column reference only exists in the root node via Dataset transformations like
Expand Down Expand Up @@ -149,7 +149,7 @@ class DetectAmbiguousSelfJoin(conf: SQLConf) extends Rule[LogicalPlan] {
"to figure out which one. Please alias the Datasets with different names via " +
"`Dataset.as` before joining them, and specify the column using qualified name, e.g. " +
"""`df.as("a").join(df.as("b"), $"a.id" > $"b.id")`. You can also set """ +
s"${SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key} to false to disable this check.")
s"${SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key} to false to disable this check.")
}
}

Expand Down
Expand Up @@ -96,7 +96,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
val df2 = df1.filter($"id" > 0)

withSQLConf(
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false",
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false",
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
// `df1("id") > df2("id")` is always false.
checkAnswer(df1.join(df2, df1("id") > df2("id")), Nil)
Expand All @@ -110,7 +110,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
}

withSQLConf(
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
assertAmbiguousSelfJoin(df1.join(df2, df1("id") > df2("id")))
}
Expand All @@ -121,7 +121,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
val df2 = df1.filter($"id" > 0)

withSQLConf(
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
assertAmbiguousSelfJoin(df1.join(df2, df1.colRegex("id") > df2.colRegex("id")))
}
Expand All @@ -132,7 +132,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
val df2 = df1.filter($"a.b" > 0)

withSQLConf(
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
assertAmbiguousSelfJoin(df1.join(df2, df1("a.b") > df2("a.c")))
}
Expand All @@ -143,7 +143,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
val df2 = df1.filter($"id" > 0)

withSQLConf(
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false",
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false",
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
// `df2("id")` actually points to the column of `df1`.
checkAnswer(df1.join(df2).select(df2("id")), Seq(0, 0, 1, 1, 2, 2).map(Row(_)))
Expand All @@ -157,7 +157,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
}

withSQLConf(
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
assertAmbiguousSelfJoin(df1.join(df2).select(df2("id")))
}
Expand All @@ -170,7 +170,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
val df4 = spark.range(1)

withSQLConf(
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false",
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false",
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
// `df2("id") < df3("id")` is always false
checkAnswer(df1.join(df2).join(df3, df2("id") < df3("id")), Nil)
Expand All @@ -196,7 +196,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
}

withSQLConf(
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
assertAmbiguousSelfJoin(df1.join(df2).join(df3, df2("id") < df3("id")))
assertAmbiguousSelfJoin(df1.join(df4).join(df2).select(df2("id")))
Expand Down

0 comments on commit e271664

Please sign in to comment.