apache · shrirangmhalgi · May 22, 2026 · cloud-fan · May 22, 2026
diff --git a/...core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala b/...core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable
 
 import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Cast, Equality, Expression, ExprId}
-import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Project}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.classic.Dataset
 import org.apache.spark.sql.errors.QueryCompilationErrors
@@ -153,9 +153,29 @@ object DetectAmbiguousSelfJoin extends Rule[LogicalPlan] {
           }
           condition.toSeq.flatMap(getAmbiguousAttrs)
 
-        case _ => ambiguousColRefs.toSeq.map { ref =>
-          colRefAttrs.find(attr => toColumnReference(attr) == ref).get
-        }
+        case _ =>
+          // SPARK-52498: For a Project on top of a self-join whose condition has an equality
+          // against a foldable expression (e.g., df.join(df, df("col") === 0).select(df("col"))),
+          // the column references in the select are treated as not ambiguous, on the assumption
+          // that such a condition makes it irrelevant which side the column comes from.
+          // One foldable operand in any equality of the condition is enough to qualify.
+          val isProjectOverFoldableSelfJoin = plan match {
+            case Project(_, Join(
+                LogicalPlanWithDatasetId(_, leftId),
+                LogicalPlanWithDatasetId(_, rightId),
+                _, Some(joinCond), _)) if leftId == rightId =>
+              joinCond.collectFirst {
+                case Equality(lhs, rhs) if lhs.foldable || rhs.foldable => ()
+              }.nonEmpty
+            case _ => false
+          }
+          if (!isProjectOverFoldableSelfJoin) {
+            ambiguousColRefs.toSeq.map { colRef =>
+              colRefAttrs.find(candidate => toColumnReference(candidate) == colRef).get
+            }
+          } else {
+            Nil
+          }
       }
 
       if (ambiguousAttrs.nonEmpty) {

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala
@@ -553,4 +553,18 @@ class DataFrameSelfJoinSuite extends SharedSparkSession {
           .count() == 1)
     }
   }
+
+  test("SPARK-52498: self join with foldable condition and select should not be ambiguous") {
+    withSQLConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true") {
+      val base = spark.range(3).toDF("col1")
+
+      // One level: self-join on an equality with a literal, then select a column of `base`.
+      val once = base.join(base, base("col1") === 0).select(base("col1"))
+      once.queryExecution.analyzed // only the analyzed plan is accessed; no rows are asserted
+
+      // Two levels: apply the same join-and-select pattern to the already self-joined result.
+      val twice = once.join(once, once("col1") === 0).select(once("col1"))
+      twice.queryExecution.analyzed
+    }
+  }
 }