diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 323a7db9c7ad..65fa2414c9ce 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1868,7 +1868,14 @@ class Analyzer(
               // Only Project, Aggregate, CollectMetrics can host star expressions.
               case u @ (_: Project | _: Aggregate | _: CollectMetrics) =>
                 Try(s.expand(u.children.head, resolver)) match {
-                  case Success(expanded) => expanded.map(wrapOuterReference)
+                  case Success(expanded) =>
+                    expanded.map {
+                      case alias: Alias =>
+                        alias.withNewChildren(Seq(wrapOuterReference(alias.child)))
+                          .asInstanceOf[Alias]
+                      case e =>
+                        Alias(wrapOuterReference(e), toPrettySQL(e))()
+                    }
                   case Failure(_) => throw e
                 }
               // Do not use the outer plan to resolve the star expression
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
index 86718ee43431..90e3d8de829e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
@@ -225,12 +225,15 @@ class ResolveSubquerySuite extends AnalysisTest {
   test("SPARK-35618: lateral join with star expansion in functions") {
     val outerA = OuterReference(a.withQualifier(Seq("t1")))
     val outerB = OuterReference(b.withQualifier(Seq("t1")))
+    val aliasedOuterA = Alias(outerA, a.name)()
+    val aliasedOuterB = Alias(outerB, b.name)()
     val array = CreateArray(Seq(star("t1")))
     val newArray = CreateArray(Seq(outerA, outerB))
+    val aliasedNewArray = CreateArray(Seq(aliasedOuterA, aliasedOuterB))
     checkAnalysis(
       lateralJoin(t1.as("t1"), t0.select(array)),
       LateralJoin(t1,
-        LateralSubquery(t0.select(newArray.as(newArray.sql)), Seq(a, b)), Inner, None)
+        LateralSubquery(t0.select(newArray.as(aliasedNewArray.sql)), Seq(a, b)), Inner, None)
     )
     assertAnalysisErrorCondition(
       lateralJoin(t1.as("t1"), t0.select(Count(star("t1")))),
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
index 4362eeb09bf6..2e7063a5792b 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -507,7 +507,7 @@ Project [c1#x, c2#x, c3#x, c4#x, c5#x]
 +- LateralJoin lateral-subquery#x [c1#x && c2#x && c3#x && c4#x && c5#x], Inner
    :  +- SubqueryAlias T
    :     +- Project [c1#x AS c1#x, c2#x AS c2#x, c3#x AS c3#x, c4#x AS c4#x, c5#x AS c5#x]
-   :        +- Project [outer(c1#x), outer(c2#x), outer(c3#x), outer(c4#x), outer(c5#x)]
+   :        +- Project [outer(c1#x) AS c1#x, outer(c2#x) AS c2#x, outer(c3#x) AS c3#x, outer(c4#x) AS c4#x, outer(c5#x) AS c5#x]
    :           +- OneRowRelation
    +- SubqueryAlias v1
       +- View (`v1`, [c1#x, c2#x, c3#x, c4#x, c5#x])
@@ -522,8 +522,8 @@ SELECT T.* FROM v1, LATERAL (SELECT COALESCE(v1.*)) AS T(x)
 Project [x#x]
 +- LateralJoin lateral-subquery#x [c1#x && c2#x && c3#x && c4#x && c5#x], Inner
    :  +- SubqueryAlias T
-   :     +- Project [coalesce(outer(v1.c1), outer(v1.c2), outer(v1.c3), outer(v1.c4), outer(v1.c5))#x AS x#x]
-   :        +- Project [coalesce(outer(c1#x), outer(c2#x), cast(outer(c3#x) as int), outer(c4#x), outer(c5#x)) AS coalesce(outer(v1.c1), outer(v1.c2), outer(v1.c3), outer(v1.c4), outer(v1.c5))#x]
+   :     +- Project [coalesce(outer(v1.c1) AS c1, outer(v1.c2) AS c2, outer(v1.c3) AS c3, outer(v1.c4) AS c4, outer(v1.c5) AS c5)#x AS x#x]
+   :        +- Project [coalesce(outer(c1#x), outer(c2#x), cast(outer(c3#x) as int), outer(c4#x), outer(c5#x)) AS coalesce(outer(v1.c1) AS c1, outer(v1.c2) AS c2, outer(v1.c3) AS c3, outer(v1.c4) AS c4, outer(v1.c5) AS c5)#x]
    :           +- OneRowRelation
    +- SubqueryAlias v1
       +- View (`v1`, [c1#x, c2#x, c3#x, c4#x, c5#x])
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out
index 44512caf1def..9ca0517a60e8 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out
@@ -1075,3 +1075,73 @@ Project [c1#x, c2#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#xL]
    +- View (`t1`, [c1#x, c2#x])
       +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x]
          +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query analysis
+Project [scalar-subquery#x [col1#x] AS scalarsubquery(col1)#x]
+:  +- GlobalLimit 1
+:     +- LocalLimit 1
+:        +- Project [outer(col1#x) AS col1#x]
+:           +- SubqueryAlias t2
+:              +- LocalRelation [col1#x]
++- SubqueryAlias t1
+   +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT (SELECT t1.s.* FROM VALUES(2) AS t2(col1) LIMIT 1)
+FROM (SELECT named_struct('a', 1) AS s) AS t1
+-- !query analysis
+Project [scalar-subquery#x [s#x] AS scalarsubquery(s)#x]
+:  +- GlobalLimit 1
+:     +- LocalLimit 1
+:        +- Project [outer(s#x).a AS a#x]
+:           +- SubqueryAlias t2
+:              +- LocalRelation [col1#x]
++- SubqueryAlias t1
+   +- Project [named_struct(a, 1) AS s#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT (SELECT * FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query analysis
+Project [scalar-subquery#x [] AS scalarsubquery()#x]
+:  +- GlobalLimit 1
+:     +- LocalLimit 1
+:        +- Project [col1#x]
+:           +- SubqueryAlias t2
+:              +- LocalRelation [col1#x]
++- SubqueryAlias t1
+   +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT (SELECT t1.* FROM (SELECT 3 AS col1) AS t1 LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query analysis
+Project [scalar-subquery#x [] AS scalarsubquery()#x]
+:  +- GlobalLimit 1
+:     +- LocalLimit 1
+:        +- Project [col1#x]
+:           +- SubqueryAlias t1
+:              +- Project [3 AS col1#x]
+:                 +- OneRowRelation
++- SubqueryAlias t1
+   +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT (SELECT * FROM (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1)) FROM VALUES(1) AS t1(col1)
+-- !query analysis
+Project [scalar-subquery#x [col1#x] AS scalarsubquery(col1)#x]
+:  +- Project [col1#x]
+:     +- SubqueryAlias __auto_generated_subquery_name
+:        +- GlobalLimit 1
+:           +- LocalLimit 1
+:              +- Project [outer(col1#x) AS col1#x]
+:                 +- SubqueryAlias t2
+:                    +- LocalRelation [col1#x]
++- SubqueryAlias t1
+   +- LocalRelation [col1#x]
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
index ef1e612fd744..b99913c68055 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
@@ -258,4 +258,20 @@ select * from (
 where t.c2 is not null;
 
 -- SPARK-43838: Subquery on single table with having clause
-SELECT c1, c2, (SELECT count(*) cnt FROM t1 t2 WHERE t1.c1 = t2.c1 HAVING cnt = 0) FROM t1
+SELECT c1, c2, (SELECT count(*) cnt FROM t1 t2 WHERE t1.c1 = t2.c1 HAVING cnt = 0) FROM t1;
+
+-- Outer star expansion in scalar subquery
+SELECT (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1);
+
+-- Outer struct star expansion in scalar subquery
+SELECT (SELECT t1.s.* FROM VALUES(2) AS t2(col1) LIMIT 1)
+FROM (SELECT named_struct('a', 1) AS s) AS t1;
+
+-- Untargeted star in subquery should NOT expand from outer scope
+SELECT (SELECT * FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1);
+
+-- Inner scope wins when star target matches both inner and outer scope
+SELECT (SELECT t1.* FROM (SELECT 3 AS col1) AS t1 LIMIT 1) FROM VALUES(1) AS t1(col1);
+
+-- Outer star expansion through a derived table wrapper
+SELECT (SELECT * FROM (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1)) FROM VALUES(1) AS t1(col1);
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
index 85bd9137602a..14a89975a857 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
@@ -607,3 +607,44 @@ struct
 -- !query output
 0 1 NULL
 1 2 NULL
+
+
+-- !query
+SELECT (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query schema
+struct
+-- !query output
+1
+
+
+-- !query
+SELECT (SELECT t1.s.* FROM VALUES(2) AS t2(col1) LIMIT 1)
+FROM (SELECT named_struct('a', 1) AS s) AS t1
+-- !query schema
+struct
+-- !query output
+1
+
+
+-- !query
+SELECT (SELECT * FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query schema
+struct
+-- !query output
+2
+
+
+-- !query
+SELECT (SELECT t1.* FROM (SELECT 3 AS col1) AS t1 LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query schema
+struct
+-- !query output
+3
+
+
+-- !query
+SELECT (SELECT * FROM (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1)) FROM VALUES(1) AS t1(col1)
+-- !query schema
+struct
+-- !query output
+1