diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index 0c8ecbab48ecb..68fb42da71033 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -28,9 +28,11 @@ import org.apache.spark.sql.catalyst.expressions.SubExprUtils.wrapOuterReference import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.catalyst.trees.TreePattern._ +import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.NullType trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { @@ -180,7 +182,19 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { field } if (newChild.resolved) { - ExtractValue(child = newChild, extraction = resolvedField, resolver = resolver) + // applyOrNull propagates NULL when the base is NullType instead of throwing + // INVALID_EXTRACT_BASE_FIELD_TYPE, consistent with multipart field access (col.a). + val extracted = ExtractValue.applyOrNull( + child = newChild, extraction = resolvedField, resolver = resolver) + // A NullType base yields a bare NULL literal, which would otherwise produce an output + // column named `NULL`. Alias it with the extraction's text (e.g. `col[0]`) to keep a + // stable column name; CleanupAliases later trims this alias where it's not a top-level + // projection output. 
+ if (newChild.dataType == NullType) { + Alias(extracted, toPrettySQL(u.copy(child = newChild, extraction = resolvedField)))() + } else { + extracted + } } else { u.copy(child = newChild, extraction = resolvedField) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala index 93d53d9f33a64..aa379a63a2af9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala @@ -290,8 +290,9 @@ object TableOutputResolver extends SQLConfHelper with Logging { * that exceed the column length are caught at runtime. Uses `getRawType` so it works for both * V1 and V2 tables. Shared by the by-name and by-position default-fill paths. * - * `applyColumnMetadata` strips the default's outer alias and re-wraps it with the required - * metadata, so the length check is applied to the default value itself (the alias child). + * We unwrap the default's outer alias before the length check so the check wraps the + * default value itself, not the alias; `applyColumnMetadata` then re-adds the required + * alias and metadata afterward. 
*/ private def applyDefaultWithLengthCheck( defaultExpr: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 022e130ec3a5e..2f9c1e2dd704b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -52,6 +52,25 @@ object ExtractValue { } } + /** + * Resolution-time variant of [[apply]]: extracting a field/element/key from a NULL (`NullType`) + * base yields NULL (SQL NULL propagation) instead of throwing `INVALID_EXTRACT_BASE_FIELD_TYPE`. + * A `NullType` column can arise e.g. from schema evolution with missing columns. This is used by + * the user-facing extraction resolution sites (multipart name resolution and + * `UnresolvedExtractValue` resolution). `extractValue` itself is left unchanged, so the other + * direct consumers keep their prior (throwing) behavior. + */ + def applyOrNull( + child: Expression, + extraction: Expression, + resolver: Resolver): Expression = { + if (child.dataType == NullType) { + Literal(null, NullType) + } else { + apply(child, extraction, resolver) + } + } + /** * Returns the resolved `ExtractValue`. It will return one kind of concrete `ExtractValue`, * depend on the type of `child` and `extraction`. @@ -119,13 +138,21 @@ object ExtractValue { val withExtractedNestedFields = nestedFields .foldLeft(Some(attribute): Option[Expression]) { case (Some(expression), field) => - ExtractValue.extractValue( - child = expression, - extraction = Literal(field), - resolver = resolver - ) match { - case Left(e) => Some(e) - case Right(_) => None + // Extraction from a NULL (NullType) base propagates NULL rather than failing, matching + // the user-facing resolution sites (which use applyOrNull). 
Treating it as extractable + // here keeps the NullType candidate in single-pass NameScope candidate filtering so it + // resolves consistently with the legacy analyzer. + if (expression.dataType == NullType) { + Some(Literal(null, NullType)) + } else { + ExtractValue.extractValue( + child = expression, + extraction = Literal(field), + resolver = resolver + ) match { + case Left(e) => Some(e) + case Right(_) => None + } } case _ => None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index 114a43c34c040..a3008a949ec07 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -396,7 +396,9 @@ package object expressions { // Then this will add ExtractValue("c", ExtractValue("b", a)), and alias the final // expression as "c". val fieldExprs = nestedFields.foldLeft(a: Expression) { (e, name) => - ExtractValue(e, Literal(name), resolver) + // applyOrNull propagates NULL when the base is NullType (e.g. a NullType column from + // schema evolution) instead of throwing INVALID_EXTRACT_BASE_FIELD_TYPE. 
+ ExtractValue.applyOrNull(e, Literal(name), resolver) } Seq(Alias(fieldExprs, nestedFields.last)()) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-nulltype-single-pass.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-nulltype-single-pass.sql.out new file mode 100644 index 0000000000000..3def19963fc08 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-nulltype-single-pass.sql.out @@ -0,0 +1,8 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT col.a FROM (SELECT null AS col) t +-- !query analysis +Project [null AS a#x] ++- SubqueryAlias t + +- Project [null AS col#x] + +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out index 9f34e1a6e4ea8..789f5bcb23629 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out @@ -38,3 +38,30 @@ DROP TABLE t1 -- !query analysis DropTable false, false +- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1 + + +-- !query +SELECT col.a FROM (SELECT null AS col) t +-- !query analysis +Project [null AS a#x] ++- SubqueryAlias t + +- Project [null AS col#x] + +- OneRowRelation + + +-- !query +SELECT col[0] FROM (SELECT null AS col) t +-- !query analysis +Project [null AS col[0]#x] ++- SubqueryAlias t + +- Project [null AS col#x] + +- OneRowRelation + + +-- !query +SELECT col['key'] FROM (SELECT null AS col) t +-- !query analysis +Project [null AS col[key]#x] ++- SubqueryAlias t + +- Project [null AS col#x] + +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out index cfcd9c5c42e28..11f5f948d8f69 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out @@ -500,3 +500,12 @@ Project [sum_val#x] +- Aggregate [col1#x], [(col1#x.nums[0] + col1#x.nums[1]) AS sum_val#x, col1#x] +- SubqueryAlias t +- LocalRelation [col1#x] + + +-- !query +SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1 +-- !query analysis +Filter (cast(null as int) = 1) ++- Aggregate [col1#x], [named_struct(a, 1) AS col1#x] + +- SubqueryAlias t + +- LocalRelation [col1#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract-value-nulltype-single-pass.sql b/sql/core/src/test/resources/sql-tests/inputs/extract-value-nulltype-single-pass.sql new file mode 100644 index 0000000000000..19d2154936f9c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/extract-value-nulltype-single-pass.sql @@ -0,0 +1,10 @@ +-- SPARK-57186: multipart field access (col.a) on a NullType base propagates NULL under the +-- single-pass resolver as well, consistently with the legacy analyzer. Dual-running both analyzers +-- locks in that consistency (no HYBRID_ANALYZER_EXCEPTION). +-- The col[0]/col['key'] subscript forms are intentionally not covered here: the single-pass +-- resolver does not resolve subscript extraction (UnresolvedExtractValue) at all -- a pre-existing +-- limitation independent of NullType -- so they are exercised only under the legacy analyzer in +-- extract-value-resolution-edge-cases.sql. 
+--SET spark.sql.analyzer.singlePassResolver.dualRunWithLegacy=true + +SELECT col.a FROM (SELECT null AS col) t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql b/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql index 5a2784d542702..48ebdfc0a3fab 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql @@ -8,3 +8,13 @@ SELECT col1.a, a FROM t1 ORDER BY col1.a; SELECT split(col1, '-')[1] AS a FROM VALUES('a-b') ORDER BY split(col1, '-')[1]; DROP TABLE t1; + +-- SPARK-57186: extracting a field/element/key from a NullType base returns NULL instead of +-- throwing INVALID_EXTRACT_BASE_FIELD_TYPE (SQL NULL propagation; a NullType column can arise e.g. +-- from schema evolution with missing columns). This applies uniformly to dotted field access +-- (`col.a`) and the subscript forms (`col[0]`, `col['key']`), and is implemented at the +-- user-facing resolution sites (ExtractValue.applyOrNull) without changing the shared +-- ExtractValue.extractValue utility. 
+SELECT col.a FROM (SELECT null AS col) t; +SELECT col[0] FROM (SELECT null AS col) t; +SELECT col['key'] FROM (SELECT null AS col) t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql b/sql/core/src/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql index 1f53ca359fe13..de3f5c8cc43f2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql @@ -141,3 +141,10 @@ FROM VALUES (NAMED_STRUCT('nums', ARRAY(10, 20))) t (col1) GROUP BY col1 HAVING col1.nums[0] + col1.nums[1] > 25 ORDER BY col1.nums[0]; + +-- SPARK-57186: Alias type: Struct, Table column type: NullType (void). +-- Unlike the STRING/ARRAY/MAP input bases above, which throw INVALID_EXTRACT_BASE_FIELD_TYPE for +-- this shadowing pattern, a NullType input column that shadows the struct alias yields NULL +-- (NULL propagation). The HAVING predicate is therefore NULL and the row is filtered out, giving +-- an empty result. NullType is intentionally the one base type that does not error here. 
+SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1; diff --git a/sql/core/src/test/resources/sql-tests/results/extract-value-nulltype-single-pass.sql.out b/sql/core/src/test/resources/sql-tests/results/extract-value-nulltype-single-pass.sql.out new file mode 100644 index 0000000000000..e410666845123 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/extract-value-nulltype-single-pass.sql.out @@ -0,0 +1,7 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT col.a FROM (SELECT null AS col) t +-- !query schema +struct<a:void> +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out index 0565edc99b95d..4d6cbb936d778 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out @@ -37,3 +37,27 @@ DROP TABLE t1 struct<> -- !query output + + +-- !query +SELECT col.a FROM (SELECT null AS col) t +-- !query schema +struct<a:void> +-- !query output +NULL + + +-- !query +SELECT col[0] FROM (SELECT null AS col) t +-- !query schema +struct<col[0]:void> +-- !query output +NULL + + +-- !query +SELECT col['key'] FROM (SELECT null AS col) t +-- !query schema +struct<col[key]:void> +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out b/sql/core/src/test/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out index f685076f9f30c..df08b3837b553 100644 --- a/sql/core/src/test/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out @@ -427,3 +427,11 @@ ORDER BY col1.nums[0] struct<sum_val:int> -- !query output 30 
+ + +-- !query +SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1 +-- !query schema +struct<col1:struct<a:int>> +-- !query output +