apache · dejankrak-db · May 31, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/...talyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/...talyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
@@ -30,7 +30,9 @@ import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin
 import org.apache.spark.sql.catalyst.trees.TreePattern._
+import org.apache.spark.sql.catalyst.util.toPrettySQL
 import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors}
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.NullType
 
 trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
 
@@ -180,7 +182,19 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
             field
           }
           if (newChild.resolved) {
-            ExtractValue(child = newChild, extraction = resolvedField, resolver = resolver)
+            // applyOrNull propagates NULL when the base is NullType instead of throwing
+            // INVALID_EXTRACT_BASE_FIELD_TYPE, consistent with multipart field access (col.a).
+            val extracted = ExtractValue.applyOrNull(
+              child = newChild, extraction = resolvedField, resolver = resolver)
+            // A NullType base yields a bare NULL literal, which would otherwise produce an output
+            // column named `NULL`. Alias it with the extraction's text (e.g. `col[0]`) to keep a
+            // stable column name; CleanupAliases later trims this alias where it's not a top-level
+            // projection output.
+            if (newChild.dataType == NullType) {
+              Alias(extracted, toPrettySQL(u.copy(child = newChild, extraction = resolvedField)))()
+            } else {
+              extracted
+            }
           } else {
             u.copy(child = newChild, extraction = resolvedField)
           }

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala
@@ -290,8 +290,9 @@ object TableOutputResolver extends SQLConfHelper with Logging {
    * that exceed the column length are caught at runtime. Uses `getRawType` so it works for both
    * V1 and V2 tables. Shared by the by-name and by-position default-fill paths.
    *
-   * `applyColumnMetadata` strips the default's outer alias and re-wraps it with the required
-   * metadata, so the length check is applied to the default value itself (the alias child).
+   * We unwrap the default's outer alias before the length check so the check wraps the
+   * default value itself, not the alias; `applyColumnMetadata` then re-adds the required
+   * alias and metadata afterward.
    */
   private def applyDefaultWithLengthCheck(
       defaultExpr: Expression,

diff --git a/...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -52,6 +52,25 @@ object ExtractValue {
     }
   }
 
+  /**
+   * NULL-propagating counterpart of [[apply]], intended for resolution-time callers. When
+   * `child` has type `NullType`, the result is a `NullType` NULL literal and `extraction` is not
+   * inspected; no `INVALID_EXTRACT_BASE_FIELD_TYPE` is raised for such a base. For every other
+   * base type the call is forwarded to [[apply]] unchanged. `extractValue` is deliberately not
+   * modified, so code calling it directly does not pick up this NULL-propagating shortcut.
+   * A `NullType` column can show up e.g. after schema evolution with missing columns.
+   */
+  def applyOrNull(
+      child: Expression,
+      extraction: Expression,
+      resolver: Resolver): Expression = {
+    child.dataType match {
+      // Nothing can be extracted from an untyped NULL base, so the result is NULL as well.
+      case NullType => Literal(null, NullType)
+      case _ => apply(child, extraction, resolver)
+    }
+  }
+
   /**
    * Returns the resolved `ExtractValue`. It will return one kind of concrete `ExtractValue`,
    * depend on the type of `child` and `extraction`.
@@ -119,13 +138,21 @@ object ExtractValue {
     val withExtractedNestedFields = nestedFields
       .foldLeft(Some(attribute): Option[Expression]) {
         case (Some(expression), field) =>
-          ExtractValue.extractValue(
-            child = expression,
-            extraction = Literal(field),
-            resolver = resolver
-          ) match {
-            case Left(e) => Some(e)
-            case Right(_) => None
+          // Extraction from a NULL (NullType) base propagates NULL rather than failing, matching
+          // the user-facing resolution sites (which use applyOrNull). Treating it as extractable
+          // here keeps the NullType candidate in single-pass NameScope candidate filtering so it
+          // resolves consistently with the legacy analyzer.
+          if (expression.dataType == NullType) {
+            Some(Literal(null, NullType))
+          } else {
+            ExtractValue.extractValue(
+              child = expression,
+              extraction = Literal(field),
+              resolver = resolver
+            ) match {
+              case Left(e) => Some(e)
+              case Right(_) => None
+            }
           }
         case _ =>
           None

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -396,7 +396,9 @@ package object expressions  {
           // Then this will add ExtractValue("c", ExtractValue("b", a)), and alias the final
           // expression as "c".
           val fieldExprs = nestedFields.foldLeft(a: Expression) { (e, name) =>
-            ExtractValue(e, Literal(name), resolver)
+            // applyOrNull propagates NULL when the base is NullType (e.g. a NullType column from
+            // schema evolution) instead of throwing INVALID_EXTRACT_BASE_FIELD_TYPE.
+            ExtractValue.applyOrNull(e, Literal(name), resolver)
           }
           Seq(Alias(fieldExprs, nestedFields.last)())
 

diff --git a/.../src/test/resources/sql-tests/analyzer-results/extract-value-nulltype-single-pass.sql.out b/.../src/test/resources/sql-tests/analyzer-results/extract-value-nulltype-single-pass.sql.out
@@ -0,0 +1,8 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT col.a FROM (SELECT null AS col) t
+-- !query analysis
+Project [null AS a#x]
++- SubqueryAlias t
+   +- Project [null AS col#x]
+      +- OneRowRelation
diff --git a/...src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out b/...src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out
@@ -38,3 +38,30 @@ DROP TABLE t1
 -- !query analysis
 DropTable false, false
 +- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1
+
+
+-- !query
+SELECT col.a FROM (SELECT null AS col) t
+-- !query analysis
+Project [null AS a#x]
++- SubqueryAlias t
+   +- Project [null AS col#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT col[0] FROM (SELECT null AS col) t
+-- !query analysis
+Project [null AS col[0]#x]
++- SubqueryAlias t
+   +- Project [null AS col#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT col['key'] FROM (SELECT null AS col) t
+-- !query analysis
+Project [null AS col[key]#x]
++- SubqueryAlias t
+   +- Project [null AS col#x]
+      +- OneRowRelation
diff --git a/...ces/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out b/...ces/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out
@@ -500,3 +500,12 @@ Project [sum_val#x]
       +- Aggregate [col1#x], [(col1#x.nums[0] + col1#x.nums[1]) AS sum_val#x, col1#x]
          +- SubqueryAlias t
             +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1
+-- !query analysis
+Filter (cast(null as int) = 1)
++- Aggregate [col1#x], [named_struct(a, 1) AS col1#x]
+   +- SubqueryAlias t
+      +- LocalRelation [col1#x]
diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract-value-nulltype-single-pass.sql b/sql/core/src/test/resources/sql-tests/inputs/extract-value-nulltype-single-pass.sql
@@ -0,0 +1,10 @@
+-- SPARK-57186: multipart field access (col.a) on a NullType base propagates NULL under the
+-- single-pass resolver as well, consistently with the legacy analyzer. Dual-running both analyzers
+-- locks in that consistency (no HYBRID_ANALYZER_EXCEPTION).
+-- The col[0]/col['key'] subscript forms are intentionally not covered here: the single-pass
+-- resolver does not resolve subscript extraction (UnresolvedExtractValue) at all -- a pre-existing
+-- limitation independent of NullType -- so they are exercised only under the legacy analyzer in
+-- extract-value-resolution-edge-cases.sql.
+--SET spark.sql.analyzer.singlePassResolver.dualRunWithLegacy=true
+
+SELECT col.a FROM (SELECT null AS col) t;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql b/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql
@@ -8,3 +8,13 @@ SELECT col1.a, a FROM t1 ORDER BY col1.a;
 SELECT split(col1, '-')[1] AS a FROM VALUES('a-b') ORDER BY split(col1, '-')[1];
 
 DROP TABLE t1;
+
+-- SPARK-57186: extracting a field/element/key from a NullType base returns NULL instead of
+-- throwing INVALID_EXTRACT_BASE_FIELD_TYPE (SQL NULL propagation; a NullType column can arise e.g.
+-- from schema evolution with missing columns). This applies uniformly to dotted field access
+-- (`col.a`) and the subscript forms (`col[0]`, `col['key']`), and is implemented at the
+-- user-facing resolution sites (ExtractValue.applyOrNull) without changing the shared
+-- ExtractValue.extractValue utility.
+SELECT col.a FROM (SELECT null AS col) t;
+SELECT col[0] FROM (SELECT null AS col) t;
+SELECT col['key'] FROM (SELECT null AS col) t;
diff --git a/...rc/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql b/...rc/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql
@@ -141,3 +141,10 @@ FROM VALUES (NAMED_STRUCT('nums', ARRAY(10, 20))) t (col1)
 GROUP BY col1
 HAVING col1.nums[0] + col1.nums[1] > 25
 ORDER BY col1.nums[0];
+
+-- SPARK-57186: Alias type: Struct, Table column type: NullType (void).
+-- Unlike the STRING/ARRAY/MAP input bases above, which throw INVALID_EXTRACT_BASE_FIELD_TYPE for
+-- this shadowing pattern, a NullType input column that shadows the struct alias yields NULL
+-- (NULL propagation). The HAVING predicate is therefore NULL and the row is filtered out, giving
+-- an empty result. NullType is intentionally the one base type that does not error here.
+SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1;
diff --git a/sql/core/src/test/resources/sql-tests/results/extract-value-nulltype-single-pass.sql.out b/sql/core/src/test/resources/sql-tests/results/extract-value-nulltype-single-pass.sql.out
@@ -0,0 +1,7 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT col.a FROM (SELECT null AS col) t
+-- !query schema
+struct<a:void>
+-- !query output
+NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out
@@ -37,3 +37,27 @@ DROP TABLE t1
 struct<>
 -- !query output
 
+
+
+-- !query
+SELECT col.a FROM (SELECT null AS col) t
+-- !query schema
+struct<a:void>
+-- !query output
+NULL
+
+
+-- !query
+SELECT col[0] FROM (SELECT null AS col) t
+-- !query schema
+struct<col[0]:void>
+-- !query output
+NULL
+
+
+-- !query
+SELECT col['key'] FROM (SELECT null AS col) t
+-- !query schema
+struct<col[key]:void>
+-- !query output
+NULL
diff --git a/...st/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out b/...st/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out
@@ -427,3 +427,11 @@ ORDER BY col1.nums[0]
 struct<sum_val:int>
 -- !query output
 30
+
+
+-- !query
+SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1
+-- !query schema
+struct<col1:struct<a:int>>
+-- !query output
+