[SPARK-46906][SS] Add a check for stateful operator change for streaming #44927

Closed
wants to merge 11 commits into from

Changes from 2 commits
8 changes: 7 additions & 1 deletion common/utils/src/main/resources/error/error-classes.json
@@ -3317,6 +3317,12 @@
],
"sqlState" : "XXKST"
},
"STREAMING_STATEFUL_OPERATOR_NOT_MATCH_IN_STATE_METADATA" : {
"message" : [
"Streaming stateful operator name does not match with the operator in state metadata with the same operator id (id: <operatorId>). Stateful Operator name for current batch: <currentOperatorName>; Operator name in the state metadata: <stateMetadataOperatorName>."
Contributor:
Can we explain to the customer why this occurs? For example: "changing the stateful operator of an existing streaming query is not allowed."

],
"sqlState" : "42K03"
},
"SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT" : {
"message" : [
"The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647) but found limit = <limit>, offset = <offset>."
@@ -7443,4 +7449,4 @@
],
"sqlState" : "P0001"
}
}
},
6 changes: 6 additions & 0 deletions docs/sql-error-conditions.md
@@ -2091,6 +2091,12 @@ The checkpoint seems to be only run with older Spark version(s). Run the streami

Query [id = `<id>`, runId = `<runId>`] terminated with exception: `<message>`

### STREAMING_STATEFUL_OPERATOR_NOT_MATCH_IN_STATE_METADATA

* [SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)

Streaming stateful operator name does not match with the operator in state metadata with the same operator id (id: `<operatorId>`). Stateful Operator name for current batch: `<currentOperatorName>`; Operator name in the state metadata: `<stateMetadataOperatorName>`.

### SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT

[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception)
QueryExecutionErrors.scala
@@ -1702,6 +1702,19 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
new NoSuchElementException("State is either not defined or has already been removed")
}

def statefulOperatorNotMatchInStateMetadataError(
operatorId: Long,
currentOperatorName: String,
stateMetadataOperatorName: String): SparkRuntimeException = {
new SparkRuntimeException(
errorClass = "STREAMING_STATEFUL_OPERATOR_NOT_MATCH_IN_STATE_METADATA",
messageParameters = Map(
"operatorId" -> operatorId.toString,
"currentOperatorName" -> currentOperatorName,
"stateMetadataOperatorName" -> stateMetadataOperatorName)
)
}

def cannotSetTimeoutDurationError(): SparkUnsupportedOperationException = {
new SparkUnsupportedOperationException(errorClass = "_LEGACY_ERROR_TEMP_2203")
}
IncrementalExecution.scala
@@ -29,12 +29,13 @@ import org.apache.spark.sql.catalyst.expressions.{CurrentBatchTimestamp, Express
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.TreePattern._
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.execution.{LocalLimitExec, QueryExecution, SparkPlan, SparkPlanner, UnaryExecNode}
import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, MergingSessionsExec, ObjectHashAggregateExec, SortAggregateExec, UpdatingSessionsExec}
import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike
import org.apache.spark.sql.execution.python.FlatMapGroupsInPandasWithStateExec
import org.apache.spark.sql.execution.streaming.sources.WriteToMicroBatchDataSourceV1
import org.apache.spark.sql.execution.streaming.state.OperatorStateMetadataWriter
import org.apache.spark.sql.execution.streaming.state.{OperatorStateMetadataReader, OperatorStateMetadataV1, OperatorStateMetadataWriter}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.util.Utils
@@ -184,6 +185,41 @@ class IncrementalExecution(
}
}

/**
Contributor:
Maybe better to compare at a higher level - construct a map of opId -> operatorName for both the physical plan and the state metadata, and compare the two (a rough sketch is shown below).

With this approach we can also give a better error message for all cases: 1) addition of new stateful operator(s), 2) removal of existing stateful operator(s), 3) replacement of existing stateful operator(s). Case 3) can also happen if someone tries to replace dropDuplicates with dropDuplicatesWithinWatermark after encountering some issue.

Worth noting that we consider the state metadata as the source of truth in this PR - so if there is no state metadata prior to this, it is just the same as letting Spark create a new state metadata file and then doing the comparison (which cannot perform any real check). So the check could also be done after executing the physical planning rules, maybe at the end of state.apply().
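
A rough sketch of the suggested map-based comparison, assuming the state metadata has already been read into an opId -> operatorName map (the standalone helper and its namesInMetadata argument are hypothetical; the PR as written checks each operator individually):

import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.streaming.StateStoreWriter

// Compare the stateful operators in the physical plan against the operator names
// recorded in the state metadata, keyed by operator id. namesInMetadata is assumed
// to have been built elsewhere from the OperatorStateMetadata files (hypothetical).
def checkStatefulOperators(plan: SparkPlan, namesInMetadata: Map[Long, String]): Unit = {
  // opId -> operator short name for every stateful operator in the physical plan.
  val namesInPlan: Map[Long, String] = plan.collect {
    case writer: StateStoreWriter => writer.getStateInfo.operatorId -> writer.shortName
  }.toMap

  // Case 3): the same operator id appears on both sides but with different names.
  namesInPlan.foreach { case (opId, nameInPlan) =>
    namesInMetadata.get(opId).filter(_ != nameInPlan).foreach { nameInMetadata =>
      throw QueryExecutionErrors.statefulOperatorNotMatchInStateMetadataError(
        opId, nameInPlan, nameInMetadata)
    }
  }
  // Cases 1) and 2), added or removed operators, are the ids present on only one
  // side (namesInPlan.keySet diff namesInMetadata.keySet, and vice versa) and
  // could be reported with dedicated error messages.
}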

Contributor Author:
Thanks for the review, Jungtaek! I also like the idea of adding a map.

"So the check could also be done after executing the physical planning rules, maybe at the end of state.apply()"

I tried to add the check after WriteStatefulOperatorMetadataRule, but that would miss the case of an operator being added after a restart (because the additional operator has already been written to the metadata). So I keep the check before WriteStatefulOperatorMetadataRule and omit the check if the metadata is empty.
It is also worth noting that if we did not perform the check (and fail the query) before writing to the metadata, incorrect info would be written to the state metadata.

* Read the existing operator state metadata entry that has the same operator id
* as the current operator, and check whether the operator names match.
* Throw an error if they do not match.
*/
object checkOperatorInMetadata extends SparkPlanPartialRule {
override val rule: PartialFunction[SparkPlan, SparkPlan] = {
case stateStoreWriter: StateStoreWriter if isFirstBatch =>
val opId = stateStoreWriter.getStateInfo.operatorId
try {
val metadataPathToCheck = new Path(checkpointLocation, opId.toString)
logInfo("Reading from operator metadata, check if stateful operator with " +
"the same id from committed batch is the same operator of current stateful operator. " +
s"Stateful operator metadata path to check: ${metadataPathToCheck.toString}")
val operatorMetadata: OperatorStateMetadataV1 = new OperatorStateMetadataReader(
metadataPathToCheck, hadoopConf).read().asInstanceOf[OperatorStateMetadataV1]
val operatorInMetadata = operatorMetadata.operatorInfo.operatorName
if (operatorInMetadata != stateStoreWriter.shortName) {
throw QueryExecutionErrors.statefulOperatorNotMatchInStateMetadataError(
opId, stateStoreWriter.shortName, operatorInMetadata)
}
} catch {
case e: java.io.FileNotFoundException =>
// no need to throw fatal error
logWarning("Error reading metadata path for stateful operator. " +
"This may due to no prior committed batch, or previously run on lower versions. " +
"Trying to read operator metadata for stateful operator " +
s"$opId: ${e.toString}")
case e: Exception =>
throw e
}
stateStoreWriter
}
}

object WriteStatefulOperatorMetadataRule extends SparkPlanPartialRule {
override val rule: PartialFunction[SparkPlan, SparkPlan] = {
case stateStoreWriter: StateStoreWriter if isFirstBatch =>
@@ -389,6 +425,9 @@

override def apply(plan: SparkPlan): SparkPlan = {
val planWithStateOpId = plan transform composedRule
// Check the operator name and fail the query if the operator has changed;
// this rule does not modify the plan.
planWithStateOpId transform checkOperatorInMetadata.rule
// The rule doesn't change the plan but cause the side effect that metadata is written
// in the checkpoint directory of stateful operator.
planWithStateOpId transform WriteStatefulOperatorMetadataRule.rule
statefulOperators.scala
@@ -240,7 +240,7 @@ trait StateStoreWriter extends StatefulOperator with PythonSQLMetrics { self: Sp
}

/** Name to output in [[StateOperatorProgress]] to identify operator type */
protected def shortName: String = "defaultName"
def shortName: String = "defaultName"

/**
* Should the MicroBatchExecution run another batch based on this stateful operator and the
OperatorStateMetadataSuite.scala
@@ -19,12 +19,14 @@ package org.apache.spark.sql.execution.streaming.state

import org.apache.hadoop.fs.Path

import org.apache.spark.SparkRuntimeException
import org.apache.spark.sql.{Column, Row}
import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Append
import org.apache.spark.sql.execution.datasources.v2.state.{StateDataSourceUnspecifiedRequiredOption, StateSourceOptions}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, MemoryStream}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.{OutputMode, StreamTest}
import org.apache.spark.sql.streaming.OutputMode.Complete
import org.apache.spark.sql.streaming.OutputMode.{Complete, Update}
import org.apache.spark.sql.test.SharedSparkSession


@@ -215,4 +217,93 @@ class OperatorStateMetadataSuite extends StreamTest with SharedSparkSession {
checkError(exc, "STDS_REQUIRED_OPTION_UNSPECIFIED", "42601",
Map("optionName" -> StateSourceOptions.PATH))
}

test("Operator metadata path non-existence should not fail query") {
withTempDir { checkpointDir =>
val inputData = MemoryStream[Int]
val aggregated =
inputData.toDF()
.groupBy($"value")
.agg(count("*"))
.as[(Int, Long)]

testStream(aggregated, Complete)(
StartStream(checkpointLocation = checkpointDir.toString),
AddData(inputData, 3),
CheckLastBatch((3, 1)),
StopStream
)

// Delete operator metadata path
val metadataPath = new Path(checkpointDir.toString, s"state/0/_metadata/metadata")
val fm = CheckpointFileManager.create(new Path(checkpointDir.getCanonicalPath), hadoopConf)
fm.delete(metadataPath)

// Restart the query
testStream(aggregated, Complete)(
StartStream(checkpointLocation = checkpointDir.toString),
AddData(inputData, 3),
CheckLastBatch((3, 2)),
StopStream
)
}
}

test("Restarting query - " +
"checking operator name of the same operator id is the same in the metadata") {
withTempDir { checkpointDir =>
val inputData = MemoryStream[Int]
val stream = inputData.toDF().withColumn("eventTime", timestamp_seconds($"value"))

testStream(stream
.withWatermark("eventTime", "10 seconds")
.dropDuplicatesWithinWatermark())(
StartStream(checkpointLocation = checkpointDir.toString),
AddData(inputData, 1),
ProcessAllAvailable(),
StopStream
)

def checkOpChangeError(opName: String, ex: Throwable): Unit = {
checkError(ex.asInstanceOf[SparkRuntimeException],
"STREAMING_STATEFUL_OPERATOR_NOT_MATCH_IN_STATE_METADATA", "42K03",
Map("operatorId" -> 0.toString,
"currentOperatorName" -> opName,
"stateMetadataOperatorName" -> "dedupeWithinWatermark")
)
}

testStream(stream.dropDuplicates(), Append)(
StartStream(checkpointLocation = checkpointDir.toString),
AddData(inputData, 2),
ExpectFailure[SparkRuntimeException] {
(t: Throwable) => {
checkOpChangeError("dedupe", t)
}
}
)

testStream(stream.groupBy("value").count(), Update)(
StartStream(checkpointLocation = checkpointDir.toString),
AddData(inputData, 3),
ExpectFailure[SparkRuntimeException] {
(t: Throwable) => {
checkOpChangeError("stateStoreSave", t)
}
}
)

testStream(stream
.groupBy(session_window($"eventTime", "10 seconds").as("session"), $"value")
.agg(count("*").as("numEvents")), Complete)(
StartStream(checkpointLocation = checkpointDir.toString),
AddData(inputData, 3),
ExpectFailure[SparkRuntimeException] {
(t: Throwable) => {
checkOpChangeError("sessionWindowStateStoreSaveExec", t)
}
}
)
}
}
}