Repro steps (a minimal spark-shell sketch follows the list):
* Create a brand-new table version 9 table with the metadata table (MDT) disabled, and load data.
* Enable MDT, then run a DML statement with a WHERE filter on any column.
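A sketch of the sequence, for reference. Assumptions: the table name mdt_repro, the columns, and the values are illustrative; it also assumes the build under test creates table version 9 tables by default (otherwise the version would need to be pinned explicitly, e.g. via hoodie.write.table.version). The DELETE matches the DeleteHoodieTableCommand frame in the trace below.

{code:scala}
// Step 1: create a fresh table with MDT disabled and load data.
// (Table/column names and values are illustrative, not from the original report.)
spark.sql("set hoodie.metadata.enable=false")
spark.sql(
  """create table mdt_repro (id int, name string, ts long)
    |using hudi
    |tblproperties (primaryKey = 'id', preCombineField = 'ts')""".stripMargin)
spark.sql("insert into mdt_repro values (1, 'a', 1000), (2, 'b', 1000)")

// Step 2: enable MDT, then run a DML with a WHERE filter.
// This is where the IllegalStateException below is thrown for the
// record_index metadata partition.
spark.sql("set hoodie.metadata.enable=true")
spark.sql("delete from mdt_repro where name = 'a'")
{code}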
25/09/09 17:22:45 WARN HoodieTableFileSystemView: Partition: record_index is not available in store
java.lang.IllegalStateException: Partition record_index should be part of inflight metadata partitions here []
  at org.apache.hudi.common.util.ValidationUtils.checkState(ValidationUtils.java:78)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.lambda$tagRecordsWithLocationForStreamingWrites$27(HoodieBackedTableMetadataWriter.java:1340)
  at java.base/java.lang.Iterable.forEach(Iterable.java:75)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.tagRecordsWithLocationForStreamingWrites(HoodieBackedTableMetadataWriter.java:1334)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.streamWriteToMetadataPartitions(HoodieBackedTableMetadataWriter.java:1240)
  at org.apache.hudi.client.StreamingMetadataWriteHandler.streamWriteToMetadataTable(StreamingMetadataWriteHandler.java:92)
  at org.apache.hudi.client.StreamingMetadataWriteHandler.streamWriteToMetadataTable(StreamingMetadataWriteHandler.java:57)
  at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:111)
  at org.apache.hudi.HoodieSparkSqlWriterInternal.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:1000)
  at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:545)
  at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:195)
  at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:213)
  at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:132)
  at org.apache.spark.sql.hudi.command.DeleteHoodieTableCommand.run(DeleteHoodieTableCommand.scala:52)
  at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
  at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
  at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
  at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
  at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
  at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
  at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
  at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220)
  at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
  at org.apache.spark.sql.SparkSession.$anonfun$sql$4(SparkSession.scala:691)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:682)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:713)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:744)
  ... 47 elided
Reproducible with Hudi OSS at git hash a749d6b.