test("BLOB column persists as BLOB in Hudi tableCreateSchema and allows INSERT INTO") {
withTempDir { tmp =>
val tableName = generateTableName
spark.sql(
s"""
|CREATE TABLE $tableName (
| id BIGINT,
| payload BLOB
|) USING hudi
|LOCATION '${tmp.getCanonicalPath}'
|TBLPROPERTIES (
| primaryKey = 'id'
|)
""".stripMargin)
val metaClient = createMetaClient(spark, tmp.getCanonicalPath)
val persistedOpt = metaClient.getTableConfig.getTableCreateSchema
assertTrue(persistedOpt.isPresent, "tableCreateSchema should be present")
val persisted = persistedOpt.get()
val payload = persisted.getField("payload").get().schema().getNonNullType()
assertEquals(HoodieSchemaType.BLOB, payload.getType)
spark.sql(
s"""
|INSERT INTO $tableName VALUES
| (1, named_struct(
| 'type', 'INLINE',
| 'data', cast(X'010203' as binary),
| 'reference', cast(null as struct<external_path:string,offset:bigint,length:bigint,managed:boolean>)))
""".stripMargin)
}
}
Failed to convert struct type to HoodieSchema: StructType(StructField(id,LongType,false),StructField(payload,StructType(StructField(type,StringType,false),StructField(data,BinaryType,false),StructField(reference,StructType(StructField(external_path,StringType,false),StructField(offset,LongType,true),StructField(length,LongType,true),StructField(managed,BooleanType,false)),true)),false))
org.apache.hudi.internal.schema.HoodieSchemaException: Failed to convert struct type to HoodieSchema: StructType(StructField(id,LongType,false),StructField(payload,StructType(StructField(type,StringType,false),StructField(data,BinaryType,false),StructField(reference,StructType(StructField(external_path,StringType,false),StructField(offset,LongType,true),StructField(length,LongType,true),StructField(managed,BooleanType,false)),true)),false))
at org.apache.hudi.HoodieSchemaConversionUtils$.convertStructTypeToHoodieSchema(HoodieSchemaConversionUtils.scala:144)
at org.apache.hudi.HoodieSchemaConversionUtils$.convertStructTypeToHoodieSchema(HoodieSchemaConversionUtils.scala:121)
at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:371)
at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:187)
at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:205)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:127)
at org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand$.run(InsertIntoHoodieTableCommand.scala:122)
at org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand.run(InsertIntoHoodieTableCommand.scala:73)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$4(SparkSession.scala:691)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:682)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:713)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:744)
at org.apache.spark.sql.hudi.ddl.TestCreateTable.$anonfun$new$132(TestCreateTable.scala:2346)
at org.apache.spark.sql.hudi.ddl.TestCreateTable.$anonfun$new$132$adapted(TestCreateTable.scala:2323)
at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.withTempDir(HoodieSparkSqlTestBase.scala:102)
at org.apache.spark.sql.hudi.ddl.TestCreateTable.$anonfun$new$131(TestCreateTable.scala:2323)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.$anonfun$test$1(HoodieSparkSqlTestBase.scala:114)
at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:189)
at org.scalatest.TestSuite.withFixture(TestSuite.scala:196)
at org.scalatest.TestSuite.withFixture$(TestSuite.scala:195)
at org.scalatest.funsuite.AnyFunSuite.withFixture(AnyFunSuite.scala:1562)
at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:187)
at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:199)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:199)
at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:181)
at org.scalatest.funsuite.AnyFunSuite.runTest(AnyFunSuite.scala:1562)
at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:232)
at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
at scala.collection.immutable.List.foreach(List.scala:431)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:232)
at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:231)
at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1562)
at org.scalatest.Suite.run(Suite.scala:1112)
at org.scalatest.Suite.run$(Suite.scala:1094)
at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1562)
at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:236)
at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:236)
at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:235)
at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.org$scalatest$BeforeAndAfterAll$$super$run(HoodieSparkSqlTestBase.scala:58)
at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.run(HoodieSparkSqlTestBase.scala:58)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1314)
at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1308)
at scala.collection.immutable.List.foreach(List.scala:431)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1308)
at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:993)
at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:971)
at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1474)
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:971)
at org.scalatest.tools.Runner$.run(Runner.scala:798)
at org.scalatest.tools.Runner.run(Runner.scala)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2or3(ScalaTestRunner.java:43)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:26)
Caused by: java.lang.IllegalArgumentException: Invalid blob schema structure. Expected schema:
type STRING NOT NULL,data BINARY,reference STRUCT<external_path: STRING, offset: BIGINT, length: BIGINT, managed: BOOLEAN>
Got schema:
type STRING NOT NULL,data BINARY NOT NULL,reference STRUCT<external_path: STRING, offset: BIGINT, length: BIGINT, managed: BOOLEAN>
at org.apache.spark.sql.avro.HoodieSparkSchemaConverters$.validateBlobStructure(HoodieSparkSchemaConverters.scala:362)
at org.apache.spark.sql.avro.HoodieSparkSchemaConverters$.toHoodieTypeNested(HoodieSparkSchemaConverters.scala:142)
at org.apache.spark.sql.avro.HoodieSparkSchemaConverters$.$anonfun$toHoodieTypeNested$2(HoodieSparkSchemaConverters.scala:167)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at org.apache.spark.sql.types.StructType.foreach(StructType.scala:102)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at org.apache.spark.sql.types.StructType.map(StructType.scala:102)
at org.apache.spark.sql.avro.HoodieSparkSchemaConverters$.toHoodieTypeNested(HoodieSparkSchemaConverters.scala:166)
at org.apache.spark.sql.avro.HoodieSparkSchemaConverters$.toHoodieType(HoodieSparkSchemaConverters.scala:63)
at org.apache.hudi.HoodieSchemaConversionUtils$.convertStructTypeToHoodieSchema(HoodieSchemaConversionUtils.scala:139)
... 94 more
Bug Description
What happened:
What you expected:
Steps to reproduce:
Environment
Hudi version:
Query engine: (Spark/Flink/Trino etc)
Relevant configs:
Logs and Stack Trace