Skip to content

VARIANT column on Lance file format fails during inline compaction #18596

@voonhous

Description

@voonhous

Bug Description

What happened:

What you expected:

Steps to reproduce:

test("Test Query Log Only MOR Table With VARIANT column triggers compaction (Lance)") {
    assume(HoodieSparkUtils.gteqSpark4_0, "Variant type requires Spark 4.0 or higher")
    assume(System.getProperty("lance.skip.tests") != "true",
      "Lance tests disabled via -Dlance.skip.tests=true")
    // Reproducer for VARIANT + LANCE base file format: the Lance writer has no VARIANT
    // handling today (RFC-100 Phase 2), so inline compaction aborts with
    // "[UNSUPPORTED_DATATYPE] Unsupported data type VARIANT" (see attached stack trace,
    // LanceArrowUtils.toArrowType via HoodieSparkLanceWriter).
    // NOTE(review): the body below asserts *successful* compaction and cleaning; the
    // failure at the merge that triggers compaction is the reported bug, not an
    // asserted expectation — an intercept[...] would be needed to truly "pin the gap".

    withRecordType()(withTempDir { tmp =>
      val tableName = generateTableName
      // MOR table with a VARIANT column and LANCE as the base file format.
      // Inline compaction plus retain-1 cleaning make table services run eagerly,
      // so the Lance-writer gap surfaces within a handful of commits.
      spark.sql(
        s"""
           |create table $tableName (
           |  id int,
           |  v variant,
           |  ts long
           |) using hudi
           | location '${tmp.getCanonicalPath}'
           | tblproperties (
           |  primaryKey = 'id',
           |  type = 'mor',
           |  preCombineField = 'ts',
           |  hoodie.index.type = 'INMEMORY',
           |  hoodie.compact.inline = 'true',
           |  hoodie.clean.commits.retained = '1',
           |  'hoodie.table.base.file.format' = 'LANCE'
           | )
        """.stripMargin)

      // Verify the LANCE config was actually persisted to hoodie.properties,
      // so the later failure is attributable to the Lance write path and not a
      // silently-ignored table property.
      assertResult(HoodieFileFormat.LANCE)(
        createMetaClient(spark, tmp.getCanonicalPath).getTableConfig.getBaseFileFormat)

      // Three upserts into a log-only MOR table; no base file should exist yet.
      spark.sql(
        s"insert into $tableName values " +
          "(1, parse_json('{\"key\":\"value1\"}'), 1000)")
      spark.sql(
        s"insert into $tableName values " +
          "(2, parse_json('{\"key\":\"value2\"}'), 1000)")
      spark.sql(
        s"insert into $tableName values " +
          "(3, parse_json('{\"key\":\"value3\"}'), 1000)")
      assertResult(true)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath))
      // Snapshot read must merge log files and round-trip the VARIANT payloads.
      checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")(
        Seq(1, "{\"key\":\"value1\"}", 1000),
        Seq(2, "{\"key\":\"value2\"}", 1000),
        Seq(3, "{\"key\":\"value3\"}", 1000)
      )

      // Merge-update of an existing key (higher ts wins via preCombineField).
      spark.sql(
        s"""
           |merge into $tableName h0
           |using (
           |  select 1 as id,
           |         parse_json('{"key":"v1-merged"}') as v,
           |         1001L as ts
           |) s0
           | on h0.id = s0.id
           | when matched then update set *
           |""".stripMargin)
      // Still log-only: the update above must not have produced a base file.
      assertResult(true)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath))
      checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")(
        Seq(1, "{\"key\":\"v1-merged\"}", 1001),
        Seq(2, "{\"key\":\"value2\"}", 1000),
        Seq(3, "{\"key\":\"value3\"}", 1000)
      )

      // Merge-insert of a new key; with inline compaction enabled this write is
      // expected to cross the delta-commit threshold and produce a base file.
      spark.sql(
        s"""
           |merge into $tableName h0
           |using (
           |  select 4 as id,
           |         parse_json('{"key":"value4"}') as v,
           |         1000L as ts
           |) s0
           | on h0.id = s0.id
           | when not matched then insert *
           |""".stripMargin)

      // Compaction should now have written a (Lance) base file — this is the point
      // where the VARIANT-to-Arrow conversion fails in the current implementation.
      assertResult(false)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath))
      checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")(
        Seq(1, "{\"key\":\"v1-merged\"}", 1001),
        Seq(2, "{\"key\":\"value2\"}", 1000),
        Seq(3, "{\"key\":\"value3\"}", 1000),
        Seq(4, "{\"key\":\"value4\"}", 1000)
      )

      // Schema sanity check: the column must still surface as Spark's VARIANT type
      // after compaction, not a widened/encoded substitute.
      val variantField = spark.table(tableName).schema.find(_.name == "v").get
      assertResult("variant")(variantField.dataType.typeName)

      // One more update; with retain-1 cleaning this commit should drive an
      // auto-clean that retires the now-superseded log-only slice.
      // NOTE(review): "6th commit" assumes default compaction/clean trigger counts —
      // confirm against hoodie.compact.inline.max.delta.commits for this build.
      spark.sql(
        s"""
           |merge into $tableName h0
           |using (
           |  select 2 as id,
           |         parse_json('{"key":"v2-merged"}') as v,
           |         1002L as ts
           |) s0
           | on h0.id = s0.id
           | when matched then update set *
           |""".stripMargin)
      checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")(
        Seq(1, "{\"key\":\"v1-merged\"}", 1001),
        Seq(2, "{\"key\":\"v2-merged\"}", 1002),
        Seq(3, "{\"key\":\"value3\"}", 1000),
        Seq(4, "{\"key\":\"value4\"}", 1000)
      )

      // Reload the timeline and confirm the cleaner actually ran.
      val metaClient = createMetaClient(spark, tmp.getCanonicalPath)
      metaClient.reloadActiveTimeline()
      assert(metaClient.getActiveTimeline.getCleanerTimeline.countInstants() > 0,
        "Expected at least one .clean instant on the timeline after compaction")
    })
  }

Environment

Hudi version:
Query engine: (Spark/Flink/Trino etc)
Relevant configs:

Logs and Stack Trace

Job aborted due to stage failure: Task 0 in stage 85.0 failed 1 times, most recent failure: Lost task 0.0 in stage 85.0 (TID 153) (192.168.1.192 executor driver): org.apache.hudi.exception.HoodieException: Could not instantiate the HoodieMergeHandle implementation: org.apache.hudi.io.FileGroupReaderBasedMergeHandle
	at org.apache.hudi.io.HoodieMergeHandleFactory.instantiateMergeHandle(HoodieMergeHandleFactory.java:159)
	at org.apache.hudi.io.HoodieMergeHandleFactory.create(HoodieMergeHandleFactory.java:132)
	at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:160)
	at org.apache.hudi.table.action.compact.HoodieCompactor.lambda$compact$da19f3d2$1(HoodieCompactor.java:145)
	at org.apache.spark.api.java.JavaPairRDD$.$anonfun$toScalaFunction$1(JavaPairRDD.scala:1072)
	at scala.collection.Iterator$$anon$9.next(Iterator.scala:584)
	at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:594)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:608)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:232)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:370)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1651)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1560)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1625)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1424)
	at org.apache.spark.storage.BlockManager.getOrElseUpdateRDDBlock(BlockManager.scala:1378)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:386)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:336)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:107)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
	at org.apache.spark.scheduler.Task.run(Task.scala:147)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.base/java.lang.Thread.run(Thread.java:840)
Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate class org.apache.hudi.io.FileGroupReaderBasedMergeHandle
	at org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
	at org.apache.hudi.io.HoodieMergeHandleFactory.instantiateMergeHandle(HoodieMergeHandleFactory.java:148)
	... 31 more
Caused by: java.lang.reflect.InvocationTargetException
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:481)
	at org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:71)
	... 32 more
Caused by: org.apache.spark.SparkUnsupportedOperationException: [UNSUPPORTED_DATATYPE] Unsupported data type "VARIANT". SQLSTATE: 0A000
	at org.apache.spark.sql.util.LanceArrowUtils$.unsupportedDataTypeError(LanceArrowUtils.scala:330)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowType(LanceArrowUtils.scala:278)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowField(LanceArrowUtils.scala:239)
	at org.apache.spark.sql.util.LanceArrowUtils$.$anonfun$toArrowSchema$1(LanceArrowUtils.scala:166)
	at scala.collection.Iterator$$anon$9.next(Iterator.scala:584)
	at scala.collection.immutable.List.prependedAll(List.scala:156)
	at scala.collection.immutable.List$.from(List.scala:685)
	at scala.collection.immutable.List$.from(List.scala:682)
	at scala.collection.SeqFactory$Delegate.from(Factory.scala:306)
	at scala.collection.immutable.Seq$.from(Seq.scala:42)
	at scala.collection.immutable.Seq$.from(Seq.scala:39)
	at scala.collection.IterableOps.map(Iterable.scala:684)
	at scala.collection.IterableOps.map$(Iterable.scala:684)
	at org.apache.spark.sql.types.StructType.map(StructType.scala:105)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowSchema(LanceArrowUtils.scala:160)
	at org.apache.spark.sql.util.LanceArrowUtils.toArrowSchema(LanceArrowUtils.scala)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter.<init>(HoodieSparkLanceWriter.java:131)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter.create(HoodieSparkLanceWriter.java:108)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter$HoodieSparkLanceWriterBuilder.build(HoodieSparkLanceWriter.java:97)
	at org.apache.hudi.io.storage.HoodieSparkFileWriterFactory.newLanceFileWriter(HoodieSparkFileWriterFactory.java:133)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:76)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:53)
	at org.apache.hudi.io.FileGroupReaderBasedMergeHandle.init(FileGroupReaderBasedMergeHandle.java:234)
	at org.apache.hudi.io.FileGroupReaderBasedMergeHandle.<init>(FileGroupReaderBasedMergeHandle.java:166)
	... 38 more

Driver stacktrace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 85.0 failed 1 times, most recent failure: Lost task 0.0 in stage 85.0 (TID 153) (192.168.1.192 executor driver): org.apache.hudi.exception.HoodieException: Could not instantiate the HoodieMergeHandle implementation: org.apache.hudi.io.FileGroupReaderBasedMergeHandle
	at org.apache.hudi.io.HoodieMergeHandleFactory.instantiateMergeHandle(HoodieMergeHandleFactory.java:159)
	at org.apache.hudi.io.HoodieMergeHandleFactory.create(HoodieMergeHandleFactory.java:132)
	at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:160)
	at org.apache.hudi.table.action.compact.HoodieCompactor.lambda$compact$da19f3d2$1(HoodieCompactor.java:145)
	at org.apache.spark.api.java.JavaPairRDD$.$anonfun$toScalaFunction$1(JavaPairRDD.scala:1072)
	at scala.collection.Iterator$$anon$9.next(Iterator.scala:584)
	at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:594)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:608)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:232)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:370)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1651)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1560)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1625)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1424)
	at org.apache.spark.storage.BlockManager.getOrElseUpdateRDDBlock(BlockManager.scala:1378)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:386)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:336)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:107)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
	at org.apache.spark.scheduler.Task.run(Task.scala:147)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.base/java.lang.Thread.run(Thread.java:840)
Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate class org.apache.hudi.io.FileGroupReaderBasedMergeHandle
	at org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
	at org.apache.hudi.io.HoodieMergeHandleFactory.instantiateMergeHandle(HoodieMergeHandleFactory.java:148)
	... 31 more
Caused by: java.lang.reflect.InvocationTargetException
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:481)
	at org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:71)
	... 32 more
Caused by: org.apache.spark.SparkUnsupportedOperationException: [UNSUPPORTED_DATATYPE] Unsupported data type "VARIANT". SQLSTATE: 0A000
	at org.apache.spark.sql.util.LanceArrowUtils$.unsupportedDataTypeError(LanceArrowUtils.scala:330)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowType(LanceArrowUtils.scala:278)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowField(LanceArrowUtils.scala:239)
	at org.apache.spark.sql.util.LanceArrowUtils$.$anonfun$toArrowSchema$1(LanceArrowUtils.scala:166)
	at scala.collection.Iterator$$anon$9.next(Iterator.scala:584)
	at scala.collection.immutable.List.prependedAll(List.scala:156)
	at scala.collection.immutable.List$.from(List.scala:685)
	at scala.collection.immutable.List$.from(List.scala:682)
	at scala.collection.SeqFactory$Delegate.from(Factory.scala:306)
	at scala.collection.immutable.Seq$.from(Seq.scala:42)
	at scala.collection.immutable.Seq$.from(Seq.scala:39)
	at scala.collection.IterableOps.map(Iterable.scala:684)
	at scala.collection.IterableOps.map$(Iterable.scala:684)
	at org.apache.spark.sql.types.StructType.map(StructType.scala:105)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowSchema(LanceArrowUtils.scala:160)
	at org.apache.spark.sql.util.LanceArrowUtils.toArrowSchema(LanceArrowUtils.scala)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter.<init>(HoodieSparkLanceWriter.java:131)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter.create(HoodieSparkLanceWriter.java:108)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter$HoodieSparkLanceWriterBuilder.build(HoodieSparkLanceWriter.java:97)
	at org.apache.hudi.io.storage.HoodieSparkFileWriterFactory.newLanceFileWriter(HoodieSparkFileWriterFactory.java:133)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:76)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:53)
	at org.apache.hudi.io.FileGroupReaderBasedMergeHandle.init(FileGroupReaderBasedMergeHandle.java:234)
	at org.apache.hudi.io.FileGroupReaderBasedMergeHandle.<init>(FileGroupReaderBasedMergeHandle.java:166)
	... 38 more

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$3(DAGScheduler.scala:2935)
	at scala.Option.getOrElse(Option.scala:201)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2935)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2927)
	at scala.collection.immutable.List.foreach(List.scala:334)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2927)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1295)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1295)
	at scala.Option.foreach(Option.scala:437)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1295)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3207)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3141)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3130)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:50)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1009)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2484)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2505)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2524)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2549)
	at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1057)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:417)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:1056)
	at org.apache.spark.api.java.JavaRDDLike.collect(JavaRDDLike.scala:363)
	at org.apache.spark.api.java.JavaRDDLike.collect$(JavaRDDLike.scala:362)
	at org.apache.spark.api.java.AbstractJavaRDDLike.collect(JavaRDDLike.scala:46)
	at org.apache.hudi.client.SparkRDDWriteClient$SlimWriteStats.from(SparkRDDWriteClient.java:441)
	at org.apache.hudi.client.SparkRDDTableServiceClient.triggerWritesAndFetchWriteStats(SparkRDDTableServiceClient.java:72)
	at org.apache.hudi.client.BaseHoodieTableServiceClient.commitCompaction(BaseHoodieTableServiceClient.java:377)
	at org.apache.hudi.client.BaseHoodieTableServiceClient.compact(BaseHoodieTableServiceClient.java:355)
	at org.apache.hudi.client.BaseHoodieTableServiceClient.compact(BaseHoodieTableServiceClient.java:314)
	at org.apache.hudi.client.BaseHoodieTableServiceClient.lambda$inlineCompaction$0(BaseHoodieTableServiceClient.java:201)
	at org.apache.hudi.common.util.Option.ifPresent(Option.java:101)
	at org.apache.hudi.client.BaseHoodieTableServiceClient.inlineCompaction(BaseHoodieTableServiceClient.java:199)
	at org.apache.hudi.client.BaseHoodieTableServiceClient.inlineCompaction(BaseHoodieTableServiceClient.java:211)
	at org.apache.hudi.client.BaseHoodieTableServiceClient.runTableServicesInline(BaseHoodieTableServiceClient.java:658)
	at org.apache.hudi.client.BaseHoodieWriteClient.runTableServicesInlineInternal(BaseHoodieWriteClient.java:711)
	at org.apache.hudi.client.BaseHoodieWriteClient.runTableServicesInline(BaseHoodieWriteClient.java:698)
	at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:290)
	at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:148)
	at org.apache.hudi.HoodieSparkSqlWriterInternal.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:1005)
	at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:560)
	at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:187)
	at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:205)
	at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:127)
	at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.executeUpsert(MergeIntoHoodieTableCommand.scala:510)
	at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.run(MergeIntoHoodieTableCommand.scala:278)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:79)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:77)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:88)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$2(QueryExecution.scala:155)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$8(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:272)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$7(SQLExecution.scala:125)
	at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
	at org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:112)
	at org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:106)
	at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:111)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:295)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:124)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:78)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:237)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:155)
	at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
	at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$eagerlyExecute$1(QueryExecution.scala:154)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:169)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:164)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:470)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:86)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:470)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:360)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:356)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:446)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:164)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyCommandExecuted$1(QueryExecution.scala:126)
	at scala.util.Try$.apply(Try.scala:217)
	at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
	at org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1439)
	at org.apache.spark.util.LazyTry.get(LazyTry.scala:58)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:131)
	at org.apache.spark.sql.classic.Dataset.<init>(Dataset.scala:277)
	at org.apache.spark.sql.classic.Dataset$.$anonfun$ofRows$5(Dataset.scala:140)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
	at org.apache.spark.sql.classic.Dataset$.ofRows(Dataset.scala:136)
	at org.apache.spark.sql.classic.SparkSession.$anonfun$sql$4(SparkSession.scala:499)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
	at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:490)
	at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:504)
	at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:513)
	at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:91)
	at org.apache.spark.sql.hudi.dml.schema.TestVariantDataType.$anonfun$new$11(TestVariantDataType.scala:305)
	at org.apache.spark.sql.hudi.dml.schema.TestVariantDataType.$anonfun$new$11$adapted(TestVariantDataType.scala:244)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.withTempDir(HoodieSparkSqlTestBase.scala:102)
	at org.apache.spark.sql.hudi.dml.schema.TestVariantDataType.$anonfun$new$10(TestVariantDataType.scala:244)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.$anonfun$withRecordType$3(HoodieSparkSqlTestBase.scala:375)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.withSQLConf(HoodieSparkSqlTestBase.scala:326)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.$anonfun$withRecordType$1(HoodieSparkSqlTestBase.scala:374)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.$anonfun$withRecordType$1$adapted(HoodieSparkSqlTestBase.scala:366)
	at scala.collection.immutable.List.foreach(List.scala:334)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.withRecordType(HoodieSparkSqlTestBase.scala:366)
	at org.apache.spark.sql.hudi.dml.schema.TestVariantDataType.$anonfun$new$9(TestVariantDataType.scala:244)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.$anonfun$test$1(HoodieSparkSqlTestBase.scala:114)
	at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
	at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
	at org.scalatest.Transformer.apply(Transformer.scala:22)
	at org.scalatest.Transformer.apply(Transformer.scala:20)
	at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:189)
	at org.scalatest.TestSuite.withFixture(TestSuite.scala:196)
	at org.scalatest.TestSuite.withFixture$(TestSuite.scala:195)
	at org.scalatest.funsuite.AnyFunSuite.withFixture(AnyFunSuite.scala:1562)
	at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:187)
	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:199)
	at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
	at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:199)
	at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:181)
	at org.scalatest.funsuite.AnyFunSuite.runTest(AnyFunSuite.scala:1562)
	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:232)
	at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
	at scala.collection.immutable.List.foreach(List.scala:334)
	at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
	at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
	at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
	at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:232)
	at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:231)
	at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1562)
	at org.scalatest.Suite.run(Suite.scala:1112)
	at org.scalatest.Suite.run$(Suite.scala:1094)
	at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1562)
	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:236)
	at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
	at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:236)
	at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:235)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.org$scalatest$BeforeAndAfterAll$$super$run(HoodieSparkSqlTestBase.scala:58)
	at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
	at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
	at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
	at org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.run(HoodieSparkSqlTestBase.scala:58)
	at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
	at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1320)
	at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1314)
	at scala.collection.immutable.List.foreach(List.scala:334)
	at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1314)
	at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:993)
	at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:971)
	at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1480)
	at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:971)
	at org.scalatest.tools.Runner$.run(Runner.scala:798)
	at org.scalatest.tools.Runner.run(Runner.scala)
	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2or3(ScalaTestRunner.java:43)
	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:26)
	Suppressed: org.apache.spark.util.Utils$OriginalTryStackTraceException: Full stacktrace of original doTryWithCallerStacktrace caller
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$3(DAGScheduler.scala:2935)
		at scala.Option.getOrElse(Option.scala:201)
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2935)
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2927)
		at scala.collection.immutable.List.foreach(List.scala:334)
		at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2927)
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1295)
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1295)
		at scala.Option.foreach(Option.scala:437)
		at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1295)
		at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3207)
		at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3141)
		at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3130)
		at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:50)
		at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1009)
		at org.apache.spark.SparkContext.runJob(SparkContext.scala:2484)
		at org.apache.spark.SparkContext.runJob(SparkContext.scala:2505)
		at org.apache.spark.SparkContext.runJob(SparkContext.scala:2524)
		at org.apache.spark.SparkContext.runJob(SparkContext.scala:2549)
		at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1057)
		at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
		at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
		at org.apache.spark.rdd.RDD.withScope(RDD.scala:417)
		at org.apache.spark.rdd.RDD.collect(RDD.scala:1056)
		at org.apache.spark.api.java.JavaRDDLike.collect(JavaRDDLike.scala:363)
		at org.apache.spark.api.java.JavaRDDLike.collect$(JavaRDDLike.scala:362)
		at org.apache.spark.api.java.AbstractJavaRDDLike.collect(JavaRDDLike.scala:46)
		at org.apache.hudi.client.SparkRDDWriteClient$SlimWriteStats.from(SparkRDDWriteClient.java:441)
		at org.apache.hudi.client.SparkRDDTableServiceClient.triggerWritesAndFetchWriteStats(SparkRDDTableServiceClient.java:72)
		at org.apache.hudi.client.BaseHoodieTableServiceClient.commitCompaction(BaseHoodieTableServiceClient.java:377)
		at org.apache.hudi.client.BaseHoodieTableServiceClient.compact(BaseHoodieTableServiceClient.java:355)
		at org.apache.hudi.client.BaseHoodieTableServiceClient.compact(BaseHoodieTableServiceClient.java:314)
		at org.apache.hudi.client.BaseHoodieTableServiceClient.lambda$inlineCompaction$0(BaseHoodieTableServiceClient.java:201)
		at org.apache.hudi.common.util.Option.ifPresent(Option.java:101)
		at org.apache.hudi.client.BaseHoodieTableServiceClient.inlineCompaction(BaseHoodieTableServiceClient.java:199)
		at org.apache.hudi.client.BaseHoodieTableServiceClient.inlineCompaction(BaseHoodieTableServiceClient.java:211)
		at org.apache.hudi.client.BaseHoodieTableServiceClient.runTableServicesInline(BaseHoodieTableServiceClient.java:658)
		at org.apache.hudi.client.BaseHoodieWriteClient.runTableServicesInlineInternal(BaseHoodieWriteClient.java:711)
		at org.apache.hudi.client.BaseHoodieWriteClient.runTableServicesInline(BaseHoodieWriteClient.java:698)
		at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:290)
		at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:148)
		at org.apache.hudi.HoodieSparkSqlWriterInternal.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:1005)
		at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:560)
		at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:187)
		at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:205)
		at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:127)
		at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.executeUpsert(MergeIntoHoodieTableCommand.scala:510)
		at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.run(MergeIntoHoodieTableCommand.scala:278)
		at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:79)
		at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:77)
		at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:88)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$2(QueryExecution.scala:155)
		at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$8(SQLExecution.scala:163)
		at org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:272)
		at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$7(SQLExecution.scala:125)
		at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
		at org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:112)
		at org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:106)
		at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:111)
		at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:125)
		at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:295)
		at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:124)
		at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
		at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:78)
		at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:237)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:155)
		at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
		at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$eagerlyExecute$1(QueryExecution.scala:154)
		at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:169)
		at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:164)
		at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:470)
		at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:86)
		at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:470)
		at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:37)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:360)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:356)
		at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
		at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
		at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:446)
		at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:164)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyCommandExecuted$1(QueryExecution.scala:126)
		at scala.util.Try$.apply(Try.scala:217)
		at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
		at org.apache.spark.util.LazyTry.tryT$lzycompute(LazyTry.scala:46)
		at org.apache.spark.util.LazyTry.tryT(LazyTry.scala:46)
		... 75 more
Caused by: org.apache.hudi.exception.HoodieException: Could not instantiate the HoodieMergeHandle implementation: org.apache.hudi.io.FileGroupReaderBasedMergeHandle
	at org.apache.hudi.io.HoodieMergeHandleFactory.instantiateMergeHandle(HoodieMergeHandleFactory.java:159)
	at org.apache.hudi.io.HoodieMergeHandleFactory.create(HoodieMergeHandleFactory.java:132)
	at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:160)
	at org.apache.hudi.table.action.compact.HoodieCompactor.lambda$compact$da19f3d2$1(HoodieCompactor.java:145)
	at org.apache.spark.api.java.JavaPairRDD$.$anonfun$toScalaFunction$1(JavaPairRDD.scala:1072)
	at scala.collection.Iterator$$anon$9.next(Iterator.scala:584)
	at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:594)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:608)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:232)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:370)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1651)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1560)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1625)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1424)
	at org.apache.spark.storage.BlockManager.getOrElseUpdateRDDBlock(BlockManager.scala:1378)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:386)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:336)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:107)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
	at org.apache.spark.scheduler.Task.run(Task.scala:147)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.base/java.lang.Thread.run(Thread.java:840)
Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate class org.apache.hudi.io.FileGroupReaderBasedMergeHandle
	at org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
	at org.apache.hudi.io.HoodieMergeHandleFactory.instantiateMergeHandle(HoodieMergeHandleFactory.java:148)
	... 31 more
Caused by: java.lang.reflect.InvocationTargetException
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:481)
	at org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:71)
	... 32 more
Caused by: org.apache.spark.SparkUnsupportedOperationException: [UNSUPPORTED_DATATYPE] Unsupported data type "VARIANT". SQLSTATE: 0A000
	at org.apache.spark.sql.util.LanceArrowUtils$.unsupportedDataTypeError(LanceArrowUtils.scala:330)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowType(LanceArrowUtils.scala:278)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowField(LanceArrowUtils.scala:239)
	at org.apache.spark.sql.util.LanceArrowUtils$.$anonfun$toArrowSchema$1(LanceArrowUtils.scala:166)
	at scala.collection.Iterator$$anon$9.next(Iterator.scala:584)
	at scala.collection.immutable.List.prependedAll(List.scala:156)
	at scala.collection.immutable.List$.from(List.scala:685)
	at scala.collection.immutable.List$.from(List.scala:682)
	at scala.collection.SeqFactory$Delegate.from(Factory.scala:306)
	at scala.collection.immutable.Seq$.from(Seq.scala:42)
	at scala.collection.immutable.Seq$.from(Seq.scala:39)
	at scala.collection.IterableOps.map(Iterable.scala:684)
	at scala.collection.IterableOps.map$(Iterable.scala:684)
	at org.apache.spark.sql.types.StructType.map(StructType.scala:105)
	at org.apache.spark.sql.util.LanceArrowUtils$.toArrowSchema(LanceArrowUtils.scala:160)
	at org.apache.spark.sql.util.LanceArrowUtils.toArrowSchema(LanceArrowUtils.scala)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter.<init>(HoodieSparkLanceWriter.java:131)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter.create(HoodieSparkLanceWriter.java:108)
	at org.apache.hudi.io.storage.HoodieSparkLanceWriter$HoodieSparkLanceWriterBuilder.build(HoodieSparkLanceWriter.java:97)
	at org.apache.hudi.io.storage.HoodieSparkFileWriterFactory.newLanceFileWriter(HoodieSparkFileWriterFactory.java:133)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:76)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:53)
	at org.apache.hudi.io.FileGroupReaderBasedMergeHandle.init(FileGroupReaderBasedMergeHandle.java:234)
	at org.apache.hudi.io.FileGroupReaderBasedMergeHandle.<init>(FileGroupReaderBasedMergeHandle.java:166)
	... 38 more

Metadata

Assignees

Labels

type:bugBug reports and fixes

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions