-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Description
Steps to reproduce the behavior:
- start spark shell
spark-shell \
  --packages org.apache.hudi:hudi-spark-bundle_2.11:0.7.0 \
  --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
- run spark code and create data:
import org.apache.spark.sql.Row
val rdd = spark.sparkContext.parallelize(List(Row(1, "a")))
import org.apache.spark.sql.types._
val df = spark.createDataFrame(rdd, StructType(List(StructField("int", IntegerType), StructField("string", StringType))))
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
df.write.format("hudi").partitionBy("int").mode(Overwrite).option(TABLE_NAME, "hudi").option(PRECOMBINE_FIELD_OPT_KEY, "int").option(RECORDKEY_FIELD_OPT_KEY, "int").option(PARTITIONPATH_FIELD_OPT_KEY, "int").save("/data/test/swe/base/hudi/hudi")
- create hive table
CREATE EXTERNAL TABLE test_swe_base.t_hudi_hudi (`string` STRING)
PARTITIONED BY (`int` int)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION '/data/test/swe/base/hudi/hudi';
- recover partitions:
msck repair table test_swe_base.t_hudi_hudi;
Expected behavior
The repair table command succeeds and the table can be queried, returning 1 row.
Environment Description
-
Hudi version : 0.7.0
-
Spark version : 2.4.5
-
Hive version : 3
-
Hadoop version : 3
-
Storage (HDFS/S3/GCS..) : HDFS
-
Running on Docker? (yes/no) : no
Stacktrace
2021-03-11 10:44:36,243 ERROR org.apache.hadoop.hive.metastore.HiveMetaStoreChecker: [HiveServer2-Background-Pool: Thread-34494]: org.apache.hadoop.hive.metastore.utils.MetastoreException: Invalid partition name hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
2021-03-11 10:44:36,244 WARN org.apache.hadoop.hive.metastore.Msck: [HiveServer2-Background-Pool: Thread-34494]: Failed to run metacheck:
org.apache.hadoop.hive.metastore.utils.MetastoreException: org.apache.hadoop.hive.metastore.utils.MetastoreException: Invalid partition name hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkPartitionDirs(HiveMetaStoreChecker.java:568) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkPartitionDirs(HiveMetaStoreChecker.java:447) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.findUnknownPartitions(HiveMetaStoreChecker.java:380) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:353) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:273) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkMetastore(HiveMetaStoreChecker.java:139) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.Msck.repair(Msck.java:121) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.ddl.misc.msck.MsckOperation.execute(MsckOperation.java:74) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:80) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:740) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:495) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:489) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:225) [hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:87) [hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:322) [hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at java.security.AccessController.doPrivileged(Native Method) [?:1.8.0_232]
at javax.security.auth.Subject.doAs(Subject.java:422) [?:1.8.0_232]
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898) [hadoop-common-3.1.1.7.1.4.0-203.jar:?]
at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:340) [hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_232]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_232]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_232]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_232]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_232]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_232]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_232]
Caused by: org.apache.hadoop.hive.metastore.utils.MetastoreException: Invalid partition name hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.logOrThrowExceptionWithMsg(HiveMetaStoreChecker.java:519) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.processPathDepthInfo(HiveMetaStoreChecker.java:500) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:470) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:452) ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
... 4 more