diff --git a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java b/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java
index f689401653f7..5707ecbe75b6 100644
--- a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java
+++ b/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java
@@ -22,6 +22,8 @@
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import org.apache.avro.Schema;
 import org.apache.avro.SchemaBuilder;
@@ -32,6 +34,7 @@
 import org.apache.avro.io.DatumWriter;
 import org.apache.iceberg.AssertHelpers;
 import org.apache.iceberg.DataFile;
+import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.spark.sql.Dataset;
@@ -394,6 +397,38 @@ public void addPartitionToPartitioned() {
         sql("SELECT id, name, dept, subdept FROM %s ORDER BY id", tableName));
   }
 
+  @Test
+  public void addPartitionToPartitionedSnapshotIdInheritanceEnabledInTwoRuns() {
+    createPartitionedFileTable("parquet");
+
+    String createIceberg =
+        "CREATE TABLE %s (id Integer, name String, dept String, subdept String) USING iceberg PARTITIONED BY (id) "
+            + "TBLPROPERTIES ('%s'='true')";
+
+    sql(createIceberg, tableName, TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED);
+
+    sql(
+        "CALL %s.system.add_files('%s', '`parquet`.`%s`', map('id', 1))",
+        catalogName, tableName, fileTableDir.getAbsolutePath());
+
+    sql(
+        "CALL %s.system.add_files('%s', '`parquet`.`%s`', map('id', 2))",
+        catalogName, tableName, fileTableDir.getAbsolutePath());
+
+    assertEquals(
+        "Iceberg table contains correct data",
+        sql("SELECT id, name, dept, subdept FROM %s WHERE id < 3 ORDER BY id", sourceTableName),
+        sql("SELECT id, name, dept, subdept FROM %s ORDER BY id", tableName));
+
+    // verify that the manifest file name contains a UUID pattern
+    String manifestPath = (String) sql("SELECT path FROM %s.manifests", tableName).get(0)[0];
+
+    Pattern uuidPattern = Pattern.compile("[a-f0-9]{8}(?:-[a-f0-9]{4}){4}[a-f0-9]{8}");
+
+    Matcher matcher = uuidPattern.matcher(manifestPath);
+    Assert.assertTrue("verify manifest path has uuid", matcher.find());
+  }
+
   @Test
   public void addDataPartitionedByDateToPartitioned() {
     createDatePartitionedFileTable("parquet");
diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java
index 3b7e063d9960..4bfff5b2c44a 100644
--- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java
+++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java
@@ -27,6 +27,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.UUID;
 import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -372,7 +373,9 @@ private static Iterator buildManifest(
     FileIO io = new HadoopFileIO(conf.get());
     TaskContext ctx = TaskContext.get();
     String suffix =
-        String.format("stage-%d-task-%d-manifest", ctx.stageId(), ctx.taskAttemptId());
+        String.format(
+            "stage-%d-task-%d-manifest-%s",
+            ctx.stageId(), ctx.taskAttemptId(), UUID.randomUUID());
     Path location = new Path(basePath, suffix);
     String outputPath = FileFormat.AVRO.addExtension(location.toString());
     OutputFile outputFile = io.newOutputFile(outputPath);
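
For reviewers who want to sanity-check the naming scheme outside of Spark, here is a minimal standalone sketch, not part of the patch: the ManifestSuffixDemo class and the hard-coded stage/task IDs are made up for illustration (in the real code they come from TaskContext). It shows why the UUID suffix keeps two runs with identical stage and task IDs from producing the same manifest name, and that the test's regex matches the generated suffix.

import java.util.UUID;
import java.util.regex.Pattern;

public class ManifestSuffixDemo {
  // Same UUID pattern the new test uses to verify the manifest path.
  private static final Pattern UUID_PATTERN =
      Pattern.compile("[a-f0-9]{8}(?:-[a-f0-9]{4}){4}[a-f0-9]{8}");

  // Mirrors the patched suffix format; stageId/taskAttemptId are supplied by the caller here.
  private static String suffix(int stageId, long taskAttemptId) {
    return String.format(
        "stage-%d-task-%d-manifest-%s", stageId, taskAttemptId, UUID.randomUUID());
  }

  public static void main(String[] args) {
    // Two runs reusing the same (hypothetical) stage/task IDs still get distinct names.
    String first = suffix(3, 7L);
    String second = suffix(3, 7L);

    System.out.println(first);
    System.out.println(second);
    System.out.println("distinct: " + !first.equals(second));
    System.out.println("uuid found: " + UUID_PATTERN.matcher(first).find());
  }
}

Without the UUID, a second add_files run against a table with write.manifests.snapshot-id-inheritance.enabled could regenerate the same stage/task-based name and collide with the manifest staged by the first run; the random suffix makes each staged manifest path unique.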