From 80fc5fe3480932408cc4f9f247122a1bcc4888b3 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Wed, 22 Apr 2026 08:21:34 -0700 Subject: [PATCH 01/22] [core] v4: Add TrackedFileAdapters: bridge TrackedFile to DataFile/DeleteFile APIs This adapter allows minimizing the v4-related code changes during scan planning and commits. --- .../apache/iceberg/TrackedFileAdapters.java | 461 +++++++++++++ .../iceberg/TestTrackedFileAdapters.java | 629 ++++++++++++++++++ 2 files changed, 1090 insertions(+) create mode 100644 core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java create mode 100644 core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java new file mode 100644 index 000000000000..295f90a67e9a --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -0,0 +1,461 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; + +/** + * Adapts {@link TrackedFile} entries to the {@link DataFile} and {@link DeleteFile} APIs. + * + *

Note: V4 colocates deletion vectors with data file entries in {@link TrackedFile}. This + * adapter does not carry over {@link TrackedFile#deletionVector()} because {@link DataFile} has no + * way to represent it. Once {@link DataFile} is extended with deletion vector support, this adapter + * should be updated to include it. + */ +class TrackedFileAdapters { + /* Static utility holder: the private constructor prevents instantiation. */ + private TrackedFileAdapters() {} + /** Wraps {@code file} as a {@link DataFile} view backed by the tracked file; no fields are copied. Fails the checkState precondition (IllegalStateException) unless the content type is DATA. {@code spec} is only used to derive the partition tuple and may be null. */ + static DataFile asDataFile(TrackedFile file, PartitionSpec spec) { + Preconditions.checkState( + file.contentType() == FileContent.DATA, + "Cannot convert tracked file to DataFile: content type is %s, not DATA", + file.contentType()); + return new TrackedDataFile(file, spec); + } + /** Wraps {@code file} as a {@link DeleteFile} view backed by the tracked file. Only EQUALITY_DELETES entries are accepted; any other content type fails the checkState precondition (IllegalStateException). {@code spec} is only used to derive the partition tuple and may be null. */ + static DeleteFile asDeleteFile(TrackedFile file, PartitionSpec spec) { + Preconditions.checkState( + file.contentType() == FileContent.EQUALITY_DELETES, + "Cannot convert tracked file to DeleteFile: content type is %s, not EQUALITY_DELETES", + file.contentType()); + return new TrackedDeleteFile(file, spec); + } + /** Derives the partition tuple for {@code file} by applying each partition field's transform to the lower bound recorded in the file's content stats for that field's source column. Returns null when {@code spec} is null or unpartitioned, or when the file has no content stats; individual positions are null for void transforms and for fields without stats or without a lower bound. */ + // TODO: TrackedFile will likely get an explicit partition tuple field (using a union partition + // schema), replacing this transform-based derivation. Once that lands, this method should be + // removed and the adapter should read the tuple directly. 
+ @SuppressWarnings({"unchecked", "rawtypes"}) + static StructLike extractPartition(TrackedFile file, PartitionSpec spec) { + if (spec == null || spec.isUnpartitioned()) { + return null; + } + /* Partition values are derived from column stats, so without stats no tuple can be built. */ + ContentStats stats = file.contentStats(); + if (stats == null) { + return null; + } + + PartitionData partition = new PartitionData(spec.partitionType()); + + for (int i = 0; i < spec.fields().size(); i += 1) { + PartitionField field = spec.fields().get(i); + /* Void transform: the position is set to null without consulting stats. */ + if (field.transform().isVoid()) { + partition.set(i, null); + continue; + } + /* NOTE(review): only the lower bound is transformed; presumably this relies on every row of the file mapping to the same partition value - confirm. */ + FieldStats fieldStats = stats.statsFor(field.sourceId()); + if (fieldStats == null || fieldStats.lowerBound() == null) { + partition.set(i, null); + continue; + } + /* Bind the transform to the source column type, then apply it to the lower bound. */ + Type sourceType = spec.schema().findType(field.sourceId()); + Function boundTransform = field.transform().bind(sourceType); + partition.set(i, boundTransform.apply(fieldStats.lowerBound())); + } + + return partition; + } + /** Collects the value count of every field that reports one, keyed by field id. Returns null, not an empty map, when {@code stats} is null or no field has a value count. */ + static Map valueCounts(ContentStats stats) { + if (stats == null) { + return null; + } + + Map result = Maps.newHashMap(); + for (FieldStats fs : stats.fieldStats()) { + if (fs != null && fs.valueCount() != null) { + result.put(fs.fieldId(), fs.valueCount()); + } + } + + return result.isEmpty() ? null : result; + } + /** Collects the null value count of every field that reports one, keyed by field id. Returns null, not an empty map, when {@code stats} is null or no field has a null value count. */ + static Map nullValueCounts(ContentStats stats) { + if (stats == null) { + return null; + } + + Map result = Maps.newHashMap(); + for (FieldStats fs : stats.fieldStats()) { + if (fs != null && fs.nullValueCount() != null) { + result.put(fs.fieldId(), fs.nullValueCount()); + } + } + + return result.isEmpty() ? null : result; + } + /** Collects the NaN value count of every field that reports one, keyed by field id. Returns null, not an empty map, when {@code stats} is null or no field has a NaN value count. */ + static Map nanValueCounts(ContentStats stats) { + if (stats == null) { + return null; + } + + Map result = Maps.newHashMap(); + for (FieldStats fs : stats.fieldStats()) { + if (fs != null && fs.nanValueCount() != null) { + result.put(fs.fieldId(), fs.nanValueCount()); + } + } + + return result.isEmpty() ? 
null : result; + } + /** Serializes each field's lower bound with Conversions.toByteBuffer using the type recorded on the field stats, keyed by field id. Fields missing a lower bound or a type are skipped; returns null when {@code stats} is null or nothing was collected. */ + static Map lowerBounds(ContentStats stats) { + if (stats == null) { + return null; + } + + Map result = Maps.newHashMap(); + for (FieldStats fs : stats.fieldStats()) { + if (fs != null && fs.lowerBound() != null && fs.type() != null) { + result.put(fs.fieldId(), Conversions.toByteBuffer(fs.type(), fs.lowerBound())); + } + } + + return result.isEmpty() ? null : result; + } + /** Serializes each field's upper bound with Conversions.toByteBuffer using the type recorded on the field stats, keyed by field id. Fields missing an upper bound or a type are skipped; returns null when {@code stats} is null or nothing was collected. */ + static Map upperBounds(ContentStats stats) { + if (stats == null) { + return null; + } + + Map result = Maps.newHashMap(); + for (FieldStats fs : stats.fieldStats()) { + if (fs != null && fs.upperBound() != null && fs.type() != null) { + result.put(fs.fieldId(), Conversions.toByteBuffer(fs.type(), fs.upperBound())); + } + } + + return result.isEmpty() ? null : result; + } + + /** Adapts a TrackedFile DATA entry to the {@link DataFile} interface. */ + private static class TrackedDataFile implements DataFile { + private final TrackedFile file; + private final Tracking tracking; + private final PartitionSpec spec; + /* Tracking is read once here and may be null; the tracking-derived getters then return null. */ + private TrackedDataFile(TrackedFile file, PartitionSpec spec) { + this.file = file; + this.tracking = file.tracking(); + this.spec = spec; + } + + @Override + public Long pos() { + return tracking != null ? tracking.manifestPos() : null; + } + + @Override + public int specId() { + // null specId in v4 means unpartitioned; default to 0 to match PartitionSpec.unpartitioned() + return file.specId() != null ? 
file.specId() : 0; + } + + @Override + public FileContent content() { + return FileContent.DATA; + } + /* Returns the tracked file location; the deprecation warning is suppressed for this override. */ + @SuppressWarnings("deprecation") + @Override + public CharSequence path() { + return file.location(); + } + + @Override + public FileFormat format() { + return file.fileFormat(); + } + /* Recomputed from content stats on every call; the result is not cached. */ + @Override + public StructLike partition() { + return extractPartition(file, spec); + } + + @Override + public long recordCount() { + return file.recordCount(); + } + + @Override + public long fileSizeInBytes() { + return file.fileSizeInBytes(); + } + + @Override + public Integer sortOrderId() { + return file.sortOrderId(); + } + + @Override + public Long dataSequenceNumber() { + return tracking != null ? tracking.dataSequenceNumber() : null; + } + + @Override + public Long fileSequenceNumber() { + return tracking != null ? tracking.fileSequenceNumber() : null; + } + + @Override + public Long firstRowId() { + return tracking != null ? tracking.firstRowId() : null; + } + + @Override + public ByteBuffer keyMetadata() { + return file.keyMetadata(); + } + + @Override + public List splitOffsets() { + return file.splitOffsets(); + } + /* This adapter never reports equality field ids for DATA entries. */ + @Override + public List equalityFieldIds() { + return null; + } + + @Override + public String manifestLocation() { + return tracking != null ? 
tracking.manifestLocation() : null; + } + /* Column sizes are not populated by this adapter; always null. */ + @Override + public Map columnSizes() { + return null; + } + /* The stats maps below are rebuilt from the content stats on each call. */ + @Override + public Map valueCounts() { + return TrackedFileAdapters.valueCounts(file.contentStats()); + } + + @Override + public Map nullValueCounts() { + return TrackedFileAdapters.nullValueCounts(file.contentStats()); + } + + @Override + public Map nanValueCounts() { + return TrackedFileAdapters.nanValueCounts(file.contentStats()); + } + + @Override + public Map lowerBounds() { + return TrackedFileAdapters.lowerBounds(file.contentStats()); + } + + @Override + public Map upperBounds() { + return TrackedFileAdapters.upperBounds(file.contentStats()); + } + /* NOTE(review): every copy variant returns this same instance, so no defensive copy is made and copyWithoutStats() still exposes stats - confirm callers do not rely on copy semantics. */ + @Override + public DataFile copy() { + return this; + } + + @Override + public DataFile copy(boolean withStats) { + return this; + } + + @Override + public DataFile copyWithoutStats() { + return this; + } + + @Override + public DataFile copyWithStats(Set requestedColumnIds) { + return this; + } + } + + /** Adapts a TrackedFile EQUALITY_DELETES entry to the {@link DeleteFile} interface. */ + private static class TrackedDeleteFile implements DeleteFile { + private final TrackedFile file; + private final Tracking tracking; + private final PartitionSpec spec; + /* Tracking is read once here and may be null; the tracking-derived getters then return null. */ + private TrackedDeleteFile(TrackedFile file, PartitionSpec spec) { + this.file = file; + this.tracking = file.tracking(); + this.spec = spec; + } + + @Override + public Long pos() { + return tracking != null ? tracking.manifestPos() : null; + } + + @Override + public int specId() { + // null specId in v4 means unpartitioned; default to 0 to match PartitionSpec.unpartitioned() + return file.specId() != null ? 
file.specId() : 0; + } + + @Override + public FileContent content() { + return FileContent.EQUALITY_DELETES; + } + /* Returns the tracked file location; the deprecation warning is suppressed for this override. */ + @SuppressWarnings("deprecation") + @Override + public CharSequence path() { + return file.location(); + } + + @Override + public FileFormat format() { + return file.fileFormat(); + } + /* Recomputed from content stats on every call; the result is not cached. */ + @Override + public StructLike partition() { + return extractPartition(file, spec); + } + + @Override + public long recordCount() { + return file.recordCount(); + } + + @Override + public long fileSizeInBytes() { + return file.fileSizeInBytes(); + } + + @Override + public Integer sortOrderId() { + return file.sortOrderId(); + } + + @Override + public Long dataSequenceNumber() { + return tracking != null ? tracking.dataSequenceNumber() : null; + } + + @Override + public Long fileSequenceNumber() { + return tracking != null ? tracking.fileSequenceNumber() : null; + } + + @Override + public Long firstRowId() { + return tracking != null ? tracking.firstRowId() : null; + } + + @Override + public ByteBuffer keyMetadata() { + return file.keyMetadata(); + } + + @Override + public List splitOffsets() { + return file.splitOffsets(); + } + /* Delegates to the equality ids stored on the tracked file. */ + @Override + public List equalityFieldIds() { + return file.equalityIds(); + } + + @Override + public String manifestLocation() { + return tracking != null ? 
tracking.manifestLocation() : null; + } + /* Column sizes are not populated by this adapter; always null. */ + @Override + public Map columnSizes() { + return null; + } + /* The stats maps below are rebuilt from the content stats on each call. */ + @Override + public Map valueCounts() { + return TrackedFileAdapters.valueCounts(file.contentStats()); + } + + @Override + public Map nullValueCounts() { + return TrackedFileAdapters.nullValueCounts(file.contentStats()); + } + + @Override + public Map nanValueCounts() { + return TrackedFileAdapters.nanValueCounts(file.contentStats()); + } + + @Override + public Map lowerBounds() { + return TrackedFileAdapters.lowerBounds(file.contentStats()); + } + + @Override + public Map upperBounds() { + return TrackedFileAdapters.upperBounds(file.contentStats()); + } + /* NOTE(review): every copy variant returns this same instance, so no defensive copy is made and copyWithoutStats() still exposes stats - confirm callers do not rely on copy semantics. */ + @Override + public DeleteFile copy() { + return this; + } + + @Override + public DeleteFile copy(boolean withStats) { + return this; + } + + @Override + public DeleteFile copyWithoutStats() { + return this; + } + + @Override + public DeleteFile copyWithStats(Set requestedColumnIds) { + return this; + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java new file mode 100644 index 000000000000..33c41dfb8dde --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -0,0 +1,629 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.nio.ByteBuffer; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; +/** Unit tests for TrackedFileAdapters: content-type validation, field delegation, stats maps, and partition derivation from content stats. */ +class TestTrackedFileAdapters { + + @Test + void testAsDataFileValidatesContentType() { + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, 0); + + DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); + assertThat(dataFile).isNotNull(); + assertThat(dataFile.content()).isEqualTo(FileContent.DATA); + assertThat(dataFile.location()).isEqualTo("s3://bucket/data.parquet"); + } + + @Test + void testAsDataFileRejectsNonData() { + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.EQUALITY_DELETES.id()); + file.set(2, "s3://bucket/delete.avro"); + file.set(3, "avro"); + file.set(4, 50L); + file.set(5, 512L); + file.set(6, 0); + + assertThatThrownBy(() -> TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned())) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Cannot convert tracked file to DataFile: content type is %s, not DATA", + FileContent.EQUALITY_DELETES); + } + + @Test + void testAsDeleteFileValidatesContentType() { + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.EQUALITY_DELETES.id()); + file.set(2, "s3://bucket/eq-delete.avro"); + file.set(3, "avro"); + file.set(4, 50L); + file.set(5, 512L); + file.set(6, 0); + file.set(13, 
ImmutableList.of(1, 2)); + + DeleteFile deleteFile = TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned()); + assertThat(deleteFile).isNotNull(); + assertThat(deleteFile.content()).isEqualTo(FileContent.EQUALITY_DELETES); + assertThat(deleteFile.equalityFieldIds()).containsExactly(1, 2); + } + + @Test + void testAsDeleteFileRejectsNonEqualityDeletes() { + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, 0); + + assertThatThrownBy(() -> TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned())) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Cannot convert tracked file to DeleteFile: content type is %s, not EQUALITY_DELETES", + FileContent.DATA); + } + + @Test + void testDataFileAdapterDelegatesAllFields() { + TrackedFileStruct file = new TrackedFileStruct(); + Types.StructType trackingWithPos = + Types.StructType.of( + ImmutableList.builder() + .addAll(Tracking.schema().fields()) + .add(MetadataColumns.ROW_POSITION) + .build()); + TrackingStruct tracking = new TrackingStruct(trackingWithPos); + /* Tracking positions as checked by the assertions below: 2 = data sequence number, 3 = file sequence number, 5 = first row id, 8 = manifest position (presumably the appended ROW_POSITION column - confirm against Tracking.schema()). */ + tracking.set(0, EntryStatus.ADDED.id()); + tracking.set(1, 42L); + tracking.set(2, 10L); + tracking.set(3, 11L); + tracking.set(5, 1000L); + tracking.setManifestLocation("s3://bucket/manifest.avro"); + tracking.set(8, 3L); + + file.set(0, tracking); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data/file.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, 0); + file.set(8, 3); + file.set(11, ByteBuffer.wrap(new byte[] {1, 2, 3})); + file.set(12, ImmutableList.of(50L, 100L)); + + DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); + + assertThat(dataFile.pos()).isEqualTo(3L); + assertThat(dataFile.specId()).isEqualTo(0); + 
assertThat(dataFile.content()).isEqualTo(FileContent.DATA); + assertThat(dataFile.location()).isEqualTo("s3://bucket/data/file.parquet"); + assertThat(dataFile.format()).isEqualTo(FileFormat.PARQUET); + assertThat(dataFile.recordCount()).isEqualTo(100L); + assertThat(dataFile.fileSizeInBytes()).isEqualTo(1024L); + assertThat(dataFile.sortOrderId()).isEqualTo(3); + assertThat(dataFile.dataSequenceNumber()).isEqualTo(10L); + assertThat(dataFile.fileSequenceNumber()).isEqualTo(11L); + assertThat(dataFile.firstRowId()).isEqualTo(1000L); + assertThat(dataFile.keyMetadata()).isEqualTo(ByteBuffer.wrap(new byte[] {1, 2, 3})); + assertThat(dataFile.splitOffsets()).containsExactly(50L, 100L); + assertThat(dataFile.manifestLocation()).isEqualTo("s3://bucket/manifest.avro"); + assertThat(dataFile.equalityFieldIds()).isNull(); + assertThat(dataFile.columnSizes()).isNull(); + } + + @Test + void testDeleteFileAdapterDelegatesAllFields() { + TrackedFileStruct file = new TrackedFileStruct(); + Types.StructType trackingWithPos = + Types.StructType.of( + ImmutableList.builder() + .addAll(Tracking.schema().fields()) + .add(MetadataColumns.ROW_POSITION) + .build()); + TrackingStruct tracking = new TrackingStruct(trackingWithPos); + + tracking.set(0, EntryStatus.ADDED.id()); + tracking.set(1, 42L); + tracking.set(2, 10L); + tracking.set(3, 11L); + tracking.set(5, 1000L); + tracking.setManifestLocation("s3://bucket/manifest.avro"); + tracking.set(8, 5L); + + file.set(0, tracking); + file.set(1, FileContent.EQUALITY_DELETES.id()); + file.set(2, "s3://bucket/eq-delete.avro"); + file.set(3, "avro"); + file.set(4, 50L); + file.set(5, 512L); + file.set(6, 1); + file.set(8, 5); + file.set(11, ByteBuffer.wrap(new byte[] {4, 5})); + file.set(12, ImmutableList.of(200L)); + file.set(13, ImmutableList.of(1, 2, 3)); + + DeleteFile deleteFile = TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned()); + + assertThat(deleteFile.pos()).isEqualTo(5L); + 
assertThat(deleteFile.specId()).isEqualTo(1); + assertThat(deleteFile.content()).isEqualTo(FileContent.EQUALITY_DELETES); + assertThat(deleteFile.location()).isEqualTo("s3://bucket/eq-delete.avro"); + assertThat(deleteFile.format()).isEqualTo(FileFormat.AVRO); + assertThat(deleteFile.recordCount()).isEqualTo(50L); + assertThat(deleteFile.fileSizeInBytes()).isEqualTo(512L); + assertThat(deleteFile.sortOrderId()).isEqualTo(5); + assertThat(deleteFile.dataSequenceNumber()).isEqualTo(10L); + assertThat(deleteFile.fileSequenceNumber()).isEqualTo(11L); + assertThat(deleteFile.firstRowId()).isEqualTo(1000L); + assertThat(deleteFile.keyMetadata()).isEqualTo(ByteBuffer.wrap(new byte[] {4, 5})); + assertThat(deleteFile.splitOffsets()).containsExactly(200L); + assertThat(deleteFile.manifestLocation()).isEqualTo("s3://bucket/manifest.avro"); + assertThat(deleteFile.equalityFieldIds()).containsExactly(1, 2, 3); + assertThat(deleteFile.columnSizes()).isNull(); + } + + @Test + void testAdapterDelegatesNullTracking() { + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, 0); + + DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); + + assertThat(dataFile.dataSequenceNumber()).isNull(); + assertThat(dataFile.fileSequenceNumber()).isNull(); + assertThat(dataFile.firstRowId()).isNull(); + assertThat(dataFile.manifestLocation()).isNull(); + assertThat(dataFile.pos()).isNull(); + } + + @Test + void testDataFileAdapterStatsFromContentStats() { + TrackedFileStruct file = createTrackedFileWithStats(); + DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); + + assertThat(dataFile.valueCounts()).containsOnly(entry(1, 100L), entry(2, 200L)); + assertThat(dataFile.nullValueCounts()).containsOnly(entry(1, 5L), entry(2, 10L)); + 
assertThat(dataFile.nanValueCounts()).containsOnly(entry(2, 3L)); + assertThat(dataFile.lowerBounds()) + .containsEntry(1, Conversions.toByteBuffer(Types.IntegerType.get(), 1)) + .containsEntry(2, Conversions.toByteBuffer(Types.FloatType.get(), 1.0f)); + assertThat(dataFile.upperBounds()) + .containsEntry(1, Conversions.toByteBuffer(Types.IntegerType.get(), 1000)) + .containsEntry(2, Conversions.toByteBuffer(Types.FloatType.get(), 100.0f)); + assertThat(dataFile.columnSizes()).isNull(); + } + + @Test + void testDeleteFileAdapterStatsFromContentStats() { + TrackedFileStruct file = createTrackedFileWithStats(); + file.set(1, FileContent.EQUALITY_DELETES.id()); + file.set(13, ImmutableList.of(1)); + + DeleteFile deleteFile = TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned()); + + assertThat(deleteFile.valueCounts()).containsOnly(entry(1, 100L), entry(2, 200L)); + assertThat(deleteFile.nullValueCounts()).containsOnly(entry(1, 5L), entry(2, 10L)); + assertThat(deleteFile.nanValueCounts()).containsOnly(entry(2, 3L)); + assertThat(deleteFile.lowerBounds()) + .containsEntry(1, Conversions.toByteBuffer(Types.IntegerType.get(), 1)) + .containsEntry(2, Conversions.toByteBuffer(Types.FloatType.get(), 1.0f)); + assertThat(deleteFile.upperBounds()) + .containsEntry(1, Conversions.toByteBuffer(Types.IntegerType.get(), 1000)) + .containsEntry(2, Conversions.toByteBuffer(Types.FloatType.get(), 100.0f)); + assertThat(deleteFile.columnSizes()).isNull(); + } + + @Test + void testDataFileAdapterStatsNullWhenNoContentStats() { + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, 0); + + DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); + + assertThat(dataFile.valueCounts()).isNull(); + assertThat(dataFile.nullValueCounts()).isNull(); + 
assertThat(dataFile.nanValueCounts()).isNull(); + assertThat(dataFile.lowerBounds()).isNull(); + assertThat(dataFile.upperBounds()).isNull(); + } + + @Test + void testPartitionExtractedFromContentStatsWithIdentityTransform() { + Schema schema = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "category", Types.StringType.get())); + + PartitionSpec spec = PartitionSpec.builderFor(schema).identity("category").build(); + + TrackedFileStruct file = createTrackedFileWithPartitionStats(spec); + DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); + + StructLike partition = dataFile.partition(); + assertThat(partition).isNotNull(); + assertThat(partition.get(0, CharSequence.class).toString()).isEqualTo("electronics"); + } + + @Test + void testPartitionExtractedWithYearTransform() { + Schema schema = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "ts", Types.DateType.get())); + + PartitionSpec spec = PartitionSpec.builderFor(schema).year("ts").build(); + + // date value 18628 = 2021-01-01 (days since epoch) + TrackedFileStruct file = createTrackedFileWithFieldStats(2, Types.DateType.get(), 18628); + DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); + + StructLike partition = dataFile.partition(); + assertThat(partition).isNotNull(); + assertThat(partition.get(0, Integer.class)).isEqualTo(51); + } + + @Test + void testPartitionExtractedWithBucketTransform() { + Schema schema = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "value", Types.IntegerType.get())); + + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("value", 16).build(); + + TrackedFileStruct file = createTrackedFileWithFieldStats(2, Types.IntegerType.get(), 42); + DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); + + StructLike partition = dataFile.partition(); + 
assertThat(partition).isNotNull(); + + // verify the bucket value is a valid bucket (0-15) + int bucket = partition.get(0, Integer.class); + assertThat(bucket).isBetween(0, 15); + } + + @Test + void testPartitionNullWhenNoContentStats() { + Schema schema = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get())); + + PartitionSpec spec = PartitionSpec.builderFor(schema).identity("id").build(); + + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, spec.specId()); + + DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); + assertThat(dataFile.partition()).isNull(); + } + + @Test + void testPartitionNullWhenNullSpec() { + TrackedFileStruct file = createTrackedFileWithStats(); + DataFile dataFile = TrackedFileAdapters.asDataFile(file, null); + assertThat(dataFile.partition()).isNull(); + } + + @Test + void testPartitionNullForUnpartitioned() { + PartitionSpec spec = PartitionSpec.unpartitioned(); + + TrackedFileStruct file = createTrackedFileWithStats(); + DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); + assertThat(dataFile.partition()).isNull(); + } + + @Test + void testPartitionWithMultipleFields() { + Schema schema = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "category", Types.StringType.get())); + + PartitionSpec spec = + PartitionSpec.builderFor(schema).identity("id").identity("category").build(); + + Types.StructType statsStruct = + Types.StructType.of( + Types.NestedField.optional( + 10000, + "1", + Types.StructType.of( + Types.NestedField.optional(10006, "lower_bound", Types.IntegerType.get()), + Types.NestedField.optional(10007, "upper_bound", Types.IntegerType.get()))), + Types.NestedField.optional( + 20000, + "2", + Types.StructType.of( + Types.NestedField.optional(20006, "lower_bound", 
Types.StringType.get()), + Types.NestedField.optional(20007, "upper_bound", Types.StringType.get())))); + + @SuppressWarnings("unchecked") + List> fieldStatsList = + ImmutableList.of( + (FieldStats) + BaseFieldStats.builder() + .fieldId(1) + .type(Types.IntegerType.get()) + .lowerBound(42) + .upperBound(42) + .build(), + (FieldStats) + BaseFieldStats.builder() + .fieldId(2) + .type(Types.StringType.get()) + .lowerBound("electronics") + .upperBound("electronics") + .build()); + + BaseContentStats stats = + BaseContentStats.builder() + .withStatsStruct(statsStruct) + .withFieldStats(fieldStatsList) + .build(); + + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, spec.specId()); + file.set(7, stats); + + DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); + + StructLike partition = dataFile.partition(); + assertThat(partition).isNotNull(); + assertThat(partition.get(0, Integer.class)).isEqualTo(42); + assertThat(partition.get(1, CharSequence.class).toString()).isEqualTo("electronics"); + } + + @Test + void testPartitionWithVoidTransform() { + Schema schema = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "data", Types.StringType.get())); + + PartitionSpec spec = PartitionSpec.builderFor(schema).identity("id").alwaysNull("data").build(); + + TrackedFileStruct file = createTrackedFileWithFieldStats(1, Types.IntegerType.get(), 42); + DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); + + StructLike partition = dataFile.partition(); + assertThat(partition).isNotNull(); + assertThat(partition.get(0, Integer.class)).isEqualTo(42); + assertThat(partition.get(1, CharSequence.class)).isNull(); + } + + @Test + void testDeleteFilePartitionExtracted() { + Schema schema = + new Schema( + Types.NestedField.required(1, "id", 
Types.IntegerType.get()), + Types.NestedField.required(2, "category", Types.StringType.get())); + + PartitionSpec spec = PartitionSpec.builderFor(schema).identity("category").build(); + + TrackedFileStruct file = createTrackedFileWithPartitionStats(spec); + file.set(1, FileContent.EQUALITY_DELETES.id()); + file.set(13, ImmutableList.of(1)); + + DeleteFile deleteFile = TrackedFileAdapters.asDeleteFile(file, spec); + + StructLike partition = deleteFile.partition(); + assertThat(partition).isNotNull(); + assertThat(partition.get(0, CharSequence.class).toString()).isEqualTo("electronics"); + } + + @Test + void testSpecIdDefaultsToZeroWhenNull() { + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + + DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); + assertThat(dataFile.specId()).isEqualTo(0); + } + /** Shorthand for building the map entries passed to the containsOnly assertions. */ + private static java.util.Map.Entry entry(int key, long value) { + return java.util.Map.entry(key, value); + } + /** Builds a DATA tracked file whose content stats hold only field 2 (string), with lower and upper bound both set to electronics; the spec id is taken from {@code spec}. */ + @SuppressWarnings("unchecked") + private static TrackedFileStruct createTrackedFileWithPartitionStats(PartitionSpec spec) { + Types.StructType statsStruct = + Types.StructType.of( + Types.NestedField.optional( + 20000, + "2", + Types.StructType.of( + Types.NestedField.optional(20006, "lower_bound", Types.StringType.get()), + Types.NestedField.optional(20007, "upper_bound", Types.StringType.get())))); + + List> fieldStatsList = + ImmutableList.of( + (FieldStats) + BaseFieldStats.builder() + .fieldId(2) + .type(Types.StringType.get()) + .lowerBound("electronics") + .upperBound("electronics") + .build()); + + BaseContentStats stats = + BaseContentStats.builder() + .withStatsStruct(statsStruct) + .withFieldStats(fieldStatsList) + .build(); + + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data/file.parquet"); 
+ file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, spec.specId()); + file.set(7, stats); + + return file; + } + /** Builds a DATA tracked file (spec id 0) with stats for a single field whose lower and upper bounds are both {@code value}. The stats struct field id is derived as fieldId * 10000. */ + @SuppressWarnings("unchecked") + private static TrackedFileStruct createTrackedFileWithFieldStats( + int fieldId, Type type, T value) { + int statsFieldId = fieldId * 10000; + Types.StructType statsStruct = + Types.StructType.of( + Types.NestedField.optional( + statsFieldId, + Integer.toString(fieldId), + Types.StructType.of( + Types.NestedField.optional(statsFieldId + 6, "lower_bound", type), + Types.NestedField.optional(statsFieldId + 7, "upper_bound", type)))); + + List> fieldStatsList = + ImmutableList.of( + (FieldStats) + BaseFieldStats.builder() + .fieldId(fieldId) + .type(type) + .lowerBound(value) + .upperBound(value) + .build()); + + BaseContentStats stats = + BaseContentStats.builder() + .withStatsStruct(statsStruct) + .withFieldStats(fieldStatsList) + .build(); + + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data/file.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, 0); + file.set(7, stats); + + return file; + } + /** Builds a DATA tracked file (spec id 0) with counts and bounds for field 1 (int, bounds 1 to 1000) and field 2 (float, bounds 1.0 to 100.0); only field 2 carries a NaN count. */ + @SuppressWarnings("unchecked") + private static TrackedFileStruct createTrackedFileWithStats() { + Types.StructType statsStruct = + Types.StructType.of( + Types.NestedField.optional( + 10000, + "1", + Types.StructType.of( + Types.NestedField.optional(10001, "value_count", Types.LongType.get()), + Types.NestedField.optional(10002, "null_value_count", Types.LongType.get()), + Types.NestedField.optional(10003, "nan_value_count", Types.LongType.get()), + Types.NestedField.optional(10006, "lower_bound", Types.IntegerType.get()), + Types.NestedField.optional(10007, "upper_bound", Types.IntegerType.get()))), + Types.NestedField.optional( + 20000, + "2", + Types.StructType.of( + Types.NestedField.optional(20001, "value_count", Types.LongType.get()), + Types.NestedField.optional(20002, 
"null_value_count", Types.LongType.get()), + Types.NestedField.optional(20003, "nan_value_count", Types.LongType.get()), + Types.NestedField.optional(20006, "lower_bound", Types.FloatType.get()), + Types.NestedField.optional(20007, "upper_bound", Types.FloatType.get())))); + + List> fieldStatsList = + ImmutableList.of( + (FieldStats) + BaseFieldStats.builder() + .fieldId(1) + .type(Types.IntegerType.get()) + .valueCount(100L) + .nullValueCount(5L) + .lowerBound(1) + .upperBound(1000) + .build(), + (FieldStats) + BaseFieldStats.builder() + .fieldId(2) + .type(Types.FloatType.get()) + .valueCount(200L) + .nullValueCount(10L) + .nanValueCount(3L) + .lowerBound(1.0f) + .upperBound(100.0f) + .build()); + + BaseContentStats stats = + BaseContentStats.builder() + .withStatsStruct(statsStruct) + .withFieldStats(fieldStatsList) + .build(); + + TrackedFileStruct file = new TrackedFileStruct(); + file.set(1, FileContent.DATA.id()); + file.set(2, "s3://bucket/data/file.parquet"); + file.set(3, "parquet"); + file.set(4, 100L); + file.set(5, 1024L); + file.set(6, 0); + file.set(7, stats); + + return file; + } +} From cb7133dd57532d1dbd94c4fcb1f618b61ca6364a Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Fri, 24 Apr 2026 07:38:12 -0700 Subject: [PATCH 02/22] Clean up tests --- .../iceberg/TestTrackedFileAdapters.java | 169 +++++++++--------- 1 file changed, 80 insertions(+), 89 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index 33c41dfb8dde..37441db3dbda 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -33,12 +33,9 @@ class TestTrackedFileAdapters { @Test void testAsDataFileValidatesContentType() { - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data.parquet"); - file.set(3, "parquet"); - 
file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); file.set(6, 0); DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); @@ -49,12 +46,14 @@ void testAsDataFileValidatesContentType() { @Test void testAsDataFileRejectsNonData() { - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.EQUALITY_DELETES.id()); - file.set(2, "s3://bucket/delete.avro"); - file.set(3, "avro"); - file.set(4, 50L); - file.set(5, 512L); + TrackedFileStruct file = + new TrackedFileStruct( + null, + FileContent.EQUALITY_DELETES, + "s3://bucket/delete.avro", + FileFormat.AVRO, + 50L, + 512L); file.set(6, 0); assertThatThrownBy(() -> TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned())) @@ -66,12 +65,14 @@ void testAsDataFileRejectsNonData() { @Test void testAsDeleteFileValidatesContentType() { - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.EQUALITY_DELETES.id()); - file.set(2, "s3://bucket/eq-delete.avro"); - file.set(3, "avro"); - file.set(4, 50L); - file.set(5, 512L); + TrackedFileStruct file = + new TrackedFileStruct( + null, + FileContent.EQUALITY_DELETES, + "s3://bucket/eq-delete.avro", + FileFormat.AVRO, + 50L, + 512L); file.set(6, 0); file.set(13, ImmutableList.of(1, 2)); @@ -83,12 +84,9 @@ void testAsDeleteFileValidatesContentType() { @Test void testAsDeleteFileRejectsNonEqualityDeletes() { - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); file.set(6, 0); assertThatThrownBy(() -> TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned())) @@ -100,7 +98,6 @@ 
void testAsDeleteFileRejectsNonEqualityDeletes() { @Test void testDataFileAdapterDelegatesAllFields() { - TrackedFileStruct file = new TrackedFileStruct(); Types.StructType trackingWithPos = Types.StructType.of( ImmutableList.builder() @@ -117,12 +114,14 @@ void testDataFileAdapterDelegatesAllFields() { tracking.setManifestLocation("s3://bucket/manifest.avro"); tracking.set(8, 3L); - file.set(0, tracking); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data/file.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + tracking, + FileContent.DATA, + "s3://bucket/data/file.parquet", + FileFormat.PARQUET, + 100L, + 1024L); file.set(6, 0); file.set(8, 3); file.set(11, ByteBuffer.wrap(new byte[] {1, 2, 3})); @@ -150,7 +149,6 @@ void testDataFileAdapterDelegatesAllFields() { @Test void testDeleteFileAdapterDelegatesAllFields() { - TrackedFileStruct file = new TrackedFileStruct(); Types.StructType trackingWithPos = Types.StructType.of( ImmutableList.builder() @@ -167,12 +165,14 @@ void testDeleteFileAdapterDelegatesAllFields() { tracking.setManifestLocation("s3://bucket/manifest.avro"); tracking.set(8, 5L); - file.set(0, tracking); - file.set(1, FileContent.EQUALITY_DELETES.id()); - file.set(2, "s3://bucket/eq-delete.avro"); - file.set(3, "avro"); - file.set(4, 50L); - file.set(5, 512L); + TrackedFileStruct file = + new TrackedFileStruct( + tracking, + FileContent.EQUALITY_DELETES, + "s3://bucket/eq-delete.avro", + FileFormat.AVRO, + 50L, + 512L); file.set(6, 1); file.set(8, 5); file.set(11, ByteBuffer.wrap(new byte[] {4, 5})); @@ -201,12 +201,9 @@ void testDeleteFileAdapterDelegatesAllFields() { @Test void testAdapterDelegatesNullTracking() { - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new 
TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); file.set(6, 0); DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); @@ -257,12 +254,9 @@ void testDeleteFileAdapterStatsFromContentStats() { @Test void testDataFileAdapterStatsNullWhenNoContentStats() { - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); file.set(6, 0); DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); @@ -300,13 +294,13 @@ void testPartitionExtractedWithYearTransform() { PartitionSpec spec = PartitionSpec.builderFor(schema).year("ts").build(); - // date value 18628 = 2021-01-01 (days since epoch) - TrackedFileStruct file = createTrackedFileWithFieldStats(2, Types.DateType.get(), 18628); + // date value 20546 = 2026-04-03 (days since epoch) + TrackedFileStruct file = createTrackedFileWithFieldStats(2, Types.DateType.get(), 20546); DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); StructLike partition = dataFile.partition(); assertThat(partition).isNotNull(); - assertThat(partition.get(0, Integer.class)).isEqualTo(51); + assertThat(partition.get(0, Integer.class)).isEqualTo(56); } @Test @@ -335,12 +329,9 @@ void testPartitionNullWhenNoContentStats() { PartitionSpec spec = PartitionSpec.builderFor(schema).identity("id").build(); - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); 
file.set(6, spec.specId()); DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); @@ -412,12 +403,9 @@ void testPartitionWithMultipleFields() { .withFieldStats(fieldStatsList) .build(); - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); file.set(6, spec.specId()); file.set(7, stats); @@ -469,12 +457,9 @@ void testDeleteFilePartitionExtracted() { @Test void testSpecIdDefaultsToZeroWhenNull() { - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); DataFile dataFile = TrackedFileAdapters.asDataFile(file, PartitionSpec.unpartitioned()); assertThat(dataFile.specId()).isEqualTo(0); @@ -511,12 +496,14 @@ private static TrackedFileStruct createTrackedFileWithPartitionStats(PartitionSp .withFieldStats(fieldStatsList) .build(); - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data/file.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, + FileContent.DATA, + "s3://bucket/data/file.parquet", + FileFormat.PARQUET, + 100L, + 1024L); file.set(6, spec.specId()); file.set(7, stats); @@ -552,12 +539,14 @@ private static TrackedFileStruct createTrackedFileWithFieldStats( .withFieldStats(fieldStatsList) .build(); - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data/file.parquet"); - 
file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, + FileContent.DATA, + "s3://bucket/data/file.parquet", + FileFormat.PARQUET, + 100L, + 1024L); file.set(6, 0); file.set(7, stats); @@ -615,12 +604,14 @@ private static TrackedFileStruct createTrackedFileWithStats() { .withFieldStats(fieldStatsList) .build(); - TrackedFileStruct file = new TrackedFileStruct(); - file.set(1, FileContent.DATA.id()); - file.set(2, "s3://bucket/data/file.parquet"); - file.set(3, "parquet"); - file.set(4, 100L); - file.set(5, 1024L); + TrackedFileStruct file = + new TrackedFileStruct( + null, + FileContent.DATA, + "s3://bucket/data/file.parquet", + FileFormat.PARQUET, + 100L, + 1024L); file.set(6, 0); file.set(7, stats); From 2f9c29feb9c5eb0577168a46b479a13e849970fb Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 28 Apr 2026 09:47:58 -0700 Subject: [PATCH 03/22] Change design such that a DV adapted to DeleteFile --- .../apache/iceberg/TrackedFileAdapters.java | 187 +++++++++++++++++- .../iceberg/TestTrackedFileAdapters.java | 154 +++++++++++++++ 2 files changed, 337 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index 295f90a67e9a..98a3426b1db5 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -31,10 +31,11 @@ /** * Adapts {@link TrackedFile} entries to the {@link DataFile} and {@link DeleteFile} APIs. * - *

Note: V4 colocates deletion vectors with data file entries in {@link TrackedFile}. This - * adapter does not carry over {@link TrackedFile#deletionVector()} because {@link DataFile} has no - * way to represent it. Once {@link DataFile} is extended with deletion vector support, this adapter - * should be updated to include it. + *

V4 colocates deletion vectors with data file entries in {@link TrackedFile}. Rather than + * extending {@link DataFile} with deletion vector fields, DVs are extracted as separate {@link + * DeleteFile} objects via {@link #asDVDeleteFile(TrackedFile, PartitionSpec)}. This matches the v3 + * convention where DVs are tracked as {@link DeleteFile} entries in delete manifests and keeps the + * existing {@link FileScanTask} contract ({@code file()} + {@code deletes()}) unchanged. */ class TrackedFileAdapters { @@ -56,6 +57,16 @@ static DeleteFile asDeleteFile(TrackedFile file, PartitionSpec spec) { return new TrackedDeleteFile(file, spec); } + static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec) { + Preconditions.checkState( + file.contentType() == FileContent.DATA, + "Cannot extract DV from tracked file: content type is %s, not DATA", + file.contentType()); + Preconditions.checkState( + file.deletionVector() != null, "Cannot extract DV from tracked file: no deletion vector"); + return new TrackedDVDeleteFile(file, spec); + } + // TODO: TrackedFile will likely get an explicit partition tuple field (using a union partition // schema), replacing this transform-based derivation. Once that lands, this method should be // removed and the adapter should read the tuple directly. @@ -458,4 +469,172 @@ public DeleteFile copyWithStats(Set requestedColumnIds) { return this; } } + + /** + * Adapts the deletion vector from a TrackedFile DATA entry to the {@link DeleteFile} interface. + * + *

The DV blob metadata is mapped to the DeleteFile DV fields: {@link + * DeleteFile#referencedDataFile()} is the data file location, and {@link + * DeleteFile#contentOffset()} / {@link DeleteFile#contentSizeInBytes()} point to the blob within + * the Puffin file. + */ + private static class TrackedDVDeleteFile implements DeleteFile { + private final TrackedFile file; + private final DeletionVector dv; + private final Tracking tracking; + private final PartitionSpec spec; + + private TrackedDVDeleteFile(TrackedFile file, PartitionSpec spec) { + this.file = file; + this.dv = file.deletionVector(); + this.tracking = file.tracking(); + this.spec = spec; + } + + @Override + public Long pos() { + return tracking != null ? tracking.manifestPos() : null; + } + + @Override + public int specId() { + return file.specId() != null ? file.specId() : 0; + } + + @Override + public FileContent content() { + return FileContent.POSITION_DELETES; + } + + @SuppressWarnings("deprecation") + @Override + public CharSequence path() { + return dv.location(); + } + + @Override + public FileFormat format() { + return FileFormat.PUFFIN; + } + + @Override + public StructLike partition() { + return extractPartition(file, spec); + } + + @Override + public long recordCount() { + return dv.cardinality(); + } + + @Override + public long fileSizeInBytes() { + return dv.sizeInBytes(); + } + + @Override + public Integer sortOrderId() { + return null; + } + + @Override + public Long dataSequenceNumber() { + return tracking != null ? tracking.dataSequenceNumber() : null; + } + + @Override + public Long fileSequenceNumber() { + return tracking != null ? 
tracking.fileSequenceNumber() : null; + } + + @Override + public Long firstRowId() { + return null; + } + + @Override + public ByteBuffer keyMetadata() { + return null; + } + + @Override + public List splitOffsets() { + return null; + } + + @Override + public List equalityFieldIds() { + return null; + } + + @Override + public String referencedDataFile() { + return file.location(); + } + + @Override + public Long contentOffset() { + return dv.offset(); + } + + @Override + public Long contentSizeInBytes() { + return dv.sizeInBytes(); + } + + @Override + public String manifestLocation() { + return tracking != null ? tracking.manifestLocation() : null; + } + + @Override + public Map columnSizes() { + return null; + } + + @Override + public Map valueCounts() { + return null; + } + + @Override + public Map nullValueCounts() { + return null; + } + + @Override + public Map nanValueCounts() { + return null; + } + + @Override + public Map lowerBounds() { + return null; + } + + @Override + public Map upperBounds() { + return null; + } + + @Override + public DeleteFile copy() { + return this; + } + + @Override + public DeleteFile copy(boolean withStats) { + return this; + } + + @Override + public DeleteFile copyWithoutStats() { + return this; + } + + @Override + public DeleteFile copyWithStats(Set requestedColumnIds) { + return this; + } + } } diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index 37441db3dbda..1a33de77276a 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -96,6 +96,151 @@ void testAsDeleteFileRejectsNonEqualityDeletes() { FileContent.DATA); } + @Test + void testAsDVDeleteFileValidatesContentType() { + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); + file.set(6, 0); + 
file.set(9, createDeletionVector()); + + DeleteFile dv = TrackedFileAdapters.asDVDeleteFile(file, PartitionSpec.unpartitioned()); + assertThat(dv).isNotNull(); + assertThat(dv.content()).isEqualTo(FileContent.POSITION_DELETES); + assertThat(dv.format()).isEqualTo(FileFormat.PUFFIN); + } + + @Test + void testAsDVDeleteFileRejectsNonData() { + TrackedFileStruct file = + new TrackedFileStruct( + null, + FileContent.EQUALITY_DELETES, + "s3://bucket/eq-delete.avro", + FileFormat.AVRO, + 50L, + 512L); + file.set(6, 0); + file.set(9, createDeletionVector()); + + assertThatThrownBy( + () -> TrackedFileAdapters.asDVDeleteFile(file, PartitionSpec.unpartitioned())) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Cannot extract DV from tracked file: content type is %s, not DATA", + FileContent.EQUALITY_DELETES); + } + + @Test + void testAsDVDeleteFileRejectsNullDV() { + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); + file.set(6, 0); + + assertThatThrownBy( + () -> TrackedFileAdapters.asDVDeleteFile(file, PartitionSpec.unpartitioned())) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot extract DV from tracked file: no deletion vector"); + } + + @Test + void testDVDeleteFileAdapterDelegatesAllFields() { + Types.StructType trackingWithPos = + Types.StructType.of( + ImmutableList.builder() + .addAll(Tracking.schema().fields()) + .add(MetadataColumns.ROW_POSITION) + .build()); + TrackingStruct tracking = new TrackingStruct(trackingWithPos); + + tracking.set(0, EntryStatus.ADDED.id()); + tracking.set(1, 42L); + tracking.set(2, 10L); + tracking.set(3, 11L); + tracking.set(5, 1000L); + tracking.setManifestLocation("s3://bucket/manifest.avro"); + tracking.set(8, 7L); + + TrackedFileStruct file = + new TrackedFileStruct( + tracking, + FileContent.DATA, + "s3://bucket/data/file.parquet", + FileFormat.PARQUET, + 100L, + 1024L); + file.set(6, 2); + file.set(9, 
createDeletionVector()); + + DeleteFile dvFile = TrackedFileAdapters.asDVDeleteFile(file, PartitionSpec.unpartitioned()); + + // DV-specific fields from DeletionVector + assertThat(dvFile.content()).isEqualTo(FileContent.POSITION_DELETES); + assertThat(dvFile.location()).isEqualTo("s3://bucket/puffin/dv-file.bin"); + assertThat(dvFile.format()).isEqualTo(FileFormat.PUFFIN); + assertThat(dvFile.recordCount()).isEqualTo(10L); + assertThat(dvFile.fileSizeInBytes()).isEqualTo(256L); + assertThat(dvFile.referencedDataFile()).isEqualTo("s3://bucket/data/file.parquet"); + assertThat(dvFile.contentOffset()).isEqualTo(128L); + assertThat(dvFile.contentSizeInBytes()).isEqualTo(256L); + + // fields delegated from TrackedFile / Tracking + assertThat(dvFile.pos()).isEqualTo(7L); + assertThat(dvFile.specId()).isEqualTo(2); + assertThat(dvFile.dataSequenceNumber()).isEqualTo(10L); + assertThat(dvFile.fileSequenceNumber()).isEqualTo(11L); + assertThat(dvFile.manifestLocation()).isEqualTo("s3://bucket/manifest.avro"); + + // fields that should be null for DVs + assertThat(dvFile.sortOrderId()).isNull(); + assertThat(dvFile.firstRowId()).isNull(); + assertThat(dvFile.keyMetadata()).isNull(); + assertThat(dvFile.splitOffsets()).isNull(); + assertThat(dvFile.equalityFieldIds()).isNull(); + assertThat(dvFile.columnSizes()).isNull(); + assertThat(dvFile.valueCounts()).isNull(); + assertThat(dvFile.nullValueCounts()).isNull(); + assertThat(dvFile.nanValueCounts()).isNull(); + assertThat(dvFile.lowerBounds()).isNull(); + assertThat(dvFile.upperBounds()).isNull(); + } + + @Test + void testDVDeleteFileAdapterDelegatesNullTracking() { + TrackedFileStruct file = + new TrackedFileStruct( + null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); + file.set(6, 0); + file.set(9, createDeletionVector()); + + DeleteFile dvFile = TrackedFileAdapters.asDVDeleteFile(file, PartitionSpec.unpartitioned()); + + assertThat(dvFile.dataSequenceNumber()).isNull(); + 
assertThat(dvFile.fileSequenceNumber()).isNull(); + assertThat(dvFile.manifestLocation()).isNull(); + assertThat(dvFile.pos()).isNull(); + } + + @Test + void testDVDeleteFilePartitionExtracted() { + Schema schema = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "category", Types.StringType.get())); + + PartitionSpec spec = PartitionSpec.builderFor(schema).identity("category").build(); + + TrackedFileStruct file = createTrackedFileWithPartitionStats(spec); + file.set(9, createDeletionVector()); + + DeleteFile dvFile = TrackedFileAdapters.asDVDeleteFile(file, spec); + + StructLike partition = dvFile.partition(); + assertThat(partition).isNotNull(); + assertThat(partition.get(0, CharSequence.class).toString()).isEqualTo("electronics"); + } + @Test void testDataFileAdapterDelegatesAllFields() { Types.StructType trackingWithPos = @@ -465,6 +610,15 @@ void testSpecIdDefaultsToZeroWhenNull() { assertThat(dataFile.specId()).isEqualTo(0); } + private static DeletionVectorStruct createDeletionVector() { + DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); + dv.set(0, "s3://bucket/puffin/dv-file.bin"); + dv.set(1, 128L); + dv.set(2, 256L); + dv.set(3, 10L); + return dv; + } + private static java.util.Map.Entry entry(int key, long value) { return java.util.Map.entry(key, value); } From 1a331306ebe030f7713654e1ac2131fd156139ed Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 28 Apr 2026 10:07:08 -0700 Subject: [PATCH 04/22] Make copy safe --- .../apache/iceberg/TrackedFileAdapters.java | 37 ++++++++++++------- .../iceberg/TestTrackedFileAdapters.java | 16 +++++--- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index 98a3426b1db5..d9a7addf1288 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ 
b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -70,15 +70,20 @@ static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec) { // TODO: TrackedFile will likely get an explicit partition tuple field (using a union partition // schema), replacing this transform-based derivation. Once that lands, this method should be // removed and the adapter should read the tuple directly. + // + // This derives partition values by applying the partition transform to the lower bound of the + // source column stats. This is correct because each data file belongs to exactly one partition, + // so lower == upper for partition source columns. For non-identity transforms (bucket, truncate), + // the transform of the lower bound produces the correct partition value under this invariant. @SuppressWarnings({"unchecked", "rawtypes"}) static StructLike extractPartition(TrackedFile file, PartitionSpec spec) { if (spec == null || spec.isUnpartitioned()) { - return null; + return BaseFile.EMPTY_PARTITION_DATA; } ContentStats stats = file.contentStats(); if (stats == null) { - return null; + return new PartitionData(spec.partitionType()); } PartitionData partition = new PartitionData(spec.partitionType()); @@ -306,22 +311,22 @@ public Map upperBounds() { @Override public DataFile copy() { - return this; + return new TrackedDataFile(file.copy(), spec); } @Override public DataFile copy(boolean withStats) { - return this; + return withStats ? 
copy() : copyWithoutStats(); } @Override public DataFile copyWithoutStats() { - return this; + return new TrackedDataFile(file.copyWithoutStats(), spec); } @Override public DataFile copyWithStats(Set requestedColumnIds) { - return this; + return new TrackedDataFile(file.copyWithStats(requestedColumnIds), spec); } } @@ -451,22 +456,22 @@ public Map upperBounds() { @Override public DeleteFile copy() { - return this; + return new TrackedDeleteFile(file.copy(), spec); } @Override public DeleteFile copy(boolean withStats) { - return this; + return withStats ? copy() : copyWithoutStats(); } @Override public DeleteFile copyWithoutStats() { - return this; + return new TrackedDeleteFile(file.copyWithoutStats(), spec); } @Override public DeleteFile copyWithStats(Set requestedColumnIds) { - return this; + return new TrackedDeleteFile(file.copyWithStats(requestedColumnIds), spec); } } @@ -527,6 +532,10 @@ public long recordCount() { return dv.cardinality(); } + // Returns the DV blob size, not the full Puffin file size. The DeletionVector metadata does not + // include the Puffin file size, so this is the best approximation available. Space accounting + // that sums fileSizeInBytes() was already imprecise in v3 (multiple DVs sharing a Puffin file + // each reported the full file size). 
@Override public long fileSizeInBytes() { return dv.sizeInBytes(); @@ -619,22 +628,22 @@ public Map upperBounds() { @Override public DeleteFile copy() { - return this; + return new TrackedDVDeleteFile(file.copy(), spec); } @Override public DeleteFile copy(boolean withStats) { - return this; + return copy(); } @Override public DeleteFile copyWithoutStats() { - return this; + return copy(); } @Override public DeleteFile copyWithStats(Set requestedColumnIds) { - return this; + return copy(); } } } diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index 1a33de77276a..e432ce2b9d5d 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -469,7 +469,7 @@ void testPartitionExtractedWithBucketTransform() { } @Test - void testPartitionNullWhenNoContentStats() { + void testPartitionEmptyWhenNoContentStats() { Schema schema = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get())); PartitionSpec spec = PartitionSpec.builderFor(schema).identity("id").build(); @@ -480,23 +480,27 @@ void testPartitionNullWhenNoContentStats() { file.set(6, spec.specId()); DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); - assertThat(dataFile.partition()).isNull(); + assertThat(dataFile.partition()).isNotNull(); + assertThat(dataFile.partition().size()).isEqualTo(1); + assertThat(dataFile.partition().get(0, Integer.class)).isNull(); } @Test - void testPartitionNullWhenNullSpec() { + void testPartitionEmptyWhenNullSpec() { TrackedFileStruct file = createTrackedFileWithStats(); DataFile dataFile = TrackedFileAdapters.asDataFile(file, null); - assertThat(dataFile.partition()).isNull(); + assertThat(dataFile.partition()).isNotNull(); + assertThat(dataFile.partition().size()).isEqualTo(0); } @Test - void testPartitionNullForUnpartitioned() { + void 
testPartitionEmptyForUnpartitioned() { PartitionSpec spec = PartitionSpec.unpartitioned(); TrackedFileStruct file = createTrackedFileWithStats(); DataFile dataFile = TrackedFileAdapters.asDataFile(file, spec); - assertThat(dataFile.partition()).isNull(); + assertThat(dataFile.partition()).isNotNull(); + assertThat(dataFile.partition().size()).isEqualTo(0); } @Test From 4ed70e1cf0f2c8a73bb9e10244717b612c850caa Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 28 Apr 2026 10:13:12 -0700 Subject: [PATCH 05/22] Reorder --- .../apache/iceberg/TrackedFileAdapters.java | 16 ++++++------- .../iceberg/TestTrackedFileAdapters.java | 24 +++++++++++-------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index d9a7addf1288..a28dd872a713 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -49,14 +49,6 @@ static DataFile asDataFile(TrackedFile file, PartitionSpec spec) { return new TrackedDataFile(file, spec); } - static DeleteFile asDeleteFile(TrackedFile file, PartitionSpec spec) { - Preconditions.checkState( - file.contentType() == FileContent.EQUALITY_DELETES, - "Cannot convert tracked file to DeleteFile: content type is %s, not EQUALITY_DELETES", - file.contentType()); - return new TrackedDeleteFile(file, spec); - } - static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec) { Preconditions.checkState( file.contentType() == FileContent.DATA, @@ -67,6 +59,14 @@ static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec) { return new TrackedDVDeleteFile(file, spec); } + static DeleteFile asEqualityDeleteFile(TrackedFile file, PartitionSpec spec) { + Preconditions.checkState( + file.contentType() == FileContent.EQUALITY_DELETES, + "Cannot convert tracked file to DeleteFile: content type is %s, not 
EQUALITY_DELETES", + file.contentType()); + return new TrackedDeleteFile(file, spec); + } + // TODO: TrackedFile will likely get an explicit partition tuple field (using a union partition // schema), replacing this transform-based derivation. Once that lands, this method should be // removed and the adapter should read the tuple directly. diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index e432ce2b9d5d..b5f19dbbe920 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -64,7 +64,7 @@ void testAsDataFileRejectsNonData() { } @Test - void testAsDeleteFileValidatesContentType() { + void testAsEqualityDeleteFileValidatesContentType() { TrackedFileStruct file = new TrackedFileStruct( null, @@ -76,20 +76,22 @@ void testAsDeleteFileValidatesContentType() { file.set(6, 0); file.set(13, ImmutableList.of(1, 2)); - DeleteFile deleteFile = TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned()); + DeleteFile deleteFile = + TrackedFileAdapters.asEqualityDeleteFile(file, PartitionSpec.unpartitioned()); assertThat(deleteFile).isNotNull(); assertThat(deleteFile.content()).isEqualTo(FileContent.EQUALITY_DELETES); assertThat(deleteFile.equalityFieldIds()).containsExactly(1, 2); } @Test - void testAsDeleteFileRejectsNonEqualityDeletes() { + void testAsEqualityDeleteFileRejectsNonEqualityDeletes() { TrackedFileStruct file = new TrackedFileStruct( null, FileContent.DATA, "s3://bucket/data.parquet", FileFormat.PARQUET, 100L, 1024L); file.set(6, 0); - assertThatThrownBy(() -> TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned())) + assertThatThrownBy( + () -> TrackedFileAdapters.asEqualityDeleteFile(file, PartitionSpec.unpartitioned())) .isInstanceOf(IllegalStateException.class) .hasMessage( "Cannot convert tracked file to DeleteFile: content type is %s, not 
EQUALITY_DELETES", @@ -293,7 +295,7 @@ void testDataFileAdapterDelegatesAllFields() { } @Test - void testDeleteFileAdapterDelegatesAllFields() { + void testEqualityDeleteFileAdapterDelegatesAllFields() { Types.StructType trackingWithPos = Types.StructType.of( ImmutableList.builder() @@ -324,7 +326,8 @@ void testDeleteFileAdapterDelegatesAllFields() { file.set(12, ImmutableList.of(200L)); file.set(13, ImmutableList.of(1, 2, 3)); - DeleteFile deleteFile = TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned()); + DeleteFile deleteFile = + TrackedFileAdapters.asEqualityDeleteFile(file, PartitionSpec.unpartitioned()); assertThat(deleteFile.pos()).isEqualTo(5L); assertThat(deleteFile.specId()).isEqualTo(1); @@ -378,12 +381,13 @@ void testDataFileAdapterStatsFromContentStats() { } @Test - void testDeleteFileAdapterStatsFromContentStats() { + void testEqualityDeleteFileAdapterStatsFromContentStats() { TrackedFileStruct file = createTrackedFileWithStats(); file.set(1, FileContent.EQUALITY_DELETES.id()); file.set(13, ImmutableList.of(1)); - DeleteFile deleteFile = TrackedFileAdapters.asDeleteFile(file, PartitionSpec.unpartitioned()); + DeleteFile deleteFile = + TrackedFileAdapters.asEqualityDeleteFile(file, PartitionSpec.unpartitioned()); assertThat(deleteFile.valueCounts()).containsOnly(entry(1, 100L), entry(2, 200L)); assertThat(deleteFile.nullValueCounts()).containsOnly(entry(1, 5L), entry(2, 10L)); @@ -585,7 +589,7 @@ void testPartitionWithVoidTransform() { } @Test - void testDeleteFilePartitionExtracted() { + void testEqualityDeleteFilePartitionExtracted() { Schema schema = new Schema( Types.NestedField.required(1, "id", Types.IntegerType.get()), @@ -597,7 +601,7 @@ void testDeleteFilePartitionExtracted() { file.set(1, FileContent.EQUALITY_DELETES.id()); file.set(13, ImmutableList.of(1)); - DeleteFile deleteFile = TrackedFileAdapters.asDeleteFile(file, spec); + DeleteFile deleteFile = TrackedFileAdapters.asEqualityDeleteFile(file, spec); StructLike 
partition = deleteFile.partition(); assertThat(partition).isNotNull(); From f9b5437fcfe08ac50645a359f4630aee3d5be848 Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Sat, 14 Mar 2026 20:58:31 -0500 Subject: [PATCH 06/22] Core, Parquet: Allow for Writing Parquet/Avro Manifests in V4 - Parquet by Default Extends V4 Manifest writer to allow it to write manifests in either Parquet or Avro based on the file extension. A default is also added to do Parquet Manifests in the SDK when the Version is 4. This could be parameterized later but that will require parameterizing the test suites so I decided on a single format (parquet) for now. There are a few other required changes here outside of testing: 1. Handling of splitOffsets in Parquet needs to be changed since BaseFile returns an immutable view which Parquet was attempting to re-use by clearing. 2. Unpartitioned Tables need special care since parquet cannot store empty structs in the schema. This means reading from parquet manifests means skipping the parquet field and then changing read offsets if the partition is not defined. The read code is shared between all versions at this time so this change affects older avro readers as well. 3. Some of the test code for TestReplacePartitions assumed that you could validate against a slightly different version of the table. This is a problem if the table you make is partitioned and the validation table is unpartitioned. It used to work ... accidentally I think because we would make unpartitioned operations committed to a partitioned table. 
--- .../org/apache/iceberg/ManifestBenchmark.java | 178 +++++----------- .../apache/iceberg/ManifestBenchmarkUtil.java | 118 ++++++++++ .../iceberg/ManifestCompressionBenchmark.java | 201 ++++++++++++++++++ .../apache/iceberg/ManifestReadBenchmark.java | 173 --------------- .../iceberg/ManifestWriteBenchmark.java | 173 --------------- .../org/apache/iceberg/ManifestReader.java | 30 ++- .../org/apache/iceberg/ManifestWriter.java | 58 +++-- .../org/apache/iceberg/SnapshotProducer.java | 6 +- .../org/apache/iceberg/TableMetadata.java | 1 + .../java/org/apache/iceberg/V4Metadata.java | 71 ++++--- .../java/org/apache/iceberg/TestBase.java | 27 ++- .../org/apache/iceberg/TestFastAppend.java | 8 +- .../apache/iceberg/TestManifestReader.java | 4 + .../apache/iceberg/TestManifestWriter.java | 11 +- .../iceberg/TestManifestWriterVersions.java | 125 ++++++++++- .../org/apache/iceberg/TestMergeAppend.java | 18 +- .../apache/iceberg/TestRewriteManifests.java | 70 +++--- .../apache/iceberg/TestSnapshotProducer.java | 9 + .../org/apache/iceberg/TestTransaction.java | 6 +- .../apache/iceberg/jdbc/TestJdbcCatalog.java | 2 +- .../iceberg/util/TestManifestFileUtil.java | 3 +- .../iceberg/parquet/ParquetValueReaders.java | 8 +- 22 files changed, 715 insertions(+), 585 deletions(-) create mode 100644 core/src/jmh/java/org/apache/iceberg/ManifestBenchmarkUtil.java create mode 100644 core/src/jmh/java/org/apache/iceberg/ManifestCompressionBenchmark.java delete mode 100644 core/src/jmh/java/org/apache/iceberg/ManifestReadBenchmark.java delete mode 100644 core/src/jmh/java/org/apache/iceberg/ManifestWriteBenchmark.java diff --git a/core/src/jmh/java/org/apache/iceberg/ManifestBenchmark.java b/core/src/jmh/java/org/apache/iceberg/ManifestBenchmark.java index cbd372b7a4ba..b48a2aa82a7f 100644 --- a/core/src/jmh/java/org/apache/iceberg/ManifestBenchmark.java +++ b/core/src/jmh/java/org/apache/iceberg/ManifestBenchmark.java @@ -18,23 +18,15 @@ */ package org.apache.iceberg; -import 
java.io.File; import java.io.IOException; import java.io.UncheckedIOException; -import java.nio.ByteBuffer; -import java.nio.file.Files; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Random; import java.util.concurrent.TimeUnit; -import org.apache.commons.io.FileUtils; import org.apache.iceberg.io.CloseableIterator; import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.types.Types; -import org.openjdk.jmh.annotations.AuxCounters; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -52,10 +44,16 @@ import org.openjdk.jmh.infra.Blackhole; /** - * A benchmark that measures manifest read/write performance across compression codecs. + * A benchmark that measures manifest read/write performance across format versions and file + * formats. * - *

Entry counts are calibrated per column count via {@link #ENTRY_BASE}. Set to 300_000 for ~8 MB - * manifests (matching the default {@code commit.manifest.target-size-bytes}) or 15_000 for ~400 KB. + *

V1-V3 only support Avro manifests. V4 supports both Avro and Parquet. The {@code + * versionFormat} parameter encodes valid combinations as {@code "_"} (e.g. {@code + * "4_PARQUET"}) so that only meaningful pairings are benchmarked. + * + *

Entry counts are calibrated per column count via {@link ManifestBenchmarkUtil#ENTRY_BASE}. Set + * to 300_000 for ~8 MB manifests (matching the default {@code commit.manifest.target-size-bytes}) + * or 15_000 for ~400 KB. * *

To run this benchmark: * @@ -63,34 +61,33 @@ * # all combinations * ./gradlew :iceberg-core:jmh -PjmhIncludeRegex=ManifestBenchmark * - * # single codec + * # V4-only (Avro vs Parquet) + * ./gradlew :iceberg-core:jmh -PjmhIncludeRegex=ManifestBenchmark \ + * -PjmhParams="versionFormat=4_AVRO|4_PARQUET" + * + * # all versions, single column count * ./gradlew :iceberg-core:jmh -PjmhIncludeRegex=ManifestBenchmark \ - * -PjmhParams="codec=gzip" + * -PjmhParams="numCols=50" + * + * # single version + * ./gradlew :iceberg-core:jmh -PjmhIncludeRegex=ManifestBenchmark \ + * -PjmhParams="versionFormat=3_AVRO" * } */ @Fork(1) @State(Scope.Benchmark) +// Parquet's columnar write path has a deep call graph (per-column encoders, page assembly, +// dictionary management) that requires more warmup iterations than Avro for the JIT compiler to +// fully optimize. Profiling shows ~650ms of JIT compilation spread across the first 3-4 +// iterations, so 6 warmups ensure measurement begins after JIT has stabilized. 
@Warmup(iterations = 6) @Measurement(iterations = 10) @BenchmarkMode(Mode.SingleShotTime) @Timeout(time = 10, timeUnit = TimeUnit.MINUTES) public class ManifestBenchmark { - static final int ENTRY_BASE = 300_000; - - private static final int FORMAT_VERSION = 4; - - private static final Schema SCHEMA = - new Schema( - Types.NestedField.required(1, "id", Types.IntegerType.get()), - Types.NestedField.required(2, "data", Types.StringType.get()), - Types.NestedField.required(3, "customer", Types.StringType.get())); - - private static final PartitionSpec SPEC = - PartitionSpec.builderFor(SCHEMA).identity("id").identity("data").identity("customer").build(); - - @Param({"gzip", "snappy", "zstd", "uncompressed"}) - private String codec; + @Param({"1_AVRO", "2_AVRO", "3_AVRO", "4_AVRO", "4_PARQUET"}) + private String versionFormat; @Param({"true", "false"}) private String partitioned; @@ -98,11 +95,11 @@ public class ManifestBenchmark { @Param({"10", "50", "100"}) private int numCols; + private int formatVersion; + private FileFormat fileFormat; private PartitionSpec spec; private Map specsById; - private Map writerProperties; private List dataFiles; - private int numEntries; private String writeBaseDir; private OutputFile writeOutputFile; @@ -112,21 +109,26 @@ public class ManifestBenchmark { @Setup(Level.Trial) public void setupTrial() { - this.spec = Boolean.parseBoolean(partitioned) ? SPEC : PartitionSpec.unpartitioned(); - this.specsById = Map.of(spec.specId(), spec); - this.writerProperties = Map.of(TableProperties.AVRO_COMPRESSION, codec); - // ENTRY_BASE / cols: empirically calibrated — 300_000 → ~8 MB, 15_000 → ~400 KB manifests - this.numEntries = ENTRY_BASE / numCols; - this.dataFiles = generateDataFiles(); + String[] parts = versionFormat.split("_", 2); + this.formatVersion = Integer.parseInt(parts[0]); + this.fileFormat = FileFormat.fromString(parts[1]); + this.spec = + Boolean.parseBoolean(partitioned) + ? 
ManifestBenchmarkUtil.SPEC + : PartitionSpec.unpartitioned(); + this.specsById = ImmutableMap.of(spec.specId(), spec); + int numEntries = ManifestBenchmarkUtil.entriesForColumnCount(numCols); + this.dataFiles = ManifestBenchmarkUtil.generateDataFiles(spec, numEntries, numCols); setupReadManifest(); } @Setup(Level.Invocation) public void setupWriteInvocation() throws IOException { - this.writeBaseDir = Files.createTempDirectory("bench-write-").toAbsolutePath().toString(); + this.writeBaseDir = + java.nio.file.Files.createTempDirectory("bench-write-").toAbsolutePath().toString(); this.writeOutputFile = - org.apache.iceberg.Files.localOutput( - String.format(Locale.ROOT, "%s/manifest.avro", writeBaseDir)); + Files.localOutput( + String.format(Locale.ROOT, "%s/%s", writeBaseDir, fileFormat.addExtension("manifest"))); for (DataFile file : dataFiles) { file.path(); @@ -137,7 +139,7 @@ public void setupWriteInvocation() throws IOException { @TearDown(Level.Trial) public void tearDownTrial() { - cleanDir(readBaseDir); + ManifestBenchmarkUtil.cleanDir(readBaseDir); readBaseDir = null; readManifest = null; dataFiles = null; @@ -145,28 +147,15 @@ public void tearDownTrial() { @TearDown(Level.Invocation) public void tearDownInvocation() { - cleanDir(writeBaseDir); + ManifestBenchmarkUtil.cleanDir(writeBaseDir); writeBaseDir = null; writeOutputFile = null; } - @AuxCounters(AuxCounters.Type.EVENTS) - @State(Scope.Thread) - @SuppressWarnings("checkstyle:VisibilityModifier") - public static class FileSizeCounters { - public double manifestSizeMB; - - @Setup(Level.Invocation) - public void reset() { - manifestSizeMB = 0; - } - } - @Benchmark @Threads(1) - public ManifestFile writeManifest(FileSizeCounters counters) throws IOException { - ManifestWriter writer = - ManifestFiles.write(FORMAT_VERSION, spec, writeOutputFile, 1L, writerProperties); + public ManifestFile writeManifest() throws IOException { + ManifestWriter writer = ManifestFiles.write(formatVersion, spec, 
writeOutputFile, 1L); try (ManifestWriter w = writer) { for (DataFile file : dataFiles) { @@ -174,9 +163,7 @@ public ManifestFile writeManifest(FileSizeCounters counters) throws IOException } } - ManifestFile manifest = writer.toManifestFile(); - counters.manifestSizeMB = manifest.length() / (1024.0 * 1024.0); - return manifest; + return writer.toManifestFile(); } @Benchmark @@ -193,17 +180,17 @@ public void readManifest(Blackhole blackhole) throws IOException { private void setupReadManifest() { try { - this.readBaseDir = Files.createTempDirectory("bench-read-").toAbsolutePath().toString(); + this.readBaseDir = + java.nio.file.Files.createTempDirectory("bench-read-").toAbsolutePath().toString(); } catch (IOException e) { throw new UncheckedIOException(e); } OutputFile manifestFile = - org.apache.iceberg.Files.localOutput( - String.format(Locale.ROOT, "%s/manifest.avro", readBaseDir)); + Files.localOutput( + String.format(Locale.ROOT, "%s/%s", readBaseDir, fileFormat.addExtension("manifest"))); - ManifestWriter writer = - ManifestFiles.write(FORMAT_VERSION, spec, manifestFile, 1L, writerProperties); + ManifestWriter writer = ManifestFiles.write(formatVersion, spec, manifestFile, 1L); try (ManifestWriter w = writer) { for (DataFile file : dataFiles) { @@ -215,65 +202,4 @@ private void setupReadManifest() { this.readManifest = writer.toManifestFile(); } - - private List generateDataFiles() { - Random random = new Random(42); - List files = Lists.newArrayListWithCapacity(numEntries); - for (int i = 0; i < numEntries; i++) { - DataFiles.Builder builder = - DataFiles.builder(spec) - .withFormat(FileFormat.PARQUET) - .withPath(String.format(Locale.ROOT, "/path/to/data-%d.parquet", i)) - .withFileSizeInBytes(1024 + i) - .withRecordCount(1000 + i) - .withMetrics(randomMetrics(random, numCols)); - - if (!spec.isUnpartitioned()) { - builder.withPartitionPath( - String.format( - Locale.ROOT, "id=%d/data=val-%d/customer=cust-%d", i % 100, i % 50, i % 200)); - } - - 
files.add(builder.build()); - } - - return files; - } - - static Metrics randomMetrics(Random random, int cols) { - long rowCount = 100_000L + random.nextInt(1000); - Map columnSizes = Maps.newHashMap(); - Map valueCounts = Maps.newHashMap(); - Map nullValueCounts = Maps.newHashMap(); - Map nanValueCounts = Maps.newHashMap(); - Map lowerBounds = Maps.newHashMap(); - Map upperBounds = Maps.newHashMap(); - for (int i = 0; i < cols; i++) { - columnSizes.put(i, 1_000_000L + random.nextInt(100_000)); - valueCounts.put(i, 100_000L + random.nextInt(100)); - nullValueCounts.put(i, (long) random.nextInt(5)); - nanValueCounts.put(i, (long) random.nextInt(5)); - byte[] lower = new byte[8]; - random.nextBytes(lower); - lowerBounds.put(i, ByteBuffer.wrap(lower)); - byte[] upper = new byte[8]; - random.nextBytes(upper); - upperBounds.put(i, ByteBuffer.wrap(upper)); - } - - return new Metrics( - rowCount, - columnSizes, - valueCounts, - nullValueCounts, - nanValueCounts, - lowerBounds, - upperBounds); - } - - private static void cleanDir(String dir) { - if (dir != null) { - FileUtils.deleteQuietly(new File(dir)); - } - } } diff --git a/core/src/jmh/java/org/apache/iceberg/ManifestBenchmarkUtil.java b/core/src/jmh/java/org/apache/iceberg/ManifestBenchmarkUtil.java new file mode 100644 index 000000000000..64602ad0a8b1 --- /dev/null +++ b/core/src/jmh/java/org/apache/iceberg/ManifestBenchmarkUtil.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Random; +import org.apache.commons.io.FileUtils; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Types; + +/** + * Shared constants and stateless helpers for {@link ManifestBenchmark} and {@link + * ManifestCompressionBenchmark}. + */ +final class ManifestBenchmarkUtil { + + /** Scale factor for entry counts. 300_000 yields ~8 MB manifests; 15_000 yields ~400 KB. */ + static final int ENTRY_BASE = 300_000; + + static final Schema SCHEMA = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "data", Types.StringType.get()), + Types.NestedField.required(3, "customer", Types.StringType.get())); + + static final PartitionSpec SPEC = + PartitionSpec.builderFor(SCHEMA).identity("id").identity("data").identity("customer").build(); + + private ManifestBenchmarkUtil() {} + + /** + * Returns the number of manifest entries for the given column count, scaled by {@link + * #ENTRY_BASE}. 
+ */ + static int entriesForColumnCount(int cols) { + return ENTRY_BASE / cols; + } + + static List generateDataFiles(PartitionSpec spec, int numEntries, int numCols) { + Random random = new Random(42); + List files = Lists.newArrayListWithCapacity(numEntries); + for (int i = 0; i < numEntries; i++) { + DataFiles.Builder builder = + DataFiles.builder(spec) + .withFormat(FileFormat.PARQUET) + .withPath(String.format(Locale.ROOT, "/path/to/data-%d.parquet", i)) + .withFileSizeInBytes(1024 + i) + .withRecordCount(1000 + i) + .withMetrics(randomMetrics(random, numCols)); + + if (!spec.isUnpartitioned()) { + builder.withPartitionPath( + String.format( + Locale.ROOT, "id=%d/data=val-%d/customer=cust-%d", i % 100, i % 50, i % 200)); + } + + files.add(builder.build()); + } + return files; + } + + static Metrics randomMetrics(Random random, int cols) { + long rowCount = 100_000L + random.nextInt(1000); + Map columnSizes = Maps.newHashMap(); + Map valueCounts = Maps.newHashMap(); + Map nullValueCounts = Maps.newHashMap(); + Map nanValueCounts = Maps.newHashMap(); + Map lowerBounds = Maps.newHashMap(); + Map upperBounds = Maps.newHashMap(); + for (int i = 0; i < cols; i++) { + columnSizes.put(i, 1_000_000L + random.nextInt(100_000)); + valueCounts.put(i, 100_000L + random.nextInt(100)); + nullValueCounts.put(i, (long) random.nextInt(5)); + nanValueCounts.put(i, (long) random.nextInt(5)); + byte[] lower = new byte[8]; + random.nextBytes(lower); + lowerBounds.put(i, ByteBuffer.wrap(lower)); + byte[] upper = new byte[8]; + random.nextBytes(upper); + upperBounds.put(i, ByteBuffer.wrap(upper)); + } + + return new Metrics( + rowCount, + columnSizes, + valueCounts, + nullValueCounts, + nanValueCounts, + lowerBounds, + upperBounds); + } + + static void cleanDir(String dir) { + if (dir != null) { + FileUtils.deleteQuietly(new java.io.File(dir)); + } + } +} diff --git a/core/src/jmh/java/org/apache/iceberg/ManifestCompressionBenchmark.java 
b/core/src/jmh/java/org/apache/iceberg/ManifestCompressionBenchmark.java new file mode 100644 index 000000000000..7ba9e47c611b --- /dev/null +++ b/core/src/jmh/java/org/apache/iceberg/ManifestCompressionBenchmark.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.iceberg.io.CloseableIterator; +import org.apache.iceberg.io.OutputFile; +import org.openjdk.jmh.annotations.AuxCounters; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Timeout; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * A benchmark that measures manifest read/write performance across compression codecs. + * + *

Entry counts are calibrated per column count via {@link ManifestBenchmarkUtil#ENTRY_BASE}. Set + * to 300_000 for ~8 MB manifests (matching the default {@code commit.manifest.target-size-bytes}) + * or 15_000 for ~400 KB. + * + *

To run this benchmark: + * + *

{@code
+ * # all combinations
+ * ./gradlew :iceberg-core:jmh -PjmhIncludeRegex=ManifestCompressionBenchmark
+ *
+ * # single codec
+ * ./gradlew :iceberg-core:jmh -PjmhIncludeRegex=ManifestCompressionBenchmark \
+ *     -PjmhParams="codec=gzip"
+ * }
+ */ +@Fork(1) +@State(Scope.Benchmark) +@Warmup(iterations = 6) +@Measurement(iterations = 10) +@BenchmarkMode(Mode.SingleShotTime) +@Timeout(time = 10, timeUnit = TimeUnit.MINUTES) +public class ManifestCompressionBenchmark { + + private static final int FORMAT_VERSION = 4; + + @Param({"gzip", "snappy", "zstd", "uncompressed"}) + private String codec; + + @Param({"true", "false"}) + private String partitioned; + + @Param({"10", "50", "100"}) + private int numCols; + + private PartitionSpec spec; + private Map specsById; + private Map writerProperties; + private List dataFiles; + + private String writeBaseDir; + private OutputFile writeOutputFile; + + private String readBaseDir; + private ManifestFile readManifest; + + @Setup(Level.Trial) + public void setupTrial() { + this.spec = + Boolean.parseBoolean(partitioned) + ? ManifestBenchmarkUtil.SPEC + : PartitionSpec.unpartitioned(); + this.specsById = Map.of(spec.specId(), spec); + this.writerProperties = Map.of(TableProperties.AVRO_COMPRESSION, codec); + int numEntries = ManifestBenchmarkUtil.entriesForColumnCount(numCols); + this.dataFiles = ManifestBenchmarkUtil.generateDataFiles(spec, numEntries, numCols); + setupReadManifest(); + } + + @Setup(Level.Invocation) + public void setupWriteInvocation() throws IOException { + this.writeBaseDir = + java.nio.file.Files.createTempDirectory("bench-write-").toAbsolutePath().toString(); + this.writeOutputFile = + Files.localOutput(String.format(Locale.ROOT, "%s/manifest.avro", writeBaseDir)); + + for (DataFile file : dataFiles) { + file.path(); + file.fileSizeInBytes(); + file.recordCount(); + } + } + + @TearDown(Level.Trial) + public void tearDownTrial() { + ManifestBenchmarkUtil.cleanDir(readBaseDir); + readBaseDir = null; + readManifest = null; + dataFiles = null; + } + + @TearDown(Level.Invocation) + public void tearDownInvocation() { + ManifestBenchmarkUtil.cleanDir(writeBaseDir); + writeBaseDir = null; + writeOutputFile = null; + } + + 
@AuxCounters(AuxCounters.Type.EVENTS) + @State(Scope.Thread) + @SuppressWarnings("checkstyle:VisibilityModifier") + public static class FileSizeCounters { + public double manifestSizeMB; + + @Setup(Level.Invocation) + public void reset() { + manifestSizeMB = 0; + } + } + + @Benchmark + @Threads(1) + public ManifestFile writeManifest(FileSizeCounters counters) throws IOException { + ManifestWriter writer = + ManifestFiles.write(FORMAT_VERSION, spec, writeOutputFile, 1L, writerProperties); + + try (ManifestWriter w = writer) { + for (DataFile file : dataFiles) { + w.add(file); + } + } + + ManifestFile manifest = writer.toManifestFile(); + counters.manifestSizeMB = manifest.length() / (1024.0 * 1024.0); + return manifest; + } + + @Benchmark + @Threads(1) + public void readManifest(Blackhole blackhole) throws IOException { + TestTables.LocalFileIO fileIO = new TestTables.LocalFileIO(); + try (CloseableIterator it = + ManifestFiles.read(readManifest, fileIO, specsById).iterator()) { + while (it.hasNext()) { + blackhole.consume(it.next()); + } + } + } + + private void setupReadManifest() { + try { + this.readBaseDir = + java.nio.file.Files.createTempDirectory("bench-read-").toAbsolutePath().toString(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + OutputFile manifestFile = + Files.localOutput(String.format(Locale.ROOT, "%s/manifest.avro", readBaseDir)); + + ManifestWriter writer = + ManifestFiles.write(FORMAT_VERSION, spec, manifestFile, 1L, writerProperties); + + try (ManifestWriter w = writer) { + for (DataFile file : dataFiles) { + w.add(file); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + this.readManifest = writer.toManifestFile(); + } +} diff --git a/core/src/jmh/java/org/apache/iceberg/ManifestReadBenchmark.java b/core/src/jmh/java/org/apache/iceberg/ManifestReadBenchmark.java deleted file mode 100644 index 588b5df1ba97..000000000000 --- a/core/src/jmh/java/org/apache/iceberg/ManifestReadBenchmark.java +++ 
/dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.UUID; -import java.util.concurrent.TimeUnit; -import java.util.stream.Stream; -import org.apache.iceberg.encryption.PlaintextEncryptionManager; -import org.apache.iceberg.io.CloseableIterator; -import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import 
org.openjdk.jmh.annotations.Threads; -import org.openjdk.jmh.annotations.Timeout; - -@Fork(1) -@State(Scope.Benchmark) -@Measurement(iterations = 5) -@BenchmarkMode(Mode.SingleShotTime) -@Timeout(time = 1000, timeUnit = TimeUnit.HOURS) -public class ManifestReadBenchmark { - - private static final int NUM_FILES = 10; - private static final int NUM_ROWS = 100000; - private static final int NUM_COLS = 10; - - private String baseDir; - private String manifestListFile; - - @Setup - public void before() { - baseDir = - Paths.get(new File(System.getProperty("java.io.tmpdir")).getAbsolutePath()).toString(); - manifestListFile = String.format("%s/%s.avro", baseDir, UUID.randomUUID()); - - Random random = new Random(System.currentTimeMillis()); - - try (ManifestListWriter listWriter = - ManifestLists.write( - 1, - org.apache.iceberg.Files.localOutput(manifestListFile), - PlaintextEncryptionManager.instance(), - 0, - 1L, - 0, - 0L)) { - for (int i = 0; i < NUM_FILES; i++) { - OutputFile manifestFile = - org.apache.iceberg.Files.localOutput( - String.format("%s/%s.avro", baseDir, UUID.randomUUID())); - - ManifestWriter writer = - ManifestFiles.write(1, PartitionSpec.unpartitioned(), manifestFile, 1L); - try (ManifestWriter finalWriter = writer) { - for (int j = 0; j < NUM_ROWS; j++) { - DataFile dataFile = - DataFiles.builder(PartitionSpec.unpartitioned()) - .withFormat(FileFormat.PARQUET) - .withPath(String.format("/path/to/data-%s-%s.parquet", i, j)) - .withFileSizeInBytes(j) - .withRecordCount(j) - .withMetrics(randomMetrics(random)) - .build(); - finalWriter.add(dataFile); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - - listWriter.add(writer.toManifestFile()); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @TearDown - public void after() throws IOException { - if (baseDir != null) { - try (Stream walk = Files.walk(Paths.get(baseDir))) { - 
walk.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); - } - baseDir = null; - } - - manifestListFile = null; - } - - @Benchmark - @Threads(1) - public void readManifestFile() throws IOException { - List manifests = - ManifestLists.read(org.apache.iceberg.Files.localInput(manifestListFile)); - TestTables.LocalFileIO fileIO = new TestTables.LocalFileIO(); - Map specs = - ImmutableMap.of(PartitionSpec.unpartitioned().specId(), PartitionSpec.unpartitioned()); - for (ManifestFile manifestFile : manifests) { - ManifestReader reader = ManifestFiles.read(manifestFile, fileIO, specs); - try (CloseableIterator it = reader.iterator()) { - while (it.hasNext()) { - it.next().recordCount(); - } - } - } - } - - private Metrics randomMetrics(Random random) { - long rowCount = 100000L + random.nextInt(1000); - Map columnSizes = Maps.newHashMap(); - Map valueCounts = Maps.newHashMap(); - Map nullValueCounts = Maps.newHashMap(); - Map nanValueCounts = Maps.newHashMap(); - Map lowerBounds = Maps.newHashMap(); - Map upperBounds = Maps.newHashMap(); - for (int i = 0; i < NUM_COLS; i++) { - columnSizes.put(i, 1000000L + random.nextInt(100000)); - valueCounts.put(i, 100000L + random.nextInt(100)); - nullValueCounts.put(i, (long) random.nextInt(5)); - nanValueCounts.put(i, (long) random.nextInt(5)); - byte[] lower = new byte[8]; - random.nextBytes(lower); - lowerBounds.put(i, ByteBuffer.wrap(lower)); - byte[] upper = new byte[8]; - random.nextBytes(upper); - upperBounds.put(i, ByteBuffer.wrap(upper)); - } - - return new Metrics( - rowCount, - columnSizes, - valueCounts, - nullValueCounts, - nanValueCounts, - lowerBounds, - upperBounds); - } -} diff --git a/core/src/jmh/java/org/apache/iceberg/ManifestWriteBenchmark.java b/core/src/jmh/java/org/apache/iceberg/ManifestWriteBenchmark.java deleted file mode 100644 index b0dab63dea06..000000000000 --- a/core/src/jmh/java/org/apache/iceberg/ManifestWriteBenchmark.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.ByteBuffer; -import java.util.Map; -import java.util.Random; -import java.util.UUID; -import java.util.concurrent.TimeUnit; -import org.apache.commons.io.FileUtils; -import org.apache.iceberg.encryption.PlaintextEncryptionManager; -import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.annotations.Threads; -import org.openjdk.jmh.annotations.Timeout; - -/** - * A benchmark that evaluates the performance of writing manifest files - * - *

To run this benchmark: - * ./gradlew :iceberg-core:jmh -PjmhIncludeRegex=ManifestWriteBenchmark - * - */ -@Fork(1) -@State(Scope.Benchmark) -@Measurement(iterations = 5) -@BenchmarkMode(Mode.SingleShotTime) -@Timeout(time = 5, timeUnit = TimeUnit.MINUTES) -public class ManifestWriteBenchmark { - - private static final int NUM_FILES = 10; - private static final int NUM_ROWS = 100000; - private static final int NUM_COLS = 100; - - private String baseDir; - private String manifestListFile; - - private Metrics metrics; - - @Setup - public void before() { - Random random = new Random(System.currentTimeMillis()); - // Pre-create the metrics to avoid doing this in the benchmark itself - metrics = randomMetrics(random); - } - - @TearDown - public void after() { - if (baseDir != null) { - FileUtils.deleteQuietly(new File(baseDir)); - baseDir = null; - } - - manifestListFile = null; - } - - @State(Scope.Benchmark) - public static class BenchmarkState { - @Param({"1", "2"}) - private int formatVersion; - - public int getFormatVersion() { - return formatVersion; - } - } - - @Benchmark - @Threads(1) - public void writeManifestFile(BenchmarkState state) throws IOException { - this.baseDir = - java.nio.file.Files.createTempDirectory("benchmark-").toAbsolutePath().toString(); - this.manifestListFile = String.format("%s/%s.avro", baseDir, UUID.randomUUID()); - - try (ManifestListWriter listWriter = - ManifestLists.write( - state.getFormatVersion(), - org.apache.iceberg.Files.localOutput(manifestListFile), - PlaintextEncryptionManager.instance(), - 0, - 1L, - 0, - 0L)) { - for (int i = 0; i < NUM_FILES; i++) { - OutputFile manifestFile = - org.apache.iceberg.Files.localOutput( - String.format("%s/%s.avro", baseDir, UUID.randomUUID())); - - ManifestWriter writer = - ManifestFiles.write( - state.formatVersion, PartitionSpec.unpartitioned(), manifestFile, 1L); - try (ManifestWriter finalWriter = writer) { - for (int j = 0; j < NUM_ROWS; j++) { - DataFile dataFile = - 
DataFiles.builder(PartitionSpec.unpartitioned()) - .withFormat(FileFormat.PARQUET) - .withPath(String.format("/path/to/data-%s-%s.parquet", i, j)) - .withFileSizeInBytes(j) - .withRecordCount(j) - .withMetrics(metrics) - .build(); - finalWriter.add(dataFile); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - - listWriter.add(writer.toManifestFile()); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - private Metrics randomMetrics(Random random) { - long rowCount = 100000L + random.nextInt(1000); - Map columnSizes = Maps.newHashMap(); - Map valueCounts = Maps.newHashMap(); - Map nullValueCounts = Maps.newHashMap(); - Map nanValueCounts = Maps.newHashMap(); - Map lowerBounds = Maps.newHashMap(); - Map upperBounds = Maps.newHashMap(); - for (int i = 0; i < NUM_COLS; i++) { - columnSizes.put(i, 1000000L + random.nextInt(100000)); - valueCounts.put(i, 100000L + random.nextInt(100)); - nullValueCounts.put(i, (long) random.nextInt(5)); - nanValueCounts.put(i, (long) random.nextInt(5)); - byte[] lower = new byte[8]; - random.nextBytes(lower); - lowerBounds.put(i, ByteBuffer.wrap(lower)); - byte[] upper = new byte[8]; - random.nextBytes(upper); - upperBounds.put(i, ByteBuffer.wrap(upper)); - } - - return new Metrics( - rowCount, - columnSizes, - valueCounts, - nullValueCounts, - nanValueCounts, - lowerBounds, - upperBounds); - } -} diff --git a/core/src/main/java/org/apache/iceberg/ManifestReader.java b/core/src/main/java/org/apache/iceberg/ManifestReader.java index 668a3764de1d..8e483fca8775 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestReader.java +++ b/core/src/main/java/org/apache/iceberg/ManifestReader.java @@ -154,6 +154,12 @@ private > PartitionSpec readPartitionSpec(InputFile inp } private static > Map readMetadata(InputFile inputFile) { + FileFormat manifestFormat = FileFormat.fromFileName(inputFile.location()); + Preconditions.checkArgument( + manifestFormat == FileFormat.AVRO, + "Reading manifest 
metadata is only supported for Avro manifests: %s", + inputFile.location()); + Map metadata; try { try (CloseableIterable> headerReader = @@ -281,6 +287,8 @@ private CloseableIterable> open(Schema projection) { Preconditions.checkArgument( format != null, "Unable to determine format of manifest: %s", file.location()); + boolean unpartitioned = spec.rawPartitionType().fields().isEmpty(); + List fields = Lists.newArrayList(); fields.addAll(projection.asStruct().fields()); if (projection.findField(DataFile.RECORD_COUNT.fieldId()) == null) { @@ -291,14 +299,26 @@ private CloseableIterable> open(Schema projection) { } fields.add(MetadataColumns.ROW_POSITION); - CloseableIterable> reader = + // V4+ manifests omit the partition field when unpartitioned (Parquet cannot represent + // empty structs, and the field is meaningless regardless of format). For older versions + // the empty struct is present but safe to skip. + if (unpartitioned) { + fields.removeIf(f -> f.fieldId() == DataFile.PARTITION_ID); + } + + InternalData.ReadBuilder readBuilder = InternalData.read(format, file) .project(ManifestEntry.wrapFileSchema(Types.StructType.of(fields))) .setRootType(GenericManifestEntry.class) - .setCustomType(ManifestEntry.DATA_FILE_ID, content.fileClass()) - .setCustomType(DataFile.PARTITION_ID, PartitionData.class) - .reuseContainers() - .build(); + .setCustomType(ManifestEntry.DATA_FILE_ID, content.fileClass()); + + if (!unpartitioned) { + readBuilder.setCustomType(DataFile.PARTITION_ID, PartitionData.class); + } + + readBuilder.reuseContainers(); + + CloseableIterable> reader = readBuilder.build(); addCloseable(reader); diff --git a/core/src/main/java/org/apache/iceberg/ManifestWriter.java b/core/src/main/java/org/apache/iceberg/ManifestWriter.java index 7d85f991b080..07b9f0209074 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestWriter.java +++ b/core/src/main/java/org/apache/iceberg/ManifestWriter.java @@ -24,6 +24,7 @@ import 
org.apache.iceberg.encryption.EncryptedOutputFile; import org.apache.iceberg.encryption.EncryptionKeyMetadata; import org.apache.iceberg.encryption.NativeEncryptionKeyMetadata; +import org.apache.iceberg.encryption.NativeEncryptionOutputFile; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.OutputFile; @@ -40,6 +41,7 @@ public abstract class ManifestWriter> implements FileAp // this is replaced when writing a manifest list by the ManifestFile wrapper static final long UNASSIGNED_SEQ = -1L; + private final FileFormat format; private final OutputFile file; private final EncryptionKeyMetadata keyMetadata; private final int specId; @@ -65,7 +67,8 @@ private ManifestWriter( Long snapshotId, Long firstRowId, Map writerProperties) { - this.file = file.encryptingOutputFile(); + this.format = FileFormat.fromFileName(file.encryptingOutputFile().location()); + this.file = outputFile(file); this.specId = spec.specId(); this.writerProperties = writerProperties; this.writer = newAppender(spec, this.file); @@ -82,6 +85,19 @@ private ManifestWriter( protected abstract FileAppender> newAppender( PartitionSpec spec, OutputFile outputFile); + private OutputFile outputFile(EncryptedOutputFile encryptedFile) { + // Casting to NativeEncryptionOutputFile actually makes the file rely on native encryption + // rather than whole-file encryption. 
+ if (format == FileFormat.PARQUET && encryptedFile instanceof NativeEncryptionOutputFile) { + return (NativeEncryptionOutputFile) encryptedFile; + } + return encryptedFile.encryptingOutputFile(); + } + + protected FileFormat format() { + return format; + } + protected Map writerProperties() { return writerProperties; } @@ -206,16 +222,7 @@ public long length() { public ManifestFile toManifestFile() { Preconditions.checkState(closed, "Cannot build ManifestFile, writer is not closed"); - ByteBuffer keyMetadataBuffer; - if (keyMetadata instanceof NativeEncryptionKeyMetadata) { - // File length is required by AES GCM Stream encryption, to prevent file truncation attacks - keyMetadataBuffer = - ((NativeEncryptionKeyMetadata) keyMetadata).copyWithLength(length()).buffer(); - } else if (keyMetadata != null) { - keyMetadataBuffer = keyMetadata.buffer(); - } else { - keyMetadataBuffer = null; - } + ByteBuffer keyMetadataBuffer = keyMetadataBuffer(); // if the minSequenceNumber is null, then no manifests with a sequence number have been written, // so the min data sequence number is the one that will be assigned when this is committed. @@ -240,6 +247,17 @@ public ManifestFile toManifestFile() { firstRowId); } + private ByteBuffer keyMetadataBuffer() { + if (keyMetadata instanceof NativeEncryptionKeyMetadata && format == FileFormat.AVRO) { + // Whole-file encryption needs the file length embedded for GCM truncation protection. + // Formats with native encryption (like Parquet) handle this directly and don't need it. 
+ return ((NativeEncryptionKeyMetadata) keyMetadata).copyWithLength(length()).buffer(); + } else if (keyMetadata != null) { + return keyMetadata.buffer(); + } + return null; + } + @Override public void close() throws IOException { this.closed = true; @@ -256,7 +274,7 @@ static class V4Writer extends ManifestWriter { Long firstRowId, Map writerProperties) { super(spec, file, snapshotId, firstRowId, writerProperties); - this.entryWrapper = new V4Metadata.ManifestEntryWrapper<>(snapshotId); + this.entryWrapper = new V4Metadata.ManifestEntryWrapper<>(snapshotId, spec.partitionType()); } @Override @@ -269,7 +287,7 @@ protected FileAppender> newAppender( PartitionSpec spec, OutputFile file) { Schema manifestSchema = V4Metadata.entrySchema(spec.partitionType()); try { - return InternalData.write(FileFormat.AVRO, file) + return InternalData.write(format(), file) .schema(manifestSchema) .named("manifest_entry") .meta("schema", SchemaParser.toJson(spec.schema())) @@ -296,7 +314,7 @@ static class V4DeleteWriter extends ManifestWriter { Long snapshotId, Map writerProperties) { super(spec, file, snapshotId, null, writerProperties); - this.entryWrapper = new V4Metadata.ManifestEntryWrapper<>(snapshotId); + this.entryWrapper = new V4Metadata.ManifestEntryWrapper<>(snapshotId, spec.partitionType()); } @Override @@ -309,7 +327,7 @@ protected FileAppender> newAppender( PartitionSpec spec, OutputFile file) { Schema manifestSchema = V4Metadata.entrySchema(spec.partitionType()); try { - return InternalData.write(FileFormat.AVRO, file) + return InternalData.write(format(), file) .schema(manifestSchema) .named("manifest_entry") .meta("schema", SchemaParser.toJson(spec.schema())) @@ -342,6 +360,8 @@ static class V3Writer extends ManifestWriter { Long firstRowId, Map writerProperties) { super(spec, file, snapshotId, firstRowId, writerProperties); + Preconditions.checkArgument( + format() == FileFormat.AVRO, "V3 manifests must use Avro, but got: %s", format()); this.entryWrapper = new 
V3Metadata.ManifestEntryWrapper<>(snapshotId); } @@ -382,6 +402,8 @@ static class V3DeleteWriter extends ManifestWriter { Long snapshotId, Map writerProperties) { super(spec, file, snapshotId, null, writerProperties); + Preconditions.checkArgument( + format() == FileFormat.AVRO, "V3 manifests must use Avro, but got: %s", format()); this.entryWrapper = new V3Metadata.ManifestEntryWrapper<>(snapshotId); } @@ -427,6 +449,8 @@ static class V2Writer extends ManifestWriter { Long snapshotId, Map writerProperties) { super(spec, file, snapshotId, null, writerProperties); + Preconditions.checkArgument( + format() == FileFormat.AVRO, "V2 manifests must use Avro, but got: %s", format()); this.entryWrapper = new V2Metadata.ManifestEntryWrapper<>(snapshotId); } @@ -467,6 +491,8 @@ static class V2DeleteWriter extends ManifestWriter { Long snapshotId, Map writerProperties) { super(spec, file, snapshotId, null, writerProperties); + Preconditions.checkArgument( + format() == FileFormat.AVRO, "V2 manifests must use Avro, but got: %s", format()); this.entryWrapper = new V2Metadata.ManifestEntryWrapper<>(snapshotId); } @@ -512,6 +538,8 @@ static class V1Writer extends ManifestWriter { Long snapshotId, Map writerProperties) { super(spec, file, snapshotId, null, writerProperties); + Preconditions.checkArgument( + format() == FileFormat.AVRO, "V1 manifests must use Avro, but got: %s", format()); this.entryWrapper = new V1Metadata.ManifestEntryWrapper(); } diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index 6ba10e8049f6..108ec73e9366 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -601,9 +601,13 @@ protected OutputFile manifestListPath() { } protected EncryptedOutputFile newManifestOutputFile() { + FileFormat manifestFormat = + ops.current().formatVersion() >= 
TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS + ? FileFormat.PARQUET + : FileFormat.AVRO; String manifestFileLocation = ops.metadataFileLocation( - FileFormat.AVRO.addExtension(commitUUID + "-m" + manifestCount.getAndIncrement())); + manifestFormat.addExtension(commitUUID + "-m" + manifestCount.getAndIncrement())); return EncryptingFileIO.combine(ops.io(), ops.encryption()) .newEncryptingOutputFile(manifestFileLocation); } diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 43a67dd2bef2..c4a7bfc5c83c 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -57,6 +57,7 @@ public class TableMetadata implements Serializable { static final int DEFAULT_TABLE_FORMAT_VERSION = 2; static final int SUPPORTED_TABLE_FORMAT_VERSION = 4; static final int MIN_FORMAT_VERSION_ROW_LINEAGE = 3; + static final int MIN_FORMAT_VERSION_PARQUET_MANIFESTS = 4; static final int INITIAL_SPEC_ID = 0; static final int INITIAL_SORT_ORDER_ID = 1; static final int INITIAL_SCHEMA_ID = 0; diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index 67478290aa10..2637c5186e6c 100644 --- a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -23,6 +23,7 @@ import java.nio.ByteBuffer; import java.util.List; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; class V4Metadata { @@ -278,28 +279,35 @@ static Schema wrapFileSchema(Types.StructType fileSchema) { } static Types.StructType fileType(Types.StructType partitionType) { - return Types.StructType.of( - DataFile.CONTENT.asRequired(), - DataFile.FILE_PATH, - DataFile.FILE_FORMAT, - required( - DataFile.PARTITION_ID, 
DataFile.PARTITION_NAME, partitionType, DataFile.PARTITION_DOC), - DataFile.RECORD_COUNT, - DataFile.FILE_SIZE, - DataFile.COLUMN_SIZES, - DataFile.VALUE_COUNTS, - DataFile.NULL_VALUE_COUNTS, - DataFile.NAN_VALUE_COUNTS, - DataFile.LOWER_BOUNDS, - DataFile.UPPER_BOUNDS, - DataFile.KEY_METADATA, - DataFile.SPLIT_OFFSETS, - DataFile.EQUALITY_IDS, - DataFile.SORT_ORDER_ID, - DataFile.FIRST_ROW_ID, - DataFile.REFERENCED_DATA_FILE, - DataFile.CONTENT_OFFSET, - DataFile.CONTENT_SIZE); + List fields = Lists.newArrayList(); + fields.add(DataFile.CONTENT.asRequired()); + fields.add(DataFile.FILE_PATH); + fields.add(DataFile.FILE_FORMAT); + if (!partitionType.fields().isEmpty()) { + fields.add( + required( + DataFile.PARTITION_ID, + DataFile.PARTITION_NAME, + partitionType, + DataFile.PARTITION_DOC)); + } + fields.add(DataFile.RECORD_COUNT); + fields.add(DataFile.FILE_SIZE); + fields.add(DataFile.COLUMN_SIZES); + fields.add(DataFile.VALUE_COUNTS); + fields.add(DataFile.NULL_VALUE_COUNTS); + fields.add(DataFile.NAN_VALUE_COUNTS); + fields.add(DataFile.LOWER_BOUNDS); + fields.add(DataFile.UPPER_BOUNDS); + fields.add(DataFile.KEY_METADATA); + fields.add(DataFile.SPLIT_OFFSETS); + fields.add(DataFile.EQUALITY_IDS); + fields.add(DataFile.SORT_ORDER_ID); + fields.add(DataFile.FIRST_ROW_ID); + fields.add(DataFile.REFERENCED_DATA_FILE); + fields.add(DataFile.CONTENT_OFFSET); + fields.add(DataFile.CONTENT_SIZE); + return Types.StructType.of(fields); } static class ManifestEntryWrapper> @@ -309,10 +317,10 @@ static class ManifestEntryWrapper> private final DataFileWrapper fileWrapper; private ManifestEntry wrapped = null; - ManifestEntryWrapper(Long commitSnapshotId) { - this.size = entrySchema(Types.StructType.of()).columns().size(); + ManifestEntryWrapper(Long commitSnapshotId, Types.StructType partitionType) { + this.size = entrySchema(partitionType).columns().size(); this.commitSnapshotId = commitSnapshotId; - this.fileWrapper = new DataFileWrapper<>(); + this.fileWrapper = new 
DataFileWrapper<>(partitionType); } public ManifestEntryWrapper wrap(ManifestEntry entry) { @@ -423,11 +431,15 @@ public ManifestEntry copyWithoutStats() { /** Wrapper used to write DataFile or DeleteFile to v4 metadata. */ static class DataFileWrapper> extends Delegates.DelegatingContentFile implements ContentFile, StructLike { + private static final int PARTITION_POSITION = 3; + private final int size; + private final boolean hasPartition; - DataFileWrapper() { + DataFileWrapper(Types.StructType partitionType) { super(null); - this.size = fileType(Types.StructType.of()).fields().size(); + this.hasPartition = !partitionType.fields().isEmpty(); + this.size = fileType(partitionType).fields().size(); } @SuppressWarnings("unchecked") @@ -452,7 +464,10 @@ public T get(int pos, Class javaClass) { } private Object get(int pos) { - switch (pos) { + // when the partition field is omitted, positions at or after where it would appear + // shift down by 1, so adjust back to the canonical field ordering + int adjusted = hasPartition ? pos : (pos >= PARTITION_POSITION ? 
pos + 1 : pos); + switch (adjusted) { case 0: return wrapped.content().id(); case 1: diff --git a/core/src/test/java/org/apache/iceberg/TestBase.java b/core/src/test/java/org/apache/iceberg/TestBase.java index 27b8a49d0497..d7410a15833e 100644 --- a/core/src/test/java/org/apache/iceberg/TestBase.java +++ b/core/src/test/java/org/apache/iceberg/TestBase.java @@ -263,7 +263,8 @@ List listManifestFiles(File tableDirToList) { .listFiles( (dir, name) -> !name.startsWith("snap") - && Files.getFileExtension(name).equalsIgnoreCase("avro"))); + && (Files.getFileExtension(name).equalsIgnoreCase("avro") + || Files.getFileExtension(name).equalsIgnoreCase("parquet")))); } List listManifestLists(File tableDirToList) { @@ -297,12 +298,22 @@ public TableMetadata readMetadata() { return TestTables.readMetadata("test"); } + static FileFormat manifestFormat(int version) { + return version >= TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS + ? FileFormat.PARQUET + : FileFormat.AVRO; + } + + FileFormat manifestFormat() { + return manifestFormat(formatVersion); + } + ManifestFile writeManifest(DataFile... files) throws IOException { return writeManifest(null, files); } ManifestFile writeManifest(Long snapshotId, DataFile... files) throws IOException { - File manifestFile = temp.resolve("input.m0.avro").toFile(); + File manifestFile = temp.resolve(manifestFormat().addExtension("input.m0")).toFile(); assertThat(manifestFile).doesNotExist(); OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath()); @@ -324,7 +335,7 @@ ManifestFile writeManifest(String fileName, ManifestEntry... entries) throws } ManifestFile writeManifest(Long snapshotId, ManifestEntry... 
entries) throws IOException { - return writeManifest(snapshotId, "input.m0.avro", entries); + return writeManifest(snapshotId, manifestFormat().addExtension("input.m0"), entries); } @SuppressWarnings("unchecked") @@ -360,8 +371,8 @@ ManifestFile writeDeleteManifest(int newFormatVersion, Long snapshotId, DeleteFi throws IOException { OutputFile manifestFile = org.apache.iceberg.Files.localOutput( - FileFormat.AVRO.addExtension( - temp.resolve("junit" + System.nanoTime()).toFile().toString())); + manifestFormat(newFormatVersion) + .addExtension(temp.resolve("junit" + System.nanoTime()).toFile().toString())); ManifestWriter writer = ManifestFiles.writeDeleteManifest(newFormatVersion, SPEC, manifestFile, snapshotId); try { @@ -375,7 +386,7 @@ ManifestFile writeDeleteManifest(int newFormatVersion, Long snapshotId, DeleteFi } ManifestFile writeManifestWithName(String name, DataFile... files) throws IOException { - File manifestFile = temp.resolve(name + ".avro").toFile(); + File manifestFile = temp.resolve(manifestFormat().addExtension(name)).toFile(); assertThat(manifestFile).doesNotExist(); OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath()); @@ -453,6 +464,10 @@ void validateSnapshot(Snapshot old, Snapshot snap, long sequenceNumber, DataFile validateSnapshot(old, snap, (Long) sequenceNumber, newFiles); } + void validateSnapshot(Table validationTable, Snapshot old, Snapshot snap, DataFile... 
newFiles) { + validateSnapshot(validationTable, old, snap, null, newFiles); + } + @SuppressWarnings("checkstyle:HiddenField") Snapshot commit(Table table, SnapshotUpdate snapshotUpdate, String branch) { Snapshot snapshot; diff --git a/core/src/test/java/org/apache/iceberg/TestFastAppend.java b/core/src/test/java/org/apache/iceberg/TestFastAppend.java index 8f427525e214..bc28ecd88022 100644 --- a/core/src/test/java/org/apache/iceberg/TestFastAppend.java +++ b/core/src/test/java/org/apache/iceberg/TestFastAppend.java @@ -509,14 +509,18 @@ public void testInvalidAppendManifest() throws IOException { assertThat(base.currentSnapshot()).isNull(); ManifestFile manifestWithExistingFiles = - writeManifest("manifest-file-1.avro", manifestEntry(Status.EXISTING, null, FILE_A)); + writeManifest( + manifestFormat().addExtension("manifest-file-1"), + manifestEntry(Status.EXISTING, null, FILE_A)); assertThatThrownBy( () -> table.newFastAppend().appendManifest(manifestWithExistingFiles).commit()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot append manifest with existing files"); ManifestFile manifestWithDeletedFiles = - writeManifest("manifest-file-2.avro", manifestEntry(Status.DELETED, null, FILE_A)); + writeManifest( + manifestFormat().addExtension("manifest-file-2"), + manifestEntry(Status.DELETED, null, FILE_A)); assertThatThrownBy( () -> table.newFastAppend().appendManifest(manifestWithDeletedFiles).commit()) .isInstanceOf(IllegalArgumentException.class) diff --git a/core/src/test/java/org/apache/iceberg/TestManifestReader.java b/core/src/test/java/org/apache/iceberg/TestManifestReader.java index 0af0c87d3512..4c7a065efdae 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestReader.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestReader.java @@ -237,6 +237,10 @@ public void testDataFileSplitOffsetsNullWhenInvalid() throws IOException { @SuppressWarnings("deprecation") @TestTemplate public void testDeprecatedReadWithoutSpecsById() 
throws IOException { + assumeThat(formatVersion) + .as("Deprecated read without specsById requires Avro metadata; V4 uses Parquet") + .isLessThan(4); + ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 1000L, FILE_A)); try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { ManifestEntry entry = Iterables.getOnlyElement(reader.entries()); diff --git a/core/src/test/java/org/apache/iceberg/TestManifestWriter.java b/core/src/test/java/org/apache/iceberg/TestManifestWriter.java index 00e66bdd7d0d..d710d949c52b 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestWriter.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestWriter.java @@ -41,7 +41,7 @@ public class TestManifestWriter extends TestBase { public void testManifestStats() throws IOException { ManifestFile manifest = writeManifest( - "manifest.avro", + manifestFormat().addExtension("manifest"), manifestEntry(Status.ADDED, null, newFile(10)), manifestEntry(Status.ADDED, null, newFile(20)), manifestEntry(Status.ADDED, null, newFile(5)), @@ -67,7 +67,7 @@ public void testManifestStats() throws IOException { public void testManifestPartitionStats() throws IOException { ManifestFile manifest = writeManifest( - "manifest.avro", + manifestFormat().addExtension("manifest"), manifestEntry(Status.ADDED, null, newFile(10, TestHelpers.Row.of(1))), manifestEntry(Status.EXISTING, null, newFile(15, TestHelpers.Row.of(2))), manifestEntry(Status.DELETED, null, newFile(2, TestHelpers.Row.of(3)))); @@ -92,7 +92,8 @@ public void testManifestPartitionStats() throws IOException { @TestTemplate public void testWriteManifestWithSequenceNumber() throws IOException { assumeThat(formatVersion).isGreaterThan(1); - File manifestFile = temp.resolve("manifest" + System.nanoTime() + ".avro").toFile(); + File manifestFile = + temp.resolve(manifestFormat().addExtension("manifest" + System.nanoTime())).toFile(); OutputFile outputFile = 
table.ops().io().newOutputFile(manifestFile.getCanonicalPath()); ManifestWriter writer = ManifestFiles.write(formatVersion, table.spec(), outputFile, 1L); @@ -119,7 +120,7 @@ public void testCommitManifestWithExplicitDataSequenceNumber() throws IOExceptio ManifestFile manifest = writeManifest( - "manifest.avro", + manifestFormat().addExtension("manifest"), manifestEntry(Status.ADDED, null, dataSequenceNumber, null, file1), manifestEntry(Status.ADDED, null, dataSequenceNumber, null, file2)); @@ -161,7 +162,7 @@ public void testCommitManifestWithExistingEntriesWithoutFileSequenceNumber() thr ManifestFile newManifest = writeManifest( - "manifest.avro", + manifestFormat().addExtension("manifest"), manifestEntry(Status.EXISTING, appendSnapshotId, appendSequenceNumber, null, file1), manifestEntry(Status.EXISTING, appendSnapshotId, appendSequenceNumber, null, file2)); diff --git a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java index 5e83827f0c45..966b573bd93b 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java @@ -115,6 +115,8 @@ public class TestManifestWriterVersions { null, null); + static final List V4_FORMATS = ImmutableList.of(FileFormat.AVRO, FileFormat.PARQUET); + @TempDir private Path temp; @Test @@ -344,6 +346,100 @@ public void testCustomManifestCompression(int formatVersion) throws IOException assertThat(readAvroCodec(manifestFile)).isEqualTo("snappy"); } + @ParameterizedTest + @FieldSource("V4_FORMATS") + public void testV4WritePartitioned(FileFormat fileFormat) throws IOException { + ManifestFile manifest = writeManifest(4, fileFormat, SPEC, DATA_FILE); + checkManifest(manifest, ManifestWriter.UNASSIGNED_SEQ); + checkEntry( + readManifest(manifest), + ManifestWriter.UNASSIGNED_SEQ, + ManifestWriter.UNASSIGNED_SEQ, + FileContent.DATA, + FIRST_ROW_ID); + } + + 
@ParameterizedTest + @FieldSource("V4_FORMATS") + public void testV4WriteUnpartitioned(FileFormat fileFormat) throws IOException { + DataFile unpartitionedFile = + DataFiles.builder(PartitionSpec.unpartitioned()) + .withPath(PATH) + .withFormat(FORMAT) + .withFileSizeInBytes(150972L) + .withMetrics(METRICS) + .withSplitOffsets(OFFSETS) + .withSortOrderId(SORT_ORDER_ID) + .withFirstRowId(FIRST_ROW_ID) + .build(); + + ManifestFile manifest = + writeManifest(4, fileFormat, PartitionSpec.unpartitioned(), unpartitionedFile); + checkManifest(manifest, ManifestWriter.UNASSIGNED_SEQ); + + Map unpartitionedSpecs = + ImmutableMap.of(PartitionSpec.unpartitioned().specId(), PartitionSpec.unpartitioned()); + try (CloseableIterable> reader = + ManifestFiles.read(manifest, io, unpartitionedSpecs).entries()) { + ManifestEntry entry = Iterables.getOnlyElement(reader); + assertThat(entry.status()).isEqualTo(ManifestEntry.Status.ADDED); + assertThat(entry.file().location()).isEqualTo(PATH); + assertThat(entry.file().recordCount()).isEqualTo(METRICS.recordCount()); + assertThat(entry.file().firstRowId()).isEqualTo(FIRST_ROW_ID); + } + } + + @ParameterizedTest + @FieldSource("V4_FORMATS") + public void testV4WriteDeletePartitioned(FileFormat fileFormat) throws IOException { + ManifestFile manifest = writeDeleteManifest(4, fileFormat, SPEC); + checkManifest(manifest, ManifestWriter.UNASSIGNED_SEQ); + assertThat(manifest.content()).isEqualTo(ManifestContent.DELETES); + checkEntry( + readDeleteManifest(manifest), + ManifestWriter.UNASSIGNED_SEQ, + ManifestWriter.UNASSIGNED_SEQ, + FileContent.EQUALITY_DELETES); + } + + @ParameterizedTest + @FieldSource("V4_FORMATS") + public void testV4WriteDeleteUnpartitioned(FileFormat fileFormat) throws IOException { + DeleteFile unpartitionedDelete = + new GenericDeleteFile( + 0, + FileContent.EQUALITY_DELETES, + PATH, + FORMAT, + new PartitionData(PartitionSpec.unpartitioned().partitionType()), + 22905L, + METRICS, + EQUALITY_ID_ARR, + SORT_ORDER_ID, 
+ null, + null, + null, + null, + null); + + ManifestFile manifest = + writeDeleteManifest(4, fileFormat, PartitionSpec.unpartitioned(), unpartitionedDelete); + checkManifest(manifest, ManifestWriter.UNASSIGNED_SEQ); + assertThat(manifest.content()).isEqualTo(ManifestContent.DELETES); + + Map unpartitionedSpecs = + ImmutableMap.of(PartitionSpec.unpartitioned().specId(), PartitionSpec.unpartitioned()); + try (CloseableIterable> reader = + ManifestFiles.readDeleteManifest(manifest, io, unpartitionedSpecs).entries()) { + ManifestEntry entry = Iterables.getOnlyElement(reader); + assertThat(entry.status()).isEqualTo(ManifestEntry.Status.ADDED); + assertThat(entry.file().content()).isEqualTo(FileContent.EQUALITY_DELETES); + assertThat(entry.file().location()).isEqualTo(PATH); + assertThat(entry.file().recordCount()).isEqualTo(METRICS.recordCount()); + assertThat(entry.file().equalityFieldIds()).isEqualTo(EQUALITY_IDS); + } + } + void checkEntry( ManifestEntry entry, Long expectedDataSequenceNumber, @@ -466,7 +562,7 @@ private ManifestFile writeAndReadManifestList(ManifestFile manifest, int formatV private ManifestFile rewriteManifest(ManifestFile manifest, int formatVersion) throws IOException { - String filename = FileFormat.AVRO.addExtension("rewrite-manifest"); + String filename = TestBase.manifestFormat(formatVersion).addExtension("rewrite-manifest"); EncryptedOutputFile manifestFile = encryptionManager().encrypt(io.newOutputFile(filename)); ManifestWriter writer = ManifestFiles.write(formatVersion, SPEC, manifestFile, SNAPSHOT_ID); @@ -483,10 +579,16 @@ private ManifestFile writeManifest(int formatVersion) throws IOException { } private ManifestFile writeManifest(int formatVersion, DataFile... 
files) throws IOException { - String filename = FileFormat.AVRO.addExtension("manifest"); + return writeManifest(formatVersion, TestBase.manifestFormat(formatVersion), SPEC, files); + } + + private ManifestFile writeManifest( + int formatVersion, FileFormat fileFormat, PartitionSpec spec, DataFile... files) + throws IOException { + String filename = fileFormat.addExtension("manifest"); EncryptedOutputFile manifestFile = encryptionManager().encrypt(io.newOutputFile(filename)); ManifestWriter writer = - ManifestFiles.newWriter(formatVersion, SPEC, manifestFile, SNAPSHOT_ID, FIRST_ROW_ID); + ManifestFiles.newWriter(formatVersion, spec, manifestFile, SNAPSHOT_ID, FIRST_ROW_ID); try { for (DataFile file : files) { writer.add(file); @@ -512,12 +614,23 @@ private ManifestEntry readManifest(ManifestFile manifest) throws IOExc } private ManifestFile writeDeleteManifest(int formatVersion) throws IOException { - String filename = FileFormat.AVRO.addExtension("manifest"); + return writeDeleteManifest(formatVersion, TestBase.manifestFormat(formatVersion), SPEC); + } + + private ManifestFile writeDeleteManifest( + int formatVersion, FileFormat fileFormat, PartitionSpec spec) throws IOException { + return writeDeleteManifest(formatVersion, fileFormat, spec, DELETE_FILE); + } + + private ManifestFile writeDeleteManifest( + int formatVersion, FileFormat fileFormat, PartitionSpec spec, DeleteFile deleteFile) + throws IOException { + String filename = fileFormat.addExtension("manifest"); EncryptedOutputFile manifestFile = encryptionManager().encrypt(io.newOutputFile(filename)); ManifestWriter writer = - ManifestFiles.writeDeleteManifest(formatVersion, SPEC, manifestFile, SNAPSHOT_ID); + ManifestFiles.writeDeleteManifest(formatVersion, spec, manifestFile, SNAPSHOT_ID); try { - writer.add(DELETE_FILE); + writer.add(deleteFile); } finally { writer.close(); } diff --git a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java 
b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java index 3947f16fe159..b7700d7ce719 100644 --- a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java +++ b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java @@ -629,7 +629,8 @@ public void testManifestsMergeIntoOne() throws IOException { .newAppend() .appendManifest( writeManifest( - "input-m0.avro", manifestEntry(ManifestEntry.Status.ADDED, null, FILE_C))), + manifestFormat().addExtension("input-m0"), + manifestEntry(ManifestEntry.Status.ADDED, null, FILE_C))), branch); base = readMetadata(); @@ -671,7 +672,8 @@ public void testManifestsMergeIntoOne() throws IOException { .newAppend() .appendManifest( writeManifest( - "input-m1.avro", manifestEntry(ManifestEntry.Status.ADDED, null, FILE_D))), + manifestFormat().addExtension("input-m1"), + manifestEntry(ManifestEntry.Status.ADDED, null, FILE_D))), branch); base = readMetadata(); @@ -1274,7 +1276,7 @@ public void testMergedAppendManifestCleanupWithSnapshotIdInheritance() throws IO table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); - ManifestFile manifest1 = writeManifestWithName("manifest-file-1.avro", FILE_A, FILE_B); + ManifestFile manifest1 = writeManifestWithName("manifest-file-1", FILE_A, FILE_B); Snapshot snap1 = commit(table, table.newAppend().appendManifest(manifest1), branch); long commitId1 = snap1.snapshotId(); @@ -1290,7 +1292,7 @@ public void testMergedAppendManifestCleanupWithSnapshotIdInheritance() throws IO statuses(Status.ADDED, Status.ADDED)); assertThat(new File(manifest1.path())).exists(); - ManifestFile manifest2 = writeManifestWithName("manifest-file-2.avro", FILE_C, FILE_D); + ManifestFile manifest2 = writeManifestWithName("manifest-file-2", FILE_C, FILE_D); Snapshot snap2 = commit(table, table.newAppend().appendManifest(manifest2), branch); long commitId2 = snap2.snapshotId(); @@ -1347,7 +1349,9 @@ public void testInvalidAppendManifest() throws IOException { 
assertThat(base.currentSnapshot()).isNull(); ManifestFile manifestWithExistingFiles = - writeManifest("manifest-file-1.avro", manifestEntry(Status.EXISTING, null, FILE_A)); + writeManifest( + manifestFormat().addExtension("manifest-file-1"), + manifestEntry(Status.EXISTING, null, FILE_A)); assertThatThrownBy( () -> commit(table, table.newAppend().appendManifest(manifestWithExistingFiles), branch)) @@ -1356,7 +1360,9 @@ public void testInvalidAppendManifest() throws IOException { assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); ManifestFile manifestWithDeletedFiles = - writeManifest("manifest-file-2.avro", manifestEntry(Status.DELETED, null, FILE_A)); + writeManifest( + manifestFormat().addExtension("manifest-file-2"), + manifestEntry(Status.DELETED, null, FILE_A)); assertThatThrownBy( () -> commit(table, table.newAppend().appendManifest(manifestWithDeletedFiles), branch)) .isInstanceOf(IllegalArgumentException.class) diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteManifests.java b/core/src/test/java/org/apache/iceberg/TestRewriteManifests.java index 19ee156c9eec..dab323743bb1 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteManifests.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteManifests.java @@ -55,7 +55,8 @@ public void testRewriteManifestsAppendedDirectly() throws IOException { ManifestFile newManifest = writeManifest( - "manifest-file-1.avro", manifestEntry(ManifestEntry.Status.ADDED, null, FILE_A)); + manifestFormat().addExtension("manifest-file-1"), + manifestEntry(ManifestEntry.Status.ADDED, null, FILE_A)); table.newFastAppend().appendManifest(newManifest).commit(); long appendId = table.currentSnapshot().snapshotId(); @@ -79,7 +80,8 @@ public void testRewriteManifestsWithScanExecutor() throws IOException { ManifestFile newManifest = writeManifest( - "manifest-file-1.avro", manifestEntry(ManifestEntry.Status.ADDED, null, FILE_A)); + manifestFormat().addExtension("manifest-file-1"), + 
manifestEntry(ManifestEntry.Status.ADDED, null, FILE_A)); table.newFastAppend().appendManifest(newManifest).commit(); @@ -115,7 +117,8 @@ public void testRewriteManifestsGeneratedAndAppendedDirectly() throws IOExceptio ManifestFile newManifest = writeManifest( - "manifest-file-1.avro", manifestEntry(ManifestEntry.Status.ADDED, null, FILE_A)); + manifestFormat().addExtension("manifest-file-1"), + manifestEntry(ManifestEntry.Status.ADDED, null, FILE_A)); table.newFastAppend().appendManifest(newManifest).commit(); long manifestAppendId = table.currentSnapshot().snapshotId(); @@ -428,11 +431,11 @@ public void testBasicManifestReplacement() throws IOException { ManifestFile firstNewManifest = writeManifest( - "manifest-file-1.avro", + manifestFormat().addExtension("manifest-file-1"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_A)); ManifestFile secondNewManifest = writeManifest( - "manifest-file-2.avro", + manifestFormat().addExtension("manifest-file-2"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_B)); RewriteManifests rewriteManifests = table.rewriteManifests(); @@ -492,11 +495,11 @@ public void testBasicManifestReplacementWithSnapshotIdInheritance() throws IOExc ManifestFile firstNewManifest = writeManifest( - "manifest-file-1.avro", + manifestFormat().addExtension("manifest-file-1"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_A)); ManifestFile secondNewManifest = writeManifest( - "manifest-file-2.avro", + manifestFormat().addExtension("manifest-file-2"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_B)); RewriteManifests rewriteManifests = table.rewriteManifests(); @@ -679,11 +682,11 @@ public void testManifestReplacementConcurrentAppend() throws IOException { ManifestFile firstNewManifest = writeManifest( - "manifest-file-1.avro", + manifestFormat().addExtension("manifest-file-1"), manifestEntry(ManifestEntry.Status.EXISTING, 
firstSnapshot.snapshotId(), FILE_A)); ManifestFile secondNewManifest = writeManifest( - "manifest-file-2.avro", + manifestFormat().addExtension("manifest-file-2"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_B)); RewriteManifests rewriteManifests = table.rewriteManifests(); @@ -741,11 +744,11 @@ public void testManifestReplacementConcurrentDelete() throws IOException { ManifestFile firstNewManifest = writeManifest( - "manifest-file-1.avro", + manifestFormat().addExtension("manifest-file-1"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_A)); ManifestFile secondNewManifest = writeManifest( - "manifest-file-2.avro", + manifestFormat().addExtension("manifest-file-2"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_B)); RewriteManifests rewriteManifests = table.rewriteManifests(); @@ -796,11 +799,11 @@ public void testManifestReplacementConcurrentConflictingDelete() throws IOExcept ManifestFile firstNewManifest = writeManifest( - "manifest-file-1.avro", + manifestFormat().addExtension("manifest-file-1"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_A)); ManifestFile secondNewManifest = writeManifest( - "manifest-file-2.avro", + manifestFormat().addExtension("manifest-file-2"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_B)); RewriteManifests rewriteManifests = table.rewriteManifests(); @@ -841,7 +844,7 @@ public void testManifestReplacementCombinedWithRewrite() throws IOException { ManifestFile newManifest = writeManifest( - "manifest-file-1.avro", + manifestFormat().addExtension("manifest-file-1"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_A)); table @@ -904,7 +907,8 @@ public void testManifestReplacementCombinedWithRewriteConcurrentDelete() throws manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_A); // update the entry's sequence number or 
else it will be rejected by the writer entry.setDataSequenceNumber(firstSnapshot.sequenceNumber()); - ManifestFile newManifest = writeManifest("manifest-file-1.avro", entry); + ManifestFile newManifest = + writeManifest(manifestFormat().addExtension("manifest-file-1"), entry); RewriteManifests rewriteManifests = table @@ -954,7 +958,8 @@ public void testInvalidUsage() throws IOException { // update the entry's sequence number or else it will be rejected by the writer appendEntry.setDataSequenceNumber(snapshot.sequenceNumber()); - ManifestFile invalidAddedFileManifest = writeManifest("manifest-file-2.avro", appendEntry); + ManifestFile invalidAddedFileManifest = + writeManifest(manifestFormat().addExtension("manifest-file-2"), appendEntry); assertThatThrownBy( () -> @@ -971,7 +976,8 @@ public void testInvalidUsage() throws IOException { // update the entry's sequence number or else it will be rejected by the writer deleteEntry.setDataSequenceNumber(snapshot.sequenceNumber()); - ManifestFile invalidDeletedFileManifest = writeManifest("manifest-file-3.avro", deleteEntry); + ManifestFile invalidDeletedFileManifest = + writeManifest(manifestFormat().addExtension("manifest-file-3"), deleteEntry); assertThatThrownBy( () -> @@ -1009,7 +1015,7 @@ public void testManifestReplacementFailure() throws IOException { ManifestFile newManifest = writeManifest( - "manifest-file.avro", + manifestFormat().addExtension("manifest-file"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_A), manifestEntry(ManifestEntry.Status.EXISTING, secondSnapshot.snapshotId(), FILE_B)); @@ -1051,7 +1057,7 @@ public void testManifestReplacementFailureWithSnapshotIdInheritance() throws IOE ManifestFile newManifest = writeManifest( - "manifest-file.avro", + manifestFormat().addExtension("manifest-file"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshot.snapshotId(), FILE_A), manifestEntry(ManifestEntry.Status.EXISTING, secondSnapshot.snapshotId(), FILE_B)); @@ 
-1176,7 +1182,7 @@ public void testReplaceDeleteManifestsOnly() throws IOException { Iterables.getOnlyElement(deleteSnapshot.deleteManifests(table.io())); ManifestFile newDeleteManifest1 = writeManifest( - "delete-manifest-file-1.avro", + manifestFormat().addExtension("delete-manifest-file-1"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId, @@ -1185,7 +1191,7 @@ public void testReplaceDeleteManifestsOnly() throws IOException { fileADeletes())); ManifestFile newDeleteManifest2 = writeManifest( - "delete-manifest-file-2.avro", + manifestFormat().addExtension("delete-manifest-file-2"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId, @@ -1262,7 +1268,7 @@ public void testReplaceDataAndDeleteManifests() throws IOException { Iterables.getOnlyElement(deleteSnapshot.dataManifests(table.io())); ManifestFile newDataManifest1 = writeManifest( - "manifest-file-1.avro", + manifestFormat().addExtension("manifest-file-1"), manifestEntry( ManifestEntry.Status.EXISTING, appendSnapshotId, @@ -1271,7 +1277,7 @@ public void testReplaceDataAndDeleteManifests() throws IOException { FILE_A)); ManifestFile newDataManifest2 = writeManifest( - "manifest-file-2.avro", + manifestFormat().addExtension("manifest-file-2"), manifestEntry( ManifestEntry.Status.EXISTING, appendSnapshotId, @@ -1284,7 +1290,7 @@ public void testReplaceDataAndDeleteManifests() throws IOException { Iterables.getOnlyElement(deleteSnapshot.deleteManifests(table.io())); ManifestFile newDeleteManifest1 = writeManifest( - "delete-manifest-file-1.avro", + manifestFormat().addExtension("delete-manifest-file-1"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId, @@ -1293,7 +1299,7 @@ public void testReplaceDataAndDeleteManifests() throws IOException { fileADeletes())); ManifestFile newDeleteManifest2 = writeManifest( - "delete-manifest-file-2.avro", + manifestFormat().addExtension("delete-manifest-file-2"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId, @@ -1376,7 
+1382,7 @@ public void testDeleteManifestReplacementConcurrentAppend() throws IOException { Iterables.getOnlyElement(deleteSnapshot.deleteManifests(table.io())); ManifestFile newDeleteManifest1 = writeManifest( - "delete-manifest-file-1.avro", + manifestFormat().addExtension("delete-manifest-file-1"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId, @@ -1385,7 +1391,7 @@ public void testDeleteManifestReplacementConcurrentAppend() throws IOException { fileADeletes())); ManifestFile newDeleteManifest2 = writeManifest( - "delete-manifest-file-2.avro", + manifestFormat().addExtension("delete-manifest-file-2"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId, @@ -1486,7 +1492,7 @@ public void testDeleteManifestReplacementConcurrentDeleteFileRemoval() throws IO ManifestFile originalDeleteManifest = deleteSnapshot1.deleteManifests(table.io()).get(0); ManifestFile newDeleteManifest1 = writeManifest( - "delete-manifest-file-1.avro", + manifestFormat().addExtension("delete-manifest-file-1"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId1, @@ -1495,7 +1501,7 @@ public void testDeleteManifestReplacementConcurrentDeleteFileRemoval() throws IO fileADeletes())); ManifestFile newDeleteManifest2 = writeManifest( - "delete-manifest-file-2.avro", + manifestFormat().addExtension("delete-manifest-file-2"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId1, @@ -1581,7 +1587,7 @@ public void testDeleteManifestReplacementConflictingDeleteFileRemoval() throws I ManifestFile originalDeleteManifest = deleteSnapshot.deleteManifests(table.io()).get(0); ManifestFile newDeleteManifest1 = writeManifest( - "delete-manifest-file-1.avro", + manifestFormat().addExtension("delete-manifest-file-1"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId, @@ -1590,7 +1596,7 @@ public void testDeleteManifestReplacementConflictingDeleteFileRemoval() throws I fileADeletes())); ManifestFile newDeleteManifest2 = writeManifest( - 
"delete-manifest-file-2.avro", + manifestFormat().addExtension("delete-manifest-file-2"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId, @@ -1645,7 +1651,7 @@ public void testDeleteManifestReplacementFailure() throws IOException { // combine the original delete manifests into 1 new delete manifest ManifestFile newDeleteManifest = writeManifest( - "delete-manifest-file.avro", + manifestFormat().addExtension("delete-manifest-file"), manifestEntry( ManifestEntry.Status.EXISTING, deleteSnapshotId1, diff --git a/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java b/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java index dd97738759f4..fababafd8214 100644 --- a/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java +++ b/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java @@ -22,6 +22,7 @@ import static org.apache.iceberg.avro.AvroTestHelpers.readAvroCodec; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; import java.io.IOException; @@ -228,6 +229,10 @@ public TableMetadata refresh() { @TestTemplate public void testDefaultManifestCompression() throws IOException { + assumeThat(formatVersion) + .as("V4 uses Parquet manifests by default; Avro codec checks do not apply") + .isLessThan(4); + table.newFastAppend().appendFile(FILE_A).commit(); ManifestFile manifest = table.currentSnapshot().dataManifests(table.io()).get(0); @@ -236,6 +241,10 @@ public void testDefaultManifestCompression() throws IOException { @TestTemplate public void testManifestCompressionFromTableProperty() throws IOException { + assumeThat(formatVersion) + .as("V4 uses Parquet manifests by default; Avro codec checks do not apply") + .isLessThan(4); + table.updateProperties().set(TableProperties.MANIFEST_COMPRESSION, "snappy").commit(); table.newFastAppend().appendFile(FILE_A).commit(); diff 
--git a/core/src/test/java/org/apache/iceberg/TestTransaction.java b/core/src/test/java/org/apache/iceberg/TestTransaction.java index 9ec8c47840d9..fe47ac62561d 100644 --- a/core/src/test/java/org/apache/iceberg/TestTransaction.java +++ b/core/src/test/java/org/apache/iceberg/TestTransaction.java @@ -666,7 +666,7 @@ public void testTransactionRewriteManifestsAppendedDirectly() throws IOException ManifestFile newManifest = writeManifest( - "manifest-file-1.avro", + manifestFormat().addExtension("manifest-file-1"), manifestEntry(ManifestEntry.Status.EXISTING, firstSnapshotId, FILE_A), manifestEntry(ManifestEntry.Status.EXISTING, secondSnapshotId, FILE_B)); @@ -811,7 +811,7 @@ public void testRowDeltaWithConcurrentManifestRewrite() throws IOException { .rewriteManifests() .addManifest( writeManifest( - "new_delete_manifest.avro", + manifestFormat().addExtension("new_delete_manifest"), // Specify data sequence number so that the delete files don't get aged out // first manifestEntry( @@ -880,7 +880,7 @@ public void testOverwriteWithConcurrentManifestRewrite() throws IOException { .rewriteManifests() .addManifest( writeManifest( - "new_manifest.avro", + manifestFormat().addExtension("new_manifest"), manifestEntry(Status.EXISTING, first.snapshotId(), FILE_A), manifestEntry(Status.EXISTING, first.snapshotId(), FILE_A2), manifestEntry(Status.EXISTING, second.snapshotId(), FILE_B))) diff --git a/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java b/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java index 310d918849f3..b30bc4d15d6e 100644 --- a/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java +++ b/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java @@ -130,7 +130,7 @@ protected List manifestFiles(String location) { return Stream.of(new File(location).listFiles()) .filter(file -> !file.isDirectory()) .map(File::getName) - .filter(fileName -> fileName.endsWith(".avro")) + .filter(fileName -> fileName.endsWith(".avro") || 
fileName.endsWith(".parquet")) .collect(Collectors.toList()); } diff --git a/core/src/test/java/org/apache/iceberg/util/TestManifestFileUtil.java b/core/src/test/java/org/apache/iceberg/util/TestManifestFileUtil.java index 8d2416032058..a87ee09d395d 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestManifestFileUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestManifestFileUtil.java @@ -111,7 +111,8 @@ public void canContainWithUnknownType() throws IOException { private ManifestFile writeManifestWithDataFile(PartitionSpec spec, PartitionData partition) throws IOException { - ManifestWriter writer = ManifestFiles.write(spec, Files.localOutput(temp.toFile())); + ManifestWriter writer = + ManifestFiles.write(spec, Files.localOutput(temp.resolve("manifest.avro").toFile())); try (writer) { writer.add( DataFiles.builder(spec) diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java index 8aa9aa4779d9..0e865ba2a13d 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java @@ -25,7 +25,9 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.UUID; @@ -847,7 +849,8 @@ protected List newListData(List reuse) { } if (reuse != null) { - this.lastList = reuse; + // reuse containers may come from a different reader (e.g. Avro) with incompatible types + this.lastList = reuse instanceof ArrayList ? reuse : null; this.elements = reuse.iterator(); } else { this.lastList = null; @@ -973,7 +976,8 @@ protected Map newMapData(Map reuse) { } if (reuse != null) { - this.lastMap = reuse; + // reuse containers may come from a different reader (e.g. 
Avro) with incompatible types + this.lastMap = reuse instanceof LinkedHashMap ? reuse : null; this.pairs = reuse.entrySet().iterator(); } else { this.lastMap = null; From 14231dd12a4a8cd075485bccc4fbb638c62d3134 Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Wed, 25 Mar 2026 16:52:26 -0500 Subject: [PATCH 07/22] Core, Spark: Fix V4 Parquet manifest reading issues - ManifestReader: Mark partition field optional for unpartitioned tables instead of removing it from the projection, preserving positional access and avoiding ClassCastException from shifted ordinals - BaseFile: Deep copy ByteBuffer values in copyByteBufferMap to prevent Parquet container reuse from corrupting bounds in copied files, which caused equality deletes to fail stats-based overlap checks - BaseFile: Guard against null partition value in internalSet - TestRewriteTablePathsAction: Simplify manifest file predicate to use name patterns instead of file extensions --- .../java/org/apache/iceberg/BaseFile.java | 35 +++++++++++++++++-- .../org/apache/iceberg/ManifestReader.java | 35 ++++++++++++------- .../actions/TestRewriteTablePathsAction.java | 5 +-- .../actions/TestRewriteTablePathsAction.java | 5 +-- .../actions/TestRewriteTablePathsAction.java | 5 +-- .../actions/TestRewriteTablePathsAction.java | 5 +-- 6 files changed, 59 insertions(+), 31 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseFile.java b/core/src/main/java/org/apache/iceberg/BaseFile.java index 3c31c50f099f..1af677394f7a 100644 --- a/core/src/main/java/org/apache/iceberg/BaseFile.java +++ b/core/src/main/java/org/apache/iceberg/BaseFile.java @@ -32,6 +32,7 @@ import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import 
org.apache.iceberg.util.ArrayUtil; @@ -329,7 +330,9 @@ protected void internalSet(int pos, T value) { this.partitionSpecId = (value != null) ? (Integer) value : -1; return; case 4: - this.partitionData = (PartitionData) value; + if (value != null) { + this.partitionData = (PartitionData) value; + } return; case 5: this.recordCount = (Long) value; @@ -581,9 +584,33 @@ private static Map copyMap(Map map, Set keys) { private static Map copyByteBufferMap( Map map, Set keys) { - return SerializableByteBufferMap.wrap(copyMap(map, keys)); + if (map == null) { + return null; + } + + // This is required as long as we have Map in the API since Parquet is + // re-using buffers. + Map deepCopy = Maps.newHashMapWithExpectedSize(map.size()); + for (Map.Entry entry : map.entrySet()) { + if (keys == null || keys.contains(entry.getKey())) { + ByteBuffer buf = entry.getValue(); + if (buf != null) { + ByteBuffer copy = ByteBuffer.allocate(buf.remaining()); + copy.put(buf.duplicate()); + copy.flip(); + deepCopy.put(entry.getKey(), copy); + } else { + deepCopy.put(entry.getKey(), null); + } + } + } + + return SerializableByteBufferMap.wrap(deepCopy); } + // Returns an unmodifiable view of the map. The SerializableMap check is needed because + // internal maps may be wrapped for serialization after being populated by a format reader + // with container reuse enabled, and immutableMap() provides a stable snapshot. private static Map toReadableMap(Map map) { if (map == null) { return null; @@ -594,6 +621,10 @@ private static Map toReadableMap(Map map) { } } + // Separate from toReadableMap because SerializableByteBufferMap is its own wrapper type + // (not a SerializableMap subclass) to handle ByteBuffer-specific serialization. ByteBuffer + // values are mutable and can be overwritten by Parquet container reuse, so callers that + // retain references must use copyByteBufferMap to get independent copies. 
private static Map toReadableByteBufferMap(Map map) { if (map == null) { return null; diff --git a/core/src/main/java/org/apache/iceberg/ManifestReader.java b/core/src/main/java/org/apache/iceberg/ManifestReader.java index 8e483fca8775..92a51161f31d 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestReader.java +++ b/core/src/main/java/org/apache/iceberg/ManifestReader.java @@ -59,6 +59,13 @@ public class ManifestReader> extends CloseableGroup static final ImmutableList ALL_COLUMNS = ImmutableList.of("*"); + private static final Types.NestedField UNPARTITIONED_PARTITION_FIELD = + Types.NestedField.optional( + DataFile.PARTITION_ID, + DataFile.PARTITION_NAME, + Types.StructType.of(), + DataFile.PARTITION_DOC); + private static final Set STATS_COLUMNS = ImmutableSet.of( "value_counts", @@ -289,8 +296,20 @@ private CloseableIterable> open(Schema projection) { boolean unpartitioned = spec.rawPartitionType().fields().isEmpty(); + // V4+ manifests omit the partition field when unpartitioned (Parquet cannot represent + // empty structs, and the field is meaningless regardless of format). Mark it optional so + // the reader returns null for the missing field instead of throwing. The field must stay + // in the projection to preserve positional access for callers like StructProjection. + // For older versions where the empty struct is present, making it optional is harmless. 
List fields = Lists.newArrayList(); - fields.addAll(projection.asStruct().fields()); + for (Types.NestedField field : projection.asStruct().fields()) { + if (unpartitioned && field.fieldId() == DataFile.PARTITION_ID) { + fields.add(UNPARTITIONED_PARTITION_FIELD); + } else { + fields.add(field); + } + } + if (projection.findField(DataFile.RECORD_COUNT.fieldId()) == null) { fields.add(DataFile.RECORD_COUNT); } @@ -299,22 +318,12 @@ private CloseableIterable> open(Schema projection) { } fields.add(MetadataColumns.ROW_POSITION); - // V4+ manifests omit the partition field when unpartitioned (Parquet cannot represent - // empty structs, and the field is meaningless regardless of format). For older versions - // the empty struct is present but safe to skip. - if (unpartitioned) { - fields.removeIf(f -> f.fieldId() == DataFile.PARTITION_ID); - } - InternalData.ReadBuilder readBuilder = InternalData.read(format, file) .project(ManifestEntry.wrapFileSchema(Types.StructType.of(fields))) .setRootType(GenericManifestEntry.class) - .setCustomType(ManifestEntry.DATA_FILE_ID, content.fileClass()); - - if (!unpartitioned) { - readBuilder.setCustomType(DataFile.PARTITION_ID, PartitionData.class); - } + .setCustomType(ManifestEntry.DATA_FILE_ID, content.fileClass()) + .setCustomType(DataFile.PARTITION_ID, PartitionData.class); readBuilder.reuseContainers(); diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java index dae721b1d73d..c5db04762f21 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java @@ -1469,10 +1469,7 @@ protected void checkFileNum( .as(Encoders.STRING()) .collectAsList(); Predicate isManifest = - f -> - (f.contains("optimized-m-") && f.endsWith(".avro")) - || 
f.endsWith("-m0.avro") - || f.endsWith("-m1.avro"); + f -> f.contains("optimized-m-") || f.contains("-m0.") || f.contains("-m1."); Predicate isManifestList = f -> f.contains("snap-") && f.endsWith(".avro"); Predicate isMetadataJSON = f -> f.endsWith(".metadata.json"); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java index dae721b1d73d..c5db04762f21 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java @@ -1469,10 +1469,7 @@ protected void checkFileNum( .as(Encoders.STRING()) .collectAsList(); Predicate isManifest = - f -> - (f.contains("optimized-m-") && f.endsWith(".avro")) - || f.endsWith("-m0.avro") - || f.endsWith("-m1.avro"); + f -> f.contains("optimized-m-") || f.contains("-m0.") || f.contains("-m1."); Predicate isManifestList = f -> f.contains("snap-") && f.endsWith(".avro"); Predicate isMetadataJSON = f -> f.endsWith(".metadata.json"); diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java index dae721b1d73d..c5db04762f21 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java @@ -1469,10 +1469,7 @@ protected void checkFileNum( .as(Encoders.STRING()) .collectAsList(); Predicate isManifest = - f -> - (f.contains("optimized-m-") && f.endsWith(".avro")) - || f.endsWith("-m0.avro") - || f.endsWith("-m1.avro"); + f -> f.contains("optimized-m-") || f.contains("-m0.") || f.contains("-m1."); Predicate isManifestList = f -> f.contains("snap-") && 
f.endsWith(".avro"); Predicate isMetadataJSON = f -> f.endsWith(".metadata.json"); diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java index dae721b1d73d..c5db04762f21 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java @@ -1469,10 +1469,7 @@ protected void checkFileNum( .as(Encoders.STRING()) .collectAsList(); Predicate isManifest = - f -> - (f.contains("optimized-m-") && f.endsWith(".avro")) - || f.endsWith("-m0.avro") - || f.endsWith("-m1.avro"); + f -> f.contains("optimized-m-") || f.contains("-m0.") || f.contains("-m1."); Predicate isManifestList = f -> f.contains("snap-") && f.endsWith(".avro"); Predicate isMetadataJSON = f -> f.endsWith(".metadata.json"); From 38199c5cf96615ee8abcdde13a0fccd111f6f3be Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Thu, 26 Mar 2026 12:16:45 -0500 Subject: [PATCH 08/22] Core, Parquet: Clean up Parquet manifest code and tests - Collapse broken builder chain in ManifestReader.open() into a single fluent expression - Extract manifest format determination in SnapshotProducer into a private field computed once in the constructor - Replace magic format version 4 with TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS in tests - Parameterize TestManifestFileUtil across all format versions - Fix TestJdbcCatalog.manifestFiles to use exclusion filter instead of allowlisting file extensions - Improve ParquetValueReaders container reuse comments to reference specific BaseFile fields --- .../org/apache/iceberg/ManifestReader.java | 10 ++-- .../org/apache/iceberg/SnapshotProducer.java | 9 ++-- .../apache/iceberg/TestManifestReader.java | 2 +- .../apache/iceberg/TestSnapshotProducer.java | 4 +- 
.../apache/iceberg/jdbc/TestJdbcCatalog.java | 2 +- .../iceberg/util/TestManifestFileUtil.java | 54 +++++++++++++------ .../iceberg/parquet/ParquetValueReaders.java | 6 ++- 7 files changed, 54 insertions(+), 33 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ManifestReader.java b/core/src/main/java/org/apache/iceberg/ManifestReader.java index 92a51161f31d..a69f57a47b3e 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestReader.java +++ b/core/src/main/java/org/apache/iceberg/ManifestReader.java @@ -318,16 +318,14 @@ private CloseableIterable> open(Schema projection) { } fields.add(MetadataColumns.ROW_POSITION); - InternalData.ReadBuilder readBuilder = + CloseableIterable> reader = InternalData.read(format, file) .project(ManifestEntry.wrapFileSchema(Types.StructType.of(fields))) .setRootType(GenericManifestEntry.class) .setCustomType(ManifestEntry.DATA_FILE_ID, content.fileClass()) - .setCustomType(DataFile.PARTITION_ID, PartitionData.class); - - readBuilder.reuseContainers(); - - CloseableIterable> reader = readBuilder.build(); + .setCustomType(DataFile.PARTITION_ID, PartitionData.class) + .reuseContainers() + .build(); addCloseable(reader); diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index 108ec73e9366..e351009a9ea6 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -113,6 +113,7 @@ public void accept(String file) { private final AtomicInteger attempt = new AtomicInteger(0); private final List manifestLists = Lists.newArrayList(); private final long targetManifestSizeBytes; + private final FileFormat manifestFormat; private final Map manifestWriterProps; private MetricsReporter reporter = LoggingMetricsReporter.instance(); private volatile Long snapshotId = null; @@ -142,6 +143,10 @@ protected SnapshotProducer(TableOperations ops) { this.targetManifestSizeBytes 
= ops.current() .propertyAsLong(MANIFEST_TARGET_SIZE_BYTES, MANIFEST_TARGET_SIZE_BYTES_DEFAULT); + this.manifestFormat = + ops.current().formatVersion() >= TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS + ? FileFormat.PARQUET + : FileFormat.AVRO; this.manifestWriterProps = manifestWriterProperties(ops.current()); boolean snapshotIdInheritanceEnabled = ops.current() @@ -601,10 +606,6 @@ protected OutputFile manifestListPath() { } protected EncryptedOutputFile newManifestOutputFile() { - FileFormat manifestFormat = - ops.current().formatVersion() >= TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS - ? FileFormat.PARQUET - : FileFormat.AVRO; String manifestFileLocation = ops.metadataFileLocation( manifestFormat.addExtension(commitUUID + "-m" + manifestCount.getAndIncrement())); diff --git a/core/src/test/java/org/apache/iceberg/TestManifestReader.java b/core/src/test/java/org/apache/iceberg/TestManifestReader.java index 4c7a065efdae..778f8b70d6e0 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestReader.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestReader.java @@ -239,7 +239,7 @@ public void testDataFileSplitOffsetsNullWhenInvalid() throws IOException { public void testDeprecatedReadWithoutSpecsById() throws IOException { assumeThat(formatVersion) .as("Deprecated read without specsById requires Avro metadata; V4 uses Parquet") - .isLessThan(4); + .isLessThan(TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS); ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 1000L, FILE_A)); try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { diff --git a/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java b/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java index fababafd8214..507d0cf3523e 100644 --- a/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java +++ b/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java @@ -231,7 +231,7 @@ public TableMetadata 
refresh() { public void testDefaultManifestCompression() throws IOException { assumeThat(formatVersion) .as("V4 uses Parquet manifests by default; Avro codec checks do not apply") - .isLessThan(4); + .isLessThan(TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS); table.newFastAppend().appendFile(FILE_A).commit(); @@ -243,7 +243,7 @@ public void testDefaultManifestCompression() throws IOException { public void testManifestCompressionFromTableProperty() throws IOException { assumeThat(formatVersion) .as("V4 uses Parquet manifests by default; Avro codec checks do not apply") - .isLessThan(4); + .isLessThan(TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS); table.updateProperties().set(TableProperties.MANIFEST_COMPRESSION, "snappy").commit(); diff --git a/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java b/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java index b30bc4d15d6e..bf821a65eebd 100644 --- a/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java +++ b/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java @@ -130,7 +130,7 @@ protected List manifestFiles(String location) { return Stream.of(new File(location).listFiles()) .filter(file -> !file.isDirectory()) .map(File::getName) - .filter(fileName -> fileName.endsWith(".avro") || fileName.endsWith(".parquet")) + .filter(fileName -> !fileName.startsWith(".") && !fileName.endsWith("metadata.json")) .collect(Collectors.toList()); } diff --git a/core/src/test/java/org/apache/iceberg/util/TestManifestFileUtil.java b/core/src/test/java/org/apache/iceberg/util/TestManifestFileUtil.java index a87ee09d395d..0c7e032bde4e 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestManifestFileUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestManifestFileUtil.java @@ -20,11 +20,14 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.assertj.core.api.Assertions.assertThat; +import static 
org.assertj.core.api.Assumptions.assumeThat; import java.io.IOException; import java.nio.file.Path; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; +import org.apache.iceberg.FileFormat; import org.apache.iceberg.Files; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.ManifestFiles; @@ -35,24 +38,32 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.types.Types; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.FieldSource; + +class TestManifestFileUtil { + private static final int MIN_FORMAT_VERSION_PARQUET_MANIFESTS = 4; -public class TestManifestFileUtil { private static final Schema SCHEMA = new Schema( optional(1, "id", Types.IntegerType.get()), optional(2, "unknown", Types.UnknownType.get()), optional(3, "floats", Types.FloatType.get())); + private final AtomicInteger manifestCounter = new AtomicInteger(0); + @TempDir private Path temp; - @Test - public void canContainWithUnknownTypeOnly() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + void canContainWithUnknownTypeOnly(int formatVersion) throws IOException { + // Parquet cannot represent the empty struct produced by an UnknownType-only partition + assumeThat(formatVersion).isLessThan(MIN_FORMAT_VERSION_PARQUET_MANIFESTS); PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("unknown").build(); PartitionData partition = new PartitionData(spec.partitionType()); partition.set(0, "someValue"); - ManifestFile manifestFile = writeManifestWithDataFile(spec, partition); + ManifestFile manifestFile = writeManifestWithDataFile(formatVersion, spec, partition); assertThat( ManifestFileUtil.canContainAny( @@ -62,12 
+73,13 @@ public void canContainWithUnknownTypeOnly() throws IOException { .isTrue(); } - @Test - public void canContainWithNaNValueOnly() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + void canContainWithNaNValueOnly(int formatVersion) throws IOException { PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("floats").build(); PartitionData partition = new PartitionData(spec.partitionType()); partition.set(0, Float.NaN); - ManifestFile manifestFile = writeManifestWithDataFile(spec, partition); + ManifestFile manifestFile = writeManifestWithDataFile(formatVersion, spec, partition); assertThat( ManifestFileUtil.canContainAny( @@ -77,12 +89,13 @@ public void canContainWithNaNValueOnly() throws IOException { .isTrue(); } - @Test - public void canContainWithNullValueOnly() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + void canContainWithNullValueOnly(int formatVersion) throws IOException { PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("floats").build(); PartitionData partition = new PartitionData(spec.partitionType()); partition.set(0, null); - ManifestFile manifestFile = writeManifestWithDataFile(spec, partition); + ManifestFile manifestFile = writeManifestWithDataFile(formatVersion, spec, partition); assertThat( ManifestFileUtil.canContainAny( @@ -92,14 +105,15 @@ public void canContainWithNullValueOnly() throws IOException { .isTrue(); } - @Test - public void canContainWithUnknownType() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + void canContainWithUnknownType(int formatVersion) throws IOException { PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("floats").identity("unknown").build(); PartitionData partition = new PartitionData(spec.partitionType()); partition.set(0, 1.0f); partition.set(1, "someValue"); - ManifestFile manifestFile = 
writeManifestWithDataFile(spec, partition); + ManifestFile manifestFile = writeManifestWithDataFile(formatVersion, spec, partition); assertThat( ManifestFileUtil.canContainAny( @@ -109,10 +123,16 @@ public void canContainWithUnknownType() throws IOException { .isTrue(); } - private ManifestFile writeManifestWithDataFile(PartitionSpec spec, PartitionData partition) - throws IOException { + private ManifestFile writeManifestWithDataFile( + int formatVersion, PartitionSpec spec, PartitionData partition) throws IOException { + FileFormat format = + formatVersion >= MIN_FORMAT_VERSION_PARQUET_MANIFESTS + ? FileFormat.PARQUET + : FileFormat.AVRO; + String filename = format.addExtension("manifest-" + manifestCounter.getAndIncrement()); ManifestWriter writer = - ManifestFiles.write(spec, Files.localOutput(temp.resolve("manifest.avro").toFile())); + ManifestFiles.write( + formatVersion, spec, Files.localOutput(temp.resolve(filename).toFile()), null); try (writer) { writer.add( DataFiles.builder(spec) diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java index 0e865ba2a13d..4c142226200a 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java @@ -849,7 +849,8 @@ protected List newListData(List reuse) { } if (reuse != null) { - // reuse containers may come from a different reader (e.g. Avro) with incompatible types + // reuse containers may be unmodifiable (e.g. BaseFile.splitOffsets, + // BaseFile.equalityFieldIds) and cannot be cleared and reused this.lastList = reuse instanceof ArrayList ? reuse : null; this.elements = reuse.iterator(); } else { @@ -976,7 +977,8 @@ protected Map newMapData(Map reuse) { } if (reuse != null) { - // reuse containers may come from a different reader (e.g. 
Avro) with incompatible types + // reuse containers may be wrapped or immutable (e.g. BaseFile.lowerBounds, + // BaseFile.upperBounds via SerializableByteBufferMap) and cannot be cleared and reused this.lastMap = reuse instanceof LinkedHashMap ? reuse : null; this.pairs = reuse.entrySet().iterator(); } else { From 9e0d9d70a3d744cd567bc9932e375fedf7c277a1 Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Fri, 27 Mar 2026 16:22:28 -0500 Subject: [PATCH 09/22] Core: Use instanceof pattern matching in ManifestWriter Replace instanceof-then-cast with Java 16+ pattern matching to eliminate redundant casts in outputFile() and keyMetadataBuffer(). --- .../main/java/org/apache/iceberg/ManifestWriter.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ManifestWriter.java b/core/src/main/java/org/apache/iceberg/ManifestWriter.java index 07b9f0209074..4b1d063aad83 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestWriter.java +++ b/core/src/main/java/org/apache/iceberg/ManifestWriter.java @@ -88,8 +88,9 @@ protected abstract FileAppender> newAppender( private OutputFile outputFile(EncryptedOutputFile encryptedFile) { // Casting to NativeEncryptionOutputFile actually makes the file rely on native encryption // rather than whole-file encryption. 
- if (format == FileFormat.PARQUET && encryptedFile instanceof NativeEncryptionOutputFile) { - return (NativeEncryptionOutputFile) encryptedFile; + if (format == FileFormat.PARQUET + && encryptedFile instanceof NativeEncryptionOutputFile nativeFile) { + return nativeFile; } return encryptedFile.encryptingOutputFile(); } @@ -248,10 +249,11 @@ public ManifestFile toManifestFile() { } private ByteBuffer keyMetadataBuffer() { - if (keyMetadata instanceof NativeEncryptionKeyMetadata && format == FileFormat.AVRO) { + if (keyMetadata instanceof NativeEncryptionKeyMetadata nativeKeyMetadata + && format == FileFormat.AVRO) { // Whole-file encryption needs the file length embedded for GCM truncation protection. // Formats with native encryption (like Parquet) handle this directly and don't need it. - return ((NativeEncryptionKeyMetadata) keyMetadata).copyWithLength(length()).buffer(); + return nativeKeyMetadata.copyWithLength(length()).buffer(); } else if (keyMetadata != null) { return keyMetadata.buffer(); } From 2e2aa47596492684667ffc310c14331aed2bdfc5 Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Fri, 3 Apr 2026 09:33:26 -0500 Subject: [PATCH 10/22] Core: Remove duplicate validateSnapshot overload in TestBase --- core/src/test/java/org/apache/iceberg/TestBase.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestBase.java b/core/src/test/java/org/apache/iceberg/TestBase.java index d7410a15833e..0f649cabeb81 100644 --- a/core/src/test/java/org/apache/iceberg/TestBase.java +++ b/core/src/test/java/org/apache/iceberg/TestBase.java @@ -464,10 +464,6 @@ void validateSnapshot(Snapshot old, Snapshot snap, long sequenceNumber, DataFile validateSnapshot(old, snap, (Long) sequenceNumber, newFiles); } - void validateSnapshot(Table validationTable, Snapshot old, Snapshot snap, DataFile... 
newFiles) { - validateSnapshot(validationTable, old, snap, null, newFiles); - } - @SuppressWarnings("checkstyle:HiddenField") Snapshot commit(Table table, SnapshotUpdate snapshotUpdate, String branch) { Snapshot snapshot; From a6fe88567452e70afe27a2d3b23a18d4016de2ef Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Mon, 20 Apr 2026 12:51:50 -0500 Subject: [PATCH 11/22] Address PR review: Parquet reuse, BaseFile copy, V4Metadata builder, test names - ParquetValueReaders: only skip recycling reuse as scratch buffer for Guava ImmutableList / ImmutableMap - BaseFile: factor ByteBuffer map deep copy into deepCopyByteBufferMap - V4Metadata: build file schema fields with ImmutableList.builderWithExpectedSize - TestSnapshotProducer: rename Avro manifest compression tests for clarity --- .../java/org/apache/iceberg/BaseFile.java | 10 +++++++--- .../java/org/apache/iceberg/V4Metadata.java | 7 ++++--- .../apache/iceberg/TestSnapshotProducer.java | 4 ++-- .../iceberg/parquet/ParquetValueReaders.java | 19 +++++++++++-------- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseFile.java b/core/src/main/java/org/apache/iceberg/BaseFile.java index 1af677394f7a..7f15c9188c87 100644 --- a/core/src/main/java/org/apache/iceberg/BaseFile.java +++ b/core/src/main/java/org/apache/iceberg/BaseFile.java @@ -588,8 +588,12 @@ private static Map copyByteBufferMap( return null; } - // This is required as long as we have Map in the API since Parquet is - // re-using buffers. + return SerializableByteBufferMap.wrap(deepCopyByteBufferMap(map, keys)); + } + + // Required as long as we have Map in the API since Parquet reuses buffers. 
+ private static Map deepCopyByteBufferMap( + Map map, Set keys) { Map deepCopy = Maps.newHashMapWithExpectedSize(map.size()); for (Map.Entry entry : map.entrySet()) { if (keys == null || keys.contains(entry.getKey())) { @@ -605,7 +609,7 @@ private static Map copyByteBufferMap( } } - return SerializableByteBufferMap.wrap(deepCopy); + return deepCopy; } // Returns an unmodifiable view of the map. The SerializableMap check is needed because diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index 2637c5186e6c..f822840102f4 100644 --- a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -23,7 +23,7 @@ import java.nio.ByteBuffer; import java.util.List; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.types.Types; class V4Metadata { @@ -279,7 +279,8 @@ static Schema wrapFileSchema(Types.StructType fileSchema) { } static Types.StructType fileType(Types.StructType partitionType) { - List fields = Lists.newArrayList(); + ImmutableList.Builder fields = + ImmutableList.builderWithExpectedSize(partitionType.fields().isEmpty() ? 
18 : 19); fields.add(DataFile.CONTENT.asRequired()); fields.add(DataFile.FILE_PATH); fields.add(DataFile.FILE_FORMAT); @@ -307,7 +308,7 @@ static Types.StructType fileType(Types.StructType partitionType) { fields.add(DataFile.REFERENCED_DATA_FILE); fields.add(DataFile.CONTENT_OFFSET); fields.add(DataFile.CONTENT_SIZE); - return Types.StructType.of(fields); + return Types.StructType.of(fields.build()); } static class ManifestEntryWrapper> diff --git a/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java b/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java index 507d0cf3523e..c6092f0238b9 100644 --- a/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java +++ b/core/src/test/java/org/apache/iceberg/TestSnapshotProducer.java @@ -228,7 +228,7 @@ public TableMetadata refresh() { } @TestTemplate - public void testDefaultManifestCompression() throws IOException { + public void testDefaultAvroManifestCompression() throws IOException { assumeThat(formatVersion) .as("V4 uses Parquet manifests by default; Avro codec checks do not apply") .isLessThan(TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS); @@ -240,7 +240,7 @@ public void testDefaultManifestCompression() throws IOException { } @TestTemplate - public void testManifestCompressionFromTableProperty() throws IOException { + public void testAvroManifestCompressionFromTableProperty() throws IOException { assumeThat(formatVersion) .as("V4 uses Parquet manifests by default; Avro codec checks do not apply") .isLessThan(TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS); diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java index 4c142226200a..f49c5286ebb1 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java @@ -25,9 +25,7 @@ import java.math.BigInteger; import 
java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.ArrayList; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.UUID; @@ -38,6 +36,7 @@ import org.apache.iceberg.data.Record; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; @@ -830,6 +829,14 @@ public List> columns() { protected abstract T buildList(I list); } + private static boolean canReuseListAsReadBuffer(List list) { + return !(list instanceof ImmutableList); + } + + private static boolean canReuseMapAsReadBuffer(Map map) { + return !(map instanceof ImmutableMap); + } + public static class ListReader extends RepeatedReader, List, E> { private List lastList = null; private Iterator elements = null; @@ -849,9 +856,7 @@ protected List newListData(List reuse) { } if (reuse != null) { - // reuse containers may be unmodifiable (e.g. BaseFile.splitOffsets, - // BaseFile.equalityFieldIds) and cannot be cleared and reused - this.lastList = reuse instanceof ArrayList ? reuse : null; + this.lastList = canReuseListAsReadBuffer(reuse) ? reuse : null; this.elements = reuse.iterator(); } else { this.lastList = null; @@ -977,9 +982,7 @@ protected Map newMapData(Map reuse) { } if (reuse != null) { - // reuse containers may be wrapped or immutable (e.g. BaseFile.lowerBounds, - // BaseFile.upperBounds via SerializableByteBufferMap) and cannot be cleared and reused - this.lastMap = reuse instanceof LinkedHashMap ? reuse : null; + this.lastMap = canReuseMapAsReadBuffer(reuse) ? 
reuse : null; this.pairs = reuse.entrySet().iterator(); } else { this.lastMap = null; From 0276a961ebe75f6e066972aa3c2eaa2316429d8e Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Mon, 20 Apr 2026 15:10:05 -0500 Subject: [PATCH 12/22] Parquet: Whitelist mutable JDK collections for Parquet list/map scratch reuse Reuse ArrayList/LinkedHashMap-style buffers only via instanceof; avoids Class.forName and non-API JDK type checks while keeping clear() safe. --- .../apache/iceberg/parquet/ParquetValueReaders.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java index f49c5286ebb1..63d6d80d5869 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java @@ -25,7 +25,11 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.UUID; @@ -36,7 +40,6 @@ import org.apache.iceberg.data.Record; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; @@ -829,12 +832,14 @@ public List> columns() { protected abstract T buildList(I list); } + // Only recycle known growable JDK collections as scratch buffers. 
Reuse may be an unmodifiable + // view, Guava immutable type, List.of / Map.of, etc.; those are not these concrete classes. private static boolean canReuseListAsReadBuffer(List list) { - return !(list instanceof ImmutableList); + return list instanceof ArrayList || list instanceof LinkedList; } private static boolean canReuseMapAsReadBuffer(Map map) { - return !(map instanceof ImmutableMap); + return map instanceof LinkedHashMap || map instanceof HashMap; } public static class ListReader extends RepeatedReader, List, E> { From dec20aa4b2dc5745daee1bafc6b675ef3130c40d Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Mon, 27 Apr 2026 12:01:49 -0700 Subject: [PATCH 13/22] Checkpoint --- .../org/apache/iceberg/BaseFilesTable.java | 28 +++ .../org/apache/iceberg/DataTableScan.java | 33 +++ .../org/apache/iceberg/ManifestExpander.java | 176 +++++++++++++ .../org/apache/iceberg/ManifestReader.java | 30 +++ .../org/apache/iceberg/ManifestWriter.java | 4 +- .../apache/iceberg/TrackedFileAdapters.java | 129 +++++++++- .../iceberg/TrackedFileEntryAdapter.java | 141 +++++++++++ .../org/apache/iceberg/TrackingStruct.java | 4 + .../org/apache/iceberg/V4ManifestReader.java | 93 +++++++ .../java/org/apache/iceberg/V4Metadata.java | 227 ++++++----------- .../iceberg/TestV4ManifestReadWrite.java | 237 ++++++++++++++++++ .../spark/source/TestV4ReadEndToEnd.java | 108 ++++++++ 12 files changed, 1051 insertions(+), 159 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/ManifestExpander.java create mode 100644 core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java create mode 100644 core/src/main/java/org/apache/iceberg/V4ManifestReader.java create mode 100644 core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java create mode 100644 spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java diff --git a/core/src/main/java/org/apache/iceberg/BaseFilesTable.java 
b/core/src/main/java/org/apache/iceberg/BaseFilesTable.java index 4dff19b87990..615ec6b1dc20 100644 --- a/core/src/main/java/org/apache/iceberg/BaseFilesTable.java +++ b/core/src/main/java/org/apache/iceberg/BaseFilesTable.java @@ -35,6 +35,7 @@ import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.StructType; +import org.apache.iceberg.util.StructProjection; /** Base class logic for files metadata tables */ abstract class BaseFilesTable extends BaseMetadataTable { @@ -161,6 +162,21 @@ private ManifestReadTask( @Override public CloseableIterable rows() { + if (isV4Manifest()) { + Schema dataFileSchema = + new Schema( + DataFile.getType( + specsById + .getOrDefault(manifest.partitionSpecId(), PartitionSpec.unpartitioned()) + .rawPartitionType()) + .fields()); + return CloseableIterable.transform( + v4Files(), + file -> + (StructLike) + StructProjection.create(dataFileSchema, projection).wrap((StructLike) file)); + } + Types.NestedField readableMetricsField = projection.findField(MetricsUtil.READABLE_METRICS); if (readableMetricsField == null) { @@ -180,6 +196,18 @@ public long estimatedRowsCount() { + (long) manifest.existingFilesCount(); } + private boolean isV4Manifest() { + return FileFormat.fromFileName(manifest.path()) == FileFormat.PARQUET; + } + + private CloseableIterable> v4Files() { + V4ManifestReader reader = new V4ManifestReader(io.newInputFile(manifest), specsById); + PartitionSpec spec = + specsById.getOrDefault(manifest.partitionSpecId(), PartitionSpec.unpartitioned()); + return CloseableIterable.transform( + reader.liveEntries(), tf -> TrackedFileAdapters.asGenericDataFile(tf.copy(), spec)); + } + private CloseableIterable> files(Schema fileProjection) { return ManifestFiles.open(manifest, io, specsById).project(fileProjection); } diff --git a/core/src/main/java/org/apache/iceberg/DataTableScan.java b/core/src/main/java/org/apache/iceberg/DataTableScan.java index 
dc130c8064fc..7ad0d85fb6ff 100644 --- a/core/src/main/java/org/apache/iceberg/DataTableScan.java +++ b/core/src/main/java/org/apache/iceberg/DataTableScan.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import java.util.List; +import java.util.Map; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -62,6 +63,38 @@ protected TableScan newRefinedScan(Table table, Schema schema, TableScanContext @Override public CloseableIterable doPlanFiles() { + if (TableUtil.formatVersion(table()) >= 4) { + return doPlanFilesV4(); + } + + return doPlanFilesV3(); + } + + private CloseableIterable doPlanFilesV4() { + Snapshot snapshot = snapshot(); + FileIO io = table().io(); + List dataManifests = snapshot.dataManifests(io); + Map specsById = specs(); + scanMetrics().totalDataManifests().increment((long) dataManifests.size()); + + ManifestExpander expander = + new ManifestExpander(io, dataManifests, specsById) + .caseSensitive(isCaseSensitive()) + .filterData(filter()) + .scanMetrics(scanMetrics()); + + if (shouldIgnoreResiduals()) { + expander = expander.ignoreResiduals(); + } + + if (shouldPlanWithExecutor() && dataManifests.size() > 1) { + expander = expander.planWith(planExecutor()); + } + + return expander.planFiles(); + } + + private CloseableIterable doPlanFilesV3() { Snapshot snapshot = snapshot(); FileIO io = table().io(); diff --git a/core/src/main/java/org/apache/iceberg/ManifestExpander.java b/core/src/main/java/org/apache/iceberg/ManifestExpander.java new file mode 100644 index 000000000000..1cd736a6af7b --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/ManifestExpander.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.ResidualEvaluator; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.metrics.ScanMetrics; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.util.ParallelIterable; + +/** + * V4 replacement for {@link ManifestGroup}. + * + *

Reads v4 manifests via {@link V4ManifestReader}, separates entries by content type, expands + * leaf manifests (DATA_MANIFEST entries), and converts DATA entries to {@link FileScanTask} + * instances using {@link TrackedFileAdapters}. + * + *

DV support is deferred to a future phase. Delete files are not matched to data files. + */ +class ManifestExpander extends CloseableGroup { + private final FileIO io; + private final Iterable manifests; + private final Map specsById; + + private Expression dataFilter = Expressions.alwaysTrue(); + private boolean ignoreResiduals = false; + private boolean caseSensitive = true; + + @SuppressWarnings("UnusedVariable") + private ScanMetrics scanMetrics = ScanMetrics.noop(); + + private ExecutorService executorService = null; + + ManifestExpander( + FileIO io, Iterable manifests, Map specsById) { + this.io = io; + this.manifests = manifests; + this.specsById = specsById; + } + + ManifestExpander filterData(Expression newDataFilter) { + this.dataFilter = Expressions.and(dataFilter, newDataFilter); + return this; + } + + ManifestExpander ignoreResiduals() { + this.ignoreResiduals = true; + return this; + } + + ManifestExpander caseSensitive(boolean newCaseSensitive) { + this.caseSensitive = newCaseSensitive; + return this; + } + + ManifestExpander scanMetrics(ScanMetrics newScanMetrics) { + this.scanMetrics = newScanMetrics; + return this; + } + + ManifestExpander planWith(ExecutorService newExecutorService) { + this.executorService = newExecutorService; + return this; + } + + CloseableIterable planFiles() { + List> taskGroups = Lists.newArrayList(); + + for (ManifestFile manifest : manifests) { + taskGroups.addAll(expandManifest(manifest)); + } + + if (executorService != null) { + return new ParallelIterable<>(taskGroups, executorService); + } + + return CloseableIterable.concat(taskGroups); + } + + private List> expandManifest(ManifestFile manifest) { + InputFile manifestFile = io.newInputFile(manifest); + V4ManifestReader reader = new V4ManifestReader(manifestFile, specsById); + addCloseable(reader); + + // read all live entries once and partition by content type (entries are copied) + List dataFiles = Lists.newArrayList(); + List leafManifests = Lists.newArrayList(); 
+ + try (CloseableIterable liveEntries = reader.liveEntries()) { + for (TrackedFile entry : liveEntries) { + switch (entry.contentType()) { + case DATA: + dataFiles.add(entry.copy()); + break; + case DATA_MANIFEST: + leafManifests.add(entry.copy()); + break; + default: + // EQUALITY_DELETES, DELETE_MANIFEST: skip for now (future phase) + break; + } + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + List> result = Lists.newArrayList(); + + // direct DATA entries from root + if (!dataFiles.isEmpty()) { + result.add( + CloseableIterable.transform( + CloseableIterable.withNoopClose(dataFiles), this::createTask)); + } + + // expand leaf manifests + for (TrackedFile leafEntry : leafManifests) { + result.add(expandLeafManifest(leafEntry)); + } + + return result; + } + + private CloseableIterable expandLeafManifest(TrackedFile manifestEntry) { + InputFile leafFile = io.newInputFile(manifestEntry.location()); + V4ManifestReader leafReader = new V4ManifestReader(leafFile, specsById); + addCloseable(leafReader); + + return CloseableIterable.transform( + CloseableIterable.filter( + leafReader.liveEntries(), tf -> tf.contentType() == FileContent.DATA), + tf -> createTask(tf.copy())); + } + + private FileScanTask createTask(TrackedFile trackedFile) { + int specId = trackedFile.specId() != null ? trackedFile.specId() : 0; + PartitionSpec spec = specsById.get(specId); + DataFile dataFile = TrackedFileAdapters.asDataFile(trackedFile, spec); + + Expression filter = ignoreResiduals ? 
Expressions.alwaysTrue() : dataFilter; + ResidualEvaluator residuals = ResidualEvaluator.of(spec, filter, caseSensitive); + + return new BaseFileScanTask( + dataFile, + new DeleteFile[0], + SchemaParser.toJson(spec.schema()), + PartitionSpecParser.toJson(spec), + residuals); + } +} diff --git a/core/src/main/java/org/apache/iceberg/ManifestReader.java b/core/src/main/java/org/apache/iceberg/ManifestReader.java index a69f57a47b3e..096b003cbee4 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestReader.java +++ b/core/src/main/java/org/apache/iceberg/ManifestReader.java @@ -40,6 +40,7 @@ import org.apache.iceberg.metrics.ScanMetrics; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; @@ -91,6 +92,7 @@ private Class fileClass() { } private final InputFile file; + private final Map specsById; private final InheritableMetadata inheritableMetadata; private final Long firstRowId; private final FileType content; @@ -130,6 +132,7 @@ protected ManifestReader( firstRowId == null || content == FileType.DATA_FILES, "First row ID is not valid for delete manifests"); this.file = file; + this.specsById = specsById; this.inheritableMetadata = inheritableMetadata; this.firstRowId = firstRowId; this.content = content; @@ -294,6 +297,10 @@ private CloseableIterable> open(Schema projection) { Preconditions.checkArgument( format != null, "Unable to determine format of manifest: %s", file.location()); + if (isV4Manifest(format)) { + return openV4(); + } + boolean unpartitioned = spec.rawPartitionType().fields().isEmpty(); // V4+ manifests omit the partition field when unpartitioned (Parquet cannot 
represent @@ -313,9 +320,11 @@ private CloseableIterable> open(Schema projection) { if (projection.findField(DataFile.RECORD_COUNT.fieldId()) == null) { fields.add(DataFile.RECORD_COUNT); } + if (projection.findField(DataFile.FIRST_ROW_ID.fieldId()) == null) { fields.add(DataFile.FIRST_ROW_ID); } + fields.add(MetadataColumns.ROW_POSITION); CloseableIterable> reader = @@ -334,6 +343,27 @@ private CloseableIterable> open(Schema projection) { return CloseableIterable.transform(withMetadata, idAssigner(firstRowId)); } + private boolean isV4Manifest(FileFormat format) { + return format == FileFormat.PARQUET; + } + + @SuppressWarnings("unchecked") + private CloseableIterable> openV4() { + V4ManifestReader v4Reader = + new V4ManifestReader(file, specsById != null ? specsById : ImmutableMap.of()); + addCloseable(v4Reader); + + // adapt TrackedFile entries to ManifestEntry via TrackedFileEntryAdapter + CloseableIterable> adapted = + CloseableIterable.transform( + v4Reader.entries(), + tf -> (ManifestEntry) new TrackedFileEntryAdapter(tf.copy(), spec)); + + CloseableIterable> withMetadata = + CloseableIterable.transform(adapted, inheritableMetadata::apply); + return CloseableIterable.transform(withMetadata, idAssigner(firstRowId)); + } + CloseableIterable> liveEntries() { return entries(true /* only live entries */); } diff --git a/core/src/main/java/org/apache/iceberg/ManifestWriter.java b/core/src/main/java/org/apache/iceberg/ManifestWriter.java index 4b1d063aad83..de0ccf43a3fb 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestWriter.java +++ b/core/src/main/java/org/apache/iceberg/ManifestWriter.java @@ -291,7 +291,7 @@ protected FileAppender> newAppender( try { return InternalData.write(format(), file) .schema(manifestSchema) - .named("manifest_entry") + .named("tracked_file") .meta("schema", SchemaParser.toJson(spec.schema())) .meta("partition-spec", PartitionSpecParser.toJsonFields(spec)) .meta("partition-spec-id", String.valueOf(spec.specId())) @@ -331,7 
+331,7 @@ protected FileAppender> newAppender( try { return InternalData.write(format(), file) .schema(manifestSchema) - .named("manifest_entry") + .named("tracked_file") .meta("schema", SchemaParser.toJson(spec.schema())) .meta("partition-spec", PartitionSpecParser.toJsonFields(spec)) .meta("partition-spec-id", String.valueOf(spec.specId())) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index a28dd872a713..63a9efadf562 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -27,6 +27,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; /** * Adapts {@link TrackedFile} entries to the {@link DataFile} and {@link DeleteFile} APIs. @@ -41,6 +42,53 @@ class TrackedFileAdapters { private TrackedFileAdapters() {} + /** + * Creates a {@link GenericDataFile} from a TrackedFile using the reader constructor so that + * SupportsIndexProjection is correctly initialized for metadata table reads. + */ + static GenericDataFile asGenericDataFile(TrackedFile file, PartitionSpec spec) { + Preconditions.checkState( + file.contentType() == FileContent.DATA, + "Cannot convert tracked file to DataFile: content type is %s, not DATA", + file.contentType()); + + Types.StructType partitionType = spec != null ? 
spec.rawPartitionType() : Types.StructType.of(); + Types.StructType projection = DataFile.getType(partitionType); + + // use the reader constructor for correct SupportsIndexProjection mapping + GenericDataFile dataFile = new GenericDataFile(projection); + + // populate using DataFile.getType() positions (same as BaseFile internal positions) + // 0=content, 1=file_path, 2=file_format, 3=spec_id, 4=partition, 5=record_count, + // 6=file_size, 7=column_sizes, 8=value_counts, 9=null_value_counts, 10=nan_value_counts, + // 11=lower_bounds, 12=upper_bounds, 13=key_metadata, 14=split_offsets, 15=equality_ids, + // 16=sort_order_id, 17=first_row_id + Tracking tracking = file.tracking(); + dataFile.set(0, file.contentType().id()); + dataFile.set(1, file.location()); + dataFile.set(2, file.fileFormat() != null ? file.fileFormat().toString() : null); + dataFile.set(3, file.specId() != null ? file.specId() : 0); + if (!partitionType.fields().isEmpty()) { + dataFile.set(4, extractPartition(file, spec)); + } + + dataFile.set(5, file.recordCount()); + dataFile.set(6, file.fileSizeInBytes()); + // 7: column_sizes - null default + dataFile.set(8, valueCounts(file.contentStats())); + dataFile.set(9, nullValueCounts(file.contentStats())); + dataFile.set(10, nanValueCounts(file.contentStats())); + dataFile.set(11, lowerBounds(file.contentStats())); + dataFile.set(12, upperBounds(file.contentStats())); + dataFile.set(13, file.keyMetadata()); + dataFile.set(14, file.splitOffsets()); + // 15: equality_ids - null default + dataFile.set(16, file.sortOrderId()); + dataFile.set(17, tracking != null ? 
tracking.firstRowId() : null); + + return dataFile; + } + static DataFile asDataFile(TrackedFile file, PartitionSpec spec) { Preconditions.checkState( file.contentType() == FileContent.DATA, @@ -61,8 +109,9 @@ static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec) { static DeleteFile asEqualityDeleteFile(TrackedFile file, PartitionSpec spec) { Preconditions.checkState( - file.contentType() == FileContent.EQUALITY_DELETES, - "Cannot convert tracked file to DeleteFile: content type is %s, not EQUALITY_DELETES", + file.contentType() == FileContent.EQUALITY_DELETES + || file.contentType() == FileContent.POSITION_DELETES, + "Cannot convert tracked file to DeleteFile: content type is %s", file.contentType()); return new TrackedDeleteFile(file, spec); } @@ -186,7 +235,10 @@ static Map upperBounds(ContentStats stats) { } /** Adapts a TrackedFile DATA entry to the {@link DataFile} interface. */ - private static class TrackedDataFile implements DataFile { + private static class TrackedDataFile implements DataFile, StructLike, java.io.Serializable { + // BaseFile StructLike field count (content through fileOrdinal) + private static final int STRUCT_SIZE = 22; + private final TrackedFile file; private final Tracking tracking; private final PartitionSpec spec; @@ -197,6 +249,73 @@ private TrackedDataFile(TrackedFile file, PartitionSpec spec) { this.spec = spec; } + @Override + public int size() { + return STRUCT_SIZE; + } + + @Override + public void set(int pos, T value) { + throw new UnsupportedOperationException("TrackedDataFile is read-only"); + } + + @Override + public T get(int pos, Class javaClass) { + return javaClass.cast(getByPos(pos)); + } + + // positions match BaseFile / DataFile.getType() field order + private Object getByPos(int pos) { + switch (pos) { + case 0: + return content().id(); + case 1: + return location(); + case 2: + return format() != null ? 
format().toString() : null; + case 3: + return specId(); + case 4: + return partition(); + case 5: + return recordCount(); + case 6: + return fileSizeInBytes(); + case 7: + return columnSizes(); + case 8: + return valueCounts(); + case 9: + return nullValueCounts(); + case 10: + return nanValueCounts(); + case 11: + return lowerBounds(); + case 12: + return upperBounds(); + case 13: + return keyMetadata(); + case 14: + return splitOffsets(); + case 15: + return equalityFieldIds(); + case 16: + return sortOrderId(); + case 17: + return firstRowId(); + case 18: + return null; // referencedDataFile + case 19: + return null; // contentOffset + case 20: + return null; // contentSizeInBytes + case 21: + return pos(); + default: + throw new UnsupportedOperationException("Unknown field ordinal: " + pos); + } + } + @Override public Long pos() { return tracking != null ? tracking.manifestPos() : null; @@ -331,7 +450,7 @@ public DataFile copyWithStats(Set requestedColumnIds) { } /** Adapts a TrackedFile EQUALITY_DELETES entry to the {@link DeleteFile} interface. */ - private static class TrackedDeleteFile implements DeleteFile { + private static class TrackedDeleteFile implements DeleteFile, java.io.Serializable { private final TrackedFile file; private final Tracking tracking; private final PartitionSpec spec; @@ -355,7 +474,7 @@ public int specId() { @Override public FileContent content() { - return FileContent.EQUALITY_DELETES; + return file.contentType(); } @SuppressWarnings("deprecation") diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java b/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java new file mode 100644 index 000000000000..2bdae8933d9d --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +/** + * Adapts a {@link TrackedFile} to the {@link ManifestEntry} interface for v3 pipeline + * compatibility. + * + *

This allows code that works with ManifestEntry (ManifestFiles.read(), ManifestGroup, etc.) to + * consume entries from v4 manifests via {@link V4ManifestReader}. + */ +class TrackedFileEntryAdapter> implements ManifestEntry { + private final TrackedFile trackedFile; + private final F adapted; + private final PartitionSpec spec; + + // mutable fields for InheritableMetadata + private Long overrideSnapshotId = null; + private Long overrideDataSeqNum = null; + private Long overrideFileSeqNum = null; + private boolean snapshotIdOverridden = false; + private boolean dataSeqNumOverridden = false; + private boolean fileSeqNumOverridden = false; + + @SuppressWarnings("unchecked") + TrackedFileEntryAdapter(TrackedFile trackedFile, PartitionSpec spec) { + this.trackedFile = trackedFile; + this.spec = spec; + this.adapted = (F) adaptFile(trackedFile, spec); + } + + private static ContentFile adaptFile(TrackedFile file, PartitionSpec spec) { + if (file.contentType() == FileContent.DATA) { + return TrackedFileAdapters.asDataFile(file, spec); + } + + // for EQUALITY_DELETES and POSITION_DELETES, use a minimal delete file adapter + return TrackedFileAdapters.asDeleteFile(file, spec); + } + + @Override + public Status status() { + Tracking tracking = trackedFile.tracking(); + if (tracking == null) { + return Status.EXISTING; + } + + EntryStatus entryStatus = tracking.status(); + if (entryStatus == null) { + return Status.EXISTING; + } + + switch (entryStatus) { + case EXISTING: + return Status.EXISTING; + case ADDED: + return Status.ADDED; + case DELETED: + case REPLACED: + return Status.DELETED; + default: + throw new UnsupportedOperationException("Unknown entry status: " + entryStatus); + } + } + + @Override + public Long snapshotId() { + if (snapshotIdOverridden) { + return overrideSnapshotId; + } + + return trackedFile.tracking() != null ? 
trackedFile.tracking().snapshotId() : null; + } + + @Override + public void setSnapshotId(long snapshotId) { + this.overrideSnapshotId = snapshotId; + this.snapshotIdOverridden = true; + } + + @Override + public Long dataSequenceNumber() { + if (dataSeqNumOverridden) { + return overrideDataSeqNum; + } + + return trackedFile.tracking() != null ? trackedFile.tracking().dataSequenceNumber() : null; + } + + @Override + public void setDataSequenceNumber(long dataSequenceNumber) { + this.overrideDataSeqNum = dataSequenceNumber; + this.dataSeqNumOverridden = true; + } + + @Override + public Long fileSequenceNumber() { + if (fileSeqNumOverridden) { + return overrideFileSeqNum; + } + + return trackedFile.tracking() != null ? trackedFile.tracking().fileSequenceNumber() : null; + } + + @Override + public void setFileSequenceNumber(long fileSequenceNumber) { + this.overrideFileSeqNum = fileSequenceNumber; + this.fileSeqNumOverridden = true; + } + + @Override + public F file() { + return adapted; + } + + @Override + public ManifestEntry copy() { + return new TrackedFileEntryAdapter<>(trackedFile.copy(), spec); + } + + @Override + public ManifestEntry copyWithoutStats() { + return new TrackedFileEntryAdapter<>(trackedFile.copyWithoutStats(), spec); + } +} diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index a8624aad15c1..03215d3cbc8d 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -113,6 +113,10 @@ void setManifestLocation(String location) { this.manifestLocation = location; } + void setManifestPos(long pos) { + this.manifestPos = pos; + } + @Override public EntryStatus status() { return status; diff --git a/core/src/main/java/org/apache/iceberg/V4ManifestReader.java b/core/src/main/java/org/apache/iceberg/V4ManifestReader.java new file mode 100644 index 000000000000..077d0d8c9b41 --- /dev/null +++ 
b/core/src/main/java/org/apache/iceberg/V4ManifestReader.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.util.Map; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.io.CloseableIterator; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Types; + +/** + * Reader for v4 manifest files containing {@link TrackedFile} entries. + * + *

Supports reading both root manifests and leaf manifests. Returns TrackedFile entries which can + * represent data files, equality deletes, or manifest references. + */ +class V4ManifestReader extends CloseableGroup implements CloseableIterable { + private final InputFile file; + private final Map specsById; + + V4ManifestReader(InputFile file, Map specsById) { + this.file = file; + this.specsById = specsById; + } + + /** Returns all entries in the manifest. */ + CloseableIterable entries() { + return open(); + } + + /** Returns only live entries (ADDED or EXISTING, not DELETED or REPLACED). */ + CloseableIterable liveEntries() { + return CloseableIterable.filter(open(), this::isLive); + } + + /** Returns copied live entries for safe use outside iteration. */ + @Override + public CloseableIterator iterator() { + return CloseableIterable.transform(liveEntries(), TrackedFile::copy).iterator(); + } + + Map specsById() { + return specsById; + } + + private boolean isLive(TrackedFile tf) { + if (tf == null) { + return false; + } + + Tracking tracking = tf.tracking(); + return tracking != null && tracking.isLive(); + } + + private CloseableIterable open() { + FileFormat format = FileFormat.fromFileName(file.location()); + Preconditions.checkArgument( + format != null, "Unable to determine format of manifest: %s", file.location()); + + Schema readSchema = V4Metadata.entrySchema(Types.StructType.of()); + + CloseableIterable reader = + InternalData.read(format, file) + .project(readSchema) + .setRootType(TrackedFileStruct.class) + .setCustomType(TrackedFile.TRACKING.fieldId(), TrackingStruct.class) + .setCustomType(TrackedFile.DELETION_VECTOR.fieldId(), DeletionVectorStruct.class) + .setCustomType(TrackedFile.MANIFEST_INFO.fieldId(), ManifestInfoStruct.class) + .reuseContainers() + .build(); + + addCloseable(reader); + return reader; + } +} diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index 
f822840102f4..2333a625fbb2 100644 --- a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -18,12 +18,9 @@ */ package org.apache.iceberg; -import static org.apache.iceberg.types.Types.NestedField.required; - import java.nio.ByteBuffer; import java.util.List; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.types.Types; class V4Metadata { @@ -265,63 +262,31 @@ public ManifestFile copy() { } static Schema entrySchema(Types.StructType partitionType) { - return wrapFileSchema(fileType(partitionType)); - } - - static Schema wrapFileSchema(Types.StructType fileSchema) { - // this is used to build projection schemas return new Schema( - ManifestEntry.STATUS, - ManifestEntry.SNAPSHOT_ID, - ManifestEntry.SEQUENCE_NUMBER, - ManifestEntry.FILE_SEQUENCE_NUMBER, - required(ManifestEntry.DATA_FILE_ID, "data_file", fileSchema)); - } - - static Types.StructType fileType(Types.StructType partitionType) { - ImmutableList.Builder fields = - ImmutableList.builderWithExpectedSize(partitionType.fields().isEmpty() ? 
18 : 19); - fields.add(DataFile.CONTENT.asRequired()); - fields.add(DataFile.FILE_PATH); - fields.add(DataFile.FILE_FORMAT); - if (!partitionType.fields().isEmpty()) { - fields.add( - required( - DataFile.PARTITION_ID, - DataFile.PARTITION_NAME, - partitionType, - DataFile.PARTITION_DOC)); - } - fields.add(DataFile.RECORD_COUNT); - fields.add(DataFile.FILE_SIZE); - fields.add(DataFile.COLUMN_SIZES); - fields.add(DataFile.VALUE_COUNTS); - fields.add(DataFile.NULL_VALUE_COUNTS); - fields.add(DataFile.NAN_VALUE_COUNTS); - fields.add(DataFile.LOWER_BOUNDS); - fields.add(DataFile.UPPER_BOUNDS); - fields.add(DataFile.KEY_METADATA); - fields.add(DataFile.SPLIT_OFFSETS); - fields.add(DataFile.EQUALITY_IDS); - fields.add(DataFile.SORT_ORDER_ID); - fields.add(DataFile.FIRST_ROW_ID); - fields.add(DataFile.REFERENCED_DATA_FILE); - fields.add(DataFile.CONTENT_OFFSET); - fields.add(DataFile.CONTENT_SIZE); - return Types.StructType.of(fields.build()); + TrackedFile.TRACKING, + TrackedFile.CONTENT_TYPE, + TrackedFile.LOCATION, + TrackedFile.FILE_FORMAT, + TrackedFile.RECORD_COUNT, + TrackedFile.FILE_SIZE_IN_BYTES, + TrackedFile.SPEC_ID, + TrackedFile.SORT_ORDER_ID, + TrackedFile.DELETION_VECTOR, + TrackedFile.MANIFEST_INFO, + TrackedFile.KEY_METADATA, + TrackedFile.SPLIT_OFFSETS, + TrackedFile.EQUALITY_IDS); } static class ManifestEntryWrapper> implements ManifestEntry, StructLike { - private final int size; - private final Long commitSnapshotId; - private final DataFileWrapper fileWrapper; + private static final int ENTRY_FIELD_COUNT = 13; + + private final TrackingWriteWrapper trackingWrapper; private ManifestEntry wrapped = null; ManifestEntryWrapper(Long commitSnapshotId, Types.StructType partitionType) { - this.size = entrySchema(partitionType).columns().size(); - this.commitSnapshotId = commitSnapshotId; - this.fileWrapper = new DataFileWrapper<>(partitionType); + this.trackingWrapper = new TrackingWriteWrapper(commitSnapshotId); } public ManifestEntryWrapper 
wrap(ManifestEntry entry) { @@ -331,7 +296,7 @@ public ManifestEntryWrapper wrap(ManifestEntry entry) { @Override public int size() { - return size; + return ENTRY_FIELD_COUNT; } @Override @@ -347,32 +312,31 @@ public T get(int pos, Class javaClass) { private Object get(int pos) { switch (pos) { case 0: - return wrapped.status().id(); + return trackingWrapper.wrap(wrapped); case 1: - return wrapped.snapshotId(); + return wrapped.file().content().id(); case 2: - if (wrapped.dataSequenceNumber() == null) { - // if the entry's data sequence number is null, - // then it will inherit the sequence number of the current commit. - // to validate that this is correct, check that the snapshot id is either null (will - // also be inherited) or that it matches the id of the current commit. - Preconditions.checkState( - wrapped.snapshotId() == null || wrapped.snapshotId().equals(commitSnapshotId), - "Found unassigned sequence number for an entry from snapshot: %s", - wrapped.snapshotId()); - - // inheritance should work only for ADDED entries - Preconditions.checkState( - wrapped.status() == Status.ADDED, - "Only entries with status ADDED can have null sequence number"); - - return null; - } - return wrapped.dataSequenceNumber(); + return wrapped.file().location(); case 3: - return wrapped.fileSequenceNumber(); + return wrapped.file().format() != null ? 
wrapped.file().format().toString() : null; case 4: - return fileWrapper.wrap(wrapped.file()); + return wrapped.file().recordCount(); + case 5: + return wrapped.file().fileSizeInBytes(); + case 6: + return wrapped.file().specId(); + case 7: + return wrapped.file().sortOrderId(); + case 8: + return null; // deletion_vector (future) + case 9: + return null; // manifest_info (null for data files) + case 10: + return wrapped.file().keyMetadata(); + case 11: + return wrapped.file().splitOffsets(); + case 12: + return wrapped.file().equalityFieldIds(); default: throw new UnsupportedOperationException("Unknown field ordinal: " + pos); } @@ -429,34 +393,30 @@ public ManifestEntry copyWithoutStats() { } } - /** Wrapper used to write DataFile or DeleteFile to v4 metadata. */ - static class DataFileWrapper> extends Delegates.DelegatingContentFile - implements ContentFile, StructLike { - private static final int PARTITION_POSITION = 3; + /** Wrapper that writes tracking fields from a ManifestEntry as a StructLike. 
*/ + static class TrackingWriteWrapper implements StructLike { + private static final int TRACKING_FIELD_COUNT = 8; - private final int size; - private final boolean hasPartition; + private final Long commitSnapshotId; + private ManifestEntry entry = null; - DataFileWrapper(Types.StructType partitionType) { - super(null); - this.hasPartition = !partitionType.fields().isEmpty(); - this.size = fileType(partitionType).fields().size(); + TrackingWriteWrapper(Long commitSnapshotId) { + this.commitSnapshotId = commitSnapshotId; } - @SuppressWarnings("unchecked") - DataFileWrapper wrap(ContentFile file) { - setWrapped((F) file); + TrackingWriteWrapper wrap(ManifestEntry newEntry) { + this.entry = newEntry; return this; } @Override public int size() { - return size; + return TRACKING_FIELD_COUNT; } @Override public void set(int pos, T value) { - throw new UnsupportedOperationException("Cannot modify DataFileWrapper wrapper via set"); + throw new UnsupportedOperationException("Cannot modify TrackingWriteWrapper wrapper via set"); } @Override @@ -465,78 +425,41 @@ public T get(int pos, Class javaClass) { } private Object get(int pos) { - // when the partition field is omitted, positions at or after where it would appear - // shift down by 1, so adjust back to the canonical field ordering - int adjusted = hasPartition ? pos : (pos >= PARTITION_POSITION ? pos + 1 : pos); - switch (adjusted) { + switch (pos) { case 0: - return wrapped.content().id(); + return entry.status().id(); case 1: - return wrapped.location(); + return entry.snapshotId(); case 2: - return wrapped.format() != null ? 
wrapped.format().toString() : null; + if (entry.dataSequenceNumber() == null) { + Preconditions.checkState( + entry.snapshotId() == null || entry.snapshotId().equals(commitSnapshotId), + "Found unassigned sequence number for an entry from snapshot: %s", + entry.snapshotId()); + Preconditions.checkState( + entry.status() == ManifestEntry.Status.ADDED, + "Only entries with status ADDED can have null sequence number"); + return null; + } + + return entry.dataSequenceNumber(); case 3: - return wrapped.partition(); + return entry.fileSequenceNumber(); case 4: - return wrapped.recordCount(); + return null; // dv_snapshot_id (future) case 5: - return wrapped.fileSizeInBytes(); - case 6: - return wrapped.columnSizes(); - case 7: - return wrapped.valueCounts(); - case 8: - return wrapped.nullValueCounts(); - case 9: - return wrapped.nanValueCounts(); - case 10: - return wrapped.lowerBounds(); - case 11: - return wrapped.upperBounds(); - case 12: - return wrapped.keyMetadata(); - case 13: - return wrapped.splitOffsets(); - case 14: - return wrapped.equalityFieldIds(); - case 15: - return wrapped.sortOrderId(); - case 16: - if (wrapped.content() == FileContent.DATA) { - return wrapped.firstRowId(); - } else { - return null; - } - case 17: - if (wrapped.content() == FileContent.POSITION_DELETES) { - return ((DeleteFile) wrapped).referencedDataFile(); - } else { - return null; - } - case 18: - if (wrapped.content() == FileContent.POSITION_DELETES) { - return ((DeleteFile) wrapped).contentOffset(); - } else { - return null; - } - case 19: - if (wrapped.content() == FileContent.POSITION_DELETES) { - return ((DeleteFile) wrapped).contentSizeInBytes(); + if (entry.file().content() == FileContent.DATA) { + return entry.file().firstRowId(); } else { return null; } + case 6: + return null; // deleted_positions (future) + case 7: + return null; // replaced_positions (future) + default: + throw new UnsupportedOperationException("Unknown field ordinal: " + pos); } - throw new 
IllegalArgumentException("Unknown field ordinal: " + pos); - } - - @Override - public String manifestLocation() { - return null; - } - - @Override - public Long pos() { - return null; } } } diff --git a/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java b/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java new file mode 100644 index 000000000000..ce9f7b0829c2 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.encryption.EncryptedOutputFile; +import org.apache.iceberg.encryption.EncryptingFileIO; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; +import org.apache.iceberg.inmemory.InMemoryFileIO; +import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestV4ManifestReadWrite { + private final FileIO io = + EncryptingFileIO.combine(new InMemoryFileIO(), PlaintextEncryptionManager.instance()); + + private static final Schema SCHEMA = + new Schema( + required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); + + private static final PartitionSpec SPEC = PartitionSpec.unpartitioned(); + + private static final Map SPECS_BY_ID = + ImmutableMap.of(SPEC.specId(), SPEC); + + private static final long SNAPSHOT_ID = 987134631982734L; + private static final long FIRST_ROW_ID = 100L; + + private static final String FILE_PATH = "s3://bucket/table/data/file.parquet"; + private static final FileFormat FILE_FORMAT = FileFormat.PARQUET; + + private static final Metrics METRICS = + new Metrics( + 100L, + ImmutableMap.of(1, 800L, 2, 2400L), + ImmutableMap.of(1, 100L, 2, 100L), + ImmutableMap.of(1, 0L, 2, 5L), + null, + ImmutableMap.of(1, Conversions.toByteBuffer(Types.LongType.get(), 1L)), + ImmutableMap.of(1, Conversions.toByteBuffer(Types.LongType.get(), 100L))); + + private static final List 
OFFSETS = ImmutableList.of(4L); + private static final Integer SORT_ORDER_ID = 0; + + private static final DataFile DATA_FILE = + new GenericDataFile( + 0, + FILE_PATH, + FILE_FORMAT, + null, + 150972L, + METRICS, + null, + OFFSETS, + SORT_ORDER_ID, + FIRST_ROW_ID); + + @Test + public void testWriteAndReadV4DataManifest() throws IOException { + ManifestFile manifest = writeV4Manifest(DATA_FILE); + + // read back via V4ManifestReader + V4ManifestReader reader = new V4ManifestReader(io.newInputFile(manifest), SPECS_BY_ID); + + List entries = Lists.newArrayList(); + try (CloseableIterable liveEntries = reader.liveEntries()) { + for (TrackedFile tf : liveEntries) { + entries.add(tf.copy()); + } + } + + assertThat(entries).hasSize(1); + + TrackedFile entry = entries.get(0); + assertThat(entry.contentType()).isEqualTo(FileContent.DATA); + assertThat(entry.location()).isEqualTo(FILE_PATH); + assertThat(entry.fileFormat()).isEqualTo(FILE_FORMAT); + assertThat(entry.recordCount()).isEqualTo(100L); + assertThat(entry.fileSizeInBytes()).isEqualTo(150972L); + assertThat(entry.sortOrderId()).isEqualTo(SORT_ORDER_ID); + assertThat(entry.splitOffsets()).isEqualTo(OFFSETS); + + Tracking tracking = entry.tracking(); + assertThat(tracking).isNotNull(); + assertThat(tracking.status()).isEqualTo(EntryStatus.ADDED); + assertThat(tracking.snapshotId()).isEqualTo(SNAPSHOT_ID); + } + + @Test + public void testV4ManifestLiveEntries() throws IOException { + // write a manifest with an ADDED entry, then read as existing + add another + ManifestFile firstManifest = writeV4Manifest(DATA_FILE); + + // read the first manifest and rewrite with EXISTING + ADDED + DELETED entries + DataFile secondFile = + new GenericDataFile( + 0, + "s3://bucket/table/data/file2.parquet", + FILE_FORMAT, + null, + 200000L, + METRICS, + null, + OFFSETS, + SORT_ORDER_ID, + FIRST_ROW_ID + 100); + + DataFile deletedFile = + new GenericDataFile( + 0, + "s3://bucket/table/data/deleted.parquet", + FILE_FORMAT, + null, + 
50000L, + METRICS, + null, + OFFSETS, + SORT_ORDER_ID, + FIRST_ROW_ID + 200); + + String filename = FileFormat.PARQUET.addExtension("manifest-mixed-" + System.nanoTime()); + EncryptedOutputFile outputFile = + PlaintextEncryptionManager.instance().encrypt(io.newOutputFile(filename)); + ManifestWriter writer = + ManifestFiles.newWriter(4, SPEC, outputFile, SNAPSHOT_ID, FIRST_ROW_ID); + try { + writer.existing(DATA_FILE, SNAPSHOT_ID, 1L, 1L); + writer.add(secondFile); + writer.delete(deletedFile, 1L, 1L); + } finally { + writer.close(); + } + + ManifestFile manifest = writer.toManifestFile(); + + // read liveEntries -- should only return EXISTING + ADDED, not DELETED + V4ManifestReader reader = new V4ManifestReader(io.newInputFile(manifest), SPECS_BY_ID); + List liveFiles = Lists.newArrayList(); + try (CloseableIterable live = reader.liveEntries()) { + for (TrackedFile tf : live) { + liveFiles.add(tf.copy()); + } + } + + assertThat(liveFiles).hasSize(2); + assertThat(liveFiles).allSatisfy(tf -> assertThat(tf.tracking().isLive()).isTrue()); + + // read all entries -- should include all 3 + reader = new V4ManifestReader(io.newInputFile(manifest), SPECS_BY_ID); + List allFiles = Lists.newArrayList(); + try (CloseableIterable all = reader.entries()) { + for (TrackedFile tf : all) { + allFiles.add(tf.copy()); + } + } + + assertThat(allFiles).hasSize(3); + } + + @Test + public void testV4ManifestIterator() throws IOException { + ManifestFile manifest = writeV4Manifest(DATA_FILE); + V4ManifestReader reader = new V4ManifestReader(io.newInputFile(manifest), SPECS_BY_ID); + + List files = Lists.newArrayList(reader); + assertThat(files).hasSize(1); + assertThat(files.get(0).location()).isEqualTo(FILE_PATH); + } + + @Test + public void testV4ManifestDataFileAdapter() throws IOException { + ManifestFile manifest = writeV4Manifest(DATA_FILE); + V4ManifestReader reader = new V4ManifestReader(io.newInputFile(manifest), SPECS_BY_ID); + + List entries = Lists.newArrayList(); + try 
(CloseableIterable liveEntries = reader.liveEntries()) { + for (TrackedFile tf : liveEntries) { + entries.add(tf.copy()); + } + } + + assertThat(entries).hasSize(1); + + // convert to DataFile via adapter + DataFile adapted = TrackedFileAdapters.asDataFile(entries.get(0), SPEC); + assertThat(adapted.location()).isEqualTo(FILE_PATH); + assertThat(adapted.format()).isEqualTo(FILE_FORMAT); + assertThat(adapted.recordCount()).isEqualTo(100L); + assertThat(adapted.fileSizeInBytes()).isEqualTo(150972L); + assertThat(adapted.content()).isEqualTo(FileContent.DATA); + assertThat(adapted.splitOffsets()).isEqualTo(OFFSETS); + } + + private ManifestFile writeV4Manifest(DataFile... files) throws IOException { + String filename = FileFormat.PARQUET.addExtension("manifest-" + System.nanoTime()); + EncryptedOutputFile outputFile = + PlaintextEncryptionManager.instance().encrypt(io.newOutputFile(filename)); + ManifestWriter writer = + ManifestFiles.newWriter(4, SPEC, outputFile, SNAPSHOT_ID, FIRST_ROW_ID); + try { + for (DataFile file : files) { + writer.add(file); + } + } finally { + writer.close(); + } + + return writer.toManifestFile(); + } +} diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java new file mode 100644 index 000000000000..934d988a7cd9 --- /dev/null +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.spark.source; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.List; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; +import org.apache.iceberg.spark.SparkCatalogConfig; +import org.apache.iceberg.spark.TestBaseWithCatalog; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +/** + * End-to-end tests for v4 table reads using the Adaptive Metadata Tree format. + * + *

V4 manifests use TrackedFile schema in Parquet format. These tests verify that the full + * pipeline works: Spark INSERT -> v4 Parquet manifest write -> v4 manifest read -> Spark SELECT. + */ +@ExtendWith(ParameterizedTestExtension.class) +public class TestV4ReadEndToEnd extends TestBaseWithCatalog { + + @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") + protected static Object[][] parameters() { + return new Object[][] { + { + SparkCatalogConfig.HADOOP.catalogName(), + SparkCatalogConfig.HADOOP.implementation(), + SparkCatalogConfig.HADOOP.properties() + } + }; + } + + @AfterEach + public void dropTable() { + sql("DROP TABLE IF EXISTS %s", tableName); + } + + @TestTemplate + public void testV4DataQuery() { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); + + List rows = sql("SELECT * FROM %s ORDER BY id", tableName); + assertThat(rows).hasSize(3); + assertThat(rows.get(0)).isEqualTo(row(1L, "a")); + assertThat(rows.get(1)).isEqualTo(row(2L, "b")); + assertThat(rows.get(2)).isEqualTo(row(3L, "c")); + } + + @TestTemplate + public void testV4MetadataTableQuery() { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); + + List files = + sql("SELECT sum(record_count), count(*) FROM %s.files", tableName); + assertThat(files).hasSize(1); + assertThat(files.get(0)[0]).isEqualTo(3L); // total record count + assertThat((long) files.get(0)[1]).isGreaterThanOrEqualTo(1L); // at least 1 data file + } + + @TestTemplate + public void testV4MultiSnapshot() { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); + 
sql("INSERT INTO %s VALUES (4, 'd')", tableName); + + List rows = sql("SELECT * FROM %s ORDER BY id", tableName); + assertThat(rows).hasSize(4); + assertThat(rows.get(0)).isEqualTo(row(1L, "a")); + assertThat(rows.get(3)).isEqualTo(row(4L, "d")); + + List files = sql("SELECT sum(record_count) FROM %s.files", tableName); + assertThat(files).hasSize(1); + assertThat(files.get(0)[0]).isEqualTo(4L); + } +} From a5cffb1a369589cdca90aa368c4396e6f1e20109 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Mon, 27 Apr 2026 14:03:23 -0700 Subject: [PATCH 14/22] Core: Replace manifest list with root manifest for v4 For v4 tables, SnapshotProducer now writes a Parquet root manifest containing TrackedFile entries with content_type=DATA_MANIFEST instead of an Avro manifest list. BaseSnapshot detects Parquet format and reads root manifests via V4ManifestReader, converting entries back to ManifestFile objects for compatibility with the existing pipeline. --- .../java/org/apache/iceberg/BaseSnapshot.java | 30 ++++- .../apache/iceberg/ManifestInfoStruct.java | 4 + .../org/apache/iceberg/SnapshotProducer.java | 124 ++++++++++++++---- .../java/org/apache/iceberg/V4Metadata.java | 118 +++++++++++++++++ .../iceberg/TestV4ManifestReadWrite.java | 52 ++++++++ .../spark/source/TestV4ReadEndToEnd.java | 22 ++++ 6 files changed, 325 insertions(+), 25 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java index 28a45d2c7821..a260773cf421 100644 --- a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java +++ b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java @@ -30,6 +30,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import 
org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -182,9 +183,14 @@ private void cacheManifests(FileIO fileIO) { if (allManifests == null) { // if manifests isn't set, then the snapshotFile is set and should be read to get the list - this.allManifests = - ManifestLists.read( - fileIO.newInputFile(new BaseManifestListFile(manifestListLocation, keyId))); + FileFormat format = FileFormat.fromFileName(manifestListLocation); + if (format == FileFormat.PARQUET) { + this.allManifests = readRootManifest(fileIO); + } else { + this.allManifests = + ManifestLists.read( + fileIO.newInputFile(new BaseManifestListFile(manifestListLocation, keyId))); + } } if (dataManifests == null || deleteManifests == null) { @@ -199,6 +205,24 @@ private void cacheManifests(FileIO fileIO) { } } + private List readRootManifest(FileIO fileIO) { + List result = Lists.newArrayList(); + V4ManifestReader reader = + new V4ManifestReader(fileIO.newInputFile(manifestListLocation), ImmutableMap.of()); + try (CloseableIterable entries = reader.liveEntries()) { + for (TrackedFile tf : entries) { + if (tf.contentType() == FileContent.DATA_MANIFEST + || tf.contentType() == FileContent.DELETE_MANIFEST) { + result.add(V4Metadata.trackedFileToManifestFile(tf.copy())); + } + } + } catch (IOException e) { + throw new UncheckedIOException("Failed to read root manifest", e); + } + + return result; + } + @Override public List allManifests(FileIO fileIO) { if (allManifests == null) { diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index 8f51df749e33..a936cc4fb654 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -58,6 +58,10 @@ class ManifestInfoStruct extends SupportsIndexProjection implements ManifestInfo super(BASE_TYPE, type); } + 
ManifestInfoStruct() { + super(BASE_TYPE.fields().size()); + } + private ManifestInfoStruct(ManifestInfoStruct toCopy) { super(toCopy); this.addedFilesCount = toCopy.addedFilesCount; diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index e351009a9ea6..2a6a3baeb585 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -57,6 +57,7 @@ import org.apache.iceberg.exceptions.CommitStateUnknownException; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.metrics.CommitMetrics; import org.apache.iceberg.metrics.CommitMetricsResult; @@ -72,6 +73,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.relocated.com.google.common.math.IntMath; +import org.apache.iceberg.types.Types; import org.apache.iceberg.util.Exceptions; import org.apache.iceberg.util.Pair; import org.apache.iceberg.util.PropertyUtil; @@ -285,6 +287,23 @@ public Snapshot apply() { List manifests = apply(base, parentSnapshot); + ManifestFile[] manifestFiles = new ManifestFile[manifests.size()]; + + Tasks.range(manifestFiles.length) + .stopOnFailure() + .throwFailureWhenFinished() + .executeWith(workerPool()) + .run(index -> manifestFiles[index] = manifestsWithMetadata.get(manifests.get(index))); + + if (base.formatVersion() >= TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS) { + return applyV4(manifestFiles, sequenceNumber, parentSnapshotId); + } else { + return applyV3(manifestFiles, sequenceNumber, parentSnapshotId); + } + } + + private Snapshot applyV3( + ManifestFile[] manifestFiles, long sequenceNumber, Long parentSnapshotId) { OutputFile 
manifestList = manifestListPath(); ManifestListWriter writer = @@ -298,17 +317,7 @@ public Snapshot apply() { base.nextRowId()); try (writer) { - // keep track of the manifest lists created manifestLists.add(manifestList.location()); - - ManifestFile[] manifestFiles = new ManifestFile[manifests.size()]; - - Tasks.range(manifestFiles.length) - .stopOnFailure() - .throwFailureWhenFinished() - .executeWith(workerPool()) - .run(index -> manifestFiles[index] = manifestsWithMetadata.get(manifests.get(index))); - writer.addAll(Arrays.asList(manifestFiles)); } catch (IOException e) { throw new RuntimeIOException(e, "Failed to write manifest list file"); @@ -321,6 +330,59 @@ public Snapshot apply() { assignedRows = writer.nextRowId() - base.nextRowId(); } + validateReplace(); + + return new BaseSnapshot( + sequenceNumber, + snapshotId(), + parentSnapshotId, + System.currentTimeMillis(), + operation(), + summary(base), + base.currentSchemaId(), + manifestList.location(), + nextRowId, + assignedRows, + writer.toManifestListFile().encryptionKeyID()); + } + + private Snapshot applyV4( + ManifestFile[] manifestFiles, long sequenceNumber, Long parentSnapshotId) { + OutputFile rootManifest = rootManifestPath(); + writeRootManifest(rootManifest, manifestFiles, snapshotId(), sequenceNumber); + manifestLists.add(rootManifest.location()); + + // compute nextRowId by summing added rows across all data manifests + long addedDataRows = 0L; + for (ManifestFile mf : manifestFiles) { + if (mf.content() == ManifestContent.DATA + && mf.snapshotId() != null + && mf.snapshotId() == snapshotId() + && mf.addedRowsCount() != null) { + addedDataRows += mf.addedRowsCount(); + } + } + + Long nextRowId = base.nextRowId(); + Long assignedRows = addedDataRows; + + validateReplace(); + + return new BaseSnapshot( + sequenceNumber, + snapshotId(), + parentSnapshotId, + System.currentTimeMillis(), + operation(), + summary(base), + base.currentSchemaId(), + rootManifest.location(), + nextRowId, + 
assignedRows, + null); + } + + private void validateReplace() { Map summary = summary(); String operation = operation(); @@ -337,19 +399,29 @@ public Snapshot apply() { addedRecords, replacedRecords); } + } - return new BaseSnapshot( - sequenceNumber, - snapshotId(), - parentSnapshotId, - System.currentTimeMillis(), - operation(), - summary(base), - base.currentSchemaId(), - manifestList.location(), - nextRowId, - assignedRows, - writer.toManifestListFile().encryptionKeyID()); + private void writeRootManifest( + OutputFile output, + ManifestFile[] manifests, + long commitSnapshotId, + long commitSequenceNumber) { + Schema schema = V4Metadata.entrySchema(Types.StructType.of()); + try (FileAppender writer = + InternalData.write(FileFormat.PARQUET, output) + .schema(schema) + .named("tracked_file") + .meta("format-version", "4") + .meta("content", "root") + .overwrite() + .build()) { + for (ManifestFile manifest : manifests) { + writer.add( + V4Metadata.manifestFileToTrackedFile(manifest, commitSnapshotId, commitSequenceNumber)); + } + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to write root manifest file"); + } } private void runValidations(Snapshot parentSnapshot) { @@ -605,6 +677,14 @@ protected OutputFile manifestListPath() { commitUUID)))); } + protected OutputFile rootManifestPath() { + return ops.io() + .newOutputFile( + ops.metadataFileLocation( + FileFormat.PARQUET.addExtension( + commitUUID + "-root-" + attempt.incrementAndGet()))); + } + protected EncryptedOutputFile newManifestOutputFile() { String manifestFileLocation = ops.metadataFileLocation( diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index 2333a625fbb2..86c2c375541b 100644 --- a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -261,6 +261,124 @@ public ManifestFile copy() { } } + private static final Types.StructType 
ROOT_MANIFEST_WRITE_TYPE = + entrySchema(Types.StructType.of()).asStruct(); + + /** + * Converts a {@link ManifestFile} to a {@link TrackedFileStruct} for writing into a root + * manifest. + * + *

The returned struct uses the entry schema projection so that field positions match the write + * schema (which excludes content_stats). + */ + static TrackedFileStruct manifestFileToTrackedFile( + ManifestFile manifest, long commitSnapshotId, long commitSequenceNumber) { + long seqNum = resolveSeqNum(manifest.sequenceNumber(), commitSequenceNumber); + long minSeqNum = resolveSeqNum(manifest.minSequenceNumber(), commitSequenceNumber); + + TrackingStruct tracking = buildTracking(manifest, commitSnapshotId, seqNum); + ManifestInfoStruct info = buildManifestInfo(manifest, minSeqNum); + + FileContent contentType = + manifest.content() == ManifestContent.DATA + ? FileContent.DATA_MANIFEST + : FileContent.DELETE_MANIFEST; + + int totalEntries = + intOrZero(manifest.addedFilesCount()) + + intOrZero(manifest.existingFilesCount()) + + intOrZero(manifest.deletedFilesCount()); + + // use the entry schema as projection so positions match the write schema + TrackedFileStruct tf = new TrackedFileStruct(ROOT_MANIFEST_WRITE_TYPE); + tf.set(0, tracking); + tf.set(1, contentType.id()); + tf.set(2, manifest.path()); + tf.set(3, FileFormat.PARQUET.toString()); + tf.set(4, (long) totalEntries); + tf.set(5, manifest.length()); + tf.set(6, manifest.partitionSpecId()); + tf.set(9, info); + + if (manifest.keyMetadata() != null) { + tf.set(10, manifest.keyMetadata()); + } + + return tf; + } + + /** Converts a {@link TrackedFile} read from a root manifest back to a {@link ManifestFile}. */ + static ManifestFile trackedFileToManifestFile(TrackedFile tf) { + ManifestInfo info = tf.manifestInfo(); + Tracking tracking = tf.tracking(); + ManifestContent content = + tf.contentType() == FileContent.DATA_MANIFEST + ? ManifestContent.DATA + : ManifestContent.DELETES; + + return new GenericManifestFile( + tf.location(), + tf.fileSizeInBytes(), + tf.specId() != null ? tf.specId() : 0, + content, + sequenceNumberFrom(tracking), + info != null ? info.minSequenceNumber() : 0L, + tracking != null ? 
tracking.snapshotId() : null, + null, + tf.keyMetadata(), + info != null ? info.addedFilesCount() : 0, + info != null ? info.addedRowsCount() : 0L, + info != null ? info.existingFilesCount() : 0, + info != null ? info.existingRowsCount() : 0L, + info != null ? info.deletedFilesCount() : 0, + info != null ? info.deletedRowsCount() : 0L, + null); + } + + private static long resolveSeqNum(long seqNum, long commitSequenceNumber) { + return seqNum == ManifestWriter.UNASSIGNED_SEQ ? commitSequenceNumber : seqNum; + } + + private static TrackingStruct buildTracking( + ManifestFile manifest, long commitSnapshotId, long seqNum) { + TrackingStruct tracking = new TrackingStruct(); + tracking.set(0, EntryStatus.ADDED.id()); + tracking.set(1, manifest.snapshotId() != null ? manifest.snapshotId() : commitSnapshotId); + tracking.set(2, seqNum); + tracking.set(3, seqNum); + return tracking; + } + + private static ManifestInfoStruct buildManifestInfo(ManifestFile manifest, long minSeqNum) { + ManifestInfoStruct info = new ManifestInfoStruct(); + info.set(0, intOrZero(manifest.addedFilesCount())); + info.set(1, intOrZero(manifest.existingFilesCount())); + info.set(2, intOrZero(manifest.deletedFilesCount())); + info.set(3, 0); + info.set(4, longOrZero(manifest.addedRowsCount())); + info.set(5, longOrZero(manifest.existingRowsCount())); + info.set(6, longOrZero(manifest.deletedRowsCount())); + info.set(7, 0L); + info.set(8, minSeqNum); + return info; + } + + private static int intOrZero(Integer value) { + return value != null ? value : 0; + } + + private static long longOrZero(Long value) { + return value != null ? 
value : 0L; + } + + private static long sequenceNumberFrom(Tracking tracking) { + if (tracking != null && tracking.dataSequenceNumber() != null) { + return tracking.dataSequenceNumber(); + } + + return 0L; + } + static Schema entrySchema(Types.StructType partitionType) { return new Schema( TrackedFile.TRACKING, diff --git a/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java b/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java index ce9f7b0829c2..ba51b63fac2e 100644 --- a/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java +++ b/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java @@ -29,7 +29,9 @@ import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.inmemory.InMemoryFileIO; import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -218,6 +220,56 @@ public void testV4ManifestDataFileAdapter() throws IOException { assertThat(adapted.splitOffsets()).isEqualTo(OFFSETS); } + @Test + public void testRootManifestRoundTrip() throws IOException { + ManifestFile leafManifest = writeV4Manifest(DATA_FILE); + + // write a root manifest containing this leaf manifest + String rootFilename = FileFormat.PARQUET.addExtension("root-" + System.nanoTime()); + OutputFile rootOutput = io.newOutputFile(rootFilename); + + long commitSequenceNumber = 1L; + Schema schema = V4Metadata.entrySchema(Types.StructType.of()); + try (FileAppender writer = + InternalData.write(FileFormat.PARQUET, rootOutput) + .schema(schema) + .named("tracked_file") + .meta("format-version", "4") + .meta("content", "root") + .overwrite() + .build()) { + writer.add( + 
V4Metadata.manifestFileToTrackedFile(leafManifest, SNAPSHOT_ID, commitSequenceNumber)); + } + + // read back the root manifest and convert to ManifestFile + V4ManifestReader reader = new V4ManifestReader(io.newInputFile(rootFilename), SPECS_BY_ID); + List manifests = Lists.newArrayList(); + try (CloseableIterable entries = reader.liveEntries()) { + for (TrackedFile tf : entries) { + if (tf.contentType() == FileContent.DATA_MANIFEST + || tf.contentType() == FileContent.DELETE_MANIFEST) { + manifests.add(V4Metadata.trackedFileToManifestFile(tf.copy())); + } + } + } + + assertThat(manifests).hasSize(1); + + ManifestFile result = manifests.get(0); + assertThat(result.path()).isEqualTo(leafManifest.path()); + assertThat(result.length()).isEqualTo(leafManifest.length()); + assertThat(result.partitionSpecId()).isEqualTo(leafManifest.partitionSpecId()); + assertThat(result.content()).isEqualTo(ManifestContent.DATA); + assertThat(result.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat(result.addedFilesCount()).isEqualTo(leafManifest.addedFilesCount()); + assertThat(result.existingFilesCount()).isEqualTo(leafManifest.existingFilesCount()); + assertThat(result.deletedFilesCount()).isEqualTo(leafManifest.deletedFilesCount()); + assertThat(result.addedRowsCount()).isEqualTo(leafManifest.addedRowsCount()); + assertThat(result.existingRowsCount()).isEqualTo(leafManifest.existingRowsCount()); + assertThat(result.deletedRowsCount()).isEqualTo(leafManifest.deletedRowsCount()); + } + private ManifestFile writeV4Manifest(DataFile... 
files) throws IOException { String filename = FileFormat.PARQUET.addExtension("manifest-" + System.nanoTime()); EncryptedOutputFile outputFile = diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java index 934d988a7cd9..d9065e46b276 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java @@ -23,6 +23,8 @@ import java.util.List; import org.apache.iceberg.ParameterizedTestExtension; import org.apache.iceberg.Parameters; +import org.apache.iceberg.Snapshot; +import org.apache.iceberg.Table; import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.spark.TestBaseWithCatalog; import org.junit.jupiter.api.AfterEach; @@ -86,6 +88,26 @@ public void testV4MetadataTableQuery() { assertThat((long) files.get(0)[1]).isGreaterThanOrEqualTo(1L); // at least 1 data file } + @TestTemplate + public void testV4RootManifestFormat() { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); + + // verify data is readable + List rows = sql("SELECT * FROM %s ORDER BY id", tableName); + assertThat(rows).hasSize(3); + + // verify no snap-*.avro manifest list files exist (v4 uses root manifests in Parquet) + Table table = validationCatalog.loadTable(tableIdent); + Snapshot snapshot = table.currentSnapshot(); + assertThat(snapshot.manifestListLocation()).endsWith(".parquet"); + assertThat(snapshot.manifestListLocation()).doesNotContain("snap-"); + } + @TestTemplate public void testV4MultiSnapshot() { sql( From 1f11a88efbb5dee7b5ad20d157f88d28e83470c6 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Mon, 27 Apr 2026 15:54:03 -0700 Subject: [PATCH 15/22] Store 
relative paths in v4 metadata JSON and root manifests --- .../java/org/apache/iceberg/BaseSnapshot.java | 9 ++- .../org/apache/iceberg/SnapshotParser.java | 48 ++++++++++---- .../org/apache/iceberg/SnapshotProducer.java | 35 ++++++----- .../apache/iceberg/TableMetadataParser.java | 7 ++- .../java/org/apache/iceberg/V4Metadata.java | 12 ++-- .../org/apache/iceberg/util/LocationUtil.java | 39 ++++++++++++ .../org/apache/iceberg/TestTableMetadata.java | 2 +- .../iceberg/TestV4ManifestReadWrite.java | 62 ++++++++++++++++++- .../apache/iceberg/util/TestLocationUtil.java | 60 ++++++++++++++++++ .../spark/source/TestV4ReadEndToEnd.java | 51 ++++++++++++++- 10 files changed, 284 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java index a260773cf421..03d54be246ff 100644 --- a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java +++ b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java @@ -48,6 +48,9 @@ class BaseSnapshot implements Snapshot { private final Long addedRows; private final String keyId; + // set by SnapshotProducer or SnapshotParser for resolving relative paths in v4 root manifests + private String tableLocation; + // lazily initialized private transient List allManifests = null; private transient List dataManifests = null; @@ -117,6 +120,10 @@ class BaseSnapshot implements Snapshot { this.keyId = null; } + void setTableLocation(String location) { + this.tableLocation = location; + } + @Override public long sequenceNumber() { return sequenceNumber; @@ -213,7 +220,7 @@ private List readRootManifest(FileIO fileIO) { for (TrackedFile tf : entries) { if (tf.contentType() == FileContent.DATA_MANIFEST || tf.contentType() == FileContent.DELETE_MANIFEST) { - result.add(V4Metadata.trackedFileToManifestFile(tf.copy())); + result.add(V4Metadata.trackedFileToManifestFile(tf.copy(), tableLocation)); } } } catch (IOException e) { diff --git 
a/core/src/main/java/org/apache/iceberg/SnapshotParser.java b/core/src/main/java/org/apache/iceberg/SnapshotParser.java index 53cec16dcd87..a40785ba5d5a 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotParser.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotParser.java @@ -31,6 +31,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.util.JsonUtil; +import org.apache.iceberg.util.LocationUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,14 +57,21 @@ private SnapshotParser() {} private static final String KEY_ID = "key-id"; static void toJson(Snapshot snapshot, JsonGenerator generator) throws IOException { + toJson(snapshot, generator, null); + } + + static void toJson(Snapshot snapshot, JsonGenerator generator, String tableLocation) + throws IOException { generator.writeStartObject(); if (snapshot.sequenceNumber() > TableMetadata.INITIAL_SEQUENCE_NUMBER) { generator.writeNumberField(SEQUENCE_NUMBER, snapshot.sequenceNumber()); } + generator.writeNumberField(SNAPSHOT_ID, snapshot.snapshotId()); if (snapshot.parentId() != null) { generator.writeNumberField(PARENT_SNAPSHOT_ID, snapshot.parentId()); } + generator.writeNumberField(TIMESTAMP_MS, snapshot.timestampMillis()); // if there is an operation, write the summary map @@ -76,16 +84,19 @@ static void toJson(Snapshot snapshot, JsonGenerator generator) throws IOExceptio if (OPERATION.equals(entry.getKey())) { continue; } + generator.writeStringField(entry.getKey(), entry.getValue()); } } + generator.writeEndObject(); } String manifestList = snapshot.manifestListLocation(); if (manifestList != null) { // write just the location. 
manifests should not be embedded in JSON along with a list - generator.writeStringField(MANIFEST_LIST, manifestList); + generator.writeStringField( + MANIFEST_LIST, LocationUtil.relativize(manifestList, tableLocation)); } else { // embed the manifest list in the JSON, v1 only JsonUtil.writeStringArray( @@ -122,6 +133,10 @@ public static String toJson(Snapshot snapshot, boolean pretty) { } static Snapshot fromJson(JsonNode node) { + return fromJson(node, null); + } + + static Snapshot fromJson(JsonNode node, String tableLocation) { Preconditions.checkArgument( node.isObject(), "Cannot parse table version from a non-object: %s", node); @@ -129,11 +144,13 @@ static Snapshot fromJson(JsonNode node) { if (node.has(SEQUENCE_NUMBER)) { sequenceNumber = JsonUtil.getLong(SEQUENCE_NUMBER, node); } + long snapshotId = JsonUtil.getLong(SNAPSHOT_ID, node); Long parentId = null; if (node.has(PARENT_SNAPSHOT_ID)) { parentId = JsonUtil.getLong(PARENT_SNAPSHOT_ID, node); } + long timestamp = JsonUtil.getLong(TIMESTAMP_MS, node); Map summary = null; @@ -156,6 +173,7 @@ static Snapshot fromJson(JsonNode node) { builder.put(field, JsonUtil.getString(field, sNode)); } } + summary = builder.build(); // When the operation is not found, default to overwrite @@ -179,18 +197,22 @@ static Snapshot fromJson(JsonNode node) { if (node.has(MANIFEST_LIST)) { // the manifest list is stored in a manifest list file String manifestList = JsonUtil.getString(MANIFEST_LIST, node); - return new BaseSnapshot( - sequenceNumber, - snapshotId, - parentId, - timestamp, - operation, - summary, - schemaId, - manifestList, - firstRowId, - addedRows, - keyId); + manifestList = LocationUtil.resolve(manifestList, tableLocation); + BaseSnapshot snapshot = + new BaseSnapshot( + sequenceNumber, + snapshotId, + parentId, + timestamp, + operation, + summary, + schemaId, + manifestList, + firstRowId, + addedRows, + keyId); + snapshot.setTableLocation(tableLocation); + return snapshot; } else { // fall back to an embedded 
manifest list. pass in the manifest's InputFile so length can be diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index 2a6a3baeb585..0310158be9de 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -349,7 +349,7 @@ private Snapshot applyV3( private Snapshot applyV4( ManifestFile[] manifestFiles, long sequenceNumber, Long parentSnapshotId) { OutputFile rootManifest = rootManifestPath(); - writeRootManifest(rootManifest, manifestFiles, snapshotId(), sequenceNumber); + writeRootManifest(rootManifest, manifestFiles, snapshotId(), sequenceNumber, base.location()); manifestLists.add(rootManifest.location()); // compute nextRowId by summing added rows across all data manifests @@ -368,18 +368,21 @@ private Snapshot applyV4( validateReplace(); - return new BaseSnapshot( - sequenceNumber, - snapshotId(), - parentSnapshotId, - System.currentTimeMillis(), - operation(), - summary(base), - base.currentSchemaId(), - rootManifest.location(), - nextRowId, - assignedRows, - null); + BaseSnapshot snapshot = + new BaseSnapshot( + sequenceNumber, + snapshotId(), + parentSnapshotId, + System.currentTimeMillis(), + operation(), + summary(base), + base.currentSchemaId(), + rootManifest.location(), + nextRowId, + assignedRows, + null); + snapshot.setTableLocation(base.location()); + return snapshot; } private void validateReplace() { @@ -405,7 +408,8 @@ private void writeRootManifest( OutputFile output, ManifestFile[] manifests, long commitSnapshotId, - long commitSequenceNumber) { + long commitSequenceNumber, + String tableLocation) { Schema schema = V4Metadata.entrySchema(Types.StructType.of()); try (FileAppender writer = InternalData.write(FileFormat.PARQUET, output) @@ -417,7 +421,8 @@ private void writeRootManifest( .build()) { for (ManifestFile manifest : manifests) { writer.add( - 
V4Metadata.manifestFileToTrackedFile(manifest, commitSnapshotId, commitSequenceNumber)); + V4Metadata.manifestFileToTrackedFile( + manifest, commitSnapshotId, commitSequenceNumber, tableLocation)); } } catch (IOException e) { throw new RuntimeIOException(e, "Failed to write root manifest file"); diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index eeeeeab8a699..43d581ad42aa 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -241,10 +241,12 @@ public static void toJson(TableMetadata metadata, JsonGenerator generator) throw toJson(metadata.refs(), generator); + String snapshotTableLocation = metadata.formatVersion() >= 4 ? metadata.location() : null; generator.writeArrayFieldStart(SNAPSHOTS); for (Snapshot snapshot : metadata.snapshots()) { - SnapshotParser.toJson(snapshot, generator); + SnapshotParser.toJson(snapshot, generator, snapshotTableLocation); } + generator.writeEndArray(); generator.writeArrayFieldStart(STATISTICS); @@ -510,7 +512,8 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) { snapshots = Lists.newArrayListWithExpectedSize(snapshotArray.size()); Iterator iterator = snapshotArray.elements(); while (iterator.hasNext()) { - snapshots.add(SnapshotParser.fromJson(iterator.next())); + snapshots.add( + SnapshotParser.fromJson(iterator.next(), formatVersion >= 4 ? 
location : null)); } } else { snapshots = ImmutableList.of(); diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index 86c2c375541b..0b6e32a4d520 100644 --- a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.LocationUtil; class V4Metadata { private V4Metadata() {} @@ -272,7 +273,10 @@ public ManifestFile copy() { * schema (which excludes content_stats). */ static TrackedFileStruct manifestFileToTrackedFile( - ManifestFile manifest, long commitSnapshotId, long commitSequenceNumber) { + ManifestFile manifest, + long commitSnapshotId, + long commitSequenceNumber, + String tableLocation) { long seqNum = resolveSeqNum(manifest.sequenceNumber(), commitSequenceNumber); long minSeqNum = resolveSeqNum(manifest.minSequenceNumber(), commitSequenceNumber); @@ -293,7 +297,7 @@ static TrackedFileStruct manifestFileToTrackedFile( TrackedFileStruct tf = new TrackedFileStruct(ROOT_MANIFEST_WRITE_TYPE); tf.set(0, tracking); tf.set(1, contentType.id()); - tf.set(2, manifest.path()); + tf.set(2, LocationUtil.relativize(manifest.path(), tableLocation)); tf.set(3, FileFormat.PARQUET.toString()); tf.set(4, (long) totalEntries); tf.set(5, manifest.length()); @@ -308,7 +312,7 @@ static TrackedFileStruct manifestFileToTrackedFile( } /** Converts a {@link TrackedFile} read from a root manifest back to a {@link ManifestFile}. 
*/ - static ManifestFile trackedFileToManifestFile(TrackedFile tf) { + static ManifestFile trackedFileToManifestFile(TrackedFile tf, String tableLocation) { ManifestInfo info = tf.manifestInfo(); Tracking tracking = tf.tracking(); ManifestContent content = @@ -317,7 +321,7 @@ static ManifestFile trackedFileToManifestFile(TrackedFile tf) { : ManifestContent.DELETES; return new GenericManifestFile( - tf.location(), + LocationUtil.resolve(tf.location(), tableLocation), tf.fileSizeInBytes(), tf.specId() != null ? tf.specId() : 0, content, diff --git a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java index 4c0d401c74b9..a6dd2b24c57e 100644 --- a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java @@ -33,6 +33,7 @@ public static String stripTrailingSlash(String path) { while (!result.endsWith("://") && result.endsWith("/")) { result = result.substring(0, result.length() - 1); } + return result; } @@ -57,4 +58,42 @@ public static String tableLocation(TableIdentifier tableIdentifier, boolean useU return tableIdentifier.name(); } } + + /** Returns true if the path is an absolute URI (contains a scheme like {@code s3://}). */ + public static boolean isAbsolute(String path) { + return path != null && path.contains("://"); + } + + /** + * Resolves a path against a table location. Relative paths (produced by {@link #relativize}) are + * resolved by direct concatenation with the table location. Absolute paths are returned as-is. + * + *

Resolution only applies when the table location has a URI scheme. Paths are never resolved
+   * against bare local paths. A {@code null} path is returned as {@code null}.
+   */
+  public static String resolve(String path, String tableLocation) {
+    if (path == null || isAbsolute(path) || tableLocation == null || !isAbsolute(tableLocation)) {
+      return path;
+    }
+
+    return tableLocation + path;
+  }
+
+  /**
+   * Relativizes a path against a table location. If the path starts with the table location, the
+   * table location prefix is stripped, leaving a relative path that starts with {@code /}. If the
+   * path is not under the table location, it is returned as-is.
+   *
+   *

Relativization only applies when both the path and table location have URI schemes. + */ + public static String relativize(String path, String tableLocation) { + if (path != null + && tableLocation != null + && isAbsolute(tableLocation) + && path.startsWith(tableLocation + "/")) { + return path.substring(tableLocation.length()); + } + + return path; + } } diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index cb1decd2d8dc..e5f5e6d682a0 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -1870,7 +1870,7 @@ private String createManifestListWithManifestFile( new GenericManifestFile(localInput(manifestFile), SPEC_5.specId(), snapshotId))); } - return localInput(manifestList).location(); + return "file://" + localInput(manifestList).location(); } @Test diff --git a/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java b/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java index ba51b63fac2e..26987a155a47 100644 --- a/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java +++ b/core/src/test/java/org/apache/iceberg/TestV4ManifestReadWrite.java @@ -239,7 +239,8 @@ public void testRootManifestRoundTrip() throws IOException { .overwrite() .build()) { writer.add( - V4Metadata.manifestFileToTrackedFile(leafManifest, SNAPSHOT_ID, commitSequenceNumber)); + V4Metadata.manifestFileToTrackedFile( + leafManifest, SNAPSHOT_ID, commitSequenceNumber, null)); } // read back the root manifest and convert to ManifestFile @@ -249,7 +250,7 @@ public void testRootManifestRoundTrip() throws IOException { for (TrackedFile tf : entries) { if (tf.contentType() == FileContent.DATA_MANIFEST || tf.contentType() == FileContent.DELETE_MANIFEST) { - manifests.add(V4Metadata.trackedFileToManifestFile(tf.copy())); + manifests.add(V4Metadata.trackedFileToManifestFile(tf.copy(), null)); } 
} } @@ -270,10 +271,65 @@ public void testRootManifestRoundTrip() throws IOException { assertThat(result.deletedRowsCount()).isEqualTo(leafManifest.deletedRowsCount()); } + @Test + public void testRootManifestRelativePathRoundTrip() throws IOException { + String tableLocation = "s3://bucket/table"; + String manifestPath = + tableLocation + + "/metadata/" + + FileFormat.PARQUET.addExtension("manifest-" + System.nanoTime()); + ManifestFile leafManifest = writeV4ManifestAt(manifestPath, DATA_FILE); + + // write a root manifest with paths relativized against the table location + String rootFilename = + tableLocation + "/metadata/" + FileFormat.PARQUET.addExtension("root-" + System.nanoTime()); + OutputFile rootOutput = io.newOutputFile(rootFilename); + + long commitSequenceNumber = 1L; + Schema schema = V4Metadata.entrySchema(Types.StructType.of()); + try (FileAppender writer = + InternalData.write(FileFormat.PARQUET, rootOutput) + .schema(schema) + .named("tracked_file") + .meta("format-version", "4") + .meta("content", "root") + .overwrite() + .build()) { + writer.add( + V4Metadata.manifestFileToTrackedFile( + leafManifest, SNAPSHOT_ID, commitSequenceNumber, tableLocation)); + } + + // read back the root manifest and verify the stored path is relative + V4ManifestReader reader = new V4ManifestReader(io.newInputFile(rootFilename), SPECS_BY_ID); + try (CloseableIterable entries = reader.liveEntries()) { + for (TrackedFile tf : entries) { + // the stored location should not be absolute (no scheme) + assertThat(tf.location()).doesNotContain("://"); + } + } + + // resolve back and verify the full path matches the original + reader = new V4ManifestReader(io.newInputFile(rootFilename), SPECS_BY_ID); + List manifests = Lists.newArrayList(); + try (CloseableIterable entries = reader.liveEntries()) { + for (TrackedFile tf : entries) { + manifests.add(V4Metadata.trackedFileToManifestFile(tf.copy(), tableLocation)); + } + } + + assertThat(manifests).hasSize(1); + 
assertThat(manifests.get(0).path()).isEqualTo(leafManifest.path()); + } + private ManifestFile writeV4Manifest(DataFile... files) throws IOException { String filename = FileFormat.PARQUET.addExtension("manifest-" + System.nanoTime()); + return writeV4ManifestAt(filename, files); + } + + private ManifestFile writeV4ManifestAt(String path, DataFile... files) throws IOException { EncryptedOutputFile outputFile = - PlaintextEncryptionManager.instance().encrypt(io.newOutputFile(filename)); + PlaintextEncryptionManager.instance().encrypt(io.newOutputFile(path)); ManifestWriter writer = ManifestFiles.newWriter(4, SPEC, outputFile, SNAPSHOT_ID, FIRST_ROW_ID); try { diff --git a/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java b/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java index 9a7b2768d995..46e8394ee90e 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java @@ -84,4 +84,64 @@ void testStripTrailingSlashForRootPathWithTrailingSlashes() { .as("Should be root path") .isEqualTo(rootPath); } + + @Test + public void testIsAbsolute() { + assertThat(LocationUtil.isAbsolute("s3://bucket/table/data/file.parquet")).isTrue(); + assertThat(LocationUtil.isAbsolute("file:///tmp/table/data/file.parquet")).isTrue(); + assertThat(LocationUtil.isAbsolute("hdfs://namenode/table/data/file.parquet")).isTrue(); + assertThat(LocationUtil.isAbsolute("/metadata/file.parquet")).isFalse(); + assertThat(LocationUtil.isAbsolute("metadata/file.parquet")).isFalse(); + assertThat(LocationUtil.isAbsolute(null)).isFalse(); + } + + @Test + public void testResolve() { + String tableLocation = "s3://bucket/table"; + + // relative paths are resolved by direct concatenation + assertThat(LocationUtil.resolve("/metadata/file.parquet", tableLocation)) + .isEqualTo("s3://bucket/table/metadata/file.parquet"); + + // absolute paths are returned as-is + 
assertThat(LocationUtil.resolve("s3://other/bucket/file.parquet", tableLocation)) + .isEqualTo("s3://other/bucket/file.parquet"); + + // null tableLocation returns the path as-is + assertThat(LocationUtil.resolve("/metadata/file.parquet", null)) + .isEqualTo("/metadata/file.parquet"); + } + + @Test + public void testRelativize() { + String tableLocation = "s3://bucket/table"; + + // paths under the table location are relativized with leading / + assertThat(LocationUtil.relativize("s3://bucket/table/metadata/file.parquet", tableLocation)) + .isEqualTo("/metadata/file.parquet"); + + // paths not under the table location are returned as-is + assertThat(LocationUtil.relativize("s3://other/bucket/file.parquet", tableLocation)) + .isEqualTo("s3://other/bucket/file.parquet"); + + // null tableLocation returns the path as-is + assertThat(LocationUtil.relativize("s3://bucket/table/metadata/file.parquet", null)) + .isEqualTo("s3://bucket/table/metadata/file.parquet"); + + // path equal to table location (no trailing content) is returned as-is + assertThat(LocationUtil.relativize("s3://bucket/table", tableLocation)) + .isEqualTo("s3://bucket/table"); + } + + @Test + public void testRelativizeResolveRoundTrip() { + String tableLocation = "s3://bucket/table"; + String absolutePath = "s3://bucket/table/metadata/root-manifest.parquet"; + + String relativized = LocationUtil.relativize(absolutePath, tableLocation); + assertThat(relativized).isEqualTo("/metadata/root-manifest.parquet"); + + String resolved = LocationUtil.resolve(relativized, tableLocation); + assertThat(resolved).isEqualTo(absolutePath); + } } diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java index d9065e46b276..57730ee11eb7 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java +++ 
b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java @@ -20,13 +20,21 @@ import static org.assertj.core.api.Assertions.assertThat; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.io.InputStream; import java.util.List; +import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ParameterizedTestExtension; import org.apache.iceberg.Parameters; import org.apache.iceberg.Snapshot; import org.apache.iceberg.Table; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.io.FileIO; import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.spark.TestBaseWithCatalog; +import org.apache.iceberg.util.JsonUtil; +import org.apache.iceberg.util.LocationUtil; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.TestTemplate; import org.junit.jupiter.api.extension.ExtendWith; @@ -81,8 +89,7 @@ public void testV4MetadataTableQuery() { sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); - List files = - sql("SELECT sum(record_count), count(*) FROM %s.files", tableName); + List files = sql("SELECT sum(record_count), count(*) FROM %s.files", tableName); assertThat(files).hasSize(1); assertThat(files.get(0)[0]).isEqualTo(3L); // total record count assertThat((long) files.get(0)[1]).isGreaterThanOrEqualTo(1L); // at least 1 data file @@ -127,4 +134,44 @@ public void testV4MultiSnapshot() { assertThat(files).hasSize(1); assertThat(files.get(0)[0]).isEqualTo(4L); } + + @TestTemplate + public void testV4RelativePathsInMetadata() throws IOException { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b')", tableName); + + Table table = validationCatalog.loadTable(tableIdent); + TableMetadata metadata = ((HasTableOperations) table).operations().current(); + FileIO io = table.io(); + + // read the raw metadata JSON 
and verify manifest-list is a relative path + JsonNode metadataJson; + try (InputStream input = io.newInputFile(metadata.metadataFileLocation()).newStream()) { + metadataJson = JsonUtil.mapper().readTree(input); + } + + JsonNode snapshots = metadataJson.get("snapshots"); + assertThat(snapshots).isNotNull(); + assertThat(snapshots.size()).isGreaterThanOrEqualTo(1); + + for (JsonNode snap : snapshots) { + String manifestList = snap.get("manifest-list").asText(); + // the stored path should be relative (no URI scheme) + assertThat(LocationUtil.isAbsolute(manifestList)) + .as("manifest-list should be a relative path in v4 metadata: %s", manifestList) + .isFalse(); + assertThat(manifestList).startsWith("/"); + } + + // verify the resolved paths work (data is still readable) + Snapshot snapshot = table.currentSnapshot(); + assertThat(LocationUtil.isAbsolute(snapshot.manifestListLocation())).isTrue(); + + List rows = sql("SELECT * FROM %s ORDER BY id", tableName); + assertThat(rows).hasSize(2); + } } From 8277e4446bf05f44cdf7defe3fb2a83370b1ba73 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Mon, 27 Apr 2026 17:30:21 -0700 Subject: [PATCH 16/22] Core: Relativize all location fields in v4 metadata --- .../iceberg/BaseDistributedDataScan.java | 31 ++++++++ .../org/apache/iceberg/DataTableScan.java | 1 + .../org/apache/iceberg/ManifestExpander.java | 12 ++- .../org/apache/iceberg/ManifestFiles.java | 75 ++++++++++++++++++- .../org/apache/iceberg/ManifestWriter.java | 12 ++- .../org/apache/iceberg/SnapshotProducer.java | 6 +- .../apache/iceberg/TableMetadataParser.java | 8 +- .../apache/iceberg/TrackedFileAdapters.java | 38 +++++++--- .../iceberg/TrackedFileEntryAdapter.java | 14 +++- .../java/org/apache/iceberg/V4Metadata.java | 7 +- .../org/apache/iceberg/util/LocationUtil.java | 12 ++- .../apache/iceberg/util/TestLocationUtil.java | 1 + 12 files changed, 184 insertions(+), 33 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java 
b/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java index e951ae830737..a7e15ba89039 100644 --- a/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java +++ b/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java @@ -144,6 +144,37 @@ protected PlanningMode deletePlanningMode() { @Override protected CloseableIterable doPlanFiles() { + if (TableUtil.formatVersion(table()) >= 4) { + return doPlanFilesV4(); + } + + return doPlanFilesV3(); + } + + private CloseableIterable doPlanFilesV4() { + Snapshot snapshot = snapshot(); + List dataManifests = snapshot.dataManifests(table().io()); + scanMetrics().totalDataManifests().increment((long) dataManifests.size()); + + ManifestExpander expander = + new ManifestExpander(table().io(), dataManifests, specs()) + .tableLocation(table().location()) + .caseSensitive(isCaseSensitive()) + .filterData(filter()) + .scanMetrics(scanMetrics()); + + if (shouldIgnoreResiduals()) { + expander = expander.ignoreResiduals(); + } + + if (dataManifests.size() > 1) { + expander = expander.planWith(planExecutor()); + } + + return CloseableIterable.transform(expander.planFiles(), task -> (ScanTask) task); + } + + private CloseableIterable doPlanFilesV3() { Snapshot snapshot = snapshot(); List deleteManifests = findMatchingDeleteManifests(snapshot); diff --git a/core/src/main/java/org/apache/iceberg/DataTableScan.java b/core/src/main/java/org/apache/iceberg/DataTableScan.java index 7ad0d85fb6ff..cf29018ff5e7 100644 --- a/core/src/main/java/org/apache/iceberg/DataTableScan.java +++ b/core/src/main/java/org/apache/iceberg/DataTableScan.java @@ -79,6 +79,7 @@ private CloseableIterable doPlanFilesV4() { ManifestExpander expander = new ManifestExpander(io, dataManifests, specsById) + .tableLocation(table().location()) .caseSensitive(isCaseSensitive()) .filterData(filter()) .scanMetrics(scanMetrics()); diff --git a/core/src/main/java/org/apache/iceberg/ManifestExpander.java 
b/core/src/main/java/org/apache/iceberg/ManifestExpander.java index 1cd736a6af7b..327c1d863a7a 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestExpander.java +++ b/core/src/main/java/org/apache/iceberg/ManifestExpander.java @@ -32,6 +32,7 @@ import org.apache.iceberg.io.InputFile; import org.apache.iceberg.metrics.ScanMetrics; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.util.LocationUtil; import org.apache.iceberg.util.ParallelIterable; /** @@ -56,6 +57,7 @@ class ManifestExpander extends CloseableGroup { private ScanMetrics scanMetrics = ScanMetrics.noop(); private ExecutorService executorService = null; + private String tableLocation; ManifestExpander( FileIO io, Iterable manifests, Map specsById) { @@ -64,6 +66,11 @@ class ManifestExpander extends CloseableGroup { this.specsById = specsById; } + ManifestExpander tableLocation(String newTableLocation) { + this.tableLocation = newTableLocation; + return this; + } + ManifestExpander filterData(Expression newDataFilter) { this.dataFilter = Expressions.and(dataFilter, newDataFilter); return this; @@ -148,7 +155,8 @@ private List> expandManifest(ManifestFile manife } private CloseableIterable expandLeafManifest(TrackedFile manifestEntry) { - InputFile leafFile = io.newInputFile(manifestEntry.location()); + String leafLocation = LocationUtil.resolve(manifestEntry.location(), tableLocation); + InputFile leafFile = io.newInputFile(leafLocation); V4ManifestReader leafReader = new V4ManifestReader(leafFile, specsById); addCloseable(leafReader); @@ -161,7 +169,7 @@ private CloseableIterable expandLeafManifest(TrackedFile manifestE private FileScanTask createTask(TrackedFile trackedFile) { int specId = trackedFile.specId() != null ? 
trackedFile.specId() : 0; PartitionSpec spec = specsById.get(specId); - DataFile dataFile = TrackedFileAdapters.asDataFile(trackedFile, spec); + DataFile dataFile = TrackedFileAdapters.asDataFile(trackedFile, spec, tableLocation); Expression filter = ignoreResiduals ? Expressions.alwaysTrue() : dataFilter; ResidualEvaluator residuals = ResidualEvaluator.of(spec, filter, caseSensitive); diff --git a/core/src/main/java/org/apache/iceberg/ManifestFiles.java b/core/src/main/java/org/apache/iceberg/ManifestFiles.java index ffeff9c99145..511b152bee58 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestFiles.java +++ b/core/src/main/java/org/apache/iceberg/ManifestFiles.java @@ -231,6 +231,23 @@ public static ManifestWriter write( return newWriter(formatVersion, spec, encryptedOutputFile, snapshotId, null, writerProperties); } + public static ManifestWriter write( + int formatVersion, + PartitionSpec spec, + EncryptedOutputFile encryptedOutputFile, + Long snapshotId, + Map writerProperties, + String tableLocation) { + return newWriter( + formatVersion, + spec, + encryptedOutputFile, + snapshotId, + null, + writerProperties, + tableLocation); + } + /** * Create a new {@link ManifestWriter} for the given format version. 
* @@ -267,13 +284,24 @@ public static ManifestWriter write( OutputFile outputFile, Long snapshotId, Map writerProperties) { + return write(formatVersion, spec, outputFile, snapshotId, writerProperties, null); + } + + public static ManifestWriter write( + int formatVersion, + PartitionSpec spec, + OutputFile outputFile, + Long snapshotId, + Map writerProperties, + String tableLocation) { return newWriter( formatVersion, spec, EncryptedFiles.plainAsEncryptedOutput(outputFile), snapshotId, null, - writerProperties); + writerProperties, + tableLocation); } @VisibleForTesting @@ -284,6 +312,18 @@ static ManifestWriter newWriter( Long snapshotId, Long firstRowId, Map writerProperties) { + return newWriter( + formatVersion, spec, encryptedOutputFile, snapshotId, firstRowId, writerProperties, null); + } + + static ManifestWriter newWriter( + int formatVersion, + PartitionSpec spec, + EncryptedOutputFile encryptedOutputFile, + Long snapshotId, + Long firstRowId, + Map writerProperties, + String tableLocation) { switch (formatVersion) { case 1: return new ManifestWriter.V1Writer(spec, encryptedOutputFile, snapshotId, writerProperties); @@ -294,8 +334,9 @@ static ManifestWriter newWriter( spec, encryptedOutputFile, snapshotId, firstRowId, writerProperties); case 4: return new ManifestWriter.V4Writer( - spec, encryptedOutputFile, snapshotId, firstRowId, writerProperties); + spec, encryptedOutputFile, snapshotId, firstRowId, writerProperties, tableLocation); } + throw new UnsupportedOperationException( "Cannot write manifest for table version: " + formatVersion); } @@ -359,6 +400,22 @@ public static ManifestWriter writeDeleteManifest( writerProperties); } + public static ManifestWriter writeDeleteManifest( + int formatVersion, + PartitionSpec spec, + OutputFile outputFile, + Long snapshotId, + Map writerProperties, + String tableLocation) { + return writeDeleteManifest( + formatVersion, + spec, + EncryptedFiles.plainAsEncryptedOutput(outputFile), + snapshotId, + 
writerProperties, + tableLocation); + } + /** * Create a new {@link ManifestWriter} for the given format version. * @@ -389,6 +446,16 @@ public static ManifestWriter writeDeleteManifest( EncryptedOutputFile outputFile, Long snapshotId, Map writerProperties) { + return writeDeleteManifest(formatVersion, spec, outputFile, snapshotId, writerProperties, null); + } + + public static ManifestWriter writeDeleteManifest( + int formatVersion, + PartitionSpec spec, + EncryptedOutputFile outputFile, + Long snapshotId, + Map writerProperties, + String tableLocation) { switch (formatVersion) { case 1: throw new IllegalArgumentException("Cannot write delete files in a v1 table"); @@ -397,8 +464,10 @@ public static ManifestWriter writeDeleteManifest( case 3: return new ManifestWriter.V3DeleteWriter(spec, outputFile, snapshotId, writerProperties); case 4: - return new ManifestWriter.V4DeleteWriter(spec, outputFile, snapshotId, writerProperties); + return new ManifestWriter.V4DeleteWriter( + spec, outputFile, snapshotId, writerProperties, tableLocation); } + throw new UnsupportedOperationException( "Cannot write manifest for table version: " + formatVersion); } diff --git a/core/src/main/java/org/apache/iceberg/ManifestWriter.java b/core/src/main/java/org/apache/iceberg/ManifestWriter.java index de0ccf43a3fb..650e0334c794 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestWriter.java +++ b/core/src/main/java/org/apache/iceberg/ManifestWriter.java @@ -274,9 +274,11 @@ static class V4Writer extends ManifestWriter { EncryptedOutputFile file, Long snapshotId, Long firstRowId, - Map writerProperties) { + Map writerProperties, + String tableLocation) { super(spec, file, snapshotId, firstRowId, writerProperties); - this.entryWrapper = new V4Metadata.ManifestEntryWrapper<>(snapshotId, spec.partitionType()); + this.entryWrapper = + new V4Metadata.ManifestEntryWrapper<>(snapshotId, spec.partitionType(), tableLocation); } @Override @@ -314,9 +316,11 @@ static class V4DeleteWriter 
extends ManifestWriter { PartitionSpec spec, EncryptedOutputFile file, Long snapshotId, - Map writerProperties) { + Map writerProperties, + String tableLocation) { super(spec, file, snapshotId, null, writerProperties); - this.entryWrapper = new V4Metadata.ManifestEntryWrapper<>(snapshotId, spec.partitionType()); + this.entryWrapper = + new V4Metadata.ManifestEntryWrapper<>(snapshotId, spec.partitionType(), tableLocation); } @Override diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index 0310158be9de..10449dbfdf81 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -704,7 +704,8 @@ protected ManifestWriter newManifestWriter(PartitionSpec spec) { spec, newManifestOutputFile(), snapshotId(), - manifestWriterProps); + manifestWriterProps, + base.location()); } protected ManifestWriter newDeleteManifestWriter(PartitionSpec spec) { @@ -713,7 +714,8 @@ protected ManifestWriter newDeleteManifestWriter(PartitionSpec spec) spec, newManifestOutputFile(), snapshotId(), - manifestWriterProps); + manifestWriterProps, + base.location()); } protected RollingManifestWriter newRollingManifestWriter(PartitionSpec spec) { diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index 43d581ad42aa..c6f3209e8dc5 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -44,6 +44,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.util.JsonUtil; +import org.apache.iceberg.util.LocationUtil; public class TableMetadataParser { @@ -274,9 +275,11 @@ public static void toJson(TableMetadata metadata, JsonGenerator generator) 
throw for (MetadataLogEntry logEntry : metadata.previousFiles()) { generator.writeStartObject(); generator.writeNumberField(TIMESTAMP_MS, logEntry.timestampMillis()); - generator.writeStringField(METADATA_FILE, logEntry.file()); + generator.writeStringField( + METADATA_FILE, LocationUtil.relativize(logEntry.file(), snapshotTableLocation)); generator.writeEndObject(); } + generator.writeEndArray(); generator.writeEndObject(); @@ -550,10 +553,11 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) { Iterator logIterator = node.get(METADATA_LOG).elements(); while (logIterator.hasNext()) { JsonNode entryNode = logIterator.next(); + String metadataFile = JsonUtil.getString(METADATA_FILE, entryNode); metadataEntries.add( new MetadataLogEntry( JsonUtil.getLong(TIMESTAMP_MS, entryNode), - JsonUtil.getString(METADATA_FILE, entryNode))); + LocationUtil.resolve(metadataFile, formatVersion >= 4 ? location : null))); } } diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index 63a9efadf562..14a875e8a112 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -28,6 +28,7 @@ import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.LocationUtil; /** * Adapts {@link TrackedFile} entries to the {@link DataFile} and {@link DeleteFile} APIs. 
@@ -90,11 +91,15 @@ static GenericDataFile asGenericDataFile(TrackedFile file, PartitionSpec spec) { } static DataFile asDataFile(TrackedFile file, PartitionSpec spec) { + return asDataFile(file, spec, null); + } + + static DataFile asDataFile(TrackedFile file, PartitionSpec spec, String tableLocation) { Preconditions.checkState( file.contentType() == FileContent.DATA, "Cannot convert tracked file to DataFile: content type is %s, not DATA", file.contentType()); - return new TrackedDataFile(file, spec); + return new TrackedDataFile(file, spec, tableLocation); } static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec) { @@ -108,12 +113,17 @@ static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec) { } static DeleteFile asEqualityDeleteFile(TrackedFile file, PartitionSpec spec) { + return asEqualityDeleteFile(file, spec, null); + } + + static DeleteFile asEqualityDeleteFile( + TrackedFile file, PartitionSpec spec, String tableLocation) { Preconditions.checkState( file.contentType() == FileContent.EQUALITY_DELETES || file.contentType() == FileContent.POSITION_DELETES, "Cannot convert tracked file to DeleteFile: content type is %s", file.contentType()); - return new TrackedDeleteFile(file, spec); + return new TrackedDeleteFile(file, spec, tableLocation); } // TODO: TrackedFile will likely get an explicit partition tuple field (using a union partition @@ -242,11 +252,13 @@ private static class TrackedDataFile implements DataFile, StructLike, java.io.Se private final TrackedFile file; private final Tracking tracking; private final PartitionSpec spec; + private final String tableLocation; - private TrackedDataFile(TrackedFile file, PartitionSpec spec) { + private TrackedDataFile(TrackedFile file, PartitionSpec spec, String tableLocation) { this.file = file; this.tracking = file.tracking(); this.spec = spec; + this.tableLocation = tableLocation; } @Override @@ -335,7 +347,7 @@ public FileContent content() { @SuppressWarnings("deprecation") 
@Override public CharSequence path() { - return file.location(); + return LocationUtil.resolve(file.location(), tableLocation); } @Override @@ -430,7 +442,7 @@ public Map upperBounds() { @Override public DataFile copy() { - return new TrackedDataFile(file.copy(), spec); + return new TrackedDataFile(file.copy(), spec, tableLocation); } @Override @@ -440,12 +452,12 @@ public DataFile copy(boolean withStats) { @Override public DataFile copyWithoutStats() { - return new TrackedDataFile(file.copyWithoutStats(), spec); + return new TrackedDataFile(file.copyWithoutStats(), spec, tableLocation); } @Override public DataFile copyWithStats(Set requestedColumnIds) { - return new TrackedDataFile(file.copyWithStats(requestedColumnIds), spec); + return new TrackedDataFile(file.copyWithStats(requestedColumnIds), spec, tableLocation); } } @@ -454,11 +466,13 @@ private static class TrackedDeleteFile implements DeleteFile, java.io.Serializab private final TrackedFile file; private final Tracking tracking; private final PartitionSpec spec; + private final String tableLocation; - private TrackedDeleteFile(TrackedFile file, PartitionSpec spec) { + private TrackedDeleteFile(TrackedFile file, PartitionSpec spec, String tableLocation) { this.file = file; this.tracking = file.tracking(); this.spec = spec; + this.tableLocation = tableLocation; } @Override @@ -480,7 +494,7 @@ public FileContent content() { @SuppressWarnings("deprecation") @Override public CharSequence path() { - return file.location(); + return LocationUtil.resolve(file.location(), tableLocation); } @Override @@ -575,7 +589,7 @@ public Map upperBounds() { @Override public DeleteFile copy() { - return new TrackedDeleteFile(file.copy(), spec); + return new TrackedDeleteFile(file.copy(), spec, tableLocation); } @Override @@ -585,12 +599,12 @@ public DeleteFile copy(boolean withStats) { @Override public DeleteFile copyWithoutStats() { - return new TrackedDeleteFile(file.copyWithoutStats(), spec); + return new 
TrackedDeleteFile(file.copyWithoutStats(), spec, tableLocation); } @Override public DeleteFile copyWithStats(Set requestedColumnIds) { - return new TrackedDeleteFile(file.copyWithStats(requestedColumnIds), spec); + return new TrackedDeleteFile(file.copyWithStats(requestedColumnIds), spec, tableLocation); } } diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java b/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java index 2bdae8933d9d..f4a5d72b00d2 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java @@ -40,18 +40,24 @@ class TrackedFileEntryAdapter> implements ManifestEntry @SuppressWarnings("unchecked") TrackedFileEntryAdapter(TrackedFile trackedFile, PartitionSpec spec) { + this(trackedFile, spec, null); + } + + @SuppressWarnings("unchecked") + TrackedFileEntryAdapter(TrackedFile trackedFile, PartitionSpec spec, String tableLocation) { this.trackedFile = trackedFile; this.spec = spec; - this.adapted = (F) adaptFile(trackedFile, spec); + this.adapted = (F) adaptFile(trackedFile, spec, tableLocation); } - private static ContentFile adaptFile(TrackedFile file, PartitionSpec spec) { + private static ContentFile adaptFile( + TrackedFile file, PartitionSpec spec, String tableLocation) { if (file.contentType() == FileContent.DATA) { - return TrackedFileAdapters.asDataFile(file, spec); + return TrackedFileAdapters.asDataFile(file, spec, tableLocation); } // for EQUALITY_DELETES and POSITION_DELETES, use a minimal delete file adapter - return TrackedFileAdapters.asDeleteFile(file, spec); + return TrackedFileAdapters.asEqualityDeleteFile(file, spec, tableLocation); } @Override diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index 0b6e32a4d520..e00e3a95b539 100644 --- a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ 
b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -405,10 +405,13 @@ static class ManifestEntryWrapper> private static final int ENTRY_FIELD_COUNT = 13; private final TrackingWriteWrapper trackingWrapper; + private final String tableLocation; private ManifestEntry wrapped = null; - ManifestEntryWrapper(Long commitSnapshotId, Types.StructType partitionType) { + ManifestEntryWrapper( + Long commitSnapshotId, Types.StructType partitionType, String tableLocation) { this.trackingWrapper = new TrackingWriteWrapper(commitSnapshotId); + this.tableLocation = tableLocation; } public ManifestEntryWrapper wrap(ManifestEntry entry) { @@ -438,7 +441,7 @@ private Object get(int pos) { case 1: return wrapped.file().content().id(); case 2: - return wrapped.file().location(); + return LocationUtil.relativize(wrapped.file().location(), tableLocation); case 3: return wrapped.file().format() != null ? wrapped.file().format().toString() : null; case 4: diff --git a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java index a6dd2b24c57e..177cbb7a094f 100644 --- a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java @@ -59,9 +59,17 @@ public static String tableLocation(TableIdentifier tableIdentifier, boolean useU } } - /** Returns true if the path is an absolute URI (contains a scheme like {@code s3://}). */ + /** + * Returns true if the path has a URI scheme (e.g. {@code s3://}, {@code file:/}, {@code + * hdfs://}). 
+ */ public static boolean isAbsolute(String path) { - return path != null && path.contains("://"); + if (path == null) { + return false; + } + + int colonIndex = path.indexOf(':'); + return colonIndex > 0 && path.charAt(colonIndex - 1) != '/'; } /** diff --git a/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java b/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java index 46e8394ee90e..50d7fdf864f7 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java @@ -89,6 +89,7 @@ void testStripTrailingSlashForRootPathWithTrailingSlashes() { public void testIsAbsolute() { assertThat(LocationUtil.isAbsolute("s3://bucket/table/data/file.parquet")).isTrue(); assertThat(LocationUtil.isAbsolute("file:///tmp/table/data/file.parquet")).isTrue(); + assertThat(LocationUtil.isAbsolute("file:/tmp/table/data/file.parquet")).isTrue(); assertThat(LocationUtil.isAbsolute("hdfs://namenode/table/data/file.parquet")).isTrue(); assertThat(LocationUtil.isAbsolute("/metadata/file.parquet")).isFalse(); assertThat(LocationUtil.isAbsolute("metadata/file.parquet")).isFalse(); From 395365a93b2e062711a79acdc94b610533b76c81 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Mon, 27 Apr 2026 17:47:12 -0700 Subject: [PATCH 17/22] Apply MDVs --- .../org/apache/iceberg/ManifestExpander.java | 63 ++++- .../apache/iceberg/TestManifestExpander.java | 264 ++++++++++++++++++ 2 files changed, 323 insertions(+), 4 deletions(-) create mode 100644 core/src/test/java/org/apache/iceberg/TestManifestExpander.java diff --git a/core/src/main/java/org/apache/iceberg/ManifestExpander.java b/core/src/main/java/org/apache/iceberg/ManifestExpander.java index 327c1d863a7a..e795e152e6df 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestExpander.java +++ b/core/src/main/java/org/apache/iceberg/ManifestExpander.java @@ -18,11 +18,16 @@ */ package org.apache.iceberg; +import 
java.io.ByteArrayInputStream; +import java.io.DataInputStream; import java.io.IOException; import java.io.UncheckedIOException; +import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.ResidualEvaluator; @@ -34,6 +39,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.util.LocationUtil; import org.apache.iceberg.util.ParallelIterable; +import org.roaringbitmap.RoaringBitmap; /** * V4 replacement for {@link ManifestGroup}. @@ -160,10 +166,59 @@ private CloseableIterable expandLeafManifest(TrackedFile manifestE V4ManifestReader leafReader = new V4ManifestReader(leafFile, specsById); addCloseable(leafReader); - return CloseableIterable.transform( - CloseableIterable.filter( - leafReader.liveEntries(), tf -> tf.contentType() == FileContent.DATA), - tf -> createTask(tf.copy())); + RoaringBitmap deletedPositions = deletedPositions(manifestEntry); + CloseableIterable entries; + if (deletedPositions != null) { + // use all entries (not just live) so position counter matches manifest ordinals + AtomicInteger position = new AtomicInteger(0); + entries = + CloseableIterable.filter( + leafReader.entries(), + tf -> !deletedPositions.contains(position.getAndIncrement()) && isLiveData(tf)); + } else { + entries = + CloseableIterable.filter( + leafReader.liveEntries(), tf -> tf.contentType() == FileContent.DATA); + } + + return CloseableIterable.transform(entries, tf -> createTask(tf.copy())); + } + + private static boolean isLiveData(TrackedFile tf) { + if (tf == null || tf.contentType() != FileContent.DATA) { + return false; + } + + Tracking tracking = tf.tracking(); + return tracking != null && tracking.isLive(); + } + + 
private static RoaringBitmap deletedPositions(TrackedFile manifestEntry) { + Tracking tracking = manifestEntry.tracking(); + if (tracking == null) { + return null; + } + + ByteBuffer deleted = tracking.deletedPositions(); + if (deleted == null) { + return null; + } + + return deserializeBitmap(deleted); + } + + private static RoaringBitmap deserializeBitmap(ByteBuffer buffer) { + byte[] bytes = new byte[buffer.remaining()]; + buffer.asReadOnlyBuffer().get(bytes); + + RoaringBitmap bitmap = new RoaringBitmap(); + try { + bitmap.deserialize(new DataInputStream(new ByteArrayInputStream(bytes))); + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to deserialize metadata deletion vector"); + } + + return bitmap; } private FileScanTask createTask(TrackedFile trackedFile) { diff --git a/core/src/test/java/org/apache/iceberg/TestManifestExpander.java b/core/src/test/java/org/apache/iceberg/TestManifestExpander.java new file mode 100644 index 000000000000..40a54846fac7 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestManifestExpander.java @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; +import org.apache.iceberg.inmemory.InMemoryFileIO; +import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; +import org.roaringbitmap.RoaringBitmap; + +public class TestManifestExpander { + private final FileIO io = new InMemoryFileIO(); + + private static final Schema SCHEMA = + new Schema( + required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); + + private static final PartitionSpec SPEC = PartitionSpec.unpartitioned(); + + private static final Map SPECS_BY_ID = + ImmutableMap.of(SPEC.specId(), SPEC); + + private static final long SNAPSHOT_ID = 100L; + private static final long FIRST_ROW_ID = 0L; + + @Test + public void testExpandSingleLeafManifest() throws IOException { + ManifestFile leaf = writeLeafManifest("a.parquet", "b.parquet", "c.parquet"); + List tasks = expand(writeRootManifest(leaf, null)); + + assertThat(tasks).hasSize(3); + assertThat(filePaths(tasks)).containsExactly("a.parquet", "b.parquet", "c.parquet"); + } + + @Test + public void testExpandMultipleLeafManifests() throws IOException { + ManifestFile leaf1 = writeLeafManifest("a.parquet"); + ManifestFile leaf2 = writeLeafManifest("b.parquet", "c.parquet"); + String rootPath = 
writeRootManifestMulti(ImmutableList.of(leaf1, leaf2), ImmutableList.of()); + + List tasks = expand(rootPath); + + assertThat(tasks).hasSize(3); + assertThat(filePaths(tasks)).containsExactly("a.parquet", "b.parquet", "c.parquet"); + } + + @Test + public void testMetadataDVFiltersSinglePosition() throws IOException { + ManifestFile leaf = writeLeafManifest("a.parquet", "b.parquet", "c.parquet"); + List tasks = expand(writeRootManifest(leaf, bitmap(1))); + + assertThat(tasks).hasSize(2); + assertThat(filePaths(tasks)).containsExactly("a.parquet", "c.parquet"); + } + + @Test + public void testMetadataDVFiltersMultiplePositions() throws IOException { + ManifestFile leaf = writeLeafManifest("a.parquet", "b.parquet", "c.parquet", "d.parquet"); + List tasks = expand(writeRootManifest(leaf, bitmap(0, 2))); + + assertThat(tasks).hasSize(2); + assertThat(filePaths(tasks)).containsExactly("b.parquet", "d.parquet"); + } + + @Test + public void testMetadataDVDeletesAllPositions() throws IOException { + ManifestFile leaf = writeLeafManifest("a.parquet", "b.parquet"); + List tasks = expand(writeRootManifest(leaf, bitmap(0, 1))); + + assertThat(tasks).isEmpty(); + } + + @Test + public void testMetadataDVOnOneLeafNotAnother() throws IOException { + ManifestFile leaf1 = writeLeafManifest("a.parquet", "b.parquet"); + ManifestFile leaf2 = writeLeafManifest("c.parquet", "d.parquet"); + + // delete position 0 from leaf1, no DV on leaf2 + String rootPath = + writeRootManifestMulti(ImmutableList.of(leaf1, leaf2), Lists.newArrayList(bitmap(0), null)); + + List tasks = expand(rootPath); + + assertThat(tasks).hasSize(3); + assertThat(filePaths(tasks)).containsExactly("b.parquet", "c.parquet", "d.parquet"); + } + + // --- helpers --- + + private List expand(String rootPath) { + ManifestFile rootManifestFile = rootManifestFile(rootPath); + ManifestExpander expander = + new ManifestExpander(io, ImmutableList.of(rootManifestFile), SPECS_BY_ID); + return Lists.newArrayList(expander.planFiles()); 
+ } + + private List filePaths(List tasks) { + return Lists.transform(tasks, t -> t.file().location()); + } + + private ManifestFile rootManifestFile(String rootPath) { + return new GenericManifestFile( + rootPath, + io.newInputFile(rootPath).getLength(), + 0, + ManifestContent.DATA, + 1L, + 0L, + SNAPSHOT_ID, + null, + null, + 0, + 0L, + 0, + 0L, + 0, + 0L, + null); + } + + private ManifestFile writeLeafManifest(String... filenames) throws IOException { + String path = FileFormat.PARQUET.addExtension("leaf-" + System.nanoTime()); + ManifestWriter writer = + ManifestFiles.newWriter( + 4, + SPEC, + PlaintextEncryptionManager.instance().encrypt(io.newOutputFile(path)), + SNAPSHOT_ID, + FIRST_ROW_ID); + try { + for (String filename : filenames) { + writer.add( + DataFiles.builder(SPEC) + .withPath(filename) + .withFileSizeInBytes(1024) + .withRecordCount(10) + .build()); + } + } finally { + writer.close(); + } + + return writer.toManifestFile(); + } + + private String writeRootManifest(ManifestFile leaf, ByteBuffer deletedPositions) + throws IOException { + return writeRootManifestMulti(ImmutableList.of(leaf), Lists.newArrayList(deletedPositions)); + } + + private String writeRootManifestMulti( + List leaves, List deletedPositionsList) throws IOException { + String rootPath = FileFormat.PARQUET.addExtension("root-" + System.nanoTime()); + OutputFile rootOutput = io.newOutputFile(rootPath); + Schema schema = V4Metadata.entrySchema(Types.StructType.of()); + + try (FileAppender writer = + InternalData.write(FileFormat.PARQUET, rootOutput) + .schema(schema) + .named("tracked_file") + .meta("format-version", "4") + .meta("content", "root") + .overwrite() + .build()) { + for (int i = 0; i < leaves.size(); i = i + 1) { + ManifestFile leaf = leaves.get(i); + ByteBuffer deleted = i < deletedPositionsList.size() ? 
deletedPositionsList.get(i) : null; + writer.add(buildRootEntry(leaf, deleted, schema)); + } + } + + return rootPath; + } + + private static TrackedFileStruct buildRootEntry( + ManifestFile leaf, ByteBuffer deletedPositions, Schema schema) { + TrackingStruct tracking = new TrackingStruct(); + tracking.set(0, EntryStatus.ADDED.id()); + tracking.set(1, SNAPSHOT_ID); + tracking.set(2, 1L); + tracking.set(3, 1L); + if (deletedPositions != null) { + tracking.set(6, deletedPositions); + } + + int totalEntries = + intOrZero(leaf.addedFilesCount()) + + intOrZero(leaf.existingFilesCount()) + + intOrZero(leaf.deletedFilesCount()); + + ManifestInfoStruct info = new ManifestInfoStruct(); + info.set(0, intOrZero(leaf.addedFilesCount())); + info.set(1, intOrZero(leaf.existingFilesCount())); + info.set(2, intOrZero(leaf.deletedFilesCount())); + info.set(3, 0); + info.set(4, longOrZero(leaf.addedRowsCount())); + info.set(5, longOrZero(leaf.existingRowsCount())); + info.set(6, longOrZero(leaf.deletedRowsCount())); + info.set(7, 0L); + info.set(8, 1L); + + TrackedFileStruct tf = new TrackedFileStruct(schema.asStruct()); + tf.set(0, tracking); + tf.set(1, FileContent.DATA_MANIFEST.id()); + tf.set(2, leaf.path()); + tf.set(3, FileFormat.PARQUET.toString()); + tf.set(4, (long) totalEntries); + tf.set(5, leaf.length()); + tf.set(6, leaf.partitionSpecId()); + tf.set(9, info); + + return tf; + } + + private static ByteBuffer bitmap(int... positions) throws IOException { + RoaringBitmap bm = new RoaringBitmap(); + for (int pos : positions) { + bm.add(pos); + } + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + bm.serialize(new DataOutputStream(baos)); + return ByteBuffer.wrap(baos.toByteArray()); + } + + private static int intOrZero(Integer value) { + return value != null ? value : 0; + } + + private static long longOrZero(Long value) { + return value != null ? 
value : 0L; + } +} From f32caec9e9a6caf935c81f63334d28ed7a0c6f2f Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 28 Apr 2026 08:57:34 -0700 Subject: [PATCH 18/22] Fix bug --- .../org/apache/iceberg/ManifestExpander.java | 13 +- .../apache/iceberg/TrackedFileAdapters.java | 21 +- .../iceberg/TrackedFileEntryAdapter.java | 8 +- .../java/org/apache/iceberg/V4Metadata.java | 194 ++++++++---------- .../org/apache/iceberg/util/LocationUtil.java | 27 ++- 5 files changed, 136 insertions(+), 127 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ManifestExpander.java b/core/src/main/java/org/apache/iceberg/ManifestExpander.java index e795e152e6df..827f4acfd7e6 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestExpander.java +++ b/core/src/main/java/org/apache/iceberg/ManifestExpander.java @@ -48,7 +48,8 @@ * leaf manifests (DATA_MANIFEST entries), and converts DATA entries to {@link FileScanTask} * instances using {@link TrackedFileAdapters}. * - *

<p>DV support is deferred to a future phase. Delete files are not matched to data files. + *

<p>Inline deletion vectors on DATA entries are attached as {@link DeleteFile} instances on the + * resulting {@link FileScanTask}. Equality delete matching is deferred to a future phase. */ class ManifestExpander extends CloseableGroup { private final FileIO io; @@ -226,12 +227,20 @@ private FileScanTask createTask(TrackedFile trackedFile) { PartitionSpec spec = specsById.get(specId); DataFile dataFile = TrackedFileAdapters.asDataFile(trackedFile, spec, tableLocation); + DeleteFile[] deletes; + if (trackedFile.deletionVector() != null) { + deletes = + new DeleteFile[] {TrackedFileAdapters.asDVDeleteFile(trackedFile, spec, tableLocation)}; + } else { + deletes = new DeleteFile[0]; + } + Expression filter = ignoreResiduals ? Expressions.alwaysTrue() : dataFilter; ResidualEvaluator residuals = ResidualEvaluator.of(spec, filter, caseSensitive); return new BaseFileScanTask( dataFile, - new DeleteFile[0], + deletes, SchemaParser.toJson(spec.schema()), PartitionSpecParser.toJson(spec), residuals); diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index 14a875e8a112..9f1bf3d5439f 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -103,13 +103,17 @@ static DataFile asDataFile(TrackedFile file, PartitionSpec spec, String tableLoc } static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec) { + return asDVDeleteFile(file, spec, null); + } + + static DeleteFile asDVDeleteFile(TrackedFile file, PartitionSpec spec, String tableLocation) { Preconditions.checkState( file.contentType() == FileContent.DATA, "Cannot extract DV from tracked file: content type is %s, not DATA", file.contentType()); Preconditions.checkState( file.deletionVector() != null, "Cannot extract DV from tracked file: no deletion vector"); - return new TrackedDVDeleteFile(file, spec); + return new 
TrackedDVDeleteFile(file, spec, tableLocation); } static DeleteFile asEqualityDeleteFile(TrackedFile file, PartitionSpec spec) { @@ -119,9 +123,8 @@ static DeleteFile asEqualityDeleteFile(TrackedFile file, PartitionSpec spec) { static DeleteFile asEqualityDeleteFile( TrackedFile file, PartitionSpec spec, String tableLocation) { Preconditions.checkState( - file.contentType() == FileContent.EQUALITY_DELETES - || file.contentType() == FileContent.POSITION_DELETES, - "Cannot convert tracked file to DeleteFile: content type is %s", + file.contentType() == FileContent.EQUALITY_DELETES, + "Cannot convert tracked file to DeleteFile: content type is %s, not EQUALITY_DELETES", file.contentType()); return new TrackedDeleteFile(file, spec, tableLocation); } @@ -621,12 +624,14 @@ private static class TrackedDVDeleteFile implements DeleteFile { private final DeletionVector dv; private final Tracking tracking; private final PartitionSpec spec; + private final String tableLocation; - private TrackedDVDeleteFile(TrackedFile file, PartitionSpec spec) { + private TrackedDVDeleteFile(TrackedFile file, PartitionSpec spec, String tableLocation) { this.file = file; this.dv = file.deletionVector(); this.tracking = file.tracking(); this.spec = spec; + this.tableLocation = tableLocation; } @Override @@ -647,7 +652,7 @@ public FileContent content() { @SuppressWarnings("deprecation") @Override public CharSequence path() { - return dv.location(); + return LocationUtil.resolve(dv.location(), tableLocation); } @Override @@ -711,7 +716,7 @@ public List equalityFieldIds() { @Override public String referencedDataFile() { - return file.location(); + return LocationUtil.resolve(file.location(), tableLocation); } @Override @@ -761,7 +766,7 @@ public Map upperBounds() { @Override public DeleteFile copy() { - return new TrackedDVDeleteFile(file.copy(), spec); + return new TrackedDVDeleteFile(file.copy(), spec, tableLocation); } @Override diff --git 
a/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java b/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java index f4a5d72b00d2..09908141ea7b 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileEntryAdapter.java @@ -56,8 +56,12 @@ private static ContentFile adaptFile( return TrackedFileAdapters.asDataFile(file, spec, tableLocation); } - // for EQUALITY_DELETES and POSITION_DELETES, use a minimal delete file adapter - return TrackedFileAdapters.asEqualityDeleteFile(file, spec, tableLocation); + if (file.contentType() == FileContent.EQUALITY_DELETES) { + return TrackedFileAdapters.asEqualityDeleteFile(file, spec, tableLocation); + } + + // DATA entries with a deletion vector are adapted as DV delete files + return TrackedFileAdapters.asDVDeleteFile(file, spec, tableLocation); } @Override diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index e00e3a95b539..e02b52f7cbbb 100644 --- a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -383,6 +383,9 @@ private static long sequenceNumberFrom(Tracking tracking) { return 0L; } + private static final Types.StructType ENTRY_WRITE_TYPE = + entrySchema(Types.StructType.of()).asStruct(); + static Schema entrySchema(Types.StructType partitionType) { return new Schema( TrackedFile.TRACKING, @@ -400,71 +403,114 @@ static Schema entrySchema(Types.StructType partitionType) { TrackedFile.EQUALITY_IDS); } + /** + * Converts a {@link ManifestEntry} to a {@link TrackedFileStruct} for writing into a v4 data or + * delete manifest. + * + *

The returned struct uses the entry schema projection so that field positions match the write + * schema (which excludes content_stats). + */ + static TrackedFileStruct entryToTrackedFile( + ManifestEntry entry, Long commitSnapshotId, String tableLocation) { + TrackingStruct tracking = buildEntryTracking(entry, commitSnapshotId); + ContentFile file = entry.file(); + + TrackedFileStruct tf = new TrackedFileStruct(ENTRY_WRITE_TYPE); + tf.set(0, tracking); + tf.set(1, file.content().id()); + tf.set(2, LocationUtil.relativize(file.location(), tableLocation)); + tf.set(3, file.format() != null ? file.format().toString() : null); + tf.set(4, file.recordCount()); + tf.set(5, file.fileSizeInBytes()); + tf.set(6, file.specId()); + tf.set(7, file.sortOrderId()); + // positions 8 (deletion_vector) and 9 (manifest_info) default to null + + if (file.keyMetadata() != null) { + tf.set(10, file.keyMetadata()); + } + + if (file.splitOffsets() != null) { + tf.set(11, file.splitOffsets()); + } + + if (file.equalityFieldIds() != null) { + tf.set(12, file.equalityFieldIds()); + } + + return tf; + } + + private static TrackingStruct buildEntryTracking( + ManifestEntry entry, Long commitSnapshotId) { + TrackingStruct tracking = new TrackingStruct(); + tracking.set(0, entry.status().id()); + tracking.set(1, entry.snapshotId()); + + if (entry.dataSequenceNumber() == null) { + Preconditions.checkState( + entry.snapshotId() == null || entry.snapshotId().equals(commitSnapshotId), + "Found unassigned sequence number for an entry from snapshot: %s", + entry.snapshotId()); + Preconditions.checkState( + entry.status() == ManifestEntry.Status.ADDED, + "Only entries with status ADDED can have null sequence number"); + // leave sequence number as null for ADDED entries (assigned at commit) + } else { + tracking.set(2, entry.dataSequenceNumber()); + } + + if (entry.fileSequenceNumber() != null) { + tracking.set(3, entry.fileSequenceNumber()); + } + + if (entry.file().content() == FileContent.DATA && 
entry.file().firstRowId() != null) { + tracking.set(5, entry.file().firstRowId()); + } + + return tracking; + } + + /** + * Wraps a {@link ManifestEntry} for v4 manifest writing. + * + *

Implements {@link ManifestEntry} for type compatibility with {@link ManifestWriter}, and + * delegates {@link StructLike} to an internal {@link TrackedFileStruct} built via {@link + * #entryToTrackedFile}. + */ static class ManifestEntryWrapper> implements ManifestEntry, StructLike { - private static final int ENTRY_FIELD_COUNT = 13; - private final TrackingWriteWrapper trackingWrapper; + private final Long commitSnapshotId; private final String tableLocation; private ManifestEntry wrapped = null; + private TrackedFileStruct tracked = null; ManifestEntryWrapper( Long commitSnapshotId, Types.StructType partitionType, String tableLocation) { - this.trackingWrapper = new TrackingWriteWrapper(commitSnapshotId); + this.commitSnapshotId = commitSnapshotId; this.tableLocation = tableLocation; } public ManifestEntryWrapper wrap(ManifestEntry entry) { this.wrapped = entry; + this.tracked = entryToTrackedFile(entry, commitSnapshotId, tableLocation); return this; } @Override public int size() { - return ENTRY_FIELD_COUNT; + return tracked.size(); } @Override public void set(int pos, T value) { - throw new UnsupportedOperationException("Cannot modify ManifestEntryWrapper wrapper via set"); + throw new UnsupportedOperationException("ManifestEntryWrapper is read-only"); } @Override public T get(int pos, Class javaClass) { - return javaClass.cast(get(pos)); - } - - private Object get(int pos) { - switch (pos) { - case 0: - return trackingWrapper.wrap(wrapped); - case 1: - return wrapped.file().content().id(); - case 2: - return LocationUtil.relativize(wrapped.file().location(), tableLocation); - case 3: - return wrapped.file().format() != null ? 
wrapped.file().format().toString() : null; - case 4: - return wrapped.file().recordCount(); - case 5: - return wrapped.file().fileSizeInBytes(); - case 6: - return wrapped.file().specId(); - case 7: - return wrapped.file().sortOrderId(); - case 8: - return null; // deletion_vector (future) - case 9: - return null; // manifest_info (null for data files) - case 10: - return wrapped.file().keyMetadata(); - case 11: - return wrapped.file().splitOffsets(); - case 12: - return wrapped.file().equalityFieldIds(); - default: - throw new UnsupportedOperationException("Unknown field ordinal: " + pos); - } + return tracked.get(pos, javaClass); } @Override @@ -517,74 +563,4 @@ public ManifestEntry copyWithoutStats() { return wrapped.copyWithoutStats(); } } - - /** Wrapper that writes tracking fields from a ManifestEntry as a StructLike. */ - static class TrackingWriteWrapper implements StructLike { - private static final int TRACKING_FIELD_COUNT = 8; - - private final Long commitSnapshotId; - private ManifestEntry entry = null; - - TrackingWriteWrapper(Long commitSnapshotId) { - this.commitSnapshotId = commitSnapshotId; - } - - TrackingWriteWrapper wrap(ManifestEntry newEntry) { - this.entry = newEntry; - return this; - } - - @Override - public int size() { - return TRACKING_FIELD_COUNT; - } - - @Override - public void set(int pos, T value) { - throw new UnsupportedOperationException("Cannot modify TrackingWriteWrapper wrapper via set"); - } - - @Override - public T get(int pos, Class javaClass) { - return javaClass.cast(get(pos)); - } - - private Object get(int pos) { - switch (pos) { - case 0: - return entry.status().id(); - case 1: - return entry.snapshotId(); - case 2: - if (entry.dataSequenceNumber() == null) { - Preconditions.checkState( - entry.snapshotId() == null || entry.snapshotId().equals(commitSnapshotId), - "Found unassigned sequence number for an entry from snapshot: %s", - entry.snapshotId()); - Preconditions.checkState( - entry.status() == 
ManifestEntry.Status.ADDED, - "Only entries with status ADDED can have null sequence number"); - return null; - } - - return entry.dataSequenceNumber(); - case 3: - return entry.fileSequenceNumber(); - case 4: - return null; // dv_snapshot_id (future) - case 5: - if (entry.file().content() == FileContent.DATA) { - return entry.file().firstRowId(); - } else { - return null; - } - case 6: - return null; // deleted_positions (future) - case 7: - return null; // replaced_positions (future) - default: - throw new UnsupportedOperationException("Unknown field ordinal: " + pos); - } - } - } } diff --git a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java index 177cbb7a094f..28fbbe35c953 100644 --- a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java @@ -84,7 +84,7 @@ public static String resolve(String path, String tableLocation) { return path; } - return tableLocation + path; + return normalizeScheme(tableLocation) + path; } /** @@ -95,11 +95,26 @@ public static String resolve(String path, String tableLocation) { *

Relativization only applies when both the path and table location have URI schemes. */ public static String relativize(String path, String tableLocation) { - if (path != null - && tableLocation != null - && isAbsolute(tableLocation) - && path.startsWith(tableLocation + "/")) { - return path.substring(tableLocation.length()); + if (path == null || tableLocation == null || !isAbsolute(tableLocation)) { + return path; + } + + String normalizedLocation = normalizeScheme(tableLocation); + String normalizedPath = normalizeScheme(path); + if (normalizedPath.startsWith(normalizedLocation + "/")) { + return normalizedPath.substring(normalizedLocation.length()); + } + + return path; + } + + /** + * Normalizes URI scheme variants. Converts {@code file:///path} and {@code file:/path} to {@code + * file:/path} for consistent comparison. + */ + private static String normalizeScheme(String path) { + if (path.startsWith("file:///")) { + return "file:" + path.substring("file://".length()); } return path; From c475d6313d69eecec9e48ce2f1980bf9428937a8 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Wed, 29 Apr 2026 10:04:25 -0700 Subject: [PATCH 19/22] Add testing guide --- V4_Testing_Guide.md | 72 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 V4_Testing_Guide.md diff --git a/V4_Testing_Guide.md b/V4_Testing_Guide.md new file mode 100644 index 000000000000..e52c9395aadf --- /dev/null +++ b/V4_Testing_Guide.md @@ -0,0 +1,72 @@ +# Testing V4 Iceberg with Spark + +## Build the Iceberg Spark runtime jar + +```bash +git checkout v4-amt +./gradlew :iceberg-spark:iceberg-spark-runtime-4.1_2.13:shadowJar +``` + +The jar is at: +``` +spark/v4.1/spark-runtime/build/libs/iceberg-spark-runtime-4.1_2.13-1.11.0-SNAPSHOT.jar +``` + +## Download Spark 4.1.1 + +```bash +curl -L -o spark-4.1.1-bin-hadoop3.tgz \ + https://archive.apache.org/dist/spark/spark-4.1.1/spark-4.1.1-bin-hadoop3.tgz +tar xzf spark-4.1.1-bin-hadoop3.tgz +``` + +## Start 
spark-sql + +```bash +spark-4.1.1-bin-hadoop3/bin/spark-sql \ + --jars /path/to/iceberg-spark-runtime-4.1_2.13-1.11.0-SNAPSHOT.jar \ + --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.local.type=hadoop \ + --conf spark.sql.catalog.local.warehouse=file:///tmp/iceberg-warehouse +``` + +## Create a v4 table and query it + +```sql +CREATE TABLE local.default.test (id bigint, data string) + USING iceberg TBLPROPERTIES ('format-version' = '4'); + +INSERT INTO local.default.test VALUES (1, 'a'), (2, 'b'), (3, 'c'); + +SELECT * FROM local.default.test ORDER BY id; +``` + +## Inspect the metadata + +All paths in v4 metadata are stored as relative: + +```bash +# metadata JSON -- manifest-list and metadata-log use relative paths +python3 -m json.tool /tmp/iceberg-warehouse/default/test/metadata/v2.metadata.json + +# root manifest and leaf manifests are Parquet -- read with spark-sql +# (replace the UUID with the actual filename) +SELECT * FROM parquet.`file:///tmp/iceberg-warehouse/default/test/metadata/*-root-*.parquet`; +SELECT * FROM parquet.`file:///tmp/iceberg-warehouse/default/test/metadata/*-m0.parquet`; +``` + +## What's implemented + +- V4 Adaptive Metadata Tree: root manifest (Parquet) replaces Avro manifest list +- Relative paths at all levels: metadata JSON, root manifest, leaf manifests +- Metadata deletion vectors (inline bitmaps on tracking struct) +- V4 scan path through ManifestExpander (bypasses ManifestGroup) +- FastAppend write path (INSERT INTO) + +## Limitations + +- Only FastAppend is wired for v4 (INSERT INTO). Overwrites, deletes, and + compaction still use the v2/v3 path. +- Data deletion vectors (colocated with data files) are not yet implemented. +- Metadata deletion vectors are applied on read but there is no write path + that produces them yet. 
From 48248883ba1519ba449ceef92404ea0412553d3d Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Thu, 30 Apr 2026 08:39:41 -0700 Subject: [PATCH 20/22] more fixes --- .../iceberg/BaseDistributedDataScan.java | 31 ++++++++++++++----- .../org/apache/iceberg/DataTableScan.java | 31 ++++++++++++++----- .../org/apache/iceberg/V4ManifestReader.java | 13 +++++++- .../java/org/apache/iceberg/V4Metadata.java | 3 +- .../org/apache/iceberg/util/LocationUtil.java | 7 ++++- 5 files changed, 67 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java b/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java index a7e15ba89039..3af4cdd3b8a5 100644 --- a/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java +++ b/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java @@ -36,6 +36,7 @@ import org.apache.iceberg.expressions.ResidualEvaluator; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.metrics.ScanMetricsUtil; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.util.ContentFileUtil; @@ -153,11 +154,31 @@ protected CloseableIterable doPlanFiles() { private CloseableIterable doPlanFilesV4() { Snapshot snapshot = snapshot(); - List dataManifests = snapshot.dataManifests(table().io()); - scanMetrics().totalDataManifests().increment((long) dataManifests.size()); + + // Pass the root manifest directly to ManifestExpander, not pre-filtered dataManifests. + // The root manifest may contain both DATA entries (inlined) and DATA_MANIFEST entries. 
+ String rootManifestPath = snapshot.manifestListLocation(); + ManifestFile rootManifest = + new GenericManifestFile( + rootManifestPath, + 0, + 0, + ManifestContent.DATA, + 0L, + 0L, + null, + null, + null, + 0, + 0L, + 0, + 0L, + 0, + 0L, + null); ManifestExpander expander = - new ManifestExpander(table().io(), dataManifests, specs()) + new ManifestExpander(table().io(), ImmutableList.of(rootManifest), specs()) .tableLocation(table().location()) .caseSensitive(isCaseSensitive()) .filterData(filter()) @@ -167,10 +188,6 @@ private CloseableIterable doPlanFilesV4() { expander = expander.ignoreResiduals(); } - if (dataManifests.size() > 1) { - expander = expander.planWith(planExecutor()); - } - return CloseableIterable.transform(expander.planFiles(), task -> (ScanTask) task); } diff --git a/core/src/main/java/org/apache/iceberg/DataTableScan.java b/core/src/main/java/org/apache/iceberg/DataTableScan.java index cf29018ff5e7..2008d9f6786a 100644 --- a/core/src/main/java/org/apache/iceberg/DataTableScan.java +++ b/core/src/main/java/org/apache/iceberg/DataTableScan.java @@ -23,6 +23,7 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; public class DataTableScan extends BaseTableScan { protected DataTableScan(Table table, Schema schema, TableScanContext context) { @@ -73,12 +74,32 @@ public CloseableIterable doPlanFiles() { private CloseableIterable doPlanFilesV4() { Snapshot snapshot = snapshot(); FileIO io = table().io(); - List dataManifests = snapshot.dataManifests(io); Map specsById = specs(); - scanMetrics().totalDataManifests().increment((long) dataManifests.size()); + + // Pass the root manifest to ManifestExpander, not pre-filtered dataManifests. + // The root manifest may contain both inlined DATA entries and DATA_MANIFEST entries. 
+ String rootManifestPath = snapshot.manifestListLocation(); + ManifestFile rootManifest = + new GenericManifestFile( + rootManifestPath, + 0, + 0, + ManifestContent.DATA, + 0L, + 0L, + null, + null, + null, + 0, + 0L, + 0, + 0L, + 0, + 0L, + null); ManifestExpander expander = - new ManifestExpander(io, dataManifests, specsById) + new ManifestExpander(io, ImmutableList.of(rootManifest), specsById) .tableLocation(table().location()) .caseSensitive(isCaseSensitive()) .filterData(filter()) @@ -88,10 +109,6 @@ private CloseableIterable doPlanFilesV4() { expander = expander.ignoreResiduals(); } - if (shouldPlanWithExecutor() && dataManifests.size() > 1) { - expander = expander.planWith(planExecutor()); - } - return expander.planFiles(); } diff --git a/core/src/main/java/org/apache/iceberg/V4ManifestReader.java b/core/src/main/java/org/apache/iceberg/V4ManifestReader.java index 077d0d8c9b41..c45ee599f743 100644 --- a/core/src/main/java/org/apache/iceberg/V4ManifestReader.java +++ b/core/src/main/java/org/apache/iceberg/V4ManifestReader.java @@ -75,7 +75,18 @@ private CloseableIterable open() { Preconditions.checkArgument( format != null, "Unable to determine format of manifest: %s", file.location()); - Schema readSchema = V4Metadata.entrySchema(Types.StructType.of()); + // Hack: Exclude SPLIT_OFFSETS and EQUALITY_IDS from read projection to tolerate + // manifests that don't write list element field IDs + // TODO: Fix it + Schema fullSchema = V4Metadata.entrySchema(Types.StructType.of()); + Schema readSchema = + new Schema( + fullSchema.columns().stream() + .filter( + f -> + f.fieldId() != TrackedFile.SPLIT_OFFSETS.fieldId() + && f.fieldId() != TrackedFile.EQUALITY_IDS.fieldId()) + .collect(java.util.stream.Collectors.toList())); CloseableIterable reader = InternalData.read(format, file) diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index e02b52f7cbbb..1e7aa6737218 100644 --- 
a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -441,8 +441,7 @@ static TrackedFileStruct entryToTrackedFile( return tf; } - private static TrackingStruct buildEntryTracking( - ManifestEntry entry, Long commitSnapshotId) { + private static TrackingStruct buildEntryTracking(ManifestEntry entry, Long commitSnapshotId) { TrackingStruct tracking = new TrackingStruct(); tracking.set(0, entry.status().id()); tracking.set(1, entry.snapshotId()); diff --git a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java index 28fbbe35c953..b67f8dc7dd76 100644 --- a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java @@ -84,7 +84,12 @@ public static String resolve(String path, String tableLocation) { return path; } - return normalizeScheme(tableLocation) + path; + String normalized = normalizeScheme(tableLocation); + if (path.startsWith("/")) { + return normalized + path; + } + + return normalized + "/" + path; } /** From 412fbb97b39d2d0d4e2ea321d4367c27d6556f9f Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Wed, 6 May 2026 17:52:21 -0700 Subject: [PATCH 21/22] Add Spark 3.5 instructions and tests --- V4_Testing_Guide.md | 54 +++++- .../spark/source/TestV4ReadEndToEnd.java | 177 ++++++++++++++++++ 2 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java diff --git a/V4_Testing_Guide.md b/V4_Testing_Guide.md index e52c9395aadf..0a3a9e57d4cd 100644 --- a/V4_Testing_Guide.md +++ b/V4_Testing_Guide.md @@ -2,6 +2,8 @@ ## Build the Iceberg Spark runtime jar +### Spark 4.1 + ```bash git checkout v4-amt ./gradlew :iceberg-spark:iceberg-spark-runtime-4.1_2.13:shadowJar @@ -12,7 +14,21 @@ The jar is at: 
spark/v4.1/spark-runtime/build/libs/iceberg-spark-runtime-4.1_2.13-1.11.0-SNAPSHOT.jar ``` -## Download Spark 4.1.1 +### Spark 3.5 + +```bash +git checkout v4-amt +./gradlew -DsparkVersions=3.5 :iceberg-spark:iceberg-spark-runtime-3.5_2.12:shadowJar +``` + +The jar is at: +``` +spark/v3.5/spark-runtime/build/libs/iceberg-spark-runtime-3.5_2.12-1.11.0-SNAPSHOT.jar +``` + +## Download Spark + +### Spark 4.1.1 ```bash curl -L -o spark-4.1.1-bin-hadoop3.tgz \ @@ -20,8 +36,18 @@ curl -L -o spark-4.1.1-bin-hadoop3.tgz \ tar xzf spark-4.1.1-bin-hadoop3.tgz ``` +### Spark 3.5.8 + +```bash +curl -L -o spark-3.5.8-bin-hadoop3.tgz \ + https://archive.apache.org/dist/spark/spark-3.5.8/spark-3.5.8-bin-hadoop3.tgz +tar xzf spark-3.5.8-bin-hadoop3.tgz +``` + ## Start spark-sql +### Spark 4.1 + ```bash spark-4.1.1-bin-hadoop3/bin/spark-sql \ --jars /path/to/iceberg-spark-runtime-4.1_2.13-1.11.0-SNAPSHOT.jar \ @@ -30,6 +56,16 @@ spark-4.1.1-bin-hadoop3/bin/spark-sql \ --conf spark.sql.catalog.local.warehouse=file:///tmp/iceberg-warehouse ``` +### Spark 3.5 + +```bash +spark-3.5.8-bin-hadoop3/bin/spark-sql \ + --jars /path/to/iceberg-spark-runtime-3.5_2.12-1.11.0-SNAPSHOT.jar \ + --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.local.type=hadoop \ + --conf spark.sql.catalog.local.warehouse=file:///tmp/iceberg-warehouse +``` + ## Create a v4 table and query it ```sql @@ -55,6 +91,22 @@ SELECT * FROM parquet.`file:///tmp/iceberg-warehouse/default/test/metadata/*-roo SELECT * FROM parquet.`file:///tmp/iceberg-warehouse/default/test/metadata/*-m0.parquet`; ``` +## Run automated V4 tests + +### Spark 4.1 + +```bash +./gradlew :iceberg-spark:iceberg-spark-4.1_2.13:test \ + --tests "org.apache.iceberg.spark.source.TestV4ReadEndToEnd" +``` + +### Spark 3.5 + +```bash +./gradlew -DsparkVersions=3.5 :iceberg-spark:iceberg-spark-3.5_2.12:test \ + --tests "org.apache.iceberg.spark.source.TestV4ReadEndToEnd" +``` + ## What's implemented - V4 
Adaptive Metadata Tree: root manifest (Parquet) replaces Avro manifest list diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java new file mode 100644 index 000000000000..57730ee11eb7 --- /dev/null +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestV4ReadEndToEnd.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.spark.source; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.apache.iceberg.HasTableOperations; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; +import org.apache.iceberg.Snapshot; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.spark.SparkCatalogConfig; +import org.apache.iceberg.spark.TestBaseWithCatalog; +import org.apache.iceberg.util.JsonUtil; +import org.apache.iceberg.util.LocationUtil; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +/** + * End-to-end tests for v4 table reads using the Adaptive Metadata Tree format. + * + *

V4 manifests use TrackedFile schema in Parquet format. These tests verify that the full + * pipeline works: Spark INSERT -> v4 Parquet manifest write -> v4 manifest read -> Spark SELECT. + */ +@ExtendWith(ParameterizedTestExtension.class) +public class TestV4ReadEndToEnd extends TestBaseWithCatalog { + + @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") + protected static Object[][] parameters() { + return new Object[][] { + { + SparkCatalogConfig.HADOOP.catalogName(), + SparkCatalogConfig.HADOOP.implementation(), + SparkCatalogConfig.HADOOP.properties() + } + }; + } + + @AfterEach + public void dropTable() { + sql("DROP TABLE IF EXISTS %s", tableName); + } + + @TestTemplate + public void testV4DataQuery() { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); + + List rows = sql("SELECT * FROM %s ORDER BY id", tableName); + assertThat(rows).hasSize(3); + assertThat(rows.get(0)).isEqualTo(row(1L, "a")); + assertThat(rows.get(1)).isEqualTo(row(2L, "b")); + assertThat(rows.get(2)).isEqualTo(row(3L, "c")); + } + + @TestTemplate + public void testV4MetadataTableQuery() { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); + + List files = sql("SELECT sum(record_count), count(*) FROM %s.files", tableName); + assertThat(files).hasSize(1); + assertThat(files.get(0)[0]).isEqualTo(3L); // total record count + assertThat((long) files.get(0)[1]).isGreaterThanOrEqualTo(1L); // at least 1 data file + } + + @TestTemplate + public void testV4RootManifestFormat() { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); + 
+ // verify data is readable + List rows = sql("SELECT * FROM %s ORDER BY id", tableName); + assertThat(rows).hasSize(3); + + // verify no snap-*.avro manifest list files exist (v4 uses root manifests in Parquet) + Table table = validationCatalog.loadTable(tableIdent); + Snapshot snapshot = table.currentSnapshot(); + assertThat(snapshot.manifestListLocation()).endsWith(".parquet"); + assertThat(snapshot.manifestListLocation()).doesNotContain("snap-"); + } + + @TestTemplate + public void testV4MultiSnapshot() { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c')", tableName); + sql("INSERT INTO %s VALUES (4, 'd')", tableName); + + List rows = sql("SELECT * FROM %s ORDER BY id", tableName); + assertThat(rows).hasSize(4); + assertThat(rows.get(0)).isEqualTo(row(1L, "a")); + assertThat(rows.get(3)).isEqualTo(row(4L, "d")); + + List files = sql("SELECT sum(record_count) FROM %s.files", tableName); + assertThat(files).hasSize(1); + assertThat(files.get(0)[0]).isEqualTo(4L); + } + + @TestTemplate + public void testV4RelativePathsInMetadata() throws IOException { + sql( + "CREATE TABLE %s (id bigint, data string) USING iceberg " + + "TBLPROPERTIES ('format-version' = '4')", + tableName); + + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b')", tableName); + + Table table = validationCatalog.loadTable(tableIdent); + TableMetadata metadata = ((HasTableOperations) table).operations().current(); + FileIO io = table.io(); + + // read the raw metadata JSON and verify manifest-list is a relative path + JsonNode metadataJson; + try (InputStream input = io.newInputFile(metadata.metadataFileLocation()).newStream()) { + metadataJson = JsonUtil.mapper().readTree(input); + } + + JsonNode snapshots = metadataJson.get("snapshots"); + assertThat(snapshots).isNotNull(); + assertThat(snapshots.size()).isGreaterThanOrEqualTo(1); + + for (JsonNode snap : snapshots) 
{ + String manifestList = snap.get("manifest-list").asText(); + // the stored path should be relative (no URI scheme) + assertThat(LocationUtil.isAbsolute(manifestList)) + .as("manifest-list should be a relative path in v4 metadata: %s", manifestList) + .isFalse(); + assertThat(manifestList).startsWith("/"); + } + + // verify the resolved paths work (data is still readable) + Snapshot snapshot = table.currentSnapshot(); + assertThat(LocationUtil.isAbsolute(snapshot.manifestListLocation())).isTrue(); + + List rows = sql("SELECT * FROM %s ORDER BY id", tableName); + assertThat(rows).hasSize(2); + } +} From 925d3100d2a93b36791deadb77b8d15ac9d643c2 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 12 May 2026 22:31:08 -0700 Subject: [PATCH 22/22] fix: carry forward data file entries from flat root manifests When a v4 root manifest contains data/delete file entries directly (flat tree), SnapshotProducer now reads those entries from the parent snapshot's root manifest and writes them into the new root manifest alongside leaf manifest references. This ensures FastAppend and other operations preserve all data. 
--- .../org/apache/iceberg/SnapshotProducer.java | 61 +++- .../java/org/apache/iceberg/V4Metadata.java | 41 +++ .../iceberg/TestV4MetadataConversions.java | 322 ++++++++++++++++++ 3 files changed, 421 insertions(+), 3 deletions(-) create mode 100644 core/src/test/java/org/apache/iceberg/TestV4MetadataConversions.java diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index 10449dbfdf81..7b9420cc0e7e 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -50,6 +50,7 @@ import java.util.function.Function; import org.apache.iceberg.encryption.EncryptedOutputFile; import org.apache.iceberg.encryption.EncryptingFileIO; +import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.events.CreateSnapshotEvent; import org.apache.iceberg.events.Listeners; import org.apache.iceberg.exceptions.CleanableFailure; @@ -296,7 +297,7 @@ public Snapshot apply() { .run(index -> manifestFiles[index] = manifestsWithMetadata.get(manifests.get(index))); if (base.formatVersion() >= TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS) { - return applyV4(manifestFiles, sequenceNumber, parentSnapshotId); + return applyV4(manifestFiles, sequenceNumber, parentSnapshotId, parentSnapshot); } else { return applyV3(manifestFiles, sequenceNumber, parentSnapshotId); } @@ -347,9 +348,22 @@ private Snapshot applyV3( } private Snapshot applyV4( - ManifestFile[] manifestFiles, long sequenceNumber, Long parentSnapshotId) { + ManifestFile[] manifestFiles, + long sequenceNumber, + Long parentSnapshotId, + Snapshot parentSnapshot) { + // Read data/delete file entries from the parent snapshot's root manifest (flat root). + // These entries must be carried forward into the new root manifest. 
+ List parentDataEntries = readParentDataEntries(parentSnapshot); + OutputFile rootManifest = rootManifestPath(); - writeRootManifest(rootManifest, manifestFiles, snapshotId(), sequenceNumber, base.location()); + writeRootManifest( + rootManifest, + manifestFiles, + parentDataEntries, + snapshotId(), + sequenceNumber, + base.location()); manifestLists.add(rootManifest.location()); // compute nextRowId by summing added rows across all data manifests @@ -407,6 +421,7 @@ private void validateReplace() { private void writeRootManifest( OutputFile output, ManifestFile[] manifests, + List dataEntries, long commitSnapshotId, long commitSequenceNumber, String tableLocation) { @@ -424,11 +439,51 @@ private void writeRootManifest( V4Metadata.manifestFileToTrackedFile( manifest, commitSnapshotId, commitSequenceNumber, tableLocation)); } + + // Carry forward data/delete file entries from the parent's flat root manifest, + // re-projecting to the root manifest write schema. + for (TrackedFile entry : dataEntries) { + writer.add(V4Metadata.dataEntryForRootManifest(entry)); + } } catch (IOException e) { throw new RuntimeIOException(e, "Failed to write root manifest file"); } } + /** + * Reads data and delete file entries (not manifest references) from the parent snapshot's root + * manifest. Returns an empty list if the parent has no root manifest or contains only manifest + * references. 
+ */ + private List readParentDataEntries(Snapshot parentSnapshot) { + List dataEntries = Lists.newArrayList(); + if (parentSnapshot == null || parentSnapshot.manifestListLocation() == null) { + return dataEntries; + } + + FileFormat format = FileFormat.fromFileName(parentSnapshot.manifestListLocation()); + if (format != FileFormat.PARQUET) { + return dataEntries; + } + + V4ManifestReader reader = + new V4ManifestReader( + ops().io().newInputFile(parentSnapshot.manifestListLocation()), + ImmutableMap.of()); + try (CloseableIterable entries = reader.liveEntries()) { + for (TrackedFile tf : entries) { + if (tf.contentType() != FileContent.DATA_MANIFEST + && tf.contentType() != FileContent.DELETE_MANIFEST) { + dataEntries.add(tf.copy()); + } + } + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to read parent root manifest"); + } + + return dataEntries; + } + private void runValidations(Snapshot parentSnapshot) { validate(base, parentSnapshot); diff --git a/core/src/main/java/org/apache/iceberg/V4Metadata.java b/core/src/main/java/org/apache/iceberg/V4Metadata.java index 1e7aa6737218..b5c58740b165 100644 --- a/core/src/main/java/org/apache/iceberg/V4Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V4Metadata.java @@ -311,6 +311,47 @@ static TrackedFileStruct manifestFileToTrackedFile( return tf; } + /** + * Re-projects a data/delete file {@link TrackedFile} entry for writing into a root manifest. + * + *
<p>
Entries read from a flat root manifest may carry content_stats with a schema that differs + * from the root write schema. This method creates a clean {@link TrackedFileStruct} using the + * root manifest projection, copying only the standard fields. + */ + static TrackedFileStruct dataEntryForRootManifest(TrackedFile tf) { + // ROOT_MANIFEST_WRITE_TYPE uses entrySchema which excludes content_stats. + // Positions match entrySchema field order: + // 0=tracking, 1=content_type, 2=location, 3=file_format, 4=record_count, + // 5=file_size_in_bytes, 6=spec_id, 7=sort_order_id, 8=deletion_vector, + // 9=manifest_info, 10=key_metadata, 11=split_offsets, 12=equality_ids + TrackedFileStruct out = new TrackedFileStruct(ROOT_MANIFEST_WRITE_TYPE); + out.set(0, tf.tracking()); + out.set(1, tf.contentType().id()); + out.set(2, tf.location()); + out.set(3, tf.fileFormat().toString()); + out.set(4, tf.recordCount()); + out.set(5, tf.fileSizeInBytes()); + if (tf.specId() != null) { + out.set(6, tf.specId()); + } + if (tf.sortOrderId() != null) { + out.set(7, tf.sortOrderId()); + } + if (tf.deletionVector() != null) { + out.set(8, tf.deletionVector()); + } + if (tf.keyMetadata() != null) { + out.set(10, tf.keyMetadata()); + } + if (tf.splitOffsets() != null) { + out.set(11, tf.splitOffsets()); + } + if (tf.equalityIds() != null) { + out.set(12, tf.equalityIds()); + } + return out; + } + /** Converts a {@link TrackedFile} read from a root manifest back to a {@link ManifestFile}. 
*/ static ManifestFile trackedFileToManifestFile(TrackedFile tf, String tableLocation) { ManifestInfo info = tf.manifestInfo(); diff --git a/core/src/test/java/org/apache/iceberg/TestV4MetadataConversions.java b/core/src/test/java/org/apache/iceberg/TestV4MetadataConversions.java new file mode 100644 index 000000000000..b89a7d1d446f --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestV4MetadataConversions.java @@ -0,0 +1,322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestV4MetadataConversions { + private static final Schema SCHEMA = + new Schema( + required(3, "id", Types.IntegerType.get()), required(4, "data", Types.StringType.get())); + + private static final PartitionSpec SPEC = + PartitionSpec.builderFor(SCHEMA).bucket("data", 16).build(); + + @Test + public void testManifestFileToTrackedFileRoundTrip() { + long snapshotId = 12345L; + long sequenceNumber = 10L; + + ManifestFile manifest = + new GenericManifestFile( + "/path/to/manifest.parquet", + 5878L, + 0, + ManifestContent.DATA, + sequenceNumber, + 5L, + snapshotId, + null, + null, + 3, + 30L, + 5, + 50L, + 1, + 10L, + null); + + TrackedFileStruct tf = + V4Metadata.manifestFileToTrackedFile(manifest, snapshotId, sequenceNumber, null); + + assertThat(tf.contentType()).isEqualTo(FileContent.DATA_MANIFEST); + assertThat(tf.location()).isEqualTo("/path/to/manifest.parquet"); + assertThat(tf.fileSizeInBytes()).isEqualTo(5878L); + assertThat(tf.specId()).isEqualTo(0); + assertThat(tf.recordCount()).isEqualTo(9L); // 3 + 5 + 1 + + Tracking tracking = tf.tracking(); + assertThat(tracking).isNotNull(); + assertThat(tracking.status()).isEqualTo(EntryStatus.ADDED); + assertThat(tracking.snapshotId()).isEqualTo(snapshotId); + assertThat(tracking.dataSequenceNumber()).isEqualTo(sequenceNumber); + + ManifestInfo info = tf.manifestInfo(); + assertThat(info).isNotNull(); + assertThat(info.addedFilesCount()).isEqualTo(3); + assertThat(info.addedRowsCount()).isEqualTo(30L); + assertThat(info.existingFilesCount()).isEqualTo(5); + assertThat(info.existingRowsCount()).isEqualTo(50L); + 
assertThat(info.deletedFilesCount()).isEqualTo(1); + assertThat(info.deletedRowsCount()).isEqualTo(10L); + assertThat(info.minSequenceNumber()).isEqualTo(5L); + + // round-trip back to ManifestFile + ManifestFile roundTripped = V4Metadata.trackedFileToManifestFile(tf, null); + + assertThat(roundTripped.path()).isEqualTo(manifest.path()); + assertThat(roundTripped.length()).isEqualTo(manifest.length()); + assertThat(roundTripped.partitionSpecId()).isEqualTo(manifest.partitionSpecId()); + assertThat(roundTripped.content()).isEqualTo(manifest.content()); + assertThat(roundTripped.sequenceNumber()).isEqualTo(manifest.sequenceNumber()); + assertThat(roundTripped.minSequenceNumber()).isEqualTo(manifest.minSequenceNumber()); + assertThat(roundTripped.snapshotId()).isEqualTo(manifest.snapshotId()); + assertThat(roundTripped.addedFilesCount()).isEqualTo(manifest.addedFilesCount()); + assertThat(roundTripped.addedRowsCount()).isEqualTo(manifest.addedRowsCount()); + assertThat(roundTripped.existingFilesCount()).isEqualTo(manifest.existingFilesCount()); + assertThat(roundTripped.existingRowsCount()).isEqualTo(manifest.existingRowsCount()); + assertThat(roundTripped.deletedFilesCount()).isEqualTo(manifest.deletedFilesCount()); + assertThat(roundTripped.deletedRowsCount()).isEqualTo(manifest.deletedRowsCount()); + } + + @Test + public void testManifestFileToTrackedFileDeleteManifest() { + long snapshotId = 99L; + long sequenceNumber = 7L; + + ManifestFile deleteManifest = + new GenericManifestFile( + "/path/to/delete-manifest.parquet", + 1024L, + 1, + ManifestContent.DELETES, + sequenceNumber, + 3L, + snapshotId, + null, + null, + 2, + 20L, + 0, + 0L, + 0, + 0L, + null); + + TrackedFileStruct tf = + V4Metadata.manifestFileToTrackedFile(deleteManifest, snapshotId, sequenceNumber, null); + + assertThat(tf.contentType()).isEqualTo(FileContent.DELETE_MANIFEST); + assertThat(tf.location()).isEqualTo("/path/to/delete-manifest.parquet"); + assertThat(tf.specId()).isEqualTo(1); + } + + 
@Test + public void testManifestFileToTrackedFileUnassignedSequenceNumber() { + long snapshotId = 42L; + long commitSeqNum = 15L; + + ManifestFile manifest = + new GenericManifestFile( + "/path/to/manifest.parquet", + 2048L, + 0, + ManifestContent.DATA, + ManifestWriter.UNASSIGNED_SEQ, + ManifestWriter.UNASSIGNED_SEQ, + snapshotId, + null, + null, + 1, + 10L, + 0, + 0L, + 0, + 0L, + null); + + TrackedFileStruct tf = + V4Metadata.manifestFileToTrackedFile(manifest, snapshotId, commitSeqNum, null); + + Tracking tracking = tf.tracking(); + assertThat(tracking.dataSequenceNumber()).isEqualTo(commitSeqNum); + + ManifestInfo info = tf.manifestInfo(); + assertThat(info.minSequenceNumber()).isEqualTo(commitSeqNum); + } + + @Test + public void testManifestFileToTrackedFileWithKeyMetadata() { + long snapshotId = 1L; + long sequenceNumber = 1L; + ByteBuffer keyMetadata = ByteBuffer.wrap(new byte[] {1, 2, 3, 4}); + + ManifestFile manifest = + new GenericManifestFile( + "/path/to/manifest.parquet", + 1024L, + 0, + ManifestContent.DATA, + sequenceNumber, + sequenceNumber, + snapshotId, + null, + keyMetadata, + 1, + 10L, + 0, + 0L, + 0, + 0L, + null); + + TrackedFileStruct tf = + V4Metadata.manifestFileToTrackedFile(manifest, snapshotId, sequenceNumber, null); + + assertThat(tf.keyMetadata()).isNotNull(); + } + + @Test + public void testEntryToTrackedFileDataFile() { + DataFile dataFile = + DataFiles.builder(SPEC) + .withPath("/path/to/data.parquet") + .withFileSizeInBytes(350) + .withPartitionPath("data_bucket=0") + .withRecordCount(10) + .withSplitOffsets(ImmutableList.of(4L)) + .build(); + + Schema entrySchema = V4Metadata.entrySchema(SPEC.partitionType()); + GenericManifestEntry entry = + new GenericManifestEntry<>(AvroSchemaUtil.convert(entrySchema, "manifest_entry")); + entry.wrapAppend(100L, 5L, dataFile); + + TrackedFileStruct tf = V4Metadata.entryToTrackedFile(entry, 100L, null); + + assertThat(tf.contentType()).isEqualTo(FileContent.DATA); + 
assertThat(tf.location()).isEqualTo("/path/to/data.parquet"); + assertThat(tf.fileSizeInBytes()).isEqualTo(350L); + assertThat(tf.recordCount()).isEqualTo(10L); + assertThat(tf.specId()).isEqualTo(SPEC.specId()); + assertThat(tf.splitOffsets()).isEqualTo(ImmutableList.of(4L)); + + Tracking tracking = tf.tracking(); + assertThat(tracking).isNotNull(); + assertThat(tracking.status()).isEqualTo(EntryStatus.ADDED); + assertThat(tracking.snapshotId()).isEqualTo(100L); + assertThat(tracking.dataSequenceNumber()).isEqualTo(5L); + } + + @Test + public void testEntryToTrackedFileAddedWithNullSequenceNumber() { + DataFile dataFile = + DataFiles.builder(SPEC) + .withPath("/path/to/data.parquet") + .withFileSizeInBytes(100) + .withPartitionPath("data_bucket=0") + .withRecordCount(5) + .build(); + + Schema entrySchema = V4Metadata.entrySchema(SPEC.partitionType()); + GenericManifestEntry entry = + new GenericManifestEntry<>(AvroSchemaUtil.convert(entrySchema, "manifest_entry")); + entry.wrapAppend(200L, dataFile); + + TrackedFileStruct tf = V4Metadata.entryToTrackedFile(entry, 200L, null); + + Tracking tracking = tf.tracking(); + assertThat(tracking.status()).isEqualTo(EntryStatus.ADDED); + assertThat(tracking.snapshotId()).isEqualTo(200L); + assertThat(tracking.dataSequenceNumber()).isNull(); + } + + @Test + public void testEntryToTrackedFileEqualityDeleteFile() { + DeleteFile deleteFile = + FileMetadata.deleteFileBuilder(SPEC) + .ofEqualityDeletes(3) + .withPath("/path/to/eq-deletes.parquet") + .withFileSizeInBytes(200) + .withPartitionPath("data_bucket=0") + .withRecordCount(3) + .build(); + + Schema entrySchema = V4Metadata.entrySchema(SPEC.partitionType()); + GenericManifestEntry entry = + new GenericManifestEntry<>(AvroSchemaUtil.convert(entrySchema, "manifest_entry")); + entry.wrapAppend(300L, 8L, deleteFile); + + TrackedFileStruct tf = V4Metadata.entryToTrackedFile(entry, 300L, null); + + assertThat(tf.contentType()).isEqualTo(FileContent.EQUALITY_DELETES); + 
assertThat(tf.location()).isEqualTo("/path/to/eq-deletes.parquet"); + assertThat(tf.equalityIds()).isEqualTo(ImmutableList.of(3)); + } + + @Test + public void testEntryToTrackedFileExistingEntry() { + DataFile dataFile = + DataFiles.builder(SPEC) + .withPath("/path/to/data.parquet") + .withFileSizeInBytes(100) + .withPartitionPath("data_bucket=0") + .withRecordCount(5) + .build(); + + Schema entrySchema = V4Metadata.entrySchema(SPEC.partitionType()); + GenericManifestEntry entry = + new GenericManifestEntry<>(AvroSchemaUtil.convert(entrySchema, "manifest_entry")); + entry.wrapExisting(400L, 12L, 12L, dataFile); + + TrackedFileStruct tf = V4Metadata.entryToTrackedFile(entry, 400L, null); + + Tracking tracking = tf.tracking(); + assertThat(tracking.status()).isEqualTo(EntryStatus.EXISTING); + assertThat(tracking.snapshotId()).isEqualTo(400L); + assertThat(tracking.dataSequenceNumber()).isEqualTo(12L); + assertThat(tracking.fileSequenceNumber()).isEqualTo(12L); + } + + @Test + public void testEntrySchemaHasTrackedFileFields() { + Schema schema = V4Metadata.entrySchema(Types.StructType.of()); + + assertThat(schema.findField(TrackedFile.TRACKING.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.CONTENT_TYPE.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.LOCATION.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.FILE_FORMAT.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.RECORD_COUNT.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.FILE_SIZE_IN_BYTES.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.SPEC_ID.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.SORT_ORDER_ID.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.DELETION_VECTOR.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.MANIFEST_INFO.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.KEY_METADATA.fieldId())).isNotNull(); + 
assertThat(schema.findField(TrackedFile.SPLIT_OFFSETS.fieldId())).isNotNull(); + assertThat(schema.findField(TrackedFile.EQUALITY_IDS.fieldId())).isNotNull(); + } +}