Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions core/src/main/java/org/apache/iceberg/ManifestFiles.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ public static ManifestReader<DataFile> read(ManifestFile manifest, FileIO io) {
*/
public static ManifestReader<DataFile> read(
ManifestFile manifest, FileIO io, Map<Integer, PartitionSpec> specsById) {
return read(manifest, io, specsById, true);
}

static ManifestReader<DataFile> read(
ManifestFile manifest,
FileIO io,
Map<Integer, PartitionSpec> specsById,
boolean isCommitted) {
Preconditions.checkArgument(
manifest.content() == ManifestContent.DATA,
"Cannot read a delete manifest with a ManifestReader: %s",
Expand All @@ -163,6 +171,7 @@ public static ManifestReader<DataFile> read(
specsById,
inheritableMetadata,
manifest.firstRowId(),
isCommitted,
FileType.DATA_FILES);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ abstract class ManifestMergeManager<F extends ContentFile<F>> {

protected abstract ManifestReader<F> newManifestReader(ManifestFile manifest);

protected ManifestReader<F> newManifestReader(ManifestFile manifest, boolean isCommitted) {
return newManifestReader(manifest);
}

Iterable<ManifestFile> mergeManifests(Iterable<ManifestFile> manifests) {
Iterator<ManifestFile> manifestIter = manifests.iterator();
if (!mergeEnabled || !manifestIter.hasNext()) {
Expand Down Expand Up @@ -192,7 +196,9 @@ private ManifestFile createManifest(int specId, List<ManifestFile> bin) {
boolean threw = true;
try {
for (ManifestFile manifest : bin) {
try (ManifestReader<F> reader = newManifestReader(manifest)) {
boolean isCommitted =
manifest.snapshotId() != null && snapshotId() != manifest.snapshotId();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks reasonable to me.

try (ManifestReader<F> reader = newManifestReader(manifest, isCommitted)) {
for (ManifestEntry<F> entry : reader.entries()) {
if (entry.status() == Status.DELETED) {
// suppress deletes from previous snapshots. only files deleted by this snapshot
Expand Down
24 changes: 21 additions & 3 deletions core/src/main/java/org/apache/iceberg/ManifestReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ private Class<? extends StructLike> fileClass() {
private final InputFile file;
private final InheritableMetadata inheritableMetadata;
private final Long firstRowId;
private final boolean isCommitted;
private final FileType content;
private final PartitionSpec spec;
private final Schema fileSchema;
Expand Down Expand Up @@ -125,12 +126,24 @@ protected ManifestReader(
InheritableMetadata inheritableMetadata,
Long firstRowId,
FileType content) {
this(file, specId, specsById, inheritableMetadata, firstRowId, true, content);
}

protected ManifestReader(
InputFile file,
int specId,
Map<Integer, PartitionSpec> specsById,
InheritableMetadata inheritableMetadata,
Long firstRowId,
boolean isCommitted,
FileType content) {
Preconditions.checkArgument(
firstRowId == null || content == FileType.DATA_FILES,
"First row ID is not valid for delete manifests");
this.file = file;
this.inheritableMetadata = inheritableMetadata;
this.firstRowId = firstRowId;
this.isCommitted = isCommitted;
this.content = content;

if (specsById != null) {
Expand Down Expand Up @@ -308,7 +321,7 @@ private CloseableIterable<ManifestEntry<F>> open(Schema projection) {

CloseableIterable<ManifestEntry<F>> withMetadata =
CloseableIterable.transform(reader, inheritableMetadata::apply);
return CloseableIterable.transform(withMetadata, idAssigner(firstRowId));
return CloseableIterable.transform(withMetadata, idAssigner(firstRowId, isCommitted));
}

CloseableIterable<ManifestEntry<F>> liveEntries() {
Expand Down Expand Up @@ -398,7 +411,7 @@ static List<String> withStatsColumns(Collection<String> columns) {
}

private static <F extends ContentFile<F>> Function<ManifestEntry<F>, ManifestEntry<F>> idAssigner(
Long firstRowId) {
Long firstRowId, boolean isCommitted) {
if (firstRowId != null) {
return new Function<>() {
private long nextRowId = firstRowId;
Expand All @@ -416,8 +429,13 @@ public ManifestEntry<F> apply(ManifestEntry<F> entry) {
return entry;
}
};
} else if (!isCommitted) {
// Preserve firstRowId for entries in uncommitted manifests, including EXISTING entries that
// may be merged later
return Function.identity();
Copy link
Copy Markdown
Contributor

@dramaticlly dramaticlly May 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious, maybe we can also add a direct UT in TestManifestReader to cover idAssigner on both if/else branch?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about this change.

It looks like this case covers when the manifest's first_row_id is null. That should only happen when reading a snapshot from an older version without row IDs. In that case, the snapshot's first-row-id is null and we don't assign first_row_id to manifests or to data files. This is covered by Row Lineage for Upgraded Tables in the spec.

If first-row-id is assigned, then every manifest should have a first_row_id assigned. Then this branch shouldn't be triggering.

Is it possible that the problem is further up and a manifest is somehow missing a first_row_id?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, from going through the repro test, I think this is a legitimate case because manifest compaction is using a ManifestFile to read that doesn't have an assigned first_row_id. That means the assumption here is violated because we can have manifests that don't have an assigned first_row_id and we should still read and pass through the first_row_id from files.

I think the fix needs to distinguish between these cases. We need to have an idAssigner for committed manifests (this one) and an idAssigner used for uncommitted manifests that preserves the first_row_id like this does. We can't commit this fix because it breaks the defensive assignment we added for the v2/v1 snapshot case.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 on two assigners.

I agree we should preserve the defensive mode for the committed-manifest path, but I don't think it should be a silent null assignment. If the invariant is "a committed v3 manifest with firstRowId != null shouldn't contain an entry with a non-null first_row_id that wasn't already there," we should encode that as a precondition / checkState and fail loudly. Silent re-assignment masks exactly the class of bug we're fixing here. If there's a real case where re-assigning is necessary, that seems like it should be opt-in behind a config flag rather than the default.

Copy link
Copy Markdown
Contributor Author

@amogh-jahagirdar amogh-jahagirdar May 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I think I get the rationale for the defensive null assignment now. Preserving the entry made the upgrade test cases work because those will anyways have null first row IDs but this is to be defensive against someone producing older manifests with a first row ID somehow?

In either case, I've updated to pass in a committed flag to a pakcage private ManifestFiles.read API , and if it's true and firstRowId is null we do the null assignment, and if not, we preserve the entry as it is.

@RussellSpitzer I think you're saying we should add a Preconditions check for the committed case that the firstRowId is indeed null. I can get behind that, that seemed to be what @rdblue was implying in our offline conversation, if that's our expectation we should have a precondition check.

Copy link
Copy Markdown
Contributor Author

@amogh-jahagirdar amogh-jahagirdar May 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since I think we have a reliable mechanism to distinguish commited vs uncommited manifests , I left out Preconditions checks for now just to keep things simple. Let me know if people feel strongly about this. I think whatever expectations we know we have on the entry at different points probably should have a Precondition check but at the same time I don't want to be over specific on our checks and cause needless failures when reading the manifests.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do, but I think I'm currently overruled by the majority. I don't like it when we change objects to "fix" them

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do, but I think I'm currently overruled by the majority. I don't like it when we change objects to "fix" them

I think this is reasonable. I'm okay with a precondition if that's what others think we should do. My argument for the opposite is that usually when we can read correctly, we should. We know that in the case where there is no manifest first_row_id that this should be null. Is it worth failing a read?

It might be. Russell is correct that it signals a problem somewhere. It probably doesn't matter much either way, since this should be rare.

} else {
// data file's first_row_id is null when the manifest's first_row_id is null
Copy link
Copy Markdown
Contributor Author

@amogh-jahagirdar amogh-jahagirdar May 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still checking if changing idAssigner like this for all cases is really the right way to fix this or if during manifest merging we should actually pass in our own id assignment transformer that preserves the existing case.

Here we're changing the manifest reader because

// data file's first_row_id is null when the manifest's first_row_id is null

isn't true in the context of reading merged manifests that have EXISTING entries which need to have first row IDs preserved but the merged manifest does not have a first row ID.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The global change reads correctly. The else branch only runs when manifest.firstRowId is null, and in every such case the on-disk file.firstRowId is what should be preserved:

  • pre-v3 manifests: file.firstRowId is null in the data, identity preserves null → unchanged behavior
  • v3 EXISTING entries in a transient filtered/merged manifest: identity preserves the original assignment → the fix
  • v3 ADDED entries in a freshly-written manifest: file.firstRowId is null in the data (assigned at read time once the manifest gets a firstRowId), identity preserves null → unchanged behavior

I am also wondering about the reason for the previous behavior change. Maybe it is a defensive check covering: ManifestFiles.copyAppendManifest, the user-facing appendManifest() flow. A user can construct a ManifestFile externally with a non-null file-level first_row_id set on an ADDED entry — nothing in the public API stops that.

Under the previous reader-side clobber, the value was stripped before the writer saw it. Under Function.identity(), the value reaches the writer — but writer.add(entry)wrapAppendDelegates.suppressFirstRowId, so the wrapper's firstRowId() returns null and the bytes written for the ADDED entry have null on disk. Identical on-disk result.

The reader-side clobber was redundant on this path; the writer-side suppression in MergingSnapshotProducer.add and GenericManifestEntry.wrapAppend carries the "no override from client" enforcement. The global change preserves that property.

Copy link
Copy Markdown
Contributor Author

@amogh-jahagirdar amogh-jahagirdar May 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah @stevenzwu that's my analysis of all the call points as well. Fundamentally, I would expect the idTransformer that's set by default on the ManifestReader should do no transformation of the first row ID on the entry. That implicitly covers the 3 cases you mentioned.

And for the coppyAppendManifest case, if we really want to be defensive, I think the right solution is that the public writer APIs Iceberg exposes (that produce the manifests) should suppress the firstRowId assignment on the ADDED entry. The read side should just consume what's there in my opinion.

// committed manifest with null manifest-level firstRowId (pre-v3 upgrade path)
// defensively set the first row ID for every entry to be null
return entry -> {
if (entry.file() instanceof BaseFile) {
((BaseFile<?>) entry.file()).setFirstRowId(null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,13 @@ protected ManifestWriter<DataFile> newManifestWriter(PartitionSpec manifestSpec)

@Override
protected ManifestReader<DataFile> newManifestReader(ManifestFile manifest) {
return MergingSnapshotProducer.this.newManifestReader(manifest);
return newManifestReader(manifest, true);
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Preserves the existing behavior for newManifestReader. While the "isCommitted" is only passed through for the Data file manifest merging case, it's a bit more involved to just put it on that implementation rather than the base class. I think this is the cleanest way (and besides this is all private/package private classes).

Copy link
Copy Markdown
Contributor

@rdblue rdblue May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with this default, but we should probably start requiring isCommitted to be passed. Can we deprecate the old one and start using this exclusively?

I'm fine with this as a follow-up since we want to unblock the release.

}

@Override
protected ManifestReader<DataFile> newManifestReader(
ManifestFile manifest, boolean isCommitted) {
return ManifestFiles.read(manifest, ops().io(), ops().current().specsById(), isCommitted);
}
}

Expand Down
36 changes: 36 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestManifestReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,42 @@ public void testDVs() throws IOException {
}
}

@TestTemplate
public void testReadCommitedManifestNullifiesEntryRowId() throws IOException {
long firstRowId = 42L;
DataFile fileWithRowId =
DataFiles.builder(SPEC).copy(FILE_A).withFirstRowId(firstRowId).build();
// the manifest has no manifest-level first_row_id (the v3+ entry still carries one)
ManifestFile manifest =
writeManifest(1000L, manifestEntry(Status.EXISTING, 123L, fileWithRowId));
assertThat(manifest.firstRowId()).isNull();

try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, FILE_IO, table.specs())) {
assertThat(Iterables.getOnlyElement(reader).firstRowId()).isNull();
}
}

@TestTemplate
public void testReadUncommittedManifestPreservesEntryRowId() throws IOException {
assumeThat(formatVersion)
.as("first_row_id is only written in v3+ manifests")
.isGreaterThanOrEqualTo(3);

long firstRowId = 42L;
DataFile fileWithRowId =
DataFiles.builder(SPEC).copy(FILE_A).withFirstRowId(firstRowId).build();
// the manifest has no manifest-level first_row_id, but the entry has one
ManifestFile manifest =
writeManifest(1000L, manifestEntry(Status.EXISTING, 123L, fileWithRowId));
assertThat(manifest.firstRowId()).isNull();
assertThat(fileWithRowId.firstRowId()).isEqualTo(firstRowId);

try (ManifestReader<DataFile> reader =
ManifestFiles.read(manifest, FILE_IO, table.specs(), false /* isCommitted */)) {
assertThat(Iterables.getOnlyElement(reader).firstRowId()).isEqualTo(firstRowId);
}
}

@TestTemplate
public void testDataFileSplitOffsetsNullWhenInvalid() throws IOException {
DataFile invalidOffset =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,33 @@ public void testUpgradeAssignmentWithManifestCompaction(@TempDir File altLocatio
FILE_C.recordCount() + FILE_B.recordCount());
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While we are at it we should add in a test for the "set row_id" to null assignment we do. The fact that we could change the behavior (in your previous commit) and not fail anything in the test suite is an issue. Especially if we think that is an important behavior.

Copy link
Copy Markdown
Contributor Author

@amogh-jahagirdar amogh-jahagirdar May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So there's a nuance to "change the behavior" which is why all the tests in the previous commit passed. Even with the previous commit, we ended up correctly setting row ID to null because the entries in older manifest wouldn't even have a first row ID to begin with, so the behavior was preserved for the main case we care about for older manifests, and there's quite a few tests there for the upgrade path.

The case we're talking about though is a case where we (somehow?) produce a pre-v3 manifest with first row IDs and we should ignore those by defensively setting those to null; in that specific case, there was a behavior change because we'd just keep those row IDs. I'll see about adding a test but I'm not sure if we really even expose the mechanisms to produce a V2 manifest with first row IDs.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes but, if we think it's important to handle that use case we should be testing it imho

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok so I've added 2 tests to ManifestReader; 1 for testing the uncommitted read path which preserves firstRowId (same path already being tested in the TestRowLinegeAssignment) case, and 1 for testing the committed manifest read path where we expect to null out the entry's first row ID. Since we're now distinguishing between uncommitted and committed, we have a good abstraction boundary to test.

The committed test is something that would detect if the behavior to defensively null out entries changed (verified by commenting out the most recent changes we made, and going back to what we had before).


@Test
public void testRewritePreservesExistingFileFirstRowIds() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understand the logic a little bit: seems during merge or filter manifest, we may have null first_row_id at manifest while the existing file already has row_id which should be kept.

Tried for filtering case and the fix works. Maybe we can add the test coverage for filtering as well.

// apply a row filter so the read happens through the "filtered manifest" path
reader.filterRows(Expressions.greaterThan("id", 0));

table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

// FILE_A gets firstRowId=0, FILE_B gets firstRowId=FILE_A.recordCount()=125; assert before
// rewrite
ManifestFile preRewriteManifest =
Iterables.getOnlyElement(table.currentSnapshot().dataManifests(table.io()));
checkDataFileAssignment(table, preRewriteManifest, 0L, FILE_A.recordCount());

// set low to trigger an internal manifest merge during the rewrite
table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit();
// FILE_A and FILE_C must be removed and added in the same operation. Removing FILE_A creates
// an uncommitted manifest containing FILE_B as an existing entry. Adding FILE_C triggers the
// internal manifest merge to read that uncommitted manifest before its firstRowId is assigned.
table.newRewrite().deleteFile(FILE_A).addFile(FILE_C).commit();
Comment thread
amogh-jahagirdar marked this conversation as resolved.

// merged manifest live files: [FILE_C (added), FILE_B (existing)]
ManifestFile manifest =
Iterables.getOnlyElement(table.currentSnapshot().dataManifests(table.io()));
checkDataFileAssignment(
table,
manifest,
FILE_A.recordCount() + FILE_B.recordCount(), // FILE_C gets 225
FILE_A.recordCount()); // FILE_B must retain its original firstRowId (125)
}

private static ManifestContent content(int ordinal) {
return ManifestContent.values()[ordinal];
}
Expand Down