Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions src/iceberg/test/expire_snapshots_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,11 @@ TEST_F(ExpireSnapshotsCleanupTest, IgnoresExpiredDeleteManifestReadFailures) {

std::vector<std::string> deleted_files;
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewExpireSnapshots());
// Force ReachableFileCleanup: the empty current manifest list makes this an
// unreachable-orphan scenario, which Incremental cannot detect (added-in-ancestor
// files are preserved unless an explicit DELETED entry exists). Specifying the
// snapshot id forces the dispatch to Reachable.
update->ExpireSnapshotId(kExpiredSnapshotId);
update->DeleteWith(
[&deleted_files](const std::string& path) { deleted_files.push_back(path); });

Expand Down Expand Up @@ -388,6 +393,8 @@ TEST_F(ExpireSnapshotsCleanupTest, DeletesExpiredFiles) {

std::vector<std::string> deleted_files;
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewExpireSnapshots());
// See note above; same scenario.
update->ExpireSnapshotId(kExpiredSnapshotId);
update->DeleteWith(
[&deleted_files](const std::string& path) { deleted_files.push_back(path); });

Expand Down Expand Up @@ -573,4 +580,75 @@ TEST_F(ExpireSnapshotsCleanupTest, KeepsReusedPartitionStats) {
EXPECT_THAT(deleted_files, testing::Not(testing::Contains(reused_statistics_path)));
}

// Linear-ancestry, no specified ID: dispatch must pick IncrementalFileCleanup.
// Since the expired snapshot is an ancestor and the expired manifest only
// contains ADDED entries (no DELETED), incremental correctly preserves the
// data file (matches Java IncrementalFileCleanup semantics) and only removes
// the manifest, manifest list, and (if any) delete manifest.
TEST_F(ExpireSnapshotsCleanupTest, IncrementalDispatchPreservesAncestorAddedFiles) {
const auto expired_data_file_path = table_location_ + "/data/expired-data.parquet";
const auto expired_data_manifest_path = table_location_ + "/metadata/expired-data.avro";
const auto expired_manifest_list_path =
table_location_ + "/metadata/expired-manifest-list.avro";
const auto current_manifest_list_path =
table_location_ + "/metadata/current-manifest-list.avro";

auto expired_data_manifest = WriteDataManifest(
expired_data_manifest_path, kExpiredSnapshotId,
{MakeEntry(ManifestStatus::kAdded, kExpiredSnapshotId, kExpiredSequenceNumber,
MakeDataFile(expired_data_file_path))});
WriteManifestList(expired_manifest_list_path, kExpiredSnapshotId,
/*parent_snapshot_id=*/0, kExpiredSequenceNumber,
{expired_data_manifest});
WriteManifestList(current_manifest_list_path, kCurrentSnapshotId, kExpiredSnapshotId,
kCurrentSequenceNumber, {});
RewriteTableWithManifestLists(expired_manifest_list_path, current_manifest_list_path);

std::vector<std::string> deleted_files;
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewExpireSnapshots());
// No ExpireSnapshotId -> incremental dispatch.
update->DeleteWith(
[&deleted_files](const std::string& path) { deleted_files.push_back(path); });

EXPECT_THAT(update->Commit(), IsOk());
// Manifest + manifest list deleted, data file preserved.
EXPECT_THAT(deleted_files, testing::Contains(expired_data_manifest_path));
EXPECT_THAT(deleted_files, testing::Contains(expired_manifest_list_path));
EXPECT_THAT(deleted_files, testing::Not(testing::Contains(expired_data_file_path)));
}

// Same fixture, but force the Reachable path with ExpireSnapshotId. Reachable
// detects the data file as unreachable from the (empty) current state and
// deletes it -- demonstrating the dispatch actually selects different code paths.
TEST_F(ExpireSnapshotsCleanupTest, ReachableDispatchDeletesUnreachableData) {
const auto expired_data_file_path = table_location_ + "/data/expired-data.parquet";
const auto expired_data_manifest_path = table_location_ + "/metadata/expired-data.avro";
const auto expired_manifest_list_path =
table_location_ + "/metadata/expired-manifest-list.avro";
const auto current_manifest_list_path =
table_location_ + "/metadata/current-manifest-list.avro";

auto expired_data_manifest = WriteDataManifest(
expired_data_manifest_path, kExpiredSnapshotId,
{MakeEntry(ManifestStatus::kAdded, kExpiredSnapshotId, kExpiredSequenceNumber,
MakeDataFile(expired_data_file_path))});
WriteManifestList(expired_manifest_list_path, kExpiredSnapshotId,
/*parent_snapshot_id=*/0, kExpiredSequenceNumber,
{expired_data_manifest});
WriteManifestList(current_manifest_list_path, kCurrentSnapshotId, kExpiredSnapshotId,
kCurrentSequenceNumber, {});
RewriteTableWithManifestLists(expired_manifest_list_path, current_manifest_list_path);

std::vector<std::string> deleted_files;
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewExpireSnapshots());
update->ExpireSnapshotId(kExpiredSnapshotId);
update->DeleteWith(
[&deleted_files](const std::string& path) { deleted_files.push_back(path); });

EXPECT_THAT(update->Commit(), IsOk());
EXPECT_THAT(deleted_files, testing::UnorderedElementsAre(expired_data_file_path,
expired_data_manifest_path,
expired_manifest_list_path));
}

} // namespace iceberg
Loading
Loading