-
Notifications
You must be signed in to change notification settings - Fork 382
Open
Labels
bug: Something isn't working
Description
Apache Iceberg Rust version
None
Describe the bug
The spec requires that manifests written in format version 2+ must include the content field in the Avro file key-value metadata.
Currently the writer only writes the content metadata for V2 manifests. V3 manifests are missing this required field. This causes a roundtrip problem where V3 delete manifests written by iceberg-rust are read back as data manifests.
The fix is probably just to update the condition that writes the content metadata field so that it also covers V3.
To Reproduce
Add the following test to `crates/iceberg/src/spec/manifest/mod.rs`:
/// Reproduction: writes a V3 delete manifest holding a single position-delete
/// entry, reads the file back, and shows that the parsed manifest metadata
/// reports `ManifestContentType::Data` even though the writer returned
/// `ManifestContentType::Deletes`.
#[tokio::test]
async fn test_v3_delete_manifest_delte_file_roundtrip() {
    // Table schema with two optional columns: `id` (long) and `data` (string).
    let id_field = Arc::new(NestedField::optional(
        1,
        "id",
        Type::Primitive(PrimitiveType::Long),
    ));
    let data_field = Arc::new(NestedField::optional(
        2,
        "data",
        Type::Primitive(PrimitiveType::String),
    ));
    let table_schema = Arc::new(
        Schema::builder()
            .with_fields(vec![id_field, data_field])
            .build()
            .unwrap(),
    );

    // Partition spec with id 0 and no partition fields added.
    let spec = PartitionSpec::builder(table_schema.clone())
        .with_spec_id(0)
        .build()
        .unwrap();

    // Describe a position delete file; all statistics are left empty.
    let position_delete_file = DataFile {
        content: DataContentType::PositionDeletes,
        file_path: "s3://bucket/table/data/delete-00000.parquet".to_string(),
        file_format: DataFileFormat::Parquet,
        partition: Struct::empty(),
        record_count: 10,
        file_size_in_bytes: 1024,
        column_sizes: HashMap::new(),
        value_counts: HashMap::new(),
        null_value_counts: HashMap::new(),
        nan_value_counts: HashMap::new(),
        lower_bounds: HashMap::new(),
        upper_bounds: HashMap::new(),
        key_metadata: None,
        split_offsets: None,
        equality_ids: None,
        sort_order_id: None,
        partition_spec_id: 0,
        first_row_id: None,
        referenced_data_file: None,
        content_offset: None,
        content_size_in_bytes: None,
    };
    let entry = ManifestEntry {
        status: ManifestStatus::Added,
        snapshot_id: None,
        sequence_number: None,
        file_sequence_number: None,
        data_file: position_delete_file,
    };

    // Write the entry into a V3 delete manifest inside a temporary directory.
    // `scratch_dir` stays bound until the end of the test so the file outlives the read below.
    let scratch_dir = TempDir::new().unwrap();
    let manifest_path = scratch_dir.path().join("v3_delete_manifest.avro");
    let file_io = FileIOBuilder::new_fs_io().build().unwrap();
    let sink = file_io.new_output(manifest_path.to_str().unwrap()).unwrap();
    let builder = ManifestWriterBuilder::new(
        sink,
        Some(1),
        None,
        table_schema.clone(),
        spec.clone(),
    );
    let mut manifest_writer = builder.build_v3_deletes();
    manifest_writer.add_entry(entry).unwrap();
    let written = manifest_writer.write_manifest_file().await.unwrap();

    // The ManifestFile handed back by the writer correctly reports Deletes content.
    assert_eq!(written.content, ManifestContentType::Deletes);

    // Load the bytes from disk and parse them as a manifest again.
    let raw_bytes = fs::read(&manifest_path).expect("read_file must succeed");
    let reparsed = Manifest::parse_avro(raw_bytes.as_slice()).unwrap();

    // Because of the bug, the content type read back is Data.
    assert_eq!(reparsed.metadata().content, ManifestContentType::Data);

    // Expected:
    // assert_eq!(
    //     actual_manifest.metadata().content,
    //     ManifestContentType::Deletes,
    // );
}
Expected behavior
No response
Willingness to contribute
None
CTTYCTTY
Metadata
Metadata
Assignees
Labels
bug: Something isn't working