From 9f4651d08480919c301818d00eb9ec10dd2a9bf7 Mon Sep 17 00:00:00 2001 From: piyushh Date: Thu, 4 Sep 2025 15:25:45 +0530 Subject: [PATCH] refactor(test): remove MockLocationGenerator and its usages (#1645) --- .../writer/base_writer/data_file_writer.rs | 15 +++-- .../base_writer/equality_delete_writer.rs | 15 +++-- .../writer/file_writer/location_generator.rs | 43 +++++---------- .../src/writer/file_writer/parquet_writer.rs | 55 +++++++++++-------- .../src/writer/file_writer/rolling_writer.rs | 15 +++-- 5 files changed, 73 insertions(+), 70 deletions(-) diff --git a/crates/iceberg/src/writer/base_writer/data_file_writer.rs b/crates/iceberg/src/writer/base_writer/data_file_writer.rs index ff35b9ae4..f215673df 100644 --- a/crates/iceberg/src/writer/base_writer/data_file_writer.rs +++ b/crates/iceberg/src/writer/base_writer/data_file_writer.rs @@ -120,16 +120,18 @@ mod test { }; use crate::writer::base_writer::data_file_writer::DataFileWriterBuilder; use crate::writer::file_writer::ParquetWriterBuilder; - use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; - use crate::writer::file_writer::location_generator::test::MockLocationGenerator; + use crate::writer::file_writer::location_generator::{ + DefaultFileNameGenerator, DefaultLocationGenerator, + }; use crate::writer::{IcebergWriter, IcebergWriterBuilder, RecordBatch}; #[tokio::test] async fn test_parquet_writer() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -201,8 +203,9 @@ mod test { async fn test_parquet_writer_with_partition() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new( "test_partitioned".to_string(), None, diff --git a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs index 058b74378..f710544d9 100644 --- a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs +++ b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs @@ -198,8 +198,9 @@ mod test { EqualityDeleteFileWriterBuilder, EqualityDeleteWriterConfig, }; use crate::writer::file_writer::ParquetWriterBuilder; - use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; - use crate::writer::file_writer::location_generator::test::MockLocationGenerator; + use crate::writer::file_writer::location_generator::{ + DefaultFileNameGenerator, DefaultLocationGenerator, + }; use crate::writer::{IcebergWriter, IcebergWriterBuilder}; async fn check_parquet_data_file_with_equality_delete_write( @@ -282,8 +283,9 @@ mod test { async fn test_equality_delete_writer() -> Result<(), anyhow::Error> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -518,8 +520,9 @@ mod test { async fn test_equality_delete_with_primitive_type() -> Result<(), anyhow::Error> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); diff --git a/crates/iceberg/src/writer/file_writer/location_generator.rs b/crates/iceberg/src/writer/file_writer/location_generator.rs index e0692d130..4a73fc148 100644 --- a/crates/iceberg/src/writer/file_writer/location_generator.rs +++ b/crates/iceberg/src/writer/file_writer/location_generator.rs @@ -67,6 +67,15 @@ impl DefaultLocationGenerator { }; Ok(Self { data_location }) } + + /// Create a new `DefaultLocationGenerator` with a specified data location. + /// + /// # Arguments + /// + /// * `data_location` - The data location to use for generating file locations. + pub fn with_data_location(data_location: String) -> Self { + Self { data_location } + } } impl LocationGenerator for DefaultLocationGenerator { @@ -144,35 +153,10 @@ pub(crate) mod test { Struct, StructType, TableMetadata, Transform, Type, }; use crate::writer::file_writer::location_generator::{ - FileNameGenerator, WRITE_DATA_LOCATION, WRITE_FOLDER_STORAGE_LOCATION, + DefaultLocationGenerator, FileNameGenerator, WRITE_DATA_LOCATION, + WRITE_FOLDER_STORAGE_LOCATION, }; - #[derive(Clone)] - pub(crate) struct MockLocationGenerator { - root: String, - } - - impl MockLocationGenerator { - pub(crate) fn new(root: String) -> Self { - Self { root } - } - } - - impl LocationGenerator for MockLocationGenerator { - fn generate_location(&self, partition: Option<&PartitionKey>, file_name: &str) -> String { - if PartitionKey::is_effectively_none(partition) { - format!("{}/{}", self.root, file_name) - } else { - format!( - "{}/{}/{}", - self.root, - partition.unwrap().to_path(), - file_name - ) - } - } - } - #[test] fn test_default_location_generate() { let mut table_metadata = TableMetadata { @@ -283,10 +267,9 @@ pub(crate) mod test { // Create a partition key let partition_key = PartitionKey::new(partition_spec, schema, partition_data); - // Test with MockLocationGenerator - let mock_location_gen = MockLocationGenerator::new("/base/path".to_string()); + let location_gen = DefaultLocationGenerator::with_data_location("/base/path".to_string()); let file_name = "data-00000.parquet"; - let location = mock_location_gen.generate_location(Some(&partition_key), file_name); + let location = location_gen.generate_location(Some(&partition_key), file_name); assert_eq!(location, "/base/path/id=42/name=alice/data-00000.parquet"); // Create a table metadata for DefaultLocationGenerator diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 90c0d28c0..0a8a095ea 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -702,8 +702,9 @@ mod tests { use crate::arrow::schema_to_arrow_schema; use crate::io::FileIOBuilder; use crate::spec::{PrimitiveLiteral, Struct, *}; - use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; - use crate::writer::file_writer::location_generator::test::MockLocationGenerator; + use crate::writer::file_writer::location_generator::{ + DefaultFileNameGenerator, DefaultLocationGenerator, + }; use crate::writer::tests::check_parquet_data_file; fn schema_for_all_type() -> Schema { @@ -862,8 +863,9 @@ mod tests { async fn test_parquet_writer() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -933,8 +935,9 @@ mod tests { async fn test_parquet_writer_with_complex_schema() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -1158,8 +1161,9 @@ mod tests { async fn test_all_type_for_write() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let loccation_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let loccation_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -1401,8 +1405,9 @@ mod tests { async fn test_decimal_bound() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let loccation_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let loccation_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -1654,8 +1659,9 @@ mod tests { async fn test_empty_write() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -1709,8 +1715,9 @@ mod tests { async fn test_nan_val_cnts_primitive_type() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -1797,8 +1804,9 @@ mod tests { async fn test_nan_val_cnts_struct_type() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -1937,8 +1945,9 @@ mod tests { async fn test_nan_val_cnts_list_type() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -2139,8 +2148,9 @@ mod tests { async fn test_nan_val_cnts_map_type() -> Result<()> { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -2295,8 +2305,9 @@ mod tests { async fn test_write_empty_parquet_file() { let temp_dir = TempDir::new().unwrap(); let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs index 24aa49e4a..181205c37 100644 --- a/crates/iceberg/src/writer/file_writer/rolling_writer.rs +++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs @@ -156,8 +156,9 @@ mod tests { use crate::spec::{DataFileFormat, NestedField, PrimitiveType, Schema, Type}; use crate::writer::base_writer::data_file_writer::DataFileWriterBuilder; use crate::writer::file_writer::ParquetWriterBuilder; - use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; - use crate::writer::file_writer::location_generator::test::MockLocationGenerator; + use crate::writer::file_writer::location_generator::{ + DefaultFileNameGenerator, DefaultLocationGenerator, + }; use crate::writer::tests::check_parquet_data_file; use crate::writer::{IcebergWriter, IcebergWriterBuilder, RecordBatch}; @@ -188,8 +189,9 @@ mod tests { async fn test_rolling_writer_basic() -> Result<()> { let temp_dir = TempDir::new()?; let file_io = FileIOBuilder::new_fs_io().build()?; - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); @@ -248,8 +250,9 @@ mod tests { async fn test_rolling_writer_with_rolling() -> Result<()> { let temp_dir = TempDir::new()?; let file_io = FileIOBuilder::new_fs_io().build()?; - let location_gen = - MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string()); + let location_gen = DefaultLocationGenerator::with_data_location( + temp_dir.path().to_str().unwrap().to_string(), + ); let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);