Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions crates/iceberg/src/writer/base_writer/data_file_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,18 @@ mod test {
};
use crate::writer::base_writer::data_file_writer::DataFileWriterBuilder;
use crate::writer::file_writer::ParquetWriterBuilder;
use crate::writer::file_writer::location_generator::DefaultFileNameGenerator;
use crate::writer::file_writer::location_generator::test::MockLocationGenerator;
use crate::writer::file_writer::location_generator::{
DefaultFileNameGenerator, DefaultLocationGenerator,
};
use crate::writer::{IcebergWriter, IcebergWriterBuilder, RecordBatch};

#[tokio::test]
async fn test_parquet_writer() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -201,8 +203,9 @@ mod test {
async fn test_parquet_writer_with_partition() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen = DefaultFileNameGenerator::new(
"test_partitioned".to_string(),
None,
Expand Down
15 changes: 9 additions & 6 deletions crates/iceberg/src/writer/base_writer/equality_delete_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,9 @@ mod test {
EqualityDeleteFileWriterBuilder, EqualityDeleteWriterConfig,
};
use crate::writer::file_writer::ParquetWriterBuilder;
use crate::writer::file_writer::location_generator::DefaultFileNameGenerator;
use crate::writer::file_writer::location_generator::test::MockLocationGenerator;
use crate::writer::file_writer::location_generator::{
DefaultFileNameGenerator, DefaultLocationGenerator,
};
use crate::writer::{IcebergWriter, IcebergWriterBuilder};

async fn check_parquet_data_file_with_equality_delete_write(
Expand Down Expand Up @@ -282,8 +283,9 @@ mod test {
async fn test_equality_delete_writer() -> Result<(), anyhow::Error> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -518,8 +520,9 @@ mod test {
async fn test_equality_delete_with_primitive_type() -> Result<(), anyhow::Error> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down
43 changes: 13 additions & 30 deletions crates/iceberg/src/writer/file_writer/location_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ impl DefaultLocationGenerator {
};
Ok(Self { data_location })
}

/// Create a new `DefaultLocationGenerator` with a specified data location.
///
/// # Arguments
///
/// * `data_location` - The data location to use for generating file locations.
pub fn with_data_location(data_location: String) -> Self {
Self { data_location }
}
}

impl LocationGenerator for DefaultLocationGenerator {
Expand Down Expand Up @@ -144,35 +153,10 @@ pub(crate) mod test {
Struct, StructType, TableMetadata, Transform, Type,
};
use crate::writer::file_writer::location_generator::{
FileNameGenerator, WRITE_DATA_LOCATION, WRITE_FOLDER_STORAGE_LOCATION,
DefaultLocationGenerator, FileNameGenerator, WRITE_DATA_LOCATION,
WRITE_FOLDER_STORAGE_LOCATION,
};

#[derive(Clone)]
pub(crate) struct MockLocationGenerator {
root: String,
}

impl MockLocationGenerator {
pub(crate) fn new(root: String) -> Self {
Self { root }
}
}

impl LocationGenerator for MockLocationGenerator {
fn generate_location(&self, partition: Option<&PartitionKey>, file_name: &str) -> String {
if PartitionKey::is_effectively_none(partition) {
format!("{}/{}", self.root, file_name)
} else {
format!(
"{}/{}/{}",
self.root,
partition.unwrap().to_path(),
file_name
)
}
}
}

#[test]
fn test_default_location_generate() {
let mut table_metadata = TableMetadata {
Expand Down Expand Up @@ -283,10 +267,9 @@ pub(crate) mod test {
// Create a partition key
let partition_key = PartitionKey::new(partition_spec, schema, partition_data);

// Test with MockLocationGenerator
let mock_location_gen = MockLocationGenerator::new("/base/path".to_string());
let location_gen = DefaultLocationGenerator::with_data_location("/base/path".to_string());
let file_name = "data-00000.parquet";
let location = mock_location_gen.generate_location(Some(&partition_key), file_name);
let location = location_gen.generate_location(Some(&partition_key), file_name);
assert_eq!(location, "/base/path/id=42/name=alice/data-00000.parquet");

// Create a table metadata for DefaultLocationGenerator
Expand Down
55 changes: 33 additions & 22 deletions crates/iceberg/src/writer/file_writer/parquet_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -702,8 +702,9 @@ mod tests {
use crate::arrow::schema_to_arrow_schema;
use crate::io::FileIOBuilder;
use crate::spec::{PrimitiveLiteral, Struct, *};
use crate::writer::file_writer::location_generator::DefaultFileNameGenerator;
use crate::writer::file_writer::location_generator::test::MockLocationGenerator;
use crate::writer::file_writer::location_generator::{
DefaultFileNameGenerator, DefaultLocationGenerator,
};
use crate::writer::tests::check_parquet_data_file;

fn schema_for_all_type() -> Schema {
Expand Down Expand Up @@ -862,8 +863,9 @@ mod tests {
async fn test_parquet_writer() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -933,8 +935,9 @@ mod tests {
async fn test_parquet_writer_with_complex_schema() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -1158,8 +1161,9 @@ mod tests {
async fn test_all_type_for_write() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let loccation_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let loccation_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -1401,8 +1405,9 @@ mod tests {
async fn test_decimal_bound() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let loccation_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let loccation_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -1654,8 +1659,9 @@ mod tests {
async fn test_empty_write() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -1709,8 +1715,9 @@ mod tests {
async fn test_nan_val_cnts_primitive_type() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -1797,8 +1804,9 @@ mod tests {
async fn test_nan_val_cnts_struct_type() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -1937,8 +1945,9 @@ mod tests {
async fn test_nan_val_cnts_list_type() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -2139,8 +2148,9 @@ mod tests {
async fn test_nan_val_cnts_map_type() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -2295,8 +2305,9 @@ mod tests {
async fn test_write_empty_parquet_file() {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down
15 changes: 9 additions & 6 deletions crates/iceberg/src/writer/file_writer/rolling_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,9 @@ mod tests {
use crate::spec::{DataFileFormat, NestedField, PrimitiveType, Schema, Type};
use crate::writer::base_writer::data_file_writer::DataFileWriterBuilder;
use crate::writer::file_writer::ParquetWriterBuilder;
use crate::writer::file_writer::location_generator::DefaultFileNameGenerator;
use crate::writer::file_writer::location_generator::test::MockLocationGenerator;
use crate::writer::file_writer::location_generator::{
DefaultFileNameGenerator, DefaultLocationGenerator,
};
use crate::writer::tests::check_parquet_data_file;
use crate::writer::{IcebergWriter, IcebergWriterBuilder, RecordBatch};

Expand Down Expand Up @@ -188,8 +189,9 @@ mod tests {
async fn test_rolling_writer_basic() -> Result<()> {
let temp_dir = TempDir::new()?;
let file_io = FileIOBuilder::new_fs_io().build()?;
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down Expand Up @@ -248,8 +250,9 @@ mod tests {
async fn test_rolling_writer_with_rolling() -> Result<()> {
let temp_dir = TempDir::new()?;
let file_io = FileIOBuilder::new_fs_io().build()?;
let location_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let location_gen = DefaultLocationGenerator::with_data_location(
temp_dir.path().to_str().unwrap().to_string(),
);
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet);

Expand Down
Loading