Skip to content

Commit

Permalink
Add coerce_types flag to parquet WriterProperties (#1938)
Browse files Browse the repository at this point in the history
  • Loading branch information
getChan committed Apr 13, 2024
1 parent a999fb8 commit 42322f8
Showing 1 changed file with 20 additions and 0 deletions.
20 changes: 20 additions & 0 deletions parquet/src/file/properties.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ pub const DEFAULT_BLOOM_FILTER_FPP: f64 = 0.05;
pub const DEFAULT_BLOOM_FILTER_NDV: u64 = 1_000_000_u64;
/// Default values for [`WriterProperties::statistics_truncate_length`]
pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option<usize> = None;
/// Default value for [`WriterProperties::coerce_types`]: type coercion is
/// disabled unless it is explicitly turned on through the builder.
pub const DEFAULT_COERCE_TYPES: bool = false;

/// Parquet writer version.
///
Expand Down Expand Up @@ -139,6 +141,7 @@ pub struct WriterProperties {
sorting_columns: Option<Vec<SortingColumn>>,
column_index_truncate_length: Option<usize>,
statistics_truncate_length: Option<usize>,
coerce_types: bool,
}

impl Default for WriterProperties {
Expand Down Expand Up @@ -251,6 +254,13 @@ impl WriterProperties {
self.statistics_truncate_length
}

/// Returns whether type coercion is enabled for the writer.
///
/// `true` if type coercion is enabled, `false` otherwise. The value is
/// [`DEFAULT_COERCE_TYPES`] unless it was overridden with
/// [`WriterPropertiesBuilder::set_coerce_types`].
pub fn coerce_types(&self) -> bool {
self.coerce_types
}

/// Returns encoding for a data page, when dictionary encoding is enabled.
/// This is not configurable.
#[inline]
Expand Down Expand Up @@ -345,6 +355,7 @@ pub struct WriterPropertiesBuilder {
sorting_columns: Option<Vec<SortingColumn>>,
column_index_truncate_length: Option<usize>,
statistics_truncate_length: Option<usize>,
coerce_types: bool,
}

impl WriterPropertiesBuilder {
Expand All @@ -364,6 +375,7 @@ impl WriterPropertiesBuilder {
sorting_columns: None,
column_index_truncate_length: DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH,
statistics_truncate_length: DEFAULT_STATISTICS_TRUNCATE_LENGTH,
coerce_types: DEFAULT_COERCE_TYPES,
}
}

Expand All @@ -383,6 +395,7 @@ impl WriterPropertiesBuilder {
sorting_columns: self.sorting_columns,
column_index_truncate_length: self.column_index_truncate_length,
statistics_truncate_length: self.statistics_truncate_length,
coerce_types: self.coerce_types,
}
}

Expand Down Expand Up @@ -667,6 +680,13 @@ impl WriterPropertiesBuilder {
self.statistics_truncate_length = max_length;
self
}

/// Sets the flag that enables (`true`) or disables (`false`) type coercion.
///
/// The flag is stored on the builder itself (this setter takes no column
/// path); the builder starts out with [`DEFAULT_COERCE_TYPES`]. Returns the
/// builder so that calls can be chained.
// NOTE(review): an earlier wording said this "takes precedence over globally
// defined settings"; no per-column counterpart of this flag is visible here,
// so that claim was dropped -- confirm against the rest of the builder.
pub fn set_coerce_types(mut self, coerce_types: bool) -> Self {
self.coerce_types = coerce_types;
self
}
}

/// Controls the level of statistics to be computed by the writer
Expand Down

0 comments on commit 42322f8

Please sign in to comment.