Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions datafusion/datasource-parquet/src/file_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,7 @@ pub async fn fetch_statistics(
since = "50.0.0",
note = "Use `DFParquetMetadata::statistics_from_parquet_metadata` instead"
)]
#[expect(clippy::needless_pass_by_value)]
pub fn statistics_from_parquet_meta_calc(
metadata: &ParquetMetaData,
table_schema: SchemaRef,
Expand Down Expand Up @@ -1500,7 +1501,7 @@ fn spawn_parquet_parallel_serialization_task(
serialize_tx: Sender<SpawnedTask<RBStreamSerializeResult>>,
schema: Arc<Schema>,
writer_props: Arc<WriterProperties>,
parallel_options: ParallelParquetWriterOptions,
parallel_options: Arc<ParallelParquetWriterOptions>,
pool: Arc<dyn MemoryPool>,
) -> SpawnedTask<Result<(), DataFusionError>> {
SpawnedTask::spawn(async move {
Expand Down Expand Up @@ -1671,7 +1672,7 @@ async fn output_single_parquet_file_parallelized(
serialize_tx,
Arc::clone(&output_schema),
Arc::clone(&arc_props),
parallel_options,
parallel_options.into(),
Arc::clone(&pool),
);
let parquet_meta_data = concatenate_parallel_row_groups(
Expand Down
32 changes: 16 additions & 16 deletions datafusion/datasource-parquet/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ impl<'a> DFParquetMetadata<'a> {

get_col_stats(
table_schema,
null_counts_array,
&null_counts_array,
&mut max_accs,
&mut min_accs,
&mut is_max_value_exact,
Expand Down Expand Up @@ -362,7 +362,7 @@ fn create_max_min_accs(

fn get_col_stats(
schema: &Schema,
null_counts: Vec<Precision<usize>>,
null_counts: &[Precision<usize>],
max_values: &mut [Option<MaxAccumulator>],
min_values: &mut [Option<MinAccumulator>],
is_max_value_exact: &mut [Option<bool>],
Expand Down Expand Up @@ -432,19 +432,19 @@ fn summarize_min_max_null_counts(
max_acc.update_batch(&[Arc::clone(&max_values)])?;
let mut cur_max_acc = max_acc.clone();
accumulators.is_max_value_exact[arrow_schema_index] = has_any_exact_match(
cur_max_acc.evaluate()?,
max_values,
is_max_value_exact_stat,
&cur_max_acc.evaluate()?,
&max_values,
&is_max_value_exact_stat,
);
}

if let Some(min_acc) = &mut accumulators.min_accs[arrow_schema_index] {
min_acc.update_batch(&[Arc::clone(&min_values)])?;
let mut cur_min_acc = min_acc.clone();
accumulators.is_min_value_exact[arrow_schema_index] = has_any_exact_match(
cur_min_acc.evaluate()?,
min_values,
is_min_value_exact_stat,
&cur_min_acc.evaluate()?,
&min_values,
&is_min_value_exact_stat,
);
}

Expand Down Expand Up @@ -475,13 +475,13 @@ fn summarize_min_max_null_counts(
/// values are `[true, false, false]`. Since at least one is `true`, the
/// function returns `Some(true)`.
fn has_any_exact_match(
value: ScalarValue,
array: ArrayRef,
exactness: BooleanArray,
value: &ScalarValue,
array: &ArrayRef,
exactness: &BooleanArray,
) -> Option<bool> {
let scalar_array = value.to_scalar().ok()?;
let eq_mask = eq(&scalar_array, &array).ok()?;
let combined_mask = and(&eq_mask, &exactness).ok()?;
let combined_mask = and(&eq_mask, exactness).ok()?;
Some(combined_mask.true_count() > 0)
}

Expand Down Expand Up @@ -531,7 +531,7 @@ mod tests {
let exactness =
BooleanArray::from(vec![true, false, false, false, false, false]);

let result = has_any_exact_match(computed_min, row_group_mins, exactness);
let result = has_any_exact_match(&computed_min, &row_group_mins, &exactness);
assert_eq!(result, Some(true));
}
// Case 2: All inexact matches
Expand All @@ -542,7 +542,7 @@ mod tests {
let exactness =
BooleanArray::from(vec![false, false, false, false, false, false]);

let result = has_any_exact_match(computed_min, row_group_mins, exactness);
let result = has_any_exact_match(&computed_min, &row_group_mins, &exactness);
assert_eq!(result, Some(false));
}
// Case 3: All exact matches
Expand All @@ -553,7 +553,7 @@ mod tests {
let exactness =
BooleanArray::from(vec![false, true, true, true, false, true]);

let result = has_any_exact_match(computed_max, row_group_maxes, exactness);
let result = has_any_exact_match(&computed_max, &row_group_maxes, &exactness);
assert_eq!(result, Some(true));
}
// Case 4: All maxes are null values
Expand All @@ -563,7 +563,7 @@ mod tests {
Arc::new(Int32Array::from(vec![None, None, None, None])) as ArrayRef;
let exactness = BooleanArray::from(vec![None, Some(true), None, Some(false)]);

let result = has_any_exact_match(computed_max, row_group_maxes, exactness);
let result = has_any_exact_match(&computed_max, &row_group_maxes, &exactness);
assert_eq!(result, Some(false));
}
}
Expand Down
3 changes: 3 additions & 0 deletions datafusion/datasource-parquet/src/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
// Make sure fast / cheap clones on Arc are explicit:
// https://github.com/apache/datafusion/issues/11143
#![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))]
// https://github.com/apache/datafusion/issues/18503
#![deny(clippy::needless_pass_by_value)]
#![cfg_attr(test, allow(clippy::needless_pass_by_value))]

pub mod access_plan;
pub mod file_format;
Expand Down
1 change: 1 addition & 0 deletions datafusion/datasource-parquet/src/page_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ pub struct PagePruningAccessPlanFilter {
impl PagePruningAccessPlanFilter {
/// Create a new [`PagePruningAccessPlanFilter`] from a physical
/// expression.
#[expect(clippy::needless_pass_by_value)]
pub fn new(expr: &Arc<dyn PhysicalExpr>, schema: SchemaRef) -> Self {
// extract any single column predicates
let predicates = split_conjunction(expr)
Expand Down
1 change: 1 addition & 0 deletions datafusion/datasource-parquet/src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ impl ParquetSource {
}

/// Set predicate information
#[expect(clippy::needless_pass_by_value)]
pub fn with_predicate(&self, predicate: Arc<dyn PhysicalExpr>) -> Self {
let mut conf = self.clone();
conf.predicate = Some(Arc::clone(&predicate));
Expand Down