diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs index 1e86d4192774..385bfb5472a5 100644 --- a/datafusion/datasource-parquet/src/file_format.rs +++ b/datafusion/datasource-parquet/src/file_format.rs @@ -1072,6 +1072,7 @@ pub async fn fetch_statistics( since = "50.0.0", note = "Use `DFParquetMetadata::statistics_from_parquet_metadata` instead" )] +#[expect(clippy::needless_pass_by_value)] pub fn statistics_from_parquet_meta_calc( metadata: &ParquetMetaData, table_schema: SchemaRef, @@ -1500,7 +1501,7 @@ fn spawn_parquet_parallel_serialization_task( serialize_tx: Sender>, schema: Arc, writer_props: Arc, - parallel_options: ParallelParquetWriterOptions, + parallel_options: Arc, pool: Arc, ) -> SpawnedTask> { SpawnedTask::spawn(async move { @@ -1671,7 +1672,7 @@ async fn output_single_parquet_file_parallelized( serialize_tx, Arc::clone(&output_schema), Arc::clone(&arc_props), - parallel_options, + parallel_options.into(), Arc::clone(&pool), ); let parquet_meta_data = concatenate_parallel_row_groups( diff --git a/datafusion/datasource-parquet/src/metadata.rs b/datafusion/datasource-parquet/src/metadata.rs index 6505a447d7ce..fcd3a22dcf94 100644 --- a/datafusion/datasource-parquet/src/metadata.rs +++ b/datafusion/datasource-parquet/src/metadata.rs @@ -314,7 +314,7 @@ impl<'a> DFParquetMetadata<'a> { get_col_stats( table_schema, - null_counts_array, + &null_counts_array, &mut max_accs, &mut min_accs, &mut is_max_value_exact, @@ -362,7 +362,7 @@ fn create_max_min_accs( fn get_col_stats( schema: &Schema, - null_counts: Vec>, + null_counts: &[Precision], max_values: &mut [Option], min_values: &mut [Option], is_max_value_exact: &mut [Option], @@ -432,9 +432,9 @@ fn summarize_min_max_null_counts( max_acc.update_batch(&[Arc::clone(&max_values)])?; let mut cur_max_acc = max_acc.clone(); accumulators.is_max_value_exact[arrow_schema_index] = has_any_exact_match( - cur_max_acc.evaluate()?, - max_values, - is_max_value_exact_stat, + &cur_max_acc.evaluate()?, + &max_values, + &is_max_value_exact_stat, ); } @@ -442,9 +442,9 @@ fn summarize_min_max_null_counts( min_acc.update_batch(&[Arc::clone(&min_values)])?; let mut cur_min_acc = min_acc.clone(); accumulators.is_min_value_exact[arrow_schema_index] = has_any_exact_match( - cur_min_acc.evaluate()?, - min_values, - is_min_value_exact_stat, + &cur_min_acc.evaluate()?, + &min_values, + &is_min_value_exact_stat, ); } @@ -475,13 +475,13 @@ fn summarize_min_max_null_counts( /// values are `[true, false, false]`. Since at least one is `true`, the /// function returns `Some(true)`. fn has_any_exact_match( - value: ScalarValue, - array: ArrayRef, - exactness: BooleanArray, + value: &ScalarValue, + array: &ArrayRef, + exactness: &BooleanArray, ) -> Option { let scalar_array = value.to_scalar().ok()?; let eq_mask = eq(&scalar_array, &array).ok()?; - let combined_mask = and(&eq_mask, &exactness).ok()?; + let combined_mask = and(&eq_mask, exactness).ok()?; Some(combined_mask.true_count() > 0) } @@ -531,7 +531,7 @@ mod tests { let exactness = BooleanArray::from(vec![true, false, false, false, false, false]); - let result = has_any_exact_match(computed_min, row_group_mins, exactness); + let result = has_any_exact_match(&computed_min, &row_group_mins, &exactness); assert_eq!(result, Some(true)); } // Case 2: All inexact matches @@ -542,7 +542,7 @@ mod tests { let exactness = BooleanArray::from(vec![false, false, false, false, false, false]); - let result = has_any_exact_match(computed_min, row_group_mins, exactness); + let result = has_any_exact_match(&computed_min, &row_group_mins, &exactness); assert_eq!(result, Some(false)); } // Case 3: All exact matches @@ -553,7 +553,7 @@ mod tests { let exactness = BooleanArray::from(vec![false, true, true, true, false, true]); - let result = has_any_exact_match(computed_max, row_group_maxes, exactness); + let result = has_any_exact_match(&computed_max, &row_group_maxes, &exactness); assert_eq!(result, Some(true)); } // Case 4: All maxes are null values @@ -563,7 +563,7 @@ mod tests { Arc::new(Int32Array::from(vec![None, None, None, None])) as ArrayRef; let exactness = BooleanArray::from(vec![None, Some(true), None, Some(false)]); - let result = has_any_exact_match(computed_max, row_group_maxes, exactness); + let result = has_any_exact_match(&computed_max, &row_group_maxes, &exactness); assert_eq!(result, Some(false)); } } diff --git a/datafusion/datasource-parquet/src/mod.rs b/datafusion/datasource-parquet/src/mod.rs index 2f64f34bc09b..e0e906f3ce2a 100644 --- a/datafusion/datasource-parquet/src/mod.rs +++ b/datafusion/datasource-parquet/src/mod.rs @@ -18,6 +18,9 @@ // Make sure fast / cheap clones on Arc are explicit: // https://github.com/apache/datafusion/issues/11143 #![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))] +// https://github.com/apache/datafusion/issues/18503 +#![deny(clippy::needless_pass_by_value)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] pub mod access_plan; pub mod file_format; diff --git a/datafusion/datasource-parquet/src/page_filter.rs b/datafusion/datasource-parquet/src/page_filter.rs index 2698b6c5fbb6..9f4e52c513cf 100644 --- a/datafusion/datasource-parquet/src/page_filter.rs +++ b/datafusion/datasource-parquet/src/page_filter.rs @@ -118,6 +118,7 @@ pub struct PagePruningAccessPlanFilter { impl PagePruningAccessPlanFilter { /// Create a new [`PagePruningAccessPlanFilter`] from a physical /// expression. + #[expect(clippy::needless_pass_by_value)] pub fn new(expr: &Arc, schema: SchemaRef) -> Self { // extract any single column predicates let predicates = split_conjunction(expr) diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index 27640f37cee4..7c07b7b68c35 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -334,6 +334,7 @@ impl ParquetSource { } /// Set predicate information + #[expect(clippy::needless_pass_by_value)] pub fn with_predicate(&self, predicate: Arc) -> Self { let mut conf = self.clone(); conf.predicate = Some(Arc::clone(&predicate));