From 7195ae02e1832ef14969024cd75e4c9dbd2bd278 Mon Sep 17 00:00:00 2001 From: foskey51 Date: Wed, 12 Nov 2025 13:10:02 +0000 Subject: [PATCH 1/3] chore: enforce clippy lint needless_pass_by_value to datafusion-datasource-parquet --- .../data_io/parquet_advanced_index.rs | 2 +- .../examples/data_io/parquet_index.rs | 2 +- .../examples/default_column_values.rs | 2 +- .../src/datasource/physical_plan/parquet.rs | 2 +- datafusion/core/src/test_util/parquet.rs | 2 +- datafusion/core/tests/fuzz_cases/pruning.rs | 2 +- .../tests/parquet/external_access_plan.rs | 2 +- datafusion/core/tests/parquet/page_pruning.rs | 2 +- .../datasource-parquet/src/file_format.rs | 9 +++--- datafusion/datasource-parquet/src/metadata.rs | 32 +++++++++---------- datafusion/datasource-parquet/src/mod.rs | 3 ++ datafusion/datasource-parquet/src/opener.rs | 2 +- .../datasource-parquet/src/page_filter.rs | 4 +-- datafusion/datasource-parquet/src/source.rs | 8 ++--- datafusion/proto/src/physical_plan/mod.rs | 2 +- .../tests/cases/roundtrip_physical_plan.rs | 4 +-- 16 files changed, 42 insertions(+), 38 deletions(-) diff --git a/datafusion-examples/examples/data_io/parquet_advanced_index.rs b/datafusion-examples/examples/data_io/parquet_advanced_index.rs index af1e03fe4ddb..e4034baf1f65 100644 --- a/datafusion-examples/examples/data_io/parquet_advanced_index.rs +++ b/datafusion-examples/examples/data_io/parquet_advanced_index.rs @@ -494,7 +494,7 @@ impl TableProvider for IndexTableProvider { ParquetSource::new(schema.clone()) // provide the predicate so the DataSourceExec can try and prune // row groups internally - .with_predicate(predicate) + .with_predicate(&predicate) // provide the factory to create parquet reader without re-reading metadata .with_parquet_file_reader_factory(Arc::new(reader_factory)), ); diff --git a/datafusion-examples/examples/data_io/parquet_index.rs b/datafusion-examples/examples/data_io/parquet_index.rs index 4cca73b1f9be..7c517f260fda 100644 --- a/datafusion-examples/examples/data_io/parquet_index.rs +++ b/datafusion-examples/examples/data_io/parquet_index.rs @@ -242,7 +242,7 @@ impl TableProvider for IndexTableProvider { let object_store_url = ObjectStoreUrl::parse("file://")?; let source = - Arc::new(ParquetSource::new(self.schema()).with_predicate(predicate)); + Arc::new(ParquetSource::new(self.schema()).with_predicate(&predicate)); let mut file_scan_config_builder = FileScanConfigBuilder::new(object_store_url, source) .with_projection_indices(projection.cloned()) diff --git a/datafusion-examples/examples/default_column_values.rs b/datafusion-examples/examples/default_column_values.rs index bfc60519f26e..cb8cf39b50a2 100644 --- a/datafusion-examples/examples/default_column_values.rs +++ b/datafusion-examples/examples/default_column_values.rs @@ -236,7 +236,7 @@ impl TableProvider for DefaultValueTableProvider { )?; let parquet_source = ParquetSource::new(schema.clone()) - .with_predicate(filter) + .with_predicate(&filter) .with_pushdown_filters(true); let object_store_url = ObjectStoreUrl::parse("memory://")?; diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index b27dcf56e33c..303f02f90932 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -163,7 +163,7 @@ mod tests { let mut source = ParquetSource::new(table_schema); if let Some(predicate) = predicate { - source = source.with_predicate(predicate); + source = 
source.with_predicate(&predicate);
         }
 
         if self.pushdown_predicate {
diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs
index b5213cee3f2d..69095d9b3564 100644
--- a/datafusion/core/src/test_util/parquet.rs
+++ b/datafusion/core/src/test_util/parquet.rs
@@ -185,7 +185,7 @@ impl TestParquetFile {
         let source = Arc::new(
             ParquetSource::new(Arc::clone(&self.schema))
                 .with_table_parquet_options(parquet_options)
-                .with_predicate(Arc::clone(&physical_filter_expr)),
+                .with_predicate(&Arc::clone(&physical_filter_expr)),
         );
         let config = scan_config_builder.with_source(source).build();
         let parquet_exec = DataSourceExec::from_data_source(config);
diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs
index 51ec8f03e5d2..e33fffb048c2 100644
--- a/datafusion/core/tests/fuzz_cases/pruning.rs
+++ b/datafusion/core/tests/fuzz_cases/pruning.rs
@@ -276,7 +276,7 @@ async fn execute_with_predicate(
     ctx: &SessionContext,
 ) -> Vec {
     let parquet_source = if prune_stats {
-        ParquetSource::new(schema.clone()).with_predicate(predicate.clone())
+        ParquetSource::new(schema.clone()).with_predicate(&predicate)
     } else {
         ParquetSource::new(schema.clone())
     };
diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs
index b35cb6e09cfb..f0df262e4d9f 100644
--- a/datafusion/core/tests/parquet/external_access_plan.rs
+++ b/datafusion/core/tests/parquet/external_access_plan.rs
@@ -355,7 +355,7 @@ impl TestFull {
         let source = if let Some(predicate) = predicate {
             let df_schema = DFSchema::try_from(schema.clone())?;
             let predicate = ctx.create_physical_expr(predicate, &df_schema)?;
-            Arc::new(ParquetSource::new(schema.clone()).with_predicate(predicate))
+            Arc::new(ParquetSource::new(schema.clone()).with_predicate(&predicate))
         } else {
             Arc::new(ParquetSource::new(schema.clone()))
         };
diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs
index fb2a196b0aa6..21e7b3a64f27 100644
--- a/datafusion/core/tests/parquet/page_pruning.rs
+++ b/datafusion/core/tests/parquet/page_pruning.rs
@@ -82,7 +82,7 @@ async fn get_parquet_exec(
 
     let source = Arc::new(
         ParquetSource::new(schema.clone())
-            .with_predicate(predicate)
+            .with_predicate(&predicate)
             .with_enable_page_index(true)
            .with_pushdown_filters(pushdown_filters),
     );
diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs
index 1e86d4192774..83436fa70002 100644
--- a/datafusion/datasource-parquet/src/file_format.rs
+++ b/datafusion/datasource-parquet/src/file_format.rs
@@ -1074,9 +1074,9 @@ pub async fn fetch_statistics(
 )]
 pub fn statistics_from_parquet_meta_calc(
     metadata: &ParquetMetaData,
-    table_schema: SchemaRef,
+    table_schema: &SchemaRef,
 ) -> Result<Statistics> {
-    DFParquetMetadata::statistics_from_parquet_metadata(metadata, &table_schema)
+    DFParquetMetadata::statistics_from_parquet_metadata(metadata, table_schema)
 }
 
 /// Implements [`DataSink`] for writing to a parquet file.
@@ -1500,9 +1500,10 @@ fn spawn_parquet_parallel_serialization_task(
     serialize_tx: Sender<SpawnedTask<RBStreamSerializeResult>>,
     schema: Arc<Schema>,
     writer_props: Arc<WriterProperties>,
-    parallel_options: ParallelParquetWriterOptions,
+    parallel_options: &ParallelParquetWriterOptions,
     pool: Arc<dyn MemoryPool>,
 ) -> SpawnedTask<Result<(), DataFusionError>> {
+    let parallel_options = parallel_options.clone();
     SpawnedTask::spawn(async move {
         let max_buffer_rb = parallel_options.max_buffered_record_batches_per_stream;
         let max_row_group_rows = writer_props.max_row_group_size();
@@ -1671,7 +1672,7 @@ async fn output_single_parquet_file_parallelized(
         serialize_tx,
         Arc::clone(&output_schema),
         Arc::clone(&arc_props),
-        parallel_options,
+        &parallel_options,
         Arc::clone(&pool),
     );
     let parquet_meta_data = concatenate_parallel_row_groups(
diff --git a/datafusion/datasource-parquet/src/metadata.rs b/datafusion/datasource-parquet/src/metadata.rs
index 6505a447d7ce..fcd3a22dcf94 100644
--- a/datafusion/datasource-parquet/src/metadata.rs
+++ b/datafusion/datasource-parquet/src/metadata.rs
@@ -314,7 +314,7 @@ impl<'a> DFParquetMetadata<'a> {
 
         get_col_stats(
             table_schema,
-            null_counts_array,
+            &null_counts_array,
             &mut max_accs,
             &mut min_accs,
             &mut is_max_value_exact,
@@ -362,7 +362,7 @@ fn create_max_min_accs(
 
 fn get_col_stats(
     schema: &Schema,
-    null_counts: Vec<Precision<usize>>,
+    null_counts: &[Precision<usize>],
     max_values: &mut [Option<MaxAccumulator>],
     min_values: &mut [Option<MinAccumulator>],
     is_max_value_exact: &mut [Option<bool>],
@@ -432,9 +432,9 @@ fn summarize_min_max_null_counts(
         max_acc.update_batch(&[Arc::clone(&max_values)])?;
         let mut cur_max_acc = max_acc.clone();
         accumulators.is_max_value_exact[arrow_schema_index] = has_any_exact_match(
-            cur_max_acc.evaluate()?,
-            max_values,
-            is_max_value_exact_stat,
+            &cur_max_acc.evaluate()?,
+            &max_values,
+            &is_max_value_exact_stat,
         );
     }
 
@@ -442,9 +442,9 @@ fn summarize_min_max_null_counts(
         min_acc.update_batch(&[Arc::clone(&min_values)])?;
         let mut cur_min_acc = min_acc.clone();
         accumulators.is_min_value_exact[arrow_schema_index] = has_any_exact_match(
-            cur_min_acc.evaluate()?,
-            min_values,
-            is_min_value_exact_stat,
+            &cur_min_acc.evaluate()?,
+            &min_values,
+            &is_min_value_exact_stat,
         );
     }
 
@@ -475,13 +475,13 @@ fn summarize_min_max_null_counts(
 /// values are `[true, false, false]`. Since at least one is `true`, the
 /// function returns `Some(true)`.
 fn has_any_exact_match(
-    value: ScalarValue,
-    array: ArrayRef,
-    exactness: BooleanArray,
+    value: &ScalarValue,
+    array: &ArrayRef,
+    exactness: &BooleanArray,
 ) -> Option<bool> {
     let scalar_array = value.to_scalar().ok()?;
     let eq_mask = eq(&scalar_array, &array).ok()?;
-    let combined_mask = and(&eq_mask, &exactness).ok()?;
+    let combined_mask = and(&eq_mask, exactness).ok()?;
     Some(combined_mask.true_count() > 0)
 }
 
@@ -531,7 +531,7 @@ mod tests {
         let exactness =
             BooleanArray::from(vec![true, false, false, false, false, false]);
 
-        let result = has_any_exact_match(computed_min, row_group_mins, exactness);
+        let result = has_any_exact_match(&computed_min, &row_group_mins, &exactness);
         assert_eq!(result, Some(true));
     }
     // Case 2: All inexact matches
@@ -542,7 +542,7 @@ mod tests {
         let exactness =
             BooleanArray::from(vec![false, false, false, false, false, false]);
 
-        let result = has_any_exact_match(computed_min, row_group_mins, exactness);
+        let result = has_any_exact_match(&computed_min, &row_group_mins, &exactness);
         assert_eq!(result, Some(false));
     }
     // Case 3: All exact matches
@@ -553,7 +553,7 @@ mod tests {
         let exactness =
             BooleanArray::from(vec![false, true, true, true, false, true]);
 
-        let result = has_any_exact_match(computed_max, row_group_maxes, exactness);
+        let result = has_any_exact_match(&computed_max, &row_group_maxes, &exactness);
         assert_eq!(result, Some(true));
     }
     // Case 4: All maxes are null values
@@ -563,7 +563,7 @@ mod tests {
             Arc::new(Int32Array::from(vec![None, None, None, None])) as ArrayRef;
         let exactness = BooleanArray::from(vec![None, Some(true), None, Some(false)]);
 
-        let result = has_any_exact_match(computed_max, row_group_maxes, exactness);
+        let result = has_any_exact_match(&computed_max, &row_group_maxes, &exactness);
         assert_eq!(result, Some(false));
     }
 }
diff --git a/datafusion/datasource-parquet/src/mod.rs b/datafusion/datasource-parquet/src/mod.rs
index 2f64f34bc09b..e0e906f3ce2a 100644
--- a/datafusion/datasource-parquet/src/mod.rs
+++ b/datafusion/datasource-parquet/src/mod.rs
@@ -18,6 +18,9 @@
 // Make sure fast / cheap clones on Arc are explicit:
 // https://github.com/apache/datafusion/issues/11143
 #![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))]
+// https://github.com/apache/datafusion/issues/18503
+#![deny(clippy::needless_pass_by_value)]
+#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
 
 pub mod access_plan;
 pub mod file_format;
diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs
index 3c905d950a96..fd4c64d5500c 100644
--- a/datafusion/datasource-parquet/src/opener.rs
+++ b/datafusion/datasource-parquet/src/opener.rs
@@ -685,7 +685,7 @@ pub(crate) fn build_page_pruning_predicate(
 ) -> Arc<PagePruningAccessPlanFilter> {
     Arc::new(PagePruningAccessPlanFilter::new(
         predicate,
-        Arc::clone(file_schema),
+        &Arc::clone(file_schema),
     ))
 }
 
diff --git a/datafusion/datasource-parquet/src/page_filter.rs b/datafusion/datasource-parquet/src/page_filter.rs
index 2698b6c5fbb6..ddd872a7cf5f 100644
--- a/datafusion/datasource-parquet/src/page_filter.rs
+++ b/datafusion/datasource-parquet/src/page_filter.rs
@@ -118,14 +118,14 @@ pub struct PagePruningAccessPlanFilter {
 impl PagePruningAccessPlanFilter {
     /// Create a new [`PagePruningAccessPlanFilter`] from a physical
     /// expression.
-    pub fn new(expr: &Arc<dyn PhysicalExpr>, schema: SchemaRef) -> Self {
+    pub fn new(expr: &Arc<dyn PhysicalExpr>, schema: &SchemaRef) -> Self {
         // extract any single column predicates
         let predicates = split_conjunction(expr)
             .into_iter()
             .filter_map(|predicate| {
                 let pp = match PruningPredicate::try_new(
                     Arc::clone(predicate),
-                    Arc::clone(&schema),
+                    Arc::clone(schema),
                 ) {
                     Ok(pp) => pp,
                     Err(e) => {
diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs
index 27640f37cee4..825e5f5717f6 100644
--- a/datafusion/datasource-parquet/src/source.rs
+++ b/datafusion/datasource-parquet/src/source.rs
@@ -105,7 +105,7 @@ use parquet::encryption::decrypt::FileDecryptionProperties;
 /// # let predicate = lit(true);
 /// let source = Arc::new(
 ///     ParquetSource::new(Arc::clone(&file_schema))
-///     .with_predicate(predicate)
+///     .with_predicate(&predicate)
 /// );
 /// // Create a DataSourceExec for reading `file1.parquet` with a file size of 100MB
 /// let config = FileScanConfigBuilder::new(object_store_url, source)
@@ -334,9 +334,9 @@ impl ParquetSource {
     }
 
     /// Set predicate information
-    pub fn with_predicate(&self, predicate: Arc<dyn PhysicalExpr>) -> Self {
+    pub fn with_predicate(&self, predicate: &Arc<dyn PhysicalExpr>) -> Self {
         let mut conf = self.clone();
-        conf.predicate = Some(Arc::clone(&predicate));
+        conf.predicate = Some(Arc::clone(predicate));
         conf
     }
 
@@ -804,7 +804,7 @@ mod tests {
         let predicate = lit(true);
 
         let parquet_source =
-            ParquetSource::new(Arc::new(Schema::empty())).with_predicate(predicate);
+            ParquetSource::new(Arc::new(Schema::empty())).with_predicate(&predicate);
         // same value. but filter() call Arc::clone internally
         assert_eq!(parquet_source.predicate(), parquet_source.filter().as_ref());
     }
diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs
index fc7818fe461a..ab247e04935e 100644
--- a/datafusion/proto/src/physical_plan/mod.rs
+++ b/datafusion/proto/src/physical_plan/mod.rs
@@ -714,7 +714,7 @@ impl protobuf::PhysicalPlanNode {
                     ParquetSource::new(table_schema).with_table_parquet_options(options);
 
                 if let Some(predicate) = predicate {
-                    source = source.with_predicate(predicate);
+                    source = source.with_predicate(&predicate);
                 }
                 let base_config = parse_protobuf_file_scan_config(
                     base_conf,
diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
index 73f39eaa7bf9..eafbaf7dbeb4 100644
--- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
@@ -887,7 +887,7 @@ fn roundtrip_parquet_exec_with_pruning_predicate() -> Result<()> {
     let file_source = Arc::new(
         ParquetSource::new(Arc::clone(&file_schema))
             .with_table_parquet_options(options)
-            .with_predicate(predicate),
+            .with_predicate(&(predicate as Arc<dyn PhysicalExpr>)),
     );
 
     let scan_config =
@@ -947,7 +947,7 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() -> Result<()> {
 
     let file_source = Arc::new(
         ParquetSource::new(Arc::clone(&file_schema))
-            .with_predicate(custom_predicate_expr),
+            .with_predicate(&(custom_predicate_expr as Arc<dyn PhysicalExpr>)),
     );
 
     let scan_config =

From a22e25a3884798b45663740a71a39c0e27a7233c Mon Sep 17 00:00:00 2001
From: foskey51
Date: Sat, 15 Nov 2025 08:09:55 +0000
Subject: [PATCH 2/3] trigger ci check

From 9be3d4c41aa0307c9799c8159914a7dd8a32c36e Mon Sep 17 00:00:00 2001
From: foskey51
Date: Sat, 15 Nov 2025 15:53:57 +0000
Subject: [PATCH 3/3] refactor: address pr comments

---
 .../data_io/parquet_advanced_index.rs | 2 +-
datafusion-examples/examples/data_io/parquet_index.rs | 2 +- datafusion-examples/examples/default_column_values.rs | 2 +- .../core/src/datasource/physical_plan/parquet.rs | 2 +- datafusion/core/src/test_util/parquet.rs | 2 +- datafusion/core/tests/fuzz_cases/pruning.rs | 2 +- datafusion/core/tests/parquet/external_access_plan.rs | 2 +- datafusion/core/tests/parquet/page_pruning.rs | 2 +- datafusion/datasource-parquet/src/file_format.rs | 10 +++++----- datafusion/datasource-parquet/src/opener.rs | 2 +- datafusion/datasource-parquet/src/page_filter.rs | 5 +++-- datafusion/datasource-parquet/src/source.rs | 9 +++++---- datafusion/proto/src/physical_plan/mod.rs | 2 +- .../proto/tests/cases/roundtrip_physical_plan.rs | 4 ++-- 14 files changed, 25 insertions(+), 23 deletions(-) diff --git a/datafusion-examples/examples/data_io/parquet_advanced_index.rs b/datafusion-examples/examples/data_io/parquet_advanced_index.rs index e4034baf1f65..af1e03fe4ddb 100644 --- a/datafusion-examples/examples/data_io/parquet_advanced_index.rs +++ b/datafusion-examples/examples/data_io/parquet_advanced_index.rs @@ -494,7 +494,7 @@ impl TableProvider for IndexTableProvider { ParquetSource::new(schema.clone()) // provide the predicate so the DataSourceExec can try and prune // row groups internally - .with_predicate(&predicate) + .with_predicate(predicate) // provide the factory to create parquet reader without re-reading metadata .with_parquet_file_reader_factory(Arc::new(reader_factory)), ); diff --git a/datafusion-examples/examples/data_io/parquet_index.rs b/datafusion-examples/examples/data_io/parquet_index.rs index 7c517f260fda..4cca73b1f9be 100644 --- a/datafusion-examples/examples/data_io/parquet_index.rs +++ b/datafusion-examples/examples/data_io/parquet_index.rs @@ -242,7 +242,7 @@ impl TableProvider for IndexTableProvider { let object_store_url = ObjectStoreUrl::parse("file://")?; let source = - Arc::new(ParquetSource::new(self.schema()).with_predicate(&predicate)); + Arc::new(ParquetSource::new(self.schema()).with_predicate(predicate)); let mut file_scan_config_builder = FileScanConfigBuilder::new(object_store_url, source) .with_projection_indices(projection.cloned()) diff --git a/datafusion-examples/examples/default_column_values.rs b/datafusion-examples/examples/default_column_values.rs index cb8cf39b50a2..bfc60519f26e 100644 --- a/datafusion-examples/examples/default_column_values.rs +++ b/datafusion-examples/examples/default_column_values.rs @@ -236,7 +236,7 @@ impl TableProvider for DefaultValueTableProvider { )?; let parquet_source = ParquetSource::new(schema.clone()) - .with_predicate(&filter) + .with_predicate(filter) .with_pushdown_filters(true); let object_store_url = ObjectStoreUrl::parse("memory://")?; diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index 303f02f90932..b27dcf56e33c 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -163,7 +163,7 @@ mod tests { let mut source = ParquetSource::new(table_schema); if let Some(predicate) = predicate { - source = source.with_predicate(&predicate); + source = source.with_predicate(predicate); } if self.pushdown_predicate { diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index 69095d9b3564..b5213cee3f2d 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -185,7 +185,7 @@ impl TestParquetFile { 
        let source = Arc::new(
             ParquetSource::new(Arc::clone(&self.schema))
                 .with_table_parquet_options(parquet_options)
-                .with_predicate(&Arc::clone(&physical_filter_expr)),
+                .with_predicate(Arc::clone(&physical_filter_expr)),
         );
         let config = scan_config_builder.with_source(source).build();
         let parquet_exec = DataSourceExec::from_data_source(config);
diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs
index e33fffb048c2..51ec8f03e5d2 100644
--- a/datafusion/core/tests/fuzz_cases/pruning.rs
+++ b/datafusion/core/tests/fuzz_cases/pruning.rs
@@ -276,7 +276,7 @@ async fn execute_with_predicate(
     ctx: &SessionContext,
 ) -> Vec {
     let parquet_source = if prune_stats {
-        ParquetSource::new(schema.clone()).with_predicate(&predicate)
+        ParquetSource::new(schema.clone()).with_predicate(predicate.clone())
     } else {
         ParquetSource::new(schema.clone())
     };
diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs
index f0df262e4d9f..b35cb6e09cfb 100644
--- a/datafusion/core/tests/parquet/external_access_plan.rs
+++ b/datafusion/core/tests/parquet/external_access_plan.rs
@@ -355,7 +355,7 @@ impl TestFull {
         let source = if let Some(predicate) = predicate {
             let df_schema = DFSchema::try_from(schema.clone())?;
             let predicate = ctx.create_physical_expr(predicate, &df_schema)?;
-            Arc::new(ParquetSource::new(schema.clone()).with_predicate(&predicate))
+            Arc::new(ParquetSource::new(schema.clone()).with_predicate(predicate))
         } else {
             Arc::new(ParquetSource::new(schema.clone()))
         };
diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs
index 21e7b3a64f27..fb2a196b0aa6 100644
--- a/datafusion/core/tests/parquet/page_pruning.rs
+++ b/datafusion/core/tests/parquet/page_pruning.rs
@@ -82,7 +82,7 @@ async fn get_parquet_exec(
 
     let source = Arc::new(
         ParquetSource::new(schema.clone())
-            .with_predicate(&predicate)
+            .with_predicate(predicate)
             .with_enable_page_index(true)
             .with_pushdown_filters(pushdown_filters),
     );
diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs
index 83436fa70002..385bfb5472a5 100644
--- a/datafusion/datasource-parquet/src/file_format.rs
+++ b/datafusion/datasource-parquet/src/file_format.rs
@@ -1072,11 +1072,12 @@ pub async fn fetch_statistics(
     since = "50.0.0",
     note = "Use `DFParquetMetadata::statistics_from_parquet_metadata` instead"
 )]
+#[expect(clippy::needless_pass_by_value)]
 pub fn statistics_from_parquet_meta_calc(
     metadata: &ParquetMetaData,
-    table_schema: &SchemaRef,
+    table_schema: SchemaRef,
 ) -> Result<Statistics> {
-    DFParquetMetadata::statistics_from_parquet_metadata(metadata, table_schema)
+    DFParquetMetadata::statistics_from_parquet_metadata(metadata, &table_schema)
 }
 
 /// Implements [`DataSink`] for writing to a parquet file.
@@ -1500,10 +1501,9 @@ fn spawn_parquet_parallel_serialization_task(
     serialize_tx: Sender<SpawnedTask<RBStreamSerializeResult>>,
     schema: Arc<Schema>,
     writer_props: Arc<WriterProperties>,
-    parallel_options: &ParallelParquetWriterOptions,
+    parallel_options: Arc<ParallelParquetWriterOptions>,
     pool: Arc<dyn MemoryPool>,
 ) -> SpawnedTask<Result<(), DataFusionError>> {
-    let parallel_options = parallel_options.clone();
     SpawnedTask::spawn(async move {
         let max_buffer_rb = parallel_options.max_buffered_record_batches_per_stream;
         let max_row_group_rows = writer_props.max_row_group_size();
@@ -1672,7 +1672,7 @@ async fn output_single_parquet_file_parallelized(
         serialize_tx,
         Arc::clone(&output_schema),
         Arc::clone(&arc_props),
-        &parallel_options,
+        parallel_options.into(),
         Arc::clone(&pool),
     );
     let parquet_meta_data = concatenate_parallel_row_groups(
diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs
index fd4c64d5500c..3c905d950a96 100644
--- a/datafusion/datasource-parquet/src/opener.rs
+++ b/datafusion/datasource-parquet/src/opener.rs
@@ -685,7 +685,7 @@ pub(crate) fn build_page_pruning_predicate(
 ) -> Arc<PagePruningAccessPlanFilter> {
     Arc::new(PagePruningAccessPlanFilter::new(
         predicate,
-        &Arc::clone(file_schema),
+        Arc::clone(file_schema),
     ))
 }
 
diff --git a/datafusion/datasource-parquet/src/page_filter.rs b/datafusion/datasource-parquet/src/page_filter.rs
index ddd872a7cf5f..9f4e52c513cf 100644
--- a/datafusion/datasource-parquet/src/page_filter.rs
+++ b/datafusion/datasource-parquet/src/page_filter.rs
@@ -118,14 +118,15 @@ pub struct PagePruningAccessPlanFilter {
 impl PagePruningAccessPlanFilter {
     /// Create a new [`PagePruningAccessPlanFilter`] from a physical
     /// expression.
-    pub fn new(expr: &Arc<dyn PhysicalExpr>, schema: &SchemaRef) -> Self {
+    #[expect(clippy::needless_pass_by_value)]
+    pub fn new(expr: &Arc<dyn PhysicalExpr>, schema: SchemaRef) -> Self {
         // extract any single column predicates
         let predicates = split_conjunction(expr)
             .into_iter()
             .filter_map(|predicate| {
                 let pp = match PruningPredicate::try_new(
                     Arc::clone(predicate),
-                    Arc::clone(schema),
+                    Arc::clone(&schema),
                 ) {
                     Ok(pp) => pp,
                     Err(e) => {
diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs
index 825e5f5717f6..7c07b7b68c35 100644
--- a/datafusion/datasource-parquet/src/source.rs
+++ b/datafusion/datasource-parquet/src/source.rs
@@ -105,7 +105,7 @@ use parquet::encryption::decrypt::FileDecryptionProperties;
 /// # let predicate = lit(true);
 /// let source = Arc::new(
 ///     ParquetSource::new(Arc::clone(&file_schema))
-///     .with_predicate(&predicate)
+///     .with_predicate(predicate)
 /// );
 /// // Create a DataSourceExec for reading `file1.parquet` with a file size of 100MB
 /// let config = FileScanConfigBuilder::new(object_store_url, source)
@@ -334,9 +334,10 @@ impl ParquetSource {
     }
 
     /// Set predicate information
-    pub fn with_predicate(&self, predicate: &Arc<dyn PhysicalExpr>) -> Self {
+    #[expect(clippy::needless_pass_by_value)]
+    pub fn with_predicate(&self, predicate: Arc<dyn PhysicalExpr>) -> Self {
         let mut conf = self.clone();
-        conf.predicate = Some(Arc::clone(predicate));
+        conf.predicate = Some(Arc::clone(&predicate));
         conf
     }
 
@@ -804,7 +805,7 @@ mod tests {
         let predicate = lit(true);
 
         let parquet_source =
-            ParquetSource::new(Arc::new(Schema::empty())).with_predicate(&predicate);
+            ParquetSource::new(Arc::new(Schema::empty())).with_predicate(predicate);
         // same value. 
but filter() call Arc::clone internally
         assert_eq!(parquet_source.predicate(), parquet_source.filter().as_ref());
     }
diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs
index ab247e04935e..fc7818fe461a 100644
--- a/datafusion/proto/src/physical_plan/mod.rs
+++ b/datafusion/proto/src/physical_plan/mod.rs
@@ -714,7 +714,7 @@ impl protobuf::PhysicalPlanNode {
                     ParquetSource::new(table_schema).with_table_parquet_options(options);
 
                 if let Some(predicate) = predicate {
-                    source = source.with_predicate(&predicate);
+                    source = source.with_predicate(predicate);
                 }
                 let base_config = parse_protobuf_file_scan_config(
                     base_conf,
diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
index eafbaf7dbeb4..73f39eaa7bf9 100644
--- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
@@ -887,7 +887,7 @@ fn roundtrip_parquet_exec_with_pruning_predicate() -> Result<()> {
     let file_source = Arc::new(
         ParquetSource::new(Arc::clone(&file_schema))
             .with_table_parquet_options(options)
-            .with_predicate(&(predicate as Arc<dyn PhysicalExpr>)),
+            .with_predicate(predicate),
     );
 
     let scan_config =
@@ -947,7 +947,7 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() -> Result<()> {
 
     let file_source = Arc::new(
         ParquetSource::new(Arc::clone(&file_schema))
-            .with_predicate(&(custom_predicate_expr as Arc<dyn PhysicalExpr>)),
+            .with_predicate(custom_predicate_expr),
     );
 
     let scan_config =