14 changes: 5 additions & 9 deletions datafusion/physical-plan/src/aggregates/group_values/row.rs
@@ -236,8 +236,7 @@ impl GroupValues for GroupValuesRows {
// https://github.com/apache/datafusion/issues/7647
for (field, array) in self.schema.fields.iter().zip(&mut output) {
let expected = field.data_type();
*array =
dictionary_encode_if_necessary(Arc::<dyn Array>::clone(array), expected)?;
*array = dictionary_encode_if_necessary(array, expected)?;
}

self.group_values = Some(group_values);
@@ -259,7 +258,7 @@ impl GroupValues for GroupValuesRows {
}

fn dictionary_encode_if_necessary(
array: ArrayRef,
array: &ArrayRef,
expected: &DataType,
) -> Result<ArrayRef> {
match (expected, array.data_type()) {
@@ -269,10 +268,7 @@ fn dictionary_encode_if_necessary(
.iter()
.zip(struct_array.columns())
.map(|(expected_field, column)| {
dictionary_encode_if_necessary(
Arc::<dyn Array>::clone(column),
expected_field.data_type(),
)
dictionary_encode_if_necessary(column, expected_field.data_type())
})
.collect::<Result<Vec<_>>>()?;

@@ -289,13 +285,13 @@ fn dictionary_encode_if_necessary(
Arc::<arrow::datatypes::Field>::clone(expected_field),
list.offsets().clone(),
dictionary_encode_if_necessary(
Arc::<dyn Array>::clone(list.values()),
list.values(),
expected_field.data_type(),
)?,
list.nulls().cloned(),
)?))
}
(DataType::Dictionary(_, _), _) => Ok(cast(array.as_ref(), expected)?),
(_, _) => Ok(Arc::<dyn Array>::clone(&array)),
(_, _) => Ok(Arc::<dyn Array>::clone(array)),
}
}
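The hunk above changes `dictionary_encode_if_necessary` to borrow its input rather than take an owned `ArrayRef`. Below is a minimal, self-contained sketch of that borrowing pattern, not the DataFusion code itself: only the dictionary and pass-through branches are shown (the struct and list recursion is omitted), and arrow's `ArrowError` stands in for DataFusion's `Result`.

```rust
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, StringArray};
use arrow::compute::cast;
use arrow::datatypes::DataType;
use arrow::error::ArrowError;

// Borrow the ArrayRef; bump the Arc refcount only on the pass-through branch.
// (The real function also recurses into Struct and List columns.)
fn dictionary_encode_if_necessary(
    array: &ArrayRef,
    expected: &DataType,
) -> Result<ArrayRef, ArrowError> {
    match (expected, array.data_type()) {
        // Re-encode when the output schema expects a dictionary type.
        (DataType::Dictionary(_, _), _) => cast(array.as_ref(), expected),
        // Otherwise hand back a cheap clone of the Arc: no data is copied.
        (_, _) => Ok(Arc::clone(array)),
    }
}

fn main() -> Result<(), ArrowError> {
    let input: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "a"]));
    let expected =
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));

    let encoded = dictionary_encode_if_necessary(&input, &expected)?;
    assert!(matches!(encoded.data_type(), DataType::Dictionary(_, _)));

    // `input` is still usable here because the callee only borrowed it.
    assert_eq!(input.len(), 3);
    Ok(())
}
```

Because `ArrayRef` is just `Arc<dyn Array>`, the pass-through branch costs one atomic refcount increment rather than a deep copy, and callers keep their array without writing `Arc::clone` at every call site.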
11 changes: 6 additions & 5 deletions datafusion/physical-plan/src/aggregates/mod.rs
@@ -626,7 +626,7 @@ impl AggregateExec {
fn execute_typed(
&self,
partition: usize,
context: Arc<TaskContext>,
context: &Arc<TaskContext>,
) -> Result<StreamType> {
// no group by at all
if self.group_by.expr.is_empty() {
@@ -761,7 +761,7 @@ impl AggregateExec {
&self.input_order_mode
}

fn statistics_inner(&self, child_statistics: Statistics) -> Result<Statistics> {
fn statistics_inner(&self, child_statistics: &Statistics) -> Result<Statistics> {
// TODO stats: group expressions:
// - once expressions will be able to compute their own stats, use it here
// - case where we group by on a column for which with have the `distinct` stat
@@ -1020,7 +1020,7 @@ impl ExecutionPlan for AggregateExec {
partition: usize,
context: Arc<TaskContext>,
) -> Result<SendableRecordBatchStream> {
self.execute_typed(partition, context)
self.execute_typed(partition, &context)
.map(|stream| stream.into())
}

@@ -1033,7 +1033,8 @@ impl ExecutionPlan for AggregateExec {
}

fn partition_statistics(&self, partition: Option<usize>) -> Result<Statistics> {
self.statistics_inner(self.input().partition_statistics(partition)?)
let child_statistics = self.input().partition_statistics(partition)?;
self.statistics_inner(&child_statistics)
}

fn cardinality_effect(&self) -> CardinalityEffect {
@@ -2220,7 +2221,7 @@ mod tests {
Arc::clone(&input_schema),
)?);

let stream = partial_aggregate.execute_typed(0, Arc::clone(&task_ctx))?;
let stream = partial_aggregate.execute_typed(0, &task_ctx)?;

// ensure that we really got the version we wanted
match version {
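The `&Arc<TaskContext>` changes in this file follow the same idea: crate-internal helpers only read the context and forward it once, so they can borrow the `Arc` and clone it exactly where `ExecutionPlan::execute` demands ownership. A small sketch under that assumption, with a toy `TaskContext` standing in for the real type:

```rust
use std::sync::Arc;

// Stand-in for TaskContext; the real one lives in datafusion-execution.
struct TaskContext {
    batch_size: usize,
}

// Stand-in for ExecutionPlan::execute, whose signature requires an owned Arc.
fn execute(_partition: usize, context: Arc<TaskContext>) -> usize {
    context.batch_size
}

// Internal helper: it only reads the context and forwards it once, so it
// borrows the Arc and clones at the single call that needs ownership.
fn execute_typed(partition: usize, context: &Arc<TaskContext>) -> usize {
    let batch_size = context.batch_size; // plain read through the borrow
    let rows = execute(partition, Arc::clone(context)); // refcount bump only here
    batch_size.min(rows)
}

fn main() {
    let ctx = Arc::new(TaskContext { batch_size: 8192 });
    let n = execute_typed(0, &ctx);
    assert_eq!(n, 8192);
    // The caller still owns its Arc after the call.
    assert_eq!(Arc::strong_count(&ctx), 1);
}
```

The public `execute` entry point keeps its owned-`Arc` signature, so callers are unaffected; only the crate-private plumbing changes.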
12 changes: 6 additions & 6 deletions datafusion/physical-plan/src/aggregates/no_grouping.rs
@@ -68,14 +68,14 @@ impl AggregateStream {
/// Create a new AggregateStream
pub fn new(
Contributor Author: this is a pub(crate) function.

agg: &AggregateExec,
context: Arc<TaskContext>,
context: &Arc<TaskContext>,
partition: usize,
) -> Result<Self> {
let agg_schema = Arc::clone(&agg.schema);
let agg_filter_expr = agg.filter_expr.clone();

let baseline_metrics = BaselineMetrics::new(&agg.metrics, partition);
let input = agg.input.execute(partition, Arc::clone(&context))?;
let input = agg.input.execute(partition, Arc::clone(context))?;

let aggregate_expressions = aggregate_expressions(&agg.aggr_expr, &agg.mode, 0)?;
let filter_expressions = match agg.mode {
@@ -115,7 +115,7 @@ impl AggregateStream {
let timer = elapsed_compute.timer();
let result = aggregate_batch(
&this.mode,
batch,
&batch,
&mut this.accumulators,
&this.aggregate_expressions,
&this.filter_expressions,
@@ -195,7 +195,7 @@ impl RecordBatchStream for AggregateStream {
/// TODO: Make this a member function
fn aggregate_batch(
mode: &AggregateMode,
batch: RecordBatch,
batch: &RecordBatch,
accumulators: &mut [AccumulatorItem],
expressions: &[Vec<Arc<dyn PhysicalExpr>>],
filters: &[Option<Arc<dyn PhysicalExpr>>],
@@ -215,8 +215,8 @@ fn aggregate_batch(
.try_for_each(|((accum, expr), filter)| {
// 1.2
let batch = match filter {
Some(filter) => Cow::Owned(batch_filter(&batch, filter)?),
None => Cow::Borrowed(&batch),
Some(filter) => Cow::Owned(batch_filter(batch, filter)?),
None => Cow::Borrowed(batch),
};

// 1.3
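`aggregate_batch` now borrows the `RecordBatch`, and the existing `Cow` keeps both paths cheap: the unfiltered case is a free borrow, the filtered case owns the freshly built batch. A stripped-down sketch of that pattern; the helper name `maybe_filtered` and the direct use of arrow's `filter_record_batch` are illustrative, not the DataFusion code.

```rust
use std::borrow::Cow;
use std::sync::Arc;

use arrow::array::{BooleanArray, Int32Array};
use arrow::compute::filter_record_batch;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

// Take the batch by reference: no filter means a borrow, a filter means a
// new owned batch.
fn maybe_filtered<'a>(
    batch: &'a RecordBatch,
    predicate: Option<&BooleanArray>,
) -> Result<Cow<'a, RecordBatch>, ArrowError> {
    Ok(match predicate {
        Some(p) => Cow::Owned(filter_record_batch(batch, p)?),
        None => Cow::Borrowed(batch),
    })
}

fn main() -> Result<(), ArrowError> {
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int32, false)]));
    let batch =
        RecordBatch::try_new(schema, vec![Arc::new(Int32Array::from(vec![1, 2, 3]))])?;

    let unfiltered = maybe_filtered(&batch, None)?;
    assert_eq!(unfiltered.num_rows(), 3); // borrows the original, no copy

    let mask = BooleanArray::from(vec![true, false, true]);
    let filtered = maybe_filtered(&batch, Some(&mask))?;
    assert_eq!(filtered.num_rows(), 2); // owns a freshly filtered batch

    Ok(())
}
```

Either way the caller's batch stays usable after the call, which is what lets the loop above hand the same borrowed batch to each accumulator with its own optional filter.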
32 changes: 16 additions & 16 deletions datafusion/physical-plan/src/aggregates/row_hash.rs
@@ -445,7 +445,7 @@ impl GroupedHashAggregateStream {
/// Create a new GroupedHashAggregateStream
pub fn new(
Contributor Author: this is a pub(crate) function.

agg: &AggregateExec,
context: Arc<TaskContext>,
context: &Arc<TaskContext>,
partition: usize,
) -> Result<Self> {
debug!("Creating GroupedHashAggregateStream");
@@ -454,7 +454,7 @@ impl GroupedHashAggregateStream {
let agg_filter_expr = agg.filter_expr.clone();

let batch_size = context.session_config().batch_size();
let input = agg.input.execute(partition, Arc::clone(&context))?;
let input = agg.input.execute(partition, Arc::clone(context))?;
let baseline_metrics = BaselineMetrics::new(&agg.metrics, partition);
let group_by_metrics = GroupByMetrics::new(&agg.metrics, partition);

@@ -685,7 +685,7 @@ impl Stream for GroupedHashAggregateStream {
}

// Do the grouping
self.group_aggregate_batch(batch)?;
self.group_aggregate_batch(&batch)?;

self.update_skip_aggregation_probe(input_rows);

@@ -728,7 +728,7 @@ impl Stream for GroupedHashAggregateStream {
self.spill_previous_if_necessary(&batch)?;

// Do the grouping
self.group_aggregate_batch(batch)?;
self.group_aggregate_batch(&batch)?;

// If we can begin emitting rows, do so,
// otherwise keep consuming input
@@ -777,7 +777,7 @@ impl Stream for GroupedHashAggregateStream {
if let Some(probe) = self.skip_aggregation_probe.as_mut() {
probe.record_skipped(&batch);
}
let states = self.transform_to_states(batch)?;
let states = self.transform_to_states(&batch)?;
return Poll::Ready(Some(Ok(
states.record_output(&self.baseline_metrics)
)));
@@ -854,12 +854,12 @@ impl RecordBatchStream for GroupedHashAggregateStream {

impl GroupedHashAggregateStream {
/// Perform group-by aggregation for the given [`RecordBatch`].
fn group_aggregate_batch(&mut self, batch: RecordBatch) -> Result<()> {
fn group_aggregate_batch(&mut self, batch: &RecordBatch) -> Result<()> {
// Evaluate the grouping expressions
let group_by_values = if self.spill_state.is_stream_merging {
evaluate_group_by(&self.spill_state.merging_group_by, &batch)?
evaluate_group_by(&self.spill_state.merging_group_by, batch)?
} else {
evaluate_group_by(&self.group_by, &batch)?
evaluate_group_by(&self.group_by, batch)?
};

// Only create the timer if there are actual aggregate arguments to evaluate
@@ -876,18 +876,18 @@ impl GroupedHashAggregateStream {

// Evaluate the aggregation expressions.
let input_values = if self.spill_state.is_stream_merging {
evaluate_many(&self.spill_state.merging_aggregate_arguments, &batch)?
evaluate_many(&self.spill_state.merging_aggregate_arguments, batch)?
} else {
evaluate_many(&self.aggregate_arguments, &batch)?
evaluate_many(&self.aggregate_arguments, batch)?
};
drop(timer);

// Evaluate the filter expressions, if any, against the inputs
let filter_values = if self.spill_state.is_stream_merging {
let filter_expressions = vec![None; self.accumulators.len()];
evaluate_optional(&filter_expressions, &batch)?
evaluate_optional(&filter_expressions, batch)?
} else {
evaluate_optional(&self.filter_expressions, &batch)?
evaluate_optional(&self.filter_expressions, batch)?
};

for group_values in &group_by_values {
@@ -1217,10 +1217,10 @@ impl GroupedHashAggregateStream {
}

/// Transforms input batch to intermediate aggregate state, without grouping it
fn transform_to_states(&self, batch: RecordBatch) -> Result<RecordBatch> {
let mut group_values = evaluate_group_by(&self.group_by, &batch)?;
let input_values = evaluate_many(&self.aggregate_arguments, &batch)?;
let filter_values = evaluate_optional(&self.filter_expressions, &batch)?;
fn transform_to_states(&self, batch: &RecordBatch) -> Result<RecordBatch> {
let mut group_values = evaluate_group_by(&self.group_by, batch)?;
let input_values = evaluate_many(&self.aggregate_arguments, batch)?;
let filter_values = evaluate_optional(&self.filter_expressions, batch)?;

assert_eq_or_internal_err!(
group_values.len(),
14 changes: 4 additions & 10 deletions datafusion/physical-plan/src/aggregates/topk/heap.rs
@@ -326,13 +326,7 @@ impl<VAL: ValueType> TopKHeap<VAL> {
}
}

fn _tree_print(
&self,
idx: usize,
prefix: String,
is_tail: bool,
output: &mut String,
) {
fn _tree_print(&self, idx: usize, prefix: &str, is_tail: bool, output: &mut String) {
if let Some(Some(hi)) = self.heap.get(idx) {
let connector = if idx != 0 {
if is_tail {
@@ -357,10 +351,10 @@ impl<VAL: ValueType> TopKHeap<VAL> {
let right_exists = right_idx < self.len;

if left_exists {
self._tree_print(left_idx, child_prefix.clone(), !right_exists, output);
self._tree_print(left_idx, &child_prefix, !right_exists, output);
}
if right_exists {
self._tree_print(right_idx, child_prefix, true, output);
self._tree_print(right_idx, &child_prefix, true, output);
}
}
}
@@ -370,7 +364,7 @@ impl<VAL: ValueType> Display for TopKHeap<VAL> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut output = String::new();
if !self.heap.is_empty() {
self._tree_print(0, String::new(), true, &mut output);
self._tree_print(0, "", true, &mut output);
}
write!(f, "{output}")
}
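`_tree_print` now takes the prefix as `&str`, so the recursion builds each child prefix once and lends it to both subtree calls instead of cloning a `String` for one of them. A self-contained toy version of the same shape, using plain `std` and an array-backed heap of `Option`s as in `TopKHeap`; the connector and prefix strings are illustrative.

```rust
// Recursive printer that borrows its prefix: the caller formats the child
// prefix once and lends it to both recursive calls.
fn tree_print(nodes: &[Option<i32>], idx: usize, prefix: &str, is_tail: bool, out: &mut String) {
    let Some(Some(value)) = nodes.get(idx) else {
        return;
    };
    let connector = if idx == 0 {
        ""
    } else if is_tail {
        "└── "
    } else {
        "├── "
    };
    out.push_str(&format!("{prefix}{connector}{value}\n"));

    // Build the child prefix once; both children only borrow it.
    let child_prefix = format!(
        "{prefix}{}",
        if is_tail || idx == 0 { "    " } else { "│   " }
    );
    let (left, right) = (2 * idx + 1, 2 * idx + 2);
    let right_exists = matches!(nodes.get(right), Some(Some(_)));
    tree_print(nodes, left, &child_prefix, !right_exists, out);
    if right_exists {
        tree_print(nodes, right, &child_prefix, true, out);
    }
}

fn main() {
    // A small binary heap laid out in an array: 1 is the root, 3 and 5 its children.
    let heap = vec![Some(1), Some(3), Some(5)];
    let mut out = String::new();
    tree_print(&heap, 0, "", true, &mut out);
    print!("{out}");
}
```

Running it prints the root and its two children with the same `├──`/`└──` connectors as the heap's `Display` output.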
11 changes: 6 additions & 5 deletions datafusion/physical-plan/src/aggregates/topk_stream.rs
@@ -54,13 +54,13 @@ pub struct GroupedTopKAggregateStream {
impl GroupedTopKAggregateStream {
pub fn new(
Contributor Author: this API is private inside the physical-plan/aggregates module. (Do we have a convenient way to check external visibility? For now I'm manually traversing its parent modules to verify.)

aggr: &AggregateExec,
context: Arc<TaskContext>,
context: &Arc<TaskContext>,
partition: usize,
limit: usize,
) -> Result<Self> {
let agg_schema = Arc::clone(&aggr.schema);
let group_by = aggr.group_by.clone();
let input = aggr.input.execute(partition, Arc::clone(&context))?;
let input = aggr.input.execute(partition, Arc::clone(context))?;
let baseline_metrics = BaselineMetrics::new(&aggr.metrics, partition);
let group_by_metrics = GroupByMetrics::new(&aggr.metrics, partition);
let aggregate_arguments =
@@ -97,11 +97,12 @@ impl RecordBatchStream for GroupedTopKAggregateStream {
}

impl GroupedTopKAggregateStream {
fn intern(&mut self, ids: ArrayRef, vals: ArrayRef) -> Result<()> {
fn intern(&mut self, ids: &ArrayRef, vals: &ArrayRef) -> Result<()> {
let _timer = self.group_by_metrics.time_calculating_group_ids.timer();

let len = ids.len();
self.priority_map.set_batch(ids, Arc::clone(&vals));
self.priority_map
.set_batch(Arc::clone(ids), Arc::clone(vals));

let has_nulls = vals.null_count() > 0;
for row_idx in 0..len {
@@ -167,7 +168,7 @@ impl Stream for GroupedTopKAggregateStream {
let input_values = Arc::clone(&input_values[0][0]);

// iterate over each column of group_by values
(*self).intern(group_by_values, input_values)?;
(*self).intern(&group_by_values, &input_values)?;
}
// inner is done, emit all rows and switch to producing output
None => {
10 changes: 5 additions & 5 deletions datafusion/physical-plan/src/analyze.rs
@@ -206,8 +206,8 @@ impl ExecutionPlan for AnalyzeExec {
show_statistics,
total_rows,
duration,
captured_input,
captured_schema,
&captured_input,
&captured_schema,
&metric_types,
)
};
@@ -225,8 +225,8 @@ fn create_output_batch(
show_statistics: bool,
total_rows: usize,
duration: std::time::Duration,
input: Arc<dyn ExecutionPlan>,
schema: SchemaRef,
input: &Arc<dyn ExecutionPlan>,
schema: &SchemaRef,
metric_types: &[MetricType],
) -> Result<RecordBatch> {
let mut type_builder = StringBuilder::with_capacity(1, 1024);
@@ -262,7 +262,7 @@ fn create_output_batch(
}

RecordBatch::try_new(
schema,
Arc::clone(schema),
vec![
Arc::new(type_builder.finish()),
Arc::new(plan_builder.finish()),
13 changes: 13 additions & 0 deletions datafusion/physical-plan/src/execution_plan.rs
@@ -1118,6 +1118,7 @@ pub fn check_default_invariants<P: ExecutionPlan + ?Sized>(
/// 1. RepartitionExec for changing the partition number between two `ExecutionPlan`s
/// 2. CoalescePartitionsExec for collapsing all of the partitions into one without ordering guarantee
/// 3. SortPreservingMergeExec for collapsing all of the sorted partitions into one with ordering guarantee
#[expect(clippy::needless_pass_by_value)]
pub fn need_data_exchange(plan: Arc<dyn ExecutionPlan>) -> bool {
plan.properties().evaluation_type == EvaluationType::Eager
}
@@ -1172,6 +1173,10 @@ pub async fn collect(
///
/// Dropping the stream will abort the execution of the query, and free up
/// any allocated resources
#[expect(
clippy::needless_pass_by_value,
reason = "Public API that historically takes owned Arcs"
)]
pub fn execute_stream(
plan: Arc<dyn ExecutionPlan>,
context: Arc<TaskContext>,
@@ -1236,6 +1241,10 @@ pub async fn collect_partitioned(
///
/// Dropping the stream will abort the execution of the query, and free up
/// any allocated resources
#[expect(
clippy::needless_pass_by_value,
reason = "Public API that historically takes owned Arcs"
)]
pub fn execute_stream_partitioned(
plan: Arc<dyn ExecutionPlan>,
context: Arc<TaskContext>,
@@ -1267,6 +1276,10 @@ pub fn execute_stream_partitioned(
/// violate the `not null` constraints specified in the `sink_schema`. If there are
/// such columns, it wraps the resulting stream to enforce the `not null` constraints
/// by invoking the [`check_not_null_constraints`] function on each batch of the stream.
#[expect(
clippy::needless_pass_by_value,
reason = "Public API that historically takes owned Arcs"
)]
pub fn execute_input_stream(
input: Arc<dyn ExecutionPlan>,
sink_schema: SchemaRef,
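For the public entry points (`need_data_exchange`, `execute_stream`, `execute_stream_partitioned`, `execute_input_stream`) the owned-`Arc` signatures are kept and the lint is silenced with `#[expect]` instead. The sketch below, with a toy `TaskContext`, shows how that attribute behaves: `#[expect]` (stabilized in Rust 1.81) acts like `#[allow]`, but additionally reports `unfulfilled_lint_expectations` if the expected lint ever stops firing, so the annotation cannot silently go stale.

```rust
// Check with: cargo clippy -- -W clippy::needless_pass_by_value
use std::sync::Arc;

struct TaskContext;

// Changing this public signature to `&Arc<TaskContext>` would break callers,
// so the lint is expected (not just allowed) and the reason is recorded.
#[expect(
    clippy::needless_pass_by_value,
    reason = "Public API that historically takes owned Arcs"
)]
pub fn execute_stream(context: Arc<TaskContext>) -> usize {
    // The argument is only used by reference inside the body, which is what
    // triggers needless_pass_by_value in the first place.
    Arc::strong_count(&context)
}

fn main() {
    let ctx = Arc::new(TaskContext);
    // Two strong references are alive during the call: `ctx` and the clone.
    assert_eq!(execute_stream(Arc::clone(&ctx)), 2);
}
```

Without the attribute clippy flags the function; with it the expectation is fulfilled and nothing is reported, while a future refactor to a reference would surface the now-unneeded expectation.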