diff --git a/datafusion-examples/examples/custom_data_source/custom_datasource.rs b/datafusion-examples/examples/custom_data_source/custom_datasource.rs index 5b2438354710e..b276ae32cf247 100644 --- a/datafusion-examples/examples/custom_data_source/custom_datasource.rs +++ b/datafusion-examples/examples/custom_data_source/custom_datasource.rs @@ -202,8 +202,7 @@ impl CustomExec { schema: SchemaRef, db: CustomDataSource, ) -> Self { - let projected_schema = - project_schema(&schema, projections.map(|v| v.as_ref())).unwrap(); + let projected_schema = project_schema(&schema, projections).unwrap(); let cache = Self::compute_properties(projected_schema.clone()); Self { db, diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index e081280825135..38456944075fc 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -522,7 +522,7 @@ impl TableProvider for ListingTable { // if no files need to be read, return an `EmptyExec` if partitioned_file_lists.is_empty() { - let projected_schema = project_schema(&self.schema(), projection.as_deref())?; + let projected_schema = project_schema(&self.schema(), projection.as_ref())?; return Ok(ScanResult::new(Arc::new(EmptyExec::new(projected_schema)))); } diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index 8511712c4837b..b8a9516aca9c3 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -391,8 +391,12 @@ impl Statistics { /// For example, if we had statistics for columns `{"a", "b", "c"}`, /// projecting to `vec![2, 1]` would return statistics for columns `{"c", /// "b"}`. - pub fn project(mut self, projection: Option<&[usize]>) -> Self { - let Some(projection) = projection.map(AsRef::as_ref) else { + pub fn project>(self, p: Option<&P>) -> Self { + self.project_inner(p.as_ref().map(|p| p.as_ref())) + } + + pub fn project_inner(mut self, projection: Option<&[usize]>) -> Self { + let Some(projection) = projection else { return self; }; @@ -1066,29 +1070,29 @@ mod tests { #[test] fn test_project_none() { - let projection: Option<&[usize]> = None; - let stats = make_stats(vec![10, 20, 30]).project(projection); + let projection: Option> = None; + let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref()); assert_eq!(stats, make_stats(vec![10, 20, 30])); } #[test] fn test_project_empty() { let projection = Some(vec![]); - let stats = make_stats(vec![10, 20, 30]).project(projection.as_deref()); + let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref()); assert_eq!(stats, make_stats(vec![])); } #[test] fn test_project_swap() { let projection = Some(vec![2, 1]); - let stats = make_stats(vec![10, 20, 30]).project(projection.as_deref()); + let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref()); assert_eq!(stats, make_stats(vec![30, 20])); } #[test] fn test_project_repeated() { let projection = Some(vec![1, 2, 1, 1, 0, 2]); - let stats = make_stats(vec![10, 20, 30]).project(projection.as_deref()); + let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref()); assert_eq!(stats, make_stats(vec![20, 30, 20, 20, 10, 30])); } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index c22709af94814..6d127ccf4015a 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -59,7 +59,7 @@ use std::thread::available_parallelism; /// /// // Pick columns 'c' and 'b' /// let projection = Some(vec![2, 1]); -/// let projected_schema = project_schema(&schema, projection.as_deref()).unwrap(); +/// let projected_schema = project_schema(&schema, projection.as_ref()).unwrap(); /// /// let expected_schema = SchemaRef::new(Schema::new(vec![ /// Field::new("c", DataType::Utf8, true), @@ -68,12 +68,12 @@ use std::thread::available_parallelism; /// /// assert_eq!(projected_schema, expected_schema); /// ``` -pub fn project_schema( +pub fn project_schema>( schema: &SchemaRef, - projection: Option<&[usize]>, + projection: Option

, ) -> Result { - let schema = match projection.map(AsRef::as_ref) { - Some(columns) => Arc::new(schema.project(columns)?), + let schema = match projection { + Some(columns) => Arc::new(schema.project(columns.as_ref())?), None => Arc::clone(schema), }; Ok(schema) diff --git a/datafusion/core/src/datasource/empty.rs b/datafusion/core/src/datasource/empty.rs index 882abd921a155..5aeca92b1626d 100644 --- a/datafusion/core/src/datasource/empty.rs +++ b/datafusion/core/src/datasource/empty.rs @@ -77,8 +77,7 @@ impl TableProvider for EmptyTable { _limit: Option, ) -> Result> { // even though there is no data, projections apply - let projected_schema = - project_schema(&self.schema, projection.map(AsRef::as_ref))?; + let projected_schema = project_schema(&self.schema, projection)?; Ok(Arc::new( EmptyExec::new(projected_schema).with_partitions(self.partitions), )) diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index 17d5ff6469be1..8453615c2886b 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -86,7 +86,7 @@ impl CustomExecutionPlan { fn new(projection: Option>) -> Self { let schema = TEST_CUSTOM_SCHEMA_REF!(); let schema = - project_schema(&schema, projection.as_deref()).expect("projected schema"); + project_schema(&schema, projection.as_ref()).expect("projected schema"); let cache = Self::compute_properties(schema); Self { projection, cache } } diff --git a/datafusion/core/tests/physical_optimizer/join_selection.rs b/datafusion/core/tests/physical_optimizer/join_selection.rs index 9219ae43120ae..9234a95591baa 100644 --- a/datafusion/core/tests/physical_optimizer/join_selection.rs +++ b/datafusion/core/tests/physical_optimizer/join_selection.rs @@ -762,7 +762,7 @@ async fn test_hash_join_swap_on_joins_with_projections( "ProjectionExec won't be added above if HashJoinExec contains embedded projection", ); - assert_eq!(swapped_join.projection.as_ref().unwrap(), [0_usize]); + assert_eq!(swapped_join.projection, Some(vec![0_usize])); assert_eq!(swapped.schema().fields.len(), 1); assert_eq!(swapped.schema().fields[0].name(), "small_col"); Ok(()) diff --git a/datafusion/datasource/src/memory.rs b/datafusion/datasource/src/memory.rs index 3fc388cd3c4ad..1d12bb3200309 100644 --- a/datafusion/datasource/src/memory.rs +++ b/datafusion/datasource/src/memory.rs @@ -262,7 +262,7 @@ impl MemorySourceConfig { schema: SchemaRef, projection: Option>, ) -> Result { - let projected_schema = project_schema(&schema, projection.as_deref())?; + let projected_schema = project_schema(&schema, projection.as_ref())?; Ok(Self { partitions: partitions.to_vec(), schema, diff --git a/datafusion/physical-expr/src/projection.rs b/datafusion/physical-expr/src/projection.rs index 7039308bfe298..bb247fb1d5386 100644 --- a/datafusion/physical-expr/src/projection.rs +++ b/datafusion/physical-expr/src/projection.rs @@ -855,13 +855,13 @@ impl OptionProjectionRef { /// Applies an optional projection to a [`SchemaRef`], returning the /// projected schema. pub fn project_schema(&self, schema: &SchemaRef) -> Result { - project_schema(schema, self.inner.as_deref()) + project_schema(schema, self.inner.as_ref()) } /// Applies an optional projection to a [`Statistics`], returning the /// projected stats. pub fn project_statistics(&self, stats: Statistics) -> Statistics { - stats.project(self.inner.as_deref()) + stats.project(self.inner.as_ref()) } } diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index c623ac886386d..6043c81d148ff 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -23,19 +23,6 @@ **Note:** DataFusion `53.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version. -### Schema, statistics project fn take an option slice instead of Vec ref - -`project_schema` and `Statistics::project` now take `Option<&[usize]>` instead of `Option<&Vec>`. - -To convert `Option<&Vec>` into `Option<&[usize]>` you can use `map(|v| v.as_ref())` call, -for example: - -```diff -- let projected_schema = project_schema(&schema, projections)?; -+ let projected_schema = -+ project_schema(&schema, projections.map(|v| v.as_ref()))?; -``` - ### `SimplifyInfo` trait removed, `SimplifyContext` now uses builder-style API The `SimplifyInfo` trait has been removed and replaced with the concrete `SimplifyContext` struct. This simplifies the expression simplification API and removes the need for trait objects.