Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,11 @@ impl SessionStateBuilder {
.get_or_insert_with(Vec::new)
.extend(SessionStateDefaults::default_expr_planners());

let analyzer = self.analyzer.get_or_insert_with(Analyzer::default);
for rewrite in SessionStateDefaults::default_function_rewrites() {
analyzer.add_function_rewrite(rewrite);
}

self.scalar_functions
.get_or_insert_with(Vec::new)
.extend(SessionStateDefaults::default_scalar_functions());
Expand Down
11 changes: 11 additions & 0 deletions datafusion/core/src/execution/session_state_defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use datafusion_catalog::{MemoryCatalogProvider, MemorySchemaProvider};
use datafusion_execution::config::SessionConfig;
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_execution::runtime_env::RuntimeEnv;
use datafusion_expr::expr_rewriter::FunctionRewrite;
use datafusion_expr::planner::ExprPlanner;
use datafusion_expr::registry::ExtensionTypeRegistrationRef;
use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF};
Expand Down Expand Up @@ -102,6 +103,16 @@ impl SessionStateDefaults {
expr_planners
}

/// returns the list of default [`FunctionRewrite`]s installed on the analyzer.
pub fn default_function_rewrites() -> Vec<Arc<dyn FunctionRewrite + Send + Sync>> {
let rewrites: Vec<Arc<dyn FunctionRewrite + Send + Sync>> = vec![
#[cfg(feature = "nested_expressions")]
Arc::new(functions_nested::concat_rewrite::ConcatArrayRewrite),
];

rewrites
}

/// returns the list of default [`ScalarUDF`]s
pub fn default_scalar_functions() -> Vec<Arc<ScalarUDF>> {
#[cfg_attr(not(feature = "nested_expressions"), expect(unused_mut))]
Expand Down
86 changes: 86 additions & 0 deletions datafusion/functions-nested/src/concat_rewrite.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! [`ConcatArrayRewrite`] rewrites `concat(array, ...)` to `array_concat(array, ...)`.

use std::any::Any;

use arrow::datatypes::DataType;
use datafusion_common::config::ConfigOptions;
use datafusion_common::tree_node::Transformed;
use datafusion_common::{DFSchema, Result, plan_err};
use datafusion_expr::Expr;
use datafusion_expr::ExprSchemable;
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::expr_rewriter::FunctionRewrite;
use datafusion_functions::string::concat::ConcatFunc;

use crate::concat::array_concat;

/// [`FunctionRewrite`] that turns `concat(array, ...)` into
/// `array_concat(array, ...)` at the analyzer phase.
///
/// `concat` calls with only non-array arguments are left unchanged.
/// Mixed array and non-array arguments are rejected with a plan error.
#[derive(Debug, Default)]
pub struct ConcatArrayRewrite;

impl FunctionRewrite for ConcatArrayRewrite {
fn name(&self) -> &str {
"concat_array_rewrite"
}

fn rewrite(
&self,
expr: Expr,
schema: &DFSchema,
_config: &ConfigOptions,
) -> Result<Transformed<Expr>> {
let Expr::ScalarFunction(ScalarFunction { func, args }) = &expr else {
return Ok(Transformed::no(expr));
};
if !(func.inner().as_ref() as &dyn Any).is::<ConcatFunc>() {
return Ok(Transformed::no(expr));
}

let mut any_list = false;
let mut any_non_list = false;
for arg in args {
match arg.get_type(schema)? {
DataType::List(_)
| DataType::LargeList(_)
| DataType::FixedSizeList(_, _) => any_list = true,
DataType::Null => {}
_ => any_non_list = true,
}
}

if !any_list {
return Ok(Transformed::no(expr));
}
if any_non_list {
return plan_err!(
"Cannot mix array and non-array arguments in concat function"
);
}

let Expr::ScalarFunction(ScalarFunction { args, .. }) = expr else {
unreachable!("already matched above")
};
Ok(Transformed::yes(array_concat(args)))
}
}
5 changes: 5 additions & 0 deletions datafusion/functions-nested/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub mod array_has;
pub mod arrays_zip;
pub mod cardinality;
pub mod concat;
pub mod concat_rewrite;
pub mod dimension;
pub mod distance;
pub mod empty;
Expand Down Expand Up @@ -192,6 +193,10 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
Ok(()) as Result<()>
})?;

// Register the analyzer rewrite that turns `concat(array, ...)` into
// `array_concat(...)`.
registry.register_function_rewrite(Arc::new(concat_rewrite::ConcatArrayRewrite))?;

Ok(())
}

Expand Down
82 changes: 82 additions & 0 deletions datafusion/sqllogictest/test_files/array/array_concat.slt
Original file line number Diff line number Diff line change
Expand Up @@ -388,4 +388,86 @@ select array_concat(make_array(column3), column1, column2) from arrays_values_v2
[NULL]


## concat() delegates to array_concat when all arguments are arrays.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth adding tests for mixed list types? e.g.,

concat(List, LargeList) -> LargeList
concat(FSL, List) -> FSL

Copy link
Copy Markdown
Contributor Author

@hcrosse hcrosse Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! I added FSL + List, but skipped List + LargeList, since array_concat itself errors on that on main. Filed #21702.

## An analyzer-phase FunctionRewrite rewrites the call before execution, so
## the string concat path never sees array inputs.

query ?
select concat(make_array(1, 2, 3), make_array(4, 5));
----
[1, 2, 3, 4, 5]

query ?
select concat(make_array(1, 2), make_array(3, 4), make_array(5, 6));
----
[1, 2, 3, 4, 5, 6]

query ?
select concat(make_array(1, NULL, 3), make_array(4));
----
[1, NULL, 3, 4]

query ?
select concat(make_array('a', 'b'), make_array('c', 'd'));
----
[a, b, c, d]

query ?
select concat(NULL::integer[], make_array(1, 2));
----
[1, 2]

query ?
select concat(make_array(1, 2), NULL::integer[]);
----
[1, 2]

query ?
select concat(column1, column2) from arrays_values_v2;
----
[NULL, 2, 3, 4, 5, NULL]
[7, NULL, 8]
[9, NULL, 10]
[NULL, 1, NULL, 21]
[11, 12]
NULL

query ?
select concat(
arrow_cast(['1', '2'], 'LargeList(Utf8)'),
arrow_cast(['3'], 'LargeList(Utf8)')
);
----
[1, 2, 3]

query ?
select concat(
arrow_cast(['1', '2'], 'FixedSizeList(2, Utf8)'),
arrow_cast(['3'], 'FixedSizeList(1, Utf8)')
);
----
[1, 2, 3]

query ?
select concat(NULL::integer[], NULL::integer[]);
----
NULL

# Mixed list variants are coerced by array_concat's own coerce_types
# rules (same result as calling array_concat directly).
query ?T
select
concat(arrow_cast([1, 2], 'FixedSizeList(2, Int64)'), make_array(3, 4)) as v,
arrow_typeof(concat(arrow_cast([1, 2], 'FixedSizeList(2, Int64)'), make_array(3, 4))) as t;
----
[1, 2, 3, 4] List(Int64)

# Mixed array + non-array arguments are rejected at plan time.
statement error Cannot mix array and non-array arguments in concat function
select concat(make_array(1), 'x');

statement error Cannot mix array and non-array arguments in concat function
select concat('x', make_array(1));


include ./cleanup.slt.part
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ EXPLAIN VERBOSE SELECT a, b, c FROM simple_explain_test
initial_logical_plan
01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
02)--TableScan: simple_explain_test
logical_plan after apply_function_rewrites SAME TEXT AS ABOVE
logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
logical_plan after type_coercion SAME TEXT AS ABOVE
analyzed_logical_plan SAME TEXT AS ABOVE
Expand Down Expand Up @@ -548,6 +549,7 @@ EXPLAIN VERBOSE SELECT a, b, c FROM simple_explain_test
initial_logical_plan
01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
02)--TableScan: simple_explain_test
logical_plan after apply_function_rewrites SAME TEXT AS ABOVE
logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
logical_plan after type_coercion SAME TEXT AS ABOVE
analyzed_logical_plan SAME TEXT AS ABOVE
Expand Down
Loading