From 14e145a13aacad4bc01ef6fcd5ec3f96ae6b9594 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 5 Oct 2022 18:45:48 +0800 Subject: [PATCH 1/3] concat type coercion Signed-off-by: remzi <13716567376yh@gmail.com> --- datafusion/optimizer/src/type_coercion.rs | 67 ++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index c7f107b5ec88..4421f53b7333 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -26,8 +26,8 @@ use datafusion_expr::logical_plan::Subquery; use datafusion_expr::type_coercion::data_types; use datafusion_expr::utils::from_plan; use datafusion_expr::{ - is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, Expr, - LogicalPlan, Operator, + is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, + BuiltinScalarFunction, Expr, LogicalPlan, Operator, }; use datafusion_expr::{ExprSchemable, Signature}; use std::sync::Arc; @@ -398,6 +398,20 @@ impl ExprRewriter for TypeCoercionRewriter { } } } + Expr::ScalarFunction { fun, args } => match fun { + BuiltinScalarFunction::Concat + | BuiltinScalarFunction::ConcatWithSeparator => { + let new_args = args + .iter() + .map(|e| e.clone().cast_to(&DataType::Utf8, &self.schema)) + .collect::>>()?; + Ok(Expr::ScalarFunction { + fun, + args: new_args, + }) + } + fun => Ok(Expr::ScalarFunction { fun, args }), + }, expr => Ok(expr), } } @@ -816,6 +830,55 @@ mod test { Ok(()) } + #[test] + fn concat_for_type_coercion() -> Result<()> { + use datafusion_expr::BuiltinScalarFunction::{Concat, ConcatWithSeparator}; + + let empty = empty_with_type(DataType::Utf8); + let args = vec![col("a"), lit("b"), lit(true), lit(false), lit(13)]; + + // concat + { + let expr = Expr::ScalarFunction { + fun: Concat, + args: args.clone(), + }; + + let plan = LogicalPlan::Projection(Projection::try_new( + vec![expr], + empty.clone(), + None, + )?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: concat(#a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation", + &format!("{:?}", plan) + ); + } + + // concat_ws + { + let expr = Expr::ScalarFunction { + fun: ConcatWithSeparator, + args, + }; + + let plan = + LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: concatwithseparator(#a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation", + &format!("{:?}", plan) + ); + } + + Ok(()) + } + fn empty() -> Arc { Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, From a459c5889ad47385d6aa5c57c9dc6abc617af8ac Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 5 Oct 2022 20:01:19 +0800 Subject: [PATCH 2/3] fix test Signed-off-by: remzi <13716567376yh@gmail.com> --- datafusion/optimizer/src/type_coercion.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index ab78ffa84e13..95efaae0f65f 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -853,7 +853,7 @@ mod test { let mut config = OptimizerConfig::default(); let plan = rule.optimize(&plan, &mut config).unwrap(); assert_eq!( - "Projection: concat(#a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation", + "Projection: concat(a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation", &format!("{:?}", plan) ); } @@ -871,7 +871,7 @@ mod test { let mut config = OptimizerConfig::default(); let plan = rule.optimize(&plan, &mut config).unwrap(); assert_eq!( - "Projection: concatwithseparator(#a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation", + "Projection: concatwithseparator(a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation", &format!("{:?}", plan) ); } From 354be1117eea095610c00112da63dc2ca7e4edd7 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Thu, 6 Oct 2022 09:47:12 +0800 Subject: [PATCH 3/3] use builder function Signed-off-by: remzi <13716567376yh@gmail.com> --- datafusion/optimizer/src/type_coercion.rs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index 95efaae0f65f..6854ed4f686d 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -497,7 +497,7 @@ mod test { use arrow::datatypes::DataType; use datafusion_common::{DFField, DFSchema, Result, ScalarValue}; use datafusion_expr::expr_rewriter::ExprRewritable; - use datafusion_expr::{cast, col, is_true, ColumnarValue}; + use datafusion_expr::{cast, col, concat, concat_ws, is_true, ColumnarValue}; use datafusion_expr::{ lit, logical_plan::{EmptyRelation, Projection}, @@ -832,17 +832,12 @@ mod test { #[test] fn concat_for_type_coercion() -> Result<()> { - use datafusion_expr::BuiltinScalarFunction::{Concat, ConcatWithSeparator}; - let empty = empty_with_type(DataType::Utf8); - let args = vec![col("a"), lit("b"), lit(true), lit(false), lit(13)]; + let args = [col("a"), lit("b"), lit(true), lit(false), lit(13)]; // concat { - let expr = Expr::ScalarFunction { - fun: Concat, - args: args.clone(), - }; + let expr = concat(&args); let plan = LogicalPlan::Projection(Projection::try_new( vec![expr], @@ -860,10 +855,7 @@ mod test { // concat_ws { - let expr = Expr::ScalarFunction { - fun: ConcatWithSeparator, - args, - }; + let expr = concat_ws("-", &args); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); @@ -871,7 +863,7 @@ mod test { let mut config = OptimizerConfig::default(); let plan = rule.optimize(&plan, &mut config).unwrap(); assert_eq!( - "Projection: concatwithseparator(a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation", + "Projection: concatwithseparator(Utf8(\"-\"), a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation", &format!("{:?}", plan) ); }