From 5d501a7c40ed8e5be1bf6e9d7f9c1ccf61548479 Mon Sep 17 00:00:00 2001 From: jackwener Date: Tue, 9 May 2023 17:20:05 +0800 Subject: [PATCH] refactor: Expr::InSubquery to use a struct --- .../core/src/datasource/listing/helpers.rs | 2 +- datafusion/core/src/physical_plan/planner.rs | 2 +- datafusion/expr/src/expr.rs | 45 +++++++++++------ datafusion/expr/src/expr_fn.rs | 24 +++++----- datafusion/expr/src/expr_schema.rs | 6 +-- datafusion/expr/src/logical_plan/plan.rs | 5 +- datafusion/expr/src/tree_node/expr.rs | 13 ++--- datafusion/expr/src/utils.rs | 4 +- datafusion/optimizer/README.md | 2 +- .../src/analyzer/count_wildcard_rule.rs | 14 +++--- .../src/analyzer/inline_table_scan.rs | 13 +++-- datafusion/optimizer/src/analyzer/mod.rs | 3 +- .../optimizer/src/analyzer/type_coercion.rs | 48 +++++++++---------- .../optimizer/src/decorrelate_where_in.rs | 5 +- datafusion/optimizer/src/push_down_filter.rs | 2 +- .../simplify_expressions/expr_simplifier.rs | 10 ++-- datafusion/proto/src/logical_plan/to_proto.rs | 4 +- datafusion/sql/src/expr/subquery.rs | 7 +-- datafusion/sql/src/utils.rs | 14 +++--- 19 files changed, 119 insertions(+), 104 deletions(-) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index f4995f741179..427940d4b7ad 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -90,7 +90,7 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool { | Expr::SimilarTo { .. } | Expr::InList { .. } | Expr::Exists { .. } - | Expr::InSubquery { .. } + | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::GetIndexedField { .. } | Expr::GroupingSet(_) diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index ebbd40fd2570..7f99b4e57462 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -258,7 +258,7 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Expr::Exists { .. } => Err(DataFusionError::NotImplemented( "EXISTS is not yet supported in the physical plan".to_string(), )), - Expr::InSubquery { .. } => Err(DataFusionError::NotImplemented( + Expr::InSubquery(_) => Err(DataFusionError::NotImplemented( "IN subquery is not yet supported in the physical plan".to_string(), )), Expr::ScalarSubquery(_) => Err(DataFusionError::NotImplemented( diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c182e80f5fae..8b6562285b57 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -165,14 +165,7 @@ pub enum Expr { /// EXISTS subquery Exists(Exists), /// IN subquery - InSubquery { - /// The expression to compare - expr: Box, - /// subquery that will produce a single column of data to compare against - subquery: Subquery, - /// Whether the expression is negated - negated: bool, - }, + InSubquery(InSubquery), /// Scalar subquery ScalarSubquery(Subquery), /// Represents a reference to all fields in a schema. @@ -547,6 +540,28 @@ impl InList { } } +/// IN subquery +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct InSubquery { + /// The expression to compare + pub expr: Box, + /// Subquery that will produce a single column of data to compare against + pub subquery: Subquery, + /// Whether the expression is negated + pub negated: bool, +} + +impl InSubquery { + /// Create a new InSubquery expression + pub fn new(expr: Box, subquery: Subquery, negated: bool) -> Self { + Self { + expr, + subquery, + negated, + } + } +} + /// Grouping sets /// See /// for Postgres definition. @@ -636,7 +651,7 @@ impl Expr { Expr::GetIndexedField { .. } => "GetIndexedField", Expr::GroupingSet(..) => "GroupingSet", Expr::InList { .. } => "InList", - Expr::InSubquery { .. } => "InSubquery", + Expr::InSubquery(..) => "InSubquery", Expr::IsNotNull(..) => "IsNotNull", Expr::IsNull(..) => "IsNull", Expr::Like { .. } => "Like", @@ -956,16 +971,16 @@ impl fmt::Debug for Expr { subquery, negated: false, }) => write!(f, "EXISTS ({subquery:?})"), - Expr::InSubquery { + Expr::InSubquery(InSubquery { expr, subquery, negated: true, - } => write!(f, "{expr:?} NOT IN ({subquery:?})"), - Expr::InSubquery { + }) => write!(f, "{expr:?} NOT IN ({subquery:?})"), + Expr::InSubquery(InSubquery { expr, subquery, negated: false, - } => write!(f, "{expr:?} IN ({subquery:?})"), + }) => write!(f, "{expr:?} IN ({subquery:?})"), Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"), Expr::BinaryExpr(expr) => write!(f, "{expr}"), Expr::Sort(Sort { @@ -1334,8 +1349,8 @@ fn create_name(e: &Expr) -> Result { } Expr::Exists(Exists { negated: true, .. }) => Ok("NOT EXISTS".to_string()), Expr::Exists(Exists { negated: false, .. }) => Ok("EXISTS".to_string()), - Expr::InSubquery { negated: true, .. } => Ok("NOT IN".to_string()), - Expr::InSubquery { negated: false, .. } => Ok("IN".to_string()), + Expr::InSubquery(InSubquery { negated: true, .. }) => Ok("NOT IN".to_string()), + Expr::InSubquery(InSubquery { negated: false, .. }) => Ok("IN".to_string()), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).name().clone()) } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index f224942cd265..1d781e6f0bf3 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -18,8 +18,8 @@ //! Functions for creating logical expressions use crate::expr::{ - AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, ScalarFunction, - TryCast, + AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, + ScalarFunction, TryCast, }; use crate::{ aggregate_function, built_in_function, conditional_expressions::CaseBuilder, @@ -328,27 +328,27 @@ pub fn not_exists(subquery: Arc) -> Expr { /// Create an IN subquery expression pub fn in_subquery(expr: Expr, subquery: Arc) -> Expr { let outer_ref_columns = subquery.all_out_ref_exprs(); - Expr::InSubquery { - expr: Box::new(expr), - subquery: Subquery { + Expr::InSubquery(InSubquery::new( + Box::new(expr), + Subquery { subquery, outer_ref_columns, }, - negated: false, - } + false, + )) } /// Create a NOT IN subquery expression pub fn not_in_subquery(expr: Expr, subquery: Arc) -> Expr { let outer_ref_columns = subquery.all_out_ref_exprs(); - Expr::InSubquery { - expr: Box::new(expr), - subquery: Subquery { + Expr::InSubquery(InSubquery::new( + Box::new(expr), + Subquery { subquery, outer_ref_columns, }, - negated: true, - } + true, + )) } /// Create a scalar subquery expression diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 36dda847ab5e..8b6afdec94aa 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -18,7 +18,7 @@ use super::{Between, Expr, Like}; use crate::expr::{ AggregateFunction, AggregateUDF, BinaryExpr, Cast, GetIndexedField, InList, - ScalarFunction, ScalarUDF, Sort, TryCast, WindowFunction, + InSubquery, ScalarFunction, ScalarUDF, Sort, TryCast, WindowFunction, }; use crate::field_util::get_indexed_field; use crate::type_coercion::binary::get_result_type; @@ -133,7 +133,7 @@ impl ExprSchemable for Expr { Expr::Not(_) | Expr::IsNull(_) | Expr::Exists { .. } - | Expr::InSubquery { .. } + | Expr::InSubquery(_) | Expr::Between { .. } | Expr::InList { .. } | Expr::IsNotNull(_) @@ -232,7 +232,7 @@ impl ExprSchemable for Expr { | Expr::IsNotUnknown(_) | Expr::Exists { .. } | Expr::Placeholder { .. } => Ok(true), - Expr::InSubquery { expr, .. } => expr.nullable(input_schema), + Expr::InSubquery(InSubquery { expr, .. }) => expr.nullable(input_schema), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).is_nullable()) } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index f1fdc6254656..fdd875ca84de 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -16,6 +16,7 @@ // under the License. use crate::expr::Exists; +use crate::expr::InSubquery; ///! Logical plan types use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor}; use crate::logical_plan::extension::UserDefinedLogicalNode; @@ -546,7 +547,7 @@ impl LogicalPlan { inspect_expr_pre(expr, |expr| { match expr { Expr::Exists(Exists { subquery, .. }) - | Expr::InSubquery { subquery, .. } + | Expr::InSubquery(InSubquery { subquery, .. }) | Expr::ScalarSubquery(subquery) => { // use a synthetic plan so the collector sees a // LogicalPlan::Subquery (even though it is @@ -572,7 +573,7 @@ impl LogicalPlan { inspect_expr_pre(expr, |expr| { match expr { Expr::Exists(Exists { subquery, .. }) - | Expr::InSubquery { subquery, .. } + | Expr::InSubquery(InSubquery { subquery, .. }) | Expr::ScalarSubquery(subquery) => { // use a synthetic plan so the visitor sees a // LogicalPlan::Subquery (even though it is diff --git a/datafusion/expr/src/tree_node/expr.rs b/datafusion/expr/src/tree_node/expr.rs index ecd90254dc71..7bc9edf4c28a 100644 --- a/datafusion/expr/src/tree_node/expr.rs +++ b/datafusion/expr/src/tree_node/expr.rs @@ -19,7 +19,8 @@ use crate::expr::{ AggregateFunction, AggregateUDF, Between, BinaryExpr, Case, Cast, GetIndexedField, - GroupingSet, InList, Like, ScalarFunction, ScalarUDF, Sort, TryCast, WindowFunction, + GroupingSet, InList, InSubquery, Like, ScalarFunction, ScalarUDF, Sort, TryCast, + WindowFunction, }; use crate::Expr; use datafusion_common::tree_node::VisitRecursion; @@ -45,7 +46,7 @@ impl TreeNode for Expr { | Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) | Expr::Sort(Sort { expr, .. }) - | Expr::InSubquery { expr, .. } => vec![expr.as_ref().clone()], + | Expr::InSubquery(InSubquery{ expr, .. }) => vec![expr.as_ref().clone()], Expr::GetIndexedField(GetIndexedField { expr, .. }) => { vec![expr.as_ref().clone()] } @@ -149,15 +150,15 @@ impl TreeNode for Expr { Expr::Column(_) => self, Expr::OuterReferenceColumn(_, _) => self, Expr::Exists { .. } => self, - Expr::InSubquery { + Expr::InSubquery(InSubquery { expr, subquery, negated, - } => Expr::InSubquery { - expr: transform_boxed(expr, &mut transform)?, + }) => Expr::InSubquery(InSubquery::new( + transform_boxed(expr, &mut transform)?, subquery, negated, - }, + )), Expr::ScalarSubquery(_) => self, Expr::ScalarVariable(ty, names) => Expr::ScalarVariable(ty, names), Expr::Literal(value) => Expr::Literal(value), diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 7698297b79e9..06f9ccd11cc2 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -303,7 +303,7 @@ pub fn expr_to_columns(expr: &Expr, accum: &mut HashSet) -> Result<()> { | Expr::AggregateUDF { .. } | Expr::InList { .. } | Expr::Exists { .. } - | Expr::InSubquery { .. } + | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::Wildcard | Expr::QualifiedWildcard { .. } @@ -704,7 +704,7 @@ pub fn from_plan( match expr { Expr::Exists { .. } | Expr::ScalarSubquery(_) - | Expr::InSubquery { .. } => { + | Expr::InSubquery(_) => { // subqueries could contain aliases so we don't recurse into those Ok(RewriteRecursion::Stop) } diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md index 01c6f6dd26ce..c8baae03efa2 100644 --- a/datafusion/optimizer/README.md +++ b/datafusion/optimizer/README.md @@ -201,7 +201,7 @@ fn extract_subquery_filters(expression: &Expr, extracted: &mut Vec) -> Res impl ExpressionVisitor for InSubqueryVisitor<'_> { fn pre_visit(self, expr: &Expr) -> Result> { - if let Expr::InSubquery { .. } = expr { + if let Expr::InSubquery(_) = expr { self.accum.push(expr.to_owned()); } Ok(Recursion::Continue(self)) diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 46cae6f2f1cb..3b0e334618a9 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -18,9 +18,9 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{Column, DFField, DFSchema, DFSchemaRef, Result}; -use datafusion_expr::expr::AggregateFunction; +use datafusion_expr::expr::{AggregateFunction, InSubquery}; use datafusion_expr::utils::COUNT_STAR_EXPANSION; -use datafusion_expr::Expr::{InSubquery, ScalarSubquery}; +use datafusion_expr::Expr::ScalarSubquery; use datafusion_expr::{ aggregate_function, count, expr, lit, window_function, Aggregate, Expr, Filter, LogicalPlan, Projection, Sort, Subquery, Window, @@ -191,25 +191,25 @@ impl TreeNodeRewriter for CountWildcardRewriter { outer_ref_columns, }) } - InSubquery { + Expr::InSubquery(InSubquery { expr, subquery, negated, - } => { + }) => { let new_plan = subquery .subquery .as_ref() .clone() .transform_down(&analyze_internal)?; - InSubquery { + Expr::InSubquery(InSubquery::new( expr, - subquery: Subquery { + Subquery { subquery: Arc::new(new_plan), outer_ref_columns: subquery.outer_ref_columns, }, negated, - } + )) } Expr::Exists(expr::Exists { subquery, negated }) => { let new_plan = subquery diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index b202283d8f15..3d0dabdd377c 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -24,6 +24,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::Result; use datafusion_expr::expr::Exists; +use datafusion_expr::expr::InSubquery; use datafusion_expr::{ logical_plan::LogicalPlan, Expr, Filter, LogicalPlanBuilder, TableScan, }; @@ -91,19 +92,17 @@ fn rewrite_subquery(expr: Expr) -> Result> { let subquery = subquery.with_plan(Arc::new(new_plan)); Ok(Transformed::Yes(Expr::Exists(Exists { subquery, negated }))) } - Expr::InSubquery { + Expr::InSubquery(InSubquery { expr, subquery, negated, - } => { + }) => { let plan = subquery.subquery.as_ref().clone(); let new_plan = plan.transform_up(&analyze_internal)?; let subquery = subquery.with_plan(Arc::new(new_plan)); - Ok(Transformed::Yes(Expr::InSubquery { - expr, - subquery, - negated, - })) + Ok(Transformed::Yes(Expr::InSubquery(InSubquery::new( + expr, subquery, negated, + )))) } Expr::ScalarSubquery(subquery) => { let plan = subquery.subquery.as_ref().clone(); diff --git a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/optimizer/src/analyzer/mod.rs index a19c3f6268f7..436bb3a06044 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/optimizer/src/analyzer/mod.rs @@ -30,6 +30,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TreeNode, VisitRecursion}; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::expr::Exists; +use datafusion_expr::expr::InSubquery; use datafusion_expr::utils::inspect_expr_pre; use datafusion_expr::{Expr, LogicalPlan}; use log::debug; @@ -121,7 +122,7 @@ fn check_plan(plan: &LogicalPlan) -> Result<()> { // recursively look for subqueries inspect_expr_pre(expr, |expr| match expr { Expr::Exists(Exists { subquery, .. }) - | Expr::InSubquery { subquery, .. } + | Expr::InSubquery(InSubquery { subquery, .. }) | Expr::ScalarSubquery(subquery) => { check_subquery_expr(plan, &subquery.subquery, expr) } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index f42bc96dcbf3..6de095cccdd8 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -25,8 +25,8 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{RewriteRecursion, TreeNodeRewriter}; use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue}; use datafusion_expr::expr::{ - self, Between, BinaryExpr, Case, Exists, InList, Like, ScalarFunction, ScalarUDF, - WindowFunction, + self, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, ScalarFunction, + ScalarUDF, WindowFunction, }; use datafusion_expr::expr_schema::cast_subquery; use datafusion_expr::logical_plan::Subquery; @@ -144,11 +144,11 @@ impl TreeNodeRewriter for TypeCoercionRewriter { negated, })) } - Expr::InSubquery { + Expr::InSubquery(InSubquery { expr, subquery, negated, - } => { + }) => { let new_plan = analyze_internal(&self.schema, &subquery.subquery)?; let expr_type = expr.get_type(&self.schema)?; let subquery_type = new_plan.schema().field(0).data_type(); @@ -161,11 +161,11 @@ impl TreeNodeRewriter for TypeCoercionRewriter { subquery: Arc::new(new_plan), outer_ref_columns: subquery.outer_ref_columns, }; - Ok(Expr::InSubquery { - expr: Box::new(expr.cast_to(&common_type, &self.schema)?), - subquery: cast_subquery(new_subquery, &common_type)?, + Ok(Expr::InSubquery(InSubquery::new( + Box::new(expr.cast_to(&common_type, &self.schema)?), + cast_subquery(new_subquery, &common_type)?, negated, - }) + ))) } Expr::IsTrue(expr) => { let expr = is_true(get_casted_expr_for_bool_op(&expr, &self.schema)?); @@ -739,7 +739,7 @@ mod test { use datafusion_common::tree_node::TreeNode; use datafusion_common::{DFField, DFSchema, DFSchemaRef, Result, ScalarValue}; - use datafusion_expr::expr::{self, Like, ScalarFunction}; + use datafusion_expr::expr::{self, InSubquery, Like, ScalarFunction}; use datafusion_expr::{ cast, col, concat, concat_ws, create_udaf, is_true, AccumulatorFunctionImplementation, AggregateFunction, AggregateUDF, BinaryExpr, @@ -1452,14 +1452,14 @@ mod test { let empty_int32 = empty_with_type(DataType::Int32); let empty_int64 = empty_with_type(DataType::Int64); - let in_subquery_expr = Expr::InSubquery { - expr: Box::new(col("a")), - subquery: Subquery { + let in_subquery_expr = Expr::InSubquery(InSubquery::new( + Box::new(col("a")), + Subquery { subquery: empty_int32, outer_ref_columns: vec![], }, - negated: false, - }; + false, + )); let plan = LogicalPlan::Filter(Filter::try_new(in_subquery_expr, empty_int64)?); // add cast for subquery let expected = "\ @@ -1477,14 +1477,14 @@ mod test { let empty_int32 = empty_with_type(DataType::Int32); let empty_int64 = empty_with_type(DataType::Int64); - let in_subquery_expr = Expr::InSubquery { - expr: Box::new(col("a")), - subquery: Subquery { + let in_subquery_expr = Expr::InSubquery(InSubquery::new( + Box::new(col("a")), + Subquery { subquery: empty_int64, outer_ref_columns: vec![], }, - negated: false, - }; + false, + )); let plan = LogicalPlan::Filter(Filter::try_new(in_subquery_expr, empty_int32)?); // add cast for subquery let expected = "\ @@ -1501,14 +1501,14 @@ mod test { let empty_inside = empty_with_type(DataType::Decimal128(10, 5)); let empty_outside = empty_with_type(DataType::Decimal128(8, 8)); - let in_subquery_expr = Expr::InSubquery { - expr: Box::new(col("a")), - subquery: Subquery { + let in_subquery_expr = Expr::InSubquery(InSubquery::new( + Box::new(col("a")), + Subquery { subquery: empty_inside, outer_ref_columns: vec![], }, - negated: false, - }; + false, + )); let plan = LogicalPlan::Filter(Filter::try_new(in_subquery_expr, empty_outside)?); // add cast for subquery let expected = "Filter: CAST(a AS Decimal128(13, 8)) IN ()\ diff --git a/datafusion/optimizer/src/decorrelate_where_in.rs b/datafusion/optimizer/src/decorrelate_where_in.rs index 6dbbbf9bf946..0d9b472cf4a3 100644 --- a/datafusion/optimizer/src/decorrelate_where_in.rs +++ b/datafusion/optimizer/src/decorrelate_where_in.rs @@ -23,6 +23,7 @@ use crate::utils::{ }; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::{context, Column, Result}; +use datafusion_expr::expr::InSubquery; use datafusion_expr::expr_rewriter::unnormalize_col; use datafusion_expr::logical_plan::{JoinType, Projection, Subquery}; use datafusion_expr::{Expr, Filter, LogicalPlan, LogicalPlanBuilder}; @@ -59,11 +60,11 @@ impl DecorrelateWhereIn { let mut others = vec![]; for it in filters.iter() { match it { - Expr::InSubquery { + Expr::InSubquery(InSubquery { expr, subquery, negated, - } => { + }) => { let subquery_plan = self .try_optimize(&subquery.subquery, config)? .map(Arc::new) diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 2f3524a8d2e8..d724c59d0dd7 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -157,7 +157,7 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result { | Expr::Placeholder { .. } | Expr::ScalarVariable(_, _) => Ok(VisitRecursion::Skip), Expr::Exists { .. } - | Expr::InSubquery { .. } + | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::OuterReferenceColumn(_, _) | Expr::ScalarUDF(..) => { diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 75055b640477..07391a2d7779 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -28,7 +28,7 @@ use arrow::{ }; use datafusion_common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter}; use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue}; -use datafusion_expr::expr::{InList, ScalarFunction}; +use datafusion_expr::expr::{InList, InSubquery, ScalarFunction}; use datafusion_expr::{ and, expr, lit, or, BinaryExpr, BuiltinScalarFunction, ColumnarValue, Expr, Like, Volatility, @@ -258,7 +258,7 @@ impl<'a> ConstEvaluator<'a> { | Expr::Column(_) | Expr::OuterReferenceColumn(_, _) | Expr::Exists { .. } - | Expr::InSubquery { .. } + | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::WindowFunction { .. } | Expr::Sort { .. } @@ -408,11 +408,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) => { let Expr::ScalarSubquery(subquery) = list.remove(0) else { unreachable!() }; - Expr::InSubquery { - expr, - subquery, - negated, - } + Expr::InSubquery(InSubquery::new(expr, subquery, negated)) } // if expr is a single column reference: diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 4c40931b3ee7..263acd58dc47 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -895,12 +895,12 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { expr_type: Some(ExprType::Wildcard(true)), }, Expr::ScalarSubquery(_) - | Expr::InSubquery { .. } + | Expr::InSubquery(_) | Expr::Exists { .. } | Expr::OuterReferenceColumn { .. } => { // we would need to add logical plan operators to datafusion.proto to support this // see discussion in https://github.com/apache/arrow-datafusion/issues/2565 - return Err(Error::General("Proto serialization error: Expr::ScalarSubquery(_) | Expr::InSubquery { .. } | Expr::Exists { .. } | Exp:OuterReferenceColumn not supported".to_string())); + return Err(Error::General("Proto serialization error: Expr::ScalarSubquery(_) | Expr::InSubquery(_) | Expr::Exists { .. } | Exp:OuterReferenceColumn not supported".to_string())); } Expr::GetIndexedField(GetIndexedField { key, expr }) => Self { expr_type: Some(ExprType::GetIndexedField(Box::new( diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 48960aceda9d..d34065d92fe5 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -18,6 +18,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{DFSchema, Result}; use datafusion_expr::expr::Exists; +use datafusion_expr::expr::InSubquery; use datafusion_expr::{Expr, Subquery}; use sqlparser::ast::Expr as SQLExpr; use sqlparser::ast::Query; @@ -59,14 +60,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let outer_ref_columns = sub_plan.all_out_ref_exprs(); planner_context.set_outer_query_schema(old_outer_query_schema); let expr = Box::new(self.sql_to_expr(expr, input_schema, planner_context)?); - Ok(Expr::InSubquery { + Ok(Expr::InSubquery(InSubquery::new( expr, - subquery: Subquery { + Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, }, negated, - }) + ))) } pub(super) fn parse_scalar_subquery( diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 78f25905dbfb..1bfff37b48bd 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -23,7 +23,7 @@ use sqlparser::ast::Ident; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::expr::{ AggregateFunction, AggregateUDF, Between, BinaryExpr, Case, GetIndexedField, - GroupingSet, InList, Like, ScalarFunction, ScalarUDF, WindowFunction, + GroupingSet, InList, InSubquery, Like, ScalarFunction, ScalarUDF, WindowFunction, }; use datafusion_expr::expr::{Cast, Sort}; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; @@ -368,15 +368,15 @@ where | Expr::ScalarVariable(_, _) | Expr::Exists { .. } | Expr::ScalarSubquery(_) => Ok(expr.clone()), - Expr::InSubquery { + Expr::InSubquery(InSubquery { expr: nested_expr, subquery, negated, - } => Ok(Expr::InSubquery { - expr: Box::new(clone_with_replacement(nested_expr, replacement_fn)?), - subquery: subquery.clone(), - negated: *negated, - }), + }) => Ok(Expr::InSubquery(InSubquery::new( + Box::new(clone_with_replacement(nested_expr, replacement_fn)?), + subquery.clone(), + *negated, + ))), Expr::Wildcard => Ok(Expr::Wildcard), Expr::QualifiedWildcard { .. } => Ok(expr.clone()), Expr::GetIndexedField(GetIndexedField { key, expr }) => {