diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 23571c7f3daa..f0054a443397 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -102,7 +102,7 @@ impl ExpressionVisitor for ApplicabilityVisitor<'_> { | Expr::Exists { .. } | Expr::InSubquery { .. } | Expr::ScalarSubquery(_) - | Expr::GetIndexedField { .. } + | Expr::GetIndexedField(_) | Expr::GroupingSet(_) | Expr::Case { .. } => Recursion::Continue(self), diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index 2dc1b29ca212..ce3aed7eeaac 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -59,7 +59,7 @@ use arrow::compute::SortOptions; use arrow::datatypes::{Schema, SchemaRef}; use async_trait::async_trait; use datafusion_common::{DFSchema, ScalarValue}; -use datafusion_expr::expr::{Between, BinaryExpr, GroupingSet, Like}; +use datafusion_expr::expr::{Between, BinaryExpr, GetIndexedField, GroupingSet, Like}; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::utils::{expand_wildcard, expr_to_columns}; use datafusion_expr::WindowFrameUnits; @@ -174,7 +174,7 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { let expr = create_physical_name(expr, false)?; Ok(format!("{} IS NOT UNKNOWN", expr)) } - Expr::GetIndexedField { expr, key } => { + Expr::GetIndexedField(GetIndexedField { expr, key }) => { let expr = create_physical_name(expr, false)?; Ok(format!("{}[{}]", expr, key)) } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 7186ecd3b088..326361baee93 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -120,12 +120,7 @@ pub enum Expr { /// arithmetic negation of an expression, the operand must be of a signed numeric data type Negative(Box), /// Returns the field of a [`arrow::array::ListArray`] or [`arrow::array::StructArray`] by key - GetIndexedField { - /// the expression to take the field from - expr: Box, - /// The name of the field to take - key: ScalarValue, - }, + GetIndexedField(GetIndexedField), /// Whether an expression is between a given range. Between(Between), /// The CASE expression is similar to a series of nested if/else and there are two forms that @@ -324,6 +319,20 @@ impl Like { } } +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct GetIndexedField { + /// the expression to take the field from + pub expr: Box, + /// The name of the field to take + pub key: ScalarValue, +} + +impl GetIndexedField { + pub fn new(expr: Box, key: ScalarValue) -> Self { + Self { expr, key } + } +} + /// BETWEEN expression #[derive(Clone, PartialEq, Eq, Hash)] pub struct Between { @@ -434,7 +443,7 @@ impl Expr { Expr::Cast { .. } => "Cast", Expr::Column(..) => "Column", Expr::Exists { .. } => "Exists", - Expr::GetIndexedField { .. } => "GetIndexedField", + Expr::GetIndexedField(..) => "GetIndexedField", Expr::GroupingSet(..) => "GroupingSet", Expr::InList { .. } => "InList", Expr::InSubquery { .. } => "InSubquery", @@ -854,7 +863,7 @@ impl fmt::Debug for Expr { } Expr::Wildcard => write!(f, "*"), Expr::QualifiedWildcard { qualifier } => write!(f, "{}.*", qualifier), - Expr::GetIndexedField { ref expr, key } => { + Expr::GetIndexedField(GetIndexedField { ref expr, key }) => { write!(f, "({:?})[{}]", expr, key) } Expr::GroupingSet(grouping_sets) => match grouping_sets { @@ -1082,7 +1091,7 @@ fn create_name(e: &Expr) -> Result { Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).name().clone()) } - Expr::GetIndexedField { expr, key } => { + Expr::GetIndexedField(GetIndexedField { expr, key }) => { let expr = create_name(expr)?; Ok(format!("{}[{}]", expr, key)) } diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs index d4cfc8067682..05c5abe26fbc 100644 --- a/datafusion/expr/src/expr_rewriter.rs +++ b/datafusion/expr/src/expr_rewriter.rs @@ -17,7 +17,7 @@ //! Expression rewriter -use crate::expr::{Between, BinaryExpr, Case, GroupingSet, Like}; +use crate::expr::{Between, BinaryExpr, Case, GetIndexedField, GroupingSet, Like}; use crate::logical_plan::{Aggregate, Projection}; use crate::utils::{from_plan, grouping_set_to_exprlist}; use crate::{Expr, ExprSchemable, LogicalPlan}; @@ -286,10 +286,12 @@ impl ExprRewritable for Expr { Expr::QualifiedWildcard { qualifier } => { Expr::QualifiedWildcard { qualifier } } - Expr::GetIndexedField { expr, key } => Expr::GetIndexedField { - expr: rewrite_boxed(expr, rewriter)?, - key, - }, + Expr::GetIndexedField(GetIndexedField { expr, key }) => { + Expr::GetIndexedField(GetIndexedField::new( + rewrite_boxed(expr, rewriter)?, + key, + )) + } }; // now rewrite this expression itself diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 6c70eac5b1bd..d7731e2445b4 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -16,7 +16,7 @@ // under the License. use super::{Between, Expr, Like}; -use crate::expr::BinaryExpr; +use crate::expr::{BinaryExpr, GetIndexedField}; use crate::field_util::get_indexed_field; use crate::type_coercion::binary::binary_operator_data_type; use crate::{aggregate_function, function, window_function}; @@ -138,7 +138,7 @@ impl ExprSchemable for Expr { // grouping sets do not really have a type and do not appear in projections Ok(DataType::Null) } - Expr::GetIndexedField { ref expr, key } => { + Expr::GetIndexedField(GetIndexedField { ref expr, key }) => { let data_type = expr.get_type(schema)?; get_indexed_field(&data_type, key).map(|x| x.data_type().clone()) @@ -218,7 +218,7 @@ impl ExprSchemable for Expr { "QualifiedWildcard expressions are not valid in a logical query plan" .to_owned(), )), - Expr::GetIndexedField { ref expr, key } => { + Expr::GetIndexedField(GetIndexedField { ref expr, key }) => { let data_type = expr.get_type(input_schema)?; get_indexed_field(&data_type, key).map(|x| x.is_nullable()) } diff --git a/datafusion/expr/src/expr_visitor.rs b/datafusion/expr/src/expr_visitor.rs index b78ed89db7d5..93da97f88271 100644 --- a/datafusion/expr/src/expr_visitor.rs +++ b/datafusion/expr/src/expr_visitor.rs @@ -18,7 +18,7 @@ //! Expression visitor use crate::{ - expr::{BinaryExpr, GroupingSet}, + expr::{BinaryExpr, GetIndexedField, GroupingSet}, Between, Expr, Like, }; use datafusion_common::Result; @@ -112,7 +112,7 @@ impl ExprVisitable for Expr { | Expr::TryCast { expr, .. } | Expr::Sort { expr, .. } | Expr::InSubquery { expr, .. } - | Expr::GetIndexedField { expr, .. } => expr.accept(visitor), + | Expr::GetIndexedField(GetIndexedField { expr, .. }) => expr.accept(visitor), Expr::GroupingSet(GroupingSet::Rollup(exprs)) => exprs .iter() .fold(Ok(visitor), |v, e| v.and_then(|v| e.accept(v))), diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index be4c45f8c931..aad11d318dda 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -126,7 +126,7 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> { | Expr::ScalarSubquery(_) | Expr::Wildcard | Expr::QualifiedWildcard { .. } - | Expr::GetIndexedField { .. } => {} + | Expr::GetIndexedField(_) => {} } Ok(Recursion::Continue(self)) } diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index a9970abf6b64..806899ebf40c 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -493,7 +493,7 @@ impl<'a> ConstEvaluator<'a> { | Expr::Cast { .. } | Expr::TryCast { .. } | Expr::InList { .. } - | Expr::GetIndexedField { .. } => true, + | Expr::GetIndexedField(_) => true, } } diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index d08448ef28c2..e726d37dc92d 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -27,7 +27,7 @@ use crate::{ }; use arrow::datatypes::{DataType, Schema}; use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue}; -use datafusion_expr::expr::BinaryExpr; +use datafusion_expr::expr::{BinaryExpr, GetIndexedField}; use datafusion_expr::{binary_expr, Between, Expr, Like, Operator}; use std::sync::Arc; @@ -308,10 +308,17 @@ pub fn create_physical_expr( input_schema, execution_props, )?), - Expr::GetIndexedField { expr, key } => Ok(Arc::new(GetIndexedFieldExpr::new( - create_physical_expr(expr, input_dfschema, input_schema, execution_props)?, - key.clone(), - ))), + Expr::GetIndexedField(GetIndexedField { expr, key }) => { + Ok(Arc::new(GetIndexedFieldExpr::new( + create_physical_expr( + expr, + input_dfschema, + input_schema, + execution_props, + )?, + key.clone(), + ))) + } Expr::ScalarFunction { fun, args } => { let physical_args = args diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index feb78630d2d6..69849179b041 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -31,7 +31,7 @@ use datafusion::execution::registry::FunctionRegistry; use datafusion_common::{ Column, DFField, DFSchema, DFSchemaRef, DataFusionError, ScalarValue, }; -use datafusion_expr::expr::BinaryExpr; +use datafusion_expr::expr::{BinaryExpr, GetIndexedField}; use datafusion_expr::{ abs, acos, array, ascii, asin, atan, atan2, bit_length, btrim, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, date_bin, @@ -801,10 +801,10 @@ pub fn parse_expr( let expr = parse_required_expr(&field.expr, registry, "expr")?; - Ok(Expr::GetIndexedField { + Ok(Expr::GetIndexedField(GetIndexedField { expr: Box::new(expr), key, - }) + })) } ExprType::Column(column) => Ok(Expr::Column(column.into())), ExprType::Literal(literal) => { diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index 931779ab3948..4e2a171d2c44 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -34,7 +34,7 @@ use arrow::datatypes::{ UnionMode, }; use datafusion_common::{Column, DFField, DFSchemaRef, ScalarValue}; -use datafusion_expr::expr::{Between, BinaryExpr, GroupingSet, Like}; +use datafusion_expr::expr::{Between, BinaryExpr, GetIndexedField, GroupingSet, Like}; use datafusion_expr::{ logical_plan::PlanType, logical_plan::StringifiedPlan, AggregateFunction, BuiltInWindowFunction, BuiltinScalarFunction, Expr, WindowFrame, WindowFrameBound, @@ -816,7 +816,7 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { // see discussion in https://github.com/apache/arrow-datafusion/issues/2565 return Err(Error::General("Proto serialization error: Expr::ScalarSubquery(_) | Expr::InSubquery { .. } | Expr::Exists { .. } not supported".to_string())) } - Expr::GetIndexedField { key, expr } => Self { + Expr::GetIndexedField (GetIndexedField { key, expr }) => Self { expr_type: Some(ExprType::GetIndexedField(Box::new( protobuf::GetIndexedField { key: Some(key.try_into()?), diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 7cff840daa6e..e95d629b0e5c 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -51,7 +51,9 @@ use crate::utils::{make_decimal_type, normalize_ident, resolve_columns}; use datafusion_common::{ field_not_found, Column, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::expr::{Between, BinaryExpr, Case, GroupingSet, Like}; +use datafusion_expr::expr::{ + Between, BinaryExpr, Case, GetIndexedField, GroupingSet, Like, +}; use datafusion_expr::logical_plan::builder::project_with_alias; use datafusion_expr::logical_plan::{Filter, Subquery}; use datafusion_expr::Expr::Alias; @@ -123,10 +125,10 @@ fn plan_indexed(expr: Expr, mut keys: Vec) -> Result { expr }; - Ok(Expr::GetIndexedField { + Ok(Expr::GetIndexedField(GetIndexedField { expr: Box::new(expr), key: plan_key(key)?, - }) + })) } impl<'a, S: ContextProvider> SqlToRel<'a, S> { @@ -1834,10 +1836,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Err(_) => { if let Some(field) = schema.fields().iter().find(|f| f.name().eq(&relation)) { // Access to a field of a column which is a structure, example: SELECT my_struct.key - Ok(Expr::GetIndexedField { + Ok(Expr::GetIndexedField (GetIndexedField { expr: Box::new(Expr::Column(field.qualified_column())), key: ScalarValue::Utf8(Some(name)), - }) + })) } else { // table.column identifier Ok(Expr::Column(Column { diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 5b25753230e3..ffbb1e5bbe02 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -21,7 +21,9 @@ use arrow::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE use sqlparser::ast::Ident; use datafusion_common::{DataFusionError, Result, ScalarValue}; -use datafusion_expr::expr::{Between, BinaryExpr, Case, GroupingSet, Like}; +use datafusion_expr::expr::{ + Between, BinaryExpr, Case, GetIndexedField, GroupingSet, Like, +}; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; use datafusion_expr::{Expr, LogicalPlan}; use std::collections::HashMap; @@ -377,10 +379,12 @@ where }), Expr::Wildcard => Ok(Expr::Wildcard), Expr::QualifiedWildcard { .. } => Ok(expr.clone()), - Expr::GetIndexedField { expr, key } => Ok(Expr::GetIndexedField { - expr: Box::new(clone_with_replacement(expr.as_ref(), replacement_fn)?), - key: key.clone(), - }), + Expr::GetIndexedField(GetIndexedField { expr, key }) => { + Ok(Expr::GetIndexedField(GetIndexedField::new( + Box::new(clone_with_replacement(expr.as_ref(), replacement_fn)?), + key.clone(), + ))) + } Expr::GroupingSet(set) => match set { GroupingSet::Rollup(exprs) => Ok(Expr::GroupingSet(GroupingSet::Rollup( exprs