From f7408fc1887b1610d420be9913937857d01771f0 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Fri, 2 Dec 2022 16:59:10 -0500 Subject: [PATCH 01/17] feat: support prepare statement --- .../core/src/datasource/listing/helpers.rs | 3 +- datafusion/core/src/physical_plan/planner.rs | 11 + datafusion/expr/src/expr.rs | 5 + datafusion/expr/src/expr_rewriter.rs | 1 + datafusion/expr/src/expr_schema.rs | 4 +- datafusion/expr/src/expr_visitor.rs | 3 +- datafusion/expr/src/logical_plan/builder.rs | 11 + datafusion/expr/src/logical_plan/mod.rs | 2 +- datafusion/expr/src/logical_plan/plan.rs | 35 +- datafusion/expr/src/utils.rs | 14 +- .../optimizer/src/common_subexpr_eliminate.rs | 3 +- .../optimizer/src/projection_push_down.rs | 3 +- .../simplify_expressions/expr_simplifier.rs | 3 +- datafusion/proto/proto/datafusion.proto | 13 + datafusion/proto/src/from_proto.rs | 5 +- datafusion/proto/src/generated/datafusion.rs | 1512 +++++++++++++++++ datafusion/proto/src/generated/pbjson.rs | 243 +++ datafusion/proto/src/generated/prost.rs | 22 +- datafusion/proto/src/logical_plan.rs | 37 +- datafusion/proto/src/to_proto.rs | 5 +- datafusion/sql/src/parser.rs | 30 + datafusion/sql/src/planner.rs | 35 +- datafusion/sql/src/utils.rs | 1 + 23 files changed, 1978 insertions(+), 23 deletions(-) create mode 100644 datafusion/proto/src/generated/datafusion.rs diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 3cfe9ec148ed..50bb2ee84ad9 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -124,7 +124,8 @@ impl ExpressionVisitor for ApplicabilityVisitor<'_> { | Expr::Sort { .. } | Expr::WindowFunction { .. } | Expr::Wildcard - | Expr::QualifiedWildcard { .. } => { + | Expr::QualifiedWildcard { .. } + | Expr::Placeholder(_) => { *self.is_applicable = false; Recursion::Stop(self) } diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index e325b361b233..138f77a07786 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -344,6 +344,9 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Expr::QualifiedWildcard { .. } => Err(DataFusionError::Internal( "Create physical name does not support qualified wildcard".to_string(), )), + Expr::Placeholder(_) => Err(DataFusionError::Internal( + "Create physical name does not support placeholder".to_string(), + )), } } @@ -1031,6 +1034,14 @@ impl DefaultPhysicalPlanner { "Unsupported logical plan: CreateExternalTable".to_string(), )) } + LogicalPlan::Prepare(_) => { + // There is no default plan for "PREPARE" -- it must be + // handled at a higher level (so that the appropriate + // statement can be prepared) + Err(DataFusionError::Internal( + "Unsupported logical plan: Prepare".to_string(), + )) + } LogicalPlan::CreateCatalogSchema(_) => { // There is no default plan for "CREATE SCHEMA". // It must be handled at a higher level (so diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index ecab1afd211b..48e9571b2511 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -89,6 +89,8 @@ pub enum Expr { Alias(Box, String), /// A named reference to a qualified filed in a schema. Column(Column), + /// A place holder for parameters in a prepared statement. + Placeholder(String), /// A named reference to a variable in a registry. ScalarVariable(DataType, Vec), /// A constant value. @@ -528,6 +530,7 @@ impl Expr { Expr::Literal(..) => "Literal", Expr::Negative(..) => "Negative", Expr::Not(..) => "Not", + Expr::Placeholder(..) => "Placeholder", Expr::QualifiedWildcard { .. } => "QualifiedWildcard", Expr::ScalarFunction { .. } => "ScalarFunction", Expr::ScalarSubquery { .. } => "ScalarSubquery", @@ -984,6 +987,7 @@ impl fmt::Debug for Expr { ) } }, + Expr::Placeholder(param) => write!(f, "{}", param), } } } @@ -1269,6 +1273,7 @@ fn create_name(e: &Expr) -> Result { Expr::QualifiedWildcard { .. } => Err(DataFusionError::Internal( "Create name does not support qualified wildcard".to_string(), )), + Expr::Placeholder(param) => Ok(format!("{}", param)), } } diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs index 40516c53767e..cec3b299d825 100644 --- a/datafusion/expr/src/expr_rewriter.rs +++ b/datafusion/expr/src/expr_rewriter.rs @@ -291,6 +291,7 @@ impl ExprRewritable for Expr { key, )) } + Expr::Placeholder(param) => Expr::Placeholder(param), }; // now rewrite this expression itself diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 8424fa2aa2d1..2634e45cf798 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -127,6 +127,7 @@ impl ExprSchemable for Expr { Expr::Like { .. } | Expr::ILike { .. } | Expr::SimilarTo { .. } => { Ok(DataType::Boolean) } + Expr::Placeholder(_) => Ok(DataType::Boolean), Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), )), @@ -198,7 +199,8 @@ impl ExprSchemable for Expr { | Expr::IsNotTrue(_) | Expr::IsNotFalse(_) | Expr::IsNotUnknown(_) - | Expr::Exists { .. } => Ok(false), + | Expr::Exists { .. } + | Expr::Placeholder(_) => Ok(false), // todo: Placeholder should return false? Expr::InSubquery { expr, .. } => expr.nullable(input_schema), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).is_nullable()) diff --git a/datafusion/expr/src/expr_visitor.rs b/datafusion/expr/src/expr_visitor.rs index bd839f098fc3..d44936b6aa79 100644 --- a/datafusion/expr/src/expr_visitor.rs +++ b/datafusion/expr/src/expr_visitor.rs @@ -133,7 +133,8 @@ impl ExprVisitable for Expr { | Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::Wildcard - | Expr::QualifiedWildcard { .. } => Ok(visitor), + | Expr::QualifiedWildcard { .. } + | Expr::Placeholder(_) => Ok(visitor), Expr::BinaryExpr(BinaryExpr { left, right, .. }) => { let visitor = left.accept(visitor)?; right.accept(visitor) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 71097f5a61c6..406dcfa70dd8 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -45,6 +45,8 @@ use std::any::Any; use std::convert::TryFrom; use std::{collections::HashMap, sync::Arc}; +use super::Prepare; + /// Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; @@ -119,6 +121,7 @@ impl LogicalPlanBuilder { /// The column names are not specified by the SQL standard and different database systems do it differently, /// so it's usually better to override the default names with a table alias list. pub fn values(mut values: Vec>) -> Result { + // todo: hanlde for Placeholder expr if values.is_empty() { return Err(DataFusionError::Plan("Values list cannot be empty".into())); } @@ -292,6 +295,14 @@ impl LogicalPlanBuilder { )?))) } + pub fn prepare(&self, name: String, data_types: Vec) -> Result { + Ok(Self::from(LogicalPlan::Prepare(Prepare { + name, + data_types, + input: Arc::new(self.plan.clone()), + }))) + } + /// Limit the number of rows returned /// /// `skip` - Number of rows to skip before fetch any row. diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index 2cfe921e67b3..9d26d2a6554e 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -25,7 +25,7 @@ pub use plan::{ Aggregate, Analyze, CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateMemoryTable, CreateView, CrossJoin, Distinct, DropTable, DropView, EmptyRelation, Explain, Extension, Filter, Join, JoinConstraint, JoinType, Limit, - LogicalPlan, Partitioning, PlanType, PlanVisitor, Projection, Repartition, + LogicalPlan, Partitioning, PlanType, PlanVisitor, Prepare, Projection, Repartition, SetVariable, Sort, StringifiedPlan, Subquery, SubqueryAlias, TableScan, ToStringifiedPlan, Union, Values, Window, }; diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index ed5711a7fb57..2fce8ceaa76f 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -108,6 +108,8 @@ pub enum LogicalPlan { Distinct(Distinct), /// Set a Variable SetVariable(SetVariable), + /// Prepare a statement + Prepare(Prepare), } impl LogicalPlan { @@ -134,6 +136,7 @@ impl LogicalPlan { LogicalPlan::CreateExternalTable(CreateExternalTable { schema, .. }) => { schema } + LogicalPlan::Prepare(Prepare { input, .. }) => input.schema(), LogicalPlan::Explain(explain) => &explain.schema, LogicalPlan::Analyze(analyze) => &analyze.schema, LogicalPlan::Extension(extension) => extension.node.schema(), @@ -201,8 +204,9 @@ impl LogicalPlan { | LogicalPlan::Sort(Sort { input, .. }) | LogicalPlan::CreateMemoryTable(CreateMemoryTable { input, .. }) | LogicalPlan::CreateView(CreateView { input, .. }) - | LogicalPlan::Filter(Filter { input, .. }) => input.all_schemas(), - LogicalPlan::Distinct(Distinct { input, .. }) => input.all_schemas(), + | LogicalPlan::Filter(Filter { input, .. }) + | LogicalPlan::Distinct(Distinct { input, .. }) + | LogicalPlan::Prepare(Prepare { input, .. }) => input.all_schemas(), LogicalPlan::DropTable(_) | LogicalPlan::DropView(_) | LogicalPlan::SetVariable(_) => vec![], @@ -271,7 +275,8 @@ impl LogicalPlan { | LogicalPlan::Analyze(_) | LogicalPlan::Explain(_) | LogicalPlan::Union(_) - | LogicalPlan::Distinct(_) => { + | LogicalPlan::Distinct(_) + | LogicalPlan::Prepare(_) => { vec![] } } @@ -300,7 +305,8 @@ impl LogicalPlan { LogicalPlan::Explain(explain) => vec![&explain.plan], LogicalPlan::Analyze(analyze) => vec![&analyze.input], LogicalPlan::CreateMemoryTable(CreateMemoryTable { input, .. }) - | LogicalPlan::CreateView(CreateView { input, .. }) => { + | LogicalPlan::CreateView(CreateView { input, .. }) + | LogicalPlan::Prepare(Prepare { input, .. }) => { vec![input] } // plans without inputs @@ -448,9 +454,8 @@ impl LogicalPlan { input.accept(visitor)? } LogicalPlan::CreateMemoryTable(CreateMemoryTable { input, .. }) - | LogicalPlan::CreateView(CreateView { input, .. }) => { - input.accept(visitor)? - } + | LogicalPlan::CreateView(CreateView { input, .. }) + | LogicalPlan::Prepare(Prepare { input, .. }) => input.accept(visitor)?, LogicalPlan::Extension(extension) => { for input in extension.node.inputs() { if !input.accept(visitor)? { @@ -961,6 +966,11 @@ impl LogicalPlan { LogicalPlan::Analyze { .. } => write!(f, "Analyze"), LogicalPlan::Union(_) => write!(f, "Union"), LogicalPlan::Extension(e) => e.node.fmt_for_explain(f), + LogicalPlan::Prepare(Prepare { + name, data_types, .. + }) => { + write!(f, "Prepare: {:?} {:?} ", name, data_types) + } } } } @@ -1358,6 +1368,17 @@ pub struct CreateExternalTable { pub options: HashMap, } +/// Prepare a statement +#[derive(Clone)] +pub struct Prepare { + /// The name of the statement + pub name: String, + /// Data types of the parameters + pub data_types: Vec, + /// The logical plan of the statements + pub input: Arc, +} + /// Produces a relation with string representations of /// various parts of the plan #[derive(Clone)] diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index c84f4b8d709c..47aed15cd42a 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -22,8 +22,8 @@ use crate::expr_visitor::{ExprVisitable, ExpressionVisitor, Recursion}; use crate::logical_plan::builder::build_join_schema; use crate::logical_plan::{ Aggregate, Analyze, CreateMemoryTable, CreateView, Distinct, Extension, Filter, Join, - Limit, Partitioning, Projection, Repartition, Sort, Subquery, SubqueryAlias, Union, - Values, Window, + Limit, Partitioning, Prepare, Projection, Repartition, Sort, Subquery, SubqueryAlias, + Union, Values, Window, }; use crate::{Cast, Expr, ExprSchemable, LogicalPlan, LogicalPlanBuilder}; use arrow::datatypes::{DataType, TimeUnit}; @@ -126,7 +126,8 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> { | Expr::ScalarSubquery(_) | Expr::Wildcard | Expr::QualifiedWildcard { .. } - | Expr::GetIndexedField { .. } => {} + | Expr::GetIndexedField { .. } + | Expr::Placeholder(_) => {} } Ok(Recursion::Continue(self)) } @@ -575,6 +576,13 @@ pub fn from_plan( ); Ok(plan.clone()) } + LogicalPlan::Prepare(Prepare { + name, data_types, .. + }) => Ok(LogicalPlan::Prepare(Prepare { + name: name.clone(), + data_types: data_types.clone(), + input: Arc::new(inputs[0].clone()), + })), LogicalPlan::EmptyRelation(_) | LogicalPlan::TableScan { .. } | LogicalPlan::CreateExternalTable(_) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 11ed5cdbebb0..482298e160e3 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -240,7 +240,8 @@ impl OptimizerRule for CommonSubexprEliminate { | LogicalPlan::DropView(_) | LogicalPlan::SetVariable(_) | LogicalPlan::Distinct(_) - | LogicalPlan::Extension(_) => { + | LogicalPlan::Extension(_) + | LogicalPlan::Prepare(_) => { // apply the optimization to all inputs of the plan utils::optimize_children(self, plan, optimizer_config) } diff --git a/datafusion/optimizer/src/projection_push_down.rs b/datafusion/optimizer/src/projection_push_down.rs index 1e54d7184ccc..9d381e2ef6a3 100644 --- a/datafusion/optimizer/src/projection_push_down.rs +++ b/datafusion/optimizer/src/projection_push_down.rs @@ -392,7 +392,8 @@ fn optimize_plan( | LogicalPlan::SetVariable(_) | LogicalPlan::CrossJoin(_) | LogicalPlan::Distinct(_) - | LogicalPlan::Extension { .. } => { + | LogicalPlan::Extension { .. } + | LogicalPlan::Prepare(_) => { let expr = plan.expressions(); // collect all required columns by this plan exprlist_to_columns(&expr, &mut new_required_columns)?; diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index b32fc53dbaff..dad91fd5c558 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -253,7 +253,8 @@ impl<'a> ConstEvaluator<'a> { | Expr::Sort { .. } | Expr::GroupingSet(_) | Expr::Wildcard - | Expr::QualifiedWildcard { .. } => false, + | Expr::QualifiedWildcard { .. } + | Expr::Placeholder(_) => false, Expr::ScalarFunction { fun, .. } => Self::volatility_ok(fun.volatility()), Expr::ScalarUDF { fun, .. } => Self::volatility_ok(fun.signature.volatility), Expr::Literal(_) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 9ec28f223ef5..bff90421d31f 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -71,6 +71,7 @@ message LogicalPlanNode { DistinctNode distinct = 23; ViewTableScanNode view_scan = 24; CustomTableScanNode custom_scan = 25; + PrepareNode prepare = 26; } } @@ -180,6 +181,12 @@ message CreateExternalTableNode { map options = 11; } +message PrepareNode { + string name = 1; + repeated ArrowType data_types = 2; + LogicalPlanNode input = 3; + } + message CreateCatalogSchemaNode { string schema_name = 1; bool if_not_exists = 2; @@ -343,9 +350,15 @@ message LogicalExprNode { ILikeNode ilike = 32; SimilarToNode similar_to = 33; + PlaceholderNode placeholder = 34; + } } +message PlaceholderNode { + string param = 1; +} + message LogicalExprList { repeated LogicalExprNode expr = 1; } diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index a30e7b323aba..e9510ad1da10 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -19,7 +19,7 @@ use crate::protobuf::plan_type::PlanTypeEnum::{ FinalLogicalPlan, FinalPhysicalPlan, InitialLogicalPlan, InitialPhysicalPlan, OptimizedLogicalPlan, OptimizedPhysicalPlan, }; -use crate::protobuf::{self}; +use crate::protobuf::{self, PlaceholderNode}; use crate::protobuf::{ CubeNode, GroupingSetNode, OptimizedLogicalPlanType, OptimizedPhysicalPlanType, RollupNode, @@ -1184,6 +1184,9 @@ pub fn parse_expr( .collect::, Error>>()?, ))) } + ExprType::Placeholder(PlaceholderNode { param }) => { + Ok(Expr::Placeholder(param.clone())) + } } } diff --git a/datafusion/proto/src/generated/datafusion.rs b/datafusion/proto/src/generated/datafusion.rs new file mode 100644 index 000000000000..eee0732d41aa --- /dev/null +++ b/datafusion/proto/src/generated/datafusion.rs @@ -0,0 +1,1512 @@ +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ColumnRelation { + #[prost(string, tag="1")] + pub relation: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Column { + #[prost(string, tag="1")] + pub name: ::prost::alloc::string::String, + #[prost(message, optional, tag="2")] + pub relation: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DfField { + #[prost(message, optional, tag="1")] + pub field: ::core::option::Option, + #[prost(message, optional, tag="2")] + pub qualifier: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DfSchema { + #[prost(message, repeated, tag="1")] + pub columns: ::prost::alloc::vec::Vec, + #[prost(map="string, string", tag="2")] + pub metadata: ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, +} +/// logical plan +/// LogicalPlan is a nested type +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LogicalPlanNode { + #[prost(oneof="logical_plan_node::LogicalPlanType", tags="1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25")] + pub logical_plan_type: ::core::option::Option, +} +/// Nested message and enum types in `LogicalPlanNode`. +pub mod logical_plan_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum LogicalPlanType { + #[prost(message, tag="1")] + ListingScan(super::ListingTableScanNode), + #[prost(message, tag="3")] + Projection(::prost::alloc::boxed::Box), + #[prost(message, tag="4")] + Selection(::prost::alloc::boxed::Box), + #[prost(message, tag="5")] + Limit(::prost::alloc::boxed::Box), + #[prost(message, tag="6")] + Aggregate(::prost::alloc::boxed::Box), + #[prost(message, tag="7")] + Join(::prost::alloc::boxed::Box), + #[prost(message, tag="8")] + Sort(::prost::alloc::boxed::Box), + #[prost(message, tag="9")] + Repartition(::prost::alloc::boxed::Box), + #[prost(message, tag="10")] + EmptyRelation(super::EmptyRelationNode), + #[prost(message, tag="11")] + CreateExternalTable(super::CreateExternalTableNode), + #[prost(message, tag="12")] + Explain(::prost::alloc::boxed::Box), + #[prost(message, tag="13")] + Window(::prost::alloc::boxed::Box), + #[prost(message, tag="14")] + Analyze(::prost::alloc::boxed::Box), + #[prost(message, tag="15")] + CrossJoin(::prost::alloc::boxed::Box), + #[prost(message, tag="16")] + Values(super::ValuesNode), + #[prost(message, tag="17")] + Extension(super::LogicalExtensionNode), + #[prost(message, tag="18")] + CreateCatalogSchema(super::CreateCatalogSchemaNode), + #[prost(message, tag="19")] + Union(super::UnionNode), + #[prost(message, tag="20")] + CreateCatalog(super::CreateCatalogNode), + #[prost(message, tag="21")] + SubqueryAlias(::prost::alloc::boxed::Box), + #[prost(message, tag="22")] + CreateView(::prost::alloc::boxed::Box), + #[prost(message, tag="23")] + Distinct(::prost::alloc::boxed::Box), + #[prost(message, tag="24")] + ViewScan(::prost::alloc::boxed::Box), + #[prost(message, tag="25")] + CustomScan(super::CustomTableScanNode), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LogicalExtensionNode { + #[prost(bytes="vec", tag="1")] + pub node: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="2")] + pub inputs: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ProjectionColumns { + #[prost(string, repeated, tag="1")] + pub columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CsvFormat { + #[prost(bool, tag="1")] + pub has_header: bool, + #[prost(string, tag="2")] + pub delimiter: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ParquetFormat { + #[prost(bool, tag="1")] + pub enable_pruning: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AvroFormat { +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListingTableScanNode { + #[prost(string, tag="1")] + pub table_name: ::prost::alloc::string::String, + #[prost(string, repeated, tag="2")] + pub paths: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(string, tag="3")] + pub file_extension: ::prost::alloc::string::String, + #[prost(message, optional, tag="4")] + pub projection: ::core::option::Option, + #[prost(message, optional, tag="5")] + pub schema: ::core::option::Option, + #[prost(message, repeated, tag="6")] + pub filters: ::prost::alloc::vec::Vec, + #[prost(string, repeated, tag="7")] + pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(bool, tag="8")] + pub collect_stat: bool, + #[prost(uint32, tag="9")] + pub target_partitions: u32, + #[prost(oneof="listing_table_scan_node::FileFormatType", tags="10, 11, 12")] + pub file_format_type: ::core::option::Option, +} +/// Nested message and enum types in `ListingTableScanNode`. +pub mod listing_table_scan_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum FileFormatType { + #[prost(message, tag="10")] + Csv(super::CsvFormat), + #[prost(message, tag="11")] + Parquet(super::ParquetFormat), + #[prost(message, tag="12")] + Avro(super::AvroFormat), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ViewTableScanNode { + #[prost(string, tag="1")] + pub table_name: ::prost::alloc::string::String, + #[prost(message, optional, boxed, tag="2")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="3")] + pub schema: ::core::option::Option, + #[prost(message, optional, tag="4")] + pub projection: ::core::option::Option, + #[prost(string, tag="5")] + pub definition: ::prost::alloc::string::String, +} +/// Logical Plan to Scan a CustomTableProvider registered at runtime +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CustomTableScanNode { + #[prost(string, tag="1")] + pub table_name: ::prost::alloc::string::String, + #[prost(message, optional, tag="2")] + pub projection: ::core::option::Option, + #[prost(message, optional, tag="3")] + pub schema: ::core::option::Option, + #[prost(message, repeated, tag="4")] + pub filters: ::prost::alloc::vec::Vec, + #[prost(bytes="vec", tag="5")] + pub custom_table_data: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ProjectionNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub expr: ::prost::alloc::vec::Vec, + #[prost(oneof="projection_node::OptionalAlias", tags="3")] + pub optional_alias: ::core::option::Option, +} +/// Nested message and enum types in `ProjectionNode`. +pub mod projection_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum OptionalAlias { + #[prost(string, tag="3")] + Alias(::prost::alloc::string::String), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SelectionNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="2")] + pub expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SortNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub expr: ::prost::alloc::vec::Vec, + /// Maximum number of highest/lowest rows to fetch; negative means no limit + #[prost(int64, tag="3")] + pub fetch: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RepartitionNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(oneof="repartition_node::PartitionMethod", tags="2, 3")] + pub partition_method: ::core::option::Option, +} +/// Nested message and enum types in `RepartitionNode`. +pub mod repartition_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum PartitionMethod { + #[prost(uint64, tag="2")] + RoundRobin(u64), + #[prost(message, tag="3")] + Hash(super::HashRepartition), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct HashRepartition { + #[prost(message, repeated, tag="1")] + pub hash_expr: ::prost::alloc::vec::Vec, + #[prost(uint64, tag="2")] + pub partition_count: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct EmptyRelationNode { + #[prost(bool, tag="1")] + pub produce_one_row: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateExternalTableNode { + #[prost(string, tag="1")] + pub name: ::prost::alloc::string::String, + #[prost(string, tag="2")] + pub location: ::prost::alloc::string::String, + #[prost(string, tag="3")] + pub file_type: ::prost::alloc::string::String, + #[prost(bool, tag="4")] + pub has_header: bool, + #[prost(message, optional, tag="5")] + pub schema: ::core::option::Option, + #[prost(string, repeated, tag="6")] + pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(bool, tag="7")] + pub if_not_exists: bool, + #[prost(string, tag="8")] + pub delimiter: ::prost::alloc::string::String, + #[prost(string, tag="9")] + pub definition: ::prost::alloc::string::String, + #[prost(string, tag="10")] + pub file_compression_type: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateCatalogSchemaNode { + #[prost(string, tag="1")] + pub schema_name: ::prost::alloc::string::String, + #[prost(bool, tag="2")] + pub if_not_exists: bool, + #[prost(message, optional, tag="3")] + pub schema: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateCatalogNode { + #[prost(string, tag="1")] + pub catalog_name: ::prost::alloc::string::String, + #[prost(bool, tag="2")] + pub if_not_exists: bool, + #[prost(message, optional, tag="3")] + pub schema: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateViewNode { + #[prost(string, tag="1")] + pub name: ::prost::alloc::string::String, + #[prost(message, optional, boxed, tag="2")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="3")] + pub or_replace: bool, + #[prost(string, tag="4")] + pub definition: ::prost::alloc::string::String, +} +/// a node containing data for defining values list. unlike in SQL where it's two dimensional, here +/// the list is flattened, and with the field n_cols it can be parsed and partitioned into rows +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ValuesNode { + #[prost(uint64, tag="1")] + pub n_cols: u64, + #[prost(message, repeated, tag="2")] + pub values_list: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AnalyzeNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="2")] + pub verbose: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExplainNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="2")] + pub verbose: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AggregateNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub group_expr: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="3")] + pub aggr_expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WindowNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub window_expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct JoinNode { + #[prost(message, optional, boxed, tag="1")] + pub left: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="2")] + pub right: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(enumeration="JoinType", tag="3")] + pub join_type: i32, + #[prost(enumeration="JoinConstraint", tag="4")] + pub join_constraint: i32, + #[prost(message, repeated, tag="5")] + pub left_join_column: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="6")] + pub right_join_column: ::prost::alloc::vec::Vec, + #[prost(bool, tag="7")] + pub null_equals_null: bool, + #[prost(message, optional, tag="8")] + pub filter: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DistinctNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UnionNode { + #[prost(message, repeated, tag="1")] + pub inputs: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CrossJoinNode { + #[prost(message, optional, boxed, tag="1")] + pub left: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="2")] + pub right: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LimitNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// The number of rows to skip before fetch; non-positive means don't skip any + #[prost(int64, tag="2")] + pub skip: i64, + /// Maximum number of rows to fetch; negative means no limit + #[prost(int64, tag="3")] + pub fetch: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SelectionExecNode { + #[prost(message, optional, tag="1")] + pub expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SubqueryAliasNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="2")] + pub alias: ::prost::alloc::string::String, +} +/// logical expressions +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LogicalExprNode { + #[prost(oneof="logical_expr_node::ExprType", tags="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33")] + pub expr_type: ::core::option::Option, +} +/// Nested message and enum types in `LogicalExprNode`. +pub mod logical_expr_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ExprType { + /// column references + #[prost(message, tag="1")] + Column(super::Column), + /// alias + #[prost(message, tag="2")] + Alias(::prost::alloc::boxed::Box), + #[prost(message, tag="3")] + Literal(super::ScalarValue), + /// binary expressions + #[prost(message, tag="4")] + BinaryExpr(::prost::alloc::boxed::Box), + /// aggregate expressions + #[prost(message, tag="5")] + AggregateExpr(::prost::alloc::boxed::Box), + /// null checks + #[prost(message, tag="6")] + IsNullExpr(::prost::alloc::boxed::Box), + #[prost(message, tag="7")] + IsNotNullExpr(::prost::alloc::boxed::Box), + #[prost(message, tag="8")] + NotExpr(::prost::alloc::boxed::Box), + #[prost(message, tag="9")] + Between(::prost::alloc::boxed::Box), + #[prost(message, tag="10")] + Case(::prost::alloc::boxed::Box), + #[prost(message, tag="11")] + Cast(::prost::alloc::boxed::Box), + #[prost(message, tag="12")] + Sort(::prost::alloc::boxed::Box), + #[prost(message, tag="13")] + Negative(::prost::alloc::boxed::Box), + #[prost(message, tag="14")] + InList(::prost::alloc::boxed::Box), + #[prost(bool, tag="15")] + Wildcard(bool), + #[prost(message, tag="16")] + ScalarFunction(super::ScalarFunctionNode), + #[prost(message, tag="17")] + TryCast(::prost::alloc::boxed::Box), + /// window expressions + #[prost(message, tag="18")] + WindowExpr(::prost::alloc::boxed::Box), + /// AggregateUDF expressions + #[prost(message, tag="19")] + AggregateUdfExpr(::prost::alloc::boxed::Box), + /// Scalar UDF expressions + #[prost(message, tag="20")] + ScalarUdfExpr(super::ScalarUdfExprNode), + #[prost(message, tag="21")] + GetIndexedField(::prost::alloc::boxed::Box), + #[prost(message, tag="22")] + GroupingSet(super::GroupingSetNode), + #[prost(message, tag="23")] + Cube(super::CubeNode), + #[prost(message, tag="24")] + Rollup(super::RollupNode), + #[prost(message, tag="25")] + IsTrue(::prost::alloc::boxed::Box), + #[prost(message, tag="26")] + IsFalse(::prost::alloc::boxed::Box), + #[prost(message, tag="27")] + IsUnknown(::prost::alloc::boxed::Box), + #[prost(message, tag="28")] + IsNotTrue(::prost::alloc::boxed::Box), + #[prost(message, tag="29")] + IsNotFalse(::prost::alloc::boxed::Box), + #[prost(message, tag="30")] + IsNotUnknown(::prost::alloc::boxed::Box), + #[prost(message, tag="31")] + Like(::prost::alloc::boxed::Box), + #[prost(message, tag="32")] + Ilike(::prost::alloc::boxed::Box), + #[prost(message, tag="33")] + SimilarTo(::prost::alloc::boxed::Box), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LogicalExprList { + #[prost(message, repeated, tag="1")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GroupingSetNode { + #[prost(message, repeated, tag="1")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CubeNode { + #[prost(message, repeated, tag="1")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RollupNode { + #[prost(message, repeated, tag="1")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetIndexedField { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="2")] + pub key: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNull { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNotNull { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsTrue { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsFalse { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsUnknown { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNotTrue { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNotFalse { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNotUnknown { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Not { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AliasNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="2")] + pub alias: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BinaryExprNode { + #[prost(message, optional, boxed, tag="1")] + pub l: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="2")] + pub r: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="3")] + pub op: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NegativeNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct InListNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub list: ::prost::alloc::vec::Vec, + #[prost(bool, tag="3")] + pub negated: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarFunctionNode { + #[prost(enumeration="ScalarFunction", tag="1")] + pub fun: i32, + #[prost(message, repeated, tag="2")] + pub args: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AggregateExprNode { + #[prost(enumeration="AggregateFunction", tag="1")] + pub aggr_function: i32, + #[prost(message, repeated, tag="2")] + pub expr: ::prost::alloc::vec::Vec, + #[prost(bool, tag="3")] + pub distinct: bool, + #[prost(message, optional, boxed, tag="4")] + pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AggregateUdfExprNode { + #[prost(string, tag="1")] + pub fun_name: ::prost::alloc::string::String, + #[prost(message, repeated, tag="2")] + pub args: ::prost::alloc::vec::Vec, + #[prost(message, optional, boxed, tag="3")] + pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarUdfExprNode { + #[prost(string, tag="1")] + pub fun_name: ::prost::alloc::string::String, + #[prost(message, repeated, tag="2")] + pub args: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WindowExprNode { + #[prost(message, optional, boxed, tag="4")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="5")] + pub partition_by: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="6")] + pub order_by: ::prost::alloc::vec::Vec, + #[prost(oneof="window_expr_node::WindowFunction", tags="1, 2")] + pub window_function: ::core::option::Option, + /// repeated LogicalExprNode filter = 7; + #[prost(oneof="window_expr_node::WindowFrame", tags="8")] + pub window_frame: ::core::option::Option, +} +/// Nested message and enum types in `WindowExprNode`. +pub mod window_expr_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum WindowFunction { + #[prost(enumeration="super::AggregateFunction", tag="1")] + AggrFunction(i32), + /// udaf = 3 + #[prost(enumeration="super::BuiltInWindowFunction", tag="2")] + BuiltInFunction(i32), + } + /// repeated LogicalExprNode filter = 7; + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum WindowFrame { + #[prost(message, tag="8")] + Frame(super::WindowFrame), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BetweenNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="2")] + pub negated: bool, + #[prost(message, optional, boxed, tag="3")] + pub low: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="4")] + pub high: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LikeNode { + #[prost(bool, tag="1")] + pub negated: bool, + #[prost(message, optional, boxed, tag="2")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="3")] + pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="4")] + pub escape_char: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ILikeNode { + #[prost(bool, tag="1")] + pub negated: bool, + #[prost(message, optional, boxed, tag="2")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="3")] + pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="4")] + pub escape_char: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SimilarToNode { + #[prost(bool, tag="1")] + pub negated: bool, + #[prost(message, optional, boxed, tag="2")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="3")] + pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="4")] + pub escape_char: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CaseNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub when_then_expr: ::prost::alloc::vec::Vec, + #[prost(message, optional, boxed, tag="3")] + pub else_expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WhenThen { + #[prost(message, optional, tag="1")] + pub when_expr: ::core::option::Option, + #[prost(message, optional, tag="2")] + pub then_expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CastNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="2")] + pub arrow_type: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TryCastNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="2")] + pub arrow_type: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SortExprNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="2")] + pub asc: bool, + #[prost(bool, tag="3")] + pub nulls_first: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WindowFrame { + #[prost(enumeration="WindowFrameUnits", tag="1")] + pub window_frame_units: i32, + #[prost(message, optional, tag="2")] + pub start_bound: ::core::option::Option, + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[prost(oneof="window_frame::EndBound", tags="3")] + pub end_bound: ::core::option::Option, +} +/// Nested message and enum types in `WindowFrame`. +pub mod window_frame { + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum EndBound { + #[prost(message, tag="3")] + Bound(super::WindowFrameBound), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WindowFrameBound { + #[prost(enumeration="WindowFrameBoundType", tag="1")] + pub window_frame_bound_type: i32, + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[prost(oneof="window_frame_bound::BoundValue", tags="2")] + pub bound_value: ::core::option::Option, +} +/// Nested message and enum types in `WindowFrameBound`. +pub mod window_frame_bound { + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum BoundValue { + #[prost(uint64, tag="2")] + Value(u64), + } +} +// ///////////////////////////////////////////////////////////////////////////////////////////////// +// Arrow Data Types +// ///////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Schema { + #[prost(message, repeated, tag="1")] + pub columns: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Field { + /// name of the field + #[prost(string, tag="1")] + pub name: ::prost::alloc::string::String, + #[prost(message, optional, boxed, tag="2")] + pub arrow_type: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="3")] + pub nullable: bool, + /// for complex data types like structs, unions + #[prost(message, repeated, tag="4")] + pub children: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FixedSizeBinary { + #[prost(int32, tag="1")] + pub length: i32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Timestamp { + #[prost(enumeration="TimeUnit", tag="1")] + pub time_unit: i32, + #[prost(string, tag="2")] + pub timezone: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Decimal { + #[prost(uint64, tag="1")] + pub whole: u64, + #[prost(uint64, tag="2")] + pub fractional: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct List { + #[prost(message, optional, boxed, tag="1")] + pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FixedSizeList { + #[prost(message, optional, boxed, tag="1")] + pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(int32, tag="2")] + pub list_size: i32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Dictionary { + #[prost(message, optional, boxed, tag="1")] + pub key: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="2")] + pub value: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Struct { + #[prost(message, repeated, tag="1")] + pub sub_field_types: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Union { + #[prost(message, repeated, tag="1")] + pub union_types: ::prost::alloc::vec::Vec, + #[prost(enumeration="UnionMode", tag="2")] + pub union_mode: i32, + #[prost(int32, repeated, tag="3")] + pub type_ids: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarListValue { + /// encode null explicitly to distinguish a list with a null value + /// from a list with no values) + #[prost(bool, tag="3")] + pub is_null: bool, + #[prost(message, optional, tag="1")] + pub field: ::core::option::Option, + #[prost(message, repeated, tag="2")] + pub values: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarTimestampValue { + #[prost(string, tag="5")] + pub timezone: ::prost::alloc::string::String, + #[prost(oneof="scalar_timestamp_value::Value", tags="1, 2, 3, 4")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ScalarTimestampValue`. +pub mod scalar_timestamp_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + #[prost(int64, tag="1")] + TimeMicrosecondValue(i64), + #[prost(int64, tag="2")] + TimeNanosecondValue(i64), + #[prost(int64, tag="3")] + TimeSecondValue(i64), + #[prost(int64, tag="4")] + TimeMillisecondValue(i64), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarDictionaryValue { + #[prost(message, optional, tag="1")] + pub index_type: ::core::option::Option, + #[prost(message, optional, boxed, tag="2")] + pub value: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IntervalMonthDayNanoValue { + #[prost(int32, tag="1")] + pub months: i32, + #[prost(int32, tag="2")] + pub days: i32, + #[prost(int64, tag="3")] + pub nanos: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StructValue { + /// Note that a null struct value must have one or more fields, so we + /// encode a null StructValue as one witth an empty field_values + /// list. + #[prost(message, repeated, tag="2")] + pub field_values: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="3")] + pub fields: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarValue { + #[prost(oneof="scalar_value::Value", tags="33, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ScalarValue`. +pub mod scalar_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + /// was PrimitiveScalarType null_value = 19; + /// Null value of any type + #[prost(message, tag="33")] + NullValue(super::ArrowType), + #[prost(bool, tag="1")] + BoolValue(bool), + #[prost(string, tag="2")] + Utf8Value(::prost::alloc::string::String), + #[prost(string, tag="3")] + LargeUtf8Value(::prost::alloc::string::String), + #[prost(int32, tag="4")] + Int8Value(i32), + #[prost(int32, tag="5")] + Int16Value(i32), + #[prost(int32, tag="6")] + Int32Value(i32), + #[prost(int64, tag="7")] + Int64Value(i64), + #[prost(uint32, tag="8")] + Uint8Value(u32), + #[prost(uint32, tag="9")] + Uint16Value(u32), + #[prost(uint32, tag="10")] + Uint32Value(u32), + #[prost(uint64, tag="11")] + Uint64Value(u64), + #[prost(float, tag="12")] + Float32Value(f32), + #[prost(double, tag="13")] + Float64Value(f64), + /// Literal Date32 value always has a unit of day + #[prost(int32, tag="14")] + Date32Value(i32), + /// WAS: ScalarType null_list_value = 18; + #[prost(message, tag="17")] + ListValue(super::ScalarListValue), + #[prost(message, tag="20")] + Decimal128Value(super::Decimal128), + #[prost(int64, tag="21")] + Date64Value(i64), + #[prost(int32, tag="24")] + IntervalYearmonthValue(i32), + #[prost(int64, tag="25")] + IntervalDaytimeValue(i64), + #[prost(message, tag="26")] + TimestampValue(super::ScalarTimestampValue), + #[prost(message, tag="27")] + DictionaryValue(::prost::alloc::boxed::Box), + #[prost(bytes, tag="28")] + BinaryValue(::prost::alloc::vec::Vec), + #[prost(bytes, tag="29")] + LargeBinaryValue(::prost::alloc::vec::Vec), + #[prost(int64, tag="30")] + Time64Value(i64), + #[prost(message, tag="31")] + IntervalMonthDayNano(super::IntervalMonthDayNanoValue), + #[prost(message, tag="32")] + StructValue(super::StructValue), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Decimal128 { + #[prost(bytes="vec", tag="1")] + pub value: ::prost::alloc::vec::Vec, + #[prost(int64, tag="2")] + pub p: i64, + #[prost(int64, tag="3")] + pub s: i64, +} +/// Serialized data type +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ArrowType { + #[prost(oneof="arrow_type::ArrowTypeEnum", tags="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 32, 15, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30")] + pub arrow_type_enum: ::core::option::Option, +} +/// Nested message and enum types in `ArrowType`. +pub mod arrow_type { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ArrowTypeEnum { + /// arrow::Type::NA + #[prost(message, tag="1")] + None(super::EmptyMessage), + /// arrow::Type::BOOL + #[prost(message, tag="2")] + Bool(super::EmptyMessage), + /// arrow::Type::UINT8 + #[prost(message, tag="3")] + Uint8(super::EmptyMessage), + /// arrow::Type::INT8 + #[prost(message, tag="4")] + Int8(super::EmptyMessage), + /// represents arrow::Type fields in src/arrow/type.h + #[prost(message, tag="5")] + Uint16(super::EmptyMessage), + #[prost(message, tag="6")] + Int16(super::EmptyMessage), + #[prost(message, tag="7")] + Uint32(super::EmptyMessage), + #[prost(message, tag="8")] + Int32(super::EmptyMessage), + #[prost(message, tag="9")] + Uint64(super::EmptyMessage), + #[prost(message, tag="10")] + Int64(super::EmptyMessage), + #[prost(message, tag="11")] + Float16(super::EmptyMessage), + #[prost(message, tag="12")] + Float32(super::EmptyMessage), + #[prost(message, tag="13")] + Float64(super::EmptyMessage), + #[prost(message, tag="14")] + Utf8(super::EmptyMessage), + #[prost(message, tag="32")] + LargeUtf8(super::EmptyMessage), + #[prost(message, tag="15")] + Binary(super::EmptyMessage), + #[prost(int32, tag="16")] + FixedSizeBinary(i32), + #[prost(message, tag="31")] + LargeBinary(super::EmptyMessage), + #[prost(message, tag="17")] + Date32(super::EmptyMessage), + #[prost(message, tag="18")] + Date64(super::EmptyMessage), + #[prost(enumeration="super::TimeUnit", tag="19")] + Duration(i32), + #[prost(message, tag="20")] + Timestamp(super::Timestamp), + #[prost(enumeration="super::TimeUnit", tag="21")] + Time32(i32), + #[prost(enumeration="super::TimeUnit", tag="22")] + Time64(i32), + #[prost(enumeration="super::IntervalUnit", tag="23")] + Interval(i32), + #[prost(message, tag="24")] + Decimal(super::Decimal), + #[prost(message, tag="25")] + List(::prost::alloc::boxed::Box), + #[prost(message, tag="26")] + LargeList(::prost::alloc::boxed::Box), + #[prost(message, tag="27")] + FixedSizeList(::prost::alloc::boxed::Box), + #[prost(message, tag="28")] + Struct(super::Struct), + #[prost(message, tag="29")] + Union(super::Union), + #[prost(message, tag="30")] + Dictionary(::prost::alloc::boxed::Box), + } +} +/// Useful for representing an empty enum variant in rust +/// E.G. enum example{One, Two(i32)} +/// maps to +/// message example{ +/// oneof{ +/// EmptyMessage One = 1; +/// i32 Two = 2; +/// } +/// } +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct EmptyMessage { +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct OptimizedLogicalPlanType { + #[prost(string, tag="1")] + pub optimizer_name: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct OptimizedPhysicalPlanType { + #[prost(string, tag="1")] + pub optimizer_name: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PlanType { + #[prost(oneof="plan_type::PlanTypeEnum", tags="1, 2, 3, 4, 5, 6")] + pub plan_type_enum: ::core::option::Option, +} +/// Nested message and enum types in `PlanType`. +pub mod plan_type { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum PlanTypeEnum { + #[prost(message, tag="1")] + InitialLogicalPlan(super::EmptyMessage), + #[prost(message, tag="2")] + OptimizedLogicalPlan(super::OptimizedLogicalPlanType), + #[prost(message, tag="3")] + FinalLogicalPlan(super::EmptyMessage), + #[prost(message, tag="4")] + InitialPhysicalPlan(super::EmptyMessage), + #[prost(message, tag="5")] + OptimizedPhysicalPlan(super::OptimizedPhysicalPlanType), + #[prost(message, tag="6")] + FinalPhysicalPlan(super::EmptyMessage), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StringifiedPlan { + #[prost(message, optional, tag="1")] + pub plan_type: ::core::option::Option, + #[prost(string, tag="2")] + pub plan: ::prost::alloc::string::String, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum JoinType { + Inner = 0, + Left = 1, + Right = 2, + Full = 3, + Semi = 4, + Anti = 5, +} +impl JoinType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + JoinType::Inner => "INNER", + JoinType::Left => "LEFT", + JoinType::Right => "RIGHT", + JoinType::Full => "FULL", + JoinType::Semi => "SEMI", + JoinType::Anti => "ANTI", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum JoinConstraint { + On = 0, + Using = 1, +} +impl JoinConstraint { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + JoinConstraint::On => "ON", + JoinConstraint::Using => "USING", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum ScalarFunction { + Abs = 0, + Acos = 1, + Asin = 2, + Atan = 3, + Ascii = 4, + Ceil = 5, + Cos = 6, + Digest = 7, + Exp = 8, + Floor = 9, + Ln = 10, + Log = 11, + Log10 = 12, + Log2 = 13, + Round = 14, + Signum = 15, + Sin = 16, + Sqrt = 17, + Tan = 18, + Trunc = 19, + Array = 20, + RegexpMatch = 21, + BitLength = 22, + Btrim = 23, + CharacterLength = 24, + Chr = 25, + Concat = 26, + ConcatWithSeparator = 27, + DatePart = 28, + DateTrunc = 29, + InitCap = 30, + Left = 31, + Lpad = 32, + Lower = 33, + Ltrim = 34, + Md5 = 35, + NullIf = 36, + OctetLength = 37, + Random = 38, + RegexpReplace = 39, + Repeat = 40, + Replace = 41, + Reverse = 42, + Right = 43, + Rpad = 44, + Rtrim = 45, + Sha224 = 46, + Sha256 = 47, + Sha384 = 48, + Sha512 = 49, + SplitPart = 50, + StartsWith = 51, + Strpos = 52, + Substr = 53, + ToHex = 54, + ToTimestamp = 55, + ToTimestampMillis = 56, + ToTimestampMicros = 57, + ToTimestampSeconds = 58, + Now = 59, + Translate = 60, + Trim = 61, + Upper = 62, + Coalesce = 63, + Power = 64, + StructFun = 65, + FromUnixtime = 66, + Atan2 = 67, + DateBin = 68, + ArrowTypeof = 69, +} +impl ScalarFunction { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + ScalarFunction::Abs => "Abs", + ScalarFunction::Acos => "Acos", + ScalarFunction::Asin => "Asin", + ScalarFunction::Atan => "Atan", + ScalarFunction::Ascii => "Ascii", + ScalarFunction::Ceil => "Ceil", + ScalarFunction::Cos => "Cos", + ScalarFunction::Digest => "Digest", + ScalarFunction::Exp => "Exp", + ScalarFunction::Floor => "Floor", + ScalarFunction::Ln => "Ln", + ScalarFunction::Log => "Log", + ScalarFunction::Log10 => "Log10", + ScalarFunction::Log2 => "Log2", + ScalarFunction::Round => "Round", + ScalarFunction::Signum => "Signum", + ScalarFunction::Sin => "Sin", + ScalarFunction::Sqrt => "Sqrt", + ScalarFunction::Tan => "Tan", + ScalarFunction::Trunc => "Trunc", + ScalarFunction::Array => "Array", + ScalarFunction::RegexpMatch => "RegexpMatch", + ScalarFunction::BitLength => "BitLength", + ScalarFunction::Btrim => "Btrim", + ScalarFunction::CharacterLength => "CharacterLength", + ScalarFunction::Chr => "Chr", + ScalarFunction::Concat => "Concat", + ScalarFunction::ConcatWithSeparator => "ConcatWithSeparator", + ScalarFunction::DatePart => "DatePart", + ScalarFunction::DateTrunc => "DateTrunc", + ScalarFunction::InitCap => "InitCap", + ScalarFunction::Left => "Left", + ScalarFunction::Lpad => "Lpad", + ScalarFunction::Lower => "Lower", + ScalarFunction::Ltrim => "Ltrim", + ScalarFunction::Md5 => "MD5", + ScalarFunction::NullIf => "NullIf", + ScalarFunction::OctetLength => "OctetLength", + ScalarFunction::Random => "Random", + ScalarFunction::RegexpReplace => "RegexpReplace", + ScalarFunction::Repeat => "Repeat", + ScalarFunction::Replace => "Replace", + ScalarFunction::Reverse => "Reverse", + ScalarFunction::Right => "Right", + ScalarFunction::Rpad => "Rpad", + ScalarFunction::Rtrim => "Rtrim", + ScalarFunction::Sha224 => "SHA224", + ScalarFunction::Sha256 => "SHA256", + ScalarFunction::Sha384 => "SHA384", + ScalarFunction::Sha512 => "SHA512", + ScalarFunction::SplitPart => "SplitPart", + ScalarFunction::StartsWith => "StartsWith", + ScalarFunction::Strpos => "Strpos", + ScalarFunction::Substr => "Substr", + ScalarFunction::ToHex => "ToHex", + ScalarFunction::ToTimestamp => "ToTimestamp", + ScalarFunction::ToTimestampMillis => "ToTimestampMillis", + ScalarFunction::ToTimestampMicros => "ToTimestampMicros", + ScalarFunction::ToTimestampSeconds => "ToTimestampSeconds", + ScalarFunction::Now => "Now", + ScalarFunction::Translate => "Translate", + ScalarFunction::Trim => "Trim", + ScalarFunction::Upper => "Upper", + ScalarFunction::Coalesce => "Coalesce", + ScalarFunction::Power => "Power", + ScalarFunction::StructFun => "StructFun", + ScalarFunction::FromUnixtime => "FromUnixtime", + ScalarFunction::Atan2 => "Atan2", + ScalarFunction::DateBin => "DateBin", + ScalarFunction::ArrowTypeof => "ArrowTypeof", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum AggregateFunction { + Min = 0, + Max = 1, + Sum = 2, + Avg = 3, + Count = 4, + ApproxDistinct = 5, + ArrayAgg = 6, + Variance = 7, + VariancePop = 8, + Covariance = 9, + CovariancePop = 10, + Stddev = 11, + StddevPop = 12, + Correlation = 13, + ApproxPercentileCont = 14, + ApproxMedian = 15, + ApproxPercentileContWithWeight = 16, + Grouping = 17, + Median = 18, +} +impl AggregateFunction { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + AggregateFunction::Min => "MIN", + AggregateFunction::Max => "MAX", + AggregateFunction::Sum => "SUM", + AggregateFunction::Avg => "AVG", + AggregateFunction::Count => "COUNT", + AggregateFunction::ApproxDistinct => "APPROX_DISTINCT", + AggregateFunction::ArrayAgg => "ARRAY_AGG", + AggregateFunction::Variance => "VARIANCE", + AggregateFunction::VariancePop => "VARIANCE_POP", + AggregateFunction::Covariance => "COVARIANCE", + AggregateFunction::CovariancePop => "COVARIANCE_POP", + AggregateFunction::Stddev => "STDDEV", + AggregateFunction::StddevPop => "STDDEV_POP", + AggregateFunction::Correlation => "CORRELATION", + AggregateFunction::ApproxPercentileCont => "APPROX_PERCENTILE_CONT", + AggregateFunction::ApproxMedian => "APPROX_MEDIAN", + AggregateFunction::ApproxPercentileContWithWeight => "APPROX_PERCENTILE_CONT_WITH_WEIGHT", + AggregateFunction::Grouping => "GROUPING", + AggregateFunction::Median => "MEDIAN", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum BuiltInWindowFunction { + RowNumber = 0, + Rank = 1, + DenseRank = 2, + PercentRank = 3, + CumeDist = 4, + Ntile = 5, + Lag = 6, + Lead = 7, + FirstValue = 8, + LastValue = 9, + NthValue = 10, +} +impl BuiltInWindowFunction { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + BuiltInWindowFunction::RowNumber => "ROW_NUMBER", + BuiltInWindowFunction::Rank => "RANK", + BuiltInWindowFunction::DenseRank => "DENSE_RANK", + BuiltInWindowFunction::PercentRank => "PERCENT_RANK", + BuiltInWindowFunction::CumeDist => "CUME_DIST", + BuiltInWindowFunction::Ntile => "NTILE", + BuiltInWindowFunction::Lag => "LAG", + BuiltInWindowFunction::Lead => "LEAD", + BuiltInWindowFunction::FirstValue => "FIRST_VALUE", + BuiltInWindowFunction::LastValue => "LAST_VALUE", + BuiltInWindowFunction::NthValue => "NTH_VALUE", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum WindowFrameUnits { + Rows = 0, + Range = 1, + Groups = 2, +} +impl WindowFrameUnits { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + WindowFrameUnits::Rows => "ROWS", + WindowFrameUnits::Range => "RANGE", + WindowFrameUnits::Groups => "GROUPS", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum WindowFrameBoundType { + CurrentRow = 0, + Preceding = 1, + Following = 2, +} +impl WindowFrameBoundType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + WindowFrameBoundType::CurrentRow => "CURRENT_ROW", + WindowFrameBoundType::Preceding => "PRECEDING", + WindowFrameBoundType::Following => "FOLLOWING", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum DateUnit { + Day = 0, + DateMillisecond = 1, +} +impl DateUnit { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + DateUnit::Day => "Day", + DateUnit::DateMillisecond => "DateMillisecond", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum TimeUnit { + Second = 0, + Millisecond = 1, + Microsecond = 2, + Nanosecond = 3, +} +impl TimeUnit { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + TimeUnit::Second => "Second", + TimeUnit::Millisecond => "Millisecond", + TimeUnit::Microsecond => "Microsecond", + TimeUnit::Nanosecond => "Nanosecond", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum IntervalUnit { + YearMonth = 0, + DayTime = 1, + MonthDayNano = 2, +} +impl IntervalUnit { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + IntervalUnit::YearMonth => "YearMonth", + IntervalUnit::DayTime => "DayTime", + IntervalUnit::MonthDayNano => "MonthDayNano", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum UnionMode { + Sparse = 0, + Dense = 1, +} +impl UnionMode { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + UnionMode::Sparse => "sparse", + UnionMode::Dense => "dense", + } + } +} diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 22964281a6dc..d5b80d25711c 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -10824,6 +10824,9 @@ impl serde::Serialize for LogicalExprNode { logical_expr_node::ExprType::SimilarTo(v) => { struct_ser.serialize_field("similarTo", v)?; } + logical_expr_node::ExprType::Placeholder(v) => { + struct_ser.serialize_field("placeholder", v)?; + } } } struct_ser.end() @@ -10890,6 +10893,7 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { "ilike", "similar_to", "similarTo", + "placeholder", ]; #[allow(clippy::enum_variant_names)] @@ -10927,6 +10931,7 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { Like, Ilike, SimilarTo, + Placeholder, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -10981,6 +10986,7 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { "like" => Ok(GeneratedField::Like), "ilike" => Ok(GeneratedField::Ilike), "similarTo" | "similar_to" => Ok(GeneratedField::SimilarTo), + "placeholder" => Ok(GeneratedField::Placeholder), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -11231,6 +11237,13 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { return Err(serde::de::Error::duplicate_field("similarTo")); } expr_type__ = map.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::SimilarTo) +; + } + GeneratedField::Placeholder => { + if expr_type__.is_some() { + return Err(serde::de::Error::duplicate_field("placeholder")); + } + expr_type__ = map.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::Placeholder) ; } } @@ -11439,6 +11452,9 @@ impl serde::Serialize for LogicalPlanNode { logical_plan_node::LogicalPlanType::CustomScan(v) => { struct_ser.serialize_field("customScan", v)?; } + logical_plan_node::LogicalPlanType::Prepare(v) => { + struct_ser.serialize_field("prepare", v)?; + } } } struct_ser.end() @@ -11485,6 +11501,7 @@ impl<'de> serde::Deserialize<'de> for LogicalPlanNode { "viewScan", "custom_scan", "customScan", + "prepare", ]; #[allow(clippy::enum_variant_names)] @@ -11513,6 +11530,7 @@ impl<'de> serde::Deserialize<'de> for LogicalPlanNode { Distinct, ViewScan, CustomScan, + Prepare, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -11558,6 +11576,7 @@ impl<'de> serde::Deserialize<'de> for LogicalPlanNode { "distinct" => Ok(GeneratedField::Distinct), "viewScan" | "view_scan" => Ok(GeneratedField::ViewScan), "customScan" | "custom_scan" => Ok(GeneratedField::CustomScan), + "prepare" => Ok(GeneratedField::Prepare), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -11746,6 +11765,13 @@ impl<'de> serde::Deserialize<'de> for LogicalPlanNode { return Err(serde::de::Error::duplicate_field("customScan")); } logical_plan_type__ = map.next_value::<::std::option::Option<_>>()?.map(logical_plan_node::LogicalPlanType::CustomScan) +; + } + GeneratedField::Prepare => { + if logical_plan_type__.is_some() { + return Err(serde::de::Error::duplicate_field("prepare")); + } + logical_plan_type__ = map.next_value::<::std::option::Option<_>>()?.map(logical_plan_node::LogicalPlanType::Prepare) ; } } @@ -15973,6 +15999,97 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { deserializer.deserialize_struct("datafusion.PhysicalWindowExprNode", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for PlaceholderNode { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if !self.param.is_empty() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.PlaceholderNode", len)?; + if !self.param.is_empty() { + struct_ser.serialize_field("param", &self.param)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for PlaceholderNode { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "param", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Param, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "param" => Ok(GeneratedField::Param), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = PlaceholderNode; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.PlaceholderNode") + } + + fn visit_map(self, mut map: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut param__ = None; + while let Some(k) = map.next_key()? { + match k { + GeneratedField::Param => { + if param__.is_some() { + return Err(serde::de::Error::duplicate_field("param")); + } + param__ = Some(map.next_value()?); + } + } + } + Ok(PlaceholderNode { + param: param__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion.PlaceholderNode", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for PlanType { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result @@ -16134,6 +16251,132 @@ impl<'de> serde::Deserialize<'de> for PlanType { deserializer.deserialize_struct("datafusion.PlanType", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for PrepareNode { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if !self.name.is_empty() { + len += 1; + } + if !self.data_types.is_empty() { + len += 1; + } + if self.input.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.PrepareNode", len)?; + if !self.name.is_empty() { + struct_ser.serialize_field("name", &self.name)?; + } + if !self.data_types.is_empty() { + struct_ser.serialize_field("dataTypes", &self.data_types)?; + } + if let Some(v) = self.input.as_ref() { + struct_ser.serialize_field("input", v)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for PrepareNode { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "name", + "data_types", + "dataTypes", + "input", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Name, + DataTypes, + Input, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "name" => Ok(GeneratedField::Name), + "dataTypes" | "data_types" => Ok(GeneratedField::DataTypes), + "input" => Ok(GeneratedField::Input), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = PrepareNode; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.PrepareNode") + } + + fn visit_map(self, mut map: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut name__ = None; + let mut data_types__ = None; + let mut input__ = None; + while let Some(k) = map.next_key()? { + match k { + GeneratedField::Name => { + if name__.is_some() { + return Err(serde::de::Error::duplicate_field("name")); + } + name__ = Some(map.next_value()?); + } + GeneratedField::DataTypes => { + if data_types__.is_some() { + return Err(serde::de::Error::duplicate_field("dataTypes")); + } + data_types__ = Some(map.next_value()?); + } + GeneratedField::Input => { + if input__.is_some() { + return Err(serde::de::Error::duplicate_field("input")); + } + input__ = map.next_value()?; + } + } + } + Ok(PrepareNode { + name: name__.unwrap_or_default(), + data_types: data_types__.unwrap_or_default(), + input: input__, + }) + } + } + deserializer.deserialize_struct("datafusion.PrepareNode", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for ProjectionColumns { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 6350e20dc720..d5c391c1c619 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -33,7 +33,7 @@ pub struct DfSchema { pub struct LogicalPlanNode { #[prost( oneof = "logical_plan_node::LogicalPlanType", - tags = "1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25" + tags = "1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26" )] pub logical_plan_type: ::core::option::Option, } @@ -89,6 +89,8 @@ pub mod logical_plan_node { ViewScan(::prost::alloc::boxed::Box), #[prost(message, tag = "25")] CustomScan(super::CustomTableScanNode), + #[prost(message, tag = "26")] + Prepare(::prost::alloc::boxed::Box), } } #[derive(Clone, PartialEq, ::prost::Message)] @@ -275,6 +277,15 @@ pub struct CreateExternalTableNode { >, } #[derive(Clone, PartialEq, ::prost::Message)] +pub struct PrepareNode { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub data_types: ::prost::alloc::vec::Vec, + #[prost(message, optional, boxed, tag = "3")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] pub struct CreateCatalogSchemaNode { #[prost(string, tag = "1")] pub schema_name: ::prost::alloc::string::String, @@ -406,7 +417,7 @@ pub struct SubqueryAliasNode { pub struct LogicalExprNode { #[prost( oneof = "logical_expr_node::ExprType", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34" )] pub expr_type: ::core::option::Option, } @@ -488,9 +499,16 @@ pub mod logical_expr_node { Ilike(::prost::alloc::boxed::Box), #[prost(message, tag = "33")] SimilarTo(::prost::alloc::boxed::Box), + #[prost(message, tag = "34")] + Placeholder(super::PlaceholderNode), } } #[derive(Clone, PartialEq, ::prost::Message)] +pub struct PlaceholderNode { + #[prost(string, tag = "1")] + pub param: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] pub struct LogicalExprList { #[prost(message, repeated, tag = "1")] pub expr: ::prost::alloc::vec::Vec, diff --git a/datafusion/proto/src/logical_plan.rs b/datafusion/proto/src/logical_plan.rs index 4c2827ed7f91..3cb326d7cbfb 100644 --- a/datafusion/proto/src/logical_plan.rs +++ b/datafusion/proto/src/logical_plan.rs @@ -25,7 +25,7 @@ use crate::{ }, to_proto, }; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::datasource::TableProvider; use datafusion::{ datasource::{ @@ -39,6 +39,7 @@ use datafusion::{ prelude::SessionContext, }; use datafusion_common::{context, Column, DataFusionError}; +use datafusion_expr::logical_plan::Prepare; use datafusion_expr::{ logical_plan::{ Aggregate, CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateView, @@ -795,6 +796,18 @@ impl AsLogicalPlan for LogicalPlanNode { )? .build() } + LogicalPlanType::Prepare(prepare) => { + let input: LogicalPlan = + into_logical_plan!(prepare.input, ctx, extension_codec)?; + let data_types: Vec = prepare + .data_types + .iter() + .map(|t| DataType::try_from(t)) + .collect::>()?; + LogicalPlanBuilder::from(input) + .prepare(prepare.name.clone(), data_types)? + .build() + } } } @@ -1356,6 +1369,28 @@ impl AsLogicalPlan for LogicalPlanNode { )), }) } + LogicalPlan::Prepare(Prepare { + name, + data_types, + input, + }) => { + let input = protobuf::LogicalPlanNode::try_from_logical_plan( + input, + extension_codec, + )?; + Ok(protobuf::LogicalPlanNode { + logical_plan_type: Some(LogicalPlanType::Prepare(Box::new( + protobuf::PrepareNode { + name: name.clone(), + data_types: data_types + .iter() + .map(|t| t.try_into()) + .collect::, _>>()?, + input: Some(Box::new(input)), + }, + ))), + }) + } LogicalPlan::CreateMemoryTable(_) => Err(proto_error( "LogicalPlan serde is not yet implemented for CreateMemoryTable", )), diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index a73565ae6bae..94d2e35ee0ea 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -27,7 +27,7 @@ use crate::protobuf::{ OptimizedLogicalPlan, OptimizedPhysicalPlan, }, CubeNode, EmptyMessage, GroupingSetNode, LogicalExprList, OptimizedLogicalPlanType, - OptimizedPhysicalPlanType, RollupNode, + OptimizedPhysicalPlanType, PlaceholderNode, RollupNode, }; use arrow::datatypes::{ DataType, Field, IntervalMonthDayNanoType, IntervalUnit, Schema, SchemaRef, TimeUnit, @@ -893,6 +893,9 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { .collect::, Self::Error>>()?, })), }, + Expr::Placeholder(param) => Self { + expr_type: Some(ExprType::Placeholder(PlaceholderNode { param: param.clone() })), + }, Expr::QualifiedWildcard { .. } | Expr::TryCast { .. } => return Err(Error::General("Proto serialization error: Expr::QualifiedWildcard { .. } | Expr::TryCast { .. } not supported".to_string())), diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 4744417f6225..b5cb4cd15f8b 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -728,4 +728,34 @@ mod tests { Ok(()) } + + // TODO: remove these 2 tests because they were tested in sqlparser + // This is just for me to see how the statements look like + #[ignore] + #[test] + fn create_prepared_statement() -> Result<(), ParserError> { + // positive case + let sql = "PREPARE my_plan(TIME, INT) AS SELECT region FROM cpu WHERE time = $1 and usage_user > $2"; + let statements = DFParser::parse_sql(sql)?; + + println!("{:#?}", statements[0]); + assert_eq!(statements.len(), 1); + + let sql = "SELECT region FROM cpu WHERE time = 10 and usage_user > 20"; + let statements = DFParser::parse_sql(sql)?; + println!("{:#?}", statements[0]); + assert_eq!(statements.len(), 1); + + Ok(()) + } + + #[test] + fn execute_statement() -> Result<(), ParserError> { + // positive case + let sql = "EXECUTE my_plan(1, '2022-11-30')"; + let statements = DFParser::parse_sql(sql)?; + println!("{:#?}", statements[0]); + assert_eq!(statements.len(), 1); + Ok(()) + } } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index d52dfa08aa97..4a466cbe5481 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -46,7 +46,6 @@ use datafusion_expr::expr::{Between, BinaryExpr, Case, Cast, GroupingSet, Like}; use datafusion_expr::expr_rewriter::normalize_col; use datafusion_expr::expr_rewriter::normalize_col_with_schemas; use datafusion_expr::logical_plan::builder::{project_with_alias, with_alias}; -use datafusion_expr::logical_plan::Join as HashJoin; use datafusion_expr::logical_plan::JoinConstraint as HashJoinConstraint; use datafusion_expr::logical_plan::{ Analyze, CreateCatalog, CreateCatalogSchema, @@ -55,6 +54,7 @@ use datafusion_expr::logical_plan::{ Partitioning, PlanType, SetVariable, ToStringifiedPlan, }; use datafusion_expr::logical_plan::{Filter, Subquery}; +use datafusion_expr::logical_plan::{Join as HashJoin, Prepare}; use datafusion_expr::utils::{ can_hash, check_all_column_from_schema, expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_column_exprs, @@ -331,6 +331,21 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .to_string(), )), }, + Statement::Prepare { + name, + data_types, + statement, + } => { + let plan = self.sql_statement_to_plan(*statement)?; + Ok(LogicalPlan::Prepare(Prepare { + name: name.to_string(), + data_types: data_types + .into_iter() + .map(|t| self.convert_data_type(&t)) + .collect::>()?, + input: Arc::new(plan), + })) + } Statement::ShowTables { extended, @@ -1740,6 +1755,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(Expr::Literal(ScalarValue::Null)) } SQLExpr::Value(Value::Boolean(n)) => Ok(lit(n)), + SQLExpr::Value(Value::Placeholder(param)) => { + Ok(Expr::Placeholder(param)) + } SQLExpr::UnaryOp { op, expr } => self.parse_sql_unary_op( op, *expr, @@ -1790,6 +1808,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Value(Value::SingleQuotedString(ref s) | Value::DoubleQuotedString(ref s)) => Ok(lit(s.clone())), SQLExpr::Value(Value::Boolean(n)) => Ok(lit(n)), SQLExpr::Value(Value::Null) => Ok(Expr::Literal(ScalarValue::Null)), + SQLExpr::Value(Value::Placeholder(param)) => Ok(Expr::Placeholder(param)), SQLExpr::Extract { field, expr } => Ok(Expr::ScalarFunction { fun: BuiltinScalarFunction::DatePart, args: vec![ @@ -5992,6 +6011,20 @@ mod tests { quick_test(sql, expected); } + // TODO: will ad more tests to cover maby other cases + #[test] + fn test_prepare_statement_to_plan() { + let sql = "PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = $1"; + //let statements = DFParser::parse_sql(sql).unwrap(); + + let expected = "Prepare: \"my_plan\" [Int32] \ + \n Projection: person.id, person.age\ + \n Filter: person.age = $1\ + \n TableScan: person"; + + quick_test(sql, expected); + } + fn assert_field_not_found(err: DataFusionError, name: &str) { match err { DataFusionError::SchemaError { .. } => { diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index f12578fbb75f..60a3c17e63dd 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -411,6 +411,7 @@ where ))) } }, + Expr::Placeholder(param) => Ok(Expr::Placeholder(param.clone())), }, } } From ff0dc10a73a3242269035a0318284f32dea5e5bc Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Fri, 2 Dec 2022 17:04:57 -0500 Subject: [PATCH 02/17] fix: typo --- datafusion/sql/src/planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 4a466cbe5481..f13711a90217 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -6011,7 +6011,7 @@ mod tests { quick_test(sql, expected); } - // TODO: will ad more tests to cover maby other cases + // TODO: will add more tests to cover many other cases #[test] fn test_prepare_statement_to_plan() { let sql = "PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = $1"; From 5a502882d7704e766b35c4d5420a66f1db680842 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Mon, 5 Dec 2022 09:37:32 -0500 Subject: [PATCH 03/17] chore: address preliminary review comments --- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/expr_schema.rs | 2 +- datafusion/expr/src/logical_plan/plan.rs | 5 +- datafusion/proto/src/generated/datafusion.rs | 1512 ------------------ datafusion/proto/src/logical_plan.rs | 2 +- 5 files changed, 6 insertions(+), 1517 deletions(-) delete mode 100644 datafusion/proto/src/generated/datafusion.rs diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 48e9571b2511..cbe31ad80b88 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1273,7 +1273,7 @@ fn create_name(e: &Expr) -> Result { Expr::QualifiedWildcard { .. } => Err(DataFusionError::Internal( "Create name does not support qualified wildcard".to_string(), )), - Expr::Placeholder(param) => Ok(format!("{}", param)), + Expr::Placeholder(param) => Ok((*param).to_string()), } } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 2634e45cf798..838730bbe78d 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -200,7 +200,7 @@ impl ExprSchemable for Expr { | Expr::IsNotFalse(_) | Expr::IsNotUnknown(_) | Expr::Exists { .. } - | Expr::Placeholder(_) => Ok(false), // todo: Placeholder should return false? + | Expr::Placeholder(_) => Ok(true), Expr::InSubquery { expr, .. } => expr.nullable(input_schema), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).is_nullable()) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 2fce8ceaa76f..a9b556c1170d 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1368,12 +1368,13 @@ pub struct CreateExternalTable { pub options: HashMap, } -/// Prepare a statement +/// Prepare a statement but do not execute it. Prepare statements can have 0 or more +/// `Expr::Placeholder` expressions that are filled in during execution #[derive(Clone)] pub struct Prepare { /// The name of the statement pub name: String, - /// Data types of the parameters + /// Data types of the parameters ([`Expr::Placeholder`]) pub data_types: Vec, /// The logical plan of the statements pub input: Arc, diff --git a/datafusion/proto/src/generated/datafusion.rs b/datafusion/proto/src/generated/datafusion.rs deleted file mode 100644 index eee0732d41aa..000000000000 --- a/datafusion/proto/src/generated/datafusion.rs +++ /dev/null @@ -1,1512 +0,0 @@ -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ColumnRelation { - #[prost(string, tag="1")] - pub relation: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Column { - #[prost(string, tag="1")] - pub name: ::prost::alloc::string::String, - #[prost(message, optional, tag="2")] - pub relation: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct DfField { - #[prost(message, optional, tag="1")] - pub field: ::core::option::Option, - #[prost(message, optional, tag="2")] - pub qualifier: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct DfSchema { - #[prost(message, repeated, tag="1")] - pub columns: ::prost::alloc::vec::Vec, - #[prost(map="string, string", tag="2")] - pub metadata: ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, -} -/// logical plan -/// LogicalPlan is a nested type -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LogicalPlanNode { - #[prost(oneof="logical_plan_node::LogicalPlanType", tags="1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25")] - pub logical_plan_type: ::core::option::Option, -} -/// Nested message and enum types in `LogicalPlanNode`. -pub mod logical_plan_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum LogicalPlanType { - #[prost(message, tag="1")] - ListingScan(super::ListingTableScanNode), - #[prost(message, tag="3")] - Projection(::prost::alloc::boxed::Box), - #[prost(message, tag="4")] - Selection(::prost::alloc::boxed::Box), - #[prost(message, tag="5")] - Limit(::prost::alloc::boxed::Box), - #[prost(message, tag="6")] - Aggregate(::prost::alloc::boxed::Box), - #[prost(message, tag="7")] - Join(::prost::alloc::boxed::Box), - #[prost(message, tag="8")] - Sort(::prost::alloc::boxed::Box), - #[prost(message, tag="9")] - Repartition(::prost::alloc::boxed::Box), - #[prost(message, tag="10")] - EmptyRelation(super::EmptyRelationNode), - #[prost(message, tag="11")] - CreateExternalTable(super::CreateExternalTableNode), - #[prost(message, tag="12")] - Explain(::prost::alloc::boxed::Box), - #[prost(message, tag="13")] - Window(::prost::alloc::boxed::Box), - #[prost(message, tag="14")] - Analyze(::prost::alloc::boxed::Box), - #[prost(message, tag="15")] - CrossJoin(::prost::alloc::boxed::Box), - #[prost(message, tag="16")] - Values(super::ValuesNode), - #[prost(message, tag="17")] - Extension(super::LogicalExtensionNode), - #[prost(message, tag="18")] - CreateCatalogSchema(super::CreateCatalogSchemaNode), - #[prost(message, tag="19")] - Union(super::UnionNode), - #[prost(message, tag="20")] - CreateCatalog(super::CreateCatalogNode), - #[prost(message, tag="21")] - SubqueryAlias(::prost::alloc::boxed::Box), - #[prost(message, tag="22")] - CreateView(::prost::alloc::boxed::Box), - #[prost(message, tag="23")] - Distinct(::prost::alloc::boxed::Box), - #[prost(message, tag="24")] - ViewScan(::prost::alloc::boxed::Box), - #[prost(message, tag="25")] - CustomScan(super::CustomTableScanNode), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LogicalExtensionNode { - #[prost(bytes="vec", tag="1")] - pub node: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="2")] - pub inputs: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ProjectionColumns { - #[prost(string, repeated, tag="1")] - pub columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CsvFormat { - #[prost(bool, tag="1")] - pub has_header: bool, - #[prost(string, tag="2")] - pub delimiter: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ParquetFormat { - #[prost(bool, tag="1")] - pub enable_pruning: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AvroFormat { -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ListingTableScanNode { - #[prost(string, tag="1")] - pub table_name: ::prost::alloc::string::String, - #[prost(string, repeated, tag="2")] - pub paths: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - #[prost(string, tag="3")] - pub file_extension: ::prost::alloc::string::String, - #[prost(message, optional, tag="4")] - pub projection: ::core::option::Option, - #[prost(message, optional, tag="5")] - pub schema: ::core::option::Option, - #[prost(message, repeated, tag="6")] - pub filters: ::prost::alloc::vec::Vec, - #[prost(string, repeated, tag="7")] - pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - #[prost(bool, tag="8")] - pub collect_stat: bool, - #[prost(uint32, tag="9")] - pub target_partitions: u32, - #[prost(oneof="listing_table_scan_node::FileFormatType", tags="10, 11, 12")] - pub file_format_type: ::core::option::Option, -} -/// Nested message and enum types in `ListingTableScanNode`. -pub mod listing_table_scan_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum FileFormatType { - #[prost(message, tag="10")] - Csv(super::CsvFormat), - #[prost(message, tag="11")] - Parquet(super::ParquetFormat), - #[prost(message, tag="12")] - Avro(super::AvroFormat), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ViewTableScanNode { - #[prost(string, tag="1")] - pub table_name: ::prost::alloc::string::String, - #[prost(message, optional, boxed, tag="2")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="3")] - pub schema: ::core::option::Option, - #[prost(message, optional, tag="4")] - pub projection: ::core::option::Option, - #[prost(string, tag="5")] - pub definition: ::prost::alloc::string::String, -} -/// Logical Plan to Scan a CustomTableProvider registered at runtime -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CustomTableScanNode { - #[prost(string, tag="1")] - pub table_name: ::prost::alloc::string::String, - #[prost(message, optional, tag="2")] - pub projection: ::core::option::Option, - #[prost(message, optional, tag="3")] - pub schema: ::core::option::Option, - #[prost(message, repeated, tag="4")] - pub filters: ::prost::alloc::vec::Vec, - #[prost(bytes="vec", tag="5")] - pub custom_table_data: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ProjectionNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub expr: ::prost::alloc::vec::Vec, - #[prost(oneof="projection_node::OptionalAlias", tags="3")] - pub optional_alias: ::core::option::Option, -} -/// Nested message and enum types in `ProjectionNode`. -pub mod projection_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum OptionalAlias { - #[prost(string, tag="3")] - Alias(::prost::alloc::string::String), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SelectionNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="2")] - pub expr: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SortNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub expr: ::prost::alloc::vec::Vec, - /// Maximum number of highest/lowest rows to fetch; negative means no limit - #[prost(int64, tag="3")] - pub fetch: i64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct RepartitionNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(oneof="repartition_node::PartitionMethod", tags="2, 3")] - pub partition_method: ::core::option::Option, -} -/// Nested message and enum types in `RepartitionNode`. -pub mod repartition_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum PartitionMethod { - #[prost(uint64, tag="2")] - RoundRobin(u64), - #[prost(message, tag="3")] - Hash(super::HashRepartition), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct HashRepartition { - #[prost(message, repeated, tag="1")] - pub hash_expr: ::prost::alloc::vec::Vec, - #[prost(uint64, tag="2")] - pub partition_count: u64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct EmptyRelationNode { - #[prost(bool, tag="1")] - pub produce_one_row: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CreateExternalTableNode { - #[prost(string, tag="1")] - pub name: ::prost::alloc::string::String, - #[prost(string, tag="2")] - pub location: ::prost::alloc::string::String, - #[prost(string, tag="3")] - pub file_type: ::prost::alloc::string::String, - #[prost(bool, tag="4")] - pub has_header: bool, - #[prost(message, optional, tag="5")] - pub schema: ::core::option::Option, - #[prost(string, repeated, tag="6")] - pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - #[prost(bool, tag="7")] - pub if_not_exists: bool, - #[prost(string, tag="8")] - pub delimiter: ::prost::alloc::string::String, - #[prost(string, tag="9")] - pub definition: ::prost::alloc::string::String, - #[prost(string, tag="10")] - pub file_compression_type: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CreateCatalogSchemaNode { - #[prost(string, tag="1")] - pub schema_name: ::prost::alloc::string::String, - #[prost(bool, tag="2")] - pub if_not_exists: bool, - #[prost(message, optional, tag="3")] - pub schema: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CreateCatalogNode { - #[prost(string, tag="1")] - pub catalog_name: ::prost::alloc::string::String, - #[prost(bool, tag="2")] - pub if_not_exists: bool, - #[prost(message, optional, tag="3")] - pub schema: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CreateViewNode { - #[prost(string, tag="1")] - pub name: ::prost::alloc::string::String, - #[prost(message, optional, boxed, tag="2")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="3")] - pub or_replace: bool, - #[prost(string, tag="4")] - pub definition: ::prost::alloc::string::String, -} -/// a node containing data for defining values list. unlike in SQL where it's two dimensional, here -/// the list is flattened, and with the field n_cols it can be parsed and partitioned into rows -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ValuesNode { - #[prost(uint64, tag="1")] - pub n_cols: u64, - #[prost(message, repeated, tag="2")] - pub values_list: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AnalyzeNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="2")] - pub verbose: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ExplainNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="2")] - pub verbose: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AggregateNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub group_expr: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="3")] - pub aggr_expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WindowNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub window_expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct JoinNode { - #[prost(message, optional, boxed, tag="1")] - pub left: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="2")] - pub right: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(enumeration="JoinType", tag="3")] - pub join_type: i32, - #[prost(enumeration="JoinConstraint", tag="4")] - pub join_constraint: i32, - #[prost(message, repeated, tag="5")] - pub left_join_column: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="6")] - pub right_join_column: ::prost::alloc::vec::Vec, - #[prost(bool, tag="7")] - pub null_equals_null: bool, - #[prost(message, optional, tag="8")] - pub filter: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct DistinctNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct UnionNode { - #[prost(message, repeated, tag="1")] - pub inputs: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CrossJoinNode { - #[prost(message, optional, boxed, tag="1")] - pub left: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="2")] - pub right: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LimitNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - /// The number of rows to skip before fetch; non-positive means don't skip any - #[prost(int64, tag="2")] - pub skip: i64, - /// Maximum number of rows to fetch; negative means no limit - #[prost(int64, tag="3")] - pub fetch: i64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SelectionExecNode { - #[prost(message, optional, tag="1")] - pub expr: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SubqueryAliasNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="2")] - pub alias: ::prost::alloc::string::String, -} -/// logical expressions -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LogicalExprNode { - #[prost(oneof="logical_expr_node::ExprType", tags="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33")] - pub expr_type: ::core::option::Option, -} -/// Nested message and enum types in `LogicalExprNode`. -pub mod logical_expr_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum ExprType { - /// column references - #[prost(message, tag="1")] - Column(super::Column), - /// alias - #[prost(message, tag="2")] - Alias(::prost::alloc::boxed::Box), - #[prost(message, tag="3")] - Literal(super::ScalarValue), - /// binary expressions - #[prost(message, tag="4")] - BinaryExpr(::prost::alloc::boxed::Box), - /// aggregate expressions - #[prost(message, tag="5")] - AggregateExpr(::prost::alloc::boxed::Box), - /// null checks - #[prost(message, tag="6")] - IsNullExpr(::prost::alloc::boxed::Box), - #[prost(message, tag="7")] - IsNotNullExpr(::prost::alloc::boxed::Box), - #[prost(message, tag="8")] - NotExpr(::prost::alloc::boxed::Box), - #[prost(message, tag="9")] - Between(::prost::alloc::boxed::Box), - #[prost(message, tag="10")] - Case(::prost::alloc::boxed::Box), - #[prost(message, tag="11")] - Cast(::prost::alloc::boxed::Box), - #[prost(message, tag="12")] - Sort(::prost::alloc::boxed::Box), - #[prost(message, tag="13")] - Negative(::prost::alloc::boxed::Box), - #[prost(message, tag="14")] - InList(::prost::alloc::boxed::Box), - #[prost(bool, tag="15")] - Wildcard(bool), - #[prost(message, tag="16")] - ScalarFunction(super::ScalarFunctionNode), - #[prost(message, tag="17")] - TryCast(::prost::alloc::boxed::Box), - /// window expressions - #[prost(message, tag="18")] - WindowExpr(::prost::alloc::boxed::Box), - /// AggregateUDF expressions - #[prost(message, tag="19")] - AggregateUdfExpr(::prost::alloc::boxed::Box), - /// Scalar UDF expressions - #[prost(message, tag="20")] - ScalarUdfExpr(super::ScalarUdfExprNode), - #[prost(message, tag="21")] - GetIndexedField(::prost::alloc::boxed::Box), - #[prost(message, tag="22")] - GroupingSet(super::GroupingSetNode), - #[prost(message, tag="23")] - Cube(super::CubeNode), - #[prost(message, tag="24")] - Rollup(super::RollupNode), - #[prost(message, tag="25")] - IsTrue(::prost::alloc::boxed::Box), - #[prost(message, tag="26")] - IsFalse(::prost::alloc::boxed::Box), - #[prost(message, tag="27")] - IsUnknown(::prost::alloc::boxed::Box), - #[prost(message, tag="28")] - IsNotTrue(::prost::alloc::boxed::Box), - #[prost(message, tag="29")] - IsNotFalse(::prost::alloc::boxed::Box), - #[prost(message, tag="30")] - IsNotUnknown(::prost::alloc::boxed::Box), - #[prost(message, tag="31")] - Like(::prost::alloc::boxed::Box), - #[prost(message, tag="32")] - Ilike(::prost::alloc::boxed::Box), - #[prost(message, tag="33")] - SimilarTo(::prost::alloc::boxed::Box), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LogicalExprList { - #[prost(message, repeated, tag="1")] - pub expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct GroupingSetNode { - #[prost(message, repeated, tag="1")] - pub expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CubeNode { - #[prost(message, repeated, tag="1")] - pub expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct RollupNode { - #[prost(message, repeated, tag="1")] - pub expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct GetIndexedField { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="2")] - pub key: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNull { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNotNull { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsTrue { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsFalse { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsUnknown { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNotTrue { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNotFalse { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNotUnknown { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Not { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AliasNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="2")] - pub alias: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct BinaryExprNode { - #[prost(message, optional, boxed, tag="1")] - pub l: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="2")] - pub r: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="3")] - pub op: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct NegativeNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct InListNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub list: ::prost::alloc::vec::Vec, - #[prost(bool, tag="3")] - pub negated: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarFunctionNode { - #[prost(enumeration="ScalarFunction", tag="1")] - pub fun: i32, - #[prost(message, repeated, tag="2")] - pub args: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AggregateExprNode { - #[prost(enumeration="AggregateFunction", tag="1")] - pub aggr_function: i32, - #[prost(message, repeated, tag="2")] - pub expr: ::prost::alloc::vec::Vec, - #[prost(bool, tag="3")] - pub distinct: bool, - #[prost(message, optional, boxed, tag="4")] - pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AggregateUdfExprNode { - #[prost(string, tag="1")] - pub fun_name: ::prost::alloc::string::String, - #[prost(message, repeated, tag="2")] - pub args: ::prost::alloc::vec::Vec, - #[prost(message, optional, boxed, tag="3")] - pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarUdfExprNode { - #[prost(string, tag="1")] - pub fun_name: ::prost::alloc::string::String, - #[prost(message, repeated, tag="2")] - pub args: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WindowExprNode { - #[prost(message, optional, boxed, tag="4")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="5")] - pub partition_by: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="6")] - pub order_by: ::prost::alloc::vec::Vec, - #[prost(oneof="window_expr_node::WindowFunction", tags="1, 2")] - pub window_function: ::core::option::Option, - /// repeated LogicalExprNode filter = 7; - #[prost(oneof="window_expr_node::WindowFrame", tags="8")] - pub window_frame: ::core::option::Option, -} -/// Nested message and enum types in `WindowExprNode`. -pub mod window_expr_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum WindowFunction { - #[prost(enumeration="super::AggregateFunction", tag="1")] - AggrFunction(i32), - /// udaf = 3 - #[prost(enumeration="super::BuiltInWindowFunction", tag="2")] - BuiltInFunction(i32), - } - /// repeated LogicalExprNode filter = 7; - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum WindowFrame { - #[prost(message, tag="8")] - Frame(super::WindowFrame), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct BetweenNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="2")] - pub negated: bool, - #[prost(message, optional, boxed, tag="3")] - pub low: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="4")] - pub high: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LikeNode { - #[prost(bool, tag="1")] - pub negated: bool, - #[prost(message, optional, boxed, tag="2")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="3")] - pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="4")] - pub escape_char: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ILikeNode { - #[prost(bool, tag="1")] - pub negated: bool, - #[prost(message, optional, boxed, tag="2")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="3")] - pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="4")] - pub escape_char: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SimilarToNode { - #[prost(bool, tag="1")] - pub negated: bool, - #[prost(message, optional, boxed, tag="2")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="3")] - pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="4")] - pub escape_char: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CaseNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub when_then_expr: ::prost::alloc::vec::Vec, - #[prost(message, optional, boxed, tag="3")] - pub else_expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WhenThen { - #[prost(message, optional, tag="1")] - pub when_expr: ::core::option::Option, - #[prost(message, optional, tag="2")] - pub then_expr: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CastNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="2")] - pub arrow_type: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct TryCastNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="2")] - pub arrow_type: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SortExprNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="2")] - pub asc: bool, - #[prost(bool, tag="3")] - pub nulls_first: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WindowFrame { - #[prost(enumeration="WindowFrameUnits", tag="1")] - pub window_frame_units: i32, - #[prost(message, optional, tag="2")] - pub start_bound: ::core::option::Option, - /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) - /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) - #[prost(oneof="window_frame::EndBound", tags="3")] - pub end_bound: ::core::option::Option, -} -/// Nested message and enum types in `WindowFrame`. -pub mod window_frame { - /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) - /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum EndBound { - #[prost(message, tag="3")] - Bound(super::WindowFrameBound), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WindowFrameBound { - #[prost(enumeration="WindowFrameBoundType", tag="1")] - pub window_frame_bound_type: i32, - /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) - /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) - #[prost(oneof="window_frame_bound::BoundValue", tags="2")] - pub bound_value: ::core::option::Option, -} -/// Nested message and enum types in `WindowFrameBound`. -pub mod window_frame_bound { - /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) - /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum BoundValue { - #[prost(uint64, tag="2")] - Value(u64), - } -} -// ///////////////////////////////////////////////////////////////////////////////////////////////// -// Arrow Data Types -// ///////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Schema { - #[prost(message, repeated, tag="1")] - pub columns: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Field { - /// name of the field - #[prost(string, tag="1")] - pub name: ::prost::alloc::string::String, - #[prost(message, optional, boxed, tag="2")] - pub arrow_type: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="3")] - pub nullable: bool, - /// for complex data types like structs, unions - #[prost(message, repeated, tag="4")] - pub children: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct FixedSizeBinary { - #[prost(int32, tag="1")] - pub length: i32, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Timestamp { - #[prost(enumeration="TimeUnit", tag="1")] - pub time_unit: i32, - #[prost(string, tag="2")] - pub timezone: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Decimal { - #[prost(uint64, tag="1")] - pub whole: u64, - #[prost(uint64, tag="2")] - pub fractional: u64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct List { - #[prost(message, optional, boxed, tag="1")] - pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct FixedSizeList { - #[prost(message, optional, boxed, tag="1")] - pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(int32, tag="2")] - pub list_size: i32, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Dictionary { - #[prost(message, optional, boxed, tag="1")] - pub key: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="2")] - pub value: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Struct { - #[prost(message, repeated, tag="1")] - pub sub_field_types: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Union { - #[prost(message, repeated, tag="1")] - pub union_types: ::prost::alloc::vec::Vec, - #[prost(enumeration="UnionMode", tag="2")] - pub union_mode: i32, - #[prost(int32, repeated, tag="3")] - pub type_ids: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarListValue { - /// encode null explicitly to distinguish a list with a null value - /// from a list with no values) - #[prost(bool, tag="3")] - pub is_null: bool, - #[prost(message, optional, tag="1")] - pub field: ::core::option::Option, - #[prost(message, repeated, tag="2")] - pub values: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarTimestampValue { - #[prost(string, tag="5")] - pub timezone: ::prost::alloc::string::String, - #[prost(oneof="scalar_timestamp_value::Value", tags="1, 2, 3, 4")] - pub value: ::core::option::Option, -} -/// Nested message and enum types in `ScalarTimestampValue`. -pub mod scalar_timestamp_value { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum Value { - #[prost(int64, tag="1")] - TimeMicrosecondValue(i64), - #[prost(int64, tag="2")] - TimeNanosecondValue(i64), - #[prost(int64, tag="3")] - TimeSecondValue(i64), - #[prost(int64, tag="4")] - TimeMillisecondValue(i64), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarDictionaryValue { - #[prost(message, optional, tag="1")] - pub index_type: ::core::option::Option, - #[prost(message, optional, boxed, tag="2")] - pub value: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IntervalMonthDayNanoValue { - #[prost(int32, tag="1")] - pub months: i32, - #[prost(int32, tag="2")] - pub days: i32, - #[prost(int64, tag="3")] - pub nanos: i64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct StructValue { - /// Note that a null struct value must have one or more fields, so we - /// encode a null StructValue as one witth an empty field_values - /// list. - #[prost(message, repeated, tag="2")] - pub field_values: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="3")] - pub fields: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarValue { - #[prost(oneof="scalar_value::Value", tags="33, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32")] - pub value: ::core::option::Option, -} -/// Nested message and enum types in `ScalarValue`. -pub mod scalar_value { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum Value { - /// was PrimitiveScalarType null_value = 19; - /// Null value of any type - #[prost(message, tag="33")] - NullValue(super::ArrowType), - #[prost(bool, tag="1")] - BoolValue(bool), - #[prost(string, tag="2")] - Utf8Value(::prost::alloc::string::String), - #[prost(string, tag="3")] - LargeUtf8Value(::prost::alloc::string::String), - #[prost(int32, tag="4")] - Int8Value(i32), - #[prost(int32, tag="5")] - Int16Value(i32), - #[prost(int32, tag="6")] - Int32Value(i32), - #[prost(int64, tag="7")] - Int64Value(i64), - #[prost(uint32, tag="8")] - Uint8Value(u32), - #[prost(uint32, tag="9")] - Uint16Value(u32), - #[prost(uint32, tag="10")] - Uint32Value(u32), - #[prost(uint64, tag="11")] - Uint64Value(u64), - #[prost(float, tag="12")] - Float32Value(f32), - #[prost(double, tag="13")] - Float64Value(f64), - /// Literal Date32 value always has a unit of day - #[prost(int32, tag="14")] - Date32Value(i32), - /// WAS: ScalarType null_list_value = 18; - #[prost(message, tag="17")] - ListValue(super::ScalarListValue), - #[prost(message, tag="20")] - Decimal128Value(super::Decimal128), - #[prost(int64, tag="21")] - Date64Value(i64), - #[prost(int32, tag="24")] - IntervalYearmonthValue(i32), - #[prost(int64, tag="25")] - IntervalDaytimeValue(i64), - #[prost(message, tag="26")] - TimestampValue(super::ScalarTimestampValue), - #[prost(message, tag="27")] - DictionaryValue(::prost::alloc::boxed::Box), - #[prost(bytes, tag="28")] - BinaryValue(::prost::alloc::vec::Vec), - #[prost(bytes, tag="29")] - LargeBinaryValue(::prost::alloc::vec::Vec), - #[prost(int64, tag="30")] - Time64Value(i64), - #[prost(message, tag="31")] - IntervalMonthDayNano(super::IntervalMonthDayNanoValue), - #[prost(message, tag="32")] - StructValue(super::StructValue), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Decimal128 { - #[prost(bytes="vec", tag="1")] - pub value: ::prost::alloc::vec::Vec, - #[prost(int64, tag="2")] - pub p: i64, - #[prost(int64, tag="3")] - pub s: i64, -} -/// Serialized data type -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ArrowType { - #[prost(oneof="arrow_type::ArrowTypeEnum", tags="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 32, 15, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30")] - pub arrow_type_enum: ::core::option::Option, -} -/// Nested message and enum types in `ArrowType`. -pub mod arrow_type { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum ArrowTypeEnum { - /// arrow::Type::NA - #[prost(message, tag="1")] - None(super::EmptyMessage), - /// arrow::Type::BOOL - #[prost(message, tag="2")] - Bool(super::EmptyMessage), - /// arrow::Type::UINT8 - #[prost(message, tag="3")] - Uint8(super::EmptyMessage), - /// arrow::Type::INT8 - #[prost(message, tag="4")] - Int8(super::EmptyMessage), - /// represents arrow::Type fields in src/arrow/type.h - #[prost(message, tag="5")] - Uint16(super::EmptyMessage), - #[prost(message, tag="6")] - Int16(super::EmptyMessage), - #[prost(message, tag="7")] - Uint32(super::EmptyMessage), - #[prost(message, tag="8")] - Int32(super::EmptyMessage), - #[prost(message, tag="9")] - Uint64(super::EmptyMessage), - #[prost(message, tag="10")] - Int64(super::EmptyMessage), - #[prost(message, tag="11")] - Float16(super::EmptyMessage), - #[prost(message, tag="12")] - Float32(super::EmptyMessage), - #[prost(message, tag="13")] - Float64(super::EmptyMessage), - #[prost(message, tag="14")] - Utf8(super::EmptyMessage), - #[prost(message, tag="32")] - LargeUtf8(super::EmptyMessage), - #[prost(message, tag="15")] - Binary(super::EmptyMessage), - #[prost(int32, tag="16")] - FixedSizeBinary(i32), - #[prost(message, tag="31")] - LargeBinary(super::EmptyMessage), - #[prost(message, tag="17")] - Date32(super::EmptyMessage), - #[prost(message, tag="18")] - Date64(super::EmptyMessage), - #[prost(enumeration="super::TimeUnit", tag="19")] - Duration(i32), - #[prost(message, tag="20")] - Timestamp(super::Timestamp), - #[prost(enumeration="super::TimeUnit", tag="21")] - Time32(i32), - #[prost(enumeration="super::TimeUnit", tag="22")] - Time64(i32), - #[prost(enumeration="super::IntervalUnit", tag="23")] - Interval(i32), - #[prost(message, tag="24")] - Decimal(super::Decimal), - #[prost(message, tag="25")] - List(::prost::alloc::boxed::Box), - #[prost(message, tag="26")] - LargeList(::prost::alloc::boxed::Box), - #[prost(message, tag="27")] - FixedSizeList(::prost::alloc::boxed::Box), - #[prost(message, tag="28")] - Struct(super::Struct), - #[prost(message, tag="29")] - Union(super::Union), - #[prost(message, tag="30")] - Dictionary(::prost::alloc::boxed::Box), - } -} -/// Useful for representing an empty enum variant in rust -/// E.G. enum example{One, Two(i32)} -/// maps to -/// message example{ -/// oneof{ -/// EmptyMessage One = 1; -/// i32 Two = 2; -/// } -/// } -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct EmptyMessage { -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct OptimizedLogicalPlanType { - #[prost(string, tag="1")] - pub optimizer_name: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct OptimizedPhysicalPlanType { - #[prost(string, tag="1")] - pub optimizer_name: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct PlanType { - #[prost(oneof="plan_type::PlanTypeEnum", tags="1, 2, 3, 4, 5, 6")] - pub plan_type_enum: ::core::option::Option, -} -/// Nested message and enum types in `PlanType`. -pub mod plan_type { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum PlanTypeEnum { - #[prost(message, tag="1")] - InitialLogicalPlan(super::EmptyMessage), - #[prost(message, tag="2")] - OptimizedLogicalPlan(super::OptimizedLogicalPlanType), - #[prost(message, tag="3")] - FinalLogicalPlan(super::EmptyMessage), - #[prost(message, tag="4")] - InitialPhysicalPlan(super::EmptyMessage), - #[prost(message, tag="5")] - OptimizedPhysicalPlan(super::OptimizedPhysicalPlanType), - #[prost(message, tag="6")] - FinalPhysicalPlan(super::EmptyMessage), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct StringifiedPlan { - #[prost(message, optional, tag="1")] - pub plan_type: ::core::option::Option, - #[prost(string, tag="2")] - pub plan: ::prost::alloc::string::String, -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum JoinType { - Inner = 0, - Left = 1, - Right = 2, - Full = 3, - Semi = 4, - Anti = 5, -} -impl JoinType { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - JoinType::Inner => "INNER", - JoinType::Left => "LEFT", - JoinType::Right => "RIGHT", - JoinType::Full => "FULL", - JoinType::Semi => "SEMI", - JoinType::Anti => "ANTI", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum JoinConstraint { - On = 0, - Using = 1, -} -impl JoinConstraint { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - JoinConstraint::On => "ON", - JoinConstraint::Using => "USING", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum ScalarFunction { - Abs = 0, - Acos = 1, - Asin = 2, - Atan = 3, - Ascii = 4, - Ceil = 5, - Cos = 6, - Digest = 7, - Exp = 8, - Floor = 9, - Ln = 10, - Log = 11, - Log10 = 12, - Log2 = 13, - Round = 14, - Signum = 15, - Sin = 16, - Sqrt = 17, - Tan = 18, - Trunc = 19, - Array = 20, - RegexpMatch = 21, - BitLength = 22, - Btrim = 23, - CharacterLength = 24, - Chr = 25, - Concat = 26, - ConcatWithSeparator = 27, - DatePart = 28, - DateTrunc = 29, - InitCap = 30, - Left = 31, - Lpad = 32, - Lower = 33, - Ltrim = 34, - Md5 = 35, - NullIf = 36, - OctetLength = 37, - Random = 38, - RegexpReplace = 39, - Repeat = 40, - Replace = 41, - Reverse = 42, - Right = 43, - Rpad = 44, - Rtrim = 45, - Sha224 = 46, - Sha256 = 47, - Sha384 = 48, - Sha512 = 49, - SplitPart = 50, - StartsWith = 51, - Strpos = 52, - Substr = 53, - ToHex = 54, - ToTimestamp = 55, - ToTimestampMillis = 56, - ToTimestampMicros = 57, - ToTimestampSeconds = 58, - Now = 59, - Translate = 60, - Trim = 61, - Upper = 62, - Coalesce = 63, - Power = 64, - StructFun = 65, - FromUnixtime = 66, - Atan2 = 67, - DateBin = 68, - ArrowTypeof = 69, -} -impl ScalarFunction { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - ScalarFunction::Abs => "Abs", - ScalarFunction::Acos => "Acos", - ScalarFunction::Asin => "Asin", - ScalarFunction::Atan => "Atan", - ScalarFunction::Ascii => "Ascii", - ScalarFunction::Ceil => "Ceil", - ScalarFunction::Cos => "Cos", - ScalarFunction::Digest => "Digest", - ScalarFunction::Exp => "Exp", - ScalarFunction::Floor => "Floor", - ScalarFunction::Ln => "Ln", - ScalarFunction::Log => "Log", - ScalarFunction::Log10 => "Log10", - ScalarFunction::Log2 => "Log2", - ScalarFunction::Round => "Round", - ScalarFunction::Signum => "Signum", - ScalarFunction::Sin => "Sin", - ScalarFunction::Sqrt => "Sqrt", - ScalarFunction::Tan => "Tan", - ScalarFunction::Trunc => "Trunc", - ScalarFunction::Array => "Array", - ScalarFunction::RegexpMatch => "RegexpMatch", - ScalarFunction::BitLength => "BitLength", - ScalarFunction::Btrim => "Btrim", - ScalarFunction::CharacterLength => "CharacterLength", - ScalarFunction::Chr => "Chr", - ScalarFunction::Concat => "Concat", - ScalarFunction::ConcatWithSeparator => "ConcatWithSeparator", - ScalarFunction::DatePart => "DatePart", - ScalarFunction::DateTrunc => "DateTrunc", - ScalarFunction::InitCap => "InitCap", - ScalarFunction::Left => "Left", - ScalarFunction::Lpad => "Lpad", - ScalarFunction::Lower => "Lower", - ScalarFunction::Ltrim => "Ltrim", - ScalarFunction::Md5 => "MD5", - ScalarFunction::NullIf => "NullIf", - ScalarFunction::OctetLength => "OctetLength", - ScalarFunction::Random => "Random", - ScalarFunction::RegexpReplace => "RegexpReplace", - ScalarFunction::Repeat => "Repeat", - ScalarFunction::Replace => "Replace", - ScalarFunction::Reverse => "Reverse", - ScalarFunction::Right => "Right", - ScalarFunction::Rpad => "Rpad", - ScalarFunction::Rtrim => "Rtrim", - ScalarFunction::Sha224 => "SHA224", - ScalarFunction::Sha256 => "SHA256", - ScalarFunction::Sha384 => "SHA384", - ScalarFunction::Sha512 => "SHA512", - ScalarFunction::SplitPart => "SplitPart", - ScalarFunction::StartsWith => "StartsWith", - ScalarFunction::Strpos => "Strpos", - ScalarFunction::Substr => "Substr", - ScalarFunction::ToHex => "ToHex", - ScalarFunction::ToTimestamp => "ToTimestamp", - ScalarFunction::ToTimestampMillis => "ToTimestampMillis", - ScalarFunction::ToTimestampMicros => "ToTimestampMicros", - ScalarFunction::ToTimestampSeconds => "ToTimestampSeconds", - ScalarFunction::Now => "Now", - ScalarFunction::Translate => "Translate", - ScalarFunction::Trim => "Trim", - ScalarFunction::Upper => "Upper", - ScalarFunction::Coalesce => "Coalesce", - ScalarFunction::Power => "Power", - ScalarFunction::StructFun => "StructFun", - ScalarFunction::FromUnixtime => "FromUnixtime", - ScalarFunction::Atan2 => "Atan2", - ScalarFunction::DateBin => "DateBin", - ScalarFunction::ArrowTypeof => "ArrowTypeof", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum AggregateFunction { - Min = 0, - Max = 1, - Sum = 2, - Avg = 3, - Count = 4, - ApproxDistinct = 5, - ArrayAgg = 6, - Variance = 7, - VariancePop = 8, - Covariance = 9, - CovariancePop = 10, - Stddev = 11, - StddevPop = 12, - Correlation = 13, - ApproxPercentileCont = 14, - ApproxMedian = 15, - ApproxPercentileContWithWeight = 16, - Grouping = 17, - Median = 18, -} -impl AggregateFunction { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - AggregateFunction::Min => "MIN", - AggregateFunction::Max => "MAX", - AggregateFunction::Sum => "SUM", - AggregateFunction::Avg => "AVG", - AggregateFunction::Count => "COUNT", - AggregateFunction::ApproxDistinct => "APPROX_DISTINCT", - AggregateFunction::ArrayAgg => "ARRAY_AGG", - AggregateFunction::Variance => "VARIANCE", - AggregateFunction::VariancePop => "VARIANCE_POP", - AggregateFunction::Covariance => "COVARIANCE", - AggregateFunction::CovariancePop => "COVARIANCE_POP", - AggregateFunction::Stddev => "STDDEV", - AggregateFunction::StddevPop => "STDDEV_POP", - AggregateFunction::Correlation => "CORRELATION", - AggregateFunction::ApproxPercentileCont => "APPROX_PERCENTILE_CONT", - AggregateFunction::ApproxMedian => "APPROX_MEDIAN", - AggregateFunction::ApproxPercentileContWithWeight => "APPROX_PERCENTILE_CONT_WITH_WEIGHT", - AggregateFunction::Grouping => "GROUPING", - AggregateFunction::Median => "MEDIAN", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum BuiltInWindowFunction { - RowNumber = 0, - Rank = 1, - DenseRank = 2, - PercentRank = 3, - CumeDist = 4, - Ntile = 5, - Lag = 6, - Lead = 7, - FirstValue = 8, - LastValue = 9, - NthValue = 10, -} -impl BuiltInWindowFunction { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - BuiltInWindowFunction::RowNumber => "ROW_NUMBER", - BuiltInWindowFunction::Rank => "RANK", - BuiltInWindowFunction::DenseRank => "DENSE_RANK", - BuiltInWindowFunction::PercentRank => "PERCENT_RANK", - BuiltInWindowFunction::CumeDist => "CUME_DIST", - BuiltInWindowFunction::Ntile => "NTILE", - BuiltInWindowFunction::Lag => "LAG", - BuiltInWindowFunction::Lead => "LEAD", - BuiltInWindowFunction::FirstValue => "FIRST_VALUE", - BuiltInWindowFunction::LastValue => "LAST_VALUE", - BuiltInWindowFunction::NthValue => "NTH_VALUE", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum WindowFrameUnits { - Rows = 0, - Range = 1, - Groups = 2, -} -impl WindowFrameUnits { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - WindowFrameUnits::Rows => "ROWS", - WindowFrameUnits::Range => "RANGE", - WindowFrameUnits::Groups => "GROUPS", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum WindowFrameBoundType { - CurrentRow = 0, - Preceding = 1, - Following = 2, -} -impl WindowFrameBoundType { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - WindowFrameBoundType::CurrentRow => "CURRENT_ROW", - WindowFrameBoundType::Preceding => "PRECEDING", - WindowFrameBoundType::Following => "FOLLOWING", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum DateUnit { - Day = 0, - DateMillisecond = 1, -} -impl DateUnit { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - DateUnit::Day => "Day", - DateUnit::DateMillisecond => "DateMillisecond", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum TimeUnit { - Second = 0, - Millisecond = 1, - Microsecond = 2, - Nanosecond = 3, -} -impl TimeUnit { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - TimeUnit::Second => "Second", - TimeUnit::Millisecond => "Millisecond", - TimeUnit::Microsecond => "Microsecond", - TimeUnit::Nanosecond => "Nanosecond", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum IntervalUnit { - YearMonth = 0, - DayTime = 1, - MonthDayNano = 2, -} -impl IntervalUnit { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - IntervalUnit::YearMonth => "YearMonth", - IntervalUnit::DayTime => "DayTime", - IntervalUnit::MonthDayNano => "MonthDayNano", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum UnionMode { - Sparse = 0, - Dense = 1, -} -impl UnionMode { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - UnionMode::Sparse => "sparse", - UnionMode::Dense => "dense", - } - } -} diff --git a/datafusion/proto/src/logical_plan.rs b/datafusion/proto/src/logical_plan.rs index 3cb326d7cbfb..0d2a0d183b91 100644 --- a/datafusion/proto/src/logical_plan.rs +++ b/datafusion/proto/src/logical_plan.rs @@ -802,7 +802,7 @@ impl AsLogicalPlan for LogicalPlanNode { let data_types: Vec = prepare .data_types .iter() - .map(|t| DataType::try_from(t)) + .map(DataType::try_from) .collect::>()?; LogicalPlanBuilder::from(input) .prepare(prepare.name.clone(), data_types)? From e5ec87720993f19050cb5bbe7a94054c73e8e6ba Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Mon, 5 Dec 2022 12:25:17 -0500 Subject: [PATCH 04/17] fix: put Placeholder last to have the expression comparison to work as expected --- datafusion/expr/src/expr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index cbe31ad80b88..fa807d58f77a 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -89,8 +89,6 @@ pub enum Expr { Alias(Box, String), /// A named reference to a qualified filed in a schema. Column(Column), - /// A place holder for parameters in a prepared statement. - Placeholder(String), /// A named reference to a variable in a registry. ScalarVariable(DataType, Vec), /// A constant value. @@ -246,6 +244,8 @@ pub enum Expr { /// List of grouping set expressions. Only valid in the context of an aggregate /// GROUP BY expression list GroupingSet(GroupingSet), + /// A place holder for parameters in a prepared statement. + Placeholder(String), } /// Binary expression From 678a30b12e16e06231794dcea4b3e6a858825947 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Mon, 5 Dec 2022 15:33:43 -0500 Subject: [PATCH 05/17] test: add more tests and starting to pass param_data_types to expression to get data types of the params , , ... --- datafusion/expr/src/expr_schema.rs | 42 ++++++---- datafusion/expr/src/logical_plan/builder.rs | 2 + datafusion/expr/src/logical_plan/plan.rs | 3 +- datafusion/sql/src/planner.rs | 85 +++++++++++++++++++-- 4 files changed, 113 insertions(+), 19 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 838730bbe78d..509bbf1d48cc 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -27,7 +27,7 @@ use datafusion_common::{DFField, DFSchema, DataFusionError, ExprSchema, Result}; /// trait to allow expr to typable with respect to a schema pub trait ExprSchemable { /// given a schema, return the type of the expr - fn get_type(&self, schema: &S) -> Result; + fn get_type(&self, schema: &S, param_data_types: &Vec) -> Result; /// given a schema, return the nullability of the expr fn nullable(&self, input_schema: &S) -> Result; @@ -52,50 +52,50 @@ impl ExprSchemable for Expr { /// expression refers to a column that does not exist in the /// schema, or when the expression is incorrectly typed /// (e.g. `[utf8] + [bool]`). - fn get_type(&self, schema: &S) -> Result { + fn get_type(&self, schema: &S, param_data_types: &Vec) -> Result { match self { Expr::Alias(expr, _) | Expr::Sort { expr, .. } | Expr::Negative(expr) => { - expr.get_type(schema) + expr.get_type(schema, param_data_types) } Expr::Column(c) => Ok(schema.data_type(c)?.clone()), Expr::ScalarVariable(ty, _) => Ok(ty.clone()), Expr::Literal(l) => Ok(l.get_datatype()), - Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), + Expr::Case(case) => case.when_then_expr[0].1.get_type(schema, param_data_types), Expr::Cast(Cast { data_type, .. }) | Expr::TryCast { data_type, .. } => { Ok(data_type.clone()) } Expr::ScalarUDF { fun, args } => { let data_types = args .iter() - .map(|e| e.get_type(schema)) + .map(|e| e.get_type(schema, param_data_types)) .collect::>>()?; Ok((fun.return_type)(&data_types)?.as_ref().clone()) } Expr::ScalarFunction { fun, args } => { let data_types = args .iter() - .map(|e| e.get_type(schema)) + .map(|e| e.get_type(schema, param_data_types)) .collect::>>()?; function::return_type(fun, &data_types) } Expr::WindowFunction { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type(schema)) + .map(|e| e.get_type(schema, param_data_types)) .collect::>>()?; window_function::return_type(fun, &data_types) } Expr::AggregateFunction { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type(schema)) + .map(|e| e.get_type(schema, param_data_types)) .collect::>>()?; aggregate_function::return_type(fun, &data_types) } Expr::AggregateUDF { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type(schema)) + .map(|e| e.get_type(schema, param_data_types)) .collect::>>()?; Ok((fun.return_type)(&data_types)?.as_ref().clone()) } @@ -120,14 +120,30 @@ impl ExprSchemable for Expr { ref right, ref op, }) => binary_operator_data_type( - &left.get_type(schema)?, + &left.get_type(schema, param_data_types)?, op, - &right.get_type(schema)?, + &right.get_type(schema, param_data_types)?, ), Expr::Like { .. } | Expr::ILike { .. } | Expr::SimilarTo { .. } => { Ok(DataType::Boolean) } - Expr::Placeholder(_) => Ok(DataType::Boolean), + // Return the type of the corresponding param defined in param_data_types of `PREPARE my_plan(param_data_types)` + Expr::Placeholder(param) => { + // param is $1, $2, $3, ... + // Let convert it to index: 0, 1, 2, ... + let index = param[1..].parse::(); + let idx = match index { + Ok(index) => index - 1, + Err(_) => return Err(DataFusionError::Internal(format!( + "Invalid placeholder: {}", + param + ))) + }; + println!("index: {}", idx); + + // Return data type of the index in the param_data_types + Ok(param_data_types[idx]) + }, Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), )), @@ -140,7 +156,7 @@ impl ExprSchemable for Expr { Ok(DataType::Null) } Expr::GetIndexedField(GetIndexedField { key, expr }) => { - let data_type = expr.get_type(schema)?; + let data_type = expr.get_type(schema, param_data_types)?; get_indexed_field(&data_type, key).map(|x| x.data_type().clone()) } diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 406dcfa70dd8..09422d5c87df 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -155,7 +155,9 @@ impl LogicalPlanBuilder { nulls.push((i, j)); Ok(field_types[j].clone()) } else { + println!("====== expr: {:?}", expr); let data_type = expr.get_type(&empty_schema)?; + println!("========= data_type: {:?}", data_type); if let Some(prev_data_type) = &field_types[j] { if prev_data_type != &data_type { let err = format!("Inconsistent data type across values list at row {} column {}", i, j); diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index a9b556c1170d..8de20b4300ec 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1213,12 +1213,13 @@ impl Filter { pub fn try_new( predicate: Expr, input: Arc, + // param_data_types: &Vec, to be added ) -> datafusion_common::Result { // Filter predicates must return a boolean value so we try and validate that here. // Note that it is not always possible to resolve the predicate expression during plan // construction (such as with correlated subqueries) so we make a best effort here and // ignore errors resolving the expression against the schema. - if let Ok(predicate_type) = predicate.get_type(input.schema()) { + if let Ok(predicate_type) = predicate.get_type(input.schema(), &vec![]) { //param_data_types) { if predicate_type != DataType::Boolean { return Err(DataFusionError::Plan(format!( "Cannot create filter with non-boolean predicate '{}' returning {}", diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index f13711a90217..07f630bc9b92 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -419,6 +419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub fn query_to_plan_with_alias( &self, query: Query, + param_data_types: &Vec, alias: Option, ctes: &mut HashMap, outer_query_schema: Option<&DFSchema>, @@ -992,6 +993,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(LogicalPlan::Filter(Filter::try_new( filter_expr, Arc::new(plan), + &[], // todo: get this list from the ctes after refactoing it )?)) } None => Ok(plan), @@ -5209,6 +5211,21 @@ mod tests { assert_eq!(format!("{:?}", plan), expected); } + fn prepare_stmt_quick_test( + sql: &str, + expected_plan: &str, + expected_data_types: &str, + ) { + let plan = logical_plan(sql).unwrap(); + // verify plan + assert_eq!(format!("{:?}", plan), expected_plan); + // verify data types + if let LogicalPlan::Prepare(Prepare { data_types, .. }) = plan { + let dt = format!("{:?}", data_types); + assert_eq!(dt, expected_data_types); + } + } + struct MockContextProvider {} impl ContextProvider for MockContextProvider { @@ -6011,18 +6028,76 @@ mod tests { quick_test(sql, expected); } - // TODO: will add more tests to cover many other cases #[test] - fn test_prepare_statement_to_plan() { + fn test_prepare_statement_to_plan_no_param() { + // no embedded parameter but still declare it + let sql = "PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = 10"; + + let expected_plan = "Prepare: \"my_plan\" [Int32] \ + \n Projection: person.id, person.age\ + \n Filter: person.age = Int64(10)\ + \n TableScan: person"; + + let expected_dt = "[Int32]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + + ///////////////////////// + // no embedded parameter and no declare it + let sql = "PREPARE my_plan AS SELECT id, age FROM person WHERE age = 10"; + + let expected_plan = "Prepare: \"my_plan\" [] \ + \n Projection: person.id, person.age\ + \n Filter: person.age = Int64(10)\ + \n TableScan: person"; + + let expected_dt = "[]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + + #[test] + fn test_prepare_statement_to_plan_one_param() { let sql = "PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = $1"; - //let statements = DFParser::parse_sql(sql).unwrap(); - let expected = "Prepare: \"my_plan\" [Int32] \ + let expected_plan = "Prepare: \"my_plan\" [Int32] \ \n Projection: person.id, person.age\ \n Filter: person.age = $1\ \n TableScan: person"; - quick_test(sql, expected); + let expected_dt = "[Int32]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + + #[test] + fn test_prepare_statement_to_plan_multi_params() { + let sql = "PREPARE my_plan(INT, DOUBLE, STRING, INT) AS SELECT id, age FROM person WHERE age IN ($1, $4) AND salary > $2 OR first_name = $3"; + + let expected_plan = "Prepare: \"my_plan\" [Int32, Float64, Utf8, Int32] \ + \n Projection: person.id, person.age\ + \n Filter: person.age IN ([$1, $4]) AND person.salary > $2 OR person.first_name = $3\ + \n TableScan: person"; + + let expected_dt = "[Int32, Float64, Utf8, Int32]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + + #[test] + fn test_prepare_statement_to_plan_value_list() { + let sql = "PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter);"; + + let expected_plan = "Prepare: \"my_plan\" [Utf8, Utf8] \ + \n Projection: t.num, t.letter\ + \n SubqueryAlias: t\ + \n Projection: t.column1 AS num, t.column2 AS letter\ + \n SubqueryAlias: t\ + \n Values: (Int64(1), $1), (Int64(2), $2)"; + + let expected_dt = "[Utf8, Utf8]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); } fn assert_field_not_found(err: DataFusionError, name: &str) { From e04da97102c9fe8faf1c81b92c225e313f1d194f Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 6 Dec 2022 09:52:12 -0500 Subject: [PATCH 06/17] test: one more test and a bit of refactor while waiting for the CTEs/PlannerContext PR --- datafusion/expr/src/expr_schema.rs | 66 +++++++++++++++++------- datafusion/expr/src/logical_plan/plan.rs | 14 ++++- datafusion/sql/src/planner.rs | 29 ++++++++--- 3 files changed, 80 insertions(+), 29 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 509bbf1d48cc..c8a2f7001eb2 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -27,7 +27,14 @@ use datafusion_common::{DFField, DFSchema, DataFusionError, ExprSchema, Result}; /// trait to allow expr to typable with respect to a schema pub trait ExprSchemable { /// given a schema, return the type of the expr - fn get_type(&self, schema: &S, param_data_types: &Vec) -> Result; + fn get_type(&self, schema: &S) -> Result; + + /// given a schema and param data types, return the type of the expr + fn get_type_with_params( + &self, + schema: &S, + param_data_types: &Vec, + ) -> Result; /// given a schema, return the nullability of the expr fn nullable(&self, input_schema: &S) -> Result; @@ -52,50 +59,60 @@ impl ExprSchemable for Expr { /// expression refers to a column that does not exist in the /// schema, or when the expression is incorrectly typed /// (e.g. `[utf8] + [bool]`). - fn get_type(&self, schema: &S, param_data_types: &Vec) -> Result { + fn get_type(&self, schema: &S) -> Result { + self.get_type_with_params(schema, &vec![]) + } + + fn get_type_with_params( + &self, + schema: &S, + param_data_types: &Vec, + ) -> Result { match self { Expr::Alias(expr, _) | Expr::Sort { expr, .. } | Expr::Negative(expr) => { - expr.get_type(schema, param_data_types) + expr.get_type_with_params(schema, param_data_types) } Expr::Column(c) => Ok(schema.data_type(c)?.clone()), Expr::ScalarVariable(ty, _) => Ok(ty.clone()), Expr::Literal(l) => Ok(l.get_datatype()), - Expr::Case(case) => case.when_then_expr[0].1.get_type(schema, param_data_types), + Expr::Case(case) => case.when_then_expr[0] + .1 + .get_type_with_params(schema, param_data_types), Expr::Cast(Cast { data_type, .. }) | Expr::TryCast { data_type, .. } => { Ok(data_type.clone()) } Expr::ScalarUDF { fun, args } => { let data_types = args .iter() - .map(|e| e.get_type(schema, param_data_types)) + .map(|e| e.get_type_with_params(schema, param_data_types)) .collect::>>()?; Ok((fun.return_type)(&data_types)?.as_ref().clone()) } Expr::ScalarFunction { fun, args } => { let data_types = args .iter() - .map(|e| e.get_type(schema, param_data_types)) + .map(|e| e.get_type_with_params(schema, param_data_types)) .collect::>>()?; function::return_type(fun, &data_types) } Expr::WindowFunction { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type(schema, param_data_types)) + .map(|e| e.get_type_with_params(schema, param_data_types)) .collect::>>()?; window_function::return_type(fun, &data_types) } Expr::AggregateFunction { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type(schema, param_data_types)) + .map(|e| e.get_type_with_params(schema, param_data_types)) .collect::>>()?; aggregate_function::return_type(fun, &data_types) } Expr::AggregateUDF { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type(schema, param_data_types)) + .map(|e| e.get_type_with_params(schema, param_data_types)) .collect::>>()?; Ok((fun.return_type)(&data_types)?.as_ref().clone()) } @@ -120,9 +137,9 @@ impl ExprSchemable for Expr { ref right, ref op, }) => binary_operator_data_type( - &left.get_type(schema, param_data_types)?, + &left.get_type_with_params(schema, param_data_types)?, op, - &right.get_type(schema, param_data_types)?, + &right.get_type_with_params(schema, param_data_types)?, ), Expr::Like { .. } | Expr::ILike { .. } | Expr::SimilarTo { .. } => { Ok(DataType::Boolean) @@ -133,17 +150,26 @@ impl ExprSchemable for Expr { // Let convert it to index: 0, 1, 2, ... let index = param[1..].parse::(); let idx = match index { - Ok(index) => index - 1, - Err(_) => return Err(DataFusionError::Internal(format!( - "Invalid placeholder: {}", - param - ))) + Ok(index) => index - 1, + Err(_) => { + return Err(DataFusionError::Internal(format!( + "Invalid placeholder: {}", + param + ))) + } }; - println!("index: {}", idx); + println!("==== index: {}", idx); + + if param_data_types.len() <= idx { + return Err(DataFusionError::Internal(format!( + "Placehoder {} does not exist in the parameter list: {:?}", + param, param_data_types + ))); + } // Return data type of the index in the param_data_types - Ok(param_data_types[idx]) - }, + Ok(param_data_types[idx].clone()) + } Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), )), @@ -156,7 +182,7 @@ impl ExprSchemable for Expr { Ok(DataType::Null) } Expr::GetIndexedField(GetIndexedField { key, expr }) => { - let data_type = expr.get_type(schema, param_data_types)?; + let data_type = expr.get_type_with_params(schema, param_data_types)?; get_indexed_field(&data_type, key).map(|x| x.data_type().clone()) } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 8de20b4300ec..ac5250c1d894 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1213,13 +1213,23 @@ impl Filter { pub fn try_new( predicate: Expr, input: Arc, - // param_data_types: &Vec, to be added + ) -> datafusion_common::Result { + Self::try_new_with_params(predicate, input, &vec![]) + } + + /// Create a new filter operator with provided list of parmeter data types + pub fn try_new_with_params( + predicate: Expr, + input: Arc, + param_data_types: &Vec, ) -> datafusion_common::Result { // Filter predicates must return a boolean value so we try and validate that here. // Note that it is not always possible to resolve the predicate expression during plan // construction (such as with correlated subqueries) so we make a best effort here and // ignore errors resolving the expression against the schema. - if let Ok(predicate_type) = predicate.get_type(input.schema(), &vec![]) { //param_data_types) { + if let Ok(predicate_type) = + predicate.get_type_with_params(input.schema(), param_data_types) + { if predicate_type != DataType::Boolean { return Err(DataFusionError::Plan(format!( "Cannot create filter with non-boolean predicate '{}' returning {}", diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 07f630bc9b92..e9fb7c9d4736 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -419,7 +419,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub fn query_to_plan_with_alias( &self, query: Query, - param_data_types: &Vec, alias: Option, ctes: &mut HashMap, outer_query_schema: Option<&DFSchema>, @@ -990,10 +989,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &[using_columns], )?; - Ok(LogicalPlan::Filter(Filter::try_new( + Ok(LogicalPlan::Filter(Filter::try_new_with_params( filter_expr, Arc::new(plan), - &[], // todo: get this list from the ctes after refactoing it + &vec![], // todo: this will come from the refactored ctes that include the param data types )?)) } None => Ok(plan), @@ -6072,14 +6071,30 @@ mod tests { #[test] fn test_prepare_statement_to_plan_multi_params() { - let sql = "PREPARE my_plan(INT, DOUBLE, STRING, INT) AS SELECT id, age FROM person WHERE age IN ($1, $4) AND salary > $2 OR first_name = $3"; + let sql = "PREPARE my_plan(INT, STRING, DOUBLE, INT, DOUBLE) AS SELECT id, age FROM person WHERE age IN ($1, $4) AND salary > $3 and salary < $5 OR first_name < $2"; - let expected_plan = "Prepare: \"my_plan\" [Int32, Float64, Utf8, Int32] \ + let expected_plan = "Prepare: \"my_plan\" [Int32, Utf8, Float64, Int32, Float64] \ \n Projection: person.id, person.age\ - \n Filter: person.age IN ([$1, $4]) AND person.salary > $2 OR person.first_name = $3\ + \n Filter: person.age IN ([$1, $4]) AND person.salary > $3 AND person.salary < $5 OR person.first_name < $2\ \n TableScan: person"; - let expected_dt = "[Int32, Float64, Utf8, Int32]"; + let expected_dt = "[Int32, Utf8, Float64, Int32, Float64]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + + #[test] + fn test_prepare_statement_to_plan_having() { + let sql = "PREPARE my_plan(INT, DOUBLE) AS SELECT id, sum(age) FROM person WHERE salary > $2 GROUP BY id HAVING sum(age) < $1"; + + let expected_plan = "Prepare: \"my_plan\" [Int32, Float64] \ + \n Projection: person.id, SUM(person.age)\ + \n Filter: SUM(person.age) < $1\ + \n Aggregate: groupBy=[[person.id]], aggr=[[SUM(person.age)]]\ + \n Filter: person.salary > $2\ + \n TableScan: person"; + + let expected_dt = "[Int32, Float64]"; prepare_stmt_quick_test(sql, expected_plan, expected_dt); } From ba8ad0f7d92863dc76dfee4f827e01007393a328 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 6 Dec 2022 15:39:08 -0500 Subject: [PATCH 07/17] feat: use prepare stmt's param data types in the planner context --- datafusion/expr/src/expr_schema.rs | 16 +++- datafusion/expr/src/logical_plan/builder.rs | 31 +++++-- datafusion/expr/src/logical_plan/plan.rs | 4 +- .../simplify_expressions/simplify_exprs.rs | 2 +- datafusion/proto/src/logical_plan.rs | 4 +- datafusion/sql/src/planner.rs | 91 ++++++++++++++----- 6 files changed, 109 insertions(+), 39 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index c8a2f7001eb2..26073c60cfd0 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -23,6 +23,7 @@ use crate::{aggregate_function, function, window_function}; use arrow::compute::can_cast_types; use arrow::datatypes::DataType; use datafusion_common::{DFField, DFSchema, DataFusionError, ExprSchema, Result}; +use log::debug; /// trait to allow expr to typable with respect to a schema pub trait ExprSchemable { @@ -33,7 +34,7 @@ pub trait ExprSchemable { fn get_type_with_params( &self, schema: &S, - param_data_types: &Vec, + param_data_types: &[DataType], ) -> Result; /// given a schema, return the nullability of the expr @@ -60,13 +61,13 @@ impl ExprSchemable for Expr { /// schema, or when the expression is incorrectly typed /// (e.g. `[utf8] + [bool]`). fn get_type(&self, schema: &S) -> Result { - self.get_type_with_params(schema, &vec![]) + self.get_type_with_params(schema, &[]) } fn get_type_with_params( &self, schema: &S, - param_data_types: &Vec, + param_data_types: &[DataType], ) -> Result { match self { Expr::Alias(expr, _) | Expr::Sort { expr, .. } | Expr::Negative(expr) => { @@ -158,7 +159,6 @@ impl ExprSchemable for Expr { ))) } }; - println!("==== index: {}", idx); if param_data_types.len() <= idx { return Err(DataFusionError::Internal(format!( @@ -167,8 +167,14 @@ impl ExprSchemable for Expr { ))); } + let param_type = param_data_types[idx].clone(); + debug!( + "type of param {} param_data_types[idx]: {:?}", + param, param_type + ); + // Return data type of the index in the param_data_types - Ok(param_data_types[idx].clone()) + Ok(param_type) } Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 10ff286e0675..1a8feac89886 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -120,7 +120,12 @@ impl LogicalPlanBuilder { /// By default, it assigns the names column1, column2, etc. to the columns of a VALUES table. /// The column names are not specified by the SQL standard and different database systems do it differently, /// so it's usually better to override the default names with a table alias list. - pub fn values(mut values: Vec>) -> Result { + /// + /// If the values include params/binders such as $1, $2, $3, etc, then the `param_data_types` should be provided. + pub fn values( + mut values: Vec>, + param_data_types: &[DataType], + ) -> Result { // todo: hanlde for Placeholder expr if values.is_empty() { return Err(DataFusionError::Plan("Values list cannot be empty".into())); @@ -155,9 +160,7 @@ impl LogicalPlanBuilder { nulls.push((i, j)); Ok(field_types[j].clone()) } else { - println!("====== expr: {:?}", expr); - let data_type = expr.get_type(&empty_schema)?; - println!("========= data_type: {:?}", data_type); + let data_type = expr.get_type_with_params(&empty_schema, param_data_types)?; if let Some(prev_data_type) = &field_types[j] { if prev_data_type != &data_type { let err = format!("Inconsistent data type across values list at row {} column {}", i, j); @@ -277,11 +280,23 @@ impl LogicalPlanBuilder { /// Apply a filter pub fn filter(&self, expr: impl Into) -> Result { + self.filter_with_params(expr, &[]) + } + + /// Apply a filter wit provided data types for params of prepared statement + pub fn filter_with_params( + &self, + expr: impl Into, + param_data_types: &[DataType], + ) -> Result { let expr = normalize_col(expr.into(), &self.plan)?; - Ok(Self::from(LogicalPlan::Filter(Filter::try_new( - expr, - Arc::new(self.plan.clone()), - )?))) + Ok(Self::from(LogicalPlan::Filter( + Filter::try_new_with_params( + expr, + Arc::new(self.plan.clone()), + param_data_types, + )?, + ))) } pub fn prepare(&self, name: String, data_types: Vec) -> Result { diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index ac5250c1d894..f43ad7c9b945 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1214,14 +1214,14 @@ impl Filter { predicate: Expr, input: Arc, ) -> datafusion_common::Result { - Self::try_new_with_params(predicate, input, &vec![]) + Self::try_new_with_params(predicate, input, &[]) } /// Create a new filter operator with provided list of parmeter data types pub fn try_new_with_params( predicate: Expr, input: Arc, - param_data_types: &Vec, + param_data_types: &[DataType], ) -> datafusion_common::Result { // Filter predicates must return a boolean value so we try and validate that here. // Note that it is not always possible to resolve the predicate expression during plan diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index d2c57f3a79c4..ab4b64ebaafb 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -369,7 +369,7 @@ mod tests { Box::new(lit(1)), )); let values = vec![vec![expr1, expr2]]; - let plan = LogicalPlanBuilder::values(values)?.build()?; + let plan = LogicalPlanBuilder::values(values, &[])?.build()?; let expected = "\ Values: (Int32(3) AS Int32(1) + Int32(2), Int32(1) AS Int32(2) - Int32(1))"; diff --git a/datafusion/proto/src/logical_plan.rs b/datafusion/proto/src/logical_plan.rs index 2bae79e5e1dd..1c8de31cf4b1 100644 --- a/datafusion/proto/src/logical_plan.rs +++ b/datafusion/proto/src/logical_plan.rs @@ -39,8 +39,8 @@ use datafusion::{ prelude::SessionContext, }; use datafusion_common::{context, Column, DataFusionError}; -use datafusion_expr::logical_plan::Prepare; use datafusion_expr::logical_plan::builder::{project, subquery_alias_owned}; +use datafusion_expr::logical_plan::Prepare; use datafusion_expr::{ logical_plan::{ Aggregate, CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateView, @@ -320,7 +320,7 @@ impl AsLogicalPlan for LogicalPlanNode { .collect::, _>>() .map_err(|e| e.into()) }?; - LogicalPlanBuilder::values(values)?.build() + LogicalPlanBuilder::values(values, &[])?.build() } LogicalPlanType::Projection(projection) => { let input: LogicalPlan = diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 644a50c779ac..bd379107a12e 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -45,7 +45,9 @@ use datafusion_common::{ use datafusion_expr::expr::{Between, BinaryExpr, Case, Cast, GroupingSet, Like}; use datafusion_expr::expr_rewriter::normalize_col; use datafusion_expr::expr_rewriter::normalize_col_with_schemas; -use datafusion_expr::logical_plan::builder::{ project, subquery_alias, subquery_alias_owned}; +use datafusion_expr::logical_plan::builder::{ + project, subquery_alias, subquery_alias_owned, +}; use datafusion_expr::logical_plan::JoinConstraint as HashJoinConstraint; use datafusion_expr::logical_plan::{ Analyze, CreateCatalog, CreateCatalogSchema, @@ -125,6 +127,16 @@ impl PlannerContext { ctes: HashMap::new(), } } + + /// Create a new PlannerContext + pub fn new_with_prepare_param_data_types( + prepare_param_data_types: Vec, + ) -> Self { + Self { + prepare_param_data_types, + ctes: HashMap::new(), + } + } } /// SQL query planner @@ -195,6 +207,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Generate a logical plan from an SQL statement pub fn sql_statement_to_plan(&self, statement: Statement) -> Result { + self.sql_statement_to_plan_with_context(statement, &mut PlannerContext::new()) + } + + /// Generate a logical plan from an SQL statement + pub fn sql_statement_to_plan_with_context( + &self, + statement: Statement, + planner_context: &mut PlannerContext, + ) -> Result { let sql = Some(statement.to_string()); match statement { Statement::Explain { @@ -205,9 +226,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { describe_alias: _, .. } => self.explain_statement_to_plan(verbose, analyze, *statement), - Statement::Query(query) => { - self.query_to_plan(*query, &mut PlannerContext::new()) - } + Statement::Query(query) => self.query_to_plan(*query, planner_context), Statement::ShowVariable { variable } => self.show_variable_to_plan(&variable), Statement::SetVariable { local, @@ -230,7 +249,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { && table_properties.is_empty() && with_options.is_empty() => { - let plan = self.query_to_plan(*query, &mut PlannerContext::new())?; + let plan = self.query_to_plan(*query, planner_context)?; let input_schema = plan.schema(); let plan = if !columns.is_empty() { @@ -347,13 +366,24 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { data_types, statement, } => { - let plan = self.sql_statement_to_plan(*statement)?; + // Convert parser data types to DataFusion data types + let data_types: Vec = data_types + .into_iter() + .map(|t| self.convert_data_type(&t)) + .collect::>()?; + + // Create planner context with parameters + let mut planner_context = + PlannerContext::new_with_prepare_param_data_types(data_types.clone()); + + // Build logical plan for inner statement of the prepare statement + let plan = self.sql_statement_to_plan_with_context( + *statement, + &mut planner_context, + )?; Ok(LogicalPlan::Prepare(Prepare { name: name.to_string(), - data_types: data_types - .into_iter() - .map(|t| self.convert_data_type(&t)) - .collect::>()?, + data_types, input: Arc::new(plan), })) } @@ -490,7 +520,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SetExpr::Select(s) => { self.select_to_plan(*s, planner_context, alias, outer_query_schema) } - SetExpr::Values(v) => self.sql_values_to_plan(v), + SetExpr::Values(v) => { + self.sql_values_to_plan(v, &planner_context.prepare_param_data_types) + } SetExpr::SetOperation { op, left, @@ -1048,7 +1080,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(LogicalPlan::Filter(Filter::try_new_with_params( filter_expr, Arc::new(plan), - &vec![], // todo: this will come from the refactored ctes that include the param data types + &planner_context.prepare_param_data_types, )?)) } None => Ok(plan), @@ -1107,6 +1139,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // process `from` clause let plan = self.plan_from_tables(select.from, planner_context, outer_query_schema)?; + let empty_from = matches!(plan, LogicalPlan::EmptyRelation(_)); // build from schema for unqualifier column ambiguous check // we should get only one field for unqualifier column from schema. @@ -1234,7 +1267,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let plan = if let Some(having_expr_post_aggr) = having_expr_post_aggr { LogicalPlanBuilder::from(plan) - .filter(having_expr_post_aggr)? + .filter_with_params( + having_expr_post_aggr, + &planner_context.prepare_param_data_types, + )? .build()? } else { plan @@ -1816,7 +1852,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } - fn sql_values_to_plan(&self, values: SQLValues) -> Result { + fn sql_values_to_plan( + &self, + values: SQLValues, + param_data_types: &[DataType], + ) -> Result { // values should not be based on any other schema let schema = DFSchema::empty(); let values = values @@ -1872,7 +1912,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .collect::>>() }) .collect::>>()?; - LogicalPlanBuilder::values(values)?.build() + LogicalPlanBuilder::values(values, param_data_types)?.build() } fn sql_expr_to_logical_expr( @@ -6167,7 +6207,10 @@ mod tests { #[test] fn test_prepare_statement_to_plan_multi_params() { - let sql = "PREPARE my_plan(INT, STRING, DOUBLE, INT, DOUBLE) AS SELECT id, age FROM person WHERE age IN ($1, $4) AND salary > $3 and salary < $5 OR first_name < $2"; + let sql = "PREPARE my_plan(INT, STRING, DOUBLE, INT, DOUBLE) AS + SELECT id, age + FROM person + WHERE age IN ($1, $4) AND salary > $3 and salary < $5 OR first_name < $2"; let expected_plan = "Prepare: \"my_plan\" [Int32, Utf8, Float64, Int32, Float64] \ \n Projection: person.id, person.age\ @@ -6181,16 +6224,22 @@ mod tests { #[test] fn test_prepare_statement_to_plan_having() { - let sql = "PREPARE my_plan(INT, DOUBLE) AS SELECT id, sum(age) FROM person WHERE salary > $2 GROUP BY id HAVING sum(age) < $1"; - - let expected_plan = "Prepare: \"my_plan\" [Int32, Float64] \ + let sql = "PREPARE my_plan(INT, DOUBLE, DOUBLE, DOUBLE) AS + SELECT id, SUM(age) + FROM person \ + WHERE salary > $2 + GROUP BY id + HAVING sum(age) < $1 AND SUM(age) > 10 OR SUM(age) in ($3, $4)\ + "; + + let expected_plan = "Prepare: \"my_plan\" [Int32, Float64, Float64, Float64] \ \n Projection: person.id, SUM(person.age)\ - \n Filter: SUM(person.age) < $1\ + \n Filter: SUM(person.age) < $1 AND SUM(person.age) > Int64(10) OR SUM(person.age) IN ([$3, $4])\ \n Aggregate: groupBy=[[person.id]], aggr=[[SUM(person.age)]]\ \n Filter: person.salary > $2\ \n TableScan: person"; - let expected_dt = "[Int32, Float64]"; + let expected_dt = "[Int32, Float64, Float64, Float64]"; prepare_stmt_quick_test(sql, expected_plan, expected_dt); } From 67cbfdda908a1a850c8ba19bb593d1014d83f71e Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 6 Dec 2022 15:47:04 -0500 Subject: [PATCH 08/17] chore: cleanup --- datafusion/expr/src/logical_plan/builder.rs | 1 - datafusion/sql/src/parser.rs | 30 --------------------- datafusion/sql/src/planner.rs | 4 +-- 3 files changed, 2 insertions(+), 33 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 1a8feac89886..705b48d1745d 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -126,7 +126,6 @@ impl LogicalPlanBuilder { mut values: Vec>, param_data_types: &[DataType], ) -> Result { - // todo: hanlde for Placeholder expr if values.is_empty() { return Err(DataFusionError::Plan("Values list cannot be empty".into())); } diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index b5cb4cd15f8b..4744417f6225 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -728,34 +728,4 @@ mod tests { Ok(()) } - - // TODO: remove these 2 tests because they were tested in sqlparser - // This is just for me to see how the statements look like - #[ignore] - #[test] - fn create_prepared_statement() -> Result<(), ParserError> { - // positive case - let sql = "PREPARE my_plan(TIME, INT) AS SELECT region FROM cpu WHERE time = $1 and usage_user > $2"; - let statements = DFParser::parse_sql(sql)?; - - println!("{:#?}", statements[0]); - assert_eq!(statements.len(), 1); - - let sql = "SELECT region FROM cpu WHERE time = 10 and usage_user > 20"; - let statements = DFParser::parse_sql(sql)?; - println!("{:#?}", statements[0]); - assert_eq!(statements.len(), 1); - - Ok(()) - } - - #[test] - fn execute_statement() -> Result<(), ParserError> { - // positive case - let sql = "EXECUTE my_plan(1, '2022-11-30')"; - let statements = DFParser::parse_sql(sql)?; - println!("{:#?}", statements[0]); - assert_eq!(statements.len(), 1); - Ok(()) - } } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index bd379107a12e..7beb3ed55e89 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -120,7 +120,7 @@ impl Default for PlannerContext { } impl PlannerContext { - /// Create a new PlannerContext + /// Create an empty PlannerContext pub fn new() -> Self { Self { prepare_param_data_types: vec![], @@ -128,7 +128,7 @@ impl PlannerContext { } } - /// Create a new PlannerContext + /// Create a new PlannerContext with provided prepare_param_data_types pub fn new_with_prepare_param_data_types( prepare_param_data_types: Vec, ) -> Self { From 001251cdd5fee3f9a43d15c1f24c753a93473c6d Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 7 Dec 2022 11:34:41 -0500 Subject: [PATCH 09/17] refactor: address review comments --- .../core/src/datasource/listing/helpers.rs | 2 +- datafusion/core/src/physical_plan/planner.rs | 2 +- datafusion/expr/src/expr.rs | 16 +++-- datafusion/expr/src/expr_rewriter.rs | 2 +- datafusion/expr/src/expr_schema.rs | 71 ++++--------------- datafusion/expr/src/expr_visitor.rs | 2 +- datafusion/expr/src/logical_plan/builder.rs | 27 ++----- datafusion/expr/src/logical_plan/plan.rs | 13 +--- datafusion/expr/src/utils.rs | 2 +- .../simplify_expressions/expr_simplifier.rs | 2 +- .../simplify_expressions/simplify_exprs.rs | 2 +- datafusion/proto/proto/datafusion.proto | 3 +- datafusion/proto/src/from_proto.rs | 13 +++- datafusion/proto/src/generated/pbjson.rs | 42 +++++++---- datafusion/proto/src/generated/prost.rs | 4 +- datafusion/proto/src/logical_plan.rs | 2 +- datafusion/proto/src/to_proto.rs | 4 +- datafusion/sql/Cargo.toml | 1 + datafusion/sql/src/planner.rs | 51 ++++++++++--- datafusion/sql/src/utils.rs | 5 +- 20 files changed, 131 insertions(+), 135 deletions(-) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index c56e4a1ba992..2c014068dff8 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -122,7 +122,7 @@ impl ExpressionVisitor for ApplicabilityVisitor<'_> { | Expr::WindowFunction { .. } | Expr::Wildcard | Expr::QualifiedWildcard { .. } - | Expr::Placeholder(_) => { + | Expr::Placeholder { .. } => { *self.is_applicable = false; Recursion::Stop(self) } diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index 5dd3919a86cb..bbfa1b6e120f 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -344,7 +344,7 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Expr::QualifiedWildcard { .. } => Err(DataFusionError::Internal( "Create physical name does not support qualified wildcard".to_string(), )), - Expr::Placeholder(_) => Err(DataFusionError::Internal( + Expr::Placeholder { .. } => Err(DataFusionError::Internal( "Create physical name does not support placeholder".to_string(), )), } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index d6cbf243de35..fbc98cf01a20 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -244,8 +244,14 @@ pub enum Expr { /// List of grouping set expressions. Only valid in the context of an aggregate /// GROUP BY expression list GroupingSet(GroupingSet), - /// A place holder for parameters in a prepared statement. - Placeholder(String), + /// A place holder for parameters in a prepared statement + /// (e.g. `$foo` or `$1`) + Placeholder { + /// The identifier of the parameter (e.g, $1 or $foo) + id: String, + /// The type the parameter will be filled in with + data_type: DataType, + }, } /// Binary expression @@ -530,7 +536,7 @@ impl Expr { Expr::Literal(..) => "Literal", Expr::Negative(..) => "Negative", Expr::Not(..) => "Not", - Expr::Placeholder(..) => "Placeholder", + Expr::Placeholder { .. } => "Placeholder", Expr::QualifiedWildcard { .. } => "QualifiedWildcard", Expr::ScalarFunction { .. } => "ScalarFunction", Expr::ScalarSubquery { .. } => "ScalarSubquery", @@ -983,7 +989,7 @@ impl fmt::Debug for Expr { ) } }, - Expr::Placeholder(param) => write!(f, "{}", param), + Expr::Placeholder { id, .. } => write!(f, "{}", id), } } } @@ -1267,7 +1273,7 @@ fn create_name(e: &Expr) -> Result { Expr::QualifiedWildcard { .. } => Err(DataFusionError::Internal( "Create name does not support qualified wildcard".to_string(), )), - Expr::Placeholder(param) => Ok((*param).to_string()), + Expr::Placeholder { id, .. } => Ok((*id).to_string()), } } diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs index e06493945e18..b107d591769f 100644 --- a/datafusion/expr/src/expr_rewriter.rs +++ b/datafusion/expr/src/expr_rewriter.rs @@ -291,7 +291,7 @@ impl ExprRewritable for Expr { key, )) } - Expr::Placeholder(param) => Expr::Placeholder(param), + Expr::Placeholder { id, data_type } => Expr::Placeholder { id, data_type }, }; // now rewrite this expression itself diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 26073c60cfd0..e65eaf3ae274 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -23,20 +23,12 @@ use crate::{aggregate_function, function, window_function}; use arrow::compute::can_cast_types; use arrow::datatypes::DataType; use datafusion_common::{DFField, DFSchema, DataFusionError, ExprSchema, Result}; -use log::debug; /// trait to allow expr to typable with respect to a schema pub trait ExprSchemable { /// given a schema, return the type of the expr fn get_type(&self, schema: &S) -> Result; - /// given a schema and param data types, return the type of the expr - fn get_type_with_params( - &self, - schema: &S, - param_data_types: &[DataType], - ) -> Result; - /// given a schema, return the nullability of the expr fn nullable(&self, input_schema: &S) -> Result; @@ -61,59 +53,49 @@ impl ExprSchemable for Expr { /// schema, or when the expression is incorrectly typed /// (e.g. `[utf8] + [bool]`). fn get_type(&self, schema: &S) -> Result { - self.get_type_with_params(schema, &[]) - } - - fn get_type_with_params( - &self, - schema: &S, - param_data_types: &[DataType], - ) -> Result { match self { Expr::Alias(expr, _) | Expr::Sort { expr, .. } | Expr::Negative(expr) => { - expr.get_type_with_params(schema, param_data_types) + expr.get_type(schema) } Expr::Column(c) => Ok(schema.data_type(c)?.clone()), Expr::ScalarVariable(ty, _) => Ok(ty.clone()), Expr::Literal(l) => Ok(l.get_datatype()), - Expr::Case(case) => case.when_then_expr[0] - .1 - .get_type_with_params(schema, param_data_types), + Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), Expr::Cast(Cast { data_type, .. }) | Expr::TryCast { data_type, .. } => { Ok(data_type.clone()) } Expr::ScalarUDF { fun, args } => { let data_types = args .iter() - .map(|e| e.get_type_with_params(schema, param_data_types)) + .map(|e| e.get_type(schema)) .collect::>>()?; Ok((fun.return_type)(&data_types)?.as_ref().clone()) } Expr::ScalarFunction { fun, args } => { let data_types = args .iter() - .map(|e| e.get_type_with_params(schema, param_data_types)) + .map(|e| e.get_type(schema)) .collect::>>()?; function::return_type(fun, &data_types) } Expr::WindowFunction { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type_with_params(schema, param_data_types)) + .map(|e| e.get_type(schema)) .collect::>>()?; window_function::return_type(fun, &data_types) } Expr::AggregateFunction { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type_with_params(schema, param_data_types)) + .map(|e| e.get_type(schema)) .collect::>>()?; aggregate_function::return_type(fun, &data_types) } Expr::AggregateUDF { fun, args, .. } => { let data_types = args .iter() - .map(|e| e.get_type_with_params(schema, param_data_types)) + .map(|e| e.get_type(schema)) .collect::>>()?; Ok((fun.return_type)(&data_types)?.as_ref().clone()) } @@ -138,44 +120,15 @@ impl ExprSchemable for Expr { ref right, ref op, }) => binary_operator_data_type( - &left.get_type_with_params(schema, param_data_types)?, + &left.get_type(schema)?, op, - &right.get_type_with_params(schema, param_data_types)?, + &right.get_type(schema)?, ), Expr::Like { .. } | Expr::ILike { .. } | Expr::SimilarTo { .. } => { Ok(DataType::Boolean) } // Return the type of the corresponding param defined in param_data_types of `PREPARE my_plan(param_data_types)` - Expr::Placeholder(param) => { - // param is $1, $2, $3, ... - // Let convert it to index: 0, 1, 2, ... - let index = param[1..].parse::(); - let idx = match index { - Ok(index) => index - 1, - Err(_) => { - return Err(DataFusionError::Internal(format!( - "Invalid placeholder: {}", - param - ))) - } - }; - - if param_data_types.len() <= idx { - return Err(DataFusionError::Internal(format!( - "Placehoder {} does not exist in the parameter list: {:?}", - param, param_data_types - ))); - } - - let param_type = param_data_types[idx].clone(); - debug!( - "type of param {} param_data_types[idx]: {:?}", - param, param_type - ); - - // Return data type of the index in the param_data_types - Ok(param_type) - } + Expr::Placeholder { data_type, .. } => Ok(data_type.clone()), Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), )), @@ -188,7 +141,7 @@ impl ExprSchemable for Expr { Ok(DataType::Null) } Expr::GetIndexedField(GetIndexedField { key, expr }) => { - let data_type = expr.get_type_with_params(schema, param_data_types)?; + let data_type = expr.get_type(schema)?; get_indexed_field(&data_type, key).map(|x| x.data_type().clone()) } @@ -248,7 +201,7 @@ impl ExprSchemable for Expr { | Expr::IsNotFalse(_) | Expr::IsNotUnknown(_) | Expr::Exists { .. } - | Expr::Placeholder(_) => Ok(true), + | Expr::Placeholder { .. } => Ok(true), Expr::InSubquery { expr, .. } => expr.nullable(input_schema), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).is_nullable()) diff --git a/datafusion/expr/src/expr_visitor.rs b/datafusion/expr/src/expr_visitor.rs index d44936b6aa79..b5c6c6802555 100644 --- a/datafusion/expr/src/expr_visitor.rs +++ b/datafusion/expr/src/expr_visitor.rs @@ -134,7 +134,7 @@ impl ExprVisitable for Expr { | Expr::ScalarSubquery(_) | Expr::Wildcard | Expr::QualifiedWildcard { .. } - | Expr::Placeholder(_) => Ok(visitor), + | Expr::Placeholder { .. } => Ok(visitor), Expr::BinaryExpr(BinaryExpr { left, right, .. }) => { let visitor = left.accept(visitor)?; right.accept(visitor) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 705b48d1745d..585fac16875c 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -122,10 +122,7 @@ impl LogicalPlanBuilder { /// so it's usually better to override the default names with a table alias list. /// /// If the values include params/binders such as $1, $2, $3, etc, then the `param_data_types` should be provided. - pub fn values( - mut values: Vec>, - param_data_types: &[DataType], - ) -> Result { + pub fn values(mut values: Vec>) -> Result { if values.is_empty() { return Err(DataFusionError::Plan("Values list cannot be empty".into())); } @@ -159,7 +156,7 @@ impl LogicalPlanBuilder { nulls.push((i, j)); Ok(field_types[j].clone()) } else { - let data_type = expr.get_type_with_params(&empty_schema, param_data_types)?; + let data_type = expr.get_type(&empty_schema)?; if let Some(prev_data_type) = &field_types[j] { if prev_data_type != &data_type { let err = format!("Inconsistent data type across values list at row {} column {}", i, j); @@ -279,23 +276,11 @@ impl LogicalPlanBuilder { /// Apply a filter pub fn filter(&self, expr: impl Into) -> Result { - self.filter_with_params(expr, &[]) - } - - /// Apply a filter wit provided data types for params of prepared statement - pub fn filter_with_params( - &self, - expr: impl Into, - param_data_types: &[DataType], - ) -> Result { let expr = normalize_col(expr.into(), &self.plan)?; - Ok(Self::from(LogicalPlan::Filter( - Filter::try_new_with_params( - expr, - Arc::new(self.plan.clone()), - param_data_types, - )?, - ))) + Ok(Self::from(LogicalPlan::Filter(Filter::try_new( + expr, + Arc::new(self.plan.clone()), + )?))) } pub fn prepare(&self, name: String, data_types: Vec) -> Result { diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index f43ad7c9b945..a9b556c1170d 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1213,23 +1213,12 @@ impl Filter { pub fn try_new( predicate: Expr, input: Arc, - ) -> datafusion_common::Result { - Self::try_new_with_params(predicate, input, &[]) - } - - /// Create a new filter operator with provided list of parmeter data types - pub fn try_new_with_params( - predicate: Expr, - input: Arc, - param_data_types: &[DataType], ) -> datafusion_common::Result { // Filter predicates must return a boolean value so we try and validate that here. // Note that it is not always possible to resolve the predicate expression during plan // construction (such as with correlated subqueries) so we make a best effort here and // ignore errors resolving the expression against the schema. - if let Ok(predicate_type) = - predicate.get_type_with_params(input.schema(), param_data_types) - { + if let Ok(predicate_type) = predicate.get_type(input.schema()) { if predicate_type != DataType::Boolean { return Err(DataFusionError::Plan(format!( "Cannot create filter with non-boolean predicate '{}' returning {}", diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index fac685e395a2..88631cc6f07b 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -127,7 +127,7 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> { | Expr::Wildcard | Expr::QualifiedWildcard { .. } | Expr::GetIndexedField { .. } - | Expr::Placeholder(_) => {} + | Expr::Placeholder { .. } => {} } Ok(Recursion::Continue(self)) } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index dad91fd5c558..3a51099fe645 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -254,7 +254,7 @@ impl<'a> ConstEvaluator<'a> { | Expr::GroupingSet(_) | Expr::Wildcard | Expr::QualifiedWildcard { .. } - | Expr::Placeholder(_) => false, + | Expr::Placeholder { .. } => false, Expr::ScalarFunction { fun, .. } => Self::volatility_ok(fun.volatility()), Expr::ScalarUDF { fun, .. } => Self::volatility_ok(fun.signature.volatility), Expr::Literal(_) diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index ab4b64ebaafb..d2c57f3a79c4 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -369,7 +369,7 @@ mod tests { Box::new(lit(1)), )); let values = vec![vec![expr1, expr2]]; - let plan = LogicalPlanBuilder::values(values, &[])?.build()?; + let plan = LogicalPlanBuilder::values(values)?.build()?; let expected = "\ Values: (Int32(3) AS Int32(1) + Int32(2), Int32(1) AS Int32(2) - Int32(1))"; diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 60b018e26a08..b71dc87d5733 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -356,7 +356,8 @@ message LogicalExprNode { } message PlaceholderNode { - string param = 1; + string id = 1; + ArrowType data_type = 2; } message LogicalExprList { diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index a9bddf402dcf..9d51ac5de134 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -1192,9 +1192,16 @@ pub fn parse_expr( .collect::, Error>>()?, ))) } - ExprType::Placeholder(PlaceholderNode { param }) => { - Ok(Expr::Placeholder(param.clone())) - } + ExprType::Placeholder(PlaceholderNode { id, data_type }) => match data_type { + None => { + let message = format!("Protobuf deserialization error: data type must be provided for the placeholder {}", id); + Err(proto_error(message)) + } + Some(data_type) => Ok(Expr::Placeholder { + id: id.clone(), + data_type: data_type.try_into()?, + }), + }, } } diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 2cfef77d9ea5..877e69e66ce0 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -16007,12 +16007,18 @@ impl serde::Serialize for PlaceholderNode { { use serde::ser::SerializeStruct; let mut len = 0; - if !self.param.is_empty() { + if !self.id.is_empty() { + len += 1; + } + if self.data_type.is_some() { len += 1; } let mut struct_ser = serializer.serialize_struct("datafusion.PlaceholderNode", len)?; - if !self.param.is_empty() { - struct_ser.serialize_field("param", &self.param)?; + if !self.id.is_empty() { + struct_ser.serialize_field("id", &self.id)?; + } + if let Some(v) = self.data_type.as_ref() { + struct_ser.serialize_field("dataType", v)?; } struct_ser.end() } @@ -16024,12 +16030,15 @@ impl<'de> serde::Deserialize<'de> for PlaceholderNode { D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "param", + "id", + "data_type", + "dataType", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { - Param, + Id, + DataType, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -16051,7 +16060,8 @@ impl<'de> serde::Deserialize<'de> for PlaceholderNode { E: serde::de::Error, { match value { - "param" => Ok(GeneratedField::Param), + "id" => Ok(GeneratedField::Id), + "dataType" | "data_type" => Ok(GeneratedField::DataType), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -16071,19 +16081,27 @@ impl<'de> serde::Deserialize<'de> for PlaceholderNode { where V: serde::de::MapAccess<'de>, { - let mut param__ = None; + let mut id__ = None; + let mut data_type__ = None; while let Some(k) = map.next_key()? { match k { - GeneratedField::Param => { - if param__.is_some() { - return Err(serde::de::Error::duplicate_field("param")); + GeneratedField::Id => { + if id__.is_some() { + return Err(serde::de::Error::duplicate_field("id")); } - param__ = Some(map.next_value()?); + id__ = Some(map.next_value()?); + } + GeneratedField::DataType => { + if data_type__.is_some() { + return Err(serde::de::Error::duplicate_field("dataType")); + } + data_type__ = map.next_value()?; } } } Ok(PlaceholderNode { - param: param__.unwrap_or_default(), + id: id__.unwrap_or_default(), + data_type: data_type__, }) } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 44c372d12302..e2e9fc364ee7 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -506,7 +506,9 @@ pub mod logical_expr_node { #[derive(Clone, PartialEq, ::prost::Message)] pub struct PlaceholderNode { #[prost(string, tag = "1")] - pub param: ::prost::alloc::string::String, + pub id: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub data_type: ::core::option::Option, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct LogicalExprList { diff --git a/datafusion/proto/src/logical_plan.rs b/datafusion/proto/src/logical_plan.rs index 1c8de31cf4b1..f7334e8e799d 100644 --- a/datafusion/proto/src/logical_plan.rs +++ b/datafusion/proto/src/logical_plan.rs @@ -320,7 +320,7 @@ impl AsLogicalPlan for LogicalPlanNode { .collect::, _>>() .map_err(|e| e.into()) }?; - LogicalPlanBuilder::values(values, &[])?.build() + LogicalPlanBuilder::values(values)?.build() } LogicalPlanType::Projection(projection) => { let input: LogicalPlan = diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index 8cc2f4d99868..0b70879fcfd1 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -888,8 +888,8 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { .collect::, Self::Error>>()?, })), }, - Expr::Placeholder(param) => Self { - expr_type: Some(ExprType::Placeholder(PlaceholderNode { param: param.clone() })), + Expr::Placeholder{ id, data_type } => Self { + expr_type: Some(ExprType::Placeholder(PlaceholderNode { id: id.clone(), data_type: Some(data_type.try_into()?) })), }, Expr::QualifiedWildcard { .. } | Expr::TryCast { .. } => diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index decec707546c..5139bd2b7a6e 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -40,4 +40,5 @@ unicode_expressions = [] arrow-schema = "28.0.0" datafusion-common = { path = "../common", version = "15.0.0" } datafusion-expr = { path = "../expr", version = "15.0.0" } +log = "^0.4" sqlparser = "0.27" diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 7beb3ed55e89..b490406a81bd 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -16,7 +16,7 @@ // under the License. //! SQL Query Planner (produces logical plan from SQL AST) - +use log::debug; use std::collections::{HashMap, HashSet}; use std::str::FromStr; use std::sync::Arc; @@ -1077,10 +1077,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &[using_columns], )?; - Ok(LogicalPlan::Filter(Filter::try_new_with_params( + Ok(LogicalPlan::Filter(Filter::try_new( filter_expr, Arc::new(plan), - &planner_context.prepare_param_data_types, )?)) } None => Ok(plan), @@ -1267,10 +1266,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let plan = if let Some(having_expr_post_aggr) = having_expr_post_aggr { LogicalPlanBuilder::from(plan) - .filter_with_params( - having_expr_post_aggr, - &planner_context.prepare_param_data_types, - )? + .filter(having_expr_post_aggr)? .build()? } else { plan @@ -1874,7 +1870,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } SQLExpr::Value(Value::Boolean(n)) => Ok(lit(n)), SQLExpr::Value(Value::Placeholder(param)) => { - Ok(Expr::Placeholder(param)) + Self::create_placeholder_expr(param, param_data_types) } SQLExpr::UnaryOp { op, expr } => self.parse_sql_unary_op( op, @@ -1912,7 +1908,42 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .collect::>>() }) .collect::>>()?; - LogicalPlanBuilder::values(values, param_data_types)?.build() + LogicalPlanBuilder::values(values)?.build() + } + + fn create_placeholder_expr( + param: String, + param_data_types: &[DataType], + ) -> Result { + // Parse the placeholder as a number becasue it is the only support from sqlparser and postgres + let index = param[1..].parse::(); + let idx = match index { + Ok(index) => index - 1, + Err(_) => { + return Err(DataFusionError::Internal(format!( + "Invalid placeholder: {}", + param + ))) + } + }; + // Check if the placeholder is in the parameter list + if param_data_types.len() <= idx { + return Err(DataFusionError::Internal(format!( + "Placehoder {} does not exist in the parameter list: {:?}", + param, param_data_types + ))); + } + // Data type of the parameter + let param_type = param_data_types[idx].clone(); + debug!( + "type of param {} param_data_types[idx]: {:?}", + param, param_type + ); + + Ok(Expr::Placeholder { + id: param, + data_type: param_type, + }) } fn sql_expr_to_logical_expr( @@ -1926,7 +1957,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Value(Value::SingleQuotedString(ref s) | Value::DoubleQuotedString(ref s)) => Ok(lit(s.clone())), SQLExpr::Value(Value::Boolean(n)) => Ok(lit(n)), SQLExpr::Value(Value::Null) => Ok(Expr::Literal(ScalarValue::Null)), - SQLExpr::Value(Value::Placeholder(param)) => Ok(Expr::Placeholder(param)), + SQLExpr::Value(Value::Placeholder(param)) => Self::create_placeholder_expr(param, &planner_context.prepare_param_data_types), SQLExpr::Extract { field, expr } => Ok(Expr::ScalarFunction { fun: BuiltinScalarFunction::DatePart, args: vec![ diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index b3c9b757e774..94737fec4e91 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -411,7 +411,10 @@ where ))) } }, - Expr::Placeholder(param) => Ok(Expr::Placeholder(param.clone())), + Expr::Placeholder { id, data_type } => Ok(Expr::Placeholder { + id: id.clone(), + data_type: data_type.clone(), + }), }, } } From 5d78097eacba1d165f018deaadbac5c06d280c2a Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 7 Dec 2022 13:19:48 -0500 Subject: [PATCH 10/17] chore: cleanup --- datafusion/expr/src/expr_schema.rs | 1 - datafusion/expr/src/logical_plan/builder.rs | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index e65eaf3ae274..ae516001bc07 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -127,7 +127,6 @@ impl ExprSchemable for Expr { Expr::Like { .. } | Expr::ILike { .. } | Expr::SimilarTo { .. } => { Ok(DataType::Boolean) } - // Return the type of the corresponding param defined in param_data_types of `PREPARE my_plan(param_data_types)` Expr::Placeholder { data_type, .. } => Ok(data_type.clone()), Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 585fac16875c..54c86c3a11af 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -26,9 +26,9 @@ use crate::{and, binary_expr, Operator}; use crate::{ logical_plan::{ Aggregate, Analyze, CrossJoin, Distinct, EmptyRelation, Explain, Filter, Join, - JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType, Projection, - Repartition, Sort, SubqueryAlias, TableScan, ToStringifiedPlan, Union, Values, - Window, + JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType, Prepare, + Projection, Repartition, Sort, SubqueryAlias, TableScan, ToStringifiedPlan, + Union, Values, Window, }, utils::{ can_hash, expand_qualified_wildcard, expand_wildcard, @@ -45,8 +45,6 @@ use std::any::Any; use std::convert::TryFrom; use std::{collections::HashMap, sync::Arc}; -use super::Prepare; - /// Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; From 13443951e2262d6273ea8bee2285b219fac7c96a Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 7 Dec 2022 14:44:39 -0500 Subject: [PATCH 11/17] test: more prepare statement tests --- datafusion-cli/Cargo.lock | 1 + .../sqllogictests/test_files/prepare.slt | 40 ++++++++ datafusion/sql/src/planner.rs | 92 ++++++++++++++++++- 3 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 datafusion/core/tests/sqllogictests/test_files/prepare.slt diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index dbf9cc88d784..839255cea5bc 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -800,6 +800,7 @@ dependencies = [ "arrow-schema", "datafusion-common", "datafusion-expr", + "log", "sqlparser", ] diff --git a/datafusion/core/tests/sqllogictests/test_files/prepare.slt b/datafusion/core/tests/sqllogictests/test_files/prepare.slt new file mode 100644 index 000000000000..51bd6b5719fd --- /dev/null +++ b/datafusion/core/tests/sqllogictests/test_files/prepare.slt @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +########## +## Prepare Statement Tests +########## + +statement ok +create table person (id int, first_name varchar, last_name varchar, age int, state varchar, salary double, birthday timestamp, "😀" int) as values (1, 'jane', 'smith', 20, 'MA', 100000.45, '2000-11-12T00:00:00'::timestamp, 99); + +query C rowsort +select * from person; +---- +1 jane smith 20 MA 100000.45 2000-11-12T00:00:00.000000000 99 + +# TODO: support error instead of panikng +# thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: SQL(ParserError("Expected AS, found: SELECT"))', datafusion/core/tests/sqllogictests/src/main.rs:197:42 +# statement error +# PREPARE AS SELECT id, age FROM person WHERE age = $foo + +# TODO: this statement shoudl work after we support EXECUTE statement and caching this logicalplan somewhere +# statement ok +# PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); + +# And then we may want to add test_prepare_statement* here + diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index b490406a81bd..7258c9047961 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -6194,6 +6194,59 @@ mod tests { quick_test(sql, expected); } + #[test] + #[should_panic(expected = "Invalid placeholder: $foo")] + fn test_prepare_statement_to_plan_panic_param_format() { + // param is not number following the $ sign + // panic due to error returned from the parser + let sql = "PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = $foo"; + + let expected_plan = "whatever"; + let expected_dt = "whatever"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + + #[test] + #[should_panic(expected = "value: SQL(ParserError(\"Expected AS, found: SELECT\"))")] + fn test_prepare_statement_to_plan_panic_prepare_wrong_syntax() { + // param is not number following the $ sign + // panic due to error returned from the parser + let sql = "PREPARE AS SELECT id, age FROM person WHERE age = $foo"; + + let expected_plan = "whatever"; + let expected_dt = "whatever"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + + #[test] + #[should_panic( + expected = "value: SchemaError(FieldNotFound { field: Column { relation: None, name: \"id\" }, valid_fields: Some([]) })" + )] + fn test_prepare_statement_to_plan_panic_no_relation_and_constant_param() { + let sql = "PREPARE my_plan(INT) AS SELECT id + $1"; + + let expected_plan = "whatever"; + let expected_dt = "whatever"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + + #[test] + #[should_panic( + expected = "value: Internal(\"Placehoder $2 does not exist in the parameter list: [Int32]\")" + )] + fn test_prepare_statement_to_plan_panic_no_data_types() { + // only provide 1 data type while using 2 params + let sql = "PREPARE my_plan(INT) AS SELECT 1 + $1 + $2"; + + let expected_plan = "whatever"; + let expected_dt = "whatever"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + #[test] fn test_prepare_statement_to_plan_no_param() { // no embedded parameter but still declare it @@ -6222,6 +6275,35 @@ mod tests { prepare_stmt_quick_test(sql, expected_plan, expected_dt); } + #[test] + fn test_prepare_statement_to_plan_params_as_constants() { + let sql = "PREPARE my_plan(INT) AS SELECT $1"; + + let expected_plan = "Prepare: \"my_plan\" [Int32] \ + \n Projection: $1\n EmptyRelation"; + let expected_dt = "[Int32]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + + ///////////////////////// + let sql = "PREPARE my_plan(INT) AS SELECT 1 + $1"; + + let expected_plan = "Prepare: \"my_plan\" [Int32] \ + \n Projection: Int64(1) + $1\n EmptyRelation"; + let expected_dt = "[Int32]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + + ///////////////////////// + let sql = "PREPARE my_plan(INT, DOUBLE) AS SELECT 1 + $1 + $2"; + + let expected_plan = "Prepare: \"my_plan\" [Int32, Float64] \ + \n Projection: Int64(1) + $1 + $2\n EmptyRelation"; + let expected_dt = "[Int32, Float64]"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + #[test] fn test_prepare_statement_to_plan_one_param() { let sql = "PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = $1"; @@ -6238,17 +6320,17 @@ mod tests { #[test] fn test_prepare_statement_to_plan_multi_params() { - let sql = "PREPARE my_plan(INT, STRING, DOUBLE, INT, DOUBLE) AS - SELECT id, age + let sql = "PREPARE my_plan(INT, STRING, DOUBLE, INT, DOUBLE, STRING) AS + SELECT id, age, $6 FROM person WHERE age IN ($1, $4) AND salary > $3 and salary < $5 OR first_name < $2"; - let expected_plan = "Prepare: \"my_plan\" [Int32, Utf8, Float64, Int32, Float64] \ - \n Projection: person.id, person.age\ + let expected_plan = "Prepare: \"my_plan\" [Int32, Utf8, Float64, Int32, Float64, Utf8] \ + \n Projection: person.id, person.age,\ \n Filter: person.age IN ([$1, $4]) AND person.salary > $3 AND person.salary < $5 OR person.first_name < $2\ \n TableScan: person"; - let expected_dt = "[Int32, Utf8, Float64, Int32, Float64]"; + let expected_dt = "[Int32, Utf8, Float64, Int32, Float64, Utf8]"; prepare_stmt_quick_test(sql, expected_plan, expected_dt); } From 87787da7258614a2d4cd29c22206b3126c3bb79c Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 7 Dec 2022 14:47:58 -0500 Subject: [PATCH 12/17] chore: cleanup --- datafusion/core/tests/sqllogictests/test_files/prepare.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/prepare.slt b/datafusion/core/tests/sqllogictests/test_files/prepare.slt index 51bd6b5719fd..cc525cbcdfec 100644 --- a/datafusion/core/tests/sqllogictests/test_files/prepare.slt +++ b/datafusion/core/tests/sqllogictests/test_files/prepare.slt @@ -27,12 +27,12 @@ select * from person; ---- 1 jane smith 20 MA 100000.45 2000-11-12T00:00:00.000000000 99 -# TODO: support error instead of panikng +# TODO: support error instead of panicking # thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: SQL(ParserError("Expected AS, found: SELECT"))', datafusion/core/tests/sqllogictests/src/main.rs:197:42 # statement error # PREPARE AS SELECT id, age FROM person WHERE age = $foo -# TODO: this statement shoudl work after we support EXECUTE statement and caching this logicalplan somewhere +# TODO: this statement should work after we support EXECUTE statement and caching this prepare logical plan somewhere # statement ok # PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); From cdb328c0dd1d5023da7562af411fb9ed8609116a Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 7 Dec 2022 16:13:49 -0500 Subject: [PATCH 13/17] chore: fix typos and add tests into the sqllogicaltests --- .../sqllogictests/test_files/prepare.slt | 55 ++++++++++++++++--- datafusion/sql/src/planner.rs | 4 +- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/prepare.slt b/datafusion/core/tests/sqllogictests/test_files/prepare.slt index cc525cbcdfec..03a5d4823c29 100644 --- a/datafusion/core/tests/sqllogictests/test_files/prepare.slt +++ b/datafusion/core/tests/sqllogictests/test_files/prepare.slt @@ -27,14 +27,53 @@ select * from person; ---- 1 jane smith 20 MA 100000.45 2000-11-12T00:00:00.000000000 99 -# TODO: support error instead of panicking -# thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: SQL(ParserError("Expected AS, found: SELECT"))', datafusion/core/tests/sqllogictests/src/main.rs:197:42 -# statement error -# PREPARE AS SELECT id, age FROM person WHERE age = $foo +# Error due to syntax and semantic violation -# TODO: this statement should work after we support EXECUTE statement and caching this prepare logical plan somewhere -# statement ok -# PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); +# Syntax error: no name specified after the keyword prepare +statement error +PREPARE AS SELECT id, age FROM person WHERE age = $foo; -# And then we may want to add test_prepare_statement* here +# param following a non-number, $foo, not supported +statement error +PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = $foo; + +# not specify table hence cannot specify columns +statement error +PREPARE my_plan(INT) AS SELECT id + $1; + +# not specify data types for all params +statement error +PREPARE my_plan(INT) AS SELECT 1 + $1 + $2; + +# ####################### +# TODO: all the errors below should work ok after we store the prepare logical plan somewhere +statement error +PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); + +statement error +PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = 10; + +statement error +PREPARE my_plan AS SELECT id, age FROM person WHERE age = 10; + +statement error +PREPARE my_plan(INT) AS SELECT $1; + +statement error +PREPARE my_plan(INT) AS SELECT 1 + $1; + +statement error +PREPARE my_plan(INT, DOUBLE) AS SELECT 1 + $1 + $2; + +statement error +PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age = $1; + +statement error +PREPARE my_plan(INT, STRING, DOUBLE, INT, DOUBLE, STRING) AS SELECT id, age, $6 FROM person WHERE age IN ($1, $4) AND salary > $3 and salary < $5 OR first_name < $2"; + +statement error +PREPARE my_plan(INT, DOUBLE, DOUBLE, DOUBLE) AS SELECT id, SUM(age) FROM person WHERE salary > $2 GROUP BY id HAVING sum(age) < $1 AND SUM(age) > 10 OR SUM(age) in ($3, $4); + +statement error +PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index c20bfa4b8aaa..61f89646a83b 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -1913,13 +1913,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { param: String, param_data_types: &[DataType], ) -> Result { - // Parse the placeholder as a number becasue it is the only support from sqlparser and postgres + // Parse the placeholder as a number because it is the only support from sqlparser and postgres let index = param[1..].parse::(); let idx = match index { Ok(index) => index - 1, Err(_) => { return Err(DataFusionError::Internal(format!( - "Invalid placeholder: {}", + "Invalid placeholder, not a number: {}", param ))) } From 58a31178bc80a895cda5694db07ed2c1c239a1a4 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 7 Dec 2022 16:21:59 -0500 Subject: [PATCH 14/17] docs: add docstring --- datafusion/expr/src/logical_plan/builder.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index dcb3627bde13..6edd67604ad2 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -281,6 +281,7 @@ impl LogicalPlanBuilder { )?))) } + /// Make a builder for a prepare logical plan from the builder's plan pub fn prepare(&self, name: String, data_types: Vec) -> Result { Ok(Self::from(LogicalPlan::Prepare(Prepare { name, From 30705d93a238915c7b6e6dc17cfea133a6364796 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 7 Dec 2022 18:13:36 -0500 Subject: [PATCH 15/17] chore: update test panic message due to recent change to have clearer message per review comment --- datafusion/sql/src/planner.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 61f89646a83b..1a9aec98347b 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -6248,7 +6248,9 @@ mod tests { } #[test] - #[should_panic(expected = "Invalid placeholder: $foo")] + #[should_panic( + expected = "value: Internal(\"Invalid placeholder, not a number: $foo\"" + )] fn test_prepare_statement_to_plan_panic_param_format() { // param is not number following the $ sign // panic due to error returned from the parser From 87a352312c90dbe3880251ab3d50b85421ecc9c3 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Thu, 8 Dec 2022 10:28:14 -0500 Subject: [PATCH 16/17] chore: add a test and a doc string per review comments --- .../tests/sqllogictests/test_files/prepare.slt | 4 ++++ datafusion/sql/src/planner.rs | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/datafusion/core/tests/sqllogictests/test_files/prepare.slt b/datafusion/core/tests/sqllogictests/test_files/prepare.slt index 03a5d4823c29..948a2e3bc830 100644 --- a/datafusion/core/tests/sqllogictests/test_files/prepare.slt +++ b/datafusion/core/tests/sqllogictests/test_files/prepare.slt @@ -45,6 +45,10 @@ PREPARE my_plan(INT) AS SELECT id + $1; statement error PREPARE my_plan(INT) AS SELECT 1 + $1 + $2; +# cannot use IS param +statement error +PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age is $1; + # ####################### # TODO: all the errors below should work ok after we store the prepare logical plan somewhere statement error diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 1a9aec98347b..e6e2b6644f06 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -1909,6 +1909,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { LogicalPlanBuilder::values(values)?.build() } + /// Create a placeholder expression + /// This is the same as Postgres's prepare statement syntax in which a placeholder starts with `$` sign and then + /// number 1, 2, ... etc. For example, `$1` is the first placeholder; $2 is the second one and so on. fn create_placeholder_expr( param: String, param_data_types: &[DataType], @@ -6302,6 +6305,19 @@ mod tests { prepare_stmt_quick_test(sql, expected_plan, expected_dt); } + #[test] + #[should_panic( + expected = "value: SQL(ParserError(\"Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: $1\"" + )] + fn test_prepare_statement_to_plan_panic_is_param() { + let sql = "PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age is $1"; + + let expected_plan = "whatever"; + let expected_dt = "whatever"; + + prepare_stmt_quick_test(sql, expected_plan, expected_dt); + } + #[test] fn test_prepare_statement_to_plan_no_param() { // no embedded parameter but still declare it From dd6c3e05fc56b9cd09fa88b620d07867294a9c4a Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Thu, 8 Dec 2022 11:38:36 -0500 Subject: [PATCH 17/17] fix: output of a test after master merge --- datafusion/sql/src/planner.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 9a9b1f5e5719..82d8c3834294 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -6414,11 +6414,10 @@ mod tests { let sql = "PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter);"; let expected_plan = "Prepare: \"my_plan\" [Utf8, Utf8] \ - \n Projection: t.num, t.letter\ - \n SubqueryAlias: t\ - \n Projection: t.column1 AS num, t.column2 AS letter\ - \n SubqueryAlias: t\ - \n Values: (Int64(1), $1), (Int64(2), $2)"; + \n Projection: num, letter\ + \n Projection: t.column1 AS num, t.column2 AS letter\ + \n SubqueryAlias: t\ + \n Values: (Int64(1), $1), (Int64(2), $2)"; let expected_dt = "[Utf8, Utf8]";