From 046da8209f037c0f450745c392daa28f26ca07a9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 3 Nov 2022 17:34:13 -0600 Subject: [PATCH 1/7] fix regression --- datafusion/sql/src/planner.rs | 81 +++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 2bc854f2e0a7..60a4c6e85fca 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -90,9 +90,24 @@ pub trait ContextProvider { fn get_variable_type(&self, variable_names: &[String]) -> Option; } +/// SQL parser options +#[derive(Debug)] +struct ParserOptions { + parse_float_as_decimal: bool, +} + +impl Default for ParserOptions { + fn default() -> Self { + Self { + parse_float_as_decimal: false, + } + } +} + /// SQL query planner pub struct SqlToRel<'a, S: ContextProvider> { schema_provider: &'a S, + options: ParserOptions, } fn plan_key(key: SQLExpr) -> Result { @@ -135,7 +150,10 @@ fn plan_indexed(expr: Expr, mut keys: Vec) -> Result { impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Create a new query planner pub fn new(schema_provider: &'a S) -> Self { - SqlToRel { schema_provider } + SqlToRel { + schema_provider, + options: ParserOptions::default(), + } } /// Generate a logical plan from an DataFusion SQL statement @@ -1696,7 +1714,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .map(|row| { row.into_iter() .map(|v| match v { - SQLExpr::Value(Value::Number(n, _)) => parse_sql_number(&n), + SQLExpr::Value(Value::Number(n, _)) => self.parse_sql_number(&n), SQLExpr::Value( Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), ) => Ok(lit(s)), @@ -1750,7 +1768,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ctes: &mut HashMap, ) -> Result { match sql { - SQLExpr::Value(Value::Number(n, _)) => parse_sql_number(&n), + SQLExpr::Value(Value::Number(n, _)) => self.parse_sql_number(&n), SQLExpr::Value(Value::SingleQuotedString(ref s) | Value::DoubleQuotedString(ref s)) => Ok(lit(s.clone())), SQLExpr::Value(Value::Boolean(n)) => Ok(lit(n)), SQLExpr::Value(Value::Null) => Ok(Expr::Literal(ScalarValue::Null)), @@ -2671,6 +2689,48 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(lit(ScalarValue::new_list(Some(values), data_type))) } } + + /// Parse number in sql string, convert to Expr::Literal + fn parse_sql_number(&self, n: &str) -> Result { + if let Some(i) = n.find('E') { + let _mantissa = &n[0..i]; + let _exponent = &n[i + 1..]; + Err(DataFusionError::NotImplemented( + "sql numeric literals in scientific notation are not supported" + .to_string(), + )) + } else if let Ok(n) = n.parse::() { + Ok(lit(n)) + } else if self.options.parse_float_as_decimal { + if let Some(i) = n.find('.') { + let p = n.len() - 1; + let s = n.len() - i - 1; + let str = n.replace('.', ""); + let n = str.parse::().map_err(|_| { + DataFusionError::from(ParserError(format!( + "Cannot parse {} as i128 when building decimal", + str + ))) + })?; + Ok(Expr::Literal(ScalarValue::Decimal128( + Some(n), + p as u8, + s as u8, + ))) + } else { + n.parse::().map(lit).map_err(|_| { + DataFusionError::from(ParserError(format!( + "Cannot parse {} as f64", + n + ))) + }) + } + } else { + n.parse::().map(lit).map_err(|_| { + DataFusionError::from(ParserError(format!("Cannot parse {} as f64", n))) + }) + } + } } /// Normalize a SQL object name @@ -2907,21 +2967,6 @@ pub fn convert_data_type(sql_type: &SQLDataType) -> Result { } } -// Parse number in sql string, convert to Expr::Literal -fn parse_sql_number(n: &str) -> Result { - // parse first as i64 - n.parse::() - .map(lit) - // if parsing as i64 fails try f64 - .or_else(|_| n.parse::().map(lit)) - .map_err(|_| { - DataFusionError::from(ParserError(format!( - "Cannot parse {} as i64 or f64", - n - ))) - }) -} - #[cfg(test)] mod tests { use super::*; From 0f2a4cff9c942604724c096dd23165e823bf39c7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 3 Nov 2022 17:39:38 -0600 Subject: [PATCH 2/7] fix --- datafusion/sql/src/planner.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 60a4c6e85fca..d78af5574421 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -2718,12 +2718,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { s as u8, ))) } else { - n.parse::().map(lit).map_err(|_| { + let number = n.parse::().map_err(|_| { DataFusionError::from(ParserError(format!( - "Cannot parse {} as f64", + "Cannot parse {} as i128 when building decimal", n ))) - }) + })?; + Ok(Expr::Literal(ScalarValue::Decimal128(Some(number), 38, 0,))) } } else { n.parse::().map(lit).map_err(|_| { From d9ce88e61562c20c1366acfaa07681a25a14341d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 3 Nov 2022 17:50:38 -0600 Subject: [PATCH 3/7] test --- datafusion/sql/src/planner.rs | 55 ++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index d78af5574421..45630772931f 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -92,7 +92,7 @@ pub trait ContextProvider { /// SQL parser options #[derive(Debug)] -struct ParserOptions { +pub struct ParserOptions { parse_float_as_decimal: bool, } @@ -150,9 +150,14 @@ fn plan_indexed(expr: Expr, mut keys: Vec) -> Result { impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Create a new query planner pub fn new(schema_provider: &'a S) -> Self { + Self::new_with_options(schema_provider, ParserOptions::default()) + } + + /// Create a new query planner + pub fn new_with_options(schema_provider: &'a S, options: ParserOptions) -> Self { SqlToRel { schema_provider, - options: ParserOptions::default(), + options, } } @@ -2692,9 +2697,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Parse number in sql string, convert to Expr::Literal fn parse_sql_number(&self, n: &str) -> Result { - if let Some(i) = n.find('E') { - let _mantissa = &n[0..i]; - let _exponent = &n[i + 1..]; + if let Some(_) = n.find('E') { + // not implemented yet + // https://github.com/apache/arrow-datafusion/issues/3448 Err(DataFusionError::NotImplemented( "sql numeric literals in scientific notation are not supported" .to_string(), @@ -2724,7 +2729,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { n ))) })?; - Ok(Expr::Literal(ScalarValue::Decimal128(Some(number), 38, 0,))) + Ok(Expr::Literal(ScalarValue::Decimal128(Some(number), 38, 0))) } } else { n.parse::().map(lit).map_err(|_| { @@ -2975,6 +2980,19 @@ mod tests { use sqlparser::dialect::{Dialect, GenericDialect, HiveDialect, MySqlDialect}; use std::any::Any; + #[test] + fn parse_decimals() { + let options = ParserOptions { + parse_float_as_decimal: true, + }; + quick_test_with_options( + "SELECT 1, 1.0, 0.1, .1, 12.34", + "Projection: Int64(1), Decimal128(Some(10),2,1), Decimal128(Some(1),2,1), Decimal128(Some(1),1,1), Decimal128(Some(1234),4,2)\ + \n EmptyRelation", + options + ); + } + #[test] fn select_no_relation() { quick_test( @@ -4936,8 +4954,15 @@ mod tests { } fn logical_plan(sql: &str) -> Result { + logical_plan_with_options(sql, ParserOptions::default()) + } + + fn logical_plan_with_options( + sql: &str, + options: ParserOptions, + ) -> Result { let dialect = &GenericDialect {}; - logical_plan_with_dialect(sql, dialect) + logical_plan_with_dialect_and_options(sql, dialect, options) } fn logical_plan_with_dialect( @@ -4950,12 +4975,28 @@ mod tests { planner.statement_to_plan(ast.pop_front().unwrap()) } + fn logical_plan_with_dialect_and_options( + sql: &str, + dialect: &dyn Dialect, + options: ParserOptions, + ) -> Result { + let planner = SqlToRel::new_with_options(&MockContextProvider {}, options); + let result = DFParser::parse_sql_with_dialect(sql, dialect); + let mut ast = result?; + planner.statement_to_plan(ast.pop_front().unwrap()) + } + /// Create logical plan, write with formatter, compare to expected output fn quick_test(sql: &str, expected: &str) { let plan = logical_plan(sql).unwrap(); assert_eq!(format!("{:?}", plan), expected); } + fn quick_test_with_options(sql: &str, expected: &str, options: ParserOptions) { + let plan = logical_plan_with_options(sql, options).unwrap(); + assert_eq!(format!("{:?}", plan), expected); + } + struct MockContextProvider {} impl ContextProvider for MockContextProvider { From cd6c86bda8ee49f30aedce1affc9cf292049a8c6 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 14 Nov 2022 16:31:21 -0700 Subject: [PATCH 4/7] address feedback --- datafusion/sql/src/planner.rs | 44 ++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 070be768e59b..cd8de7c6cf0a 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -2703,10 +2703,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } else if let Ok(n) = n.parse::() { Ok(lit(n)) } else if self.options.parse_float_as_decimal { - if let Some(i) = n.find('.') { - let p = n.len() - 1; - let s = n.len() - i - 1; - let str = n.replace('.', ""); + // remove leading zeroes + let str = n.trim_start_matches('0'); + if let Some(i) = str.find('.') { + // remove trailing zeroes + let str = str.trim_end_matches('0'); + let p = str.len() - 1; + let s = str.len() - i - 1; + let str = str.replace('.', ""); let n = str.parse::().map_err(|_| { DataFusionError::from(ParserError(format!( "Cannot parse {} as i128 when building decimal", @@ -3093,15 +3097,29 @@ mod tests { #[test] fn parse_decimals() { - let options = ParserOptions { - parse_float_as_decimal: true, - }; - quick_test_with_options( - "SELECT 1, 1.0, 0.1, .1, 12.34", - "Projection: Int64(1), Decimal128(Some(10),2,1), Decimal128(Some(1),2,1), Decimal128(Some(1),1,1), Decimal128(Some(1234),4,2)\ - \n EmptyRelation", - options - ); + let test_data = [ + ("1", "Int64(1)"), + ("001", "Int64(1)"), + ("0.1", "Decimal128(Some(1),1,1)"), + ("0.01", "Decimal128(Some(1),2,2)"), + ("1.0", "Decimal128(Some(1),1,0)"), + ("10.01", "Decimal128(Some(1001),4,2)"), + ( + "10000000000000000000.00", + "Decimal128(Some(10000000000000000000),20,0)", + ), + ]; + for (a, b) in test_data { + let sql = format!("SELECT {}", a); + let expected = format!("Projection: {}\n EmptyRelation", b); + quick_test_with_options( + &sql, + &expected, + ParserOptions { + parse_float_as_decimal: true, + }, + ); + } } #[test] From 94bb8a528b476c1ae0c61c5901b8a17766515b36 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 14 Nov 2022 16:43:41 -0700 Subject: [PATCH 5/7] match spark --- datafusion/sql/src/planner.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index cd8de7c6cf0a..f44bc7d0e91d 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -2706,8 +2706,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // remove leading zeroes let str = n.trim_start_matches('0'); if let Some(i) = str.find('.') { - // remove trailing zeroes - let str = str.trim_end_matches('0'); let p = str.len() - 1; let s = str.len() - i - 1; let str = str.replace('.', ""); @@ -3102,11 +3100,11 @@ mod tests { ("001", "Int64(1)"), ("0.1", "Decimal128(Some(1),1,1)"), ("0.01", "Decimal128(Some(1),2,2)"), - ("1.0", "Decimal128(Some(1),1,0)"), + ("1.0", "Decimal128(Some(10),2,1)"), ("10.01", "Decimal128(Some(1001),4,2)"), ( "10000000000000000000.00", - "Decimal128(Some(10000000000000000000),20,0)", + "Decimal128(Some(1000000000000000000000),22,2)", ), ]; for (a, b) in test_data { From b1857e68983a9d88b3aee489e7378bf3510e0c9b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 15 Nov 2022 08:46:27 -0700 Subject: [PATCH 6/7] fix merge conflict --- datafusion/sql/src/planner.rs | 99 ----------------------------------- 1 file changed, 99 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index f44bc7d0e91d..8236c7128e9d 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -2987,105 +2987,6 @@ fn extract_possible_join_keys( } } -/// Convert SQL simple data type to relational representation of data type -pub fn convert_simple_data_type(sql_type: &SQLDataType) -> Result { - match sql_type { - SQLDataType::Boolean => Ok(DataType::Boolean), - SQLDataType::TinyInt(_) => Ok(DataType::Int8), - SQLDataType::SmallInt(_) => Ok(DataType::Int16), - SQLDataType::Int(_) | SQLDataType::Integer(_) => Ok(DataType::Int32), - SQLDataType::BigInt(_) => Ok(DataType::Int64), - SQLDataType::UnsignedTinyInt(_) => Ok(DataType::UInt8), - SQLDataType::UnsignedSmallInt(_) => Ok(DataType::UInt16), - SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) => { - Ok(DataType::UInt32) - } - SQLDataType::UnsignedBigInt(_) => Ok(DataType::UInt64), - SQLDataType::Float(_) => Ok(DataType::Float32), - SQLDataType::Real => Ok(DataType::Float32), - SQLDataType::Double | SQLDataType::DoublePrecision => Ok(DataType::Float64), - SQLDataType::Char(_) - | SQLDataType::Varchar(_) - | SQLDataType::Text - | SQLDataType::String => Ok(DataType::Utf8), - SQLDataType::Timestamp(tz_info) => { - let tz = if matches!(tz_info, TimezoneInfo::Tz) - || matches!(tz_info, TimezoneInfo::WithTimeZone) - { - Some("UTC".to_string()) - } else { - None - }; - Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz)) - } - SQLDataType::Date => Ok(DataType::Date32), - SQLDataType::Time(tz_info) => { - if matches!(tz_info, TimezoneInfo::None) - || matches!(tz_info, TimezoneInfo::WithoutTimeZone) - { - Ok(DataType::Time64(TimeUnit::Nanosecond)) - } else { - // We dont support TIMETZ and TIME WITH TIME ZONE for now - Err(DataFusionError::NotImplemented(format!( - "Unsupported SQL type {:?}", - sql_type - ))) - } - } - SQLDataType::Decimal(exact_number_info) => { - let (precision, scale) = match *exact_number_info { - ExactNumberInfo::None => (None, None), - ExactNumberInfo::Precision(precision) => (Some(precision), None), - ExactNumberInfo::PrecisionAndScale(precision, scale) => { - (Some(precision), Some(scale)) - } - }; - make_decimal_type(precision, scale) - } - SQLDataType::Bytea => Ok(DataType::Binary), - // Explicitly list all other types so that if sqlparser - // adds/changes the `SQLDataType` the compiler will tell us on upgrade - // and avoid bugs like https://github.com/apache/arrow-datafusion/issues/3059 - SQLDataType::Nvarchar(_) - | SQLDataType::Uuid - | SQLDataType::Binary(_) - | SQLDataType::Varbinary(_) - | SQLDataType::Blob(_) - | SQLDataType::Datetime - | SQLDataType::Interval - | SQLDataType::Regclass - | SQLDataType::Custom(_) - | SQLDataType::Array(_) - | SQLDataType::Enum(_) - | SQLDataType::Set(_) - | SQLDataType::MediumInt(_) - | SQLDataType::UnsignedMediumInt(_) - | SQLDataType::Character(_) - | SQLDataType::CharacterVarying(_) - | SQLDataType::CharVarying(_) - | SQLDataType::CharacterLargeObject(_) - | SQLDataType::CharLargeObject(_) - | SQLDataType::Clob(_) => Err(DataFusionError::NotImplemented(format!( - "Unsupported SQL type {:?}", - sql_type - ))), - } -} - -/// Convert SQL data type to relational representation of data type -pub fn convert_data_type(sql_type: &SQLDataType) -> Result { - match sql_type { - SQLDataType::Array(inner_sql_type) => { - let data_type = convert_simple_data_type(inner_sql_type)?; - - Ok(DataType::List(Box::new(Field::new( - "field", data_type, true, - )))) - } - other => convert_simple_data_type(other), - } -} - #[cfg(test)] mod tests { use super::*; From 6b97745d6378b650d7130a127a1addd49d4831d6 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 15 Nov 2022 12:53:37 -0700 Subject: [PATCH 7/7] clippy --- datafusion/sql/src/planner.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 8236c7128e9d..1f84cddd2897 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -93,19 +93,11 @@ pub trait ContextProvider { } /// SQL parser options -#[derive(Debug)] +#[derive(Debug, Default)] pub struct ParserOptions { parse_float_as_decimal: bool, } -impl Default for ParserOptions { - fn default() -> Self { - Self { - parse_float_as_decimal: false, - } - } -} - /// SQL query planner pub struct SqlToRel<'a, S: ContextProvider> { schema_provider: &'a S, @@ -2693,7 +2685,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Parse number in sql string, convert to Expr::Literal fn parse_sql_number(&self, n: &str) -> Result { - if let Some(_) = n.find('E') { + if n.find('E').is_some() { // not implemented yet // https://github.com/apache/arrow-datafusion/issues/3448 Err(DataFusionError::NotImplemented(