From ae3a271f2556b667c6b3e9023a55856fb7a6efcb Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 21 Jul 2020 10:08:04 -0400 Subject: [PATCH 01/97] First pass on some create table, and some analyze table parsing --- examples/cli.rs | 1 + src/ast/data_type.rs | 3 + src/ast/mod.rs | 139 ++++++++++++++++++++++++++++++++++- src/dialect/hive.rs | 24 ++++++ src/dialect/keywords.rs | 17 +++++ src/dialect/mod.rs | 2 + src/parser.rs | 149 ++++++++++++++++++++++++++++++++++++-- src/test_utils.rs | 1 + tests/sqlparser_common.rs | 4 +- tests/sqlparser_hive.rs | 44 +++++++++++ 10 files changed, 373 insertions(+), 11 deletions(-) create mode 100644 src/dialect/hive.rs create mode 100644 tests/sqlparser_hive.rs diff --git a/examples/cli.rs b/examples/cli.rs index f019c520b..fb9b7a46d 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -39,6 +39,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--ansi" => Box::new(AnsiDialect {}), "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), + "--hive" => Box::new(HiveDialect{}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {}", s), }; diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 53122ab5d..cc8bd3260 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -61,6 +61,8 @@ pub enum DataType { Regclass, /// Text Text, + /// String (Hive) + String, /// Bytea Bytea, /// Custom type such as enums @@ -101,6 +103,7 @@ impl fmt::Display for DataType { DataType::Interval => write!(f, "INTERVAL"), DataType::Regclass => write!(f, "REGCLASS"), DataType::Text => write!(f, "TEXT"), + DataType::String => write!(f, "STRING"), DataType::Bytea => write!(f, "BYTEA"), DataType::Array(ty) => write!(f, "{}[]", ty), DataType::Custom(ty) => write!(f, "{}", ty), diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1203b096d..ed722877e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -425,6 +425,15 @@ impl fmt::Display for WindowFrameBound { #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Statement { + /// Analyze (Hive) + Analyze { + table_name: ObjectName, + partitions: Option>, + for_columns: bool, + cache_metadata: bool, + noscan: bool, + compute_statistics: bool + }, /// SELECT Query(Box), /// INSERT @@ -433,8 +442,12 @@ pub enum Statement { table_name: ObjectName, /// COLUMNS columns: Vec, + /// Overwrite (Hive) + overwrite: bool, /// A SQL query that specifies what to insert source: Box, + /// partitioned insert (Hive) + partitioned: Option> }, Copy { /// TABLE @@ -480,6 +493,8 @@ pub enum Statement { /// Optional schema columns: Vec, constraints: Vec, + hive_distribution: HiveDistributionStyle, + hive_formats: Option, with_options: Vec, file_format: Option, location: Option, @@ -553,7 +568,12 @@ pub enum Statement { Rollback { chain: bool }, /// CREATE SCHEMA CreateSchema { schema_name: ObjectName }, - + /// CREATE DATABASE + CreateDatabase { + db_name: ObjectName, + ine: bool, location: Option, + managed_location: Option + }, /// ASSERT [AS ] Assert { condition: Expr, @@ -568,12 +588,17 @@ impl fmt::Display for Statement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Statement::Query(s) => write!(f, "{}", s), + Statement::Analyze { table_name, partitions, for_columns, cache_metadata, noscan, compute_statistics } => { + Ok(()) + } Statement::Insert { table_name, + overwrite, + partitioned, columns, source, } => { - write!(f, "INSERT INTO {} ", table_name)?; + write!(f, "INSERT {act} {table_name} ", table_name = table_name, act = if *overwrite { "OVERWRITE" } else { "INTO" })?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } @@ -629,6 +654,20 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateDatabase { db_name, ine, location, managed_location } => { + write!(f, "CREATE")?; + if *ine { + write!(f, " IF NOT EXISTS")?; + } + write!(f, " {}", db_name)?; + if let Some(l) = location { + write!(f, " LOCATION '{}'", l)?; + } + if let Some(ml) = managed_location { + write!(f, " MANAGEDLOCATION '{}'", ml)?; + } + Ok(()) + } Statement::CreateView { name, or_replace, @@ -666,6 +705,8 @@ impl fmt::Display for Statement { with_options, or_replace, if_not_exists, + hive_distribution, + hive_formats, external, file_format, location, @@ -702,6 +743,42 @@ impl fmt::Display for Statement { write!(f, " WITHOUT ROWID")?; } + match hive_distribution { + HiveDistributionStyle::PARTITIONED { columns } => write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?, + HiveDistributionStyle::CLUSTERED { columns, sorted_by, num_buckets } => { + write!(f, " CLUSTERED BY ({})", display_comma_separated(&columns))?; + if !sorted_by.is_empty() { + write!(f, " SORTED BY ({})", display_comma_separated(&sorted_by))?; + } + if *num_buckets > 0 { + write!(f, " INTO {} BUCKETS", num_buckets)?; + } + } + HiveDistributionStyle::SKEWED { columns, on, stored_as_directories } => { + write!(f, " SKEWED BY ({})) ON ({})", display_comma_separated(&columns), display_comma_separated(&on))?; + if *stored_as_directories { + write!(f, " STORED AS DIRECTORIES")?; + } + }, + _ => () + } + + if let Some(HiveFormat { row_format, storage, location }) = hive_formats { + + match row_format { + Some(HiveRowFormat::SERDE { class }) => write!(f, " ROW FORMAT SERDE '{}'", class)?, + Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?, + None => () + } + match storage { + Some(HiveIOFormat::IOF { input_format, output_format }) => write!(f, " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", input_format, output_format)?, + Some(HiveIOFormat::FileFormat { format }) => write!(f, " STORED AS {}", format)?, + None => () + } + if let Some(loc) = location { + write!(f, " LOCATION '{}'", loc)?; + } + } if *external { write!( f, @@ -1000,6 +1077,64 @@ impl fmt::Display for ObjectType { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveDistributionStyle { + PARTITIONED { + columns: Vec + }, + CLUSTERED { + columns: Vec, + sorted_by: Vec, + num_buckets: i32 + }, + SKEWED { + columns: Vec, + on: Vec, + stored_as_directories: bool + }, + NONE +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveRowFormat { + SERDE { + class: String + }, + DELIMITED +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveIOFormat { + IOF { + input_format: Expr, + output_format: Expr, + }, + FileFormat { + format: FileFormat + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct HiveFormat { + pub row_format: Option, + pub storage: Option, + pub location: Option +} + +impl Default for HiveFormat { + fn default() -> Self { + HiveFormat { + row_format: None, + location: None, + storage: None + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct SqlOption { diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs new file mode 100644 index 000000000..71a5eee26 --- /dev/null +++ b/src/dialect/hive.rs @@ -0,0 +1,24 @@ +use crate::dialect::Dialect; + +#[derive(Debug)] +pub struct HiveDialect {} + +impl Dialect for HiveDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + (ch == '"') || (ch == '\'') || (ch == '`') + } + + fn is_identifier_start(&self, ch: char) -> bool { + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' + } + + fn is_identifier_part(&self, ch: char) -> bool { + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || (ch >= '0' && ch <= '9') + || ch == '_' + || ch == '$' + || ch == '{' + || ch == '}' + } +} diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 6ebe27f0e..57c9b2f30 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -69,6 +69,7 @@ define_keywords!( ALL, ALLOCATE, ALTER, + ANALYZE, AND, ANY, APPLY, @@ -97,6 +98,7 @@ define_keywords!( BOTH, BY, BYTEA, + CACHE, CALL, CALLED, CARDINALITY, @@ -121,6 +123,7 @@ define_keywords!( COLUMNS, COMMIT, COMMITTED, + COMPUTE, CONDITION, CONNECT, CONSTRAINT, @@ -151,6 +154,7 @@ define_keywords!( CURRENT_USER, CURSOR, CYCLE, + DATABASE, DATE, DAY, DEALLOCATE, @@ -159,6 +163,7 @@ define_keywords!( DECLARE, DEFAULT, DELETE, + DELIMITED, DENSE_RANK, DEREF, DESC, @@ -199,6 +204,7 @@ define_keywords!( FOLLOWING, FOR, FOREIGN, + FORMAT, FRAME_ROW, FREE, FROM, @@ -213,6 +219,7 @@ define_keywords!( GROUPS, HAVING, HEADER, + HIVEVAR, HOLD, HOUR, IDENTITY, @@ -222,6 +229,7 @@ define_keywords!( INDICATOR, INNER, INOUT, + INPUTFORMAT, INSENSITIVE, INSERT, INT, @@ -255,11 +263,13 @@ define_keywords!( LOCALTIMESTAMP, LOCATION, LOWER, + MANAGEDLOCATION, MATCH, MATERIALIZED, MAX, MEMBER, MERGE, + METADATA, METHOD, MIN, MINUTE, @@ -277,6 +287,7 @@ define_keywords!( NO, NONE, NORMALIZE, + NOSCAN, NOT, NTH_VALUE, NTILE, @@ -298,13 +309,16 @@ define_keywords!( ORDER, OUT, OUTER, + OUTPUTFORMAT, OVER, OVERFLOW, OVERLAPS, OVERLAY, + OVERWRITE, PARAMETER, PARQUET, PARTITION, + PARTITIONED, PERCENT, PERCENTILE_CONT, PERCENTILE_DISC, @@ -365,6 +379,7 @@ define_keywords!( SELECT, SENSITIVE, SEQUENCEFILE, + SERDE, SERIALIZABLE, SESSION, SESSION_USER, @@ -382,10 +397,12 @@ define_keywords!( SQRT, START, STATIC, + STATISTICS, STDDEV_POP, STDDEV_SAMP, STDIN, STORED, + STRING, SUBMULTISET, SUBSTRING, SUBSTRING_REGEX, diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index c9ddbedd3..98f43823b 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -16,6 +16,7 @@ pub mod keywords; mod mssql; mod mysql; mod postgresql; +mod hive; use std::fmt::Debug; @@ -24,6 +25,7 @@ pub use self::generic::GenericDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; +pub use self::hive::HiveDialect; pub trait Dialect: Debug { /// Determine if a character starts a quoted identifier. The default diff --git a/src/parser.rs b/src/parser.rs index e41379edb..dbf2c94ee 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -132,6 +132,7 @@ impl Parser { self.prev_token(); Ok(Statement::Query(Box::new(self.parse_query()?))) } + Keyword::ANALYZE => Ok(self.parse_analyze()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::DROP => Ok(self.parse_drop()?), Keyword::DELETE => Ok(self.parse_delete()?), @@ -159,6 +160,45 @@ impl Parser { } } + pub fn parse_analyze(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut for_columns = false; + let mut cache_metadata = false; + let mut noscan = false; + let mut partitions = None; + let mut compute_statistics = false; + + loop { + match self.parse_one_of_keywords(&[Keyword::PARTITION, Keyword::FOR, Keyword::CACHE, Keyword::NOSCAN, Keyword::COMPUTE]) { + Some(Keyword::PARTITION) => partitions = Some(self.parse_comma_separated(Parser::parse_expr)?), + Some(Keyword::NOSCAN) => noscan = true, + Some(Keyword::FOR) => { + self.expect_keyword(Keyword::COLUMNS)?; + for_columns = true + } + Some(Keyword::CACHE) => { + self.expect_keyword(Keyword::METADATA)?; + cache_metadata = true + } + Some(Keyword::COMPUTE) => { + self.expect_keyword(Keyword::STATISTICS)?; + compute_statistics = true + } + _ => break + } + } + + Ok(Statement::Analyze { + table_name, + for_columns, + partitions, + cache_metadata, + noscan, + compute_statistics + }) + } + /// Parse a new expression pub fn parse_expr(&mut self) -> Result { self.parse_subexpr(0) @@ -865,7 +905,7 @@ impl Parser { let index = self.index; for &keyword in keywords { if !self.parse_keyword(keyword) { - //println!("parse_keywords aborting .. did not find {}", keyword); + println!("parse_keywords aborting .. did not find {:?}", keyword); // reset index and return immediately self.index = index; return false; @@ -1038,13 +1078,30 @@ impl Parser { Ok(Statement::CreateSchema { schema_name }) } - pub fn parse_create_external_table( - &mut self, - or_replace: bool, - ) -> Result { + + pub fn parse_create_database(&mut self) -> Result { + let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let db_name = self.parse_object_name()?; + let mut location = None; + let mut managed_location = None; + loop { + match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { + Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), + Some(Keyword::MANAGEDLOCATION) => managed_location = Some(self.parse_literal_string()?), + _ => break + } + } + Ok(Statement::CreateDatabase { db_name, ine, location, managed_location }) + } + + pub fn parse_create_external_table( + &mut self, + or_replace: bool, + ) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; + let hive_distribution = self.parse_hive_distribution()?; self.expect_keywords(&[Keyword::STORED, Keyword::AS])?; let file_format = self.parse_file_format()?; @@ -1055,6 +1112,8 @@ impl Parser { name: table_name, columns, constraints, + hive_distribution, + hive_formats: None, with_options: vec![], or_replace, if_not_exists: false, @@ -1147,6 +1206,61 @@ impl Parser { }) } + //TODO: Implement parsing for Skewed and Clustered + pub fn parse_hive_distribution(&mut self) -> Result { + if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(HiveDistributionStyle::PARTITIONED { + columns + }) + } else { + Ok(HiveDistributionStyle::NONE) + } + } + + pub fn parse_hive_formats(&mut self) -> Result { + let mut hive_format = HiveFormat::default(); + loop { + match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, Keyword::LOCATION]) { + Some(Keyword::ROW) => { + hive_format.row_format = Some(self.parse_row_format()?); + } + Some(Keyword::STORED) => { + self.expect_keyword(Keyword::AS)?; + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::OUTPUTFORMAT)?; + let output_format = self.parse_expr()?; + hive_format.storage = Some(HiveIOFormat::IOF {input_format, output_format}); + } else { + let format = self.parse_file_format()?; + hive_format.storage = Some(HiveIOFormat::FileFormat { format }); + } + } + Some(Keyword::LOCATION) => { + hive_format.location = Some(self.parse_literal_string()?); + }, + None => break, + _ => break + } + } + + Ok(hive_format) + } + + pub fn parse_row_format(&mut self) -> Result { + self.expect_keyword(Keyword::FORMAT)?; + match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { + Some(Keyword::SERDE) => { + let class = self.parse_literal_string()?; + Ok(HiveRowFormat::SERDE { class }) + } + _ => Ok(HiveRowFormat::DELIMITED), + } + } + pub fn parse_create_table(&mut self, or_replace: bool) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; @@ -1156,6 +1270,8 @@ impl Parser { // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` let with_options = self.parse_with_options()?; @@ -1173,6 +1289,8 @@ impl Parser { with_options, or_replace, if_not_exists, + hive_distribution, + hive_formats: Some(hive_formats), external: false, file_format: None, location: None, @@ -1564,6 +1682,7 @@ impl Parser { // parse_interval_literal for a taste. Keyword::INTERVAL => Ok(DataType::Interval), Keyword::REGCLASS => Ok(DataType::Regclass), + Keyword::STRING => Ok(DataType::String), Keyword::TEXT => { if self.consume_token(&Token::LBracket) { // Note: this is postgresql-specific @@ -1901,15 +2020,20 @@ impl Parser { } pub fn parse_set(&mut self) -> Result { - let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL]); + let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; + } let variable = self.parse_identifier()?; if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + let token = self.peek_token(); let value = match (self.parse_value(), token) { (Ok(value), _) => SetVariableValue::Literal(value), (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), (Err(_), unexpected) => self.expected("variable value", unexpected)?, }; + println!("{:?}", value); Ok(Statement::SetVariable { local: modifier == Some(Keyword::LOCAL), variable, @@ -2155,12 +2279,23 @@ impl Parser { /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { - self.expect_keyword(Keyword::INTO)?; + let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; + let overwrite = if action == Keyword::OVERWRITE { true } else { false }; + if overwrite { + self.expect_keyword(Keyword::TABLE)?; + } let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; + let partitioned = if self.parse_keywords(&[Keyword::PARTITION]) { + Some(self.parse_comma_separated(Parser::parse_expr)?) + } else { + None + }; let source = Box::new(self.parse_query()?); Ok(Statement::Insert { table_name, + overwrite, + partitioned, columns, source, }) diff --git a/src/test_utils.rs b/src/test_utils.rs index 4d4d35616..834a79e35 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -117,6 +117,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), + Box::new(HiveDialect {}) ], } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5889cc357..1d55a01f1 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -90,7 +90,7 @@ fn parse_insert_invalid() { let sql = "INSERT public.customer (id, name, active) VALUES (1, 2, 3)"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected INTO, found: public".to_string()), + ParserError::ParserError("Expected one of INTO or OVERWRITE, found: public".to_string()), res.unwrap_err() ); } @@ -3193,7 +3193,7 @@ fn all_keywords_sorted() { // assert!(ALL_KEYWORDS.is_sorted()) let mut copy = Vec::from(ALL_KEYWORDS); copy.sort(); - assert!(copy == ALL_KEYWORDS) + assert_eq!(copy, ALL_KEYWORDS) } fn parse_sql_statements(sql: &str) -> Result, ParserError> { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs new file mode 100644 index 000000000..de95e6985 --- /dev/null +++ b/tests/sqlparser_hive.rs @@ -0,0 +1,44 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![warn(clippy::all)] + +//! Test SQL syntax specific to Hive. The parser based on the generic dialect +//! is also tested (on the inputs it can handle). + +use sqlparser::ast::*; +use sqlparser::dialect::{GenericDialect, HiveDialect}; +use sqlparser::test_utils::*; +use sqlparser::parser::ParserError; + +#[test] +fn parse_table_create() -> Result<(), ParserError> { + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...'"#; + let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; + + hive().verified_stmt(sql); + hive().verified_stmt(iof); + + Ok(()) +} + +fn hive() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(HiveDialect {})], + } +} + +fn hive_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(HiveDialect {}), Box::new(GenericDialect {})], + } +} From 24782fb2b61f5452ef57ab12b093f5fe71852522 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 21 Jul 2020 19:15:35 -0400 Subject: [PATCH 02/97] First pass on some create table, and some analyze table parsing --- src/ast/mod.rs | 63 ++++++++++++++++++++++++--- src/dialect/keywords.rs | 4 ++ src/parser.rs | 85 ++++++++++++++++++++++++++++--------- tests/sqlparser_hive.rs | 32 +++++++++++++- tests/sqlparser_postgres.rs | 15 ++++--- 5 files changed, 168 insertions(+), 31 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ed722877e..a709145a1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -434,6 +434,19 @@ pub enum Statement { noscan: bool, compute_statistics: bool }, + /// Truncate (Hive) + Truncate { + table_name: ObjectName, + partitions: Option> + }, + /// Msck (Hive) + Msck { + table_name: ObjectName, + repair: bool, + add_partitions: bool, + drop_partitions: bool, + sync_partitions: bool + }, /// SELECT Query(Box), /// INSERT @@ -447,7 +460,7 @@ pub enum Statement { /// A SQL query that specifies what to insert source: Box, /// partitioned insert (Hive) - partitioned: Option> + partitioned: Option> }, Copy { /// TABLE @@ -542,8 +555,9 @@ pub enum Statement { /// supported yet. SetVariable { local: bool, + hivevar: bool, variable: Ident, - value: SetVariableValue, + value: Vec, }, /// SHOW /// @@ -588,7 +602,40 @@ impl fmt::Display for Statement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Statement::Query(s) => write!(f, "{}", s), - Statement::Analyze { table_name, partitions, for_columns, cache_metadata, noscan, compute_statistics } => { + Statement::Msck { table_name, repair, add_partitions, drop_partitions, sync_partitions } => { + write!(f, "MSCK {repair}TABLE {table}", repair = if *repair { "REPAIR " } else { "" }, table = table_name)?; + write!(f, "{add}{drop}{sync}", + add = if *add_partitions { " ADD PARTITIONS" } else { "" }, + drop = if *drop_partitions { " DROP PARTITIONS" } else { "" }, + sync = if *sync_partitions { " SYNC PARTITIONS" } else { "" } + ) + } + Statement::Truncate { table_name, partitions } => { + write!(f, "TRUNCATE TABLE {}", table_name)?; + if let Some(ref parts) = partitions { + if !parts.is_empty() { + write!(f, " PARTITION ({})", display_comma_separated(parts))?; + } + } + Ok(()) + } + Statement::Analyze { table_name, partitions, for_columns: _, cache_metadata, noscan, compute_statistics } => { + write!(f, "ANALYZE TABLE {}", table_name)?; + if let Some(ref parts) = partitions { + if !parts.is_empty() { + write!(f, " PARTITION ({})", display_comma_separated(parts))?; + } + } + //TODO: Add for columns + if *compute_statistics { + write!(f, " COMPUTE STATISTICS")?; + } + if *noscan { + write!(f, " NOSCAN")?; + } + if *cache_metadata { + write!(f, " CACHE METADATA")?; + } Ok(()) } Statement::Insert { @@ -598,10 +645,15 @@ impl fmt::Display for Statement { columns, source, } => { - write!(f, "INSERT {act} {table_name} ", table_name = table_name, act = if *overwrite { "OVERWRITE" } else { "INTO" })?; + write!(f, "INSERT {act} {table_name} ", table_name = table_name, act = if *overwrite { "OVERWRITE TABLE" } else { "INTO" })?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } + if let Some(ref parts) = partitioned { + if !parts.is_empty() { + write!(f, "PARTITION ({}) ", display_comma_separated(parts))?; + } + } write!(f, "{}", source) } Statement::Copy { @@ -854,13 +906,14 @@ impl fmt::Display for Statement { Statement::SetVariable { local, variable, + hivevar, value, } => { f.write_str("SET ")?; if *local { f.write_str("LOCAL ")?; } - write!(f, "{} = {}", variable, value) + write!(f, "{hivevar}{name} = {value}", hivevar = if *hivevar { "HIVEVAR:" } else { "" }, name = variable, value = display_comma_separated(value)) } Statement::ShowVariable { variable } => write!(f, "SHOW {}", variable), Statement::ShowColumns { diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 57c9b2f30..fb7647b2e 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -277,6 +277,7 @@ define_keywords!( MODIFIES, MODULE, MONTH, + MSCK, MULTISET, NATIONAL, NATURAL, @@ -319,6 +320,7 @@ define_keywords!( PARQUET, PARTITION, PARTITIONED, + PARTITIONS, PERCENT, PERCENTILE_CONT, PERCENTILE_DISC, @@ -356,6 +358,7 @@ define_keywords!( REGR_SYY, RELEASE, RENAME, + REPAIR, REPEATABLE, REPLACE, RESTRICT, @@ -409,6 +412,7 @@ define_keywords!( SUCCEEDS, SUM, SYMMETRIC, + SYNC, SYSTEM, SYSTEM_TIME, SYSTEM_USER, diff --git a/src/parser.rs b/src/parser.rs index dbf2c94ee..b40b34e60 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -133,6 +133,8 @@ impl Parser { Ok(Statement::Query(Box::new(self.parse_query()?))) } Keyword::ANALYZE => Ok(self.parse_analyze()?), + Keyword::TRUNCATE => Ok(self.parse_truncate()?), + Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::DROP => Ok(self.parse_drop()?), Keyword::DELETE => Ok(self.parse_delete()?), @@ -160,6 +162,37 @@ impl Parser { } } + pub fn parse_msck(&mut self) -> Result { + let repair = self.parse_keyword(Keyword::REPAIR); + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let (mut add, mut drop, mut sync) = (false, false, false); + match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { + Some(Keyword::ADD) => { add = true; } + Some(Keyword::DROP) => { drop = true; } + Some(Keyword::SYNC) => { sync = true; } + _ => () + } + self.expect_keyword(Keyword::PARTITIONS)?; + Ok(Statement::Msck { + repair, table_name, add_partitions: add, drop_partitions: drop, sync_partitions: sync + }) + } + + pub fn parse_truncate(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut partitions = None; + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + Ok(Statement::Truncate { + table_name, partitions + }) + } + pub fn parse_analyze(&mut self) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; @@ -171,7 +204,11 @@ impl Parser { loop { match self.parse_one_of_keywords(&[Keyword::PARTITION, Keyword::FOR, Keyword::CACHE, Keyword::NOSCAN, Keyword::COMPUTE]) { - Some(Keyword::PARTITION) => partitions = Some(self.parse_comma_separated(Parser::parse_expr)?), + Some(Keyword::PARTITION) => { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + }, Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { self.expect_keyword(Keyword::COLUMNS)?; @@ -905,7 +942,7 @@ impl Parser { let index = self.index; for &keyword in keywords { if !self.parse_keyword(keyword) { - println!("parse_keywords aborting .. did not find {:?}", keyword); + // println!("parse_keywords aborting .. did not find {:?}", keyword); // reset index and return immediately self.index = index; return false; @@ -2026,25 +2063,31 @@ impl Parser { } let variable = self.parse_identifier()?; if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - - let token = self.peek_token(); - let value = match (self.parse_value(), token) { - (Ok(value), _) => SetVariableValue::Literal(value), - (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), - (Err(_), unexpected) => self.expected("variable value", unexpected)?, - }; - println!("{:?}", value); - Ok(Statement::SetVariable { - local: modifier == Some(Keyword::LOCAL), - variable, - value, - }) + let mut values = vec![]; + loop { + let token = self.peek_token(); + let value = match (self.parse_value(), token) { + (Ok(value), _) => SetVariableValue::Literal(value), + (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), + (Err(_), unexpected) => self.expected("variable value", unexpected)?, + }; + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: Some(Keyword::HIVEVAR) == modifier, + variable, + value: values, + }) + } } else if variable.value == "TRANSACTION" && modifier.is_none() { - Ok(Statement::SetTransaction { + return Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, }) } else { - self.expected("equals sign or TO", self.peek_token()) + return self.expected("equals sign or TO", self.peek_token()) } } @@ -2286,8 +2329,12 @@ impl Parser { } let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; - let partitioned = if self.parse_keywords(&[Keyword::PARTITION]) { - Some(self.parse_comma_separated(Parser::parse_expr)?) + + let partitioned = if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let r = Some(self.parse_comma_separated(Parser::parse_identifier)?); + self.expect_token(&Token::RParen)?; + r } else { None }; diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index de95e6985..837d48567 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -21,14 +21,42 @@ use sqlparser::test_utils::*; use sqlparser::parser::ParserError; #[test] -fn parse_table_create() -> Result<(), ParserError> { +fn parse_table_create() { let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...'"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; hive().verified_stmt(sql); hive().verified_stmt(iof); +} + +#[test] +fn parse_insert_overwrite() { + let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a, b) SELECT a, b, c FROM db.table"#; + hive().verified_stmt(insert_partitions); +} + +#[test] +fn test_truncate() { + let truncate = r#"TRUNCATE TABLE db.table"#; + hive().verified_stmt(truncate); +} - Ok(()) +#[test] +fn parse_analyze() { + let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS NOSCAN CACHE METADATA"#; + hive().verified_stmt(analyze); +} + +#[test] +fn parse_msck() { + let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; + hive().verified_stmt(msck); +} + +#[test] +fn parse_set() { + let set = "SET HIVEVAR:name = a, b, c_d"; + hive().verified_stmt(set); } fn hive() -> TestedDialects { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 481d0cbe1..57a618e22 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -334,8 +334,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("b".into()), + value: vec![SetVariableValue::Ident("b".into())], } ); @@ -344,8 +345,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Literal(Value::SingleQuotedString("b".into())), + value: vec![SetVariableValue::Literal(Value::SingleQuotedString("b".into()))], } ); @@ -354,8 +356,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Literal(number("0")), + value: vec![SetVariableValue::Literal(number("0"))], } ); @@ -364,8 +367,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("DEFAULT".into()), + value: vec![SetVariableValue::Ident("DEFAULT".into())], } ); @@ -374,8 +378,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: true, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("b".into()), + value: vec![SetVariableValue::Ident("b".into())], } ); From aa5fbd00089ff65b863f7a5d9b813de7f2555a80 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 22 Jul 2020 14:26:14 -0400 Subject: [PATCH 03/97] Speculative WITH (...) INSERT --- src/ast/query.rs | 2 + src/parser.rs | 90 ++++++++++++++++++++++++----------------- tests/sqlparser_hive.rs | 6 +++ 3 files changed, 61 insertions(+), 37 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 73477b126..56ba994ec 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -73,6 +73,7 @@ pub enum SetExpr { right: Box, }, Values(Values), + Insert(Statement), // TODO: ANSI SQL supports `TABLE` here. } @@ -82,6 +83,7 @@ impl fmt::Display for SetExpr { SetExpr::Select(s) => write!(f, "{}", s), SetExpr::Query(q) => write!(f, "({})", q), SetExpr::Values(v) => write!(f, "{}", v), + SetExpr::Insert(v) => write!(f, "{}", v), SetExpr::SetOperation { left, right, diff --git a/src/parser.rs b/src/parser.rs index b40b34e60..b67d191c9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1892,40 +1892,52 @@ impl Parser { vec![] }; - let body = self.parse_query_body(0)?; + if !self.parse_keyword(Keyword::INSERT) { + let body = self.parse_query_body(0)?; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()? - } else { - None - }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? + } else { + None + }; - let offset = if self.parse_keyword(Keyword::OFFSET) { - Some(self.parse_offset()?) - } else { - None - }; + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_offset()?) + } else { + None + }; - let fetch = if self.parse_keyword(Keyword::FETCH) { - Some(self.parse_fetch()?) - } else { - None - }; + let fetch = if self.parse_keyword(Keyword::FETCH) { + Some(self.parse_fetch()?) + } else { + None + }; - Ok(Query { - ctes, - body, - limit, - order_by, - offset, - fetch, - }) + Ok(Query { + ctes, + body, + limit, + order_by, + offset, + fetch, + }) + } else { + let insert = self.parse_insert()?; + Ok(Query { + ctes, + body: SetExpr::Insert(insert), + limit: None, + order_by: vec![], + offset: None, + fetch: None + }) + } } /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) @@ -1934,11 +1946,16 @@ impl Parser { name: self.parse_identifier()?, columns: self.parse_parenthesized_column_list(Optional)?, }; - self.expect_keyword(Keyword::AS)?; - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - Ok(Cte { alias, query }) + + if self.parse_keyword(Keyword::AS) { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + Ok(Cte { alias, query }) + } else { + let query = self.parse_query()?; + Ok(Cte { alias, query }) + } } /// Parse a "query body", which is an expression with roughly the @@ -2324,9 +2341,8 @@ impl Parser { pub fn parse_insert(&mut self) -> Result { let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; let overwrite = if action == Keyword::OVERWRITE { true } else { false }; - if overwrite { - self.expect_keyword(Keyword::TABLE)?; - } + // Hive lets you put table here regardless + self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 837d48567..403125743 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -59,6 +59,12 @@ fn parse_set() { hive().verified_stmt(set); } +#[test] +fn parse_with_cte() { + let with = "WITH a AS (SELECT * FROM table) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM a"; + hive().verified_stmt(with); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From a2fdaf0d49294f58adf046c454cb13ea58877c78 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 14:12:06 -0400 Subject: [PATCH 04/97] Speculative debugging assistance --- src/parser.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index b67d191c9..e5a17e817 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -381,7 +381,12 @@ impl Parser { self.expect_token(&Token::RParen)?; Ok(expr) } - unexpected => self.expected("an expression", unexpected), + unexpected => { + self.prev_token(); + self.prev_token(); + self.prev_token(); + self.expected(format!("an expression: {} - {} {} {}", self.index, self.next_token().to_string(), self.next_token().to_string(), self.next_token().to_string()).as_str(), unexpected) + }, }?; if self.parse_keyword(Keyword::COLLATE) { From 388984a395466e9046f00b9ad87691578892cd0f Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 14:14:24 -0400 Subject: [PATCH 05/97] Speculative debugging assistance --- src/parser.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index e5a17e817..d1786ae56 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -385,7 +385,10 @@ impl Parser { self.prev_token(); self.prev_token(); self.prev_token(); - self.expected(format!("an expression: {} - {} {} {}", self.index, self.next_token().to_string(), self.next_token().to_string(), self.next_token().to_string()).as_str(), unexpected) + let t1 = self.next_token().to_string(); + let t2 = self.next_token().to_string(); + let t3 = self.next_token().to_string(); + self.expected(format!("an expression: {} - {} {} {}", self.index, &t1, &t2, &t3).as_str(), unexpected) }, }?; From 368fa95c6cedc714274af6e330f943534b122754 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 14:52:45 -0400 Subject: [PATCH 06/97] Remove debugging assistance and speculative double equals fix --- src/parser.rs | 2 +- src/tokenizer.rs | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index d1786ae56..921ed0243 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -860,7 +860,7 @@ impl Parser { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20), + Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq | Token::DoubleEq => Ok(20), Token::Pipe => Ok(21), Token::Caret => Ok(22), Token::Ampersand => Ok(23), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 520424af3..3ca14e7e7 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -44,6 +44,8 @@ pub enum Token { Comma, /// Whitespace (space, tab, etc) Whitespace(Whitespace), + /// Double equals sign `==` + DoubleEq, /// Equality operator `=` Eq, /// Not Equals operator `<>` (or `!=` in some dialects) @@ -110,6 +112,7 @@ impl fmt::Display for Token { Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s), Token::Comma => f.write_str(","), Token::Whitespace(ws) => write!(f, "{}", ws), + Token::DoubleEq => f.write_str("=="), Token::Eq => f.write_str("="), Token::Neq => f.write_str("<>"), Token::Lt => f.write_str("<"), @@ -395,7 +398,13 @@ impl<'a> Tokenizer<'a> { _ => Ok(Some(Token::Pipe)), } } - '=' => self.consume_and_return(chars, Token::Eq), + '=' => { + chars.next(); + match chars.peek() { + Some('=') => self.consume_and_return(chars, Token::DoubleEq), + _ => Ok(Some(Token::Eq)) + } + }, '.' => self.consume_and_return(chars, Token::Period), '!' => { chars.next(); // consume From 01816802fc32ddd1867eb00ce4854f6c9aec2ea4 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 14:55:05 -0400 Subject: [PATCH 07/97] Remove debugging assistance and speculative double equals fix --- src/parser.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index 921ed0243..81601adfe 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -57,6 +57,7 @@ pub enum IsLateral { } use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; +use crate::ast::Expr::BinaryOp; impl From for ParserError { fn from(e: TokenizerError) -> Self { @@ -720,6 +721,7 @@ impl Parser { pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { let tok = self.next_token(); let regular_binary_operator = match &tok { + Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), Token::Neq => Some(BinaryOperator::NotEq), Token::Gt => Some(BinaryOperator::Gt), From df215333e90a03d77d22d4533314a2696f89ade6 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 15:56:23 -0400 Subject: [PATCH 08/97] Fix partition clauses with values --- src/ast/mod.rs | 2 +- src/parser.rs | 2 +- tests/sqlparser_hive.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a709145a1..0e509f28c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -460,7 +460,7 @@ pub enum Statement { /// A SQL query that specifies what to insert source: Box, /// partitioned insert (Hive) - partitioned: Option> + partitioned: Option> }, Copy { /// TABLE diff --git a/src/parser.rs b/src/parser.rs index 81601adfe..167f61678 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2358,7 +2358,7 @@ impl Parser { let partitioned = if self.parse_keyword(Keyword::PARTITION) { self.expect_token(&Token::LParen)?; - let r = Some(self.parse_comma_separated(Parser::parse_identifier)?); + let r = Some(self.parse_comma_separated(Parser::parse_expr)?); self.expect_token(&Token::RParen)?; r } else { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 403125743..cbedfc0fc 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -31,7 +31,7 @@ fn parse_table_create() { #[test] fn parse_insert_overwrite() { - let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a, b) SELECT a, b, c FROM db.table"#; + let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#; hive().verified_stmt(insert_partitions); } From 9733e2ef548bb920b0e019c2d550a6c304ef171d Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 16:13:30 -0400 Subject: [PATCH 09/97] Add spaceship operator --- src/ast/operator.rs | 2 ++ src/parser.rs | 15 ++++----------- src/tokenizer.rs | 11 ++++++++++- tests/sqlparser_hive.rs | 10 ++++++++-- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 63e75eead..ac52aa6a8 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -47,6 +47,7 @@ pub enum BinaryOperator { Lt, GtEq, LtEq, + Spaceship, Eq, NotEq, And, @@ -71,6 +72,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::Lt => "<", BinaryOperator::GtEq => ">=", BinaryOperator::LtEq => "<=", + BinaryOperator::Spaceship => "<=>", BinaryOperator::Eq => "=", BinaryOperator::NotEq => "<>", BinaryOperator::And => "AND", diff --git a/src/parser.rs b/src/parser.rs index 167f61678..d0e724c3e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -57,7 +57,7 @@ pub enum IsLateral { } use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; -use crate::ast::Expr::BinaryOp; + impl From for ParserError { fn from(e: TokenizerError) -> Self { @@ -382,15 +382,7 @@ impl Parser { self.expect_token(&Token::RParen)?; Ok(expr) } - unexpected => { - self.prev_token(); - self.prev_token(); - self.prev_token(); - let t1 = self.next_token().to_string(); - let t2 = self.next_token().to_string(); - let t3 = self.next_token().to_string(); - self.expected(format!("an expression: {} - {} {} {}", self.index, &t1, &t2, &t3).as_str(), unexpected) - }, + unexpected => self.expected("an expression:", unexpected), }?; if self.parse_keyword(Keyword::COLLATE) { @@ -721,6 +713,7 @@ impl Parser { pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { let tok = self.next_token(); let regular_binary_operator = match &tok { + Token::Spaceship => Some(BinaryOperator::Spaceship), Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), Token::Neq => Some(BinaryOperator::NotEq), @@ -862,7 +855,7 @@ impl Parser { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq | Token::DoubleEq => Ok(20), + Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq | Token::DoubleEq | Token::Spaceship => Ok(20), Token::Pipe => Ok(21), Token::Caret => Ok(22), Token::Ampersand => Ok(23), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3ca14e7e7..55244c346 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -58,6 +58,8 @@ pub enum Token { LtEq, /// Greater Than Or Equals operator `>=` GtEq, + /// Spaceship operator <=> + Spaceship, /// Plus operator `+` Plus, /// Minus operator `-` @@ -113,6 +115,7 @@ impl fmt::Display for Token { Token::Comma => f.write_str(","), Token::Whitespace(ws) => write!(f, "{}", ws), Token::DoubleEq => f.write_str("=="), + Token::Spaceship => f.write_str("<=>"), Token::Eq => f.write_str("="), Token::Neq => f.write_str("<>"), Token::Lt => f.write_str("<"), @@ -416,7 +419,13 @@ impl<'a> Tokenizer<'a> { '<' => { chars.next(); // consume match chars.peek() { - Some('=') => self.consume_and_return(chars, Token::LtEq), + Some('=') => { + chars.next(); + match chars.peek() { + Some('>') => self.consume_and_return(chars, Token::Spaceship), + _ => Ok(Some(Token::LtEq)) + } + }, Some('>') => self.consume_and_return(chars, Token::Neq), _ => Ok(Some(Token::Lt)), } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index cbedfc0fc..51660940a 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -15,10 +15,10 @@ //! Test SQL syntax specific to Hive. The parser based on the generic dialect //! is also tested (on the inputs it can handle). -use sqlparser::ast::*; + use sqlparser::dialect::{GenericDialect, HiveDialect}; use sqlparser::test_utils::*; -use sqlparser::parser::ParserError; + #[test] fn parse_table_create() { @@ -59,6 +59,12 @@ fn parse_set() { hive().verified_stmt(set); } +#[test] +fn test_spaceship() { + let spaceship = "SELECT * FROM db.table WHERE a <=> b"; + hive().verified_stmt(spaceship); +} + #[test] fn parse_with_cte() { let with = "WITH a AS (SELECT * FROM table) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM a"; From 88d7b6986681615d87cf3faf7d38124b7f1fef4c Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 16:31:25 -0400 Subject: [PATCH 10/97] cargo fmt --- examples/cli.rs | 2 +- src/ast/mod.rs | 172 +++++++++++++++++++++++++++--------- src/dialect/mod.rs | 4 +- src/parser.rs | 90 +++++++++++++------ src/test_utils.rs | 2 +- src/tokenizer.rs | 8 +- tests/sqlparser_hive.rs | 2 - tests/sqlparser_postgres.rs | 4 +- 8 files changed, 202 insertions(+), 82 deletions(-) diff --git a/examples/cli.rs b/examples/cli.rs index fb9b7a46d..392f6eb7b 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -39,7 +39,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--ansi" => Box::new(AnsiDialect {}), "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), - "--hive" => Box::new(HiveDialect{}), + "--hive" => Box::new(HiveDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {}", s), }; diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 0e509f28c..9f061bb7c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -432,12 +432,12 @@ pub enum Statement { for_columns: bool, cache_metadata: bool, noscan: bool, - compute_statistics: bool + compute_statistics: bool, }, /// Truncate (Hive) Truncate { table_name: ObjectName, - partitions: Option> + partitions: Option>, }, /// Msck (Hive) Msck { @@ -445,7 +445,7 @@ pub enum Statement { repair: bool, add_partitions: bool, drop_partitions: bool, - sync_partitions: bool + sync_partitions: bool, }, /// SELECT Query(Box), @@ -460,7 +460,7 @@ pub enum Statement { /// A SQL query that specifies what to insert source: Box, /// partitioned insert (Hive) - partitioned: Option> + partitioned: Option>, }, Copy { /// TABLE @@ -585,8 +585,9 @@ pub enum Statement { /// CREATE DATABASE CreateDatabase { db_name: ObjectName, - ine: bool, location: Option, - managed_location: Option + ine: bool, + location: Option, + managed_location: Option, }, /// ASSERT [AS ] Assert { @@ -602,15 +603,43 @@ impl fmt::Display for Statement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Statement::Query(s) => write!(f, "{}", s), - Statement::Msck { table_name, repair, add_partitions, drop_partitions, sync_partitions } => { - write!(f, "MSCK {repair}TABLE {table}", repair = if *repair { "REPAIR " } else { "" }, table = table_name)?; - write!(f, "{add}{drop}{sync}", - add = if *add_partitions { " ADD PARTITIONS" } else { "" }, - drop = if *drop_partitions { " DROP PARTITIONS" } else { "" }, - sync = if *sync_partitions { " SYNC PARTITIONS" } else { "" } + Statement::Msck { + table_name, + repair, + add_partitions, + drop_partitions, + sync_partitions, + } => { + write!( + f, + "MSCK {repair}TABLE {table}", + repair = if *repair { "REPAIR " } else { "" }, + table = table_name + )?; + write!( + f, + "{add}{drop}{sync}", + add = if *add_partitions { + " ADD PARTITIONS" + } else { + "" + }, + drop = if *drop_partitions { + " DROP PARTITIONS" + } else { + "" + }, + sync = if *sync_partitions { + " SYNC PARTITIONS" + } else { + "" + } ) } - Statement::Truncate { table_name, partitions } => { + Statement::Truncate { + table_name, + partitions, + } => { write!(f, "TRUNCATE TABLE {}", table_name)?; if let Some(ref parts) = partitions { if !parts.is_empty() { @@ -619,7 +648,14 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::Analyze { table_name, partitions, for_columns: _, cache_metadata, noscan, compute_statistics } => { + Statement::Analyze { + table_name, + partitions, + for_columns: _, + cache_metadata, + noscan, + compute_statistics, + } => { write!(f, "ANALYZE TABLE {}", table_name)?; if let Some(ref parts) = partitions { if !parts.is_empty() { @@ -645,7 +681,16 @@ impl fmt::Display for Statement { columns, source, } => { - write!(f, "INSERT {act} {table_name} ", table_name = table_name, act = if *overwrite { "OVERWRITE TABLE" } else { "INTO" })?; + write!( + f, + "INSERT {act} {table_name} ", + table_name = table_name, + act = if *overwrite { + "OVERWRITE TABLE" + } else { + "INTO" + } + )?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } @@ -706,7 +751,12 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::CreateDatabase { db_name, ine, location, managed_location } => { + Statement::CreateDatabase { + db_name, + ine, + location, + managed_location, + } => { write!(f, "CREATE")?; if *ine { write!(f, " IF NOT EXISTS")?; @@ -796,8 +846,14 @@ impl fmt::Display for Statement { } match hive_distribution { - HiveDistributionStyle::PARTITIONED { columns } => write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?, - HiveDistributionStyle::CLUSTERED { columns, sorted_by, num_buckets } => { + HiveDistributionStyle::PARTITIONED { columns } => { + write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))? + } + HiveDistributionStyle::CLUSTERED { + columns, + sorted_by, + num_buckets, + } => { write!(f, " CLUSTERED BY ({})", display_comma_separated(&columns))?; if !sorted_by.is_empty() { write!(f, " SORTED BY ({})", display_comma_separated(&sorted_by))?; @@ -806,26 +862,50 @@ impl fmt::Display for Statement { write!(f, " INTO {} BUCKETS", num_buckets)?; } } - HiveDistributionStyle::SKEWED { columns, on, stored_as_directories } => { - write!(f, " SKEWED BY ({})) ON ({})", display_comma_separated(&columns), display_comma_separated(&on))?; + HiveDistributionStyle::SKEWED { + columns, + on, + stored_as_directories, + } => { + write!( + f, + " SKEWED BY ({})) ON ({})", + display_comma_separated(&columns), + display_comma_separated(&on) + )?; if *stored_as_directories { write!(f, " STORED AS DIRECTORIES")?; } - }, - _ => () + } + _ => (), } - if let Some(HiveFormat { row_format, storage, location }) = hive_formats { - + if let Some(HiveFormat { + row_format, + storage, + location, + }) = hive_formats + { match row_format { - Some(HiveRowFormat::SERDE { class }) => write!(f, " ROW FORMAT SERDE '{}'", class)?, + Some(HiveRowFormat::SERDE { class }) => { + write!(f, " ROW FORMAT SERDE '{}'", class)? + } Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?, - None => () + None => (), } match storage { - Some(HiveIOFormat::IOF { input_format, output_format }) => write!(f, " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", input_format, output_format)?, - Some(HiveIOFormat::FileFormat { format }) => write!(f, " STORED AS {}", format)?, - None => () + Some(HiveIOFormat::IOF { + input_format, + output_format, + }) => write!( + f, + " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", + input_format, output_format + )?, + Some(HiveIOFormat::FileFormat { format }) => { + write!(f, " STORED AS {}", format)? + } + None => (), } if let Some(loc) = location { write!(f, " LOCATION '{}'", loc)?; @@ -913,7 +993,13 @@ impl fmt::Display for Statement { if *local { f.write_str("LOCAL ")?; } - write!(f, "{hivevar}{name} = {value}", hivevar = if *hivevar { "HIVEVAR:" } else { "" }, name = variable, value = display_comma_separated(value)) + write!( + f, + "{hivevar}{name} = {value}", + hivevar = if *hivevar { "HIVEVAR:" } else { "" }, + name = variable, + value = display_comma_separated(value) + ) } Statement::ShowVariable { variable } => write!(f, "SHOW {}", variable), Statement::ShowColumns { @@ -1134,40 +1220,38 @@ impl fmt::Display for ObjectType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum HiveDistributionStyle { PARTITIONED { - columns: Vec + columns: Vec, }, CLUSTERED { columns: Vec, sorted_by: Vec, - num_buckets: i32 + num_buckets: i32, }, SKEWED { columns: Vec, on: Vec, - stored_as_directories: bool + stored_as_directories: bool, }, - NONE + NONE, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum HiveRowFormat { - SERDE { - class: String - }, - DELIMITED + SERDE { class: String }, + DELIMITED, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum HiveIOFormat { IOF { - input_format: Expr, - output_format: Expr, + input_format: Expr, + output_format: Expr, }, FileFormat { - format: FileFormat - } + format: FileFormat, + }, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -1175,7 +1259,7 @@ pub enum HiveIOFormat { pub struct HiveFormat { pub row_format: Option, pub storage: Option, - pub location: Option + pub location: Option, } impl Default for HiveFormat { @@ -1183,7 +1267,7 @@ impl Default for HiveFormat { HiveFormat { row_format: None, location: None, - storage: None + storage: None, } } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 98f43823b..5b52dfde8 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -12,20 +12,20 @@ mod ansi; mod generic; +mod hive; pub mod keywords; mod mssql; mod mysql; mod postgresql; -mod hive; use std::fmt::Debug; pub use self::ansi::AnsiDialect; pub use self::generic::GenericDialect; +pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; -pub use self::hive::HiveDialect; pub trait Dialect: Debug { /// Determine if a character starts a quoted identifier. The default diff --git a/src/parser.rs b/src/parser.rs index d0e724c3e..8c2392c3f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -58,7 +58,6 @@ pub enum IsLateral { use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; - impl From for ParserError { fn from(e: TokenizerError) -> Self { ParserError::TokenizerError(format!( @@ -169,14 +168,24 @@ impl Parser { let table_name = self.parse_object_name()?; let (mut add, mut drop, mut sync) = (false, false, false); match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { - Some(Keyword::ADD) => { add = true; } - Some(Keyword::DROP) => { drop = true; } - Some(Keyword::SYNC) => { sync = true; } - _ => () + Some(Keyword::ADD) => { + add = true; + } + Some(Keyword::DROP) => { + drop = true; + } + Some(Keyword::SYNC) => { + sync = true; + } + _ => (), } self.expect_keyword(Keyword::PARTITIONS)?; Ok(Statement::Msck { - repair, table_name, add_partitions: add, drop_partitions: drop, sync_partitions: sync + repair, + table_name, + add_partitions: add, + drop_partitions: drop, + sync_partitions: sync, }) } @@ -190,7 +199,8 @@ impl Parser { self.expect_token(&Token::RParen)?; } Ok(Statement::Truncate { - table_name, partitions + table_name, + partitions, }) } @@ -204,12 +214,18 @@ impl Parser { let mut compute_statistics = false; loop { - match self.parse_one_of_keywords(&[Keyword::PARTITION, Keyword::FOR, Keyword::CACHE, Keyword::NOSCAN, Keyword::COMPUTE]) { + match self.parse_one_of_keywords(&[ + Keyword::PARTITION, + Keyword::FOR, + Keyword::CACHE, + Keyword::NOSCAN, + Keyword::COMPUTE, + ]) { Some(Keyword::PARTITION) => { self.expect_token(&Token::LParen)?; partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); self.expect_token(&Token::RParen)?; - }, + } Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { self.expect_keyword(Keyword::COLUMNS)?; @@ -223,7 +239,7 @@ impl Parser { self.expect_keyword(Keyword::STATISTICS)?; compute_statistics = true } - _ => break + _ => break, } } @@ -233,7 +249,7 @@ impl Parser { partitions, cache_metadata, noscan, - compute_statistics + compute_statistics, }) } @@ -855,7 +871,14 @@ impl Parser { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq | Token::DoubleEq | Token::Spaceship => Ok(20), + Token::Eq + | Token::Lt + | Token::LtEq + | Token::Neq + | Token::Gt + | Token::GtEq + | Token::DoubleEq + | Token::Spaceship => Ok(20), Token::Pipe => Ok(21), Token::Caret => Ok(22), Token::Ampersand => Ok(23), @@ -1127,11 +1150,18 @@ impl Parser { loop { match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), - Some(Keyword::MANAGEDLOCATION) => managed_location = Some(self.parse_literal_string()?), - _ => break + Some(Keyword::MANAGEDLOCATION) => { + managed_location = Some(self.parse_literal_string()?) + } + _ => break, } } - Ok(Statement::CreateDatabase { db_name, ine, location, managed_location }) + Ok(Statement::CreateDatabase { + db_name, + ine, + location, + managed_location, + }) } pub fn parse_create_external_table( @@ -1252,9 +1282,7 @@ impl Parser { self.expect_token(&Token::LParen)?; let columns = self.parse_comma_separated(Parser::parse_column_def)?; self.expect_token(&Token::RParen)?; - Ok(HiveDistributionStyle::PARTITIONED { - columns - }) + Ok(HiveDistributionStyle::PARTITIONED { columns }) } else { Ok(HiveDistributionStyle::NONE) } @@ -1273,7 +1301,10 @@ impl Parser { let input_format = self.parse_expr()?; self.expect_keyword(Keyword::OUTPUTFORMAT)?; let output_format = self.parse_expr()?; - hive_format.storage = Some(HiveIOFormat::IOF {input_format, output_format}); + hive_format.storage = Some(HiveIOFormat::IOF { + input_format, + output_format, + }); } else { let format = self.parse_file_format()?; hive_format.storage = Some(HiveIOFormat::FileFormat { format }); @@ -1281,9 +1312,9 @@ impl Parser { } Some(Keyword::LOCATION) => { hive_format.location = Some(self.parse_literal_string()?); - }, + } None => break, - _ => break + _ => break, } } @@ -1938,7 +1969,7 @@ impl Parser { limit: None, order_by: vec![], offset: None, - fetch: None + fetch: None, }) } } @@ -2077,7 +2108,8 @@ impl Parser { } pub fn parse_set(&mut self) -> Result { - let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); if let Some(Keyword::HIVEVAR) = modifier { self.expect_token(&Token::Colon)?; } @@ -2100,14 +2132,14 @@ impl Parser { hivevar: Some(Keyword::HIVEVAR) == modifier, variable, value: values, - }) + }); } } else if variable.value == "TRANSACTION" && modifier.is_none() { return Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, - }) + }); } else { - return self.expected("equals sign or TO", self.peek_token()) + return self.expected("equals sign or TO", self.peek_token()); } } @@ -2343,7 +2375,11 @@ impl Parser { /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; - let overwrite = if action == Keyword::OVERWRITE { true } else { false }; + let overwrite = if action == Keyword::OVERWRITE { + true + } else { + false + }; // Hive lets you put table here regardless self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name()?; diff --git a/src/test_utils.rs b/src/test_utils.rs index 834a79e35..3295726c1 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -117,7 +117,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), - Box::new(HiveDialect {}) + Box::new(HiveDialect {}), ], } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 55244c346..59d65a91f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -405,9 +405,9 @@ impl<'a> Tokenizer<'a> { chars.next(); match chars.peek() { Some('=') => self.consume_and_return(chars, Token::DoubleEq), - _ => Ok(Some(Token::Eq)) + _ => Ok(Some(Token::Eq)), } - }, + } '.' => self.consume_and_return(chars, Token::Period), '!' => { chars.next(); // consume @@ -423,9 +423,9 @@ impl<'a> Tokenizer<'a> { chars.next(); match chars.peek() { Some('>') => self.consume_and_return(chars, Token::Spaceship), - _ => Ok(Some(Token::LtEq)) + _ => Ok(Some(Token::LtEq)), } - }, + } Some('>') => self.consume_and_return(chars, Token::Neq), _ => Ok(Some(Token::Lt)), } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 51660940a..cdc4257d5 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -15,11 +15,9 @@ //! Test SQL syntax specific to Hive. The parser based on the generic dialect //! is also tested (on the inputs it can handle). - use sqlparser::dialect::{GenericDialect, HiveDialect}; use sqlparser::test_utils::*; - #[test] fn parse_table_create() { let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...'"#; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 57a618e22..2be5ec2c2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -347,7 +347,9 @@ fn parse_set() { local: false, hivevar: false, variable: "a".into(), - value: vec![SetVariableValue::Literal(Value::SingleQuotedString("b".into()))], + value: vec![SetVariableValue::Literal(Value::SingleQuotedString( + "b".into() + ))], } ); From 64b3953751fd55c7780efb72bec3acfbfd3dd8f3 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 24 Jul 2020 16:28:57 -0400 Subject: [PATCH 11/97] fix some lints and tests, add purge on tables ad proper formatting for INSERT INTO TABLE --- src/ast/mod.rs | 19 ++++++++++++------- src/ast/query.rs | 1 + src/dialect/keywords.rs | 1 + src/parser.rs | 17 ++++++++--------- tests/sqlparser_common.rs | 2 ++ tests/sqlparser_hive.rs | 6 ++++++ 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9f061bb7c..a3994bc5b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -461,6 +461,8 @@ pub enum Statement { source: Box, /// partitioned insert (Hive) partitioned: Option>, + /// whether the insert has the table keyword (Hive) + table: bool, }, Copy { /// TABLE @@ -547,6 +549,9 @@ pub enum Statement { /// Whether `CASCADE` was specified. This will be `false` when /// `RESTRICT` or no drop behavior at all was specified. cascade: bool, + /// Hive allows you specify whether the table's stored data will be + /// deleted along with the dropped table + purge: bool, }, /// SET /// @@ -680,16 +685,14 @@ impl fmt::Display for Statement { partitioned, columns, source, + table, } => { write!( f, - "INSERT {act} {table_name} ", + "INSERT {act}{tbl} {table_name} ", table_name = table_name, - act = if *overwrite { - "OVERWRITE TABLE" - } else { - "INTO" - } + act = if *overwrite { "OVERWRITE" } else { "INTO" }, + tbl = if *table { " TABLE" } else { "" } )?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; @@ -975,13 +978,15 @@ impl fmt::Display for Statement { if_exists, names, cascade, + purge, } => write!( f, - "DROP {}{} {}{}", + "DROP {}{} {}{}{}", object_type, if *if_exists { " IF EXISTS" } else { "" }, display_comma_separated(names), if *cascade { " CASCADE" } else { "" }, + if *purge { " PURGE" } else { "" } ), Statement::SetVariable { local, diff --git a/src/ast/query.rs b/src/ast/query.rs index 56ba994ec..5266e485c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -57,6 +57,7 @@ impl fmt::Display for Query { /// A node in a tree, representing a "query body" expression, roughly: /// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]` +#[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum SetExpr { diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index fb7647b2e..e3bb34f51 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -336,6 +336,7 @@ define_keywords!( PREPARE, PRIMARY, PROCEDURE, + PURGE, RANGE, RANK, RCFILE, diff --git a/src/parser.rs b/src/parser.rs index 8c2392c3f..501998eec 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1250,6 +1250,7 @@ impl Parser { let names = self.parse_comma_separated(Parser::parse_object_name)?; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); + let purge = self.parse_keyword(Keyword::PURGE); if cascade && restrict { return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); } @@ -1258,6 +1259,7 @@ impl Parser { if_exists, names, cascade, + purge, }) } @@ -2135,11 +2137,11 @@ impl Parser { }); } } else if variable.value == "TRANSACTION" && modifier.is_none() { - return Ok(Statement::SetTransaction { + Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, - }); + }) } else { - return self.expected("equals sign or TO", self.peek_token()); + self.expected("equals sign or TO", self.peek_token()) } } @@ -2375,13 +2377,9 @@ impl Parser { /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; - let overwrite = if action == Keyword::OVERWRITE { - true - } else { - false - }; + let overwrite = action == Keyword::OVERWRITE; // Hive lets you put table here regardless - self.parse_keyword(Keyword::TABLE); + let table = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; @@ -2400,6 +2398,7 @@ impl Parser { partitioned, columns, source, + table, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1d55a01f1..9f0191346 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2715,6 +2715,7 @@ fn parse_drop_table() { if_exists, names, cascade, + purge: _, } => { assert_eq!(false, if_exists); assert_eq!(ObjectType::Table, object_type); @@ -2734,6 +2735,7 @@ fn parse_drop_table() { if_exists, names, cascade, + purge: _, } => { assert_eq!(true, if_exists); assert_eq!(ObjectType::Table, object_type); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index cdc4257d5..5e083c745 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -69,6 +69,12 @@ fn parse_with_cte() { hive().verified_stmt(with); } +#[test] +fn drop_table_purge() { + let purge = "DROP TABLE db.table_name PURGE"; + hive().verified_stmt(purge); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 5b795391e93cc36454055d788f663d4dadbc33d1 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 24 Jul 2020 17:18:34 -0400 Subject: [PATCH 12/97] Fix a lint, add CREATE TABLE ... LIKE ... --- src/ast/mod.rs | 10 ++++++++-- src/parser.rs | 5 +++++ tests/sqlparser_hive.rs | 12 ++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a3994bc5b..fd53366ed 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -515,6 +515,7 @@ pub enum Statement { location: Option, query: Option>, without_rowid: bool, + like: Option }, /// SQLite's `CREATE VIRTUAL TABLE .. USING ()` CreateVirtualTable { @@ -817,6 +818,7 @@ impl fmt::Display for Statement { location, query, without_rowid, + like } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: @@ -839,7 +841,7 @@ impl fmt::Display for Statement { write!(f, ", ")?; } write!(f, "{})", display_comma_separated(constraints))?; - } else if query.is_none() { + } else if query.is_none() && like.is_none() { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens write!(f, " ()")?; } @@ -848,9 +850,13 @@ impl fmt::Display for Statement { write!(f, " WITHOUT ROWID")?; } + // Only for Hive + if let Some(l) = like { + write!(f, " LIKE {}", l)?; + } match hive_distribution { HiveDistributionStyle::PARTITIONED { columns } => { - write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))? + write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?; } HiveDistributionStyle::CLUSTERED { columns, diff --git a/src/parser.rs b/src/parser.rs index 501998eec..8408da9b7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1192,6 +1192,7 @@ impl Parser { location: Some(location), query: None, without_rowid: false, + like: None }) } @@ -1337,6 +1338,9 @@ impl Parser { pub fn parse_create_table(&mut self, or_replace: bool) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; + let like = if self.parse_keyword(Keyword::LIKE) { + self.parse_object_name().ok() + } else { None }; // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; @@ -1369,6 +1373,7 @@ impl Parser { location: None, query, without_rowid, + like }) } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 5e083c745..2079c35f1 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -75,14 +75,14 @@ fn drop_table_purge() { hive().verified_stmt(purge); } -fn hive() -> TestedDialects { - TestedDialects { - dialects: vec![Box::new(HiveDialect {})], - } +#[test] +fn create_table_like() { + let like = "CREATE TABLE db.table_name LIKE db.other_table"; + hive().verified_stmt(like); } -fn hive_and_generic() -> TestedDialects { +fn hive() -> TestedDialects { TestedDialects { - dialects: vec![Box::new(HiveDialect {}), Box::new(GenericDialect {})], + dialects: vec![Box::new(HiveDialect {})], } } From c1d4cbe2ee3d71709721fed79216fab612ad17f7 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 24 Jul 2020 17:20:15 -0400 Subject: [PATCH 13/97] cargo fmt --- src/ast/mod.rs | 4 ++-- src/parser.rs | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index fd53366ed..994a60bd2 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -515,7 +515,7 @@ pub enum Statement { location: Option, query: Option>, without_rowid: bool, - like: Option + like: Option, }, /// SQLite's `CREATE VIRTUAL TABLE .. USING ()` CreateVirtualTable { @@ -818,7 +818,7 @@ impl fmt::Display for Statement { location, query, without_rowid, - like + like, } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: diff --git a/src/parser.rs b/src/parser.rs index 8408da9b7..30106a3fd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1192,7 +1192,7 @@ impl Parser { location: Some(location), query: None, without_rowid: false, - like: None + like: None, }) } @@ -1340,7 +1340,9 @@ impl Parser { let table_name = self.parse_object_name()?; let like = if self.parse_keyword(Keyword::LIKE) { self.parse_object_name().ok() - } else { None }; + } else { + None + }; // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; @@ -1373,7 +1375,7 @@ impl Parser { location: None, query, without_rowid, - like + like, }) } From 18050aa94729a1458006cd0ee870f133c84c0bb4 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 24 Jul 2020 17:20:56 -0400 Subject: [PATCH 14/97] Fixed a lint --- tests/sqlparser_hive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 2079c35f1..a8617eb6d 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -15,7 +15,7 @@ //! Test SQL syntax specific to Hive. The parser based on the generic dialect //! is also tested (on the inputs it can handle). -use sqlparser::dialect::{GenericDialect, HiveDialect}; +use sqlparser::dialect::HiveDialect; use sqlparser::test_utils::*; #[test] From 77092e6c19e63352cda37bcfb7daf90bf7cbd697 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Sat, 25 Jul 2020 14:55:42 -0400 Subject: [PATCH 15/97] Address some PR changes --- src/ast/data_type.rs | 2 +- src/ast/mod.rs | 55 ++++++++++++++++++++++---------------------- src/parser.rs | 26 ++++++++------------- 3 files changed, 37 insertions(+), 46 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index cc8bd3260..388703e76 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -61,7 +61,7 @@ pub enum DataType { Regclass, /// Text Text, - /// String (Hive) + /// String String, /// Bytea Bytea, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 994a60bd2..9c78b3fbd 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -420,6 +420,24 @@ impl fmt::Display for WindowFrameBound { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum PartitionAction { + ADD, + DROP, + SYNC, +} + +impl fmt::Display for PartitionAction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PartitionAction::SYNC => f.write_str("SYNC PARTITIONS"), + PartitionAction::DROP => f.write_str("DROP PARTITIONS"), + PartitionAction::ADD => f.write_str("ADD PARTITIONS"), + } + } +} + /// A top-level statement (SELECT, INSERT, CREATE, etc.) #[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -443,9 +461,7 @@ pub enum Statement { Msck { table_name: ObjectName, repair: bool, - add_partitions: bool, - drop_partitions: bool, - sync_partitions: bool, + partition_action: Option, }, /// SELECT Query(Box), @@ -591,7 +607,7 @@ pub enum Statement { /// CREATE DATABASE CreateDatabase { db_name: ObjectName, - ine: bool, + if_not_exists: bool, location: Option, managed_location: Option, }, @@ -612,9 +628,7 @@ impl fmt::Display for Statement { Statement::Msck { table_name, repair, - add_partitions, - drop_partitions, - sync_partitions, + partition_action, } => { write!( f, @@ -622,25 +636,10 @@ impl fmt::Display for Statement { repair = if *repair { "REPAIR " } else { "" }, table = table_name )?; - write!( - f, - "{add}{drop}{sync}", - add = if *add_partitions { - " ADD PARTITIONS" - } else { - "" - }, - drop = if *drop_partitions { - " DROP PARTITIONS" - } else { - "" - }, - sync = if *sync_partitions { - " SYNC PARTITIONS" - } else { - "" - } - ) + if let Some(pa) = partition_action { + write!(f, " {}", pa)?; + } + Ok(()) } Statement::Truncate { table_name, @@ -757,12 +756,12 @@ impl fmt::Display for Statement { } Statement::CreateDatabase { db_name, - ine, + if_not_exists, location, managed_location, } => { write!(f, "CREATE")?; - if *ine { + if *if_not_exists { write!(f, " IF NOT EXISTS")?; } write!(f, " {}", db_name)?; diff --git a/src/parser.rs b/src/parser.rs index 30106a3fd..d74b3b943 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -166,26 +166,18 @@ impl Parser { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; - let (mut add, mut drop, mut sync) = (false, false, false); - match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { - Some(Keyword::ADD) => { - add = true; - } - Some(Keyword::DROP) => { - drop = true; - } - Some(Keyword::SYNC) => { - sync = true; - } - _ => (), - } + let partition_action = + match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { + Some(Keyword::ADD) => Some(PartitionAction::ADD), + Some(Keyword::DROP) => Some(PartitionAction::DROP), + Some(Keyword::SYNC) => Some(PartitionAction::SYNC), + _ => None, + }; self.expect_keyword(Keyword::PARTITIONS)?; Ok(Statement::Msck { repair, table_name, - add_partitions: add, - drop_partitions: drop, - sync_partitions: sync, + partition_action, }) } @@ -1158,7 +1150,7 @@ impl Parser { } Ok(Statement::CreateDatabase { db_name, - ine, + if_not_exists: ine, location, managed_location, }) From f4449593dbe35f72dbf696261cbb78937d0c0bfd Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 27 Jul 2020 17:48:42 -0400 Subject: [PATCH 16/97] Disabled hive dialect from SQL common for now, added to the dialect to allow alises starting with numbers --- src/ast/mod.rs | 12 ++++++------ src/dialect/hive.rs | 2 +- src/parser.rs | 6 +++--- src/test_utils.rs | 2 +- tests/sqlparser_hive.rs | 6 ++++++ 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9c78b3fbd..b4a6fc054 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -422,18 +422,18 @@ impl fmt::Display for WindowFrameBound { #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub enum PartitionAction { +pub enum AddDropSync { ADD, DROP, SYNC, } -impl fmt::Display for PartitionAction { +impl fmt::Display for AddDropSync { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - PartitionAction::SYNC => f.write_str("SYNC PARTITIONS"), - PartitionAction::DROP => f.write_str("DROP PARTITIONS"), - PartitionAction::ADD => f.write_str("ADD PARTITIONS"), + AddDropSync::SYNC => f.write_str("SYNC PARTITIONS"), + AddDropSync::DROP => f.write_str("DROP PARTITIONS"), + AddDropSync::ADD => f.write_str("ADD PARTITIONS"), } } } @@ -461,7 +461,7 @@ pub enum Statement { Msck { table_name: ObjectName, repair: bool, - partition_action: Option, + partition_action: Option, }, /// SELECT Query(Box), diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 71a5eee26..0513b1175 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,7 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/parser.rs b/src/parser.rs index d74b3b943..34284dd0c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -168,9 +168,9 @@ impl Parser { let table_name = self.parse_object_name()?; let partition_action = match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { - Some(Keyword::ADD) => Some(PartitionAction::ADD), - Some(Keyword::DROP) => Some(PartitionAction::DROP), - Some(Keyword::SYNC) => Some(PartitionAction::SYNC), + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), _ => None, }; self.expect_keyword(Keyword::PARTITIONS)?; diff --git a/src/test_utils.rs b/src/test_utils.rs index 3295726c1..c3490ee60 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -117,7 +117,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), - Box::new(HiveDialect {}), + // Box::new(HiveDialect {}), // TODO: Re-enable when we can parse aliases that begin wtih numbers ], } } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index a8617eb6d..b4ef30b96 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -81,6 +81,12 @@ fn create_table_like() { hive().verified_stmt(like); } +#[test] +fn test_identifier() { + let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; + hive().verified_stmt(between); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From b53d78e12e28c3ce0d1f29fb533209b54b637793 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 27 Jul 2020 18:01:24 -0400 Subject: [PATCH 17/97] Cargo fmt --- src/dialect/hive.rs | 5 ++++- src/parser.rs | 9 ++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 0513b1175..c09570c59 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,10 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' || (ch >= '0' && ch <= '9') + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || ch == '$' + || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/parser.rs b/src/parser.rs index 34284dd0c..0a9e5ea34 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1133,7 +1133,6 @@ impl Parser { Ok(Statement::CreateSchema { schema_name }) } - pub fn parse_create_database(&mut self) -> Result { let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let db_name = self.parse_object_name()?; @@ -1156,10 +1155,10 @@ impl Parser { }) } - pub fn parse_create_external_table( - &mut self, - or_replace: bool, - ) -> Result { + pub fn parse_create_external_table( + &mut self, + or_replace: bool, + ) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; From d076d4142d7b34e1e35b84e933526073b25fb26b Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 27 Jul 2020 23:46:15 -0400 Subject: [PATCH 18/97] Support for ALTER TABLE with PARTITION --- src/ast/ddl.rs | 48 ++++++++++++++++++++++++++++++--- src/parser.rs | 59 ++++++++++++++++++++++++++++++++--------- tests/sqlparser_hive.rs | 18 +++++++++++++ 3 files changed, 110 insertions(+), 15 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index f46364251..8c1a230d2 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -24,31 +24,64 @@ pub enum AlterTableOperation { /// `ADD ` AddConstraint(TableConstraint), /// `ADD [ COLUMN ] ` - AddColumn { column_def: ColumnDef }, + AddColumn { + column_def: ColumnDef, + }, /// TODO: implement `DROP CONSTRAINT ` - DropConstraint { name: Ident }, + DropConstraint { + name: Ident, + }, /// `DROP [ COLUMN ] [ IF EXISTS ] [ CASCADE ]` DropColumn { column_name: Ident, if_exists: bool, cascade: bool, }, + /// `RENAME TO PARTITION (partition=val)` + RenamePartitions { + old_partitions: Vec, + new_partitions: Vec, + }, + /// Add Partitions + AddPartitions { + if_not_exists: bool, + new_partitions: Vec, + }, + DropPartitions { + partitions: Vec, + }, /// `RENAME [ COLUMN ] TO ` RenameColumn { old_column_name: Ident, new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { table_name: Ident }, + RenameTable { + table_name: Ident, + }, } impl fmt::Display for AlterTableOperation { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions, + } => write!( + f, + "ADD{ine} PARTITION ({})", + display_comma_separated(new_partitions), + ine = if *if_not_exists { " IF NOT EXISTS" } else { "" } + ), AlterTableOperation::AddConstraint(c) => write!(f, "ADD {}", c), AlterTableOperation::AddColumn { column_def } => { write!(f, "ADD COLUMN {}", column_def.to_string()) } + AlterTableOperation::DropPartitions { partitions } => write!( + f, + "DROP PARTITION ({})", + display_comma_separated(partitions) + ), AlterTableOperation::DropConstraint { name } => write!(f, "DROP CONSTRAINT {}", name), AlterTableOperation::DropColumn { column_name, @@ -61,6 +94,15 @@ impl fmt::Display for AlterTableOperation { column_name, if *cascade { " CASCADE" } else { "" } ), + AlterTableOperation::RenamePartitions { + old_partitions, + new_partitions, + } => write!( + f, + "PARTITION ({}) RENAME TO PARTITION ({})", + display_comma_separated(old_partitions), + display_comma_separated(new_partitions) + ), AlterTableOperation::RenameColumn { old_column_name, new_column_name, diff --git a/src/parser.rs b/src/parser.rs index 0a9e5ea34..fd0f01ff8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1570,9 +1570,21 @@ impl Parser { if let Some(constraint) = self.parse_optional_table_constraint()? { AlterTableOperation::AddConstraint(constraint) } else { - let _ = self.parse_keyword(Keyword::COLUMN); - let column_def = self.parse_column_def()?; - AlterTableOperation::AddColumn { column_def } + let if_not_exists = + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions: partitions, + } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let column_def = self.parse_column_def()?; + AlterTableOperation::AddColumn { column_def } + } } } else if self.parse_keyword(Keyword::RENAME) { if self.parse_keyword(Keyword::TO) { @@ -1589,17 +1601,40 @@ impl Parser { } } } else if self.parse_keyword(Keyword::DROP) { - let _ = self.parse_keyword(Keyword::COLUMN); - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { partitions } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, + } + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, } } else { - return self.expected("ADD, RENAME, or DROP after ALTER TABLE", self.peek_token()); + return self.expected( + "ADD, RENAME, PARTITION or DROP after ALTER TABLE", + self.peek_token(), + ); }; Ok(Statement::AlterTable { name: table_name, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index b4ef30b96..23b50d3e0 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -87,6 +87,24 @@ fn test_identifier() { hive().verified_stmt(between); } +#[test] +fn test_alter_partition() { + let alter = "ALTER TABLE db.table PARTITION (a = 2) RENAME TO PARTITION (a = 1)"; + hive().verified_stmt(alter); +} + +#[test] +fn test_add_partition() { + let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (a = 'asdf', b = 2)"; + hive().verified_stmt(add); +} + +#[test] +fn test_drop_partition() { + let drop = "ALTER TABLE db.table DROP PARTITION (a = 1)"; + hive().verified_stmt(drop); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From b6c545d99070ad256e17ac63b22cb03d4211f214 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 00:06:45 -0400 Subject: [PATCH 19/97] Support for ALTER TABLE DROP IF EXISTS --- src/ast/ddl.rs | 23 +++++++++++------------ src/parser.rs | 15 +++++++++++++-- tests/sqlparser_hive.rs | 6 ++++++ 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 8c1a230d2..d4e7a9be0 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -24,13 +24,9 @@ pub enum AlterTableOperation { /// `ADD ` AddConstraint(TableConstraint), /// `ADD [ COLUMN ] ` - AddColumn { - column_def: ColumnDef, - }, + AddColumn { column_def: ColumnDef }, /// TODO: implement `DROP CONSTRAINT ` - DropConstraint { - name: Ident, - }, + DropConstraint { name: Ident }, /// `DROP [ COLUMN ] [ IF EXISTS ] [ CASCADE ]` DropColumn { column_name: Ident, @@ -49,6 +45,7 @@ pub enum AlterTableOperation { }, DropPartitions { partitions: Vec, + if_exists: bool, }, /// `RENAME [ COLUMN ] TO ` RenameColumn { @@ -56,9 +53,7 @@ pub enum AlterTableOperation { new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { - table_name: Ident, - }, + RenameTable { table_name: Ident }, } impl fmt::Display for AlterTableOperation { @@ -77,10 +72,14 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::AddColumn { column_def } => { write!(f, "ADD COLUMN {}", column_def.to_string()) } - AlterTableOperation::DropPartitions { partitions } => write!( + AlterTableOperation::DropPartitions { + partitions, + if_exists, + } => write!( f, - "DROP PARTITION ({})", - display_comma_separated(partitions) + "DROP{ie} PARTITION ({})", + display_comma_separated(partitions), + ie = if *if_exists { " IF EXISTS" } else { "" } ), AlterTableOperation::DropConstraint { name } => write!(f, "DROP CONSTRAINT {}", name), AlterTableOperation::DropColumn { diff --git a/src/parser.rs b/src/parser.rs index fd0f01ff8..1fcbc649f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1601,11 +1601,22 @@ impl Parser { } } } else if self.parse_keyword(Keyword::DROP) { - if self.parse_keyword(Keyword::PARTITION) { + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { partitions } + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, + } } else { let _ = self.parse_keyword(Keyword::COLUMN); let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 23b50d3e0..e8790d5bf 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -105,6 +105,12 @@ fn test_drop_partition() { hive().verified_stmt(drop); } +#[test] +fn test_drop_if_exists() { + let drop = "ALTER TABLE db.table DROP IF EXISTS PARTITION (a = 'b', c = 'd')"; + hive().verified_stmt(drop); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 78b53dc61dc52a8c729171dbce229f9deaaf6f27 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 10:02:06 -0400 Subject: [PATCH 20/97] Support CLUSTER BY and JOIN with no join condition --- src/ast/query.rs | 10 ++++++++++ src/dialect/keywords.rs | 3 +++ src/parser.rs | 10 +++++++++- tests/sqlparser_hive.rs | 12 ++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 5266e485c..1466f2b64 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -133,6 +133,8 @@ pub struct Select { pub selection: Option, /// GROUP BY pub group_by: Vec, + /// CLUSTER BY (Hive) + pub cluster_by: Vec, /// HAVING pub having: Option, } @@ -153,6 +155,13 @@ impl fmt::Display for Select { if !self.group_by.is_empty() { write!(f, " GROUP BY {}", display_comma_separated(&self.group_by))?; } + if !self.cluster_by.is_empty() { + write!( + f, + " CLUSTER BY {}", + display_comma_separated(&self.cluster_by) + )?; + } if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } @@ -389,6 +398,7 @@ pub enum JoinConstraint { On(Expr), Using(Vec), Natural, + None, } /// An `ORDER BY` expression diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index e3bb34f51..4ec5be073 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -116,6 +116,7 @@ define_keywords!( CHECK, CLOB, CLOSE, + CLUSTER, COALESCE, COLLATE, COLLECT, @@ -504,6 +505,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::RIGHT, Keyword::NATURAL, Keyword::USING, + Keyword::CLUSTER, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, ]; @@ -525,6 +527,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::UNION, Keyword::EXCEPT, Keyword::INTERSECT, + Keyword::CLUSTER, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, ]; diff --git a/src/parser.rs b/src/parser.rs index 1fcbc649f..af0a297cb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2136,6 +2136,12 @@ impl Parser { vec![] }; + let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let having = if self.parse_keyword(Keyword::HAVING) { Some(self.parse_expr()?) } else { @@ -2149,6 +2155,7 @@ impl Parser { from, selection, group_by, + cluster_by, having, }) } @@ -2414,7 +2421,8 @@ impl Parser { let columns = self.parse_parenthesized_column_list(Mandatory)?; Ok(JoinConstraint::Using(columns)) } else { - self.expected("ON, or USING after JOIN", self.peek_token()) + Ok(JoinConstraint::None) + //self.expected("ON, or USING after JOIN", self.peek_token()) } } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e8790d5bf..22375d5ff 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -111,6 +111,18 @@ fn test_drop_if_exists() { hive().verified_stmt(drop); } +#[test] +fn test_cluster_by() { + let cluster = "SELECT a FROM db.table CLUSTER BY a, b"; + hive().verified_stmt(cluster); +} + +#[test] +fn no_join_condition() { + let join = "SELECT a, b FROM db.table_name JOIN a"; + hive().verified_stmt(join); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From c56cfccf9d705014395c196102b3744f444a8207 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 11:13:29 -0400 Subject: [PATCH 21/97] Support for columns defined after partition --- src/ast/mod.rs | 6 ++++++ src/parser.rs | 5 +++++ tests/sqlparser_hive.rs | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b4a6fc054..ad1b46eec 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -477,6 +477,8 @@ pub enum Statement { source: Box, /// partitioned insert (Hive) partitioned: Option>, + /// Columns defined after PARTITION + after_columns: Vec, /// whether the insert has the table keyword (Hive) table: bool, }, @@ -684,6 +686,7 @@ impl fmt::Display for Statement { overwrite, partitioned, columns, + after_columns, source, table, } => { @@ -702,6 +705,9 @@ impl fmt::Display for Statement { write!(f, "PARTITION ({}) ", display_comma_separated(parts))?; } } + if !after_columns.is_empty() { + write!(f, "({}) ", display_comma_separated(after_columns))?; + } write!(f, "{}", source) } Statement::Copy { diff --git a/src/parser.rs b/src/parser.rs index af0a297cb..8d775e613 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2443,12 +2443,17 @@ impl Parser { } else { None }; + + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = self.parse_parenthesized_column_list(Optional)?; + let source = Box::new(self.parse_query()?); Ok(Statement::Insert { table_name, overwrite, partitioned, columns, + after_columns, source, table, }) diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 22375d5ff..a686e669f 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -123,6 +123,12 @@ fn no_join_condition() { hive().verified_stmt(join); } +#[test] +fn columns_after_partition() { + let query = "INSERT INTO db.table_name PARTITION (a, b) (c, d) SELECT a, b, c, d FROM db.table"; + hive().verified_stmt(query); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 3db166d94c8d8fcc91ca8a47519499c484e7d6e3 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 11:36:05 -0400 Subject: [PATCH 22/97] Support ANALYZE FOR COLUMNS --- src/ast/mod.rs | 9 +++++++-- src/parser.rs | 7 ++++++- tests/sqlparser_hive.rs | 6 ++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ad1b46eec..e99343342 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -448,6 +448,7 @@ pub enum Statement { table_name: ObjectName, partitions: Option>, for_columns: bool, + columns: Vec, cache_metadata: bool, noscan: bool, compute_statistics: bool, @@ -658,7 +659,8 @@ impl fmt::Display for Statement { Statement::Analyze { table_name, partitions, - for_columns: _, + for_columns, + columns, cache_metadata, noscan, compute_statistics, @@ -669,7 +671,7 @@ impl fmt::Display for Statement { write!(f, " PARTITION ({})", display_comma_separated(parts))?; } } - //TODO: Add for columns + if *compute_statistics { write!(f, " COMPUTE STATISTICS")?; } @@ -679,6 +681,9 @@ impl fmt::Display for Statement { if *cache_metadata { write!(f, " CACHE METADATA")?; } + if *for_columns { + write!(f, " FOR COLUMNS {}", display_comma_separated(columns))?; + } Ok(()) } Statement::Insert { diff --git a/src/parser.rs b/src/parser.rs index 8d775e613..f74e50cdd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -204,7 +204,7 @@ impl Parser { let mut noscan = false; let mut partitions = None; let mut compute_statistics = false; - + let mut columns = vec![]; loop { match self.parse_one_of_keywords(&[ Keyword::PARTITION, @@ -221,6 +221,10 @@ impl Parser { Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { self.expect_keyword(Keyword::COLUMNS)?; + columns = self.parse_comma_separated(Parser::parse_identifier)?; + if columns.is_empty() { + self.expected("columns identifiers", self.peek_token())?; + } for_columns = true } Some(Keyword::CACHE) => { @@ -238,6 +242,7 @@ impl Parser { Ok(Statement::Analyze { table_name, for_columns, + columns, partitions, cache_metadata, noscan, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index a686e669f..e33a60f47 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -45,6 +45,12 @@ fn parse_analyze() { hive().verified_stmt(analyze); } +#[test] +fn parse_analyze_for_columns() { + let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS a, b, c"#; + hive().verified_stmt(analyze); +} + #[test] fn parse_msck() { let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; From a9e2e301bff6dced2f43560a7bdffffb3467a469 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 12:24:44 -0400 Subject: [PATCH 23/97] Support numeric literals ending with 'L' --- src/ast/value.rs | 6 +++--- src/dialect/hive.rs | 1 - src/parser.rs | 10 +++++----- src/test_utils.rs | 2 +- src/tokenizer.rs | 32 +++++++++++++++++++------------- tests/sqlparser_common.rs | 2 +- tests/sqlparser_hive.rs | 9 ++++++++- 7 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 9e82c175d..901fa5158 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -22,9 +22,9 @@ use std::fmt; pub enum Value { /// Numeric literal #[cfg(not(feature = "bigdecimal"))] - Number(String), + Number(String, bool), #[cfg(feature = "bigdecimal")] - Number(BigDecimal), + Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), /// N'string value' @@ -59,7 +59,7 @@ pub enum Value { impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Value::Number(v) => write!(f, "{}", v), + Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index c09570c59..aaec9888f 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -12,7 +12,6 @@ impl Dialect for HiveDialect { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' - || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/parser.rs b/src/parser.rs index f74e50cdd..f5be87002 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -377,7 +377,7 @@ impl Parser { expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), }) } - Token::Number(_) + Token::Number(_, _) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => { @@ -1722,8 +1722,8 @@ impl Parser { // The call to n.parse() returns a bigdecimal when the // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). - Token::Number(ref n) => match n.parse() { - Ok(n) => Ok(Value::Number(n)), + Token::Number(ref n, l) => match n.parse() { + Ok(n) => Ok(Value::Number(n, l)), Err(e) => parser_err!(format!("Could not parse '{}' as number: {}", n, e)), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), @@ -1735,7 +1735,7 @@ impl Parser { pub fn parse_number_value(&mut self) -> Result { match self.parse_value()? { - v @ Value::Number(_) => Ok(v), + v @ Value::Number(_, _) => Ok(v), _ => { self.prev_token(); self.expected("literal number", self.peek_token()) @@ -1746,7 +1746,7 @@ impl Parser { /// Parse an unsigned literal integer/long pub fn parse_literal_uint(&mut self) -> Result { match self.next_token() { - Token::Number(s) => s.parse::().map_err(|e| { + Token::Number(s, _) => s.parse::().map_err(|e| { ParserError::ParserError(format!("Could not parse '{}' as u64: {}", s, e)) }), unexpected => self.expected("literal int", unexpected), diff --git a/src/test_utils.rs b/src/test_utils.rs index c3490ee60..b45938c9a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -139,5 +139,5 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr { } pub fn number(n: &'static str) -> Value { - Value::Number(n.parse().unwrap()) + Value::Number(n.parse().unwrap(), false) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 59d65a91f..002b7ffb6 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -31,7 +31,7 @@ pub enum Token { /// A keyword (like SELECT) or an optionally quoted SQL identifier Word(Word), /// An unsigned numeric literal - Number(String), + Number(String, bool), /// A character that could not be tokenized Char(char), /// Single quoted string: i.e: 'string' @@ -107,7 +107,7 @@ impl fmt::Display for Token { match self { Token::EOF => f.write_str("EOF"), Token::Word(ref w) => write!(f, "{}", w), - Token::Number(ref n) => f.write_str(n), + Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }), Token::Char(ref c) => write!(f, "{}", c), Token::SingleQuotedString(ref s) => write!(f, "'{}'", s), Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s), @@ -266,7 +266,7 @@ impl<'a> Tokenizer<'a> { Token::Whitespace(Whitespace::Tab) => self.col += 4, Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64, Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2, - Token::Number(s) => self.col += s.len() as u64, + Token::Number(s, _) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64, _ => self.col += 1, } @@ -356,7 +356,13 @@ impl<'a> Tokenizer<'a> { '0'..='9' | '.' => true, _ => false, }); - Ok(Some(Token::Number(s))) + let long = if chars.peek() == Some(&'L') { + chars.next(); + true + } else { + false + }; + Ok(Some(Token::Number(s, long))) } // punctuation '(' => self.consume_and_return(chars, Token::LParen), @@ -575,7 +581,7 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -593,7 +599,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_word("sqrt", None), Token::LParen, - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), Token::RParen, ]; @@ -665,11 +671,11 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), Token::Whitespace(Whitespace::Space), Token::make_keyword("LIMIT"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("5")), + Token::Number(String::from("5"), false), ]; compare(expected, tokens); @@ -805,11 +811,11 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Number("0".to_string()), + Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment( "this is a comment\n".to_string(), )), - Token::Number("1".to_string()), + Token::Number("1".to_string(), false), ]; compare(expected, tokens); } @@ -835,11 +841,11 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Number("0".to_string()), + Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( "multi-line\n* /comment".to_string(), )), - Token::Number("1".to_string()), + Token::Number("1".to_string(), false), ]; compare(expected, tokens); } @@ -906,7 +912,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_keyword("TOP"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("5")), + Token::Number(String::from("5"), false), Token::Whitespace(Whitespace::Space), Token::make_word("bar", Some('[')), Token::Whitespace(Whitespace::Space), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9f0191346..3a6910de6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -455,7 +455,7 @@ fn parse_number() { ); #[cfg(not(feature = "bigdecimal"))] - assert_eq!(expr, Expr::Value(Value::Number("1.0".into()))); + assert_eq!(expr, Expr::Value(Value::Number("1.0".into(), false))); } #[test] diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e33a60f47..0418999dc 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -87,7 +87,8 @@ fn create_table_like() { hive().verified_stmt(like); } -#[test] +// Turning off this test until we can parse identifiers starting with numbers :( +#[ignore] fn test_identifier() { let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; hive().verified_stmt(between); @@ -135,6 +136,12 @@ fn columns_after_partition() { hive().verified_stmt(query); } +#[test] +fn long_numerics() { + let query = r#"SELECT MIN(MIN(10, 5), 1L) AS a"#; + hive().verified_stmt(query); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From eccb86d660bf1a428ef0e29eb990d047c0e7de13 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 15:57:35 -0400 Subject: [PATCH 24/97] Support CREATE [TEMP|TEMPORARY] TABLE --- src/ast/mod.rs | 5 ++++- src/dialect/hive.rs | 4 +--- src/dialect/keywords.rs | 2 ++ src/parser.rs | 13 +++++++++++-- tests/sqlparser_hive.rs | 16 ++++++++++++++++ 5 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e99343342..b10b34716 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -520,6 +520,7 @@ pub enum Statement { /// CREATE TABLE CreateTable { or_replace: bool, + temporary: bool, external: bool, if_not_exists: bool, /// Table name @@ -824,6 +825,7 @@ impl fmt::Display for Statement { hive_distribution, hive_formats, external, + temporary, file_format, location, query, @@ -839,10 +841,11 @@ impl fmt::Display for Statement { // `CREATE TABLE t (a INT) AS SELECT a from t2` write!( f, - "CREATE {or_replace}{external}TABLE {if_not_exists}{name}", + "CREATE {or_replace}{external}{temporary}TABLE {if_not_exists}{name}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, external = if *external { "EXTERNAL " } else { "" }, if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + temporary = if *temporary { "TEMPORARY " } else { "" }, name = name, )?; if !columns.is_empty() || !constraints.is_empty() { diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index aaec9888f..71a5eee26 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,9 +9,7 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || ch == '$' + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 4ec5be073..f1b4a7c47 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -420,6 +420,8 @@ define_keywords!( SYSTEM_USER, TABLE, TABLESAMPLE, + TEMP, + TEMPORARY, TEXT, TEXTFILE, THEN, diff --git a/src/parser.rs b/src/parser.rs index f5be87002..cecd27b2f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1088,8 +1088,11 @@ impl Parser { /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace) + self.parse_create_table(or_replace, temporary) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); self.parse_create_view(or_replace) @@ -1184,6 +1187,7 @@ impl Parser { or_replace, if_not_exists: false, external: true, + temporary: false, file_format: Some(file_format), location: Some(location), query: None, @@ -1331,7 +1335,11 @@ impl Parser { } } - pub fn parse_create_table(&mut self, or_replace: bool) -> Result { + pub fn parse_create_table( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; let like = if self.parse_keyword(Keyword::LIKE) { @@ -1359,6 +1367,7 @@ impl Parser { Ok(Statement::CreateTable { name: table_name, + temporary, columns, constraints, with_options, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 0418999dc..0e168f084 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -142,6 +142,22 @@ fn long_numerics() { hive().verified_stmt(query); } +#[test] +fn decimal_precision() { + let query = "SELECT CAST(a AS DECIMAL(18,2)) FROM db.table"; + let expected = "SELECT CAST(a AS NUMERIC(18,2)) FROM db.table"; + hive().one_statement_parses_to(query, expected); +} + +#[test] +fn create_temp_table() { + let query = "CREATE TEMPORARY TABLE db.table (a INT NOT NULL)"; + let query2 = "CREATE TEMP TABLE db.table (a INT NOT NULL)"; + + hive().verified_stmt(query); + hive().one_statement_parses_to(query2, query); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From d2c9344dfbb068c34378506d31283a95a276db25 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 22:59:13 -0400 Subject: [PATCH 25/97] Support DIRECTORY INSERTS --- src/ast/mod.rs | 27 +++++++++++++++++ src/dialect/keywords.rs | 1 + src/parser.rs | 65 ++++++++++++++++++++++++++--------------- tests/sqlparser_hive.rs | 6 ++++ 4 files changed, 76 insertions(+), 23 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b10b34716..067a251e5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -483,6 +483,14 @@ pub enum Statement { /// whether the insert has the table keyword (Hive) table: bool, }, + // TODO: Support ROW FORMAT + Directory { + overwrite: bool, + local: bool, + path: String, + file_format: Option, + source: Box + }, Copy { /// TABLE table_name: ObjectName, @@ -629,6 +637,25 @@ impl fmt::Display for Statement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Statement::Query(s) => write!(f, "{}", s), + Statement::Directory { + overwrite, + local, + path, + file_format, + source + } => { + write!( + f, + "INSERT{overwrite}{local} DIRECTORY '{path}'", + overwrite = if *overwrite { " OVERWRITE" } else { "" }, + local = if *local { " LOCAL" } else { "" }, + path = path + )?; + if let Some(ref ff) = file_format { + write!(f, " STORED AS {}", ff)? + } + write!(f, " {}", source) + } Statement::Msck { table_name, repair, diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index f1b4a7c47..d5464c1e0 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -170,6 +170,7 @@ define_keywords!( DESC, DESCRIBE, DETERMINISTIC, + DIRECTORY, DISCONNECT, DISTINCT, DOUBLE, diff --git a/src/parser.rs b/src/parser.rs index cecd27b2f..993b09440 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2444,33 +2444,52 @@ impl Parser { pub fn parse_insert(&mut self) -> Result { let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; let overwrite = action == Keyword::OVERWRITE; - // Hive lets you put table here regardless - let table = self.parse_keyword(Keyword::TABLE); - let table_name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(Optional)?; + let local = self.parse_keyword(Keyword::LOCAL); - let partitioned = if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let r = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - r + if self.parse_keyword(Keyword::DIRECTORY) { + let path = match self.next_token() { + Token::SingleQuotedString(w) => w, + _ => self.expected("A file path", self.peek_token())?, + }; + let _ = self.expect_keywords(&[Keyword::STORED, Keyword::AS]); + let file_format = Some(self.parse_file_format()?); + let source = Box::new(self.parse_query()?); + Ok(Statement::Directory { + local, + path, + overwrite, + file_format, + source + }) } else { - None - }; + // Hive lets you put table here regardless + let table = self.parse_keyword(Keyword::TABLE); + let table_name = self.parse_object_name()?; + let columns = self.parse_parenthesized_column_list(Optional)?; - // Hive allows you to specify columns after partitions as well if you want. - let after_columns = self.parse_parenthesized_column_list(Optional)?; + let partitioned = if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let r = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + r + } else { + None + }; - let source = Box::new(self.parse_query()?); - Ok(Statement::Insert { - table_name, - overwrite, - partitioned, - columns, - after_columns, - source, - table, - }) + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = self.parse_parenthesized_column_list(Optional)?; + + let source = Box::new(self.parse_query()?); + Ok(Statement::Insert { + table_name, + overwrite, + partitioned, + columns, + after_columns, + source, + table, + }) + } } pub fn parse_update(&mut self) -> Result { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 0e168f084..9ac20b1c4 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -158,6 +158,12 @@ fn create_temp_table() { hive().one_statement_parses_to(query2, query); } +#[test] +fn create_local_directory() { + let query = "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; + hive().verified_stmt(query); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 946d1088702e9db72213b658cb63b7e8d840c22a Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 13:08:44 -0400 Subject: [PATCH 26/97] Support LATERAL VIEW and DISTRIBUTE BY --- src/ast/mod.rs | 4 ++-- src/ast/query.rs | 24 ++++++++++++++++++++++++ src/dialect/keywords.rs | 7 +++++++ src/parser.rs | 38 +++++++++++++++++++++++++++++++++++++- tests/sqlparser_hive.rs | 15 ++++++++++++++- 5 files changed, 84 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 067a251e5..92f3f1281 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -489,7 +489,7 @@ pub enum Statement { local: bool, path: String, file_format: Option, - source: Box + source: Box, }, Copy { /// TABLE @@ -642,7 +642,7 @@ impl fmt::Display for Statement { local, path, file_format, - source + source, } => { write!( f, diff --git a/src/ast/query.rs b/src/ast/query.rs index 1466f2b64..545792f9c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -129,12 +129,20 @@ pub struct Select { pub projection: Vec, /// FROM pub from: Vec, + /// LATERAL VIEW + pub lateral_view: Option, + /// LATERAL VIEW optional name + pub lateral_view_name: Option, + /// LATERAL VIEW optional column aliases + pub lateral_col_alias: Option, /// WHERE pub selection: Option, /// GROUP BY pub group_by: Vec, /// CLUSTER BY (Hive) pub cluster_by: Vec, + /// DISTRIBUTE BY (Hive) + pub distribute_by: Vec, /// HAVING pub having: Option, } @@ -149,6 +157,15 @@ impl fmt::Display for Select { if !self.from.is_empty() { write!(f, " FROM {}", display_comma_separated(&self.from))?; } + if let Some(ref lv) = self.lateral_view { + write!(f, " LATERAL VIEW {}", lv)?; + if let Some(ref a) = self.lateral_view_name { + write!(f, " {}", a)?; + } + if let Some(ref c) = self.lateral_col_alias { + write!(f, " AS {}", c)?; + } + } if let Some(ref selection) = self.selection { write!(f, " WHERE {}", selection)?; } @@ -162,6 +179,13 @@ impl fmt::Display for Select { display_comma_separated(&self.cluster_by) )?; } + if !self.distribute_by.is_empty() { + write!( + f, + " DISTRIBUTE BY {}", + display_comma_separated(&self.distribute_by) + )?; + } if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index d5464c1e0..3c9204869 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -173,6 +173,7 @@ define_keywords!( DIRECTORY, DISCONNECT, DISTINCT, + DISTRIBUTE, DOUBLE, DROP, DYNAMIC, @@ -492,6 +493,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::HAVING, Keyword::ORDER, Keyword::TOP, + Keyword::LATERAL, + Keyword::VIEW, Keyword::LIMIT, Keyword::OFFSET, Keyword::FETCH, @@ -509,6 +512,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::NATURAL, Keyword::USING, Keyword::CLUSTER, + Keyword::DISTRIBUTE, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, ]; @@ -524,6 +528,8 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::HAVING, Keyword::ORDER, Keyword::TOP, + Keyword::LATERAL, + Keyword::VIEW, Keyword::LIMIT, Keyword::OFFSET, Keyword::FETCH, @@ -531,6 +537,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::EXCEPT, Keyword::INTERSECT, Keyword::CLUSTER, + Keyword::DISTRIBUTE, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, ]; diff --git a/src/parser.rs b/src/parser.rs index 993b09440..e572a1a92 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2138,6 +2138,32 @@ impl Parser { vec![] }; + let lateral_view = if self + .expect_keywords(&[Keyword::LATERAL, Keyword::VIEW]) + .is_ok() + { + Some(self.parse_expr()?) + } else { + None + }; + + let lateral_view_name = if lateral_view.is_some() { + Some(self.parse_object_name()?) + } else { + None + }; + + let lateral_col_alias = if lateral_view_name.is_some() { + self.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + ])? + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) } else { @@ -2156,6 +2182,12 @@ impl Parser { vec![] }; + let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let having = if self.parse_keyword(Keyword::HAVING) { Some(self.parse_expr()?) } else { @@ -2168,8 +2200,12 @@ impl Parser { projection, from, selection, + lateral_view, + lateral_view_name, + lateral_col_alias, group_by, cluster_by, + distribute_by, having, }) } @@ -2459,7 +2495,7 @@ impl Parser { path, overwrite, file_format, - source + source, }) } else { // Hive lets you put table here regardless diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 9ac20b1c4..5be0bb79c 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -124,6 +124,12 @@ fn test_cluster_by() { hive().verified_stmt(cluster); } +#[test] +fn test_distribute_by() { + let cluster = "SELECT a FROM db.table DISTRIBUTE BY a, b"; + hive().verified_stmt(cluster); +} + #[test] fn no_join_condition() { let join = "SELECT a, b FROM db.table_name JOIN a"; @@ -160,10 +166,17 @@ fn create_temp_table() { #[test] fn create_local_directory() { - let query = "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; + let query = + "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; hive().verified_stmt(query); } +#[test] +fn lateral_view() { + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t WHERE a = 1"; + hive().verified_stmt(view); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 5fa4be03be0aa9a2863fff03b872dc53f6f17fb5 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 13:24:19 -0400 Subject: [PATCH 27/97] Add Dan's suggestion to the tokenizer and re-enable tests --- src/dialect/hive.rs | 2 +- src/tokenizer.rs | 13 +++++++++++++ tests/sqlparser_hive.rs | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 71a5eee26..0513b1175 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,7 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 002b7ffb6..c3d9a1b31 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -328,6 +328,19 @@ impl<'a> Tokenizer<'a> { ch if self.dialect.is_identifier_start(ch) => { chars.next(); // consume the first char let s = self.tokenize_word(ch, chars); + + if s.chars().all(|x| x >= '0' && x <= '9' || x == '.') { + let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| match ch { + '0'..='9' | '.' => true, + _ => false, + }); + let s2 = peeking_take_while(chars, |ch| match ch { + '0'..='9' | '.' => true, + _ => false, + }); + s += s2.as_str(); + return Ok(Some(Token::Number(s, false))); + } Ok(Some(Token::make_word(&s, None))) } // string diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 5be0bb79c..9dad62514 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -88,7 +88,7 @@ fn create_table_like() { } // Turning off this test until we can parse identifiers starting with numbers :( -#[ignore] +#[test] fn test_identifier() { let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; hive().verified_stmt(between); From 4b6653b9a7ffb50a32372b0596d308f9011decd4 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 14:20:05 -0400 Subject: [PATCH 28/97] Speculative array index for function call --- src/ast/mod.rs | 4 ++++ src/dialect/hive.rs | 5 ++++- src/parser.rs | 9 +++++++++ tests/sqlparser_common.rs | 8 +++++++- tests/sqlparser_hive.rs | 6 ++++++ 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 92f3f1281..5c6ef3fa3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1128,6 +1128,7 @@ pub struct Function { pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, + pub array_element: Option, } impl fmt::Display for Function { @@ -1142,6 +1143,9 @@ impl fmt::Display for Function { if let Some(o) = &self.over { write!(f, " OVER ({})", o)?; } + if let Some(ae) = &self.array_element { + write!(f, "[{}]", ae)?; + } Ok(()) } } diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 0513b1175..c09570c59 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,10 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' || (ch >= '0' && ch <= '9') + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || ch == '$' + || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/parser.rs b/src/parser.rs index e572a1a92..dbab3a1b1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -443,11 +443,20 @@ impl Parser { None }; + let array_element = if self.consume_token(&Token::LBracket) { + let num = Some(self.parse_number_value()?); + self.expect_token(&Token::RBracket)?; + num + } else { + None + }; + Ok(Expr::Function(Function { name, args, over, distinct, + array_element, })) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3a6910de6..810a3d698 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -328,6 +328,7 @@ fn parse_select_count_wildcard() { args: vec![Expr::Wildcard], over: None, distinct: false, + array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -346,6 +347,7 @@ fn parse_select_count_distinct() { }], over: None, distinct: true, + array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -885,7 +887,8 @@ fn parse_select_having() { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![Expr::Wildcard], over: None, - distinct: false + distinct: false, + array_element: None })), op: BinaryOperator::Gt, right: Box::new(Expr::Value(number("1"))) @@ -1592,6 +1595,7 @@ fn parse_scalar_function_in_projection() { args: vec![Expr::Identifier(Ident::new("id"))], over: None, distinct: false, + array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -1625,6 +1629,7 @@ fn parse_window_functions() { window_frame: None, }), distinct: false, + array_element: None }), expr_from_projection(&select.projection[0]) ); @@ -1875,6 +1880,7 @@ fn parse_delimited_identifiers() { args: vec![], over: None, distinct: false, + array_element: None }), expr_from_projection(&select.projection[1]), ); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 9dad62514..61f0fcacf 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -177,6 +177,12 @@ fn lateral_view() { hive().verified_stmt(view); } +#[test] +fn test_array_elements() { + let elements = "SELECT collect_list(a)[0] FROM db.table"; + hive().verified_stmt(elements); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From ccfbee3dd7697e56c201469d18ef7c64851ead79 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 14:27:32 -0400 Subject: [PATCH 29/97] Make file format optional for directory inserts --- src/parser.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index dbab3a1b1..15b052533 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2496,8 +2496,11 @@ impl Parser { Token::SingleQuotedString(w) => w, _ => self.expected("A file path", self.peek_token())?, }; - let _ = self.expect_keywords(&[Keyword::STORED, Keyword::AS]); - let file_format = Some(self.parse_file_format()?); + let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { + Some(self.parse_file_format()?) + } else { + None + }; let source = Box::new(self.parse_query()?); Ok(Statement::Directory { local, From 307a3716d2d648704c6c2240984e6e30625064ec Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 14:49:12 -0400 Subject: [PATCH 30/97] Allow multiple aliases in LATERAL VIEW --- src/ast/query.rs | 10 +++++++--- src/parser.rs | 20 +++++++++++++------- tests/sqlparser_hive.rs | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 545792f9c..f0d883b35 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -134,7 +134,7 @@ pub struct Select { /// LATERAL VIEW optional name pub lateral_view_name: Option, /// LATERAL VIEW optional column aliases - pub lateral_col_alias: Option, + pub lateral_col_alias: Vec, /// WHERE pub selection: Option, /// GROUP BY @@ -162,8 +162,12 @@ impl fmt::Display for Select { if let Some(ref a) = self.lateral_view_name { write!(f, " {}", a)?; } - if let Some(ref c) = self.lateral_col_alias { - write!(f, " AS {}", c)?; + if !self.lateral_col_alias.is_empty() { + write!( + f, + " AS {}", + display_comma_separated(&self.lateral_col_alias) + )?; } } if let Some(ref selection) = self.selection { diff --git a/src/parser.rs b/src/parser.rs index 15b052533..8fab518d7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2163,14 +2163,20 @@ impl Parser { }; let lateral_col_alias = if lateral_view_name.is_some() { - self.parse_optional_alias(&[ - Keyword::WHERE, - Keyword::GROUP, - Keyword::CLUSTER, - Keyword::HAVING, - ])? + self.parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .filter(|i| i.is_some()) + .map(|i| i.unwrap()) + .collect() } else { - None + vec![] }; let selection = if self.parse_keyword(Keyword::WHERE) { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 61f0fcacf..14fbae30e 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -173,7 +173,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } From 5bcd4c37b4575eb9cf4f45f8b8a5f08b5cc5b2f3 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 16:50:24 -0400 Subject: [PATCH 31/97] Allow multiple lateral views and an insanely janky implementation for row formats in external tables --- src/ast/mod.rs | 15 +++++--- src/ast/query.rs | 52 ++++++++++++++++--------- src/parser.rs | 85 +++++++++++++++++++++-------------------- tests/sqlparser_hive.rs | 2 +- 4 files changed, 88 insertions(+), 66 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5c6ef3fa3..fa768e9bc 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -29,8 +29,9 @@ pub use self::ddl::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - Cte, Fetch, Join, JoinConstraint, JoinOperator, Offset, OffsetRows, OrderByExpr, Query, Select, - SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values, + Cte, Fetch, Join, JoinConstraint, JoinOperator, LateralView, Offset, OffsetRows, OrderByExpr, + Query, Select, SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, + Values, }; pub use self::value::{DateTimeField, Value}; @@ -951,13 +952,15 @@ impl fmt::Display for Statement { " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", input_format, output_format )?, - Some(HiveIOFormat::FileFormat { format }) => { + Some(HiveIOFormat::FileFormat { format }) if !*external => { write!(f, " STORED AS {}", format)? } - None => (), + _ => (), } - if let Some(loc) = location { - write!(f, " LOCATION '{}'", loc)?; + if !*external { + if let Some(loc) = location { + write!(f, " LOCATION '{}'", loc)?; + } } } if *external { diff --git a/src/ast/query.rs b/src/ast/query.rs index f0d883b35..20c87c698 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -129,12 +129,8 @@ pub struct Select { pub projection: Vec, /// FROM pub from: Vec, - /// LATERAL VIEW - pub lateral_view: Option, - /// LATERAL VIEW optional name - pub lateral_view_name: Option, - /// LATERAL VIEW optional column aliases - pub lateral_col_alias: Vec, + /// LATERAL VIEWs + pub lateral_views: Vec, /// WHERE pub selection: Option, /// GROUP BY @@ -157,17 +153,9 @@ impl fmt::Display for Select { if !self.from.is_empty() { write!(f, " FROM {}", display_comma_separated(&self.from))?; } - if let Some(ref lv) = self.lateral_view { - write!(f, " LATERAL VIEW {}", lv)?; - if let Some(ref a) = self.lateral_view_name { - write!(f, " {}", a)?; - } - if !self.lateral_col_alias.is_empty() { - write!( - f, - " AS {}", - display_comma_separated(&self.lateral_col_alias) - )?; + if !self.lateral_views.is_empty() { + for lv in &self.lateral_views { + write!(f, "{}", lv)?; } } if let Some(ref selection) = self.selection { @@ -197,6 +185,36 @@ impl fmt::Display for Select { } } +/// A hive LATERAL VIEW with potential column aliases +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct LateralView { + /// LATERAL VIEW + pub lateral_view: Expr, + /// LATERAL VIEW table name + pub lateral_view_name: ObjectName, + /// LATERAL VIEW optional column aliases + pub lateral_col_alias: Vec, +} + +impl fmt::Display for LateralView { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + " LATERAL VIEW {} {}", + self.lateral_view, self.lateral_view_name + )?; + if !self.lateral_col_alias.is_empty() { + write!( + f, + " AS {}", + display_comma_separated(&self.lateral_col_alias) + )?; + } + Ok(()) + } +} + /// A single CTE (used after `WITH`): `alias [(col1, col2, ...)] AS ( query )` /// The names in the column list before `AS`, when specified, replace the names /// of the columns returned by the query. The parser does not validate that the diff --git a/src/parser.rs b/src/parser.rs index 8fab518d7..34a85e1b7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1179,26 +1179,32 @@ impl Parser { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; - let hive_distribution = self.parse_hive_distribution()?; - self.expect_keywords(&[Keyword::STORED, Keyword::AS])?; - let file_format = self.parse_file_format()?; - self.expect_keyword(Keyword::LOCATION)?; - let location = self.parse_literal_string()?; + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; + let file_format = if let Some(ff) = &hive_formats.storage { + match ff { + HiveIOFormat::FileFormat { format } => Some(format.clone()), + _ => None, + } + } else { + None + }; + let location = hive_formats.location.clone(); Ok(Statement::CreateTable { name: table_name, columns, constraints, hive_distribution, - hive_formats: None, + hive_formats: Some(hive_formats), with_options: vec![], or_replace, if_not_exists: false, external: true, temporary: false, - file_format: Some(file_format), - location: Some(location), + file_format, + location, query: None, without_rowid: false, like: None, @@ -2147,37 +2153,34 @@ impl Parser { vec![] }; - let lateral_view = if self - .expect_keywords(&[Keyword::LATERAL, Keyword::VIEW]) - .is_ok() - { - Some(self.parse_expr()?) - } else { - None - }; - - let lateral_view_name = if lateral_view.is_some() { - Some(self.parse_object_name()?) - } else { - None - }; - - let lateral_col_alias = if lateral_view_name.is_some() { - self.parse_comma_separated(|parser| { - parser.parse_optional_alias(&[ - Keyword::WHERE, - Keyword::GROUP, - Keyword::CLUSTER, - Keyword::HAVING, - ]) // This couldn't possibly be a bad idea - })? - .into_iter() - .filter(|i| i.is_some()) - .map(|i| i.unwrap()) - .collect() - } else { - vec![] - }; + let mut lateral_views = vec![]; + loop { + if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let lateral_view = self.parse_expr()?; + let lateral_view_name = self.parse_object_name()?; + let lateral_col_alias = self + .parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .filter(|i| i.is_some()) + .map(|i| i.unwrap()) + .collect(); + + lateral_views.push(LateralView { + lateral_view, + lateral_view_name, + lateral_col_alias, + }); + } else { + break; + } + } let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) @@ -2215,9 +2218,7 @@ impl Parser { projection, from, selection, - lateral_view, - lateral_view_name, - lateral_col_alias, + lateral_views, group_by, cluster_by, distribute_by, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 14fbae30e..e169f4a03 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -173,7 +173,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS a, b LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } From 4328a2d280586557289f192ca36bc2018f710698 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 17:24:08 -0400 Subject: [PATCH 32/97] All multiple quote styles for directory paths --- src/parser.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 34a85e1b7..e5c44c41d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1780,6 +1780,7 @@ impl Parser { /// Parse a literal string pub fn parse_literal_string(&mut self) -> Result { match self.next_token() { + Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value), Token::SingleQuotedString(s) => Ok(s), unexpected => self.expected("literal string", unexpected), } @@ -2499,10 +2500,7 @@ impl Parser { let local = self.parse_keyword(Keyword::LOCAL); if self.parse_keyword(Keyword::DIRECTORY) { - let path = match self.next_token() { - Token::SingleQuotedString(w) => w, - _ => self.expected("A file path", self.peek_token())?, - }; + let path = self.parse_literal_string()?; let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { Some(self.parse_file_format()?) } else { From c15e958afee6621bc60dccd004490bf89fe621b9 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 18:15:40 -0400 Subject: [PATCH 33/97] Speculative test on table properties --- src/ast/mod.rs | 9 +++++++++ src/ast/value.rs | 3 +++ src/dialect/keywords.rs | 1 + src/parser.rs | 18 +++++++++++++----- tests/sqlparser_hive.rs | 2 +- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index fa768e9bc..d1783b3fb 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -539,6 +539,7 @@ pub enum Statement { constraints: Vec, hive_distribution: HiveDistributionStyle, hive_formats: Option, + table_properties: Vec, with_options: Vec, file_format: Option, location: Option, @@ -847,6 +848,7 @@ impl fmt::Display for Statement { name, columns, constraints, + table_properties, with_options, or_replace, if_not_exists, @@ -971,6 +973,13 @@ impl fmt::Display for Statement { location.as_ref().unwrap() )?; } + if !table_properties.is_empty() { + write!( + f, + " TABLEPROPERTIES ({})", + display_comma_separated(table_properties) + )?; + } if !with_options.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_options))?; } diff --git a/src/ast/value.rs b/src/ast/value.rs index 901fa5158..9240d7479 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -31,6 +31,8 @@ pub enum Value { NationalStringLiteral(String), /// X'hex value' HexStringLiteral(String), + + LiteralString(String), /// Boolean value true or false Boolean(bool), /// INTERVAL literals, roughly in the following format: @@ -60,6 +62,7 @@ impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), + Value::LiteralString(v) => write!(f, "{}", v), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 3c9204869..e2b9cbe49 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -421,6 +421,7 @@ define_keywords!( SYSTEM_TIME, SYSTEM_USER, TABLE, + TABLEPROPERTIES, TABLESAMPLE, TEMP, TEMPORARY, diff --git a/src/parser.rs b/src/parser.rs index e5c44c41d..91e915420 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1192,6 +1192,7 @@ impl Parser { None }; let location = hive_formats.location.clone(); + let table_properties = self.parse_options(Keyword::TABLEPROPERTIES)?; Ok(Statement::CreateTable { name: table_name, columns, @@ -1199,6 +1200,7 @@ impl Parser { hive_distribution, hive_formats: Some(hive_formats), with_options: vec![], + table_properties, or_replace, if_not_exists: false, external: true, @@ -1234,7 +1236,7 @@ impl Parser { // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; - let with_options = self.parse_with_options()?; + let with_options = self.parse_options(Keyword::WITH)?; self.expect_keyword(Keyword::AS)?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. @@ -1371,8 +1373,8 @@ impl Parser { let hive_distribution = self.parse_hive_distribution()?; let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` - let with_options = self.parse_with_options()?; - + let with_options = self.parse_options(Keyword::WITH)?; + let table_properties = self.parse_options(Keyword::TABLEPROPERTIES)?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(Box::new(self.parse_query()?)) @@ -1386,6 +1388,7 @@ impl Parser { columns, constraints, with_options, + table_properties, or_replace, if_not_exists, hive_distribution, @@ -1573,8 +1576,8 @@ impl Parser { } } - pub fn parse_with_options(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::WITH) { + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { + if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Parser::parse_sql_option)?; self.expect_token(&Token::RParen)?; @@ -1741,6 +1744,11 @@ impl Parser { Keyword::TRUE => Ok(Value::Boolean(true)), Keyword::FALSE => Ok(Value::Boolean(false)), Keyword::NULL => Ok(Value::Null), + Keyword::NoKeyword => Ok(Value::LiteralString(format!( + "{quote}{}{quote}", + w.value, + quote = w.quote_style.map(|q| q.to_string()).unwrap_or("".into()) + ))), _ => self.expected("a concrete value", Token::Word(w)), }, // The call to n.parse() returns a bigdecimal when the diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e169f4a03..8ec182784 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::test_utils::*; #[test] fn parse_table_create() { - let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...'"#; + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TABLEPROPERTIES ("prop" = "2")"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; hive().verified_stmt(sql); From 6abd52b980e2382b2d981a41156152a0b617b2cf Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 18:21:36 -0400 Subject: [PATCH 34/97] Spell keyword tblproperties correctly this time --- src/ast/mod.rs | 2 +- src/dialect/keywords.rs | 2 +- src/parser.rs | 4 ++-- tests/sqlparser_hive.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d1783b3fb..a82cd5905 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -976,7 +976,7 @@ impl fmt::Display for Statement { if !table_properties.is_empty() { write!( f, - " TABLEPROPERTIES ({})", + " TBLPROPERTIES ({})", display_comma_separated(table_properties) )?; } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index e2b9cbe49..e146b41ec 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -421,8 +421,8 @@ define_keywords!( SYSTEM_TIME, SYSTEM_USER, TABLE, - TABLEPROPERTIES, TABLESAMPLE, + TBLPROPERTIES, TEMP, TEMPORARY, TEXT, diff --git a/src/parser.rs b/src/parser.rs index 91e915420..33cb1b69a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1192,7 +1192,7 @@ impl Parser { None }; let location = hive_formats.location.clone(); - let table_properties = self.parse_options(Keyword::TABLEPROPERTIES)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; Ok(Statement::CreateTable { name: table_name, columns, @@ -1374,7 +1374,7 @@ impl Parser { let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` let with_options = self.parse_options(Keyword::WITH)?; - let table_properties = self.parse_options(Keyword::TABLEPROPERTIES)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(Box::new(self.parse_query()?)) diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 8ec182784..11df1dd76 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::test_utils::*; #[test] fn parse_table_create() { - let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TABLEPROPERTIES ("prop" = "2")"#; + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2")"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; hive().verified_stmt(sql); From 3c2b0b269c4cf37442a27314f00d5daaf6042a6b Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 30 Jul 2020 12:28:45 -0400 Subject: [PATCH 35/97] Fix issue with multiple lateral views and no aliases --- src/parser.rs | 1 + tests/sqlparser_hive.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 33cb1b69a..ebf1e324b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2174,6 +2174,7 @@ impl Parser { Keyword::GROUP, Keyword::CLUSTER, Keyword::HAVING, + Keyword::LATERAL ]) // This couldn't possibly be a bad idea })? .into_iter() diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 11df1dd76..1770d4661 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -173,7 +173,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS a, b LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } From c26d77fdf2bba060d8896ef29a68b634cf49ed28 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 12:29:41 -0400 Subject: [PATCH 36/97] Rebase, fix lints, removed some janky array element parsing that just didn't work, and allow analyze for columns to sometimes not actually have columns --- src/ast/mod.rs | 9 ++++----- src/ast/value.rs | 4 ++-- src/dialect/hive.rs | 10 ++-------- src/parser.rs | 32 +++++++++++++------------------- tests/sqlparser_common.rs | 6 ------ tests/sqlparser_hive.rs | 11 +++-------- 6 files changed, 24 insertions(+), 48 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a82cd5905..13ffabf70 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -712,7 +712,10 @@ impl fmt::Display for Statement { write!(f, " CACHE METADATA")?; } if *for_columns { - write!(f, " FOR COLUMNS {}", display_comma_separated(columns))?; + write!(f, " FOR COLUMNS")?; + if !columns.is_empty() { + write!(f, " {}", display_comma_separated(columns))?; + } } Ok(()) } @@ -1140,7 +1143,6 @@ pub struct Function { pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, - pub array_element: Option, } impl fmt::Display for Function { @@ -1155,9 +1157,6 @@ impl fmt::Display for Function { if let Some(o) = &self.over { write!(f, " OVER ({})", o)?; } - if let Some(ae) = &self.array_element { - write!(f, "[{}]", ae)?; - } Ok(()) } } diff --git a/src/ast/value.rs b/src/ast/value.rs index 9240d7479..2afdfaeae 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -32,7 +32,7 @@ pub enum Value { /// X'hex value' HexStringLiteral(String), - LiteralString(String), + DoubleQuotedString(String), /// Boolean value true or false Boolean(bool), /// INTERVAL literals, roughly in the following format: @@ -62,7 +62,7 @@ impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), - Value::LiteralString(v) => write!(f, "{}", v), + Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index c09570c59..4fc23ed28 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -5,14 +5,11 @@ pub struct HiveDialect {} impl Dialect for HiveDialect { fn is_delimited_identifier_start(&self, ch: char) -> bool { - (ch == '"') || (ch == '\'') || (ch == '`') + (ch == '"') || (ch == '`') } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || ch == '$' - || (ch >= '0' && ch <= '9') + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { @@ -20,8 +17,5 @@ impl Dialect for HiveDialect { || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_' - || ch == '$' - || ch == '{' - || ch == '}' } } diff --git a/src/parser.rs b/src/parser.rs index ebf1e324b..afaee2889 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -221,10 +221,12 @@ impl Parser { Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { self.expect_keyword(Keyword::COLUMNS)?; - columns = self.parse_comma_separated(Parser::parse_identifier)?; - if columns.is_empty() { - self.expected("columns identifiers", self.peek_token())?; - } + + columns = self + .maybe_parse(|parser| { + parser.parse_comma_separated(Parser::parse_identifier) + }) + .unwrap_or_default(); for_columns = true } Some(Keyword::CACHE) => { @@ -443,20 +445,11 @@ impl Parser { None }; - let array_element = if self.consume_token(&Token::LBracket) { - let num = Some(self.parse_number_value()?); - self.expect_token(&Token::RBracket)?; - num - } else { - None - }; - Ok(Expr::Function(Function { name, args, over, distinct, - array_element, })) } @@ -1744,11 +1737,11 @@ impl Parser { Keyword::TRUE => Ok(Value::Boolean(true)), Keyword::FALSE => Ok(Value::Boolean(false)), Keyword::NULL => Ok(Value::Null), - Keyword::NoKeyword => Ok(Value::LiteralString(format!( - "{quote}{}{quote}", - w.value, - quote = w.quote_style.map(|q| q.to_string()).unwrap_or("".into()) - ))), + Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { + Some('"') => Ok(Value::DoubleQuotedString(w.value)), + Some('\'') => Ok(Value::SingleQuotedString(w.value)), + _ => self.expected("A value?", Token::Word(w))?, + }, _ => self.expected("a concrete value", Token::Word(w)), }, // The call to n.parse() returns a bigdecimal when the @@ -1936,6 +1929,7 @@ impl Parser { pub fn parse_identifier(&mut self) -> Result { match self.next_token() { Token::Word(w) => Ok(w.to_ident()), + Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), unexpected => self.expected("identifier", unexpected), } } @@ -2174,7 +2168,7 @@ impl Parser { Keyword::GROUP, Keyword::CLUSTER, Keyword::HAVING, - Keyword::LATERAL + Keyword::LATERAL, ]) // This couldn't possibly be a bad idea })? .into_iter() diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 810a3d698..6c824d55b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -328,7 +328,6 @@ fn parse_select_count_wildcard() { args: vec![Expr::Wildcard], over: None, distinct: false, - array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -347,7 +346,6 @@ fn parse_select_count_distinct() { }], over: None, distinct: true, - array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -888,7 +886,6 @@ fn parse_select_having() { args: vec![Expr::Wildcard], over: None, distinct: false, - array_element: None })), op: BinaryOperator::Gt, right: Box::new(Expr::Value(number("1"))) @@ -1595,7 +1592,6 @@ fn parse_scalar_function_in_projection() { args: vec![Expr::Identifier(Ident::new("id"))], over: None, distinct: false, - array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -1629,7 +1625,6 @@ fn parse_window_functions() { window_frame: None, }), distinct: false, - array_element: None }), expr_from_projection(&select.projection[0]) ); @@ -1880,7 +1875,6 @@ fn parse_delimited_identifiers() { args: vec![], over: None, distinct: false, - array_element: None }), expr_from_projection(&select.projection[1]), ); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 1770d4661..e01281801 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::test_utils::*; #[test] fn parse_table_create() { - let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2")"#; + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; hive().verified_stmt(sql); @@ -47,7 +47,8 @@ fn parse_analyze() { #[test] fn parse_analyze_for_columns() { - let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS a, b, c"#; + let analyze = + r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS"#; hive().verified_stmt(analyze); } @@ -177,12 +178,6 @@ fn lateral_view() { hive().verified_stmt(view); } -#[test] -fn test_array_elements() { - let elements = "SELECT collect_list(a)[0] FROM db.table"; - hive().verified_stmt(elements); -} - fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 1515e63d918470b9c7abdd3a301f89b263309a90 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 14:34:22 -0400 Subject: [PATCH 37/97] Reenable some identifier parts in dialect, support LATERAL VIEW OUTER --- src/ast/query.rs | 8 ++++++-- src/dialect/hive.rs | 7 ++++++- src/parser.rs | 2 ++ tests/sqlparser_hive.rs | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 20c87c698..4b956e031 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -195,14 +195,18 @@ pub struct LateralView { pub lateral_view_name: ObjectName, /// LATERAL VIEW optional column aliases pub lateral_col_alias: Vec, + /// LATERAL VIEW OUTER + pub outer: bool, } impl fmt::Display for LateralView { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, - " LATERAL VIEW {} {}", - self.lateral_view, self.lateral_view_name + " LATERAL VIEW{outer} {} {}", + self.lateral_view, + self.lateral_view_name, + outer = if self.outer { " OUTER" } else { "" } )?; if !self.lateral_col_alias.is_empty() { write!( diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 4fc23ed28..7d60e010e 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,10 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || (ch >= '0' && ch <= '9') + || ch == '$' } fn is_identifier_part(&self, ch: char) -> bool { @@ -17,5 +20,7 @@ impl Dialect for HiveDialect { || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_' + || ch == '{' + || ch == '}' } } diff --git a/src/parser.rs b/src/parser.rs index 79f45dc34..5e1931dc3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2172,6 +2172,7 @@ impl Parser { let mut lateral_views = vec![]; loop { if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let outer = self.parse_keyword(Keyword::OUTER); let lateral_view = self.parse_expr()?; let lateral_view_name = self.parse_object_name()?; let lateral_col_alias = self @@ -2193,6 +2194,7 @@ impl Parser { lateral_view, lateral_view_name, lateral_col_alias, + outer }); } else { break; diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e01281801..4854f27d8 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -174,7 +174,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } From ce0d86f144655a6c1e2a071157bf7b046ef49b39 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 16:28:46 -0400 Subject: [PATCH 38/97] Cargo fmt --- src/ast/mod.rs | 1 - src/parser.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 815050ce4..3bb33d4a0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -904,7 +904,6 @@ impl fmt::Display for Statement { write!(f, " WITHOUT ROWID")?; } - // Only for Hive if let Some(l) = like { write!(f, " LIKE {}", l)?; diff --git a/src/parser.rs b/src/parser.rs index 5e1931dc3..966ed1a77 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2194,7 +2194,7 @@ impl Parser { lateral_view, lateral_view_name, lateral_col_alias, - outer + outer, }); } else { break; From 35945e12869c528fe0f705019f5d5af1ad8f823c Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 16:48:11 -0400 Subject: [PATCH 39/97] Add license to hive.rs --- src/dialect/hive.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 7d60e010e..66241b89f 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::dialect::Dialect; #[derive(Debug)] From 9f981a524460962956b40dc3e2a0eb9ef2ee119d Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 17:24:40 -0400 Subject: [PATCH 40/97] Allow ommitting add/drop/sync partitions in msck --- src/parser.rs | 24 ++++++++++++++++-------- tests/sqlparser_hive.rs | 2 ++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 966ed1a77..50f1ca9bd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -171,14 +171,22 @@ impl Parser { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; - let partition_action = - match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { - Some(Keyword::ADD) => Some(AddDropSync::ADD), - Some(Keyword::DROP) => Some(AddDropSync::DROP), - Some(Keyword::SYNC) => Some(AddDropSync::SYNC), - _ => None, - }; - self.expect_keyword(Keyword::PARTITIONS)?; + let partition_action = self + .maybe_parse(|parser| { + let pa = match parser.parse_one_of_keywords(&[ + Keyword::ADD, + Keyword::DROP, + Keyword::SYNC, + ]) { + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), + _ => None, + }; + parser.expect_keyword(Keyword::PARTITIONS)?; + Ok(pa) + }) + .unwrap_or_default(); Ok(Statement::Msck { repair, table_name, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 4854f27d8..e08cdf3f5 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -55,7 +55,9 @@ fn parse_analyze_for_columns() { #[test] fn parse_msck() { let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; + let msck2 = r#"MSCK REPAIR TABLE db.table_name"#; hive().verified_stmt(msck); + hive().verified_stmt(msck2); } #[test] From 6cbd11c0c107c6237d050bba020e495a3e10a2f5 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 17:40:36 -0400 Subject: [PATCH 41/97] Fix a bigdecimal compile error --- tests/sqlparser_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index d8643298c..d9783ee8f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -451,7 +451,7 @@ fn parse_number() { #[cfg(feature = "bigdecimal")] assert_eq!( expr, - Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1))) + Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1), false)) ); #[cfg(not(feature = "bigdecimal"))] From 25515d9fb0cb3a63a72a4283291a719bff8a6d10 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Sat, 1 Aug 2020 12:38:24 -0400 Subject: [PATCH 42/97] Re-add dollar sign for idents --- src/dialect/hive.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 66241b89f..24c26b98d 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -32,6 +32,7 @@ impl Dialect for HiveDialect { || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_' + || ch == '$' || ch == '{' || ch == '}' } From 5630f24dcf19866dcde5d30be1b6ab47c01a1707 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Sun, 2 Aug 2020 14:52:09 -0400 Subject: [PATCH 43/97] Support SORT BY in selects --- src/ast/query.rs | 5 +++++ src/dialect/keywords.rs | 3 +++ src/parser.rs | 7 +++++++ tests/sqlparser_hive.rs | 6 ++++++ 4 files changed, 21 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index 4b956e031..0c0212425 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -139,6 +139,8 @@ pub struct Select { pub cluster_by: Vec, /// DISTRIBUTE BY (Hive) pub distribute_by: Vec, + /// SORT BY (Hive) + pub sort_by: Vec, /// HAVING pub having: Option, } @@ -178,6 +180,9 @@ impl fmt::Display for Select { display_comma_separated(&self.distribute_by) )?; } + if !self.sort_by.is_empty() { + write!(f, " SORT BY {}", display_comma_separated(&self.sort_by))?; + } if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index d3ff6cef9..c9ff00995 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -400,6 +400,7 @@ define_keywords!( SIMILAR, SMALLINT, SOME, + SORT, SPECIFIC, SPECIFICTYPE, SQL, @@ -496,6 +497,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, + Keyword::SORT, Keyword::HAVING, Keyword::ORDER, Keyword::TOP, @@ -531,6 +533,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, + Keyword::SORT, Keyword::HAVING, Keyword::ORDER, Keyword::TOP, diff --git a/src/parser.rs b/src/parser.rs index 50f1ca9bd..cc45c849b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2233,6 +2233,12 @@ impl Parser { vec![] }; + let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let having = if self.parse_keyword(Keyword::HAVING) { Some(self.parse_expr()?) } else { @@ -2249,6 +2255,7 @@ impl Parser { group_by, cluster_by, distribute_by, + sort_by, having, }) } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e08cdf3f5..c0f4d585f 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -180,6 +180,12 @@ fn lateral_view() { hive().verified_stmt(view); } +#[test] +fn sort_by() { + let sort_by = "SELECT * FROM db.table SORT BY a"; + hive().verified_stmt(sort_by); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 445860d44b7067b931e219578272b743a5eae6da Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 3 Aug 2020 00:26:20 -0400 Subject: [PATCH 44/97] Fix ALTER TABLE RENAME TO as taking an ident when should take an ObjectName --- src/ast/ddl.rs | 2 +- src/parser.rs | 5 +++-- tests/sqlparser_hive.rs | 8 +++++++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 3ed2a6918..b88709040 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -55,7 +55,7 @@ pub enum AlterTableOperation { new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { table_name: Ident }, + RenameTable { table_name: ObjectName }, } impl fmt::Display for AlterTableOperation { diff --git a/src/parser.rs b/src/parser.rs index cc45c849b..a981c219e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1183,6 +1183,7 @@ impl Parser { or_replace: bool, ) -> Result { self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; @@ -1208,7 +1209,7 @@ impl Parser { with_options: vec![], table_properties, or_replace, - if_not_exists: false, + if_not_exists, external: true, temporary: false, file_format, @@ -1634,7 +1635,7 @@ impl Parser { } } else if self.parse_keyword(Keyword::RENAME) { if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_identifier()?; + let table_name = self.parse_object_name()?; AlterTableOperation::RenameTable { table_name } } else { let _ = self.parse_keyword(Keyword::COLUMN); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index c0f4d585f..e2ead0603 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -176,7 +176,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS j, P LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } @@ -186,6 +186,12 @@ fn sort_by() { hive().verified_stmt(sort_by); } +#[test] +fn rename_table() { + let rename = "ALTER TABLE db.table_name RENAME TO db.table_2"; + hive().verified_stmt(rename); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From c45592f608acf5a7eca884986cb3f4a8fcc6f9e5 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 21 Jul 2020 10:08:04 -0400 Subject: [PATCH 45/97] First pass on some create table, and some analyze table parsing --- examples/cli.rs | 1 + src/ast/data_type.rs | 3 + src/ast/mod.rs | 139 ++++++++++++++++++++++++++++++++++- src/dialect/hive.rs | 24 ++++++ src/dialect/keywords.rs | 17 +++++ src/dialect/mod.rs | 2 + src/parser.rs | 149 ++++++++++++++++++++++++++++++++++++-- src/test_utils.rs | 1 + tests/sqlparser_common.rs | 2 +- tests/sqlparser_hive.rs | 44 +++++++++++ 10 files changed, 373 insertions(+), 9 deletions(-) create mode 100644 src/dialect/hive.rs create mode 100644 tests/sqlparser_hive.rs diff --git a/examples/cli.rs b/examples/cli.rs index 5a3a3034b..bd1be5244 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -40,6 +40,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), + "--hive" => Box::new(HiveDialect{}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {}", s), }; diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 53122ab5d..cc8bd3260 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -61,6 +61,8 @@ pub enum DataType { Regclass, /// Text Text, + /// String (Hive) + String, /// Bytea Bytea, /// Custom type such as enums @@ -101,6 +103,7 @@ impl fmt::Display for DataType { DataType::Interval => write!(f, "INTERVAL"), DataType::Regclass => write!(f, "REGCLASS"), DataType::Text => write!(f, "TEXT"), + DataType::String => write!(f, "STRING"), DataType::Bytea => write!(f, "BYTEA"), DataType::Array(ty) => write!(f, "{}[]", ty), DataType::Custom(ty) => write!(f, "{}", ty), diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a726b299d..e9ab618a8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -431,6 +431,15 @@ impl fmt::Display for WindowFrameBound { #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Statement { + /// Analyze (Hive) + Analyze { + table_name: ObjectName, + partitions: Option>, + for_columns: bool, + cache_metadata: bool, + noscan: bool, + compute_statistics: bool + }, /// SELECT Query(Box), /// INSERT @@ -439,8 +448,12 @@ pub enum Statement { table_name: ObjectName, /// COLUMNS columns: Vec, + /// Overwrite (Hive) + overwrite: bool, /// A SQL query that specifies what to insert source: Box, + /// partitioned insert (Hive) + partitioned: Option> }, Copy { /// TABLE @@ -486,6 +499,8 @@ pub enum Statement { /// Optional schema columns: Vec, constraints: Vec, + hive_distribution: HiveDistributionStyle, + hive_formats: Option, with_options: Vec, file_format: Option, location: Option, @@ -562,6 +577,12 @@ pub enum Statement { schema_name: ObjectName, if_not_exists: bool, }, + /// CREATE DATABASE + CreateDatabase { + db_name: ObjectName, + ine: bool, location: Option, + managed_location: Option + }, /// `ASSERT [AS ]` Assert { condition: Expr, @@ -592,12 +613,17 @@ impl fmt::Display for Statement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Statement::Query(s) => write!(f, "{}", s), + Statement::Analyze { table_name, partitions, for_columns, cache_metadata, noscan, compute_statistics } => { + Ok(()) + } Statement::Insert { table_name, + overwrite, + partitioned, columns, source, } => { - write!(f, "INSERT INTO {} ", table_name)?; + write!(f, "INSERT {act} {table_name} ", table_name = table_name, act = if *overwrite { "OVERWRITE" } else { "INTO" })?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } @@ -652,6 +678,20 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateDatabase { db_name, ine, location, managed_location } => { + write!(f, "CREATE")?; + if *ine { + write!(f, " IF NOT EXISTS")?; + } + write!(f, " {}", db_name)?; + if let Some(l) = location { + write!(f, " LOCATION '{}'", l)?; + } + if let Some(ml) = managed_location { + write!(f, " MANAGEDLOCATION '{}'", ml)?; + } + Ok(()) + } Statement::CreateView { name, or_replace, @@ -682,6 +722,8 @@ impl fmt::Display for Statement { with_options, or_replace, if_not_exists, + hive_distribution, + hive_formats, external, file_format, location, @@ -717,6 +759,43 @@ impl fmt::Display for Statement { if *without_rowid { write!(f, " WITHOUT ROWID")?; } + + match hive_distribution { + HiveDistributionStyle::PARTITIONED { columns } => write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?, + HiveDistributionStyle::CLUSTERED { columns, sorted_by, num_buckets } => { + write!(f, " CLUSTERED BY ({})", display_comma_separated(&columns))?; + if !sorted_by.is_empty() { + write!(f, " SORTED BY ({})", display_comma_separated(&sorted_by))?; + } + if *num_buckets > 0 { + write!(f, " INTO {} BUCKETS", num_buckets)?; + } + } + HiveDistributionStyle::SKEWED { columns, on, stored_as_directories } => { + write!(f, " SKEWED BY ({})) ON ({})", display_comma_separated(&columns), display_comma_separated(&on))?; + if *stored_as_directories { + write!(f, " STORED AS DIRECTORIES")?; + } + }, + _ => () + } + + if let Some(HiveFormat { row_format, storage, location }) = hive_formats { + + match row_format { + Some(HiveRowFormat::SERDE { class }) => write!(f, " ROW FORMAT SERDE '{}'", class)?, + Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?, + None => () + } + match storage { + Some(HiveIOFormat::IOF { input_format, output_format }) => write!(f, " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", input_format, output_format)?, + Some(HiveIOFormat::FileFormat { format }) => write!(f, " STORED AS {}", format)?, + None => () + } + if let Some(loc) = location { + write!(f, " LOCATION '{}'", loc)?; + } + } if *external { write!( f, @@ -1054,6 +1133,64 @@ impl fmt::Display for ObjectType { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveDistributionStyle { + PARTITIONED { + columns: Vec + }, + CLUSTERED { + columns: Vec, + sorted_by: Vec, + num_buckets: i32 + }, + SKEWED { + columns: Vec, + on: Vec, + stored_as_directories: bool + }, + NONE +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveRowFormat { + SERDE { + class: String + }, + DELIMITED +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveIOFormat { + IOF { + input_format: Expr, + output_format: Expr, + }, + FileFormat { + format: FileFormat + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct HiveFormat { + pub row_format: Option, + pub storage: Option, + pub location: Option +} + +impl Default for HiveFormat { + fn default() -> Self { + HiveFormat { + row_format: None, + location: None, + storage: None + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct SqlOption { diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs new file mode 100644 index 000000000..71a5eee26 --- /dev/null +++ b/src/dialect/hive.rs @@ -0,0 +1,24 @@ +use crate::dialect::Dialect; + +#[derive(Debug)] +pub struct HiveDialect {} + +impl Dialect for HiveDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + (ch == '"') || (ch == '\'') || (ch == '`') + } + + fn is_identifier_start(&self, ch: char) -> bool { + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' + } + + fn is_identifier_part(&self, ch: char) -> bool { + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || (ch >= '0' && ch <= '9') + || ch == '_' + || ch == '$' + || ch == '{' + || ch == '}' + } +} diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index d14534881..316f611cb 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -72,6 +72,7 @@ define_keywords!( ALL, ALLOCATE, ALTER, + ANALYZE, AND, ANY, APPLY, @@ -102,6 +103,7 @@ define_keywords!( BOTH, BY, BYTEA, + CACHE, CALL, CALLED, CARDINALITY, @@ -126,6 +128,7 @@ define_keywords!( COLUMNS, COMMIT, COMMITTED, + COMPUTE, CONDITION, CONNECT, CONSTRAINT, @@ -156,6 +159,7 @@ define_keywords!( CURRENT_USER, CURSOR, CYCLE, + DATABASE, DATE, DAY, DEALLOCATE, @@ -164,6 +168,7 @@ define_keywords!( DECLARE, DEFAULT, DELETE, + DELIMITED, DENSE_RANK, DEREF, DESC, @@ -204,6 +209,7 @@ define_keywords!( FOLLOWING, FOR, FOREIGN, + FORMAT, FRAME_ROW, FREE, FROM, @@ -218,6 +224,7 @@ define_keywords!( GROUPS, HAVING, HEADER, + HIVEVAR, HOLD, HOUR, IDENTITY, @@ -227,6 +234,7 @@ define_keywords!( INDICATOR, INNER, INOUT, + INPUTFORMAT, INSENSITIVE, INSERT, INT, @@ -260,11 +268,13 @@ define_keywords!( LOCALTIMESTAMP, LOCATION, LOWER, + MANAGEDLOCATION, MATCH, MATERIALIZED, MAX, MEMBER, MERGE, + METADATA, METHOD, MIN, MINUTE, @@ -282,6 +292,7 @@ define_keywords!( NO, NONE, NORMALIZE, + NOSCAN, NOT, NTH_VALUE, NTILE, @@ -303,13 +314,16 @@ define_keywords!( ORDER, OUT, OUTER, + OUTPUTFORMAT, OVER, OVERFLOW, OVERLAPS, OVERLAY, + OVERWRITE, PARAMETER, PARQUET, PARTITION, + PARTITIONED, PERCENT, PERCENTILE_CONT, PERCENTILE_DISC, @@ -370,6 +384,7 @@ define_keywords!( SELECT, SENSITIVE, SEQUENCEFILE, + SERDE, SERIALIZABLE, SESSION, SESSION_USER, @@ -387,10 +402,12 @@ define_keywords!( SQRT, START, STATIC, + STATISTICS, STDDEV_POP, STDDEV_SAMP, STDIN, STORED, + STRING, SUBMULTISET, SUBSTRING, SUBSTRING_REGEX, diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index e656ab269..091e95594 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -18,6 +18,7 @@ mod mysql; mod postgresql; mod snowflake; mod sqlite; +mod hive; use std::any::{Any, TypeId}; use std::fmt::Debug; @@ -29,6 +30,7 @@ pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; +pub use self::hive::HiveDialect; /// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates /// to `true` iff `parser.dialect` is one of the `Dialect`s specified. diff --git a/src/parser.rs b/src/parser.rs index b40e94de7..a8bf344d8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -135,6 +135,7 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Statement::Query(Box::new(self.parse_query()?))) } + Keyword::ANALYZE => Ok(self.parse_analyze()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::DROP => Ok(self.parse_drop()?), Keyword::DELETE => Ok(self.parse_delete()?), @@ -167,6 +168,45 @@ impl<'a> Parser<'a> { } } + pub fn parse_analyze(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut for_columns = false; + let mut cache_metadata = false; + let mut noscan = false; + let mut partitions = None; + let mut compute_statistics = false; + + loop { + match self.parse_one_of_keywords(&[Keyword::PARTITION, Keyword::FOR, Keyword::CACHE, Keyword::NOSCAN, Keyword::COMPUTE]) { + Some(Keyword::PARTITION) => partitions = Some(self.parse_comma_separated(Parser::parse_expr)?), + Some(Keyword::NOSCAN) => noscan = true, + Some(Keyword::FOR) => { + self.expect_keyword(Keyword::COLUMNS)?; + for_columns = true + } + Some(Keyword::CACHE) => { + self.expect_keyword(Keyword::METADATA)?; + cache_metadata = true + } + Some(Keyword::COMPUTE) => { + self.expect_keyword(Keyword::STATISTICS)?; + compute_statistics = true + } + _ => break + } + } + + Ok(Statement::Analyze { + table_name, + for_columns, + partitions, + cache_metadata, + noscan, + compute_statistics + }) + } + /// Parse a new expression pub fn parse_expr(&mut self) -> Result { self.parse_subexpr(0) @@ -909,7 +949,7 @@ impl<'a> Parser<'a> { let index = self.index; for &keyword in keywords { if !self.parse_keyword(keyword) { - //println!("parse_keywords aborting .. did not find {}", keyword); + println!("parse_keywords aborting .. did not find {:?}", keyword); // reset index and return immediately self.index = index; return false; @@ -1086,13 +1126,30 @@ impl<'a> Parser<'a> { }) } - pub fn parse_create_external_table( - &mut self, - or_replace: bool, - ) -> Result { + + pub fn parse_create_database(&mut self) -> Result { + let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let db_name = self.parse_object_name()?; + let mut location = None; + let mut managed_location = None; + loop { + match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { + Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), + Some(Keyword::MANAGEDLOCATION) => managed_location = Some(self.parse_literal_string()?), + _ => break + } + } + Ok(Statement::CreateDatabase { db_name, ine, location, managed_location }) + } + + pub fn parse_create_external_table( + &mut self, + or_replace: bool, + ) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; + let hive_distribution = self.parse_hive_distribution()?; self.expect_keywords(&[Keyword::STORED, Keyword::AS])?; let file_format = self.parse_file_format()?; @@ -1103,6 +1160,8 @@ impl<'a> Parser<'a> { name: table_name, columns, constraints, + hive_distribution, + hive_formats: None, with_options: vec![], or_replace, if_not_exists: false, @@ -1197,6 +1256,61 @@ impl<'a> Parser<'a> { }) } + //TODO: Implement parsing for Skewed and Clustered + pub fn parse_hive_distribution(&mut self) -> Result { + if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(HiveDistributionStyle::PARTITIONED { + columns + }) + } else { + Ok(HiveDistributionStyle::NONE) + } + } + + pub fn parse_hive_formats(&mut self) -> Result { + let mut hive_format = HiveFormat::default(); + loop { + match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, Keyword::LOCATION]) { + Some(Keyword::ROW) => { + hive_format.row_format = Some(self.parse_row_format()?); + } + Some(Keyword::STORED) => { + self.expect_keyword(Keyword::AS)?; + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::OUTPUTFORMAT)?; + let output_format = self.parse_expr()?; + hive_format.storage = Some(HiveIOFormat::IOF {input_format, output_format}); + } else { + let format = self.parse_file_format()?; + hive_format.storage = Some(HiveIOFormat::FileFormat { format }); + } + } + Some(Keyword::LOCATION) => { + hive_format.location = Some(self.parse_literal_string()?); + }, + None => break, + _ => break + } + } + + Ok(hive_format) + } + + pub fn parse_row_format(&mut self) -> Result { + self.expect_keyword(Keyword::FORMAT)?; + match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { + Some(Keyword::SERDE) => { + let class = self.parse_literal_string()?; + Ok(HiveRowFormat::SERDE { class }) + } + _ => Ok(HiveRowFormat::DELIMITED), + } + } + pub fn parse_create_table(&mut self, or_replace: bool) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; @@ -1206,6 +1320,8 @@ impl<'a> Parser<'a> { // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` let with_options = self.parse_with_options()?; @@ -1223,6 +1339,8 @@ impl<'a> Parser<'a> { with_options, or_replace, if_not_exists, + hive_distribution, + hive_formats: Some(hive_formats), external: false, file_format: None, location: None, @@ -1630,6 +1748,7 @@ impl<'a> Parser<'a> { // parse_interval_literal for a taste. Keyword::INTERVAL => Ok(DataType::Interval), Keyword::REGCLASS => Ok(DataType::Regclass), + Keyword::STRING => Ok(DataType::String), Keyword::TEXT => { if self.consume_token(&Token::LBracket) { // Note: this is postgresql-specific @@ -1969,15 +2088,20 @@ impl<'a> Parser<'a> { } pub fn parse_set(&mut self) -> Result { - let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL]); + let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; + } let variable = self.parse_identifier()?; if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + let token = self.peek_token(); let value = match (self.parse_value(), token) { (Ok(value), _) => SetVariableValue::Literal(value), (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), (Err(_), unexpected) => self.expected("variable value", unexpected)?, }; + println!("{:?}", value); Ok(Statement::SetVariable { local: modifier == Some(Keyword::LOCAL), variable, @@ -2272,12 +2396,23 @@ impl<'a> Parser<'a> { /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { - self.expect_keyword(Keyword::INTO)?; + let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; + let overwrite = if action == Keyword::OVERWRITE { true } else { false }; + if overwrite { + self.expect_keyword(Keyword::TABLE)?; + } let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; + let partitioned = if self.parse_keywords(&[Keyword::PARTITION]) { + Some(self.parse_comma_separated(Parser::parse_expr)?) + } else { + None + }; let source = Box::new(self.parse_query()?); Ok(Statement::Insert { table_name, + overwrite, + partitioned, columns, source, }) diff --git a/src/test_utils.rs b/src/test_utils.rs index 2fcacffa9..cb54328e8 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -132,6 +132,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), Box::new(SnowflakeDialect {}), + Box::new(HiveDialect {}) ], } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7fba5dcb9..6f7d7fa47 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -92,7 +92,7 @@ fn parse_insert_invalid() { let sql = "INSERT public.customer (id, name, active) VALUES (1, 2, 3)"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected INTO, found: public".to_string()), + ParserError::ParserError("Expected one of INTO or OVERWRITE, found: public".to_string()), res.unwrap_err() ); } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs new file mode 100644 index 000000000..de95e6985 --- /dev/null +++ b/tests/sqlparser_hive.rs @@ -0,0 +1,44 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![warn(clippy::all)] + +//! Test SQL syntax specific to Hive. The parser based on the generic dialect +//! is also tested (on the inputs it can handle). + +use sqlparser::ast::*; +use sqlparser::dialect::{GenericDialect, HiveDialect}; +use sqlparser::test_utils::*; +use sqlparser::parser::ParserError; + +#[test] +fn parse_table_create() -> Result<(), ParserError> { + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...'"#; + let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; + + hive().verified_stmt(sql); + hive().verified_stmt(iof); + + Ok(()) +} + +fn hive() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(HiveDialect {})], + } +} + +fn hive_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(HiveDialect {}), Box::new(GenericDialect {})], + } +} From 9bdd021fea63425e3df75f552a50eba3eb77eb3a Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 21 Jul 2020 19:15:35 -0400 Subject: [PATCH 46/97] First pass on some create table, and some analyze table parsing --- src/ast/mod.rs | 75 +++++++++++++++++++++++++++----- src/dialect/keywords.rs | 4 ++ src/parser.rs | 85 ++++++++++++++++++++++++++++--------- tests/sqlparser_hive.rs | 32 +++++++++++++- tests/sqlparser_postgres.rs | 15 ++++--- 5 files changed, 174 insertions(+), 37 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e9ab618a8..f6fb91f14 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -440,6 +440,19 @@ pub enum Statement { noscan: bool, compute_statistics: bool }, + /// Truncate (Hive) + Truncate { + table_name: ObjectName, + partitions: Option> + }, + /// Msck (Hive) + Msck { + table_name: ObjectName, + repair: bool, + add_partitions: bool, + drop_partitions: bool, + sync_partitions: bool + }, /// SELECT Query(Box), /// INSERT @@ -453,7 +466,7 @@ pub enum Statement { /// A SQL query that specifies what to insert source: Box, /// partitioned insert (Hive) - partitioned: Option> + partitioned: Option> }, Copy { /// TABLE @@ -548,8 +561,9 @@ pub enum Statement { /// supported yet. SetVariable { local: bool, + hivevar: bool, variable: Ident, - value: SetVariableValue, + value: Vec, }, /// SHOW /// @@ -613,7 +627,40 @@ impl fmt::Display for Statement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Statement::Query(s) => write!(f, "{}", s), - Statement::Analyze { table_name, partitions, for_columns, cache_metadata, noscan, compute_statistics } => { + Statement::Msck { table_name, repair, add_partitions, drop_partitions, sync_partitions } => { + write!(f, "MSCK {repair}TABLE {table}", repair = if *repair { "REPAIR " } else { "" }, table = table_name)?; + write!(f, "{add}{drop}{sync}", + add = if *add_partitions { " ADD PARTITIONS" } else { "" }, + drop = if *drop_partitions { " DROP PARTITIONS" } else { "" }, + sync = if *sync_partitions { " SYNC PARTITIONS" } else { "" } + ) + } + Statement::Truncate { table_name, partitions } => { + write!(f, "TRUNCATE TABLE {}", table_name)?; + if let Some(ref parts) = partitions { + if !parts.is_empty() { + write!(f, " PARTITION ({})", display_comma_separated(parts))?; + } + } + Ok(()) + } + Statement::Analyze { table_name, partitions, for_columns: _, cache_metadata, noscan, compute_statistics } => { + write!(f, "ANALYZE TABLE {}", table_name)?; + if let Some(ref parts) = partitions { + if !parts.is_empty() { + write!(f, " PARTITION ({})", display_comma_separated(parts))?; + } + } + //TODO: Add for columns + if *compute_statistics { + write!(f, " COMPUTE STATISTICS")?; + } + if *noscan { + write!(f, " NOSCAN")?; + } + if *cache_metadata { + write!(f, " CACHE METADATA")?; + } Ok(()) } Statement::Insert { @@ -623,10 +670,15 @@ impl fmt::Display for Statement { columns, source, } => { - write!(f, "INSERT {act} {table_name} ", table_name = table_name, act = if *overwrite { "OVERWRITE" } else { "INTO" })?; + write!(f, "INSERT {act} {table_name} ", table_name = table_name, act = if *overwrite { "OVERWRITE TABLE" } else { "INTO" })?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } + if let Some(ref parts) = partitioned { + if !parts.is_empty() { + write!(f, "PARTITION ({}) ", display_comma_separated(parts))?; + } + } write!(f, "{}", source) } Statement::Copy { @@ -864,14 +916,15 @@ impl fmt::Display for Statement { Statement::SetVariable { local, variable, + hivevar, value, - } => write!( - f, - "SET{local} {variable} = {value}", - local = if *local { " LOCAL" } else { "" }, - variable = variable, - value = value - ), + } => { + f.write_str("SET ")?; + if *local { + f.write_str("LOCAL ")?; + } + write!(f, "{hivevar}{name} = {value}", hivevar = if *hivevar { "HIVEVAR:" } else { "" }, name = variable, value = display_comma_separated(value)) + } Statement::ShowVariable { variable } => write!(f, "SHOW {}", variable), Statement::ShowColumns { extended, diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 316f611cb..69329c582 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -282,6 +282,7 @@ define_keywords!( MODIFIES, MODULE, MONTH, + MSCK, MULTISET, NATIONAL, NATURAL, @@ -324,6 +325,7 @@ define_keywords!( PARQUET, PARTITION, PARTITIONED, + PARTITIONS, PERCENT, PERCENTILE_CONT, PERCENTILE_DISC, @@ -361,6 +363,7 @@ define_keywords!( REGR_SYY, RELEASE, RENAME, + REPAIR, REPEATABLE, REPLACE, RESTRICT, @@ -414,6 +417,7 @@ define_keywords!( SUCCEEDS, SUM, SYMMETRIC, + SYNC, SYSTEM, SYSTEM_TIME, SYSTEM_USER, diff --git a/src/parser.rs b/src/parser.rs index a8bf344d8..0ec5222a8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -136,6 +136,8 @@ impl<'a> Parser<'a> { Ok(Statement::Query(Box::new(self.parse_query()?))) } Keyword::ANALYZE => Ok(self.parse_analyze()?), + Keyword::TRUNCATE => Ok(self.parse_truncate()?), + Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::DROP => Ok(self.parse_drop()?), Keyword::DELETE => Ok(self.parse_delete()?), @@ -168,6 +170,37 @@ impl<'a> Parser<'a> { } } + pub fn parse_msck(&mut self) -> Result { + let repair = self.parse_keyword(Keyword::REPAIR); + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let (mut add, mut drop, mut sync) = (false, false, false); + match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { + Some(Keyword::ADD) => { add = true; } + Some(Keyword::DROP) => { drop = true; } + Some(Keyword::SYNC) => { sync = true; } + _ => () + } + self.expect_keyword(Keyword::PARTITIONS)?; + Ok(Statement::Msck { + repair, table_name, add_partitions: add, drop_partitions: drop, sync_partitions: sync + }) + } + + pub fn parse_truncate(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut partitions = None; + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + Ok(Statement::Truncate { + table_name, partitions + }) + } + pub fn parse_analyze(&mut self) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; @@ -179,7 +212,11 @@ impl<'a> Parser<'a> { loop { match self.parse_one_of_keywords(&[Keyword::PARTITION, Keyword::FOR, Keyword::CACHE, Keyword::NOSCAN, Keyword::COMPUTE]) { - Some(Keyword::PARTITION) => partitions = Some(self.parse_comma_separated(Parser::parse_expr)?), + Some(Keyword::PARTITION) => { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + }, Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { self.expect_keyword(Keyword::COLUMNS)?; @@ -949,7 +986,7 @@ impl<'a> Parser<'a> { let index = self.index; for &keyword in keywords { if !self.parse_keyword(keyword) { - println!("parse_keywords aborting .. did not find {:?}", keyword); + // println!("parse_keywords aborting .. did not find {:?}", keyword); // reset index and return immediately self.index = index; return false; @@ -2094,25 +2131,31 @@ impl<'a> Parser<'a> { } let variable = self.parse_identifier()?; if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - - let token = self.peek_token(); - let value = match (self.parse_value(), token) { - (Ok(value), _) => SetVariableValue::Literal(value), - (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), - (Err(_), unexpected) => self.expected("variable value", unexpected)?, - }; - println!("{:?}", value); - Ok(Statement::SetVariable { - local: modifier == Some(Keyword::LOCAL), - variable, - value, - }) + let mut values = vec![]; + loop { + let token = self.peek_token(); + let value = match (self.parse_value(), token) { + (Ok(value), _) => SetVariableValue::Literal(value), + (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), + (Err(_), unexpected) => self.expected("variable value", unexpected)?, + }; + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: Some(Keyword::HIVEVAR) == modifier, + variable, + value: values, + }) + } } else if variable.value == "TRANSACTION" && modifier.is_none() { - Ok(Statement::SetTransaction { + return Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, }) } else { - self.expected("equals sign or TO", self.peek_token()) + return self.expected("equals sign or TO", self.peek_token()) } } @@ -2403,8 +2446,12 @@ impl<'a> Parser<'a> { } let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; - let partitioned = if self.parse_keywords(&[Keyword::PARTITION]) { - Some(self.parse_comma_separated(Parser::parse_expr)?) + + let partitioned = if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let r = Some(self.parse_comma_separated(Parser::parse_identifier)?); + self.expect_token(&Token::RParen)?; + r } else { None }; diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index de95e6985..837d48567 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -21,14 +21,42 @@ use sqlparser::test_utils::*; use sqlparser::parser::ParserError; #[test] -fn parse_table_create() -> Result<(), ParserError> { +fn parse_table_create() { let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...'"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; hive().verified_stmt(sql); hive().verified_stmt(iof); +} + +#[test] +fn parse_insert_overwrite() { + let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a, b) SELECT a, b, c FROM db.table"#; + hive().verified_stmt(insert_partitions); +} + +#[test] +fn test_truncate() { + let truncate = r#"TRUNCATE TABLE db.table"#; + hive().verified_stmt(truncate); +} - Ok(()) +#[test] +fn parse_analyze() { + let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS NOSCAN CACHE METADATA"#; + hive().verified_stmt(analyze); +} + +#[test] +fn parse_msck() { + let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; + hive().verified_stmt(msck); +} + +#[test] +fn parse_set() { + let set = "SET HIVEVAR:name = a, b, c_d"; + hive().verified_stmt(set); } fn hive() -> TestedDialects { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 20f186100..893ddfff2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -364,8 +364,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("b".into()), + value: vec![SetVariableValue::Ident("b".into())], } ); @@ -374,8 +375,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Literal(Value::SingleQuotedString("b".into())), + value: vec![SetVariableValue::Literal(Value::SingleQuotedString("b".into()))], } ); @@ -384,8 +386,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Literal(number("0")), + value: vec![SetVariableValue::Literal(number("0"))], } ); @@ -394,8 +397,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("DEFAULT".into()), + value: vec![SetVariableValue::Ident("DEFAULT".into())], } ); @@ -404,8 +408,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: true, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("b".into()), + value: vec![SetVariableValue::Ident("b".into())], } ); From ff19e200c3257ea8b61945ffba635432403c2d35 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 22 Jul 2020 14:26:14 -0400 Subject: [PATCH 47/97] Speculative WITH (...) INSERT --- src/ast/query.rs | 2 ++ src/parser.rs | 20 ++++++++++++-------- tests/sqlparser_hive.rs | 6 ++++++ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index b28fa92a9..2c4280ada 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -73,6 +73,7 @@ pub enum SetExpr { right: Box, }, Values(Values), + Insert(Statement), // TODO: ANSI SQL supports `TABLE` here. } @@ -82,6 +83,7 @@ impl fmt::Display for SetExpr { SetExpr::Select(s) => write!(f, "{}", s), SetExpr::Query(q) => write!(f, "({})", q), SetExpr::Values(v) => write!(f, "{}", v), + SetExpr::Insert(v) => write!(f, "{}", v), SetExpr::SetOperation { left, right, diff --git a/src/parser.rs b/src/parser.rs index 0ec5222a8..ea2c7d865 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2002,11 +2002,16 @@ impl<'a> Parser<'a> { name: self.parse_identifier()?, columns: self.parse_parenthesized_column_list(Optional)?, }; - self.expect_keyword(Keyword::AS)?; - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - Ok(Cte { alias, query }) + + if self.parse_keyword(Keyword::AS) { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + Ok(Cte { alias, query }) + } else { + let query = self.parse_query()?; + Ok(Cte { alias, query }) + } } /// Parse a "query body", which is an expression with roughly the @@ -2441,9 +2446,8 @@ impl<'a> Parser<'a> { pub fn parse_insert(&mut self) -> Result { let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; let overwrite = if action == Keyword::OVERWRITE { true } else { false }; - if overwrite { - self.expect_keyword(Keyword::TABLE)?; - } + // Hive lets you put table here regardless + self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 837d48567..403125743 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -59,6 +59,12 @@ fn parse_set() { hive().verified_stmt(set); } +#[test] +fn parse_with_cte() { + let with = "WITH a AS (SELECT * FROM table) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM a"; + hive().verified_stmt(with); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From a290f5f4a71a8f84843b545353f96b12235d1ae7 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 14:12:06 -0400 Subject: [PATCH 48/97] Speculative debugging assistance --- src/parser.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index ea2c7d865..a1ffe3e75 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -409,7 +409,12 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(expr) } - unexpected => self.expected("an expression", unexpected), + unexpected => { + self.prev_token(); + self.prev_token(); + self.prev_token(); + self.expected(format!("an expression: {} - {} {} {}", self.index, self.next_token().to_string(), self.next_token().to_string(), self.next_token().to_string()).as_str(), unexpected) + }, }?; if self.parse_keyword(Keyword::COLLATE) { From 2a567c1fd7cfafa54d954b9daad16a8b023ed00c Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 14:14:24 -0400 Subject: [PATCH 49/97] Speculative debugging assistance --- src/parser.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index a1ffe3e75..28e85c518 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -413,7 +413,10 @@ impl<'a> Parser<'a> { self.prev_token(); self.prev_token(); self.prev_token(); - self.expected(format!("an expression: {} - {} {} {}", self.index, self.next_token().to_string(), self.next_token().to_string(), self.next_token().to_string()).as_str(), unexpected) + let t1 = self.next_token().to_string(); + let t2 = self.next_token().to_string(); + let t3 = self.next_token().to_string(); + self.expected(format!("an expression: {} - {} {} {}", self.index, &t1, &t2, &t3).as_str(), unexpected) }, }?; From 39a73d2ade5d3891a5432374c5d785946526b58f Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 14:52:45 -0400 Subject: [PATCH 50/97] Remove debugging assistance and speculative double equals fix --- src/parser.rs | 2 +- src/tokenizer.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 28e85c518..a5b2b3272 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -903,7 +903,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20), + Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq | Token::DoubleEq => Ok(20), Token::Pipe => Ok(21), Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), Token::Ampersand => Ok(23), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 70587f18b..d86c5ae6d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -48,6 +48,8 @@ pub enum Token { Comma, /// Whitespace (space, tab, etc) Whitespace(Whitespace), + /// Double equals sign `==` + DoubleEq, /// Equality operator `=` Eq, /// Not Equals operator `<>` (or `!=` in some dialects) @@ -134,6 +136,7 @@ impl fmt::Display for Token { Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s), Token::Comma => f.write_str(","), Token::Whitespace(ws) => write!(f, "{}", ws), + Token::DoubleEq => f.write_str("=="), Token::Eq => f.write_str("="), Token::Neq => f.write_str("<>"), Token::Lt => f.write_str("<"), From 5bb533f06597165e83aefcf44c4ae142ba87f38b Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 14:55:05 -0400 Subject: [PATCH 51/97] Remove debugging assistance and speculative double equals fix --- src/parser.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index a5b2b3272..997204b1e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -56,6 +56,7 @@ pub enum IsLateral { } use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; +use crate::ast::Expr::BinaryOp; impl From for ParserError { fn from(e: TokenizerError) -> Self { @@ -748,6 +749,7 @@ impl<'a> Parser<'a> { pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { let tok = self.next_token(); let regular_binary_operator = match &tok { + Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), Token::Neq => Some(BinaryOperator::NotEq), Token::Gt => Some(BinaryOperator::Gt), From df7c30a0c3a447ca320051aed61d882bb51d20d2 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 15:56:23 -0400 Subject: [PATCH 52/97] Fix partition clauses with values --- src/ast/mod.rs | 2 +- src/parser.rs | 2 +- tests/sqlparser_hive.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f6fb91f14..c3efa7087 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -466,7 +466,7 @@ pub enum Statement { /// A SQL query that specifies what to insert source: Box, /// partitioned insert (Hive) - partitioned: Option> + partitioned: Option> }, Copy { /// TABLE diff --git a/src/parser.rs b/src/parser.rs index 997204b1e..eebbb5f69 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2463,7 +2463,7 @@ impl<'a> Parser<'a> { let partitioned = if self.parse_keyword(Keyword::PARTITION) { self.expect_token(&Token::LParen)?; - let r = Some(self.parse_comma_separated(Parser::parse_identifier)?); + let r = Some(self.parse_comma_separated(Parser::parse_expr)?); self.expect_token(&Token::RParen)?; r } else { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 403125743..cbedfc0fc 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -31,7 +31,7 @@ fn parse_table_create() { #[test] fn parse_insert_overwrite() { - let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a, b) SELECT a, b, c FROM db.table"#; + let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#; hive().verified_stmt(insert_partitions); } From bb6f7004f3a0907f3fc4539dc74422cbcd3b3b9c Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 16:13:30 -0400 Subject: [PATCH 53/97] Add spaceship operator --- src/ast/operator.rs | 2 ++ src/parser.rs | 15 ++++----------- src/tokenizer.rs | 11 ++++++++++- tests/sqlparser_hive.rs | 10 ++++++++-- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 57e70982f..732c81232 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -65,6 +65,7 @@ pub enum BinaryOperator { Lt, GtEq, LtEq, + Spaceship, Eq, NotEq, And, @@ -92,6 +93,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::Lt => "<", BinaryOperator::GtEq => ">=", BinaryOperator::LtEq => "<=", + BinaryOperator::Spaceship => "<=>", BinaryOperator::Eq => "=", BinaryOperator::NotEq => "<>", BinaryOperator::And => "AND", diff --git a/src/parser.rs b/src/parser.rs index eebbb5f69..01d4a946f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -56,7 +56,7 @@ pub enum IsLateral { } use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; -use crate::ast::Expr::BinaryOp; + impl From for ParserError { fn from(e: TokenizerError) -> Self { @@ -410,15 +410,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(expr) } - unexpected => { - self.prev_token(); - self.prev_token(); - self.prev_token(); - let t1 = self.next_token().to_string(); - let t2 = self.next_token().to_string(); - let t3 = self.next_token().to_string(); - self.expected(format!("an expression: {} - {} {} {}", self.index, &t1, &t2, &t3).as_str(), unexpected) - }, + unexpected => self.expected("an expression:", unexpected), }?; if self.parse_keyword(Keyword::COLLATE) { @@ -749,6 +741,7 @@ impl<'a> Parser<'a> { pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { let tok = self.next_token(); let regular_binary_operator = match &tok { + Token::Spaceship => Some(BinaryOperator::Spaceship), Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), Token::Neq => Some(BinaryOperator::NotEq), @@ -905,7 +898,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq | Token::DoubleEq => Ok(20), + Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq | Token::DoubleEq | Token::Spaceship => Ok(20), Token::Pipe => Ok(21), Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), Token::Ampersand => Ok(23), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d86c5ae6d..67fd6c395 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -62,6 +62,8 @@ pub enum Token { LtEq, /// Greater Than Or Equals operator `>=` GtEq, + /// Spaceship operator <=> + Spaceship, /// Plus operator `+` Plus, /// Minus operator `-` @@ -137,6 +139,7 @@ impl fmt::Display for Token { Token::Comma => f.write_str(","), Token::Whitespace(ws) => write!(f, "{}", ws), Token::DoubleEq => f.write_str("=="), + Token::Spaceship => f.write_str("<=>"), Token::Eq => f.write_str("="), Token::Neq => f.write_str("<>"), Token::Lt => f.write_str("<"), @@ -464,7 +467,13 @@ impl<'a> Tokenizer<'a> { '<' => { chars.next(); // consume match chars.peek() { - Some('=') => self.consume_and_return(chars, Token::LtEq), + Some('=') => { + chars.next(); + match chars.peek() { + Some('>') => self.consume_and_return(chars, Token::Spaceship), + _ => Ok(Some(Token::LtEq)) + } + }, Some('>') => self.consume_and_return(chars, Token::Neq), Some('<') => self.consume_and_return(chars, Token::ShiftLeft), _ => Ok(Some(Token::Lt)), diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index cbedfc0fc..51660940a 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -15,10 +15,10 @@ //! Test SQL syntax specific to Hive. The parser based on the generic dialect //! is also tested (on the inputs it can handle). -use sqlparser::ast::*; + use sqlparser::dialect::{GenericDialect, HiveDialect}; use sqlparser::test_utils::*; -use sqlparser::parser::ParserError; + #[test] fn parse_table_create() { @@ -59,6 +59,12 @@ fn parse_set() { hive().verified_stmt(set); } +#[test] +fn test_spaceship() { + let spaceship = "SELECT * FROM db.table WHERE a <=> b"; + hive().verified_stmt(spaceship); +} + #[test] fn parse_with_cte() { let with = "WITH a AS (SELECT * FROM table) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM a"; From 2cf808ab56bc80de347e5447480a9793a74c899e Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 23 Jul 2020 16:31:25 -0400 Subject: [PATCH 54/97] cargo fmt --- src/ast/mod.rs | 172 +++++++++++++++++++++++++++--------- src/dialect/mod.rs | 4 +- src/parser.rs | 88 ++++++++++++------ src/tokenizer.rs | 4 +- tests/sqlparser_hive.rs | 2 - tests/sqlparser_postgres.rs | 4 +- 6 files changed, 197 insertions(+), 77 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c3efa7087..217b3ccb6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -438,12 +438,12 @@ pub enum Statement { for_columns: bool, cache_metadata: bool, noscan: bool, - compute_statistics: bool + compute_statistics: bool, }, /// Truncate (Hive) Truncate { table_name: ObjectName, - partitions: Option> + partitions: Option>, }, /// Msck (Hive) Msck { @@ -451,7 +451,7 @@ pub enum Statement { repair: bool, add_partitions: bool, drop_partitions: bool, - sync_partitions: bool + sync_partitions: bool, }, /// SELECT Query(Box), @@ -466,7 +466,7 @@ pub enum Statement { /// A SQL query that specifies what to insert source: Box, /// partitioned insert (Hive) - partitioned: Option> + partitioned: Option>, }, Copy { /// TABLE @@ -594,8 +594,9 @@ pub enum Statement { /// CREATE DATABASE CreateDatabase { db_name: ObjectName, - ine: bool, location: Option, - managed_location: Option + ine: bool, + location: Option, + managed_location: Option, }, /// `ASSERT [AS ]` Assert { @@ -627,15 +628,43 @@ impl fmt::Display for Statement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Statement::Query(s) => write!(f, "{}", s), - Statement::Msck { table_name, repair, add_partitions, drop_partitions, sync_partitions } => { - write!(f, "MSCK {repair}TABLE {table}", repair = if *repair { "REPAIR " } else { "" }, table = table_name)?; - write!(f, "{add}{drop}{sync}", - add = if *add_partitions { " ADD PARTITIONS" } else { "" }, - drop = if *drop_partitions { " DROP PARTITIONS" } else { "" }, - sync = if *sync_partitions { " SYNC PARTITIONS" } else { "" } + Statement::Msck { + table_name, + repair, + add_partitions, + drop_partitions, + sync_partitions, + } => { + write!( + f, + "MSCK {repair}TABLE {table}", + repair = if *repair { "REPAIR " } else { "" }, + table = table_name + )?; + write!( + f, + "{add}{drop}{sync}", + add = if *add_partitions { + " ADD PARTITIONS" + } else { + "" + }, + drop = if *drop_partitions { + " DROP PARTITIONS" + } else { + "" + }, + sync = if *sync_partitions { + " SYNC PARTITIONS" + } else { + "" + } ) } - Statement::Truncate { table_name, partitions } => { + Statement::Truncate { + table_name, + partitions, + } => { write!(f, "TRUNCATE TABLE {}", table_name)?; if let Some(ref parts) = partitions { if !parts.is_empty() { @@ -644,7 +673,14 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::Analyze { table_name, partitions, for_columns: _, cache_metadata, noscan, compute_statistics } => { + Statement::Analyze { + table_name, + partitions, + for_columns: _, + cache_metadata, + noscan, + compute_statistics, + } => { write!(f, "ANALYZE TABLE {}", table_name)?; if let Some(ref parts) = partitions { if !parts.is_empty() { @@ -670,7 +706,16 @@ impl fmt::Display for Statement { columns, source, } => { - write!(f, "INSERT {act} {table_name} ", table_name = table_name, act = if *overwrite { "OVERWRITE TABLE" } else { "INTO" })?; + write!( + f, + "INSERT {act} {table_name} ", + table_name = table_name, + act = if *overwrite { + "OVERWRITE TABLE" + } else { + "INTO" + } + )?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } @@ -730,7 +775,12 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::CreateDatabase { db_name, ine, location, managed_location } => { + Statement::CreateDatabase { + db_name, + ine, + location, + managed_location, + } => { write!(f, "CREATE")?; if *ine { write!(f, " IF NOT EXISTS")?; @@ -813,8 +863,14 @@ impl fmt::Display for Statement { } match hive_distribution { - HiveDistributionStyle::PARTITIONED { columns } => write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?, - HiveDistributionStyle::CLUSTERED { columns, sorted_by, num_buckets } => { + HiveDistributionStyle::PARTITIONED { columns } => { + write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))? + } + HiveDistributionStyle::CLUSTERED { + columns, + sorted_by, + num_buckets, + } => { write!(f, " CLUSTERED BY ({})", display_comma_separated(&columns))?; if !sorted_by.is_empty() { write!(f, " SORTED BY ({})", display_comma_separated(&sorted_by))?; @@ -823,26 +879,50 @@ impl fmt::Display for Statement { write!(f, " INTO {} BUCKETS", num_buckets)?; } } - HiveDistributionStyle::SKEWED { columns, on, stored_as_directories } => { - write!(f, " SKEWED BY ({})) ON ({})", display_comma_separated(&columns), display_comma_separated(&on))?; + HiveDistributionStyle::SKEWED { + columns, + on, + stored_as_directories, + } => { + write!( + f, + " SKEWED BY ({})) ON ({})", + display_comma_separated(&columns), + display_comma_separated(&on) + )?; if *stored_as_directories { write!(f, " STORED AS DIRECTORIES")?; } - }, - _ => () + } + _ => (), } - if let Some(HiveFormat { row_format, storage, location }) = hive_formats { - + if let Some(HiveFormat { + row_format, + storage, + location, + }) = hive_formats + { match row_format { - Some(HiveRowFormat::SERDE { class }) => write!(f, " ROW FORMAT SERDE '{}'", class)?, + Some(HiveRowFormat::SERDE { class }) => { + write!(f, " ROW FORMAT SERDE '{}'", class)? + } Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?, - None => () + None => (), } match storage { - Some(HiveIOFormat::IOF { input_format, output_format }) => write!(f, " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", input_format, output_format)?, - Some(HiveIOFormat::FileFormat { format }) => write!(f, " STORED AS {}", format)?, - None => () + Some(HiveIOFormat::IOF { + input_format, + output_format, + }) => write!( + f, + " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", + input_format, output_format + )?, + Some(HiveIOFormat::FileFormat { format }) => { + write!(f, " STORED AS {}", format)? + } + None => (), } if let Some(loc) = location { write!(f, " LOCATION '{}'", loc)?; @@ -923,7 +1003,13 @@ impl fmt::Display for Statement { if *local { f.write_str("LOCAL ")?; } - write!(f, "{hivevar}{name} = {value}", hivevar = if *hivevar { "HIVEVAR:" } else { "" }, name = variable, value = display_comma_separated(value)) + write!( + f, + "{hivevar}{name} = {value}", + hivevar = if *hivevar { "HIVEVAR:" } else { "" }, + name = variable, + value = display_comma_separated(value) + ) } Statement::ShowVariable { variable } => write!(f, "SHOW {}", variable), Statement::ShowColumns { @@ -1190,40 +1276,38 @@ impl fmt::Display for ObjectType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum HiveDistributionStyle { PARTITIONED { - columns: Vec + columns: Vec, }, CLUSTERED { columns: Vec, sorted_by: Vec, - num_buckets: i32 + num_buckets: i32, }, SKEWED { columns: Vec, on: Vec, - stored_as_directories: bool + stored_as_directories: bool, }, - NONE + NONE, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum HiveRowFormat { - SERDE { - class: String - }, - DELIMITED + SERDE { class: String }, + DELIMITED, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum HiveIOFormat { IOF { - input_format: Expr, - output_format: Expr, + input_format: Expr, + output_format: Expr, }, FileFormat { - format: FileFormat - } + format: FileFormat, + }, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -1231,7 +1315,7 @@ pub enum HiveIOFormat { pub struct HiveFormat { pub row_format: Option, pub storage: Option, - pub location: Option + pub location: Option, } impl Default for HiveFormat { @@ -1239,7 +1323,7 @@ impl Default for HiveFormat { HiveFormat { row_format: None, location: None, - storage: None + storage: None, } } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 091e95594..c7041ad93 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -12,25 +12,25 @@ mod ansi; mod generic; +mod hive; pub mod keywords; mod mssql; mod mysql; mod postgresql; mod snowflake; mod sqlite; -mod hive; use std::any::{Any, TypeId}; use std::fmt::Debug; pub use self::ansi::AnsiDialect; pub use self::generic::GenericDialect; +pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; -pub use self::hive::HiveDialect; /// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates /// to `true` iff `parser.dialect` is one of the `Dialect`s specified. diff --git a/src/parser.rs b/src/parser.rs index 01d4a946f..04af6902f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -57,7 +57,6 @@ pub enum IsLateral { use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; - impl From for ParserError { fn from(e: TokenizerError) -> Self { ParserError::TokenizerError(format!( @@ -177,14 +176,24 @@ impl<'a> Parser<'a> { let table_name = self.parse_object_name()?; let (mut add, mut drop, mut sync) = (false, false, false); match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { - Some(Keyword::ADD) => { add = true; } - Some(Keyword::DROP) => { drop = true; } - Some(Keyword::SYNC) => { sync = true; } - _ => () + Some(Keyword::ADD) => { + add = true; + } + Some(Keyword::DROP) => { + drop = true; + } + Some(Keyword::SYNC) => { + sync = true; + } + _ => (), } self.expect_keyword(Keyword::PARTITIONS)?; Ok(Statement::Msck { - repair, table_name, add_partitions: add, drop_partitions: drop, sync_partitions: sync + repair, + table_name, + add_partitions: add, + drop_partitions: drop, + sync_partitions: sync, }) } @@ -198,7 +207,8 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; } Ok(Statement::Truncate { - table_name, partitions + table_name, + partitions, }) } @@ -212,12 +222,18 @@ impl<'a> Parser<'a> { let mut compute_statistics = false; loop { - match self.parse_one_of_keywords(&[Keyword::PARTITION, Keyword::FOR, Keyword::CACHE, Keyword::NOSCAN, Keyword::COMPUTE]) { + match self.parse_one_of_keywords(&[ + Keyword::PARTITION, + Keyword::FOR, + Keyword::CACHE, + Keyword::NOSCAN, + Keyword::COMPUTE, + ]) { Some(Keyword::PARTITION) => { self.expect_token(&Token::LParen)?; partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); self.expect_token(&Token::RParen)?; - }, + } Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { self.expect_keyword(Keyword::COLUMNS)?; @@ -231,7 +247,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::STATISTICS)?; compute_statistics = true } - _ => break + _ => break, } } @@ -241,7 +257,7 @@ impl<'a> Parser<'a> { partitions, cache_metadata, noscan, - compute_statistics + compute_statistics, }) } @@ -898,7 +914,14 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq | Token::DoubleEq | Token::Spaceship => Ok(20), + Token::Eq + | Token::Lt + | Token::LtEq + | Token::Neq + | Token::Gt + | Token::GtEq + | Token::DoubleEq + | Token::Spaceship => Ok(20), Token::Pipe => Ok(21), Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), Token::Ampersand => Ok(23), @@ -1175,11 +1198,18 @@ impl<'a> Parser<'a> { loop { match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), - Some(Keyword::MANAGEDLOCATION) => managed_location = Some(self.parse_literal_string()?), - _ => break + Some(Keyword::MANAGEDLOCATION) => { + managed_location = Some(self.parse_literal_string()?) + } + _ => break, } } - Ok(Statement::CreateDatabase { db_name, ine, location, managed_location }) + Ok(Statement::CreateDatabase { + db_name, + ine, + location, + managed_location, + }) } pub fn parse_create_external_table( @@ -1302,9 +1332,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let columns = self.parse_comma_separated(Parser::parse_column_def)?; self.expect_token(&Token::RParen)?; - Ok(HiveDistributionStyle::PARTITIONED { - columns - }) + Ok(HiveDistributionStyle::PARTITIONED { columns }) } else { Ok(HiveDistributionStyle::NONE) } @@ -1323,7 +1351,10 @@ impl<'a> Parser<'a> { let input_format = self.parse_expr()?; self.expect_keyword(Keyword::OUTPUTFORMAT)?; let output_format = self.parse_expr()?; - hive_format.storage = Some(HiveIOFormat::IOF {input_format, output_format}); + hive_format.storage = Some(HiveIOFormat::IOF { + input_format, + output_format, + }); } else { let format = self.parse_file_format()?; hive_format.storage = Some(HiveIOFormat::FileFormat { format }); @@ -1331,9 +1362,9 @@ impl<'a> Parser<'a> { } Some(Keyword::LOCATION) => { hive_format.location = Some(self.parse_literal_string()?); - }, + } None => break, - _ => break + _ => break, } } @@ -2133,7 +2164,8 @@ impl<'a> Parser<'a> { } pub fn parse_set(&mut self) -> Result { - let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); if let Some(Keyword::HIVEVAR) = modifier { self.expect_token(&Token::Colon)?; } @@ -2156,14 +2188,14 @@ impl<'a> Parser<'a> { hivevar: Some(Keyword::HIVEVAR) == modifier, variable, value: values, - }) + }); } } else if variable.value == "TRANSACTION" && modifier.is_none() { return Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, - }) + }); } else { - return self.expected("equals sign or TO", self.peek_token()) + return self.expected("equals sign or TO", self.peek_token()); } } @@ -2448,7 +2480,11 @@ impl<'a> Parser<'a> { /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; - let overwrite = if action == Keyword::OVERWRITE { true } else { false }; + let overwrite = if action == Keyword::OVERWRITE { + true + } else { + false + }; // Hive lets you put table here regardless self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name()?; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 67fd6c395..3baae9071 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -471,9 +471,9 @@ impl<'a> Tokenizer<'a> { chars.next(); match chars.peek() { Some('>') => self.consume_and_return(chars, Token::Spaceship), - _ => Ok(Some(Token::LtEq)) + _ => Ok(Some(Token::LtEq)), } - }, + } Some('>') => self.consume_and_return(chars, Token::Neq), Some('<') => self.consume_and_return(chars, Token::ShiftLeft), _ => Ok(Some(Token::Lt)), diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 51660940a..cdc4257d5 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -15,11 +15,9 @@ //! Test SQL syntax specific to Hive. The parser based on the generic dialect //! is also tested (on the inputs it can handle). - use sqlparser::dialect::{GenericDialect, HiveDialect}; use sqlparser::test_utils::*; - #[test] fn parse_table_create() { let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...'"#; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 893ddfff2..2abd8ae9b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -377,7 +377,9 @@ fn parse_set() { local: false, hivevar: false, variable: "a".into(), - value: vec![SetVariableValue::Literal(Value::SingleQuotedString("b".into()))], + value: vec![SetVariableValue::Literal(Value::SingleQuotedString( + "b".into() + ))], } ); From c671a292c048c656ef5464e6999c4121d47d214d Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 24 Jul 2020 16:28:57 -0400 Subject: [PATCH 55/97] fix some lints and tests, add purge on tables ad proper formatting for INSERT INTO TABLE --- src/ast/mod.rs | 19 ++++++++++++------- src/ast/query.rs | 1 + src/dialect/keywords.rs | 1 + src/parser.rs | 17 ++++++++--------- tests/sqlparser_common.rs | 2 ++ tests/sqlparser_hive.rs | 6 ++++++ 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 217b3ccb6..389f06947 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -467,6 +467,8 @@ pub enum Statement { source: Box, /// partitioned insert (Hive) partitioned: Option>, + /// whether the insert has the table keyword (Hive) + table: bool, }, Copy { /// TABLE @@ -553,6 +555,9 @@ pub enum Statement { /// Whether `CASCADE` was specified. This will be `false` when /// `RESTRICT` or no drop behavior at all was specified. cascade: bool, + /// Hive allows you specify whether the table's stored data will be + /// deleted along with the dropped table + purge: bool, }, /// SET /// @@ -705,16 +710,14 @@ impl fmt::Display for Statement { partitioned, columns, source, + table, } => { write!( f, - "INSERT {act} {table_name} ", + "INSERT {act}{tbl} {table_name} ", table_name = table_name, - act = if *overwrite { - "OVERWRITE TABLE" - } else { - "INTO" - } + act = if *overwrite { "OVERWRITE" } else { "INTO" }, + tbl = if *table { " TABLE" } else { "" } )?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; @@ -985,13 +988,15 @@ impl fmt::Display for Statement { if_exists, names, cascade, + purge, } => write!( f, - "DROP {}{} {}{}", + "DROP {}{} {}{}{}", object_type, if *if_exists { " IF EXISTS" } else { "" }, display_comma_separated(names), if *cascade { " CASCADE" } else { "" }, + if *purge { " PURGE" } else { "" } ), Statement::SetVariable { local, diff --git a/src/ast/query.rs b/src/ast/query.rs index 2c4280ada..79197ec18 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -57,6 +57,7 @@ impl fmt::Display for Query { /// A node in a tree, representing a "query body" expression, roughly: /// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]` +#[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum SetExpr { diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 69329c582..5705d5d76 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -341,6 +341,7 @@ define_keywords!( PREPARE, PRIMARY, PROCEDURE, + PURGE, RANGE, RANK, RCFILE, diff --git a/src/parser.rs b/src/parser.rs index 04af6902f..cce6585ee 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1298,6 +1298,7 @@ impl<'a> Parser<'a> { let names = self.parse_comma_separated(Parser::parse_object_name)?; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); + let purge = self.parse_keyword(Keyword::PURGE); if cascade && restrict { return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); } @@ -1306,6 +1307,7 @@ impl<'a> Parser<'a> { if_exists, names, cascade, + purge, }) } @@ -2191,11 +2193,11 @@ impl<'a> Parser<'a> { }); } } else if variable.value == "TRANSACTION" && modifier.is_none() { - return Ok(Statement::SetTransaction { + Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, - }); + }) } else { - return self.expected("equals sign or TO", self.peek_token()); + self.expected("equals sign or TO", self.peek_token()) } } @@ -2480,13 +2482,9 @@ impl<'a> Parser<'a> { /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; - let overwrite = if action == Keyword::OVERWRITE { - true - } else { - false - }; + let overwrite = action == Keyword::OVERWRITE; // Hive lets you put table here regardless - self.parse_keyword(Keyword::TABLE); + let table = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; @@ -2505,6 +2503,7 @@ impl<'a> Parser<'a> { partitioned, columns, source, + table, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6f7d7fa47..f376a67cb 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2751,6 +2751,7 @@ fn parse_drop_table() { if_exists, names, cascade, + purge: _, } => { assert_eq!(false, if_exists); assert_eq!(ObjectType::Table, object_type); @@ -2770,6 +2771,7 @@ fn parse_drop_table() { if_exists, names, cascade, + purge: _, } => { assert_eq!(true, if_exists); assert_eq!(ObjectType::Table, object_type); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index cdc4257d5..5e083c745 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -69,6 +69,12 @@ fn parse_with_cte() { hive().verified_stmt(with); } +#[test] +fn drop_table_purge() { + let purge = "DROP TABLE db.table_name PURGE"; + hive().verified_stmt(purge); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 00b5d93d63f01f696a87db1427f7fe01776f95d8 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 24 Jul 2020 17:18:34 -0400 Subject: [PATCH 56/97] Fix a lint, add CREATE TABLE ... LIKE ... --- src/ast/mod.rs | 10 ++++++++-- src/parser.rs | 5 +++++ tests/sqlparser_hive.rs | 12 ++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 389f06947..89d21c0ab 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -521,6 +521,7 @@ pub enum Statement { location: Option, query: Option>, without_rowid: bool, + like: Option }, /// SQLite's `CREATE VIRTUAL TABLE .. USING ()` CreateVirtualTable { @@ -834,6 +835,7 @@ impl fmt::Display for Statement { location, query, without_rowid, + like } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: @@ -856,7 +858,7 @@ impl fmt::Display for Statement { write!(f, ", ")?; } write!(f, "{})", display_comma_separated(constraints))?; - } else if query.is_none() { + } else if query.is_none() && like.is_none() { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens write!(f, " ()")?; } @@ -865,9 +867,13 @@ impl fmt::Display for Statement { write!(f, " WITHOUT ROWID")?; } + // Only for Hive + if let Some(l) = like { + write!(f, " LIKE {}", l)?; + } match hive_distribution { HiveDistributionStyle::PARTITIONED { columns } => { - write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))? + write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?; } HiveDistributionStyle::CLUSTERED { columns, diff --git a/src/parser.rs b/src/parser.rs index cce6585ee..3252f5354 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1240,6 +1240,7 @@ impl<'a> Parser<'a> { location: Some(location), query: None, without_rowid: false, + like: None }) } @@ -1387,6 +1388,9 @@ impl<'a> Parser<'a> { pub fn parse_create_table(&mut self, or_replace: bool) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; + let like = if self.parse_keyword(Keyword::LIKE) { + self.parse_object_name().ok() + } else { None }; // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; @@ -1419,6 +1423,7 @@ impl<'a> Parser<'a> { location: None, query, without_rowid, + like }) } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 5e083c745..2079c35f1 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -75,14 +75,14 @@ fn drop_table_purge() { hive().verified_stmt(purge); } -fn hive() -> TestedDialects { - TestedDialects { - dialects: vec![Box::new(HiveDialect {})], - } +#[test] +fn create_table_like() { + let like = "CREATE TABLE db.table_name LIKE db.other_table"; + hive().verified_stmt(like); } -fn hive_and_generic() -> TestedDialects { +fn hive() -> TestedDialects { TestedDialects { - dialects: vec![Box::new(HiveDialect {}), Box::new(GenericDialect {})], + dialects: vec![Box::new(HiveDialect {})], } } From 695b1a0f8876d56cb47c744856c96fb199eb34f9 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 24 Jul 2020 17:20:15 -0400 Subject: [PATCH 57/97] cargo fmt --- src/ast/mod.rs | 4 ++-- src/parser.rs | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 89d21c0ab..f539cd0ec 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -521,7 +521,7 @@ pub enum Statement { location: Option, query: Option>, without_rowid: bool, - like: Option + like: Option, }, /// SQLite's `CREATE VIRTUAL TABLE .. USING ()` CreateVirtualTable { @@ -835,7 +835,7 @@ impl fmt::Display for Statement { location, query, without_rowid, - like + like, } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: diff --git a/src/parser.rs b/src/parser.rs index 3252f5354..f9f019877 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1240,7 +1240,7 @@ impl<'a> Parser<'a> { location: Some(location), query: None, without_rowid: false, - like: None + like: None, }) } @@ -1390,7 +1390,9 @@ impl<'a> Parser<'a> { let table_name = self.parse_object_name()?; let like = if self.parse_keyword(Keyword::LIKE) { self.parse_object_name().ok() - } else { None }; + } else { + None + }; // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; @@ -1423,7 +1425,7 @@ impl<'a> Parser<'a> { location: None, query, without_rowid, - like + like, }) } From 6cc9056c4d5a97272e83239e6d57b20696910975 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 24 Jul 2020 17:20:56 -0400 Subject: [PATCH 58/97] Fixed a lint --- tests/sqlparser_hive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 2079c35f1..a8617eb6d 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -15,7 +15,7 @@ //! Test SQL syntax specific to Hive. The parser based on the generic dialect //! is also tested (on the inputs it can handle). -use sqlparser::dialect::{GenericDialect, HiveDialect}; +use sqlparser::dialect::HiveDialect; use sqlparser::test_utils::*; #[test] From c22e7fb31a1a7cbf595ab0bce75b773780d0d930 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Sat, 25 Jul 2020 14:55:42 -0400 Subject: [PATCH 59/97] Address some PR changes --- src/ast/data_type.rs | 2 +- src/ast/mod.rs | 55 ++++++++++++++++++++++---------------------- src/parser.rs | 26 ++++++++------------- 3 files changed, 37 insertions(+), 46 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index cc8bd3260..388703e76 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -61,7 +61,7 @@ pub enum DataType { Regclass, /// Text Text, - /// String (Hive) + /// String String, /// Bytea Bytea, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f539cd0ec..a83676744 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -426,6 +426,24 @@ impl fmt::Display for WindowFrameBound { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum PartitionAction { + ADD, + DROP, + SYNC, +} + +impl fmt::Display for PartitionAction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PartitionAction::SYNC => f.write_str("SYNC PARTITIONS"), + PartitionAction::DROP => f.write_str("DROP PARTITIONS"), + PartitionAction::ADD => f.write_str("ADD PARTITIONS"), + } + } +} + /// A top-level statement (SELECT, INSERT, CREATE, etc.) #[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -449,9 +467,7 @@ pub enum Statement { Msck { table_name: ObjectName, repair: bool, - add_partitions: bool, - drop_partitions: bool, - sync_partitions: bool, + partition_action: Option, }, /// SELECT Query(Box), @@ -600,7 +616,7 @@ pub enum Statement { /// CREATE DATABASE CreateDatabase { db_name: ObjectName, - ine: bool, + if_not_exists: bool, location: Option, managed_location: Option, }, @@ -637,9 +653,7 @@ impl fmt::Display for Statement { Statement::Msck { table_name, repair, - add_partitions, - drop_partitions, - sync_partitions, + partition_action, } => { write!( f, @@ -647,25 +661,10 @@ impl fmt::Display for Statement { repair = if *repair { "REPAIR " } else { "" }, table = table_name )?; - write!( - f, - "{add}{drop}{sync}", - add = if *add_partitions { - " ADD PARTITIONS" - } else { - "" - }, - drop = if *drop_partitions { - " DROP PARTITIONS" - } else { - "" - }, - sync = if *sync_partitions { - " SYNC PARTITIONS" - } else { - "" - } - ) + if let Some(pa) = partition_action { + write!(f, " {}", pa)?; + } + Ok(()) } Statement::Truncate { table_name, @@ -781,12 +780,12 @@ impl fmt::Display for Statement { } Statement::CreateDatabase { db_name, - ine, + if_not_exists, location, managed_location, } => { write!(f, "CREATE")?; - if *ine { + if *if_not_exists { write!(f, " IF NOT EXISTS")?; } write!(f, " {}", db_name)?; diff --git a/src/parser.rs b/src/parser.rs index f9f019877..b417c5420 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -174,26 +174,18 @@ impl<'a> Parser<'a> { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; - let (mut add, mut drop, mut sync) = (false, false, false); - match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { - Some(Keyword::ADD) => { - add = true; - } - Some(Keyword::DROP) => { - drop = true; - } - Some(Keyword::SYNC) => { - sync = true; - } - _ => (), - } + let partition_action = + match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { + Some(Keyword::ADD) => Some(PartitionAction::ADD), + Some(Keyword::DROP) => Some(PartitionAction::DROP), + Some(Keyword::SYNC) => Some(PartitionAction::SYNC), + _ => None, + }; self.expect_keyword(Keyword::PARTITIONS)?; Ok(Statement::Msck { repair, table_name, - add_partitions: add, - drop_partitions: drop, - sync_partitions: sync, + partition_action, }) } @@ -1206,7 +1198,7 @@ impl<'a> Parser<'a> { } Ok(Statement::CreateDatabase { db_name, - ine, + if_not_exists: ine, location, managed_location, }) From 2a53953be27f4fc28cce46df55b3113ec7207654 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 27 Jul 2020 17:48:42 -0400 Subject: [PATCH 60/97] Disabled hive dialect from SQL common for now, added to the dialect to allow alises starting with numbers --- src/ast/mod.rs | 12 ++++++------ src/dialect/hive.rs | 2 +- src/parser.rs | 6 +++--- tests/sqlparser_hive.rs | 6 ++++++ 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a83676744..8a6905337 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -428,18 +428,18 @@ impl fmt::Display for WindowFrameBound { #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub enum PartitionAction { +pub enum AddDropSync { ADD, DROP, SYNC, } -impl fmt::Display for PartitionAction { +impl fmt::Display for AddDropSync { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - PartitionAction::SYNC => f.write_str("SYNC PARTITIONS"), - PartitionAction::DROP => f.write_str("DROP PARTITIONS"), - PartitionAction::ADD => f.write_str("ADD PARTITIONS"), + AddDropSync::SYNC => f.write_str("SYNC PARTITIONS"), + AddDropSync::DROP => f.write_str("DROP PARTITIONS"), + AddDropSync::ADD => f.write_str("ADD PARTITIONS"), } } } @@ -467,7 +467,7 @@ pub enum Statement { Msck { table_name: ObjectName, repair: bool, - partition_action: Option, + partition_action: Option, }, /// SELECT Query(Box), diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 71a5eee26..0513b1175 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,7 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/parser.rs b/src/parser.rs index b417c5420..c26ec0901 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -176,9 +176,9 @@ impl<'a> Parser<'a> { let table_name = self.parse_object_name()?; let partition_action = match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { - Some(Keyword::ADD) => Some(PartitionAction::ADD), - Some(Keyword::DROP) => Some(PartitionAction::DROP), - Some(Keyword::SYNC) => Some(PartitionAction::SYNC), + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), _ => None, }; self.expect_keyword(Keyword::PARTITIONS)?; diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index a8617eb6d..b4ef30b96 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -81,6 +81,12 @@ fn create_table_like() { hive().verified_stmt(like); } +#[test] +fn test_identifier() { + let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; + hive().verified_stmt(between); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 7eab40a6c326bf0c6c697a2743a2e544002706e9 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 27 Jul 2020 18:01:24 -0400 Subject: [PATCH 61/97] Cargo fmt --- src/dialect/hive.rs | 5 ++++- src/parser.rs | 9 ++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 0513b1175..c09570c59 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,10 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' || (ch >= '0' && ch <= '9') + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || ch == '$' + || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/parser.rs b/src/parser.rs index c26ec0901..3f3811c6f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1181,7 +1181,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_create_database(&mut self) -> Result { let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let db_name = self.parse_object_name()?; @@ -1204,10 +1203,10 @@ impl<'a> Parser<'a> { }) } - pub fn parse_create_external_table( - &mut self, - or_replace: bool, - ) -> Result { + pub fn parse_create_external_table( + &mut self, + or_replace: bool, + ) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; From 0d6abd42c15edce784870b2284e2960354d3ee12 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 27 Jul 2020 23:46:15 -0400 Subject: [PATCH 62/97] Support for ALTER TABLE with PARTITION --- src/ast/ddl.rs | 48 ++++++++++++++++++++++++++++++--- src/parser.rs | 59 ++++++++++++++++++++++++++++++++--------- tests/sqlparser_hive.rs | 18 +++++++++++++ 3 files changed, 110 insertions(+), 15 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 847ee71a3..111fc1837 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -26,31 +26,64 @@ pub enum AlterTableOperation { /// `ADD ` AddConstraint(TableConstraint), /// `ADD [ COLUMN ] ` - AddColumn { column_def: ColumnDef }, + AddColumn { + column_def: ColumnDef, + }, /// TODO: implement `DROP CONSTRAINT ` - DropConstraint { name: Ident }, + DropConstraint { + name: Ident, + }, /// `DROP [ COLUMN ] [ IF EXISTS ] [ CASCADE ]` DropColumn { column_name: Ident, if_exists: bool, cascade: bool, }, + /// `RENAME TO PARTITION (partition=val)` + RenamePartitions { + old_partitions: Vec, + new_partitions: Vec, + }, + /// Add Partitions + AddPartitions { + if_not_exists: bool, + new_partitions: Vec, + }, + DropPartitions { + partitions: Vec, + }, /// `RENAME [ COLUMN ] TO ` RenameColumn { old_column_name: Ident, new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { table_name: Ident }, + RenameTable { + table_name: Ident, + }, } impl fmt::Display for AlterTableOperation { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions, + } => write!( + f, + "ADD{ine} PARTITION ({})", + display_comma_separated(new_partitions), + ine = if *if_not_exists { " IF NOT EXISTS" } else { "" } + ), AlterTableOperation::AddConstraint(c) => write!(f, "ADD {}", c), AlterTableOperation::AddColumn { column_def } => { write!(f, "ADD COLUMN {}", column_def.to_string()) } + AlterTableOperation::DropPartitions { partitions } => write!( + f, + "DROP PARTITION ({})", + display_comma_separated(partitions) + ), AlterTableOperation::DropConstraint { name } => write!(f, "DROP CONSTRAINT {}", name), AlterTableOperation::DropColumn { column_name, @@ -63,6 +96,15 @@ impl fmt::Display for AlterTableOperation { column_name, if *cascade { " CASCADE" } else { "" } ), + AlterTableOperation::RenamePartitions { + old_partitions, + new_partitions, + } => write!( + f, + "PARTITION ({}) RENAME TO PARTITION ({})", + display_comma_separated(old_partitions), + display_comma_separated(new_partitions) + ), AlterTableOperation::RenameColumn { old_column_name, new_column_name, diff --git a/src/parser.rs b/src/parser.rs index 3f3811c6f..691e81abe 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1636,9 +1636,21 @@ impl<'a> Parser<'a> { if let Some(constraint) = self.parse_optional_table_constraint()? { AlterTableOperation::AddConstraint(constraint) } else { - let _ = self.parse_keyword(Keyword::COLUMN); - let column_def = self.parse_column_def()?; - AlterTableOperation::AddColumn { column_def } + let if_not_exists = + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions: partitions, + } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let column_def = self.parse_column_def()?; + AlterTableOperation::AddColumn { column_def } + } } } else if self.parse_keyword(Keyword::RENAME) { if self.parse_keyword(Keyword::TO) { @@ -1655,17 +1667,40 @@ impl<'a> Parser<'a> { } } } else if self.parse_keyword(Keyword::DROP) { - let _ = self.parse_keyword(Keyword::COLUMN); - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { partitions } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, + } + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, } } else { - return self.expected("ADD, RENAME, or DROP after ALTER TABLE", self.peek_token()); + return self.expected( + "ADD, RENAME, PARTITION or DROP after ALTER TABLE", + self.peek_token(), + ); }; Ok(Statement::AlterTable { name: table_name, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index b4ef30b96..23b50d3e0 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -87,6 +87,24 @@ fn test_identifier() { hive().verified_stmt(between); } +#[test] +fn test_alter_partition() { + let alter = "ALTER TABLE db.table PARTITION (a = 2) RENAME TO PARTITION (a = 1)"; + hive().verified_stmt(alter); +} + +#[test] +fn test_add_partition() { + let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (a = 'asdf', b = 2)"; + hive().verified_stmt(add); +} + +#[test] +fn test_drop_partition() { + let drop = "ALTER TABLE db.table DROP PARTITION (a = 1)"; + hive().verified_stmt(drop); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 38b4539c6fc7be0eabd7fb52970d99328f768d41 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 00:06:45 -0400 Subject: [PATCH 63/97] Support for ALTER TABLE DROP IF EXISTS --- src/ast/ddl.rs | 23 +++++++++++------------ src/parser.rs | 15 +++++++++++++-- tests/sqlparser_hive.rs | 6 ++++++ 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 111fc1837..3ed2a6918 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -26,13 +26,9 @@ pub enum AlterTableOperation { /// `ADD ` AddConstraint(TableConstraint), /// `ADD [ COLUMN ] ` - AddColumn { - column_def: ColumnDef, - }, + AddColumn { column_def: ColumnDef }, /// TODO: implement `DROP CONSTRAINT ` - DropConstraint { - name: Ident, - }, + DropConstraint { name: Ident }, /// `DROP [ COLUMN ] [ IF EXISTS ] [ CASCADE ]` DropColumn { column_name: Ident, @@ -51,6 +47,7 @@ pub enum AlterTableOperation { }, DropPartitions { partitions: Vec, + if_exists: bool, }, /// `RENAME [ COLUMN ] TO ` RenameColumn { @@ -58,9 +55,7 @@ pub enum AlterTableOperation { new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { - table_name: Ident, - }, + RenameTable { table_name: Ident }, } impl fmt::Display for AlterTableOperation { @@ -79,10 +74,14 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::AddColumn { column_def } => { write!(f, "ADD COLUMN {}", column_def.to_string()) } - AlterTableOperation::DropPartitions { partitions } => write!( + AlterTableOperation::DropPartitions { + partitions, + if_exists, + } => write!( f, - "DROP PARTITION ({})", - display_comma_separated(partitions) + "DROP{ie} PARTITION ({})", + display_comma_separated(partitions), + ie = if *if_exists { " IF EXISTS" } else { "" } ), AlterTableOperation::DropConstraint { name } => write!(f, "DROP CONSTRAINT {}", name), AlterTableOperation::DropColumn { diff --git a/src/parser.rs b/src/parser.rs index 691e81abe..bf724c68a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1667,11 +1667,22 @@ impl<'a> Parser<'a> { } } } else if self.parse_keyword(Keyword::DROP) { - if self.parse_keyword(Keyword::PARTITION) { + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { partitions } + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, + } } else { let _ = self.parse_keyword(Keyword::COLUMN); let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 23b50d3e0..e8790d5bf 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -105,6 +105,12 @@ fn test_drop_partition() { hive().verified_stmt(drop); } +#[test] +fn test_drop_if_exists() { + let drop = "ALTER TABLE db.table DROP IF EXISTS PARTITION (a = 'b', c = 'd')"; + hive().verified_stmt(drop); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From c3eb4d405ff459df3b9d72f14ea5f9de3e5f57f3 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 10:02:06 -0400 Subject: [PATCH 64/97] Support CLUSTER BY and JOIN with no join condition --- src/ast/query.rs | 10 ++++++++++ src/dialect/keywords.rs | 3 +++ src/parser.rs | 10 +++++++++- tests/sqlparser_hive.rs | 12 ++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 79197ec18..b33f6940d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -133,6 +133,8 @@ pub struct Select { pub selection: Option, /// GROUP BY pub group_by: Vec, + /// CLUSTER BY (Hive) + pub cluster_by: Vec, /// HAVING pub having: Option, } @@ -153,6 +155,13 @@ impl fmt::Display for Select { if !self.group_by.is_empty() { write!(f, " GROUP BY {}", display_comma_separated(&self.group_by))?; } + if !self.cluster_by.is_empty() { + write!( + f, + " CLUSTER BY {}", + display_comma_separated(&self.cluster_by) + )?; + } if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } @@ -420,6 +429,7 @@ pub enum JoinConstraint { On(Expr), Using(Vec), Natural, + None, } /// An `ORDER BY` expression diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 5705d5d76..0305aa591 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -121,6 +121,7 @@ define_keywords!( CHECK, CLOB, CLOSE, + CLUSTER, COALESCE, COLLATE, COLLECT, @@ -509,6 +510,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::RIGHT, Keyword::NATURAL, Keyword::USING, + Keyword::CLUSTER, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, ]; @@ -530,6 +532,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::UNION, Keyword::EXCEPT, Keyword::INTERSECT, + Keyword::CLUSTER, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, ]; diff --git a/src/parser.rs b/src/parser.rs index bf724c68a..424f69267 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2192,6 +2192,12 @@ impl<'a> Parser<'a> { vec![] }; + let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let having = if self.parse_keyword(Keyword::HAVING) { Some(self.parse_expr()?) } else { @@ -2205,6 +2211,7 @@ impl<'a> Parser<'a> { from, selection, group_by, + cluster_by, having, }) } @@ -2519,7 +2526,8 @@ impl<'a> Parser<'a> { let columns = self.parse_parenthesized_column_list(Mandatory)?; Ok(JoinConstraint::Using(columns)) } else { - self.expected("ON, or USING after JOIN", self.peek_token()) + Ok(JoinConstraint::None) + //self.expected("ON, or USING after JOIN", self.peek_token()) } } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e8790d5bf..22375d5ff 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -111,6 +111,18 @@ fn test_drop_if_exists() { hive().verified_stmt(drop); } +#[test] +fn test_cluster_by() { + let cluster = "SELECT a FROM db.table CLUSTER BY a, b"; + hive().verified_stmt(cluster); +} + +#[test] +fn no_join_condition() { + let join = "SELECT a, b FROM db.table_name JOIN a"; + hive().verified_stmt(join); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 292f00b8497a99050fdea4f0b128d0bef58ac42c Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 11:13:29 -0400 Subject: [PATCH 65/97] Support for columns defined after partition --- src/ast/mod.rs | 6 ++++++ src/parser.rs | 5 +++++ tests/sqlparser_hive.rs | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 8a6905337..f911bbac7 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -483,6 +483,8 @@ pub enum Statement { source: Box, /// partitioned insert (Hive) partitioned: Option>, + /// Columns defined after PARTITION + after_columns: Vec, /// whether the insert has the table keyword (Hive) table: bool, }, @@ -709,6 +711,7 @@ impl fmt::Display for Statement { overwrite, partitioned, columns, + after_columns, source, table, } => { @@ -727,6 +730,9 @@ impl fmt::Display for Statement { write!(f, "PARTITION ({}) ", display_comma_separated(parts))?; } } + if !after_columns.is_empty() { + write!(f, "({}) ", display_comma_separated(after_columns))?; + } write!(f, "{}", source) } Statement::Copy { diff --git a/src/parser.rs b/src/parser.rs index 424f69267..bf48937ce 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2548,12 +2548,17 @@ impl<'a> Parser<'a> { } else { None }; + + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = self.parse_parenthesized_column_list(Optional)?; + let source = Box::new(self.parse_query()?); Ok(Statement::Insert { table_name, overwrite, partitioned, columns, + after_columns, source, table, }) diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 22375d5ff..a686e669f 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -123,6 +123,12 @@ fn no_join_condition() { hive().verified_stmt(join); } +#[test] +fn columns_after_partition() { + let query = "INSERT INTO db.table_name PARTITION (a, b) (c, d) SELECT a, b, c, d FROM db.table"; + hive().verified_stmt(query); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 7a3144f4af666c4d2a9afac727ab039c71ccd8ad Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 11:36:05 -0400 Subject: [PATCH 66/97] Support ANALYZE FOR COLUMNS --- src/ast/mod.rs | 9 +++++++-- src/parser.rs | 7 ++++++- tests/sqlparser_hive.rs | 6 ++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f911bbac7..676a9408e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -454,6 +454,7 @@ pub enum Statement { table_name: ObjectName, partitions: Option>, for_columns: bool, + columns: Vec, cache_metadata: bool, noscan: bool, compute_statistics: bool, @@ -683,7 +684,8 @@ impl fmt::Display for Statement { Statement::Analyze { table_name, partitions, - for_columns: _, + for_columns, + columns, cache_metadata, noscan, compute_statistics, @@ -694,7 +696,7 @@ impl fmt::Display for Statement { write!(f, " PARTITION ({})", display_comma_separated(parts))?; } } - //TODO: Add for columns + if *compute_statistics { write!(f, " COMPUTE STATISTICS")?; } @@ -704,6 +706,9 @@ impl fmt::Display for Statement { if *cache_metadata { write!(f, " CACHE METADATA")?; } + if *for_columns { + write!(f, " FOR COLUMNS {}", display_comma_separated(columns))?; + } Ok(()) } Statement::Insert { diff --git a/src/parser.rs b/src/parser.rs index bf48937ce..5c8ee7d74 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -212,7 +212,7 @@ impl<'a> Parser<'a> { let mut noscan = false; let mut partitions = None; let mut compute_statistics = false; - + let mut columns = vec![]; loop { match self.parse_one_of_keywords(&[ Keyword::PARTITION, @@ -229,6 +229,10 @@ impl<'a> Parser<'a> { Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { self.expect_keyword(Keyword::COLUMNS)?; + columns = self.parse_comma_separated(Parser::parse_identifier)?; + if columns.is_empty() { + self.expected("columns identifiers", self.peek_token())?; + } for_columns = true } Some(Keyword::CACHE) => { @@ -246,6 +250,7 @@ impl<'a> Parser<'a> { Ok(Statement::Analyze { table_name, for_columns, + columns, partitions, cache_metadata, noscan, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index a686e669f..e33a60f47 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -45,6 +45,12 @@ fn parse_analyze() { hive().verified_stmt(analyze); } +#[test] +fn parse_analyze_for_columns() { + let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS a, b, c"#; + hive().verified_stmt(analyze); +} + #[test] fn parse_msck() { let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; From 02a568b1d7b7e9931915c39d22df3eeaddfecc95 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 12:24:44 -0400 Subject: [PATCH 67/97] Support numeric literals ending with 'L' --- src/ast/value.rs | 6 +++--- src/dialect/hive.rs | 1 - src/parser.rs | 10 +++++----- src/test_utils.rs | 2 +- src/tokenizer.rs | 37 +++++++++++++++++++++++-------------- tests/sqlparser_common.rs | 2 +- tests/sqlparser_hive.rs | 9 ++++++++- 7 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 9e82c175d..901fa5158 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -22,9 +22,9 @@ use std::fmt; pub enum Value { /// Numeric literal #[cfg(not(feature = "bigdecimal"))] - Number(String), + Number(String, bool), #[cfg(feature = "bigdecimal")] - Number(BigDecimal), + Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), /// N'string value' @@ -59,7 +59,7 @@ pub enum Value { impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Value::Number(v) => write!(f, "{}", v), + Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index c09570c59..aaec9888f 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -12,7 +12,6 @@ impl Dialect for HiveDialect { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' - || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/parser.rs b/src/parser.rs index 5c8ee7d74..be6b957e8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -405,7 +405,7 @@ impl<'a> Parser<'a> { expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), }) } - Token::Number(_) + Token::Number(_, _) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => { @@ -1788,8 +1788,8 @@ impl<'a> Parser<'a> { // The call to n.parse() returns a bigdecimal when the // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). - Token::Number(ref n) => match n.parse() { - Ok(n) => Ok(Value::Number(n)), + Token::Number(ref n, l) => match n.parse() { + Ok(n) => Ok(Value::Number(n, l)), Err(e) => parser_err!(format!("Could not parse '{}' as number: {}", n, e)), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), @@ -1801,7 +1801,7 @@ impl<'a> Parser<'a> { pub fn parse_number_value(&mut self) -> Result { match self.parse_value()? { - v @ Value::Number(_) => Ok(v), + v @ Value::Number(_, _) => Ok(v), _ => { self.prev_token(); self.expected("literal number", self.peek_token()) @@ -1812,7 +1812,7 @@ impl<'a> Parser<'a> { /// Parse an unsigned literal integer/long pub fn parse_literal_uint(&mut self) -> Result { match self.next_token() { - Token::Number(s) => s.parse::().map_err(|e| { + Token::Number(s, _) => s.parse::().map_err(|e| { ParserError::ParserError(format!("Could not parse '{}' as u64: {}", s, e)) }), unexpected => self.expected("literal int", unexpected), diff --git a/src/test_utils.rs b/src/test_utils.rs index cb54328e8..ab9ed802a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -154,7 +154,7 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr { } pub fn number(n: &'static str) -> Value { - Value::Number(n.parse().unwrap()) + Value::Number(n.parse().unwrap(), false) } pub fn table_alias(name: impl Into) -> Option { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3baae9071..16ce4e676 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -35,7 +35,7 @@ pub enum Token { /// A keyword (like SELECT) or an optionally quoted SQL identifier Word(Word), /// An unsigned numeric literal - Number(String), + Number(String, bool), /// A character that could not be tokenized Char(char), /// Single quoted string: i.e: 'string' @@ -131,7 +131,7 @@ impl fmt::Display for Token { match self { Token::EOF => f.write_str("EOF"), Token::Word(ref w) => write!(f, "{}", w), - Token::Number(ref n) => f.write_str(n), + Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }), Token::Char(ref c) => write!(f, "{}", c), Token::SingleQuotedString(ref s) => write!(f, "'{}'", s), Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s), @@ -302,7 +302,7 @@ impl<'a> Tokenizer<'a> { Token::Whitespace(Whitespace::Tab) => self.col += 4, Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64, Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2, - Token::Number(s) => self.col += s.len() as u64, + Token::Number(s, _) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64, _ => self.col += 1, } @@ -388,8 +388,17 @@ impl<'a> Tokenizer<'a> { // numbers '0'..='9' => { // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal - let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); - Ok(Some(Token::Number(s))) + let s = peeking_take_while(chars, |ch| match ch { + '0'..='9' | '.' => true, + _ => false, + }); + let long = if chars.peek() == Some(&'L') { + chars.next(); + true + } else { + false + }; + Ok(Some(Token::Number(s, long))) } // punctuation '(' => self.consume_and_return(chars, Token::LParen), @@ -646,7 +655,7 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -664,7 +673,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_word("sqrt", None), Token::LParen, - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), Token::RParen, ]; @@ -736,11 +745,11 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), Token::Whitespace(Whitespace::Space), Token::make_keyword("LIMIT"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("5")), + Token::Number(String::from("5"), false), ]; compare(expected, tokens); @@ -893,12 +902,12 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Number("0".to_string()), + Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), comment: "this is a comment\n".to_string(), }), - Token::Number("1".to_string()), + Token::Number("1".to_string(), false), ]; compare(expected, tokens); } @@ -925,11 +934,11 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Number("0".to_string()), + Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( "multi-line\n* /comment".to_string(), )), - Token::Number("1".to_string()), + Token::Number("1".to_string(), false), ]; compare(expected, tokens); } @@ -996,7 +1005,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_keyword("TOP"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("5")), + Token::Number(String::from("5"), false), Token::Whitespace(Whitespace::Space), Token::make_word("bar", Some('[')), Token::Whitespace(Whitespace::Space), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f376a67cb..4f9d4157b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -458,7 +458,7 @@ fn parse_number() { ); #[cfg(not(feature = "bigdecimal"))] - assert_eq!(expr, Expr::Value(Value::Number("1.0".into()))); + assert_eq!(expr, Expr::Value(Value::Number("1.0".into(), false))); } #[test] diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e33a60f47..0418999dc 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -87,7 +87,8 @@ fn create_table_like() { hive().verified_stmt(like); } -#[test] +// Turning off this test until we can parse identifiers starting with numbers :( +#[ignore] fn test_identifier() { let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; hive().verified_stmt(between); @@ -135,6 +136,12 @@ fn columns_after_partition() { hive().verified_stmt(query); } +#[test] +fn long_numerics() { + let query = r#"SELECT MIN(MIN(10, 5), 1L) AS a"#; + hive().verified_stmt(query); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 57f111aeef389aec75715f70ed203f2e04eec769 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 15:57:35 -0400 Subject: [PATCH 68/97] Support CREATE [TEMP|TEMPORARY] TABLE --- src/ast/mod.rs | 5 ++++- src/dialect/hive.rs | 4 +--- src/dialect/keywords.rs | 2 ++ src/parser.rs | 13 +++++++++++-- tests/sqlparser_hive.rs | 16 ++++++++++++++++ 5 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 676a9408e..34ff64ab6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -526,6 +526,7 @@ pub enum Statement { /// CREATE TABLE CreateTable { or_replace: bool, + temporary: bool, external: bool, if_not_exists: bool, /// Table name @@ -841,6 +842,7 @@ impl fmt::Display for Statement { hive_distribution, hive_formats, external, + temporary, file_format, location, query, @@ -856,10 +858,11 @@ impl fmt::Display for Statement { // `CREATE TABLE t (a INT) AS SELECT a from t2` write!( f, - "CREATE {or_replace}{external}TABLE {if_not_exists}{name}", + "CREATE {or_replace}{external}{temporary}TABLE {if_not_exists}{name}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, external = if *external { "EXTERNAL " } else { "" }, if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + temporary = if *temporary { "TEMPORARY " } else { "" }, name = name, )?; if !columns.is_empty() || !constraints.is_empty() { diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index aaec9888f..71a5eee26 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,9 +9,7 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || ch == '$' + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 0305aa591..44196bee5 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -425,6 +425,8 @@ define_keywords!( SYSTEM_USER, TABLE, TABLESAMPLE, + TEMP, + TEMPORARY, TEXT, TEXTFILE, THEN, diff --git a/src/parser.rs b/src/parser.rs index be6b957e8..f28f0e57f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1132,8 +1132,11 @@ impl<'a> Parser<'a> { /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace) + self.parse_create_table(or_replace, temporary) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); self.parse_create_view(or_replace) @@ -1232,6 +1235,7 @@ impl<'a> Parser<'a> { or_replace, if_not_exists: false, external: true, + temporary: false, file_format: Some(file_format), location: Some(location), query: None, @@ -1381,7 +1385,11 @@ impl<'a> Parser<'a> { } } - pub fn parse_create_table(&mut self, or_replace: bool) -> Result { + pub fn parse_create_table( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; let like = if self.parse_keyword(Keyword::LIKE) { @@ -1409,6 +1417,7 @@ impl<'a> Parser<'a> { Ok(Statement::CreateTable { name: table_name, + temporary, columns, constraints, with_options, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 0418999dc..0e168f084 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -142,6 +142,22 @@ fn long_numerics() { hive().verified_stmt(query); } +#[test] +fn decimal_precision() { + let query = "SELECT CAST(a AS DECIMAL(18,2)) FROM db.table"; + let expected = "SELECT CAST(a AS NUMERIC(18,2)) FROM db.table"; + hive().one_statement_parses_to(query, expected); +} + +#[test] +fn create_temp_table() { + let query = "CREATE TEMPORARY TABLE db.table (a INT NOT NULL)"; + let query2 = "CREATE TEMP TABLE db.table (a INT NOT NULL)"; + + hive().verified_stmt(query); + hive().one_statement_parses_to(query2, query); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From e078f65048a530e3dcfe8778d78d2fd3b0779cad Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 28 Jul 2020 22:59:13 -0400 Subject: [PATCH 69/97] Support DIRECTORY INSERTS --- src/ast/mod.rs | 27 +++++++++++++++++ src/dialect/keywords.rs | 1 + src/parser.rs | 65 ++++++++++++++++++++++++++--------------- tests/sqlparser_hive.rs | 6 ++++ 4 files changed, 76 insertions(+), 23 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 34ff64ab6..b8a1b53fd 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -489,6 +489,14 @@ pub enum Statement { /// whether the insert has the table keyword (Hive) table: bool, }, + // TODO: Support ROW FORMAT + Directory { + overwrite: bool, + local: bool, + path: String, + file_format: Option, + source: Box + }, Copy { /// TABLE table_name: ObjectName, @@ -654,6 +662,25 @@ impl fmt::Display for Statement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Statement::Query(s) => write!(f, "{}", s), + Statement::Directory { + overwrite, + local, + path, + file_format, + source + } => { + write!( + f, + "INSERT{overwrite}{local} DIRECTORY '{path}'", + overwrite = if *overwrite { " OVERWRITE" } else { "" }, + local = if *local { " LOCAL" } else { "" }, + path = path + )?; + if let Some(ref ff) = file_format { + write!(f, " STORED AS {}", ff)? + } + write!(f, " {}", source) + } Statement::Msck { table_name, repair, diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 44196bee5..70d0e76c0 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -175,6 +175,7 @@ define_keywords!( DESC, DESCRIBE, DETERMINISTIC, + DIRECTORY, DISCONNECT, DISTINCT, DOUBLE, diff --git a/src/parser.rs b/src/parser.rs index f28f0e57f..9daffee89 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2549,33 +2549,52 @@ impl<'a> Parser<'a> { pub fn parse_insert(&mut self) -> Result { let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; let overwrite = action == Keyword::OVERWRITE; - // Hive lets you put table here regardless - let table = self.parse_keyword(Keyword::TABLE); - let table_name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(Optional)?; + let local = self.parse_keyword(Keyword::LOCAL); - let partitioned = if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let r = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - r + if self.parse_keyword(Keyword::DIRECTORY) { + let path = match self.next_token() { + Token::SingleQuotedString(w) => w, + _ => self.expected("A file path", self.peek_token())?, + }; + let _ = self.expect_keywords(&[Keyword::STORED, Keyword::AS]); + let file_format = Some(self.parse_file_format()?); + let source = Box::new(self.parse_query()?); + Ok(Statement::Directory { + local, + path, + overwrite, + file_format, + source + }) } else { - None - }; + // Hive lets you put table here regardless + let table = self.parse_keyword(Keyword::TABLE); + let table_name = self.parse_object_name()?; + let columns = self.parse_parenthesized_column_list(Optional)?; - // Hive allows you to specify columns after partitions as well if you want. - let after_columns = self.parse_parenthesized_column_list(Optional)?; + let partitioned = if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let r = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + r + } else { + None + }; - let source = Box::new(self.parse_query()?); - Ok(Statement::Insert { - table_name, - overwrite, - partitioned, - columns, - after_columns, - source, - table, - }) + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = self.parse_parenthesized_column_list(Optional)?; + + let source = Box::new(self.parse_query()?); + Ok(Statement::Insert { + table_name, + overwrite, + partitioned, + columns, + after_columns, + source, + table, + }) + } } pub fn parse_update(&mut self) -> Result { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 0e168f084..9ac20b1c4 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -158,6 +158,12 @@ fn create_temp_table() { hive().one_statement_parses_to(query2, query); } +#[test] +fn create_local_directory() { + let query = "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; + hive().verified_stmt(query); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 6fd9d3f9b8826e5f8e8c28e4ca78347b6ea14050 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 13:08:44 -0400 Subject: [PATCH 70/97] Support LATERAL VIEW and DISTRIBUTE BY --- src/ast/mod.rs | 4 ++-- src/ast/query.rs | 24 ++++++++++++++++++++++++ src/dialect/keywords.rs | 7 +++++++ src/parser.rs | 38 +++++++++++++++++++++++++++++++++++++- tests/sqlparser_hive.rs | 15 ++++++++++++++- 5 files changed, 84 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b8a1b53fd..6b64d6a80 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -495,7 +495,7 @@ pub enum Statement { local: bool, path: String, file_format: Option, - source: Box + source: Box, }, Copy { /// TABLE @@ -667,7 +667,7 @@ impl fmt::Display for Statement { local, path, file_format, - source + source, } => { write!( f, diff --git a/src/ast/query.rs b/src/ast/query.rs index b33f6940d..39dd14aa5 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -129,12 +129,20 @@ pub struct Select { pub projection: Vec, /// FROM pub from: Vec, + /// LATERAL VIEW + pub lateral_view: Option, + /// LATERAL VIEW optional name + pub lateral_view_name: Option, + /// LATERAL VIEW optional column aliases + pub lateral_col_alias: Option, /// WHERE pub selection: Option, /// GROUP BY pub group_by: Vec, /// CLUSTER BY (Hive) pub cluster_by: Vec, + /// DISTRIBUTE BY (Hive) + pub distribute_by: Vec, /// HAVING pub having: Option, } @@ -149,6 +157,15 @@ impl fmt::Display for Select { if !self.from.is_empty() { write!(f, " FROM {}", display_comma_separated(&self.from))?; } + if let Some(ref lv) = self.lateral_view { + write!(f, " LATERAL VIEW {}", lv)?; + if let Some(ref a) = self.lateral_view_name { + write!(f, " {}", a)?; + } + if let Some(ref c) = self.lateral_col_alias { + write!(f, " AS {}", c)?; + } + } if let Some(ref selection) = self.selection { write!(f, " WHERE {}", selection)?; } @@ -162,6 +179,13 @@ impl fmt::Display for Select { display_comma_separated(&self.cluster_by) )?; } + if !self.distribute_by.is_empty() { + write!( + f, + " DISTRIBUTE BY {}", + display_comma_separated(&self.distribute_by) + )?; + } if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 70d0e76c0..2ce701a28 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -178,6 +178,7 @@ define_keywords!( DIRECTORY, DISCONNECT, DISTINCT, + DISTRIBUTE, DOUBLE, DROP, DYNAMIC, @@ -497,6 +498,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::HAVING, Keyword::ORDER, Keyword::TOP, + Keyword::LATERAL, + Keyword::VIEW, Keyword::LIMIT, Keyword::OFFSET, Keyword::FETCH, @@ -514,6 +517,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::NATURAL, Keyword::USING, Keyword::CLUSTER, + Keyword::DISTRIBUTE, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, ]; @@ -529,6 +533,8 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::HAVING, Keyword::ORDER, Keyword::TOP, + Keyword::LATERAL, + Keyword::VIEW, Keyword::LIMIT, Keyword::OFFSET, Keyword::FETCH, @@ -536,6 +542,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::EXCEPT, Keyword::INTERSECT, Keyword::CLUSTER, + Keyword::DISTRIBUTE, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, ]; diff --git a/src/parser.rs b/src/parser.rs index 9daffee89..6324af991 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2194,6 +2194,32 @@ impl<'a> Parser<'a> { vec![] }; + let lateral_view = if self + .expect_keywords(&[Keyword::LATERAL, Keyword::VIEW]) + .is_ok() + { + Some(self.parse_expr()?) + } else { + None + }; + + let lateral_view_name = if lateral_view.is_some() { + Some(self.parse_object_name()?) + } else { + None + }; + + let lateral_col_alias = if lateral_view_name.is_some() { + self.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + ])? + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) } else { @@ -2212,6 +2238,12 @@ impl<'a> Parser<'a> { vec![] }; + let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let having = if self.parse_keyword(Keyword::HAVING) { Some(self.parse_expr()?) } else { @@ -2224,8 +2256,12 @@ impl<'a> Parser<'a> { projection, from, selection, + lateral_view, + lateral_view_name, + lateral_col_alias, group_by, cluster_by, + distribute_by, having, }) } @@ -2564,7 +2600,7 @@ impl<'a> Parser<'a> { path, overwrite, file_format, - source + source, }) } else { // Hive lets you put table here regardless diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 9ac20b1c4..5be0bb79c 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -124,6 +124,12 @@ fn test_cluster_by() { hive().verified_stmt(cluster); } +#[test] +fn test_distribute_by() { + let cluster = "SELECT a FROM db.table DISTRIBUTE BY a, b"; + hive().verified_stmt(cluster); +} + #[test] fn no_join_condition() { let join = "SELECT a, b FROM db.table_name JOIN a"; @@ -160,10 +166,17 @@ fn create_temp_table() { #[test] fn create_local_directory() { - let query = "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; + let query = + "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; hive().verified_stmt(query); } +#[test] +fn lateral_view() { + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t WHERE a = 1"; + hive().verified_stmt(view); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 7983e477eac06692a9281401e3b42611eee87858 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 13:24:19 -0400 Subject: [PATCH 71/97] Add Dan's suggestion to the tokenizer and re-enable tests --- src/dialect/hive.rs | 2 +- src/tokenizer.rs | 13 +++++++++++++ tests/sqlparser_hive.rs | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 71a5eee26..0513b1175 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,7 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 16ce4e676..f83b7430c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -364,6 +364,19 @@ impl<'a> Tokenizer<'a> { ch if self.dialect.is_identifier_start(ch) => { chars.next(); // consume the first char let s = self.tokenize_word(ch, chars); + + if s.chars().all(|x| x >= '0' && x <= '9' || x == '.') { + let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| match ch { + '0'..='9' | '.' => true, + _ => false, + }); + let s2 = peeking_take_while(chars, |ch| match ch { + '0'..='9' | '.' => true, + _ => false, + }); + s += s2.as_str(); + return Ok(Some(Token::Number(s, false))); + } Ok(Some(Token::make_word(&s, None))) } // string diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 5be0bb79c..9dad62514 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -88,7 +88,7 @@ fn create_table_like() { } // Turning off this test until we can parse identifiers starting with numbers :( -#[ignore] +#[test] fn test_identifier() { let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; hive().verified_stmt(between); From 19e4e976f95ca8fd57257e5b728b209c696a0513 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 14:20:05 -0400 Subject: [PATCH 72/97] Speculative array index for function call --- src/ast/mod.rs | 4 ++++ src/dialect/hive.rs | 5 ++++- src/parser.rs | 9 +++++++++ tests/sqlparser_common.rs | 8 +++++++- tests/sqlparser_hive.rs | 6 ++++++ 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6b64d6a80..e210fe40a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1184,6 +1184,7 @@ pub struct Function { pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, + pub array_element: Option, } impl fmt::Display for Function { @@ -1198,6 +1199,9 @@ impl fmt::Display for Function { if let Some(o) = &self.over { write!(f, " OVER ({})", o)?; } + if let Some(ae) = &self.array_element { + write!(f, "[{}]", ae)?; + } Ok(()) } } diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 0513b1175..c09570c59 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,10 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '$' || (ch >= '0' && ch <= '9') + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || ch == '$' + || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/parser.rs b/src/parser.rs index 6324af991..4fad9242d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -471,11 +471,20 @@ impl<'a> Parser<'a> { None }; + let array_element = if self.consume_token(&Token::LBracket) { + let num = Some(self.parse_number_value()?); + self.expect_token(&Token::RBracket)?; + num + } else { + None + }; + Ok(Expr::Function(Function { name, args, over, distinct, + array_element, })) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 4f9d4157b..35a4270d3 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -331,6 +331,7 @@ fn parse_select_count_wildcard() { args: vec![FunctionArg::Unnamed(Expr::Wildcard)], over: None, distinct: false, + array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -349,6 +350,7 @@ fn parse_select_count_distinct() { })], over: None, distinct: true, + array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -888,7 +890,8 @@ fn parse_select_having() { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(Expr::Wildcard)], over: None, - distinct: false + distinct: false, + array_element: None })), op: BinaryOperator::Gt, right: Box::new(Expr::Value(number("1"))) @@ -1626,6 +1629,7 @@ fn parse_named_argument_function() { ], over: None, distinct: false, + array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -1659,6 +1663,7 @@ fn parse_window_functions() { window_frame: None, }), distinct: false, + array_element: None }), expr_from_projection(&select.projection[0]) ); @@ -1942,6 +1947,7 @@ fn parse_delimited_identifiers() { args: vec![], over: None, distinct: false, + array_element: None }), expr_from_projection(&select.projection[1]), ); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 9dad62514..61f0fcacf 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -177,6 +177,12 @@ fn lateral_view() { hive().verified_stmt(view); } +#[test] +fn test_array_elements() { + let elements = "SELECT collect_list(a)[0] FROM db.table"; + hive().verified_stmt(elements); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 5e57789b57f72fe34ffd3e286265defe8991175e Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 14:27:32 -0400 Subject: [PATCH 73/97] Make file format optional for directory inserts --- src/parser.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 4fad9242d..a75cf37bb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2601,8 +2601,11 @@ impl<'a> Parser<'a> { Token::SingleQuotedString(w) => w, _ => self.expected("A file path", self.peek_token())?, }; - let _ = self.expect_keywords(&[Keyword::STORED, Keyword::AS]); - let file_format = Some(self.parse_file_format()?); + let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { + Some(self.parse_file_format()?) + } else { + None + }; let source = Box::new(self.parse_query()?); Ok(Statement::Directory { local, From 6d76abce2ec8ba81208f006560b262174db0961c Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 14:49:12 -0400 Subject: [PATCH 74/97] Allow multiple aliases in LATERAL VIEW --- src/ast/query.rs | 10 +++++++--- src/parser.rs | 20 +++++++++++++------- tests/sqlparser_hive.rs | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 39dd14aa5..6d5fa7449 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -134,7 +134,7 @@ pub struct Select { /// LATERAL VIEW optional name pub lateral_view_name: Option, /// LATERAL VIEW optional column aliases - pub lateral_col_alias: Option, + pub lateral_col_alias: Vec, /// WHERE pub selection: Option, /// GROUP BY @@ -162,8 +162,12 @@ impl fmt::Display for Select { if let Some(ref a) = self.lateral_view_name { write!(f, " {}", a)?; } - if let Some(ref c) = self.lateral_col_alias { - write!(f, " AS {}", c)?; + if !self.lateral_col_alias.is_empty() { + write!( + f, + " AS {}", + display_comma_separated(&self.lateral_col_alias) + )?; } } if let Some(ref selection) = self.selection { diff --git a/src/parser.rs b/src/parser.rs index a75cf37bb..ab5110845 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2219,14 +2219,20 @@ impl<'a> Parser<'a> { }; let lateral_col_alias = if lateral_view_name.is_some() { - self.parse_optional_alias(&[ - Keyword::WHERE, - Keyword::GROUP, - Keyword::CLUSTER, - Keyword::HAVING, - ])? + self.parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .filter(|i| i.is_some()) + .map(|i| i.unwrap()) + .collect() } else { - None + vec![] }; let selection = if self.parse_keyword(Keyword::WHERE) { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 61f0fcacf..14fbae30e 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -173,7 +173,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } From 109363c0278c8752f255c98836fa180957b31093 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 16:50:24 -0400 Subject: [PATCH 75/97] Allow multiple lateral views and an insanely janky implementation for row formats in external tables --- src/ast/mod.rs | 13 +++++-- src/ast/query.rs | 52 ++++++++++++++++--------- src/parser.rs | 85 +++++++++++++++++++++-------------------- tests/sqlparser_hive.rs | 2 +- 4 files changed, 88 insertions(+), 64 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e210fe40a..776e4d6ab 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -31,6 +31,9 @@ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ Cte, Fetch, Join, JoinConstraint, JoinOperator, Offset, OffsetRows, OrderByExpr, Query, Select, SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values, With, + Cte, Fetch, Join, JoinConstraint, JoinOperator, LateralView, Offset, OffsetRows, OrderByExpr, + Query, Select, SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, + Values, }; pub use self::value::{DateTimeField, Value}; @@ -968,13 +971,15 @@ impl fmt::Display for Statement { " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", input_format, output_format )?, - Some(HiveIOFormat::FileFormat { format }) => { + Some(HiveIOFormat::FileFormat { format }) if !*external => { write!(f, " STORED AS {}", format)? } - None => (), + _ => (), } - if let Some(loc) = location { - write!(f, " LOCATION '{}'", loc)?; + if !*external { + if let Some(loc) = location { + write!(f, " LOCATION '{}'", loc)?; + } } } if *external { diff --git a/src/ast/query.rs b/src/ast/query.rs index 6d5fa7449..d778280a9 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -129,12 +129,8 @@ pub struct Select { pub projection: Vec, /// FROM pub from: Vec, - /// LATERAL VIEW - pub lateral_view: Option, - /// LATERAL VIEW optional name - pub lateral_view_name: Option, - /// LATERAL VIEW optional column aliases - pub lateral_col_alias: Vec, + /// LATERAL VIEWs + pub lateral_views: Vec, /// WHERE pub selection: Option, /// GROUP BY @@ -157,17 +153,9 @@ impl fmt::Display for Select { if !self.from.is_empty() { write!(f, " FROM {}", display_comma_separated(&self.from))?; } - if let Some(ref lv) = self.lateral_view { - write!(f, " LATERAL VIEW {}", lv)?; - if let Some(ref a) = self.lateral_view_name { - write!(f, " {}", a)?; - } - if !self.lateral_col_alias.is_empty() { - write!( - f, - " AS {}", - display_comma_separated(&self.lateral_col_alias) - )?; + if !self.lateral_views.is_empty() { + for lv in &self.lateral_views { + write!(f, "{}", lv)?; } } if let Some(ref selection) = self.selection { @@ -197,6 +185,36 @@ impl fmt::Display for Select { } } +/// A hive LATERAL VIEW with potential column aliases +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct LateralView { + /// LATERAL VIEW + pub lateral_view: Expr, + /// LATERAL VIEW table name + pub lateral_view_name: ObjectName, + /// LATERAL VIEW optional column aliases + pub lateral_col_alias: Vec, +} + +impl fmt::Display for LateralView { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + " LATERAL VIEW {} {}", + self.lateral_view, self.lateral_view_name + )?; + if !self.lateral_col_alias.is_empty() { + write!( + f, + " AS {}", + display_comma_separated(&self.lateral_col_alias) + )?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct With { diff --git a/src/parser.rs b/src/parser.rs index ab5110845..fd5cf72d2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1227,26 +1227,32 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; - let hive_distribution = self.parse_hive_distribution()?; - self.expect_keywords(&[Keyword::STORED, Keyword::AS])?; - let file_format = self.parse_file_format()?; - self.expect_keyword(Keyword::LOCATION)?; - let location = self.parse_literal_string()?; + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; + let file_format = if let Some(ff) = &hive_formats.storage { + match ff { + HiveIOFormat::FileFormat { format } => Some(format.clone()), + _ => None, + } + } else { + None + }; + let location = hive_formats.location.clone(); Ok(Statement::CreateTable { name: table_name, columns, constraints, hive_distribution, - hive_formats: None, + hive_formats: Some(hive_formats), with_options: vec![], or_replace, if_not_exists: false, external: true, temporary: false, - file_format: Some(file_format), - location: Some(location), + file_format, + location, query: None, without_rowid: false, like: None, @@ -2203,37 +2209,34 @@ impl<'a> Parser<'a> { vec![] }; - let lateral_view = if self - .expect_keywords(&[Keyword::LATERAL, Keyword::VIEW]) - .is_ok() - { - Some(self.parse_expr()?) - } else { - None - }; - - let lateral_view_name = if lateral_view.is_some() { - Some(self.parse_object_name()?) - } else { - None - }; - - let lateral_col_alias = if lateral_view_name.is_some() { - self.parse_comma_separated(|parser| { - parser.parse_optional_alias(&[ - Keyword::WHERE, - Keyword::GROUP, - Keyword::CLUSTER, - Keyword::HAVING, - ]) // This couldn't possibly be a bad idea - })? - .into_iter() - .filter(|i| i.is_some()) - .map(|i| i.unwrap()) - .collect() - } else { - vec![] - }; + let mut lateral_views = vec![]; + loop { + if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let lateral_view = self.parse_expr()?; + let lateral_view_name = self.parse_object_name()?; + let lateral_col_alias = self + .parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .filter(|i| i.is_some()) + .map(|i| i.unwrap()) + .collect(); + + lateral_views.push(LateralView { + lateral_view, + lateral_view_name, + lateral_col_alias, + }); + } else { + break; + } + } let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) @@ -2271,9 +2274,7 @@ impl<'a> Parser<'a> { projection, from, selection, - lateral_view, - lateral_view_name, - lateral_col_alias, + lateral_views, group_by, cluster_by, distribute_by, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 14fbae30e..e169f4a03 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -173,7 +173,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS a, b LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } From 42fb1b944928d885e1abe0c04a37591cbf41a768 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 17:24:08 -0400 Subject: [PATCH 76/97] All multiple quote styles for directory paths --- src/parser.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index fd5cf72d2..b32918184 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1846,6 +1846,7 @@ impl<'a> Parser<'a> { /// Parse a literal string pub fn parse_literal_string(&mut self) -> Result { match self.next_token() { + Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value), Token::SingleQuotedString(s) => Ok(s), unexpected => self.expected("literal string", unexpected), } @@ -2604,10 +2605,7 @@ impl<'a> Parser<'a> { let local = self.parse_keyword(Keyword::LOCAL); if self.parse_keyword(Keyword::DIRECTORY) { - let path = match self.next_token() { - Token::SingleQuotedString(w) => w, - _ => self.expected("A file path", self.peek_token())?, - }; + let path = self.parse_literal_string()?; let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { Some(self.parse_file_format()?) } else { From 425cab6a10a0a46b685c5eb6359e14ea1a0531d1 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 18:15:40 -0400 Subject: [PATCH 77/97] Speculative test on table properties --- src/ast/mod.rs | 9 +++++++++ src/ast/value.rs | 3 +++ src/dialect/keywords.rs | 1 + src/parser.rs | 18 +++++++++++++----- tests/sqlparser_hive.rs | 2 +- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 776e4d6ab..51ebfe65a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -547,6 +547,7 @@ pub enum Statement { constraints: Vec, hive_distribution: HiveDistributionStyle, hive_formats: Option, + table_properties: Vec, with_options: Vec, file_format: Option, location: Option, @@ -866,6 +867,7 @@ impl fmt::Display for Statement { name, columns, constraints, + table_properties, with_options, or_replace, if_not_exists, @@ -990,6 +992,13 @@ impl fmt::Display for Statement { location.as_ref().unwrap() )?; } + if !table_properties.is_empty() { + write!( + f, + " TABLEPROPERTIES ({})", + display_comma_separated(table_properties) + )?; + } if !with_options.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_options))?; } diff --git a/src/ast/value.rs b/src/ast/value.rs index 901fa5158..9240d7479 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -31,6 +31,8 @@ pub enum Value { NationalStringLiteral(String), /// X'hex value' HexStringLiteral(String), + + LiteralString(String), /// Boolean value true or false Boolean(bool), /// INTERVAL literals, roughly in the following format: @@ -60,6 +62,7 @@ impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), + Value::LiteralString(v) => write!(f, "{}", v), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 2ce701a28..e308453a4 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -426,6 +426,7 @@ define_keywords!( SYSTEM_TIME, SYSTEM_USER, TABLE, + TABLEPROPERTIES, TABLESAMPLE, TEMP, TEMPORARY, diff --git a/src/parser.rs b/src/parser.rs index b32918184..48b657f80 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1240,6 +1240,7 @@ impl<'a> Parser<'a> { None }; let location = hive_formats.location.clone(); + let table_properties = self.parse_options(Keyword::TABLEPROPERTIES)?; Ok(Statement::CreateTable { name: table_name, columns, @@ -1247,6 +1248,7 @@ impl<'a> Parser<'a> { hive_distribution, hive_formats: Some(hive_formats), with_options: vec![], + table_properties, or_replace, if_not_exists: false, external: true, @@ -1282,7 +1284,7 @@ impl<'a> Parser<'a> { // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; - let with_options = self.parse_with_options()?; + let with_options = self.parse_options(Keyword::WITH)?; self.expect_keyword(Keyword::AS)?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. @@ -1421,8 +1423,8 @@ impl<'a> Parser<'a> { let hive_distribution = self.parse_hive_distribution()?; let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` - let with_options = self.parse_with_options()?; - + let with_options = self.parse_options(Keyword::WITH)?; + let table_properties = self.parse_options(Keyword::TABLEPROPERTIES)?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(Box::new(self.parse_query()?)) @@ -1436,6 +1438,7 @@ impl<'a> Parser<'a> { columns, constraints, with_options, + table_properties, or_replace, if_not_exists, hive_distribution, @@ -1639,8 +1642,8 @@ impl<'a> Parser<'a> { } } - pub fn parse_with_options(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::WITH) { + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { + if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Parser::parse_sql_option)?; self.expect_token(&Token::RParen)?; @@ -1807,6 +1810,11 @@ impl<'a> Parser<'a> { Keyword::TRUE => Ok(Value::Boolean(true)), Keyword::FALSE => Ok(Value::Boolean(false)), Keyword::NULL => Ok(Value::Null), + Keyword::NoKeyword => Ok(Value::LiteralString(format!( + "{quote}{}{quote}", + w.value, + quote = w.quote_style.map(|q| q.to_string()).unwrap_or("".into()) + ))), _ => self.expected("a concrete value", Token::Word(w)), }, // The call to n.parse() returns a bigdecimal when the diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e169f4a03..8ec182784 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::test_utils::*; #[test] fn parse_table_create() { - let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...'"#; + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TABLEPROPERTIES ("prop" = "2")"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; hive().verified_stmt(sql); From 77fcdb2459ae99875aa7b07a03294f9de1da5db0 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 29 Jul 2020 18:21:36 -0400 Subject: [PATCH 78/97] Spell keyword tblproperties correctly this time --- src/ast/mod.rs | 2 +- src/dialect/keywords.rs | 2 +- src/parser.rs | 4 ++-- tests/sqlparser_hive.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 51ebfe65a..03a82d47d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -995,7 +995,7 @@ impl fmt::Display for Statement { if !table_properties.is_empty() { write!( f, - " TABLEPROPERTIES ({})", + " TBLPROPERTIES ({})", display_comma_separated(table_properties) )?; } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index e308453a4..d3ff6cef9 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -426,8 +426,8 @@ define_keywords!( SYSTEM_TIME, SYSTEM_USER, TABLE, - TABLEPROPERTIES, TABLESAMPLE, + TBLPROPERTIES, TEMP, TEMPORARY, TEXT, diff --git a/src/parser.rs b/src/parser.rs index 48b657f80..72f4677a5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1240,7 +1240,7 @@ impl<'a> Parser<'a> { None }; let location = hive_formats.location.clone(); - let table_properties = self.parse_options(Keyword::TABLEPROPERTIES)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; Ok(Statement::CreateTable { name: table_name, columns, @@ -1424,7 +1424,7 @@ impl<'a> Parser<'a> { let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` let with_options = self.parse_options(Keyword::WITH)?; - let table_properties = self.parse_options(Keyword::TABLEPROPERTIES)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(Box::new(self.parse_query()?)) diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 8ec182784..11df1dd76 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::test_utils::*; #[test] fn parse_table_create() { - let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TABLEPROPERTIES ("prop" = "2")"#; + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2")"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; hive().verified_stmt(sql); From 37d77c4b0d9b17320a8c84720ce4d5d523b0f3b1 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 30 Jul 2020 12:28:45 -0400 Subject: [PATCH 79/97] Fix issue with multiple lateral views and no aliases --- src/parser.rs | 1 + tests/sqlparser_hive.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 72f4677a5..1e85701e8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2230,6 +2230,7 @@ impl<'a> Parser<'a> { Keyword::GROUP, Keyword::CLUSTER, Keyword::HAVING, + Keyword::LATERAL ]) // This couldn't possibly be a bad idea })? .into_iter() diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 11df1dd76..1770d4661 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -173,7 +173,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS a, b LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } From 5ccc447558b7dcbd60628eda89ccb9a3c9d0ba93 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 12:29:41 -0400 Subject: [PATCH 80/97] Rebase, fix lints, removed some janky array element parsing that just didn't work, and allow analyze for columns to sometimes not actually have columns --- src/ast/mod.rs | 9 ++++----- src/ast/value.rs | 4 ++-- src/dialect/hive.rs | 10 ++-------- src/parser.rs | 32 +++++++++++++------------------- tests/sqlparser_common.rs | 6 ------ tests/sqlparser_hive.rs | 11 +++-------- 6 files changed, 24 insertions(+), 48 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 03a82d47d..1ed58d1a1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -739,7 +739,10 @@ impl fmt::Display for Statement { write!(f, " CACHE METADATA")?; } if *for_columns { - write!(f, " FOR COLUMNS {}", display_comma_separated(columns))?; + write!(f, " FOR COLUMNS")?; + if !columns.is_empty() { + write!(f, " {}", display_comma_separated(columns))?; + } } Ok(()) } @@ -1198,7 +1201,6 @@ pub struct Function { pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, - pub array_element: Option, } impl fmt::Display for Function { @@ -1213,9 +1215,6 @@ impl fmt::Display for Function { if let Some(o) = &self.over { write!(f, " OVER ({})", o)?; } - if let Some(ae) = &self.array_element { - write!(f, "[{}]", ae)?; - } Ok(()) } } diff --git a/src/ast/value.rs b/src/ast/value.rs index 9240d7479..2afdfaeae 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -32,7 +32,7 @@ pub enum Value { /// X'hex value' HexStringLiteral(String), - LiteralString(String), + DoubleQuotedString(String), /// Boolean value true or false Boolean(bool), /// INTERVAL literals, roughly in the following format: @@ -62,7 +62,7 @@ impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), - Value::LiteralString(v) => write!(f, "{}", v), + Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index c09570c59..4fc23ed28 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -5,14 +5,11 @@ pub struct HiveDialect {} impl Dialect for HiveDialect { fn is_delimited_identifier_start(&self, ch: char) -> bool { - (ch == '"') || (ch == '\'') || (ch == '`') + (ch == '"') || (ch == '`') } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || ch == '$' - || (ch >= '0' && ch <= '9') + (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') } fn is_identifier_part(&self, ch: char) -> bool { @@ -20,8 +17,5 @@ impl Dialect for HiveDialect { || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_' - || ch == '$' - || ch == '{' - || ch == '}' } } diff --git a/src/parser.rs b/src/parser.rs index 1e85701e8..44e63dfad 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -229,10 +229,12 @@ impl<'a> Parser<'a> { Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { self.expect_keyword(Keyword::COLUMNS)?; - columns = self.parse_comma_separated(Parser::parse_identifier)?; - if columns.is_empty() { - self.expected("columns identifiers", self.peek_token())?; - } + + columns = self + .maybe_parse(|parser| { + parser.parse_comma_separated(Parser::parse_identifier) + }) + .unwrap_or_default(); for_columns = true } Some(Keyword::CACHE) => { @@ -471,20 +473,11 @@ impl<'a> Parser<'a> { None }; - let array_element = if self.consume_token(&Token::LBracket) { - let num = Some(self.parse_number_value()?); - self.expect_token(&Token::RBracket)?; - num - } else { - None - }; - Ok(Expr::Function(Function { name, args, over, distinct, - array_element, })) } @@ -1810,11 +1803,11 @@ impl<'a> Parser<'a> { Keyword::TRUE => Ok(Value::Boolean(true)), Keyword::FALSE => Ok(Value::Boolean(false)), Keyword::NULL => Ok(Value::Null), - Keyword::NoKeyword => Ok(Value::LiteralString(format!( - "{quote}{}{quote}", - w.value, - quote = w.quote_style.map(|q| q.to_string()).unwrap_or("".into()) - ))), + Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { + Some('"') => Ok(Value::DoubleQuotedString(w.value)), + Some('\'') => Ok(Value::SingleQuotedString(w.value)), + _ => self.expected("A value?", Token::Word(w))?, + }, _ => self.expected("a concrete value", Token::Word(w)), }, // The call to n.parse() returns a bigdecimal when the @@ -2002,6 +1995,7 @@ impl<'a> Parser<'a> { pub fn parse_identifier(&mut self) -> Result { match self.next_token() { Token::Word(w) => Ok(w.to_ident()), + Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), unexpected => self.expected("identifier", unexpected), } } @@ -2230,7 +2224,7 @@ impl<'a> Parser<'a> { Keyword::GROUP, Keyword::CLUSTER, Keyword::HAVING, - Keyword::LATERAL + Keyword::LATERAL, ]) // This couldn't possibly be a bad idea })? .into_iter() diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 35a4270d3..c448fd703 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -331,7 +331,6 @@ fn parse_select_count_wildcard() { args: vec![FunctionArg::Unnamed(Expr::Wildcard)], over: None, distinct: false, - array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -350,7 +349,6 @@ fn parse_select_count_distinct() { })], over: None, distinct: true, - array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -891,7 +889,6 @@ fn parse_select_having() { args: vec![FunctionArg::Unnamed(Expr::Wildcard)], over: None, distinct: false, - array_element: None })), op: BinaryOperator::Gt, right: Box::new(Expr::Value(number("1"))) @@ -1629,7 +1626,6 @@ fn parse_named_argument_function() { ], over: None, distinct: false, - array_element: None }), expr_from_projection(only(&select.projection)) ); @@ -1663,7 +1659,6 @@ fn parse_window_functions() { window_frame: None, }), distinct: false, - array_element: None }), expr_from_projection(&select.projection[0]) ); @@ -1947,7 +1942,6 @@ fn parse_delimited_identifiers() { args: vec![], over: None, distinct: false, - array_element: None }), expr_from_projection(&select.projection[1]), ); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 1770d4661..e01281801 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::test_utils::*; #[test] fn parse_table_create() { - let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2")"#; + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; hive().verified_stmt(sql); @@ -47,7 +47,8 @@ fn parse_analyze() { #[test] fn parse_analyze_for_columns() { - let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS a, b, c"#; + let analyze = + r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS"#; hive().verified_stmt(analyze); } @@ -177,12 +178,6 @@ fn lateral_view() { hive().verified_stmt(view); } -#[test] -fn test_array_elements() { - let elements = "SELECT collect_list(a)[0] FROM db.table"; - hive().verified_stmt(elements); -} - fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From f1e04eef9251ea2290ebcd840d7ad1f8521a8cec Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 14:34:22 -0400 Subject: [PATCH 81/97] Reenable some identifier parts in dialect, support LATERAL VIEW OUTER --- src/ast/query.rs | 8 ++++++-- src/dialect/hive.rs | 7 ++++++- src/parser.rs | 2 ++ tests/sqlparser_hive.rs | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index d778280a9..47f84a6b1 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -195,14 +195,18 @@ pub struct LateralView { pub lateral_view_name: ObjectName, /// LATERAL VIEW optional column aliases pub lateral_col_alias: Vec, + /// LATERAL VIEW OUTER + pub outer: bool, } impl fmt::Display for LateralView { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, - " LATERAL VIEW {} {}", - self.lateral_view, self.lateral_view_name + " LATERAL VIEW{outer} {} {}", + self.lateral_view, + self.lateral_view_name, + outer = if self.outer { " OUTER" } else { "" } )?; if !self.lateral_col_alias.is_empty() { write!( diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 4fc23ed28..7d60e010e 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -9,7 +9,10 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || (ch >= '0' && ch <= '9') + || ch == '$' } fn is_identifier_part(&self, ch: char) -> bool { @@ -17,5 +20,7 @@ impl Dialect for HiveDialect { || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_' + || ch == '{' + || ch == '}' } } diff --git a/src/parser.rs b/src/parser.rs index 44e63dfad..368f8dbfa 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2215,6 +2215,7 @@ impl<'a> Parser<'a> { let mut lateral_views = vec![]; loop { if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let outer = self.parse_keyword(Keyword::OUTER); let lateral_view = self.parse_expr()?; let lateral_view_name = self.parse_object_name()?; let lateral_col_alias = self @@ -2236,6 +2237,7 @@ impl<'a> Parser<'a> { lateral_view, lateral_view_name, lateral_col_alias, + outer }); } else { break; diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e01281801..4854f27d8 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -174,7 +174,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t LATERAL VIEW explode(a) t AS a, b WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } From 84e96c5dc31b5fb860b9941e7d3bc6b2e05d9e23 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 16:28:46 -0400 Subject: [PATCH 82/97] Cargo fmt --- src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 368f8dbfa..7e03f5308 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2237,7 +2237,7 @@ impl<'a> Parser<'a> { lateral_view, lateral_view_name, lateral_col_alias, - outer + outer, }); } else { break; From 0af9e5fa558e8a8b96c0b007b6c32de1a1634a70 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 16:48:11 -0400 Subject: [PATCH 83/97] Add license to hive.rs --- src/dialect/hive.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 7d60e010e..66241b89f 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::dialect::Dialect; #[derive(Debug)] From 27a91dec6ec75278a1340c5af7f2322efabbb0ab Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 17:24:40 -0400 Subject: [PATCH 84/97] Allow ommitting add/drop/sync partitions in msck --- src/parser.rs | 24 ++++++++++++++++-------- tests/sqlparser_hive.rs | 2 ++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 7e03f5308..2af602573 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -174,14 +174,22 @@ impl<'a> Parser<'a> { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; - let partition_action = - match self.parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC]) { - Some(Keyword::ADD) => Some(AddDropSync::ADD), - Some(Keyword::DROP) => Some(AddDropSync::DROP), - Some(Keyword::SYNC) => Some(AddDropSync::SYNC), - _ => None, - }; - self.expect_keyword(Keyword::PARTITIONS)?; + let partition_action = self + .maybe_parse(|parser| { + let pa = match parser.parse_one_of_keywords(&[ + Keyword::ADD, + Keyword::DROP, + Keyword::SYNC, + ]) { + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), + _ => None, + }; + parser.expect_keyword(Keyword::PARTITIONS)?; + Ok(pa) + }) + .unwrap_or_default(); Ok(Statement::Msck { repair, table_name, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 4854f27d8..e08cdf3f5 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -55,7 +55,9 @@ fn parse_analyze_for_columns() { #[test] fn parse_msck() { let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; + let msck2 = r#"MSCK REPAIR TABLE db.table_name"#; hive().verified_stmt(msck); + hive().verified_stmt(msck2); } #[test] From b4ea1eea3f6e4ced6fbe80b5b199e198de04ae3b Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 31 Jul 2020 17:40:36 -0400 Subject: [PATCH 85/97] Fix a bigdecimal compile error --- tests/sqlparser_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c448fd703..8af1bedc4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -454,7 +454,7 @@ fn parse_number() { #[cfg(feature = "bigdecimal")] assert_eq!( expr, - Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1))) + Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1), false)) ); #[cfg(not(feature = "bigdecimal"))] From 41615654962c9972ac7f5969f152ed2444bdb017 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Sat, 1 Aug 2020 12:38:24 -0400 Subject: [PATCH 86/97] Re-add dollar sign for idents --- src/dialect/hive.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 66241b89f..24c26b98d 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -32,6 +32,7 @@ impl Dialect for HiveDialect { || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_' + || ch == '$' || ch == '{' || ch == '}' } From ada1fe6a4c5dde69fd43a954f303f74dadf916ba Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Sun, 2 Aug 2020 14:52:09 -0400 Subject: [PATCH 87/97] Support SORT BY in selects --- src/ast/query.rs | 5 +++++ src/dialect/keywords.rs | 3 +++ src/parser.rs | 7 +++++++ tests/sqlparser_hive.rs | 6 ++++++ 4 files changed, 21 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index 47f84a6b1..bb2f0ee30 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -139,6 +139,8 @@ pub struct Select { pub cluster_by: Vec, /// DISTRIBUTE BY (Hive) pub distribute_by: Vec, + /// SORT BY (Hive) + pub sort_by: Vec, /// HAVING pub having: Option, } @@ -178,6 +180,9 @@ impl fmt::Display for Select { display_comma_separated(&self.distribute_by) )?; } + if !self.sort_by.is_empty() { + write!(f, " SORT BY {}", display_comma_separated(&self.sort_by))?; + } if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index d3ff6cef9..c9ff00995 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -400,6 +400,7 @@ define_keywords!( SIMILAR, SMALLINT, SOME, + SORT, SPECIFIC, SPECIFICTYPE, SQL, @@ -496,6 +497,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, + Keyword::SORT, Keyword::HAVING, Keyword::ORDER, Keyword::TOP, @@ -531,6 +533,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, + Keyword::SORT, Keyword::HAVING, Keyword::ORDER, Keyword::TOP, diff --git a/src/parser.rs b/src/parser.rs index 2af602573..18334412a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2276,6 +2276,12 @@ impl<'a> Parser<'a> { vec![] }; + let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let having = if self.parse_keyword(Keyword::HAVING) { Some(self.parse_expr()?) } else { @@ -2292,6 +2298,7 @@ impl<'a> Parser<'a> { group_by, cluster_by, distribute_by, + sort_by, having, }) } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e08cdf3f5..c0f4d585f 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -180,6 +180,12 @@ fn lateral_view() { hive().verified_stmt(view); } +#[test] +fn sort_by() { + let sort_by = "SELECT * FROM db.table SORT BY a"; + hive().verified_stmt(sort_by); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 982c073bd7003200b5af909a8c0c069996ccc654 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 3 Aug 2020 00:26:20 -0400 Subject: [PATCH 88/97] Fix ALTER TABLE RENAME TO as taking an ident when should take an ObjectName --- src/ast/ddl.rs | 2 +- src/parser.rs | 5 +++-- tests/sqlparser_hive.rs | 8 +++++++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 3ed2a6918..b88709040 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -55,7 +55,7 @@ pub enum AlterTableOperation { new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { table_name: Ident }, + RenameTable { table_name: ObjectName }, } impl fmt::Display for AlterTableOperation { diff --git a/src/parser.rs b/src/parser.rs index 18334412a..6e6a26ffb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1226,6 +1226,7 @@ impl<'a> Parser<'a> { or_replace: bool, ) -> Result { self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; @@ -1251,7 +1252,7 @@ impl<'a> Parser<'a> { with_options: vec![], table_properties, or_replace, - if_not_exists: false, + if_not_exists, external: true, temporary: false, file_format, @@ -1687,7 +1688,7 @@ impl<'a> Parser<'a> { } } else if self.parse_keyword(Keyword::RENAME) { if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_identifier()?; + let table_name = self.parse_object_name()?; AlterTableOperation::RenameTable { table_name } } else { let _ = self.parse_keyword(Keyword::COLUMN); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index c0f4d585f..e2ead0603 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -176,7 +176,7 @@ fn create_local_directory() { #[test] fn lateral_view() { - let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS j, P LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; hive().verified_stmt(view); } @@ -186,6 +186,12 @@ fn sort_by() { hive().verified_stmt(sort_by); } +#[test] +fn rename_table() { + let rename = "ALTER TABLE db.table_name RENAME TO db.table_2"; + hive().verified_stmt(rename); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 97db91fd403298faef3735557b66048e8ff3d2b6 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 2 Dec 2020 15:53:33 -0500 Subject: [PATCH 89/97] Fix WITH AS for Hive --- src/parser.rs | 18 +++++++++++++----- tests/sqlparser_hive.rs | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 6e6a26ffb..9fe903c2f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2119,18 +2119,27 @@ impl<'a> Parser<'a> { /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) fn parse_cte(&mut self) -> Result { - let alias = TableAlias { - name: self.parse_identifier()?, - columns: self.parse_parenthesized_column_list(Optional)?, - }; + let name = self.parse_identifier()?; if self.parse_keyword(Keyword::AS) { self.expect_token(&Token::LParen)?; let query = self.parse_query()?; self.expect_token(&Token::RParen)?; + let alias = TableAlias { + name, + columns: vec![], + }; Ok(Cte { alias, query }) } else { + let columns = self.parse_parenthesized_column_list(Optional)?; + self.expect_keyword(Keyword::AS)?; + self.expect_token(&Token::LParen)?; let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + let alias = TableAlias { + name, + columns, + }; Ok(Cte { alias, query }) } } @@ -2220,7 +2229,6 @@ impl<'a> Parser<'a> { } else { vec![] }; - let mut lateral_views = vec![]; loop { if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index e2ead0603..9837ea241 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -74,7 +74,7 @@ fn test_spaceship() { #[test] fn parse_with_cte() { - let with = "WITH a AS (SELECT * FROM table) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM a"; + let with = "WITH a AS (SELECT * FROM b) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM b"; hive().verified_stmt(with); } From 0b7d18f1d9e0b64a7d1a1c21de5d166bab96c0ef Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 2 Dec 2020 16:38:07 -0500 Subject: [PATCH 90/97] Fix WITH AS for Hive, merge from upstream --- examples/cli.rs | 2 +- src/ast/mod.rs | 4 +-- src/parser.rs | 75 ++++++++++++++++++++++++++--------------------- src/test_utils.rs | 2 +- 4 files changed, 45 insertions(+), 38 deletions(-) diff --git a/examples/cli.rs b/examples/cli.rs index bd1be5244..9ac079949 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -40,7 +40,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), - "--hive" => Box::new(HiveDialect{}), + "--hive" => Box::new(HiveDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {}", s), }; diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1ed58d1a1..b66a9f754 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -29,11 +29,9 @@ pub use self::ddl::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - Cte, Fetch, Join, JoinConstraint, JoinOperator, Offset, OffsetRows, OrderByExpr, Query, Select, - SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values, With, Cte, Fetch, Join, JoinConstraint, JoinOperator, LateralView, Offset, OffsetRows, OrderByExpr, Query, Select, SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, - Values, + Values, With, }; pub use self::value::{DateTimeField, Value}; diff --git a/src/parser.rs b/src/parser.rs index 9fe903c2f..70c0784de 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2081,40 +2081,52 @@ impl<'a> Parser<'a> { None }; - let body = self.parse_query_body(0)?; + if !self.parse_keyword(Keyword::INSERT) { + let body = self.parse_query_body(0)?; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()? - } else { - None - }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? + } else { + None + }; - let offset = if self.parse_keyword(Keyword::OFFSET) { - Some(self.parse_offset()?) - } else { - None - }; + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_offset()?) + } else { + None + }; - let fetch = if self.parse_keyword(Keyword::FETCH) { - Some(self.parse_fetch()?) - } else { - None - }; + let fetch = if self.parse_keyword(Keyword::FETCH) { + Some(self.parse_fetch()?) + } else { + None + }; - Ok(Query { - with, - body, - limit, - order_by, - offset, - fetch, - }) + Ok(Query { + with, + body, + limit, + order_by, + offset, + fetch, + }) + } else { + let insert = self.parse_insert()?; + Ok(Query { + with, + body: SetExpr::Insert(insert), + limit: None, + order_by: vec![], + offset: None, + fetch: None, + }) + } } /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) @@ -2136,10 +2148,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let query = self.parse_query()?; self.expect_token(&Token::RParen)?; - let alias = TableAlias { - name, - columns, - }; + let alias = TableAlias { name, columns }; Ok(Cte { alias, query }) } } diff --git a/src/test_utils.rs b/src/test_utils.rs index ab9ed802a..160d2c110 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -132,7 +132,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), Box::new(SnowflakeDialect {}), - Box::new(HiveDialect {}) + Box::new(HiveDialect {}), ], } } From 5e3c02c9e41fcfd91d652a9aa265e24b4eb4f68e Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 2 Dec 2020 16:58:15 -0500 Subject: [PATCH 91/97] Fix a lint --- src/tokenizer.rs | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f83b7430c..0d8dc8f0f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -366,14 +366,8 @@ impl<'a> Tokenizer<'a> { let s = self.tokenize_word(ch, chars); if s.chars().all(|x| x >= '0' && x <= '9' || x == '.') { - let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| match ch { - '0'..='9' | '.' => true, - _ => false, - }); - let s2 = peeking_take_while(chars, |ch| match ch { - '0'..='9' | '.' => true, - _ => false, - }); + let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| matches!(ch, '0'..='9' | '.')); + let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); s += s2.as_str(); return Ok(Some(Token::Number(s, false))); } @@ -401,10 +395,7 @@ impl<'a> Tokenizer<'a> { // numbers '0'..='9' => { // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal - let s = peeking_take_while(chars, |ch| match ch { - '0'..='9' | '.' => true, - _ => false, - }); + let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); let long = if chars.peek() == Some(&'L') { chars.next(); true From 3f7d421495aa150d8a041004913e32817e05d0db Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 2 Dec 2020 16:59:39 -0500 Subject: [PATCH 92/97] Cargo fmt --- src/tokenizer.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 0d8dc8f0f..5eee7cc40 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -366,7 +366,9 @@ impl<'a> Tokenizer<'a> { let s = self.tokenize_word(ch, chars); if s.chars().all(|x| x >= '0' && x <= '9' || x == '.') { - let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| matches!(ch, '0'..='9' | '.')); + let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| { + matches!(ch, '0'..='9' | '.') + }); let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); s += s2.as_str(); return Ok(Some(Token::Number(s, false))); From d964dd80407fbb525d300d40afa5e3e3ee706af9 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 2 Feb 2021 16:02:35 -0500 Subject: [PATCH 93/97] map access on a column --- src/ast/ddl.rs | 2 +- src/ast/mod.rs | 15 +++++++++++++-- src/ast/query.rs | 2 +- src/dialect/ansi.rs | 8 ++++---- src/dialect/generic.rs | 12 ++++++++---- src/parser.rs | 22 +++++++++++++++++++++- tests/sqlparser_hive.rs | 6 ++++++ 7 files changed, 54 insertions(+), 13 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index b88709040..7b55c4137 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -295,7 +295,7 @@ impl fmt::Display for ColumnOption { } } -fn display_constraint_name<'a>(name: &'a Option) -> impl fmt::Display + 'a { +fn display_constraint_name(name: &Option) -> impl fmt::Display + '_ { struct ConstraintName<'a>(&'a Option); impl<'a> fmt::Display for ConstraintName<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b66a9f754..9c50fbd99 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -192,7 +192,10 @@ pub enum Expr { right: Box, }, /// Unary operation e.g. `NOT foo` - UnaryOp { op: UnaryOperator, expr: Box }, + UnaryOp { + op: UnaryOperator, + expr: Box, + }, /// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` Cast { expr: Box, @@ -214,7 +217,14 @@ pub enum Expr { /// A constant of form ` 'value'`. /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`), /// as well as constants of other types (a non-standard PostgreSQL extension). - TypedString { data_type: DataType, value: String }, + TypedString { + data_type: DataType, + value: String, + }, + MapAccess { + column: Box, + key: String, + }, /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// `CASE [] WHEN THEN ... [ELSE ] END` @@ -242,6 +252,7 @@ impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Expr::Identifier(s) => write!(f, "{}", s), + Expr::MapAccess { column, key } => write!(f, "{}[\"{}\"]", column, key), Expr::Wildcard => f.write_str("*"), Expr::QualifiedWildcard(q) => write!(f, "{}.*", display_separated(q, ".")), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), diff --git a/src/ast/query.rs b/src/ast/query.rs index bb2f0ee30..44f114e0f 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -413,7 +413,7 @@ impl fmt::Display for Join { _ => "", } } - fn suffix<'a>(constraint: &'a JoinConstraint) -> impl fmt::Display + 'a { + fn suffix(constraint: &JoinConstraint) -> impl fmt::Display + '_ { struct Suffix<'a>(&'a JoinConstraint); impl<'a> fmt::Display for Suffix<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index ca01fb751..1015ca2d3 100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -17,13 +17,13 @@ pub struct AnsiDialect {} impl Dialect for AnsiDialect { fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') + ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '_' } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 104d3a9a3..818fa0d0a 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -17,13 +17,17 @@ pub struct GenericDialect; impl Dialect for GenericDialect { fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' || ch == '#' || ch == '@' + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ch == '_' + || ch == '#' + || ch == '@' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '@' || ch == '$' || ch == '#' diff --git a/src/parser.rs b/src/parser.rs index 70c0784de..8021dbc93 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -48,12 +48,14 @@ pub enum IsOptional { Optional, Mandatory, } + use IsOptional::*; pub enum IsLateral { Lateral, NotLateral, } + use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; @@ -281,6 +283,7 @@ impl<'a> Parser<'a> { loop { let next_precedence = self.get_next_precedence()?; debug!("next precedence: {:?}", next_precedence); + if precedence >= next_precedence { break; } @@ -422,6 +425,7 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } + Token::LParen => { let expr = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { @@ -845,12 +849,27 @@ impl<'a> Parser<'a> { op: UnaryOperator::PGPostfixFactorial, expr: Box::new(expr), }) + } else if Token::LBracket == tok { + self.parse_map_access(expr) } else { // Can only happen if `get_next_precedence` got out of sync with this function panic!("No infix parser for token {:?}", tok) } } + pub fn parse_map_access(&mut self, expr: Expr) -> Result { + let key = self.parse_literal_string()?; + let tok = self.consume_token(&Token::RBracket); + debug!("Tok: {}", tok); + match expr { + e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess { + column: Box::new(e), + key, + }), + _ => Ok(expr), + } + } + /// Parses the parens following the `[ NOT ] IN` operator pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { self.expect_token(&Token::LParen)?; @@ -936,6 +955,7 @@ impl<'a> Parser<'a> { Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40), Token::DoubleColon => Ok(50), Token::ExclamationMark => Ok(50), + Token::LBracket | Token::RBracket => Ok(10), _ => Ok(0), } } @@ -2460,7 +2480,7 @@ impl<'a> Parser<'a> { } } Keyword::OUTER => { - return self.expected("LEFT, RIGHT, or FULL", self.peek_token()) + return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); } _ if natural => { return self.expected("a join type after NATURAL", self.peek_token()); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 9837ea241..bf54ed4a9 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -192,6 +192,12 @@ fn rename_table() { hive().verified_stmt(rename); } +#[test] +fn map_access() { + let rename = "SELECT a.b[\"asdf\"] FROM db.table WHERE a = 2"; + hive().verified_stmt(rename); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 9d9d681cbabf31d1d07ad166da7a1fd87d07d960 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 4 Feb 2021 12:14:24 -0500 Subject: [PATCH 94/97] support from style ctes --- src/ast/query.rs | 7 ++++++- src/parser.rs | 18 +++++++++++++++--- tests/sqlparser_common.rs | 3 ++- tests/sqlparser_hive.rs | 7 +++++++ 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 44f114e0f..fa12e8208 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -251,11 +251,16 @@ impl fmt::Display for With { pub struct Cte { pub alias: TableAlias, pub query: Query, + pub from: Option, } impl fmt::Display for Cte { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} AS ({})", self.alias, self.query) + write!(f, "{} AS ({})", self.alias, self.query)?; + if let Some(ref fr) = self.from { + write!(f, " FROM {}", fr)?; + } + Ok(()) } } diff --git a/src/parser.rs b/src/parser.rs index 8021dbc93..8c5711270 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2153,7 +2153,7 @@ impl<'a> Parser<'a> { fn parse_cte(&mut self) -> Result { let name = self.parse_identifier()?; - if self.parse_keyword(Keyword::AS) { + let mut cte = if self.parse_keyword(Keyword::AS) { self.expect_token(&Token::LParen)?; let query = self.parse_query()?; self.expect_token(&Token::RParen)?; @@ -2161,7 +2161,11 @@ impl<'a> Parser<'a> { name, columns: vec![], }; - Ok(Cte { alias, query }) + Cte { + alias, + query, + from: None, + } } else { let columns = self.parse_parenthesized_column_list(Optional)?; self.expect_keyword(Keyword::AS)?; @@ -2169,8 +2173,16 @@ impl<'a> Parser<'a> { let query = self.parse_query()?; self.expect_token(&Token::RParen)?; let alias = TableAlias { name, columns }; - Ok(Cte { alias, query }) + Cte { + alias, + query, + from: None, + } + }; + if self.parse_keyword(Keyword::FROM) { + cte.from = Some(self.parse_identifier()?); } + Ok(cte) } /// Parse a "query body", which is an expression with roughly the diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8af1bedc4..efcfe9329 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2345,7 +2345,7 @@ fn parse_ctes() { fn assert_ctes_in_select(expected: &[&str], sel: &Query) { for (i, exp) in expected.iter().enumerate() { - let Cte { alias, query } = &sel.with.as_ref().unwrap().cte_tables[i]; + let Cte { alias, query, .. } = &sel.with.as_ref().unwrap().cte_tables[i]; assert_eq!(*exp, query.to_string()); assert_eq!( if i == 0 { @@ -2434,6 +2434,7 @@ fn parse_recursive_cte() { }], }, query: cte_query, + from: None, }; assert_eq!(with.cte_tables.first().unwrap(), &expected); } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index bf54ed4a9..585be989b 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -198,6 +198,13 @@ fn map_access() { hive().verified_stmt(rename); } +#[test] +fn from_cte() { + let rename = + "WITH cte AS (SELECT * FROM a.b) FROM cte INSERT INTO TABLE a.b PARTITION (a) SELECT *"; + println!("{}", hive().verified_stmt(rename)); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 671fa3c44606e26f47d664071c5b9b6df6c2faa5 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 4 Feb 2021 12:17:26 -0500 Subject: [PATCH 95/97] merge conflicts fix --- src/ast/ddl.rs | 2 +- src/ast/query.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 7b55c4137..67dc2e322 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -295,7 +295,7 @@ impl fmt::Display for ColumnOption { } } -fn display_constraint_name(name: &Option) -> impl fmt::Display + '_ { +fn display_constraint_name(name: &'_ Option) -> impl fmt::Display + '_ { struct ConstraintName<'a>(&'a Option); impl<'a> fmt::Display for ConstraintName<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/src/ast/query.rs b/src/ast/query.rs index fa12e8208..8f9ab499d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -418,7 +418,7 @@ impl fmt::Display for Join { _ => "", } } - fn suffix(constraint: &JoinConstraint) -> impl fmt::Display + '_ { + fn suffix(constraint: &'_ JoinConstraint) -> impl fmt::Display + '_ { struct Suffix<'a>(&'a JoinConstraint); impl<'a> fmt::Display for Suffix<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { From b885469b2e6150323c8a29ccef522c78b96f609a Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 4 Feb 2021 12:34:59 -0500 Subject: [PATCH 96/97] Fix some clippy warnings --- src/dialect/hive.rs | 12 ++++++------ src/dialect/mssql.rs | 12 ++++++++---- src/dialect/mysql.rs | 8 ++++---- src/dialect/postgresql.rs | 8 ++++---- src/dialect/snowflake.rs | 8 ++++---- src/dialect/sqlite.rs | 8 ++++---- src/tokenizer.rs | 2 +- 7 files changed, 31 insertions(+), 27 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 24c26b98d..9b42857ec 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -21,16 +21,16 @@ impl Dialect for HiveDialect { } fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '$' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '_' || ch == '$' || ch == '{' diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index c613a1502..539a17a9f 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -23,13 +23,17 @@ impl Dialect for MsSqlDialect { fn is_identifier_start(&self, ch: char) -> bool { // See https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-2017#rules-for-regular-identifiers // We don't support non-latin "letters" currently. - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' || ch == '#' || ch == '@' + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ch == '_' + || ch == '#' + || ch == '@' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '@' || ch == '$' || ch == '#' diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index a4aaafe6b..6581195b8 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -20,15 +20,15 @@ impl Dialect for MySqlDialect { // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html. // We don't yet support identifiers beginning with numbers, as that // makes it hard to distinguish numeric literals. - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) || ch == '_' || ch == '$' - || (ch >= '\u{0080}' && ch <= '\u{ffff}') + || ('\u{0080}'..='\u{ffff}').contains(&ch) } fn is_identifier_part(&self, ch: char) -> bool { - self.is_identifier_start(ch) || (ch >= '0' && ch <= '9') + self.is_identifier_start(ch) || ('0'..='9').contains(&ch) } fn is_delimited_identifier_start(&self, ch: char) -> bool { diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 1c11d8a37..0c2eb99f0 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -20,13 +20,13 @@ impl Dialect for PostgreSqlDialect { // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS // We don't yet support identifiers beginning with "letters with // diacritical marks and non-Latin letters" - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' + ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '$' || ch == '_' } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 22fd55fa5..93db95692 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -18,13 +18,13 @@ pub struct SnowflakeDialect; impl Dialect for SnowflakeDialect { // see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' + ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '$' || ch == '_' } diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 16ec66ac2..4ce2f834b 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -25,14 +25,14 @@ impl Dialect for SQLiteDialect { fn is_identifier_start(&self, ch: char) -> bool { // See https://www.sqlite.org/draft/tokenreq.html - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) || ch == '_' || ch == '$' - || (ch >= '\u{007f}' && ch <= '\u{ffff}') + || ('\u{007f}'..='\u{ffff}').contains(&ch) } fn is_identifier_part(&self, ch: char) -> bool { - self.is_identifier_start(ch) || (ch >= '0' && ch <= '9') + self.is_identifier_start(ch) || ('0'..='9').contains(&ch) } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 5eee7cc40..3e043a873 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -365,7 +365,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume the first char let s = self.tokenize_word(ch, chars); - if s.chars().all(|x| x >= '0' && x <= '9' || x == '.') { + if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') { let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| { matches!(ch, '0'..='9' | '.') }); From 349d75879eb6b321f444f1b66c86db5f540337c1 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 4 Feb 2021 12:50:46 -0500 Subject: [PATCH 97/97] Merge from main --- src/ast/mod.rs | 6 ------ src/parser.rs | 10 ---------- src/tokenizer.rs | 4 ++-- tests/sqlparser_common.rs | 12 ------------ 4 files changed, 2 insertions(+), 30 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c30049ad5..1999451d5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -675,11 +675,6 @@ pub enum Statement { /// A SQL query that specifies what to explain statement: Box, }, - /// ANALYZE - Analyze { - /// Name of table - table_name: ObjectName, - }, } impl fmt::Display for Statement { @@ -705,7 +700,6 @@ impl fmt::Display for Statement { write!(f, "{}", statement) } - Statement::Analyze { table_name } => write!(f, "ANALYZE TABLE {}", table_name), Statement::Query(s) => write!(f, "{}", s), Statement::Directory { overwrite, diff --git a/src/parser.rs b/src/parser.rs index 9782dd5b2..7a0b23101 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -139,7 +139,6 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Statement::Query(Box::new(self.parse_query()?))) } - Keyword::ANALYZE => Ok(self.parse_analyze()?), Keyword::TRUNCATE => Ok(self.parse_truncate()?), Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), @@ -2102,15 +2101,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_analyze(&mut self) -> Result { - // ANALYZE TABLE table_name - self.expect_keyword(Keyword::TABLE)?; - - let table_name = self.parse_object_name()?; - - Ok(Statement::Analyze { table_name }) - } - /// Parse a query expression, i.e. a `SELECT` statement optionally /// preceeded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d8284719c..fd33f9589 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -785,7 +785,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -817,7 +817,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7c587d649..ab4aa457b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1639,18 +1639,6 @@ fn parse_explain_analyze_with_simple_select() { ); } -#[test] -fn parse_simple_analyze() { - let sql = "ANALYZE TABLE t"; - let stmt = verified_stmt(sql); - assert_eq!( - stmt, - Statement::Analyze { - table_name: ObjectName(vec![Ident::new("t")]) - } - ); -} - #[test] fn parse_named_argument_function() { let sql = "SELECT FUN(a => '1', b => '2') FROM foo";