From b033cb60d3994a8cfe505a7a4daa24727bfb59c9 Mon Sep 17 00:00:00 2001 From: Po Date: Thu, 4 Mar 2021 15:59:24 +0800 Subject: [PATCH 1/9] Add support: USE `test` SET NAMES UTF8 SELECT @x, `@test` FROM `v`.`t` SAVEPOINT `a6656450_c8fe_4cbc_a75c_37d60d266629` ROLLBACK TO SAVEPOINT `0105c3d0_2750_4d45_be35_f17952cbf389` RELEASE SAVEPOINT `0105c3d0_2750_4d45_be35_f17952cbf389` SET SESSION TRANSACTION MODE --- src/ast/mod.rs | 54 ++++++++++++++++++++++++++++----- src/dialect/keywords.rs | 2 ++ src/dialect/mysql.rs | 1 + src/parser.rs | 65 +++++++++++++++++++++++++++++++++++----- tests/sqlparser_mysql.rs | 35 ++++++++++++++++++++++ 5 files changed, 142 insertions(+), 15 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 94951693c..720c2d575 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -34,6 +34,7 @@ pub use self::query::{ Values, With, }; pub use self::value::{DateTimeField, Value}; +use std::option::Option::Some; struct DisplaySeparated<'a, T> where @@ -640,6 +641,10 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement. ShowVariable { variable: Vec }, + /// USE + /// + /// Note: this is a MySQL-specific statement. + UseDatabase { variable: Ident }, /// SHOW COLUMNS /// /// Note: this is a MySQL-specific statement. @@ -651,12 +656,27 @@ pub enum Statement { }, /// `{ BEGIN [ TRANSACTION | WORK ] | START TRANSACTION } ...` StartTransaction { modes: Vec }, - /// `SET TRANSACTION ...` - SetTransaction { modes: Vec }, + /// `SET [SESSION] TRANSACTION ...` + SetTransaction { + session: bool, + modes: Vec + }, + /// SET NAMES + /// + /// Note: this is a MySQL-specific statement. + SetNames { variable: Ident }, /// `COMMIT [ TRANSACTION | WORK ] [ AND [ NO ] CHAIN ]` Commit { chain: bool }, + /// `SAVEPOINT identifier` + Savepoint { variable: Ident }, /// `ROLLBACK [ TRANSACTION | WORK ] [ AND [ NO ] CHAIN ]` - Rollback { chain: bool }, + /// `ROLLBACK [WORK] TO [SAVEPOINT] identifier` + Rollback { + chain: bool, + savepoint: Option + }, + /// RELEASE SAVEPOINT identifier + Release { variable: Ident }, /// CREATE SCHEMA CreateSchema { schema_name: ObjectName, @@ -1151,6 +1171,10 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::UseDatabase { variable } => { + write!(f, "USE {}", variable)?; + Ok(()) + } Statement::ShowColumns { extended, full, @@ -1176,18 +1200,34 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::SetTransaction { modes } => { - write!(f, "SET TRANSACTION")?; + Statement::SetTransaction { session, modes } => { + write!(f, "SET{} TRANSACTION", if *session { " SESSION" } else { "" })?; if !modes.is_empty() { write!(f, " {}", display_comma_separated(modes))?; } Ok(()) } + Statement::SetNames { variable } => { + write!(f, "SET NAMES {}", variable)?; + Ok(()) + } Statement::Commit { chain } => { write!(f, "COMMIT{}", if *chain { " AND CHAIN" } else { "" },) } - Statement::Rollback { chain } => { - write!(f, "ROLLBACK{}", if *chain { " AND CHAIN" } else { "" },) + Statement::Savepoint { variable } => { + write!(f, "SAVEPOINT {}", variable)?; + Ok(()) + } + Statement::Rollback { chain, savepoint } => { + if let Some(savepoint) = savepoint { + write!(f, "ROLLBACK TO SAVEPOINT {}", savepoint) + } else { + write!(f, "ROLLBACK{}", if *chain { " AND CHAIN" } else { "" },) + } + } + Statement::Release { variable } => { + write!(f, "RELEASE SAVEPOINT {}", variable)?; + Ok(()) } Statement::CreateSchema { schema_name, diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 1d2690fc0..422dcfea8 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -291,6 +291,7 @@ define_keywords!( MONTH, MSCK, MULTISET, + NAMES, NATIONAL, NATURAL, NCHAR, @@ -465,6 +466,7 @@ define_keywords!( UNNEST, UPDATE, UPPER, + USE, USER, USING, UUID, diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 6581195b8..d6095262c 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -24,6 +24,7 @@ impl Dialect for MySqlDialect { || ('A'..='Z').contains(&ch) || ch == '_' || ch == '$' + || ch == '@' || ('\u{0080}'..='\u{ffff}').contains(&ch) } diff --git a/src/parser.rs b/src/parser.rs index eab2ece12..738ba1b0c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -58,6 +58,7 @@ pub enum IsLateral { use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; +use crate::dialect::keywords::{NAMES, TRANSACTION}; impl From for ParserError { fn from(e: TokenizerError) -> Self { @@ -150,13 +151,16 @@ impl<'a> Parser<'a> { Keyword::COPY => Ok(self.parse_copy()?), Keyword::SET => Ok(self.parse_set()?), Keyword::SHOW => Ok(self.parse_show()?), + Keyword::USE => Ok(self.parse_use()?), Keyword::START => Ok(self.parse_start_transaction()?), // `BEGIN` is a nonstandard but common alias for the // standard `START TRANSACTION` statement. It is supported // by at least PostgreSQL and MySQL. Keyword::BEGIN => Ok(self.parse_begin()?), Keyword::COMMIT => Ok(self.parse_commit()?), + Keyword::SAVEPOINT => Ok(self.parse_savepoint()?), Keyword::ROLLBACK => Ok(self.parse_rollback()?), + Keyword::RELEASE => Ok(self.parse_release()?), Keyword::ASSERT => Ok(self.parse_assert()?), // `PREPARE`, `EXECUTE` and `DEALLOCATE` are Postgres-specific // syntaxes. They are used for Postgres prepared statement. @@ -2433,10 +2437,15 @@ impl<'a> Parser<'a> { value: values, }); } - } else if variable.value == "TRANSACTION" && modifier.is_none() { + } else if variable.value.to_uppercase() == TRANSACTION { Ok(Statement::SetTransaction { + session: !modifier.is_none(), modes: self.parse_transaction_modes()?, }) + } else if variable.value.to_uppercase() == NAMES && modifier.is_none() { + Ok(Statement::SetNames { + variable: self.parse_identifier()?, + }) } else { self.expected("equals sign or TO", self.peek_token()) } @@ -2461,6 +2470,12 @@ impl<'a> Parser<'a> { } } + pub fn parse_use(&mut self) -> Result { + Ok(Statement::UseDatabase { + variable: self.parse_identifier()?, + }) + } + fn parse_show_columns(&mut self) -> Result { let extended = self.parse_keyword(Keyword::EXTENDED); let full = self.parse_keyword(Keyword::FULL); @@ -3016,27 +3031,61 @@ impl<'a> Parser<'a> { pub fn parse_commit(&mut self) -> Result { Ok(Statement::Commit { - chain: self.parse_commit_rollback_chain()?, + chain: self.parse_commit_chain()?, }) } - pub fn parse_rollback(&mut self) -> Result { - Ok(Statement::Rollback { - chain: self.parse_commit_rollback_chain()?, + pub fn parse_commit_chain(&mut self) -> Result { + let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); + if self.parse_keyword(Keyword::AND) { + let chain = !self.parse_keyword(Keyword::NO); + self.expect_keyword(Keyword::CHAIN)?; + Ok(chain) + } else { + Ok(false) + } + } + + pub fn parse_savepoint(&mut self) -> Result { + Ok(Statement::Savepoint { + variable: self.parse_identifier()?, }) } - pub fn parse_commit_rollback_chain(&mut self) -> Result { + pub fn parse_rollback(&mut self) -> Result { + self.parse_rollback_chain_or_to() + } + + pub fn parse_rollback_chain_or_to(&mut self) -> Result { let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); if self.parse_keyword(Keyword::AND) { let chain = !self.parse_keyword(Keyword::NO); self.expect_keyword(Keyword::CHAIN)?; - Ok(chain) + Ok(Statement::Rollback { + chain: chain, + savepoint: None + }) + } else if self.parse_keyword(Keyword::TO) { + let _ = self.parse_one_of_keywords(&[Keyword::SAVEPOINT]); + Ok(Statement::Rollback { + chain: false, + savepoint: Some(self.parse_identifier()?) + }) } else { - Ok(false) + Ok(Statement::Rollback { + chain: false, + savepoint: None + }) } } + pub fn parse_release(&mut self) -> Result { + self.expect_keyword(Keyword::SAVEPOINT)?; + Ok(Statement::Release { + variable: self.parse_identifier()?, + }) + } + fn parse_deallocate(&mut self) -> Result { let prepare = self.parse_keyword(Keyword::PREPARE); let name = self.parse_identifier()?; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index a8f85584c..a6f7f3e4f 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -27,6 +27,41 @@ fn parse_identifiers() { mysql().verified_stmt("SELECT $a$, àà"); } +#[test] +fn parse_use() { + mysql().verified_stmt("USE `test`"); +} + +#[test] +fn parse_set_names() { + mysql().verified_stmt("SET NAMES UTF8"); +} + +#[test] +fn parse_identifiers_with_at() { + mysql().verified_stmt("SELECT @x, `@test` FROM `v`.`t`"); +} + +#[test] +fn parse_savepoint() { + mysql().verified_stmt("SAVEPOINT `a6656450_c8fe_4cbc_a75c_37d60d266629`"); +} + +#[test] +fn parse_rollback_to() { + mysql().verified_stmt("ROLLBACK TO SAVEPOINT `0105c3d0_2750_4d45_be35_f17952cbf389`"); +} + +#[test] +fn parse_release() { + mysql().verified_stmt("RELEASE SAVEPOINT `0105c3d0_2750_4d45_be35_f17952cbf389`"); +} + +#[test] +fn parse_set_session_transaction() { + mysql().verified_stmt("SET SESSION TRANSACTION READ ONLY"); +} + #[test] fn parse_show_columns() { let table_name = ObjectName(vec![Ident::new("mytable")]); From ffd21563b8fc0a057650ba52c3027c2e3b2b0b28 Mon Sep 17 00:00:00 2001 From: Po Date: Thu, 4 Mar 2021 16:18:12 +0800 Subject: [PATCH 2/9] Fixed code style Fixed test code error --- src/ast/mod.rs | 10 +++++++--- src/parser.rs | 10 +++++----- tests/sqlparser_common.rs | 6 +++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 720c2d575..5573341b3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -659,7 +659,7 @@ pub enum Statement { /// `SET [SESSION] TRANSACTION ...` SetTransaction { session: bool, - modes: Vec + modes: Vec, }, /// SET NAMES /// @@ -673,7 +673,7 @@ pub enum Statement { /// `ROLLBACK [WORK] TO [SAVEPOINT] identifier` Rollback { chain: bool, - savepoint: Option + savepoint: Option, }, /// RELEASE SAVEPOINT identifier Release { variable: Ident }, @@ -1201,7 +1201,11 @@ impl fmt::Display for Statement { Ok(()) } Statement::SetTransaction { session, modes } => { - write!(f, "SET{} TRANSACTION", if *session { " SESSION" } else { "" })?; + write!( + f, + "SET{} TRANSACTION", + if *session { " SESSION" } else { "" } + )?; if !modes.is_empty() { write!(f, " {}", display_comma_separated(modes))?; } diff --git a/src/parser.rs b/src/parser.rs index 738ba1b0c..dc4dc8698 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -57,8 +57,8 @@ pub enum IsLateral { } use crate::ast::Statement::CreateVirtualTable; -use IsLateral::*; use crate::dialect::keywords::{NAMES, TRANSACTION}; +use IsLateral::*; impl From for ParserError { fn from(e: TokenizerError) -> Self { @@ -3062,19 +3062,19 @@ impl<'a> Parser<'a> { let chain = !self.parse_keyword(Keyword::NO); self.expect_keyword(Keyword::CHAIN)?; Ok(Statement::Rollback { - chain: chain, - savepoint: None + chain, + savepoint: None, }) } else if self.parse_keyword(Keyword::TO) { let _ = self.parse_one_of_keywords(&[Keyword::SAVEPOINT]); Ok(Statement::Rollback { chain: false, - savepoint: Some(self.parse_identifier()?) + savepoint: Some(self.parse_identifier()?), }) } else { Ok(Statement::Rollback { chain: false, - savepoint: None + savepoint: None, }) } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index fbf2faf9b..15893da2a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3216,7 +3216,7 @@ fn parse_set_transaction() { // TRANSACTION, so no need to duplicate the tests here. We just do a quick // sanity check. match verified_stmt("SET TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE") { - Statement::SetTransaction { modes } => assert_eq!( + Statement::SetTransaction { modes, .. } => assert_eq!( modes, vec![ TransactionMode::AccessMode(TransactionAccessMode::ReadOnly), @@ -3252,12 +3252,12 @@ fn parse_commit() { #[test] fn parse_rollback() { match verified_stmt("ROLLBACK") { - Statement::Rollback { chain: false } => (), + Statement::Rollback { chain: false, .. } => (), _ => unreachable!(), } match verified_stmt("ROLLBACK AND CHAIN") { - Statement::Rollback { chain: true } => (), + Statement::Rollback { chain: true, .. } => (), _ => unreachable!(), } From 3d136dd9c17a07420a6a9c497c4570801bd83678 Mon Sep 17 00:00:00 2001 From: Po Date: Thu, 4 Mar 2021 16:46:40 +0800 Subject: [PATCH 3/9] Fixed hint check --- src/parser.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index dc4dc8698..ef31d7257 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1821,11 +1821,11 @@ impl<'a> Parser<'a> { /// Parse a tab separated values in /// COPY payload fn parse_tsv(&mut self) -> Result>, ParserError> { - let values = self.parse_tab_value()?; + let values = self.parse_tab_value(); Ok(values) } - fn parse_tab_value(&mut self) -> Result>, ParserError> { + fn parse_tab_value(&mut self) -> Vec> { let mut values = vec![]; let mut content = String::from(""); while let Some(t) = self.next_token_no_skip() { @@ -1840,7 +1840,7 @@ impl<'a> Parser<'a> { } Token::Backslash => { if self.consume_token(&Token::Period) { - return Ok(values); + return values; } if let Token::Word(w) = self.next_token() { if w.value == "N" { @@ -1853,7 +1853,7 @@ impl<'a> Parser<'a> { } } } - Ok(values) + values } /// Parse a literal value (numbers, strings, date/time, booleans) @@ -2439,7 +2439,7 @@ impl<'a> Parser<'a> { } } else if variable.value.to_uppercase() == TRANSACTION { Ok(Statement::SetTransaction { - session: !modifier.is_none(), + session: modifier.is_some(), modes: self.parse_transaction_modes()?, }) } else if variable.value.to_uppercase() == NAMES && modifier.is_none() { From 9427747005a342f71931a47af6a016108f78f454 Mon Sep 17 00:00:00 2001 From: Po Date: Tue, 9 Mar 2021 15:22:12 +0800 Subject: [PATCH 4/9] Add support: update limit --- src/ast/mod.rs | 6 ++++++ src/parser.rs | 6 ++++++ tests/sqlparser_mysql.rs | 5 +++++ 3 files changed, 17 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5573341b3..a10bfee67 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -550,6 +550,8 @@ pub enum Statement { assignments: Vec, /// WHERE selection: Option, + /// `LIMIT { | ALL }` + limit: Option, }, /// DELETE Delete { @@ -889,6 +891,7 @@ impl fmt::Display for Statement { table_name, assignments, selection, + limit, } => { write!(f, "UPDATE {}", table_name)?; if !assignments.is_empty() { @@ -897,6 +900,9 @@ impl fmt::Display for Statement { if let Some(selection) = selection { write!(f, " WHERE {}", selection)?; } + if let Some(ref limit) = limit { + write!(f, " LIMIT {}", limit)?; + } Ok(()) } Statement::Delete { diff --git a/src/parser.rs b/src/parser.rs index ef31d7257..c5b92f003 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2815,10 +2815,16 @@ impl<'a> Parser<'a> { } else { None }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? + } else { + None + }; Ok(Statement::Update { table_name, assignments, selection, + limit, }) } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index a6f7f3e4f..2b4e4881c 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -27,6 +27,11 @@ fn parse_identifiers() { mysql().verified_stmt("SELECT $a$, àà"); } +#[test] +fn parse_update_limit() { + mysql().verified_stmt("UPDATE t SET c = 1 WHERE b = 2 LIMIT 1"); +} + #[test] fn parse_use() { mysql().verified_stmt("USE `test`"); From fa2cb4870a381423349888ba52700e92e7c36862 Mon Sep 17 00:00:00 2001 From: Po Date: Tue, 23 Mar 2021 19:54:46 +0800 Subject: [PATCH 5/9] Add support: select limit 1, 100 and parameter mark --- src/ast/mod.rs | 7 ++++++- src/dialect/mysql.rs | 1 + src/parser.rs | 10 +++++++--- src/tokenizer.rs | 21 ++++++++++++++++++++- tests/sqlparser_mysql.rs | 8 ++++++++ 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a10bfee67..2c0711b08 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -254,6 +254,8 @@ pub enum Expr { Subquery(Box), /// The `LISTAGG` function `SELECT LISTAGG(...) WITHIN GROUP (ORDER BY ...)` ListAgg(ListAgg), + /// `?` Parameter Mark + ParameterMark(u32), } impl fmt::Display for Expr { @@ -355,7 +357,10 @@ impl fmt::Display for Expr { } write!(f, ")") - } + }, + Expr::ParameterMark(_) => { + write!(f, "?") + }, } } } diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index d6095262c..4309bb284 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -25,6 +25,7 @@ impl Dialect for MySqlDialect { || ch == '_' || ch == '$' || ch == '@' + || ch == '?' || ('\u{0080}'..='\u{ffff}').contains(&ch) } diff --git a/src/parser.rs b/src/parser.rs index c5b92f003..0ba1ee8f6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -447,6 +447,10 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(expr) } + + Token::ParameterMark(index) => { + Ok(Expr::ParameterMark(index)) + } unexpected => self.expected("an expression:", unexpected), }?; @@ -2173,7 +2177,7 @@ impl<'a> Parser<'a> { None }; - let offset = if self.parse_keyword(Keyword::OFFSET) { + let offset = if self.consume_token(&Token::Comma) || self.parse_keyword(Keyword::OFFSET) { Some(self.parse_offset()?) } else { None @@ -2930,13 +2934,13 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::ALL) { Ok(None) } else { - Ok(Some(Expr::Value(self.parse_number_value()?))) + Ok(Some(self.parse_expr()?)) } } /// Parse an OFFSET clause pub fn parse_offset(&mut self) -> Result { - let value = Expr::Value(self.parse_number_value()?); + let value = self.parse_expr()?; let rows = if self.parse_keyword(Keyword::ROW) { OffsetRows::Row } else if self.parse_keyword(Keyword::ROWS) { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d82810528..57705047d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -124,6 +124,8 @@ pub enum Token { PGSquareRoot, /// `||/` , a cube root math operator in PostgreSQL PGCubeRoot, + /// `?` Parameter Mark + ParameterMark(u32), } impl fmt::Display for Token { @@ -176,6 +178,7 @@ impl fmt::Display for Token { Token::ShiftRight => f.write_str(">>"), Token::PGSquareRoot => f.write_str("|/"), Token::PGCubeRoot => f.write_str("||/"), + Token::ParameterMark(_) => f.write_str("?"), } } } @@ -273,6 +276,7 @@ pub struct Tokenizer<'a> { pub query: String, pub line: u64, pub col: u64, + pub parameter_mark_index: u32, } impl<'a> Tokenizer<'a> { @@ -283,6 +287,7 @@ impl<'a> Tokenizer<'a> { query: query.to_string(), line: 1, col: 1, + parameter_mark_index: 1, } } @@ -307,6 +312,10 @@ impl<'a> Tokenizer<'a> { _ => self.col += 1, } + match &token { + Token::ParameterMark(_) => self.parameter_mark_index += 1, + _ => {} + } tokens.push(token); } Ok(tokens) @@ -372,8 +381,17 @@ impl<'a> Tokenizer<'a> { let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); s += s2.as_str(); return Ok(Some(Token::Number(s, false))); + } else if s.eq("?") { + return Ok(Some(Token::ParameterMark(self.parameter_mark_index))); + } + if s.starts_with("?") { + self.tokenizer_error( + format!("Expected quoted '{}'.", s) + .as_str(), + ) + } else { + Ok(Some(Token::make_word(&s, None))) } - Ok(Some(Token::make_word(&s, None))) } // string '\'' => { @@ -538,6 +556,7 @@ impl<'a> Tokenizer<'a> { '~' => self.consume_and_return(chars, Token::Tilde), '#' => self.consume_and_return(chars, Token::Sharp), '@' => self.consume_and_return(chars, Token::AtSign), + '?' => self.consume_and_return(chars, Token::ParameterMark(self.parameter_mark_index)), other => self.consume_and_return(chars, Token::Char(other)), }, None => Ok(None), diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 2b4e4881c..ad5cafb12 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -27,6 +27,14 @@ fn parse_identifiers() { mysql().verified_stmt("SELECT $a$, àà"); } +#[test] +fn parse_parameter_mark() { + mysql().verified_stmt("UPDATE t SET c = 1 WHERE b = ? LIMIT ?"); + let actual = mysql().parse_sql_statements("SELECT *, ? FROM t WHERE c = '??' AND b = ? LIMIT ? , ?").unwrap().pop().unwrap(); + let expected = mysql().parse_sql_statements("SELECT *, ? FROM t WHERE c = '??' AND b = ? LIMIT ? OFFSET ?").unwrap().pop().unwrap(); + assert_eq!(actual.to_string(), expected.to_string()) +} + #[test] fn parse_update_limit() { mysql().verified_stmt("UPDATE t SET c = 1 WHERE b = 2 LIMIT 1"); From f7e33bc9fe35d5dc218bbb17648f38ed846f6f9e Mon Sep 17 00:00:00 2001 From: Po Date: Tue, 23 Mar 2021 20:10:07 +0800 Subject: [PATCH 6/9] merge from main --- CHANGELOG.md | 5 +++ Cargo.toml | 2 +- src/ast/mod.rs | 9 +++++ src/ast/operator.rs | 4 +++ src/dialect/keywords.rs | 2 ++ src/parser.rs | 23 ++++++++++++ src/tokenizer.rs | 1 + tests/sqlparser_common.rs | 75 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 120 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0701b23dd..bcf1ab6a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ Given that the parser produces a typed AST, any changes to the AST will technica Check https://github.com/ballista-compute/sqlparser-rs/commits/main for undocumented changes. +## [0.8.0] 2020-03-21 + +### Added +* Add support for `TRY_CAST` syntax (#299) - Thanks @seddonm1! + ## [0.8.0] 2020-02-20 ### Added diff --git a/Cargo.toml b/Cargo.toml index 8d4ce5b0d..e0ee25ff0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.8.1-alpha.0" +version = "0.9.1-alpha.0" authors = ["Andy Grove "] homepage = "https://github.com/ballista-compute/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2c0711b08..33e381edd 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -202,6 +202,12 @@ pub enum Expr { expr: Box, data_type: DataType, }, + /// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))` + // this differs from CAST in the choice of how to implement invalid conversions + TryCast { + expr: Box, + data_type: DataType, + }, /// EXTRACT(DateTimeField FROM ) Extract { field: DateTimeField, @@ -312,6 +318,7 @@ impl fmt::Display for Expr { } } Expr::Cast { expr, data_type } => write!(f, "CAST({} AS {})", expr, data_type), + Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({} AS {})", expr, data_type), Expr::Extract { field, expr } => write!(f, "EXTRACT({} FROM {})", field, expr), Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation), Expr::Nested(ast) => write!(f, "({})", ast), @@ -1594,6 +1601,7 @@ impl fmt::Display for TransactionIsolationLevel { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum ShowStatementFilter { Like(String), + ILike(String), Where(Expr), } @@ -1602,6 +1610,7 @@ impl fmt::Display for ShowStatementFilter { use ShowStatementFilter::*; match self { Like(pattern) => write!(f, "LIKE '{}'", value::escape_single_quote_string(pattern)), + ILike(pattern) => write!(f, "ILIKE {}", value::escape_single_quote_string(pattern)), Where(expr) => write!(f, "WHERE {}", expr), } } diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 732c81232..ff978fb97 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -72,6 +72,8 @@ pub enum BinaryOperator { Or, Like, NotLike, + ILike, + NotILike, BitwiseOr, BitwiseAnd, BitwiseXor, @@ -100,6 +102,8 @@ impl fmt::Display for BinaryOperator { BinaryOperator::Or => "OR", BinaryOperator::Like => "LIKE", BinaryOperator::NotLike => "NOT LIKE", + BinaryOperator::ILike => "ILIKE", + BinaryOperator::NotILike => "NOT ILIKE", BinaryOperator::BitwiseOr => "|", BinaryOperator::BitwiseAnd => "&", BinaryOperator::BitwiseXor => "^", diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 422dcfea8..5df1736a3 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -236,6 +236,7 @@ define_keywords!( IDENTITY, IF, IGNORE, + ILIKE, IN, INDEX, INDICATOR, @@ -457,6 +458,7 @@ define_keywords!( TRIM_ARRAY, TRUE, TRUNCATE, + TRY_CAST, UESCAPE, UNBOUNDED, UNCOMMITTED, diff --git a/src/parser.rs b/src/parser.rs index 0ba1ee8f6..ff76461ca 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -356,6 +356,7 @@ impl<'a> Parser<'a> { } Keyword::CASE => self.parse_case_expr(), Keyword::CAST => self.parse_cast_expr(), + Keyword::TRY_CAST => self.parse_try_cast_expr(), Keyword::EXISTS => self.parse_exists_expr(), Keyword::EXTRACT => self.parse_extract_expr(), Keyword::SUBSTRING => self.parse_substring_expr(), @@ -599,6 +600,19 @@ impl<'a> Parser<'a> { }) } + /// Parse a SQL TRY_CAST function e.g. `TRY_CAST(expr AS FLOAT)` + pub fn parse_try_cast_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::TryCast { + expr: Box::new(expr), + data_type, + }) + } + /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. pub fn parse_exists_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; @@ -833,9 +847,12 @@ impl<'a> Parser<'a> { Keyword::AND => Some(BinaryOperator::And), Keyword::OR => Some(BinaryOperator::Or), Keyword::LIKE => Some(BinaryOperator::Like), + Keyword::ILIKE => Some(BinaryOperator::ILike), Keyword::NOT => { if self.parse_keyword(Keyword::LIKE) { Some(BinaryOperator::NotLike) + } else if self.parse_keyword(Keyword::ILIKE) { + Some(BinaryOperator::NotILike) } else { None } @@ -969,12 +986,14 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC), _ => Ok(0), }, Token::Word(w) if w.keyword == Keyword::IS => Ok(17), Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC), Token::Eq | Token::Lt | Token::LtEq @@ -2503,6 +2522,10 @@ impl<'a> Parser<'a> { Ok(Some(ShowStatementFilter::Like( self.parse_literal_string()?, ))) + } else if self.parse_keyword(Keyword::ILIKE) { + Ok(Some(ShowStatementFilter::ILike( + self.parse_literal_string()?, + ))) } else if self.parse_keyword(Keyword::WHERE) { Ok(Some(ShowStatementFilter::Where(self.parse_expr()?))) } else { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 57705047d..c4c45512f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -645,6 +645,7 @@ impl<'a> Tokenizer<'a> { } } + #[allow(clippy::unnecessary_wraps)] fn consume_and_return( &self, chars: &mut Peekable>, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 15893da2a..042e1ea70 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -687,6 +687,51 @@ fn parse_like() { chk(true); } +#[test] +fn parse_ilike() { + fn chk(negated: bool) { + let sql = &format!( + "SELECT * FROM customers WHERE name {}ILIKE '%a'", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("name"))), + op: if negated { + BinaryOperator::NotILike + } else { + BinaryOperator::ILike + }, + right: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), + }, + select.selection.unwrap() + ); + + // This statement tests that LIKE and NOT LIKE have the same precedence. + // This was previously mishandled (#81). + let sql = &format!( + "SELECT * FROM customers WHERE name {}ILIKE '%a' IS NULL", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::IsNull(Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("name"))), + op: if negated { + BinaryOperator::NotILike + } else { + BinaryOperator::ILike + }, + right: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), + })), + select.selection.unwrap() + ); + } + chk(false); + chk(true); +} + #[test] fn parse_in_list() { fn chk(negated: bool) { @@ -981,6 +1026,35 @@ fn parse_cast() { ); } +#[test] +fn parse_try_cast() { + let sql = "SELECT TRY_CAST(id AS BIGINT) FROM customer"; + let select = verified_only_select(sql); + assert_eq!( + &Expr::TryCast { + expr: Box::new(Expr::Identifier(Ident::new("id"))), + data_type: DataType::BigInt + }, + expr_from_projection(only(&select.projection)) + ); + one_statement_parses_to( + "SELECT TRY_CAST(id AS BIGINT) FROM customer", + "SELECT TRY_CAST(id AS BIGINT) FROM customer", + ); + + verified_stmt("SELECT TRY_CAST(id AS NUMERIC) FROM customer"); + + one_statement_parses_to( + "SELECT TRY_CAST(id AS DEC) FROM customer", + "SELECT TRY_CAST(id AS NUMERIC) FROM customer", + ); + + one_statement_parses_to( + "SELECT TRY_CAST(id AS DECIMAL) FROM customer", + "SELECT TRY_CAST(id AS NUMERIC) FROM customer", + ); +} + #[test] fn parse_extract() { let sql = "SELECT EXTRACT(YEAR FROM d)"; @@ -1224,6 +1298,7 @@ fn parse_assert() { } #[test] +#[allow(clippy::collapsible_match)] fn parse_assert_message() { let sql = "ASSERT (SELECT COUNT(*) FROM my_table) > 0 AS 'No rows in my_table'"; let ast = one_statement_parses_to( From bdc06ce8a30d3630115e1817fe5ceeeb4482052c Mon Sep 17 00:00:00 2001 From: Po Date: Tue, 23 Mar 2021 20:15:09 +0800 Subject: [PATCH 7/9] merge from main --- src/parser.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index ff76461ca..d9f13a183 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1485,7 +1485,7 @@ impl<'a> Parser<'a> { ) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; - let like = if self.parse_keyword(Keyword::LIKE) { + let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { self.parse_object_name().ok() } else { None @@ -1833,7 +1833,7 @@ impl<'a> Parser<'a> { let columns = self.parse_parenthesized_column_list(Optional)?; self.expect_keywords(&[Keyword::FROM, Keyword::STDIN])?; self.expect_token(&Token::SemiColon)?; - let values = self.parse_tsv()?; + let values = self.parse_tsv(); Ok(Statement::Copy { table_name, columns, @@ -1843,9 +1843,8 @@ impl<'a> Parser<'a> { /// Parse a tab separated values in /// COPY payload - fn parse_tsv(&mut self) -> Result>, ParserError> { - let values = self.parse_tab_value(); - Ok(values) + fn parse_tsv(&mut self) -> Vec> { + self.parse_tab_value() } fn parse_tab_value(&mut self) -> Vec> { From 2381f85098afd1ea096880045e2307478eef4edb Mon Sep 17 00:00:00 2001 From: Po Date: Tue, 23 Mar 2021 20:30:57 +0800 Subject: [PATCH 8/9] fixed code style and lint --- src/ast/mod.rs | 6 ++---- src/parser.rs | 7 +++---- src/tokenizer.rs | 16 +++++++--------- tests/sqlparser_mysql.rs | 12 ++++++++++-- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 33e381edd..b053c9bc5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -364,10 +364,8 @@ impl fmt::Display for Expr { } write!(f, ")") - }, - Expr::ParameterMark(_) => { - write!(f, "?") - }, + } + Expr::ParameterMark(_) => write!(f, "?"), } } } diff --git a/src/parser.rs b/src/parser.rs index d9f13a183..814b75329 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -449,9 +449,7 @@ impl<'a> Parser<'a> { Ok(expr) } - Token::ParameterMark(index) => { - Ok(Expr::ParameterMark(index)) - } + Token::ParameterMark(index) => Ok(Expr::ParameterMark(index)), unexpected => self.expected("an expression:", unexpected), }?; @@ -2195,7 +2193,8 @@ impl<'a> Parser<'a> { None }; - let offset = if self.consume_token(&Token::Comma) || self.parse_keyword(Keyword::OFFSET) { + let offset = if self.consume_token(&Token::Comma) || self.parse_keyword(Keyword::OFFSET) + { Some(self.parse_offset()?) } else { None diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c4c45512f..73a91e32b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -312,9 +312,8 @@ impl<'a> Tokenizer<'a> { _ => self.col += 1, } - match &token { - Token::ParameterMark(_) => self.parameter_mark_index += 1, - _ => {} + if let Token::ParameterMark(_) = &token { + self.parameter_mark_index += 1 } tokens.push(token); } @@ -384,11 +383,8 @@ impl<'a> Tokenizer<'a> { } else if s.eq("?") { return Ok(Some(Token::ParameterMark(self.parameter_mark_index))); } - if s.starts_with("?") { - self.tokenizer_error( - format!("Expected quoted '{}'.", s) - .as_str(), - ) + if s.starts_with('?') { + self.tokenizer_error(format!("Expected quoted '{}'.", s).as_str(),) } else { Ok(Some(Token::make_word(&s, None))) } @@ -556,7 +552,9 @@ impl<'a> Tokenizer<'a> { '~' => self.consume_and_return(chars, Token::Tilde), '#' => self.consume_and_return(chars, Token::Sharp), '@' => self.consume_and_return(chars, Token::AtSign), - '?' => self.consume_and_return(chars, Token::ParameterMark(self.parameter_mark_index)), + '?' => { + self.consume_and_return(chars, Token::ParameterMark(self.parameter_mark_index)) + }, other => self.consume_and_return(chars, Token::Char(other)), }, None => Ok(None), diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ad5cafb12..fc73e7704 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -30,8 +30,16 @@ fn parse_identifiers() { #[test] fn parse_parameter_mark() { mysql().verified_stmt("UPDATE t SET c = 1 WHERE b = ? LIMIT ?"); - let actual = mysql().parse_sql_statements("SELECT *, ? FROM t WHERE c = '??' AND b = ? LIMIT ? , ?").unwrap().pop().unwrap(); - let expected = mysql().parse_sql_statements("SELECT *, ? FROM t WHERE c = '??' AND b = ? LIMIT ? OFFSET ?").unwrap().pop().unwrap(); + let actual = mysql() + .parse_sql_statements("SELECT *, ? FROM t WHERE c = '??' AND b = ? LIMIT ? , ?") + .unwrap() + .pop() + .unwrap(); + let expected = mysql() + .parse_sql_statements("SELECT *, ? FROM t WHERE c = '??' AND b = ? LIMIT ? OFFSET ?") + .unwrap() + .pop() + .unwrap(); assert_eq!(actual.to_string(), expected.to_string()) } From e9f216223c16b056e8e3f10923c4edbd8855d451 Mon Sep 17 00:00:00 2001 From: Po Date: Tue, 23 Mar 2021 20:33:23 +0800 Subject: [PATCH 9/9] fixed code style --- src/tokenizer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 73a91e32b..87002d21c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -384,7 +384,7 @@ impl<'a> Tokenizer<'a> { return Ok(Some(Token::ParameterMark(self.parameter_mark_index))); } if s.starts_with('?') { - self.tokenizer_error(format!("Expected quoted '{}'.", s).as_str(),) + self.tokenizer_error(format!("Expected quoted '{}'.", s).as_str()) } else { Ok(Some(Token::make_word(&s, None))) } @@ -554,7 +554,7 @@ impl<'a> Tokenizer<'a> { '@' => self.consume_and_return(chars, Token::AtSign), '?' => { self.consume_and_return(chars, Token::ParameterMark(self.parameter_mark_index)) - }, + } other => self.consume_and_return(chars, Token::Char(other)), }, None => Ok(None),