diff --git a/examples/cli.rs b/examples/cli.rs index 38b3de841..1c177faaf 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -43,6 +43,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--mysql" => Box::new(MySqlDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), "--hive" => Box::new(HiveDialect {}), + "--redshift" => Box::new(RedshiftSqlDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {}", s), }; diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 4d8b3b5bf..1847f2518 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -14,7 +14,7 @@ //! (commonly referred to as Data Definition Language, or DDL) #[cfg(not(feature = "std"))] -use alloc::{boxed::Box, string::String, string::ToString, vec::Vec}; +use alloc::{boxed::Box, string::String, vec::Vec}; use core::fmt; #[cfg(feature = "serde")] diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 008b099d2..8defd66f3 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -17,11 +17,14 @@ mod hive; mod mssql; mod mysql; mod postgresql; +mod redshift; mod snowflake; mod sqlite; use core::any::{Any, TypeId}; use core::fmt::Debug; +use core::iter::Peekable; +use core::str::Chars; pub use self::ansi::AnsiDialect; pub use self::clickhouse::ClickHouseDialect; @@ -30,6 +33,7 @@ pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; +pub use self::redshift::RedshiftSqlDialect; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; pub use crate::keywords; @@ -51,6 +55,10 @@ pub trait Dialect: Debug + Any { fn is_delimited_identifier_start(&self, ch: char) -> bool { ch == '"' } + /// Determine if quoted characters are proper for identifier + fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool { + true + } /// Determine if a character is a valid start character for an unquoted identifier fn
is_identifier_start(&self, ch: char) -> bool; /// Determine if a character is a valid unquoted identifier character diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs new file mode 100644 index 000000000..c85f3dc20 --- /dev/null +++ b/src/dialect/redshift.rs @@ -0,0 +1,55 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::dialect::Dialect; +use core::iter::Peekable; +use core::str::Chars; + +use super::PostgreSqlDialect; + +#[derive(Debug)] +pub struct RedshiftSqlDialect {} + +// In most cases the redshift dialect is identical to [`PostgreSqlDialect`]. +// +// Notable differences: +// 1. Redshift treats brackets `[` and `]` differently. For example, given `SELECT a[1][2] FROM b`, +// in the Postgres dialect the query will be parsed as an array, while in the Redshift dialect it will +// be a json path. +impl Dialect for RedshiftSqlDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' || ch == '[' + } + + /// Determine if quoted characters are proper for an identifier. + /// It's needed to distinguish treating square brackets as quotes from + /// treating them as a json path. If there is an identifier then we assume + /// there is no json path.
+ fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool { + chars.next(); + let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable(); + if let Some(&ch) = not_white_chars.peek() { + return self.is_identifier_start(ch); + } + false + } + + fn is_identifier_start(&self, ch: char) -> bool { + // Extends Postgres dialect with sharp + PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' + } + + fn is_identifier_part(&self, ch: char) -> bool { + // Extends Postgres dialect with sharp + PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' + } +} diff --git a/src/test_utils.rs b/src/test_utils.rs index 27eba1408..1a432e47a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -141,6 +141,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(AnsiDialect {}), Box::new(SnowflakeDialect {}), Box::new(HiveDialect {}), + Box::new(RedshiftSqlDialect {}), ], } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3f17e09e7..f9a4e40cf 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -437,7 +437,12 @@ impl<'a> Tokenizer<'a> { Ok(Some(Token::SingleQuotedString(s))) } // delimited (quoted) identifier - quote_start if self.dialect.is_delimited_identifier_start(quote_start) => { + quote_start + if self.dialect.is_delimited_identifier_start(ch) + && self + .dialect + .is_proper_identifier_inside_quotes(chars.clone()) => + { chars.next(); // consume the opening quote let quote_end = Word::matching_end_quote(quote_start); let (s, last_char) = parse_quoted_ident(chars, quote_end); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs new file mode 100644 index 000000000..ce818a98d --- /dev/null +++ b/tests/sqlparser_redshift.rs @@ -0,0 +1,112 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[macro_use] +mod test_utils; + +use test_utils::*; + +use sqlparser::ast::*; +use sqlparser::dialect::RedshiftSqlDialect; + +#[test] +fn test_square_brackets_over_db_schema_table_name() { + let select = redshift().verified_only_select("SELECT [col1] FROM [test_schema].[test_table]"); + assert_eq!( + select.projection[0], + SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "col1".to_string(), + quote_style: Some('[') + })), + ); + assert_eq!( + select.from[0], + TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('[') + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('[') + } + ]), + alias: None, + args: vec![], + with_hints: vec![], + }, + joins: vec![], + } + ); +} + +#[test] +fn brackets_over_db_schema_table_name_with_whites_paces() { + match redshift().parse_sql_statements("SELECT [ col1 ] FROM [ test_schema].[ test_table]") { + Ok(statements) => { + assert_eq!(statements.len(), 1); + } + _ => unreachable!(), + } +} + +#[test] +fn test_double_quotes_over_db_schema_table_name() { + let select = + redshift().verified_only_select("SELECT \"col1\" FROM \"test_schema\".\"test_table\""); + assert_eq!( + select.projection[0], + SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "col1".to_string(), + quote_style: Some('"') + })), + ); + assert_eq!( + select.from[0], + TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('"') + }, + Ident { + 
value: "test_table".to_string(), + quote_style: Some('"') + } + ]), + alias: None, + args: vec![], + with_hints: vec![], + }, + joins: vec![], + } + ); +} + +fn redshift() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(RedshiftSqlDialect {})], + } +} + +#[test] +fn test_sharp() { + let sql = "SELECT #_of_values"; + let select = redshift().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("#_of_values"))), + select.projection[0] + ); +}