From 2d6ab2ef1fc7163a0c736725bf700671e1358279 Mon Sep 17 00:00:00 2001 From: Alex <45256796+RPG-Alex@users.noreply.github.com> Date: Wed, 15 Oct 2025 13:50:12 +0000 Subject: [PATCH 01/11] created Comment enum and refactored Whitespace enum and cascading changes --- src/ast/ddl.rs | 12 +- src/parser/mod.rs | 25 +++- src/tokenizer.rs | 238 +++++++++++++++++++++++++---------- tests/sqlparser_mysql.rs | 1 + tests/sqlparser_snowflake.rs | 8 +- 5 files changed, 204 insertions(+), 80 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 4a8678e44..e67bf9757 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -44,7 +44,7 @@ use crate::ast::{ }; use crate::display_utils::{DisplayCommaSeparated, Indent, NewLine, SpaceOrNewline}; use crate::keywords::Keyword; -use crate::tokenizer::{Span, Token}; +use crate::tokenizer::{Comment, Span, Token}; /// Index column type. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -1202,6 +1202,8 @@ pub struct ColumnDef { pub name: Ident, pub data_type: DataType, pub options: Vec, + /// Leading comment for the column. + pub leading_comment: Option, } impl fmt::Display for ColumnDef { @@ -2281,6 +2283,8 @@ pub struct CreateTable { /// Snowflake "REQUIRE USER" clause for dybamic tables /// pub require_user: bool, + /// Leading comment for the table. + pub leading_comment: Option, } impl fmt::Display for CreateTable { @@ -3510,10 +3514,16 @@ pub struct AlterTable { pub iceberg: bool, /// Token that represents the end of the statement (semicolon or EOF) pub end_token: AttachedToken, + /// Leading comment which appears before the `ALTER` keyword + pub leading_comment: Option, } impl fmt::Display for AlterTable { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(comment) = &self.leading_comment { + write!(f, "{comment}\n")?; + } + if self.iceberg { write!(f, "ALTER ICEBERG TABLE ")?; } else { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ef583dd37..7ddaa52db 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -539,7 +539,17 @@ impl<'a> Parser<'a> { return statement; } - let next_token = self.next_token(); + let mut next_token = self.next_token(); + let leading_comment: Option = if let Token::LeadingComment(_) = &next_token.token { + let Token::LeadingComment(comment) = next_token.token else { + unreachable!() + }; + next_token = self.next_token(); + Some(comment) + } else { + None + }; + match &next_token.token { Token::Word(w) => match w.keyword { Keyword::KILL => self.parse_kill(), @@ -591,7 +601,7 @@ impl<'a> Parser<'a> { Keyword::REPLACE => self.parse_replace(), Keyword::UNCACHE => self.parse_uncache_table(), Keyword::UPDATE => self.parse_update(), - Keyword::ALTER => self.parse_alter(), + Keyword::ALTER => self.parse_alter(leading_comment), Keyword::CALL => self.parse_call(), Keyword::COPY => self.parse_copy(), Keyword::OPEN => { @@ -7878,6 +7888,8 @@ impl<'a> Parser<'a> { } pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { + todo!("Add parsing for the Leading comment of the column, if any."); + let mut columns = vec![]; let mut constraints = vec![]; if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { @@ -9349,7 +9361,7 @@ impl<'a> Parser<'a> { } } - pub fn parse_alter(&mut self) -> Result { + pub fn parse_alter(&mut self, leading_comment: Option) -> Result { let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, Keyword::TYPE, @@ -9370,10 +9382,10 @@ impl<'a> Parser<'a> { } Keyword::VIEW => self.parse_alter_view(), Keyword::TYPE => 
self.parse_alter_type(), - Keyword::TABLE => self.parse_alter_table(false), + Keyword::TABLE => self.parse_alter_table(leading_comment, false), Keyword::ICEBERG => { self.expect_keyword(Keyword::TABLE)?; - self.parse_alter_table(true) + self.parse_alter_table(leading_comment, true) } Keyword::INDEX => { let index_name = self.parse_object_name(false)?; @@ -9403,7 +9415,7 @@ impl<'a> Parser<'a> { } /// Parse a [Statement::AlterTable] - pub fn parse_alter_table(&mut self, iceberg: bool) -> Result { + pub fn parse_alter_table(&mut self, leading_comment: Option, iceberg: bool) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] let table_name = self.parse_object_name(false)?; @@ -9438,6 +9450,7 @@ impl<'a> Parser<'a> { on_cluster, iceberg, end_token: AttachedToken(end_token), + leading_comment } .into()) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1f..fe7ff0763 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -106,6 +106,8 @@ pub enum Token { HexStringLiteral(String), /// Comma Comma, + /// Comment (single line or multi line) that are associated with a statement or relevant sub-portion of a statement + LeadingComment(Comment), /// Whitespace (space, tab, etc) Whitespace(Whitespace), /// Double equals sign `==` @@ -279,6 +281,14 @@ pub enum Token { CustomBinaryOperator(String), } +/// Decide whether a comment is a LeadingComment or an InterstitialComment based on the previous token. +fn dispatch_comment_kind(prev_token: Option<&Token>, comment: Comment) -> Token { + match prev_token { + None | Some(Token::Comma) | Some(Token::SemiColon) => Token::LeadingComment(comment), + _ => Token::Whitespace(comment.into()), + } +} + impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -304,6 +314,7 @@ impl fmt::Display for Token { Token::TripleSingleQuotedRawStringLiteral(ref s) => write!(f, "R'''{s}'''"), Token::TripleDoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"\"\"{s}\"\"\""), Token::Comma => f.write_str(","), + Token::LeadingComment(c) => write!(f, "{c}"), Token::Whitespace(ws) => write!(f, "{ws}"), Token::DoubleEq => f.write_str("=="), Token::Spaceship => f.write_str("<=>"), @@ -449,6 +460,29 @@ impl Word { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Comment { + SingleLineComment { comment: String, prefix: String }, + MultiLineComment(String), +} + +impl fmt::Display for Comment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Comment::SingleLineComment { comment, prefix } => write!(f, "{}{}", prefix, comment), + Comment::MultiLineComment(comment) => write!(f, "/*{}*/", comment), + } + } +} + +impl From for Whitespace { + fn from(comment: Comment) -> Self { + Whitespace::InterstitialComment(comment) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -456,8 +490,20 @@ pub enum Whitespace { Space, Newline, Tab, - SingleLineComment { comment: String, prefix: String }, - MultiLineComment(String), + /// A comment which is not positioned before a statement or relevant sub-portion + /// of a statement, but rather appears between other tokens. 
+ /// + /// For example, in the following SQL: + /// + /// ```sql + /// CREATE + /// -- This is an interstitial comment + /// TABLE + /// -- Another + /// -- interstitial comment + /// my_table (id INT); + /// ``` + InterstitialComment(Comment), } impl fmt::Display for Whitespace { @@ -466,8 +512,7 @@ impl fmt::Display for Whitespace { Whitespace::Space => f.write_str(" "), Whitespace::Newline => f.write_str("\n"), Whitespace::Tab => f.write_str("\t"), - Whitespace::SingleLineComment { prefix, comment } => write!(f, "{prefix}{comment}"), - Whitespace::MultiLineComment(s) => write!(f, "/*{s}*/"), + Whitespace::InterstitialComment(comment) => write!(f, "{}", comment), } } } @@ -1332,8 +1377,9 @@ impl<'a> Tokenizer<'a> { if is_comment { chars.next(); // consume second '-' let comment = self.tokenize_single_line_comment(chars); - return Ok(Some(Token::Whitespace( - Whitespace::SingleLineComment { + return Ok(Some(dispatch_comment_kind( + prev_token, + Comment::SingleLineComment { prefix: "--".to_owned(), comment, }, @@ -1358,15 +1404,20 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('*') => { chars.next(); // consume the '*', starting a multi-line comment - self.tokenize_multiline_comment(chars) + Ok(self + .tokenize_multiline_comment(chars)? + .map(|comment| dispatch_comment_kind(prev_token, comment))) } Some('/') if dialect_of!(self is SnowflakeDialect) => { chars.next(); // consume the second '/', starting a snowflake single-line comment let comment = self.tokenize_single_line_comment(chars); - Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "//".to_owned(), - comment, - }))) + Ok(Some(dispatch_comment_kind( + prev_token, + Comment::SingleLineComment { + prefix: "//".to_owned(), + comment, + }, + ))) } Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { self.consume_and_return(chars, Token::DuckIntDiv) @@ -1568,10 +1619,14 @@ impl<'a> Tokenizer<'a> { { chars.next(); // consume the '#', starting a snowflake single-line comment let comment = self.tokenize_single_line_comment(chars); - Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "#".to_owned(), - comment, - }))) + + Ok(Some(dispatch_comment_kind( + prev_token, + Comment::SingleLineComment { + prefix: "#".to_owned(), + comment, + }, + ))) } '~' => { chars.next(); // consume @@ -2104,7 +2159,7 @@ impl<'a> Tokenizer<'a> { fn tokenize_multiline_comment( &self, chars: &mut State, - ) -> Result, TokenizerError> { + ) -> Result, TokenizerError> { let mut s = String::new(); let mut nested = 1; let supports_nested_comments = self.dialect.supports_nested_comments(); @@ -2121,7 +2176,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume the '/' nested -= 1; if nested == 0 { - break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s)))); + break Ok(Some(Comment::MultiLineComment(s).into())); } s.push('*'); s.push('/'); @@ -3083,10 +3138,13 @@ mod tests { String::from("0--this is a comment\n1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\n".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: "this is a comment\n".to_string(), + } + .into(), + ), Token::Number("1".to_string(), false), ], ), @@ -3094,20 +3152,26 @@ mod tests { String::from("0--this is a comment\r1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a 
comment\r1".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: "this is a comment\r1".to_string(), + } + .into(), + ), ], ), ( String::from("0--this is a comment\r\n1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\r\n".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: "this is a comment\r\n".to_string(), + } + .into(), + ), Token::Number("1".to_string(), false), ], ), @@ -3129,25 +3193,43 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "\r".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: "\r".to_string(), + } + .into(), + ), Token::Number("0".to_string(), false), ]; compare(expected, tokens); } #[test] - fn tokenize_comment_at_eof() { + fn tokenize_leading_inline_comment_at_eof() { let sql = String::from("--this is a comment"); let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment".to_string(), - })]; + let expected = vec![Token::LeadingComment( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: "this is a comment".to_string(), + } + .into(), + )]; + compare(expected, tokens); + } + + #[test] + fn tokenize_leading_multiline_comment_at_eof() { + let sql = String::from("/* this is a comment */"); + + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![Token::LeadingComment(Comment::MultiLineComment( + " this is a comment ".to_string(), + ))]; compare(expected, tokens); } @@ -3159,9 +3241,9 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* /comment".to_string(), - )), + Token::Whitespace( + Comment::MultiLineComment("multi-line\n* /comment".to_string()).into(), + ), Token::Number("1".to_string(), false), ]; compare(expected, tokens); @@ -3173,9 +3255,12 @@ mod tests { "0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1", vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment*/*/ ".into(), - )), + Token::Whitespace( + Comment::MultiLineComment( + "multi-line\n* \n/* comment \n /*comment*/*/ ".into(), + ) + .into(), + ), Token::Whitespace(Whitespace::Space), Token::Div, Token::Word(Word { @@ -3193,9 +3278,12 @@ mod tests { "0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1", vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(), - )), + Token::Whitespace( + Comment::MultiLineComment( + "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(), + ) + .into(), + ), Token::Number("1".to_string(), false), ], ); @@ -3206,7 +3294,7 @@ mod tests { Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - 
Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())), + Token::Whitespace(Comment::MultiLineComment(" a /* b */ c ".to_string()).into()), Token::Number("0".to_string(), false), ], ); @@ -3220,7 +3308,7 @@ mod tests { Token::make_keyword("select"), Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())), + Token::Whitespace(Comment::MultiLineComment("/**/".to_string()).into()), Token::Number("0".to_string(), false), ], ); @@ -3234,9 +3322,9 @@ mod tests { Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "/* nested comment ".to_string(), - )), + Token::Whitespace( + Comment::MultiLineComment("/* nested comment ".to_string()).into(), + ), Token::Mul, Token::Div, Token::Number("0".to_string(), false), @@ -3252,7 +3340,7 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Newline), - Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())), + Token::Whitespace(Comment::MultiLineComment("* Comment *".to_string()).into()), Token::Whitespace(Whitespace::Newline), ]; compare(expected, tokens); @@ -3936,10 +4024,13 @@ mod tests { vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: " 'abc'".to_string(), + } + .into(), + ), ], ); @@ -3963,10 +4054,13 @@ mod tests { vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "'abc'".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: "'abc'".to_string(), + } + .into(), + ), ], ); @@ -3976,10 +4070,13 @@ mod tests { vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: " 'abc'".to_string(), + } + .into(), + ), ], ); @@ -3989,10 +4086,13 @@ mod tests { vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "--".to_string(), + comment: "".to_string(), + } + .into(), + ), ], ); } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e0ddecf32..303813608 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -2738,6 +2738,7 @@ fn parse_alter_table_add_column() { location: _, on_cluster: _, end_token: _, + leading_comment: _, }) => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e7a128343..889c1a5d3 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -565,10 +565,10 @@ fn test_snowflake_single_line_tokenize() { Token::make_keyword("CREATE"), Token::Whitespace(Whitespace::Space), Token::make_keyword("TABLE"), - Token::Whitespace(Whitespace::SingleLineComment { + 
Token::Whitespace(Comment::SingleLineComment {
             prefix: "#".to_string(),
             comment: " this is a comment \n".to_string(),
-        }),
+        }.into()),
         Token::make_word("table_1", None),
     ];

@@ -582,10 +582,10 @@ fn test_snowflake_single_line_tokenize() {
         Token::Whitespace(Whitespace::Space),
         Token::make_keyword("TABLE"),
         Token::Whitespace(Whitespace::Space),
-        Token::Whitespace(Whitespace::SingleLineComment {
+        Token::Whitespace(Comment::SingleLineComment {
             prefix: "//".to_string(),
             comment: " this is a comment \n".to_string(),
-        }),
+        }.into()),
         Token::make_word("table_1", None),
     ];

From d88816c5d39cfffde3d2d124455850c6d09e0d15 Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Thu, 16 Oct 2025 21:18:46 +0800
Subject: [PATCH 02/11] added parse_column_def support and propagated span and
 stmt with leading_comment
---
 src/ast/helpers/stmt_create_table.rs |  6 ++++++
 src/ast/spans.rs                     |  2 ++
 src/parser/mod.rs                    | 11 ++++++++++-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs
index fe950c909..45c9f1d81 100644
--- a/src/ast/helpers/stmt_create_table.rs
+++ b/src/ast/helpers/stmt_create_table.rs
@@ -32,6 +32,7 @@ use crate::ast::{
 };
 use crate::parser::ParserError;
+use crate::tokenizer::Comment;

 /// Builder for create table statement variant ([1]).
 ///
@@ -115,6 +116,7 @@ pub struct CreateTableBuilder {
     pub refresh_mode: Option,
     pub initialize: Option,
     pub require_user: bool,
+    pub leading_comment: Option<Comment>,
 }

 impl CreateTableBuilder {
@@ -171,6 +173,7 @@ impl CreateTableBuilder {
             refresh_mode: None,
             initialize: None,
             require_user: false,
+            leading_comment: None,
         }
     }
     pub fn or_replace(mut self, or_replace: bool) -> Self {
@@ -484,6 +487,7 @@ impl CreateTableBuilder {
             refresh_mode: self.refresh_mode,
             initialize: self.initialize,
             require_user: self.require_user,
+            leading_comment: self.leading_comment,
         }
         .into()
     }
@@ -548,6 +552,7 @@ impl TryFrom<Statement> for CreateTableBuilder {
             refresh_mode,
             initialize,
             require_user,
+            leading_comment,
         }) => Ok(Self {
             or_replace,
             temporary,
@@ -600,6 +605,7 @@ impl TryFrom<Statement> for CreateTableBuilder {
             refresh_mode,
             initialize,
             require_user,
+            leading_comment,
         }),
         _ => Err(ParserError::ParserError(format!(
             "Expected create table statement, but received: {stmt}"
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 5d82c7339..1c149f550 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -553,6 +553,7 @@ impl Spanned for CreateTable {
             refresh_mode: _,
             initialize: _,
             require_user: _,
+            leading_comment: _, // Option<Comment>
         } = self;

         union_spans(
@@ -572,6 +573,7 @@ impl Spanned for ColumnDef {
             name,
             data_type: _, // enum
             options,
+            leading_comment: _,
         } = self;

         union_spans(core::iter::once(name.span).chain(options.iter().map(|i| i.span())))
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index fc7022bbb..123cfd718 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -7888,7 +7888,7 @@
     }

     pub fn parse_columns(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), ParserError> {
-        todo!("Add parsing for the Leading comment of the column, if any.");
+        //todo!("Add parsing for the Leading comment of the column, if any.");

         let mut columns = vec![];
         let mut constraints = vec![];
@@ -7952,6 +7952,13 @@
     }

     pub fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> {
+        let leading_comment = match self.peek_token().token {
+            Token::LeadingComment(_) => {
+                let Token::LeadingComment(c) = self.next_token().token else { unreachable!() };
+                Some(c)
+            }
+            _ => None,
+        };
         let col_name = self.parse_identifier()?;
         let data_type = if self.is_column_type_sqlite_unspecified() {
             DataType::Unspecified
         } else {
             self.parse_data_type()?
         };
         let mut options = vec![];
+
         loop {
             if self.parse_keyword(Keyword::CONSTRAINT) {
                 let name = Some(self.parse_identifier()?);
@@ -7980,6 +7988,7 @@
             name: col_name,
             data_type,
             options,
+            leading_comment,
         })
     }

From 93fd86d4616a72847d23e56ee663063bbe713ca9 Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Thu, 16 Oct 2025 21:39:13 +0800
Subject: [PATCH 03/11] added parse_column_def support and propagated span and
 stmt with leading_comment
---
 src/parser/mod.rs | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 123cfd718..6edffb371 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -7952,13 +7952,19 @@
     }

     pub fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> {
-        let leading_comment = match self.peek_token().token {
-            Token::LeadingComment(_) => {
-                let Token::LeadingComment(c) = self.next_token().token else { unreachable!() };
-                Some(c)
-            }
-            _ => None,
+
+        let mut next_token = self.next_token();
+        let leading_comment: Option<Comment> = if let Token::LeadingComment(_) = &next_token.token {
+            let Token::LeadingComment(comment) = next_token.token else {
+                unreachable!()
+            };
+            next_token = self.next_token();
+            Some(comment)
+        } else {
+            None
         };
+
+
         let col_name = self.parse_identifier()?;

From 853f1315812120b0e9ccde7df0e4429a23ad6ee1 Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Fri, 17 Oct 2025 10:33:21 +0800
Subject: [PATCH 04/11] updated existing tests working with Structs, adding
 default 'None' for leading_comment value
---
 tests/sqlparser_bigquery.rs   | 11 ++++++---
 tests/sqlparser_clickhouse.rs | 12 ++++++++++
 tests/sqlparser_common.rs     | 33 +++++++++++++++++++++----
 tests/sqlparser_databricks.rs |  1 +
 tests/sqlparser_duckdb.rs     | 13 +++++++---
 tests/sqlparser_mssql.rs      | 12 ++++++++--
 tests/sqlparser_mysql.rs      | 44 ++++++++++++++++++++++++++++++++++
 tests/sqlparser_postgres.rs   | 45 ++++++++++++++++++++++++++++-------
 tests/sqlparser_snowflake.rs  | 21 ++++++++++++----
 tests/sqlparser_sqlite.rs     |  4 ++++
 10 files changed, 160 insertions(+), 26 deletions(-)

diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index 03a0ac813..1ea46105c 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -466,7 +466,8 @@ fn parse_create_table_with_unquoted_hyphen() {
         vec![ColumnDef {
             name: Ident::new("x"),
             data_type: DataType::Int64,
-            options: vec![]
+            options: vec![],
+            leading_comment: None,
         },],
         columns
     );
@@ -519,7 +520,8 @@ fn parse_create_table_with_options() {
                     ),
                 },])
             },
-        ]
+        ],
+        leading_comment: None,
     },
     ColumnDef {
         name: Ident::new("y"),
@@ -534,7 +536,8 @@ fn parse_create_table_with_options() {
                     )
                 ),
             },])
-        }]
+        }],
+        leading_comment: None,
     },
 ],
 columns
@@ -620,6 +623,7 @@ fn parse_nested_data_types() {
                 StructBracketKind::AngleBrackets
             ),
             options: vec![],
+            leading_comment: None,
         },
         ColumnDef {
             name: Ident::new("y"),
@@ -634,6 +638,7 @@ fn parse_nested_data_types() {
                 ),
             ))),
             options: vec![],
+            leading_comment: None,
         },
     ]
 );
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 44bfcda42..c6fdb105b 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -534,6 +534,7 @@ fn column_def(name: Ident, data_type: DataType) -> ColumnDef {
         name,
         data_type,
         options: vec![],
+
leading_comment: None, } } @@ -625,6 +626,7 @@ fn parse_create_table_with_nullable() { name: None, option: ColumnOption::Null }], + leading_comment: None, } ] ); @@ -665,6 +667,7 @@ fn parse_create_table_with_nested_data_types() { ) ]), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("k"), @@ -683,6 +686,7 @@ fn parse_create_table_with_nested_data_types() { ]) ))), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("l"), @@ -701,6 +705,7 @@ fn parse_create_table_with_nested_data_types() { }, ]), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("m"), @@ -709,6 +714,7 @@ fn parse_create_table_with_nested_data_types() { Box::new(DataType::UInt16) ), options: vec![], + leading_comment: None, }, ] ); @@ -740,11 +746,13 @@ fn parse_create_table_with_primary_key() { name: Ident::with_quote('`', "i"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::with_quote('`', "k"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ], columns @@ -839,6 +847,7 @@ fn parse_create_table_with_variant_default_expressions() { within_group: vec![], })) }], + leading_comment: None, }, ColumnDef { name: Ident::new("b"), @@ -860,6 +869,7 @@ fn parse_create_table_with_variant_default_expressions() { within_group: vec![], }))) }], + leading_comment: None, }, ColumnDef { name: Ident::new("c"), @@ -868,6 +878,7 @@ fn parse_create_table_with_variant_default_expressions() { name: None, option: ColumnOption::Ephemeral(None) }], + leading_comment: None, }, ColumnDef { name: Ident::new("d"), @@ -891,6 +902,7 @@ fn parse_create_table_with_variant_default_expressions() { within_group: vec![], })) }], + leading_comment: None, } ] ) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9f807ecfe..89f6b0cb5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3739,6 +3739,7 @@ fn parse_create_table() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "lat".into(), @@ -3747,11 +3748,13 @@ fn parse_create_table() { name: None, option: ColumnOption::Null, }], + leading_comment: None, }, ColumnDef { name: "lng".into(), data_type: DataType::Double(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: "constrained".into(), @@ -3788,6 +3791,7 @@ fn parse_create_table() { }), }, ], + leading_comment: None, }, ColumnDef { name: "ref".into(), @@ -3806,6 +3810,7 @@ fn parse_create_table() { characteristics: None, }), }], + leading_comment: None, }, ColumnDef { name: "ref2".into(), @@ -3824,6 +3829,7 @@ fn parse_create_table() { characteristics: None, }), },], + leading_comment: None, }, ] ); @@ -3946,6 +3952,7 @@ fn parse_create_table_with_constraint_characteristics() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "lat".into(), @@ -3954,11 +3961,13 @@ fn parse_create_table_with_constraint_characteristics() { name: None, option: ColumnOption::Null, }], + leading_comment: None, }, ColumnDef { name: "lng".into(), data_type: DataType::Double(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ] ); @@ -4110,7 +4119,8 @@ fn parse_create_table_column_constraint_characteristics() { is_primary: false, characteristics: expected_value } - }] + }], + leading_comment: None, }], "{message}" ) @@ -4219,11 +4229,13 @@ fn parse_create_table_hive_array() { name: Ident::new("name"), data_type: DataType::Int(None), options: vec![], 
+ leading_comment: None, }, ColumnDef { name: Ident::new("val"), data_type: DataType::Array(expected), options: vec![], + leading_comment: None, }, ], ) @@ -4595,6 +4607,7 @@ fn parse_create_external_table() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "lat".into(), @@ -4603,11 +4616,13 @@ fn parse_create_external_table() { name: None, option: ColumnOption::Null, }], + leading_comment: None, }, ColumnDef { name: "lng".into(), data_type: DataType::Double(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ] ); @@ -4663,6 +4678,7 @@ fn parse_create_or_replace_external_table() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, },] ); assert!(constraints.is_empty()); @@ -12178,7 +12194,8 @@ fn test_parse_inline_comment() { options: vec![ColumnOptionDef { name: None, option: Comment("comment without equal".to_string()), - }] + }], + leading_comment: None, }] ); assert_eq!( @@ -14868,6 +14885,7 @@ fn parse_create_table_with_enum_types() { Some(8) ), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar"), @@ -14889,6 +14907,7 @@ fn parse_create_table_with_enum_types() { Some(16) ), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("baz"), @@ -14900,6 +14919,7 @@ fn parse_create_table_with_enum_types() { None ), options: vec![], + leading_comment: None, } ], columns @@ -17282,7 +17302,8 @@ fn parse_invisible_column() { ColumnDef { name: "foo".into(), data_type: DataType::Int(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "bar".into(), @@ -17290,7 +17311,8 @@ fn parse_invisible_column() { options: vec![ColumnOptionDef { name: None, option: ColumnOption::Invisible - }] + }], + leading_comment: None, } ] ); @@ -17313,7 +17335,8 @@ fn parse_invisible_column() { options: vec![ColumnOptionDef { name: None, option: ColumnOption::Invisible - }] + }], + leading_comment: None, }, column_position: None }] diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index e01611b6f..b46db66c4 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -359,6 +359,7 @@ fn data_type_timestamp_ntz() { name: "x".into(), data_type: DataType::TimestampNtz, options: vec![], + leading_comment: None, }] ); } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 0f8051955..fc7f45607 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -64,6 +64,7 @@ fn test_struct() { name: "s".into(), data_type: struct_type1.clone(), options: vec![], + leading_comment: None, }] ); @@ -78,6 +79,7 @@ fn test_struct() { None )), options: vec![], + leading_comment: None, }] ); @@ -126,6 +128,7 @@ fn test_struct() { None )), options: vec![], + leading_comment: None, }] ); @@ -709,7 +712,8 @@ fn test_duckdb_union_datatype() { field_name: "a".into(), field_type: DataType::Int(None) }]), - options: Default::default() + options: Default::default(), + leading_comment: None, }, ColumnDef { name: "two".into(), @@ -723,7 +727,8 @@ fn test_duckdb_union_datatype() { field_type: DataType::Int(None) } ]), - options: Default::default() + options: Default::default(), + leading_comment: None, }, ColumnDef { name: "nested".into(), @@ -734,7 +739,8 @@ fn test_duckdb_union_datatype() { field_type: DataType::Int(None) }]) }]), - options: Default::default() + options: Default::default(), + leading_comment: None, } ], constraints: Default::default(), @@ -782,6 +788,7 @@ fn test_duckdb_union_datatype() { 
refresh_mode: None, initialize: None, require_user: Default::default(), + leading_comment: None, }), stmt ); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index e11c79f01..690ee3007 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -1868,6 +1868,7 @@ fn parse_create_table_with_valid_options() { }, data_type: Int(None,), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident { @@ -1877,6 +1878,7 @@ fn parse_create_table_with_valid_options() { }, data_type: Int(None,), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident { @@ -1886,6 +1888,7 @@ fn parse_create_table_with_valid_options() { }, data_type: Int(None,), options: vec![], + leading_comment: None, }, ], constraints: vec![], @@ -1934,6 +1937,7 @@ fn parse_create_table_with_valid_options() { refresh_mode: None, initialize: None, require_user: false, + leading_comment: None, }) ); } @@ -2059,6 +2063,7 @@ fn parse_create_table_with_identity_column() { data_type: Int(None,), options: column_options, + leading_comment: None, },], constraints: vec![], hive_distribution: HiveDistributionStyle::NONE, @@ -2105,6 +2110,7 @@ fn parse_create_table_with_identity_column() { refresh_mode: None, initialize: None, require_user: false, + leading_comment: None, }), ); } @@ -2301,7 +2307,8 @@ fn parse_mssql_varbinary_max_length() { name: Ident::new("var_binary_col"), data_type: Varbinary(Some(BinaryLength::Max)), - options: vec![] + options: vec![], + leading_comment: None, },], ); } @@ -2326,7 +2333,8 @@ fn parse_mssql_varbinary_max_length() { name: Ident::new("var_binary_col"), data_type: Varbinary(Some(BinaryLength::IntegerLength { length: 50 })), - options: vec![] + options: vec![], + leading_comment: None, },], ); } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 303813608..543a494fd 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -650,6 +650,7 @@ fn parse_create_table_auto_increment() { )]), }, ], + leading_comment: None, }], columns ); @@ -755,6 +756,7 @@ fn parse_create_table_primary_and_unique_key() { ]), }, ], + leading_comment: None, }, ColumnDef { name: Ident::new("bar"), @@ -763,6 +765,7 @@ fn parse_create_table_primary_and_unique_key() { name: None, option: ColumnOption::NotNull, },], + leading_comment: None, }, ], columns @@ -1181,6 +1184,7 @@ fn parse_create_table_set_enum() { name: Ident::new("bar"), data_type: DataType::Set(vec!["a".to_string(), "b".to_string()]), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("baz"), @@ -1192,6 +1196,7 @@ fn parse_create_table_set_enum() { None ), options: vec![], + leading_comment: None, } ], columns @@ -1217,6 +1222,7 @@ fn parse_create_table_engine_default_charset() { name: Ident::new("id"), data_type: DataType::Int(Some(11)), options: vec![], + leading_comment: None, },], columns ); @@ -1259,6 +1265,7 @@ fn parse_create_table_collate() { name: Ident::new("id"), data_type: DataType::Int(Some(11)), options: vec![], + leading_comment: None, },], columns ); @@ -1339,6 +1346,7 @@ fn parse_create_table_comment_character_set() { option: ColumnOption::Comment("comment".to_string()) } ], + leading_comment: None, },], columns ); @@ -1387,6 +1395,7 @@ fn parse_quote_identifiers() { characteristics: None }, }], + leading_comment: None, }], columns ); @@ -1644,26 +1653,31 @@ fn parse_create_table_with_minimum_display_width() { name: Ident::new("bar_tinyint"), data_type: DataType::TinyInt(Some(3)), options: vec![], + leading_comment: None, }, ColumnDef { name: 
Ident::new("bar_smallint"), data_type: DataType::SmallInt(Some(5)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_mediumint"), data_type: DataType::MediumInt(Some(6)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_int"), data_type: DataType::Int(Some(11)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_bigint"), data_type: DataType::BigInt(Some(20)), options: vec![], + leading_comment: None, } ], columns @@ -1685,26 +1699,31 @@ fn parse_create_table_unsigned() { name: Ident::new("bar_tinyint"), data_type: DataType::TinyIntUnsigned(Some(3)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_smallint"), data_type: DataType::SmallIntUnsigned(Some(5)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_mediumint"), data_type: DataType::MediumIntUnsigned(Some(13)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_int"), data_type: DataType::IntUnsigned(Some(11)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_bigint"), data_type: DataType::BigIntUnsigned(Some(20)), options: vec![], + leading_comment: None, }, ], columns @@ -1727,26 +1746,31 @@ fn parse_signed_data_types() { name: Ident::new("bar_tinyint"), data_type: DataType::TinyInt(Some(3)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_smallint"), data_type: DataType::SmallInt(Some(5)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_mediumint"), data_type: DataType::MediumInt(Some(13)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_int"), data_type: DataType::Int(Some(11)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_bigint"), data_type: DataType::BigInt(Some(20)), options: vec![], + leading_comment: None, }, ], columns @@ -1771,11 +1795,13 @@ fn parse_deprecated_mysql_unsigned_data_types() { name: Ident::new("bar_decimal"), data_type: DataType::DecimalUnsigned(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_decimal_prec"), data_type: DataType::DecimalUnsigned(ExactNumberInfo::Precision(10)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_decimal_scale"), @@ -1783,31 +1809,37 @@ fn parse_deprecated_mysql_unsigned_data_types() { 10, 2 )), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_dec"), data_type: DataType::DecUnsigned(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_dec_prec"), data_type: DataType::DecUnsigned(ExactNumberInfo::Precision(10)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_dec_scale"), data_type: DataType::DecUnsigned(ExactNumberInfo::PrecisionAndScale(10, 2)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_float"), data_type: DataType::FloatUnsigned(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_float_prec"), data_type: DataType::FloatUnsigned(ExactNumberInfo::Precision(10)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_float_scale"), @@ -1815,16 +1847,19 @@ fn parse_deprecated_mysql_unsigned_data_types() { 10, 2 )), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_double"), data_type: 
DataType::DoubleUnsigned(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_double_prec"), data_type: DataType::DoubleUnsigned(ExactNumberInfo::Precision(10)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_double_scale"), @@ -1832,16 +1867,19 @@ fn parse_deprecated_mysql_unsigned_data_types() { 10, 2 )), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_real"), data_type: DataType::RealUnsigned, options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_double_precision"), data_type: DataType::DoublePrecisionUnsigned, options: vec![], + leading_comment: None, }, ], columns @@ -2753,6 +2791,7 @@ fn parse_alter_table_add_column() { name: "b".into(), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, column_position: Some(MySQLColumnPosition::First), },] @@ -2781,6 +2820,7 @@ fn parse_alter_table_add_column() { name: "b".into(), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, column_position: Some(MySQLColumnPosition::After(Ident { value: String::from("foo"), @@ -2819,6 +2859,7 @@ fn parse_alter_table_add_columns() { name: "a".into(), data_type: DataType::Text, options: vec![], + leading_comment: None, }, column_position: Some(MySQLColumnPosition::First), }, @@ -2829,6 +2870,7 @@ fn parse_alter_table_add_columns() { name: "b".into(), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, column_position: Some(MySQLColumnPosition::After(Ident { value: String::from("foo"), @@ -3321,6 +3363,7 @@ fn parse_table_column_option_on_update() { name: None, option: ColumnOption::OnUpdate(call("CURRENT_TIMESTAMP", [])), },], + leading_comment: None, }], columns ); @@ -3623,6 +3666,7 @@ fn parse_create_table_with_column_collate() { )])) } ], + leading_comment: None, },], columns ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9d08540ad..fb4e84400 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -370,6 +370,7 @@ fn parse_create_table_with_defaults() { pg().verified_expr("nextval(public.customer_customer_id_seq)") ) }], + leading_comment: None, }, ColumnDef { name: "store_id".into(), @@ -378,6 +379,7 @@ fn parse_create_table_with_defaults() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "first_name".into(), @@ -391,6 +393,7 @@ fn parse_create_table_with_defaults() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "last_name".into(), @@ -412,6 +415,7 @@ fn parse_create_table_with_defaults() { option: ColumnOption::NotNull, } ], + leading_comment: None, }, ColumnDef { name: "email".into(), @@ -422,6 +426,7 @@ fn parse_create_table_with_defaults() { } )), options: vec![], + leading_comment: None, }, ColumnDef { name: "address_id".into(), @@ -430,6 +435,7 @@ fn parse_create_table_with_defaults() { name: None, option: ColumnOption::NotNull }], + leading_comment: None, }, ColumnDef { name: "activebool".into(), @@ -446,6 +452,7 @@ fn parse_create_table_with_defaults() { option: ColumnOption::NotNull, } ], + leading_comment: None, }, ColumnDef { name: "create_date".into(), @@ -460,6 +467,7 @@ fn parse_create_table_with_defaults() { option: ColumnOption::NotNull, } ], + leading_comment: None, }, ColumnDef { name: "last_update".into(), @@ -474,6 +482,7 @@ fn parse_create_table_with_defaults() { option: ColumnOption::NotNull, } ], + leading_comment: None, }, 
ColumnDef { name: "active".into(), @@ -482,6 +491,7 @@ fn parse_create_table_with_defaults() { name: None, option: ColumnOption::NotNull }], + leading_comment: None, }, ] ); @@ -849,6 +859,7 @@ fn parse_alter_table_add_columns() { name: "a".into(), data_type: DataType::Text, options: vec![], + leading_comment: None, }, column_position: None, }, @@ -859,6 +870,7 @@ fn parse_alter_table_add_columns() { name: "b".into(), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, column_position: None, }, @@ -5019,32 +5031,38 @@ fn parse_create_table_with_alias() { ColumnDef { name: "int8_col".into(), data_type: DataType::Int8(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "int4_col".into(), data_type: DataType::Int4(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "int2_col".into(), data_type: DataType::Int2(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "float8_col".into(), data_type: DataType::Float8, - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "float4_col".into(), data_type: DataType::Float4, - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "bool_col".into(), data_type: DataType::Bool, - options: vec![] + options: vec![], + leading_comment: None, }, ] ); @@ -5065,12 +5083,14 @@ fn parse_create_table_with_partition_by() { ColumnDef { name: "a".into(), data_type: DataType::Int(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "b".into(), data_type: DataType::Text, - options: vec![] + options: vec![], + leading_comment: None, } ], create_table.columns @@ -5987,21 +6007,25 @@ fn parse_trigger_related_functions() { name: "empname".into(), data_type: DataType::Text, options: vec![], + leading_comment: None, }, ColumnDef { name: "salary".into(), data_type: DataType::Integer(None), options: vec![], + leading_comment: None, }, ColumnDef { name: "last_date".into(), data_type: DataType::Timestamp(None, TimezoneInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: "last_user".into(), data_type: DataType::Text, options: vec![], + leading_comment: None, }, ], constraints: vec![], @@ -6049,6 +6073,7 @@ fn parse_trigger_related_functions() { refresh_mode: None, initialize: None, require_user: false, + leading_comment: None, } ); @@ -6350,11 +6375,13 @@ fn parse_varbit_datatype() { name: "x".into(), data_type: DataType::VarBit(None), options: vec![], + leading_comment: None, }, ColumnDef { name: "y".into(), data_type: DataType::VarBit(Some(42)), options: vec![], + leading_comment: None, } ] ); @@ -6400,6 +6427,7 @@ fn parse_ts_datatypes() { name: "x".into(), data_type: DataType::TsVector, options: vec![], + leading_comment: None, }] ); } @@ -6414,6 +6442,7 @@ fn parse_ts_datatypes() { name: "x".into(), data_type: DataType::TsQuery, options: vec![], + leading_comment: None, }] ); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 889c1a5d3..97fa5e610 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -372,6 +372,7 @@ fn test_snowflake_create_table_column_comment() { name: None, option: ColumnOption::Comment("some comment".to_string()) }], + leading_comment: None, }], columns ) @@ -619,7 +620,8 @@ fn test_snowflake_create_table_with_autoincrement_columns() { order: Some(IdentityPropertyOrder::Order), } )) - }] + }], + leading_comment: None, }, ColumnDef { name: "b".into(), @@ 
-637,7 +639,8 @@ fn test_snowflake_create_table_with_autoincrement_columns() { order: Some(IdentityPropertyOrder::NoOrder), } )) - }] + }], + leading_comment: None, }, ColumnDef { name: "c".into(), @@ -650,7 +653,8 @@ fn test_snowflake_create_table_with_autoincrement_columns() { order: None, } )) - }] + }], + leading_comment: None, }, ColumnDef { name: "d".into(), @@ -674,7 +678,8 @@ fn test_snowflake_create_table_with_autoincrement_columns() { order: Some(IdentityPropertyOrder::Order), } )) - }] + }], + leading_comment: None, }, ] ); @@ -697,7 +702,8 @@ fn test_snowflake_create_table_with_collated_column() { option: ColumnOption::Collation(ObjectName::from(vec![Ident::with_quote( '\'', "de_DE" )])), - }] + }], + leading_comment: None, },] ); } @@ -746,6 +752,7 @@ fn test_snowflake_create_table_with_columns_masking_policy() { } )) }], + leading_comment: None, },] ); } @@ -780,6 +787,7 @@ fn test_snowflake_create_table_with_columns_projection_policy() { } )) }], + leading_comment: None, },] ); } @@ -823,6 +831,7 @@ fn test_snowflake_create_table_with_columns_tags() { ] }), }], + leading_comment: None, },] ); } @@ -884,6 +893,7 @@ fn test_snowflake_create_table_with_several_column_options() { }), } ], + leading_comment: None, }, ColumnDef { name: "b".into(), @@ -922,6 +932,7 @@ fn test_snowflake_create_table_with_several_column_options() { }), } ], + leading_comment: None, }, ] ); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index f0d6d9b72..7743b0a55 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -229,6 +229,7 @@ fn parse_create_table_auto_increment() { )]), }, ], + leading_comment: None, }], columns ); @@ -255,6 +256,7 @@ fn parse_create_table_primary_key_asc_desc() { option: ColumnOption::DialectSpecific(vec![Token::make_keyword(kind)]), }, ], + leading_comment: None, }; let sql = "CREATE TABLE foo (bar INT PRIMARY KEY ASC)"; @@ -285,11 +287,13 @@ fn parse_create_sqlite_quote() { name: Ident::with_quote('"', "KEY"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::with_quote('[', "INDEX"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ], columns From 7df83c5ea679c481a930fd2d12000758cf0f0758 Mon Sep 17 00:00:00 2001 From: RPG-Alex Date: Fri, 17 Oct 2025 22:03:06 +0800 Subject: [PATCH 05/11] updating testing and adding tests for tables and column def --- src/ast/helpers/stmt_create_table.rs | 6 +++ src/dialect/snowflake.rs | 7 +++- src/parser/mod.rs | 51 +++++++++++----------- tests/sqlparser_common.rs | 63 ++++++++++++++++++++++++++-- 4 files changed, 95 insertions(+), 32 deletions(-) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 45c9f1d81..ec0c63a11 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -50,6 +50,7 @@ use crate::tokenizer::Comment; /// name: Ident::new("c1"), /// data_type: DataType::Int(None), /// options: vec![], +/// leading_comment: None, /// }]); /// // You can access internal elements with ease /// assert!(builder.if_not_exists); @@ -434,6 +435,11 @@ impl CreateTableBuilder { self } + pub fn leading_comment(mut self, leading_comment: Option) -> Self { + self.leading_comment = leading_comment; + self + } + pub fn build(self) -> Statement { CreateTable { or_replace: self.or_replace, diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 825fd45f0..75f8f08d4 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -35,7 
+35,7 @@ use crate::ast::{
 use crate::dialect::{Dialect, Precedence};
 use crate::keywords::Keyword;
 use crate::parser::{IsOptional, Parser, ParserError};
-use crate::tokenizer::Token;
+use crate::tokenizer::{Comment, Token};
 #[cfg(not(feature = "std"))]
 use alloc::boxed::Box;
 #[cfg(not(feature = "std"))]
@@ -210,6 +210,7 @@ impl Dialect for SnowflakeDialect {
     }

     fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
+        let leading_comment = parser.parse_leading_comment();
         if parser.parse_keyword(Keyword::BEGIN) {
             return Some(parser.parse_begin_exception_end());
         }
@@ -261,7 +262,7 @@ impl Dialect for SnowflakeDialect {
             return Some(parse_create_stage(or_replace, temporary, parser));
         } else if parser.parse_keyword(Keyword::TABLE) {
             return Some(parse_create_table(
-                or_replace, global, temporary, volatile, transient, iceberg, dynamic, parser,
+                or_replace, global, temporary, volatile, transient, iceberg, dynamic, parser, leading_comment,
             ));
         } else if parser.parse_keyword(Keyword::DATABASE) {
             return Some(parse_create_database(or_replace, transient, parser));
@@ -630,6 +631,7 @@ pub fn parse_create_table(
     iceberg: bool,
     dynamic: bool,
     parser: &mut Parser,
+    leading_comment: Option<Comment>,
 ) -> Result<Statement, ParserError> {
     let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
     let table_name = parser.parse_object_name(false)?;
@@ -643,6 +645,7 @@ pub fn parse_create_table(
         .iceberg(iceberg)
         .global(global)
         .dynamic(dynamic)
+        .leading_comment(leading_comment)
         .hive_formats(Some(Default::default()));

     // Snowflake does not enforce order of the parameters in the statement. The parser needs to
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 6edffb371..4b9282c82 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -529,6 +529,17 @@ impl<'a> Parser<'a> {
         Parser::new(dialect).try_with_sql(sql)?.parse_statements()
     }

+    /// Parses a single leading comment (if any)
+    pub fn parse_leading_comment(&mut self) -> Option<Comment> {
+        if let Token::LeadingComment(ref comment) = self.peek_token_ref().token {
+            let comment = comment.clone();
+            self.advance_token();
+            Some(comment)
+        } else {
+            None
+        }
+    }
+
     /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.),
     /// stopping before the statement separator, if any.
pub fn parse_statement(&mut self) -> Result { @@ -538,17 +549,9 @@ impl<'a> Parser<'a> { if let Some(statement) = self.dialect.parse_statement(self) { return statement; } - - let mut next_token = self.next_token(); - let leading_comment: Option = if let Token::LeadingComment(_) = &next_token.token { - let Token::LeadingComment(comment) = next_token.token else { - unreachable!() - }; - next_token = self.next_token(); - Some(comment) - } else { - None - }; + let leading_comment: Option = self.parse_leading_comment(); + let next_token = self.next_token(); + match &next_token.token { Token::Word(w) => match w.keyword { @@ -590,7 +593,7 @@ impl<'a> Parser<'a> { self.parse_detach_duckdb_database() } Keyword::MSCK => self.parse_msck(), - Keyword::CREATE => self.parse_create(), + Keyword::CREATE => self.parse_create(leading_comment), Keyword::CACHE => self.parse_cache_table(), Keyword::DROP => self.parse_drop(), Keyword::DISCARD => self.parse_discard(), @@ -4727,7 +4730,7 @@ impl<'a> Parser<'a> { } /// Parse a SQL CREATE statement - pub fn parse_create(&mut self) -> Result { + pub fn parse_create(&mut self, leading_comment: Option) -> Result { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); let or_alter = self.parse_keywords(&[Keyword::OR, Keyword::ALTER]); let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); @@ -4747,7 +4750,7 @@ impl<'a> Parser<'a> { && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); let create_view_params = self.parse_create_view_params()?; if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace, temporary, global, transient) + self.parse_create_table(or_replace, temporary, global, transient, leading_comment) } else if self.peek_keyword(Keyword::MATERIALIZED) || self.peek_keyword(Keyword::VIEW) || self.peek_keywords(&[Keyword::SECURE, Keyword::MATERIALIZED, Keyword::VIEW]) @@ -4757,7 +4760,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::POLICY) { self.parse_create_policy() } else if self.parse_keyword(Keyword::EXTERNAL) { - self.parse_create_external_table(or_replace) + self.parse_create_external_table(or_replace, leading_comment) } else if self.parse_keyword(Keyword::FUNCTION) { self.parse_create_function(or_alter, or_replace, temporary) } else if self.parse_keyword(Keyword::DOMAIN) { @@ -5806,6 +5809,7 @@ impl<'a> Parser<'a> { pub fn parse_create_external_table( &mut self, or_replace: bool, + leading_comment: Option ) -> Result { self.expect_keyword_is(Keyword::TABLE)?; let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -5841,6 +5845,7 @@ impl<'a> Parser<'a> { .external(true) .file_format(file_format) .location(location) + .leading_comment(leading_comment) .build()) } @@ -7404,6 +7409,7 @@ impl<'a> Parser<'a> { temporary: bool, global: Option, transient: bool, + leading_comment: Option ) -> Result { let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -7513,6 +7519,7 @@ impl<'a> Parser<'a> { .table_options(create_table_config.table_options) .primary_key(primary_key) .strict(strict) + .leading_comment(leading_comment) .build()) } @@ -7888,8 +7895,7 @@ impl<'a> Parser<'a> { } pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { - //todo!("Add parsing for the Leading comment of the column, if any."); - + let mut columns = vec![]; let mut constraints = vec![]; if !self.consume_token(&Token::LParen) || 
self.consume_token(&Token::RParen) { @@ -7953,16 +7959,7 @@ impl<'a> Parser<'a> { pub fn parse_column_def(&mut self) -> Result { - let mut next_token = self.next_token(); - let leading_comment: Option = if let Token::LeadingComment(_) = &next_token.token { - let Token::LeadingComment(comment) = next_token.token else { - unreachable!() - }; - next_token = self.next_token(); - Some(comment) - } else { - None - }; + let leading_comment: Option = self.parse_leading_comment(); let col_name = self.parse_identifier()?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 89f6b0cb5..32405cb71 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -39,7 +39,7 @@ use sqlparser::dialect::{ }; use sqlparser::keywords::{Keyword, ALL_KEYWORDS}; use sqlparser::parser::{Parser, ParserError, ParserOptions}; -use sqlparser::tokenizer::Tokenizer; +use sqlparser::tokenizer::{Comment, Tokenizer}; use sqlparser::tokenizer::{Location, Span}; use test_utils::{ all_dialects, all_dialects_where, all_dialects_with_options, alter_table_op, assert_eq_vec, @@ -52,7 +52,6 @@ mod test_utils; #[cfg(test)] use pretty_assertions::assert_eq; -use sqlparser::ast::ColumnOption::Comment; use sqlparser::ast::DateTimeField::Seconds; use sqlparser::ast::Expr::{Identifier, UnaryOp}; use sqlparser::ast::Value::Number; @@ -4282,6 +4281,63 @@ fn parse_create_table_with_multiple_on_delete_fails() { .expect_err("should have failed"); } +#[test] +fn parse_create_table_with_leading_comment() { + let single_line_sql = r#"-- a single line leading comment + CREATE TABLE user ( + -- a column single line comment + id int PRIMARY KEY +)"#; + let single_line_ast = one_statement_parses_to(single_line_sql, ""); + match single_line_ast { + Statement::CreateTable ( + CreateTable { + leading_comment: Some(Comment::SingleLineComment { comment, prefix }), + columns , + .. + }, + ) => { + assert_eq!(comment, " a single line leading comment\n"); + assert_eq!(prefix, "--"); + let [ColumnDef{ + leading_comment: Some(Comment::SingleLineComment {comment, prefix}), + .. + }] = columns.as_slice() else { unreachable!("unexpected column array: {columns:?}")}; + assert_eq!(comment, " a column single line comment\n"); + assert_eq!(prefix, "--"); + } + _ => unreachable!(), + }; + let multi_line_sql = r#"/* a multi line + leading comment */ + CREATE TABLE user ( + /* a column multiline + comment */ + id int PRIMARY KEY +)"#; + let multi_line_ast = one_statement_parses_to(multi_line_sql, ""); + match multi_line_ast { + Statement::CreateTable( + CreateTable { + leading_comment: Some(Comment::MultiLineComment(comment)), + columns, + .. + } + ) => { + assert_eq!(comment," a multi line\n leading comment "); + let [ColumnDef{ + leading_comment: Some(Comment::MultiLineComment(comment)), + .. 
+            }] = columns.as_slice() else { unreachable!("unexpected column array: {columns:?}")};
+            assert_eq!(comment," a column multiline\n    comment");
+        }
+        _ => unreachable!(),
+    };
+
+}
+
+
+
 #[test]
 fn parse_assert() {
     let sql = "ASSERT (SELECT COUNT(*) FROM my_table) > 0";
@@ -12193,7 +12249,7 @@ fn test_parse_inline_comment() {
                 data_type: DataType::Int(None),
                 options: vec![ColumnOptionDef {
                     name: None,
-                    option: Comment("comment without equal".to_string()),
+                    option: ColumnOption::Comment("comment without equal".to_string()),
                 }],
                 leading_comment: None,
             }]
@@ -14992,6 +15048,7 @@ fn parse_overlaps() {
     verified_stmt("SELECT (DATE '2016-01-10', DATE '2016-02-01') OVERLAPS (DATE '2016-01-20', DATE '2016-02-10')");
 }

+
 #[test]
 fn parse_column_definition_trailing_commas() {
     let dialects = all_dialects_where(|d| d.supports_column_definition_trailing_commas());

From 465ef3a96e7b3ccfb6ce5362ab3e877b21d2c970 Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Mon, 20 Oct 2025 20:49:41 +0800
Subject: [PATCH 06/11] implemented nontrivial token tracking in the tokenizer for leading comment evaluation
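
As a rough sketch of the classification this enables (illustrative only,
not part of the diff below; it uses the GenericDialect and the Comment and
Token types introduced earlier in this series):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Comment, Token, Tokenizer};

    fn main() {
        // The comment follows a semicolon, so the previous nontrivial
        // (non-whitespace) token is Token::SemiColon and the comment is
        // classified as leading the next statement.
        let sql = "SELECT 1; -- next statement\nSELECT 2";
        let tokens = Tokenizer::new(&GenericDialect {}, sql).tokenize().unwrap();
        assert!(tokens
            .iter()
            .any(|t| matches!(t, Token::LeadingComment(Comment::SingleLineComment { .. }))));
    }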
---
 src/ast/spans.rs |  1 +
 src/tokenizer.rs | 24 ++++++++++++++++--------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 1c149f550..75817f5b4 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -2528,6 +2528,7 @@ ALTER TABLE users
 varchar; -- hi there"#;
     let r = Parser::parse_sql(&crate::dialect::PostgreSqlDialect {}, sql).unwrap();
+    assert_eq!(1, r.len());

     let stmt_span = r[0].span();

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index fe7ff0763..76697490f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -282,8 +282,8 @@ pub enum Token {
 }

 /// Decide whether a comment is a LeadingComment or an InterstitialComment based on the previous token.
-fn dispatch_comment_kind(prev_token: Option<&Token>, comment: Comment) -> Token {
-    match prev_token {
+fn dispatch_comment_kind(prev_nontrivial_token: Option<Token>, comment: Comment) -> Token {
+    match prev_nontrivial_token {
         None | Some(Token::Comma) | Some(Token::SemiColon) => Token::LeadingComment(comment),
         _ => Token::Whitespace(comment.into()),
     }
@@ -942,10 +942,17 @@ impl<'a> Tokenizer<'a> {
             col: 1,
         };

+        let mut prev_nontrivial_token: Option<Token> = None;
+
         let mut location = state.location();
-        while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? {
+        while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token), prev_nontrivial_token.clone())? {
             let span = location.span_to(state.location());

+            if !matches!(token, Token::Whitespace(_)) {
+                let token = token.clone();
+                prev_nontrivial_token = Some(token.to_owned());
+            }
+
             buf.push(TokenWithSpan { token, span });

             location = state.location();
@@ -984,6 +991,7 @@ impl<'a> Tokenizer<'a> {
         &self,
         chars: &mut State,
         prev_token: Option<&Token>,
+        prev_nontrivial_token: Option<Token>,
     ) -> Result<Option<Token>, TokenizerError> {
         match chars.peek() {
             Some(&ch) => match ch {
@@ -1378,7 +1386,7 @@ impl<'a> Tokenizer<'a> {
                         chars.next(); // consume second '-'
                         let comment = self.tokenize_single_line_comment(chars);
                         return Ok(Some(dispatch_comment_kind(
-                            prev_token,
+                            prev_nontrivial_token,
                             Comment::SingleLineComment {
                                 prefix: "--".to_owned(),
                                 comment,
                             },
                         )));
@@ -1406,13 +1414,13 @@ impl<'a> Tokenizer<'a> {
                         chars.next(); // consume the '*', starting a multi-line comment
                         Ok(self
                             .tokenize_multiline_comment(chars)?
-                            .map(|comment| dispatch_comment_kind(prev_token, comment)))
+                            .map(|comment| dispatch_comment_kind(prev_nontrivial_token, comment)))
                     }
                     Some('/') if dialect_of!(self is SnowflakeDialect) => {
                         chars.next(); // consume the second '/', starting a snowflake single-line comment
                         let comment = self.tokenize_single_line_comment(chars);
                         Ok(Some(dispatch_comment_kind(
-                            prev_token,
+                            prev_nontrivial_token,
                             Comment::SingleLineComment {
                                 prefix: "//".to_owned(),
                                 comment,
                             },
                         )))
@@ -1621,7 +1629,7 @@ impl<'a> Tokenizer<'a> {
                 let comment = self.tokenize_single_line_comment(chars);

                 Ok(Some(dispatch_comment_kind(
-                    prev_token,
+                    prev_nontrivial_token,
                     Comment::SingleLineComment {
                         prefix: "#".to_owned(),
                         comment,
                     },
                 )))
@@ -3340,7 +3348,7 @@ mod tests {
         let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
             Token::Whitespace(Whitespace::Newline),
-            Token::Whitespace(Comment::MultiLineComment("* Comment *".to_string()).into()),
+            Token::LeadingComment(Comment::MultiLineComment("* Comment *".to_string()).into()),
             Token::Whitespace(Whitespace::Newline),
         ];
         compare(expected, tokens);

From dd078273f1c89d82888160476c86fb7e3eb9e586 Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Mon, 20 Oct 2025 21:32:32 +0800
Subject: [PATCH 07/11] working on trailing comment being detected as leading comment
---
 src/ast/spans.rs  |  5 +++--
 src/parser/mod.rs |  1 -
 src/tokenizer.rs  | 19 ++++++++++++-------
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 75817f5b4..64bbe2f41 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -2527,8 +2527,9 @@ ALTER TABLE users
 ADD COLUMN foo
 varchar; -- hi there"#;
-    let r = Parser::parse_sql(&crate::dialect::PostgreSqlDialect {}, sql).unwrap();
-
+    let r = Parser::parse_sql(&crate::dialect::PostgreSqlDialect {}, sql);
+    dbg!(&r);
+    let r = r.unwrap();
     assert_eq!(1, r.len());

     let stmt_span = r[0].span();

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 4b9282c82..6968923aa 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -551,7 +551,6 @@ impl<'a> Parser<'a> {
         }
         let leading_comment: Option<Comment> = self.parse_leading_comment();
         let next_token = self.next_token();
-
         match &next_token.token {
             Token::Word(w) => match w.keyword {

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 76697490f..6226c9c02 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1385,13 +1385,18 @@ impl<'a> Tokenizer<'a> {
                     if is_comment {
                         chars.next(); // consume second '-'
                         let comment = self.tokenize_single_line_comment(chars);
-                        return Ok(Some(dispatch_comment_kind(
-                            prev_nontrivial_token,
-                            Comment::SingleLineComment {
-                                prefix: "--".to_owned(),
-                                comment,
-                            },
-                        )));
+                        if chars.peek() == None {
+                            return Ok(Some(Token::Whitespace(Whitespace::InterstitialComment(Comment::SingleLineComment { comment, prefix: "--".to_owned() }))));
+                        } else {
+                            return Ok(Some(dispatch_comment_kind(
+                                prev_nontrivial_token,
+                                Comment::SingleLineComment {
+                                    prefix: "--".to_owned(),
+                                    comment,
+                                },
+                            )));
+                        }
+
                     }

                     self.start_binop(chars, "-", Token::Minus)

From a3a347f51621917b61eff26a7eaf4865d31cd733 Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Tue, 21 Oct 2025 09:59:15 +0800
Subject: [PATCH 08/11] updated tokenize_leading_inline_comment_at_eof
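
The end-of-input case matters here: a comment with nothing after it trails
the statement instead of leading one. A minimal sketch of the expected
behavior at this point in the series (illustrative only; GenericDialect
assumed):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Token, Tokenizer};

    fn main() {
        // Nothing follows the comment, so even at the very start of the
        // input it must stay interstitial whitespace, not a LeadingComment.
        let tokens = Tokenizer::new(&GenericDialect {}, "-- dangling")
            .tokenize()
            .unwrap();
        assert!(matches!(tokens[0], Token::Whitespace(_)));
    }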
---
 src/tokenizer.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 6226c9c02..3408254f7 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -284,7 +284,7 @@ pub enum Token {
 /// Decide whether a comment is a LeadingComment or an InterstitialComment based on the previous token.
 fn dispatch_comment_kind(prev_nontrivial_token: Option<Token>, comment: Comment) -> Token {
     match prev_nontrivial_token {
-        None | Some(Token::Comma) | Some(Token::SemiColon) => Token::LeadingComment(comment),
+        None | Some(Token::LParen) | Some(Token::Comma) | Some(Token::SemiColon) => Token::LeadingComment(comment),
         _ => Token::Whitespace(comment.into()),
     }
 }
@@ -3224,11 +3224,11 @@ mod tests {
         let dialect = GenericDialect {};
         let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();

-        let expected = vec![Token::LeadingComment(
-            Comment::SingleLineComment {
+        let expected = vec![Token::Whitespace(
+            Whitespace::InterstitialComment(Comment::SingleLineComment{
                 prefix: "--".to_string(),
                 comment: "this is a comment".to_string(),
-            }
+            })
             .into(),
         )];
         compare(expected, tokens);

From 126bbc0ccfd8d69ff79e4cae1d7ac1193646d74f Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Tue, 21 Oct 2025 21:58:25 +0800
Subject: [PATCH 09/11] propagated changes to leading comment and updated tests
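
With the comment attached to the AST and emitted by Display, a commented
statement now round-trips through parse and format. Roughly (this mirrors
the updated tests below; GenericDialect assumed):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // Table-level and column-level leading comments survive the round
        // trip; each single-line comment keeps its terminating newline.
        let sql = "-- audit table\nCREATE TABLE t (-- surrogate key\nid INT PRIMARY KEY)";
        let ast = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
        assert_eq!(ast[0].to_string(), sql);
    }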
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6968923aa..dad2c2804 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -549,7 +549,9 @@ impl<'a> Parser<'a> { if let Some(statement) = self.dialect.parse_statement(self) { return statement; } + let leading_comment: Option = self.parse_leading_comment(); + let next_token = self.next_token(); match &next_token.token { @@ -673,7 +675,7 @@ impl<'a> Parser<'a> { Token::LParen => { self.prev_token(); self.parse_query().map(Statement::Query) - } + }, _ => self.expected("an SQL statement", next_token), } } @@ -7241,10 +7243,7 @@ impl<'a> Parser<'a> { //TODO: Implement parsing for Skewed pub fn parse_hive_distribution(&mut self) -> Result { if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(HiveDistributionStyle::PARTITIONED { columns }) + Ok(HiveDistributionStyle::PARTITIONED { columns: self.parse_returns_table_columns()? }) } else { Ok(HiveDistributionStyle::NONE) } @@ -7902,10 +7901,11 @@ impl<'a> Parser<'a> { } loop { + let leading_comment: Option = self.parse_leading_comment(); if let Some(constraint) = self.parse_optional_table_constraint()? { constraints.push(constraint); } else if let Token::Word(_) = self.peek_token().token { - columns.push(self.parse_column_def()?); + columns.push(self.parse_column_def(leading_comment)?); } else { return self.expected("column name or constraint definition", self.peek_token()); } @@ -7956,11 +7956,7 @@ impl<'a> Parser<'a> { }) } - pub fn parse_column_def(&mut self) -> Result { - - let leading_comment: Option = self.parse_leading_comment(); - - + pub fn parse_column_def(&mut self, leading_comment: Option) -> Result { let col_name = self.parse_identifier()?; let data_type = if self.is_column_type_sqlite_unspecified() { DataType::Unspecified @@ -8935,7 +8931,7 @@ impl<'a> Parser<'a> { false }; - let column_def = self.parse_column_def()?; + let column_def = self.parse_column_def(None)?; let column_position = self.parse_column_position()?; @@ -10646,10 +10642,7 @@ impl<'a> Parser<'a> { )) } Keyword::NESTED if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { - self.expect_token(&Token::LParen)?; - let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(DataType::Nested(field_defs)) + Ok(DataType::Nested(self.parse_returns_table_columns()?)) } Keyword::TUPLE if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { self.prev_token(); @@ -10716,7 +10709,8 @@ impl<'a> Parser<'a> { } fn parse_returns_table_column(&mut self) -> Result { - self.parse_column_def() + let leading_comment: Option = self.parse_leading_comment(); + self.parse_column_def(leading_comment) } fn parse_returns_table_columns(&mut self) -> Result, ParserError> { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3408254f7..4294c1a24 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -282,7 +282,10 @@ pub enum Token { } /// Decide whether a comment is a LeadingComment or an InterstitialComment based on the previous token. 
-fn dispatch_comment_kind(prev_nontrivial_token: Option<Token>, comment: Comment) -> Token {
+fn dispatch_comment_kind(prev_nontrivial_token: Option<Token>, comment: Comment, peeked_char: Option<&char>) -> Token {
+    if peeked_char.is_none() {
+        return Token::Whitespace(comment.into());
+    }
     match prev_nontrivial_token {
         None | Some(Token::LParen) | Some(Token::Comma) | Some(Token::SemiColon) => Token::LeadingComment(comment),
         _ => Token::Whitespace(comment.into()),
     }
@@ -1385,18 +1388,14 @@ impl<'a> Tokenizer<'a> {
                     if is_comment {
                         chars.next(); // consume second '-'
                         let comment = self.tokenize_single_line_comment(chars);
-                        if chars.peek() == None {
-                            return Ok(Some(Token::Whitespace(Whitespace::InterstitialComment(Comment::SingleLineComment { comment, prefix: "--".to_owned() }))));
-                        } else {
-                            return Ok(Some(dispatch_comment_kind(
+                        return Ok(Some(dispatch_comment_kind(
                             prev_nontrivial_token,
                             Comment::SingleLineComment {
                                 prefix: "--".to_owned(),
                                 comment,
                             },
+                            chars.peek()
                         )));
-                        }
-
                     }

                     self.start_binop(chars, "-", Token::Minus)
@@ -1419,7 +1418,7 @@ impl<'a> Tokenizer<'a> {
                         chars.next(); // consume the '*', starting a multi-line comment
                         Ok(self
                             .tokenize_multiline_comment(chars)?
-                            .map(|comment| dispatch_comment_kind(prev_nontrivial_token, comment)))
+                            .map(|comment| dispatch_comment_kind(prev_nontrivial_token, comment, chars.peek())))
                     }
                     Some('/') if dialect_of!(self is SnowflakeDialect) => {
                         chars.next(); // consume the second '/', starting a snowflake single-line comment
                         let comment = self.tokenize_single_line_comment(chars);
                         Ok(Some(dispatch_comment_kind(
                             prev_nontrivial_token,
                             Comment::SingleLineComment {
                                 prefix: "//".to_owned(),
                                 comment,
                             },
+                            chars.peek()
                         )))
                     }
                     Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => {
@@ -1639,6 +1639,7 @@ impl<'a> Tokenizer<'a> {
                 Ok(Some(dispatch_comment_kind(
                     prev_nontrivial_token,
                     Comment::SingleLineComment {
                         prefix: "#".to_owned(),
                         comment,
                     },
+                    chars.peek()
                 )))
             }
             '~' => {
@@ -3224,13 +3225,13 @@ mod tests {
         let dialect = GenericDialect {};
         let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();

-        let expected = vec![Token::Whitespace(
-            Whitespace::InterstitialComment(Comment::SingleLineComment{
+        let expected = vec![Token::Whitespace(Whitespace::InterstitialComment(
+            Comment::SingleLineComment{
                 prefix: "--".to_string(),
                 comment: "this is a comment".to_string(),
-            })
-            .into(),
-        )];
+            })),
+        ];
+
         compare(expected, tokens);
     }
@@ -3240,9 +3241,9 @@ mod tests {
         let dialect = GenericDialect {};
         let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();

-        let expected = vec![Token::LeadingComment(Comment::MultiLineComment(
+        let expected = vec![Token::Whitespace(Whitespace::InterstitialComment(Comment::MultiLineComment(
             " this is a comment ".to_string(),
-        ))];
+        )))];
         compare(expected, tokens);
     }

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 32405cb71..97c0ebc8e 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -4284,11 +4284,9 @@ fn parse_create_table_with_multiple_on_delete_fails() {
 #[test]
 fn parse_create_table_with_leading_comment() {
     let single_line_sql = r#"-- a single line leading comment
-    CREATE TABLE user (
-    -- a column single line comment
-    id int PRIMARY KEY
-)"#;
-    let single_line_ast = one_statement_parses_to(single_line_sql, "");
+CREATE TABLE user (-- a column single line comment
+id INT PRIMARY KEY)"#;
+    let single_line_ast = verified_stmt(single_line_sql);
     match single_line_ast {
         Statement::CreateTable (
@@ -4307,13 +4305,9 @@ fn parse_create_table_with_leading_comment() {
         _ => unreachable!(),
     };
     let multi_line_sql = r#"/* a multi line
-    leading comment */
-    CREATE TABLE user (
-    /* a column multiline
-    comment */
-    id int
PRIMARY KEY -)"#; - let multi_line_ast = one_statement_parses_to(multi_line_sql, ""); +leading comment */CREATE TABLE user (/* a column multiline +comment */id INT PRIMARY KEY)"#; + let multi_line_ast = verified_stmt(multi_line_sql); match multi_line_ast { Statement::CreateTable( CreateTable { @@ -4324,19 +4318,73 @@ fn parse_create_table_with_leading_comment() { .. } ) => { - assert_eq!(comment," a multi line\n leading comment "); + assert_eq!(comment," a multi line\nleading comment "); let [ColumnDef{ leading_comment: Some(Comment::MultiLineComment(comment)), .. }] = columns.as_slice() else { unreachable!("unexpected column array: {columns:?}")}; - assert_eq!(comment," a column multiline\n comment"); + assert_eq!(comment," a column multiline\ncomment "); } _ => unreachable!(), }; } +#[test] +fn parse_alter_table_with_leading_comment() { + let single_line_sql = r#"-- a single line leading comment +ALTER TABLE user (ADD COLUMN -- a column single line comment +id INT PRIMARY KEY)"#; + let single_line_ast = verified_stmt(single_line_sql); + match single_line_ast { + Statement::AlterTable ( + AlterTable { + leading_comment: Some(Comment::SingleLineComment { comment, prefix }), + operations , + .. + }, + ) => { + assert_eq!(comment, " a single line leading comment\n"); + assert_eq!(prefix, "--"); + let [AlterTableOperation::AddColumn { + column_def: ColumnDef{ + leading_comment: Some(Comment::SingleLineComment {comment, prefix}), + .. + }, + .. + }] = operations.as_slice() else { unreachable!("unexpected operation array: {operations:?}")}; + assert_eq!(comment, " a column single line comment\n"); + assert_eq!(prefix, "--"); + } + _ => unreachable!(), + }; + let multi_line_sql = r#"/* a multi line +leading comment */ALTER TABLE user (ADD COLUMN /* a column multiline +comment */id INT PRIMARY KEY)"#; + let multi_line_ast = verified_stmt(multi_line_sql); + match multi_line_ast { + Statement::AlterTable( + AlterTable { + leading_comment: Some(Comment::MultiLineComment(comment)), + operations, + .. + }, + .. + ) => { + assert_eq!(comment," a multi line\nleading comment "); + let [AlterTableOperation::AddColumn{ + column_def: ColumnDef { + leading_comment: Some(Comment::MultiLineComment(comment)), + .. + }, + .. 
+            }] = operations.as_slice() else { unreachable!("unexpected operation array: {operations:?}")};
+            assert_eq!(comment," a column multiline\ncomment ");
+        }
+        _ => unreachable!(),
+    };
+}

 #[test]
 fn parse_assert() {

From 4e75ba78d06b5621c117905b38600753373b6dac Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Wed, 22 Oct 2025 14:28:22 +0800
Subject: [PATCH 10/11] updated parse_alter_table_with_leading_comment test sql syntax
---
 tests/sqlparser_common.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 97c0ebc8e..0f649337a 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -4333,8 +4333,8 @@ fn parse_alter_table_with_leading_comment() {
     let single_line_sql = r#"-- a single line leading comment
-ALTER TABLE user (ADD COLUMN -- a column single line comment
-id INT PRIMARY KEY)"#;
+ALTER TABLE user ADD COLUMN -- a column single line comment
+id INT PRIMARY KEY"#;
     let single_line_ast = verified_stmt(single_line_sql);
     match single_line_ast {
         Statement::AlterTable (
@@ -4359,8 +4359,8 @@ fn parse_alter_table_with_leading_comment() {
         _ => unreachable!(),
     };
     let multi_line_sql = r#"/* a multi line
-leading comment */ALTER TABLE user (ADD COLUMN /* a column multiline
-comment */id INT PRIMARY KEY)"#;
+leading comment */ALTER TABLE user ADD COLUMN /* a column multiline
+comment */id INT PRIMARY KEY"#;
     let multi_line_ast = verified_stmt(multi_line_sql);
     match multi_line_ast {
         Statement::AlterTable(

From 3c2eb51eb9bdefc0852cd687ca5a3ea556ae9286 Mon Sep 17 00:00:00 2001
From: RPG-Alex
Date: Wed, 22 Oct 2025 21:49:33 +0800
Subject: [PATCH 11/11] whitespace tokens are now being filtered in tokenizer
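
Since whitespace never reaches the token buffer any more, consumers see
only significant tokens. A small sketch of the observable difference
(illustrative only; GenericDialect assumed):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Token, Tokenizer};

    fn main() {
        // No Token::Whitespace(Whitespace::Space) between the keyword and
        // the number -- the tokenizer now drops whitespace instead of
        // buffering it.
        let tokens = Tokenizer::new(&GenericDialect {}, "SELECT 1")
            .tokenize()
            .unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::make_keyword("SELECT"),
                Token::Number("1".to_string(), false),
            ]
        );
    }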
---
 src/ast/ddl.rs               |   4 +-
 src/ast/spans.rs             |   2 +-
 src/dialect/snowflake.rs     |  16 +-
 src/parser/mod.rs            |  63 +++---
 src/tokenizer.rs             | 389 +++++------------------------------
 tests/sqlparser_bigquery.rs  |   1 +
 tests/sqlparser_common.rs    | 134 +++++-------
 tests/sqlparser_postgres.rs  |  56 ++---
 tests/sqlparser_snowflake.rs |  44 ++--
 tests/sqlparser_sqlite.rs    |   8 +-
 10 files changed, 211 insertions(+), 506 deletions(-)

diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs
index c37afcbdd..be83089ad 100644
--- a/src/ast/ddl.rs
+++ b/src/ast/ddl.rs
@@ -1209,7 +1209,7 @@ pub struct ColumnDef {
 impl fmt::Display for ColumnDef {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         if let Some(leading_comment) = &self.leading_comment {
-            write!(f,"{leading_comment}")?;
+            write!(f, "{leading_comment}")?;
         }
         if self.data_type == DataType::Unspecified {
             write!(f, "{}", self.name)?;
         } else {
@@ -2305,7 +2305,7 @@ impl fmt::Display for CreateTable {
         // Columns provided for CREATE TABLE AS:
         // `CREATE TABLE t (a INT) AS SELECT a from t2`
         if let Some(leading_comment) = &self.leading_comment {
-            write!(f,"{leading_comment}")?;
+            write!(f, "{leading_comment}")?;
         }
         write!(
             f,

diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 64bbe2f41..1c149f550 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -553,7 +553,7 @@ impl Spanned for CreateTable {
             refresh_mode: _,
             initialize: _,
             require_user: _,
-            leading_comment: _, // Option<Comment>
+            leading_comment: _, // Option<Comment>
         } = self;

         union_spans(

diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index 6bc9ce25b..8fb7644b1 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -262,7 +262,15 @@ impl Dialect for SnowflakeDialect {
             return Some(parse_create_stage(or_replace, temporary, parser));
         } else if parser.parse_keyword(Keyword::TABLE) {
             return Some(parse_create_table(
-                or_replace, global, temporary, volatile, transient, iceberg, dynamic, parser, leading_comment
+                or_replace,
+                global,
+                temporary,
+                volatile,
+                transient,
+                iceberg,
+                dynamic,
+                parser,
+                leading_comment,
             ));
         } else if parser.parse_keyword(Keyword::DATABASE) {
             return Some(parse_create_database(or_replace, transient, parser));
@@ -306,7 +314,9 @@ impl Dialect for SnowflakeDialect {
             //Give back Keyword::SHOW
             parser.prev_token();
         }
-
+        if leading_comment.is_some() {
+            parser.prev_token();
+        }
         None
     }
@@ -631,7 +641,7 @@ pub fn parse_create_table(
     iceberg: bool,
     dynamic: bool,
     parser: &mut Parser,
-    leading_comment: Option<Comment>
+    leading_comment: Option<Comment>,
 ) -> Result<Statement, ParserError> {
     let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
     let table_name = parser.parse_object_name(false)?;

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index dad2c2804..99b9dfec7 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -532,7 +532,7 @@ impl<'a> Parser<'a> {
     /// Parses a single leading comment (if any)
     pub fn parse_leading_comment(&mut self) -> Option<Comment> {
         if let Token::LeadingComment(ref comment) = self.peek_token_ref().token {
-            let comment = comment.clone();
+            let comment = comment.clone();
             self.advance_token();
             Some(comment)
         } else {
@@ -675,7 +675,7 @@ impl<'a> Parser<'a> {
             Token::LParen => {
                 self.prev_token();
                 self.parse_query().map(Statement::Query)
-            },
+            }
             _ => self.expected("an SQL statement", next_token),
         }
     }
@@ -4161,16 +4161,7 @@ impl<'a> Parser<'a> {
     ///
     /// See [`Self::get_current_token`] to get the current token after advancing
     pub fn advance_token(&mut self) {
-        loop {
-            self.index += 1;
-            match self.tokens.get(self.index - 1) {
-                Some(TokenWithSpan {
-                    token: Token::Whitespace(_),
-                    span: _,
-                }) => continue,
-                _ => break,
-            }
-        }
+        self.index += 1;
     }

     /// Returns a reference to the current token
@@ -4201,18 +4192,8 @@ impl<'a> Parser<'a> {
     ///
     // TODO rename to backup_token and deprecate prev_token?
     pub fn prev_token(&mut self) {
-        loop {
-            assert!(self.index > 0);
-            self.index -= 1;
-            if let Some(TokenWithSpan {
-                token: Token::Whitespace(_),
-                span: _,
-            }) = self.tokens.get(self.index)
-            {
-                continue;
-            }
-            return;
-        }
+        assert!(self.index > 0);
+        self.index -= 1;
     }

     /// Report `found` was encountered instead of `expected`
@@ -4731,7 +4712,10 @@ impl<'a> Parser<'a> {
     }

     /// Parse a SQL CREATE statement
-    pub fn parse_create(&mut self, leading_comment: Option<Comment>) -> Result<Statement, ParserError> {
+    pub fn parse_create(
+        &mut self,
+        leading_comment: Option<Comment>,
+    ) -> Result<Statement, ParserError> {
         let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]);
@@ -5810,7 +5794,7 @@ impl<'a> Parser<'a> {
     pub fn parse_create_external_table(
         &mut self,
         or_replace: bool,
-        leading_comment: Option<Comment>
+        leading_comment: Option<Comment>,
     ) -> Result<Statement, ParserError> {
         self.expect_keyword_is(Keyword::TABLE)?;
         let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
@@ -7243,7 +7227,9 @@ impl<'a> Parser<'a> {
     //TODO: Implement parsing for Skewed
     pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
         if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
-            Ok(HiveDistributionStyle::PARTITIONED { columns: self.parse_returns_table_columns()? })
+            Ok(HiveDistributionStyle::PARTITIONED {
+                columns: self.parse_returns_table_columns()?,
+            })
         } else {
             Ok(HiveDistributionStyle::NONE)
         }
@@ -7407,7 +7393,7 @@ impl<'a> Parser<'a> {
         temporary: bool,
         global: Option<bool>,
         transient: bool,
-        leading_comment: Option<Comment>
+        leading_comment: Option<Comment>,
     ) -> Result<Statement, ParserError> {
         let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect);
         let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
@@ -7893,7 +7879,6 @@ impl<'a> Parser<'a> {
     }

     pub fn parse_columns(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), ParserError> {
-
         let mut columns = vec![];
         let mut constraints = vec![];
         if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) {
@@ -7956,7 +7941,10 @@ impl<'a> Parser<'a> {
         })
     }

-    pub fn parse_column_def(&mut self, leading_comment: Option<Comment>) -> Result<ColumnDef, ParserError> {
+    pub fn parse_column_def(
+        &mut self,
+        leading_comment: Option<Comment>,
+    ) -> Result<ColumnDef, ParserError> {
         let col_name = self.parse_identifier()?;
         let data_type = if self.is_column_type_sqlite_unspecified() {
             DataType::Unspecified
@@ -7964,7 +7952,7 @@ impl<'a> Parser<'a> {
             self.parse_data_type()?
         };
         let mut options = vec![];
-
+
         loop {
             if self.parse_keyword(Keyword::CONSTRAINT) {
                 let name = Some(self.parse_identifier()?);
@@ -9375,7 +9363,10 @@ impl<'a> Parser<'a> {
         }
     }

-    pub fn parse_alter(&mut self, leading_comment: Option<Comment>) -> Result<Statement, ParserError> {
+    pub fn parse_alter(
+        &mut self,
+        leading_comment: Option<Comment>,
+    ) -> Result<Statement, ParserError> {
         let object_type = self.expect_one_of_keywords(&[
             Keyword::VIEW,
             Keyword::TYPE,
@@ -9429,7 +9420,11 @@ impl<'a> Parser<'a> {
     }

     /// Parse a [Statement::AlterTable]
-    pub fn parse_alter_table(&mut self, leading_comment: Option<Comment>, iceberg: bool) -> Result<Statement, ParserError> {
+    pub fn parse_alter_table(
+        &mut self,
+        leading_comment: Option<Comment>,
+        iceberg: bool,
+    ) -> Result<Statement, ParserError> {
         let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
         let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ]
         let table_name = self.parse_object_name(false)?;
@@ -9464,7 +9459,7 @@ impl<'a> Parser<'a> {
             on_cluster,
             iceberg,
             end_token: AttachedToken(end_token),
-            leading_comment
+            leading_comment,
         }
         .into())
     }

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 4294c1a24..b96ce9019 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -282,12 +282,18 @@ pub enum Token {
 }
-fn dispatch_comment_kind(prev_nontrivial_token: Option<Token>, comment: Comment, peeked_char: Option<&char>) -> Token {
+fn dispatch_comment_kind(
+    prev_token: Option<&Token>,
+    comment: Comment,
+    peeked_char: Option<&char>,
+) -> Token {
     if peeked_char.is_none() {
         return Token::Whitespace(comment.into());
     }
-    match prev_nontrivial_token {
-        None | Some(Token::LParen) | Some(Token::Comma) | Some(Token::SemiColon) => Token::LeadingComment(comment),
+    match prev_token {
+        None | Some(Token::LParen) | Some(Token::Comma) | Some(Token::SemiColon) => {
+            Token::LeadingComment(comment)
+        }
         _ => Token::Whitespace(comment.into()),
     }
 }
@@ -874,7 +880,6 @@ impl<'a> Tokenizer<'a> {
     ///
     /// assert_eq!(tokens, vec![
     ///   Token::make_word("SELECT", None),
-    ///   Token::Whitespace(Whitespace::Space),
     ///   Token::SingleQuotedString("foo".to_string()),
     /// ]);
     pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self {
@@ -944,20 +949,23 @@ impl<'a> Tokenizer<'a> {
             line: 1,
             col: 1,
         };
-
-        let mut prev_nontrivial_token: Option<Token> = None;
-
+        let mut last_char_was_word: bool = false;
         let mut location = state.location();
-        while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token), prev_nontrivial_token.clone())? {
+        while let Some(token) =
+            self.next_token(&mut state, buf.last().map(|t| &t.token), last_char_was_word)?
+        {
+            last_char_was_word = matches!(token, Token::Word(_));
             let span = location.span_to(state.location());
-
-            if !matches!(token, Token::Whitespace(_)) {
-                let token = token.clone();
-                prev_nontrivial_token = Some(token.to_owned());
+            if matches!(token, Token::Whitespace(_)) {
+                if matches!(buf.last().map(|t| &t.token), Some(Token::Colon)) {
+                    return self.tokenizer_error(
+                        state.location(),
+                        "Unexpected whitespace after ':'".to_string(),
+                    );
+                }
+            } else {
+                buf.push(TokenWithSpan { token, span });
             }
-
-            buf.push(TokenWithSpan { token, span });
-
             location = state.location();
         }
         Ok(())
@@ -994,7 +1002,7 @@ impl<'a> Tokenizer<'a> {
         &self,
         chars: &mut State,
         prev_token: Option<&Token>,
-        prev_nontrivial_token: Option<Token>,
+        last_char_was_word: bool,
     ) -> Result<Option<Token>, TokenizerError> {
         match chars.peek() {
             Some(&ch) => match ch {
@@ -1252,7 +1260,7 @@ impl<'a> Tokenizer<'a> {
                 // if the prev token is not a word, then this is not a valid sql
                 // word or number.
                 if ch == '.' && chars.peekable.clone().nth(1) == Some('_') {
-                    if let Some(Token::Word(_)) = prev_token {
+                    if last_char_was_word {
                         chars.next();
                         return Ok(Some(Token::Period));
                     }
@@ -1296,7 +1304,7 @@ impl<'a> Tokenizer<'a> {
                 // we should yield the dot as a dedicated token so compound identifiers
                 // starting with digits can be parsed correctly.
                 if s == "." && self.dialect.supports_numeric_prefix() {
-                    if let Some(Token::Word(_)) = prev_token {
+                    if last_char_was_word {
                         return Ok(Some(Token::Period));
                     }
                 }
@@ -1389,13 +1397,13 @@ impl<'a> Tokenizer<'a> {
                         chars.next(); // consume second '-'
                         let comment = self.tokenize_single_line_comment(chars);
                         return Ok(Some(dispatch_comment_kind(
-                            prev_nontrivial_token,
+                            prev_token,
                             Comment::SingleLineComment {
                                 prefix: "--".to_owned(),
                                 comment,
                             },
-                            chars.peek()
+                            chars.peek(),
                         )));
                     }

                     self.start_binop(chars, "-", Token::Minus)

                 match chars.peek() {
                     Some('*') => {
                         chars.next(); // consume the '*', starting a multi-line comment
-                        Ok(self
-                            .tokenize_multiline_comment(chars)?
- .map(|comment| dispatch_comment_kind(prev_nontrivial_token, comment, chars.peek()))) + Ok(self.tokenize_multiline_comment(chars)?.map(|comment| { + dispatch_comment_kind(prev_token, comment, chars.peek()) + })) } Some('/') if dialect_of!(self is SnowflakeDialect) => { chars.next(); // consume the second '/', starting a snowflake single-line comment let comment = self.tokenize_single_line_comment(chars); Ok(Some(dispatch_comment_kind( - prev_nontrivial_token, + prev_token, Comment::SingleLineComment { prefix: "//".to_owned(), comment, }, - chars.peek() + chars.peek(), ))) } Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { @@ -1634,12 +1642,12 @@ impl<'a> Tokenizer<'a> { let comment = self.tokenize_single_line_comment(chars); Ok(Some(dispatch_comment_kind( - prev_nontrivial_token, + prev_token, Comment::SingleLineComment { prefix: "#".to_owned(), comment, }, - chars.peek() + chars.peek(), ))) } '~' => { @@ -2513,7 +2521,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2528,7 +2535,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from(".1"), false), ]; @@ -2544,7 +2550,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Word(Word { value: "foo".to_string(), quote_style: None, @@ -2565,7 +2570,6 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("10".to_string(), false), Token::make_word("_000", None), ]; @@ -2575,17 +2579,13 @@ mod tests { "SELECT 10_000, _10_000, 10_00_, 10___0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("10_000".to_string(), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("_10_000", None), // leading underscore tokenizes as a word (parsed as column identifier) Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number("10_00".to_string(), false), Token::make_word("_", None), // trailing underscores tokenizes as a word (syntax error in some dialects) Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number("10".to_string(), false), Token::make_word("___0", None), // multiple underscores tokenizes as a word (syntax error in some dialects) ], @@ -2600,24 +2600,18 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e+10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), Token::make_word("ea", None), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::make_word("a", None), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::Minus, Token::Number(String::from("10"), false), @@ -2634,7 +2628,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("sqrt", None), Token::LParen, Token::Number(String::from("1"), false), @@ -2652,11 +2645,8 @@ mod tests { let expected = vec![ 
Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString(String::from("a")), - Token::Whitespace(Whitespace::Space), Token::StringConcat, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString(String::from("b")), ]; @@ -2670,15 +2660,10 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("one", None), - Token::Whitespace(Whitespace::Space), Token::Pipe, - Token::Whitespace(Whitespace::Space), Token::make_word("two", None), - Token::Whitespace(Whitespace::Space), Token::Caret, - Token::Whitespace(Whitespace::Space), Token::make_word("three", None), ]; compare(expected, tokens); @@ -2693,32 +2678,20 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), ]; compare(expected, tokens); @@ -2732,23 +2705,14 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), - Token::Whitespace(Whitespace::Space), Token::make_keyword("LIMIT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("5"), false), ]; @@ -2763,21 +2727,13 @@ mod tests { let expected = vec![ Token::make_keyword("EXPLAIN"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2792,23 +2748,14 @@ mod tests { let expected = vec![ Token::make_keyword("EXPLAIN"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("ANALYZE"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - 
Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2823,19 +2770,12 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("salary", None), - Token::Whitespace(Whitespace::Space), Token::Neq, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString(String::from("Not Provided")), ]; @@ -2849,11 +2789,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); - let expected = vec![ - Token::Whitespace(Whitespace::Newline), - Token::Char('💝'), - Token::make_word("مصطفىh", None), - ]; + let expected = vec![Token::Char('💝'), Token::make_word("مصطفىh", None)]; compare(expected, tokens); } @@ -2908,16 +2844,10 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ - Token::Whitespace(Whitespace::Newline), - Token::Whitespace(Whitespace::Newline), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("table"), - Token::Whitespace(Whitespace::Tab), Token::Char('💝'), Token::make_word("مصطفىh", None), ]; @@ -2931,7 +2861,6 @@ mod tests { String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(), tag: Some("tag".into()), @@ -2942,7 +2871,6 @@ mod tests { String::from("SELECT $abc$x$ab$abc$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "x$ab".into(), tag: Some("abc".into()), @@ -2953,7 +2881,6 @@ mod tests { String::from("SELECT $abc$$abc$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "".into(), tag: Some("abc".into()), @@ -3030,16 +2957,12 @@ mod tests { tokens, vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Placeholder("$$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$$ABC$$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$ABC$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$ABC".into()), ] ); @@ -3052,7 +2975,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "dollar $nested$ string".into(), tag: Some("tag".into()), @@ -3068,7 +2990,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString 
{ value: "".into(), tag: None, @@ -3085,7 +3006,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "within dollar '$' quoted strings have $tags like this$ ".into(), tag: None, @@ -3136,9 +3056,7 @@ mod tests { let expected = vec![ Token::make_word("a", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("IS"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("NULL"), ]; @@ -3152,40 +3070,17 @@ mod tests { String::from("0--this is a comment\n1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace( - Comment::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\n".to_string(), - } - .into(), - ), Token::Number("1".to_string(), false), ], ), ( String::from("0--this is a comment\r1"), - vec![ - Token::Number("0".to_string(), false), - Token::Whitespace( - Comment::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\r1".to_string(), - } - .into(), - ), - ], + vec![Token::Number("0".to_string(), false)], ), ( String::from("0--this is a comment\r\n1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace( - Comment::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\r\n".to_string(), - } - .into(), - ), Token::Number("1".to_string(), false), ], ), @@ -3207,13 +3102,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("1".to_string(), false), - Token::Whitespace( - Comment::SingleLineComment { - prefix: "--".to_string(), - comment: "\r".to_string(), - } - .into(), - ), Token::Number("0".to_string(), false), ]; compare(expected, tokens); @@ -3225,14 +3113,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![Token::Whitespace(Whitespace::InterstitialComment( - Comment::SingleLineComment{ - prefix: "--".to_string(), - comment: "this is a comment".to_string(), - })), - ]; - - compare(expected, tokens); + assert!(tokens.is_empty()); } #[test] @@ -3241,10 +3122,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![Token::Whitespace(Whitespace::InterstitialComment(Comment::MultiLineComment( - " this is a comment ".to_string(), - )))]; - compare(expected, tokens); + assert!(tokens.is_empty()); } #[test] @@ -3255,9 +3133,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), - Token::Whitespace( - Comment::MultiLineComment("multi-line\n* /comment".to_string()).into(), - ), Token::Number("1".to_string(), false), ]; compare(expected, tokens); @@ -3269,13 +3144,6 @@ mod tests { "0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1", vec![ Token::Number("0".to_string(), false), - Token::Whitespace( - Comment::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment*/*/ ".into(), - ) - .into(), - ), - Token::Whitespace(Whitespace::Space), Token::Div, Token::Word(Word { value: "comment".to_string(), @@ -3292,12 +3160,6 @@ mod tests { "0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1", vec![ Token::Number("0".to_string(), false), - Token::Whitespace( - Comment::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(), - ) - .into(), - ), 
Token::Number("1".to_string(), false), ], ); @@ -3306,9 +3168,7 @@ mod tests { "SELECT 1/* a /* b */ c */0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Comment::MultiLineComment(" a /* b */ c ".to_string()).into()), Token::Number("0".to_string(), false), ], ); @@ -3320,9 +3180,7 @@ mod tests { "select 1/*/**/*/0", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Comment::MultiLineComment("/**/".to_string()).into()), Token::Number("0".to_string(), false), ], ); @@ -3334,11 +3192,7 @@ mod tests { "SELECT 1/*/* nested comment */*/0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace( - Comment::MultiLineComment("/* nested comment ".to_string()).into(), - ), Token::Mul, Token::Div, Token::Number("0".to_string(), false), @@ -3352,11 +3206,9 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![ - Token::Whitespace(Whitespace::Newline), - Token::LeadingComment(Comment::MultiLineComment("* Comment *".to_string()).into()), - Token::Whitespace(Whitespace::Newline), - ]; + let expected = vec![Token::LeadingComment( + Comment::MultiLineComment("* Comment *".to_string()).into(), + )]; compare(expected, tokens); } @@ -3366,12 +3218,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![ - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::Newline), - ]; - compare(expected, tokens); + assert!(tokens.is_empty()); } #[test] @@ -3397,13 +3244,9 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("line1", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line2", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line3", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line4", None), - Token::Whitespace(Whitespace::Newline), ]; compare(expected, tokens); } @@ -3415,15 +3258,10 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("TOP"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("5"), false), - Token::Whitespace(Whitespace::Space), Token::make_word("bar", Some('[')), - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("foo", None), ]; compare(expected, tokens); @@ -3436,32 +3274,20 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::Tilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::TildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), 
Token::ExclamationMarkTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkTildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), ]; compare(expected, tokens); @@ -3474,32 +3300,20 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::DoubleTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::DoubleTildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkDoubleTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkDoubleTildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), ]; compare(expected, tokens); @@ -3511,13 +3325,9 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a " b"#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a ""#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"c """#, Some('"')), - Token::Whitespace(Whitespace::Space), ]; compare(expected, tokens); } @@ -3544,13 +3354,9 @@ mod tests { .tokenize() .unwrap(); let expected = vec![ - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a "" b"#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a """#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"c """""#, Some('"')), - Token::Whitespace(Whitespace::Space), ]; compare(expected, tokens); } @@ -3564,23 +3370,8 @@ mod tests { .unwrap(); let expected = vec![ TokenWithSpan::at(Token::make_keyword("SELECT"), (1, 1).into(), (1, 7).into()), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Space), - (1, 7).into(), - (1, 8).into(), - ), TokenWithSpan::at(Token::make_word("a", None), (1, 8).into(), (1, 9).into()), TokenWithSpan::at(Token::Comma, (1, 9).into(), (1, 10).into()), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Newline), - (1, 10).into(), - (2, 1).into(), - ), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Space), - (2, 1).into(), - (2, 2).into(), - ), TokenWithSpan::at(Token::make_word("b", None), (2, 2).into(), (2, 3).into()), ]; compare(expected, tokens); @@ -3702,11 +3493,8 @@ mod tests { let tokens = Tokenizer::new(dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -3904,9 +3692,7 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = 
vec![ Token::make_keyword("CREATE"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("USER"), - Token::Whitespace(Whitespace::Space), Token::make_word("root", Some('`')), Token::AtSign, Token::make_word("%", Some('`')), @@ -3922,7 +3708,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::AtSign, Token::SingleQuotedString("1".to_string()), ]; @@ -3937,12 +3722,9 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::AtSign, Token::DoubleQuotedString("bar".to_string()), - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("foo", None), ]; compare(expected, tokens); @@ -3955,7 +3737,6 @@ mod tests { "select n'''''\\'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::NationalStringLiteral("''\\".to_string()), ], ); @@ -3968,7 +3749,6 @@ mod tests { "select n'''''\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::NationalStringLiteral("'''".to_string()), ], ); @@ -3980,7 +3760,6 @@ mod tests { "select e'...'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::make_word("e", None), Token::SingleQuotedString("...".to_string()), ], @@ -3990,7 +3769,6 @@ mod tests { "select E'...'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::make_word("E", None), Token::SingleQuotedString("...".to_string()), ], @@ -4003,7 +3781,6 @@ mod tests { "select e'\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::EscapedStringLiteral("'".to_string()), ], ); @@ -4012,7 +3789,6 @@ mod tests { "select E'\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::EscapedStringLiteral("'".to_string()), ], ); @@ -4025,7 +3801,6 @@ mod tests { "SELECT --'abc'", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Minus, Token::Minus, Token::SingleQuotedString("abc".to_string()), @@ -4033,82 +3808,25 @@ mod tests { ); all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) - .tokenizes_to( - "SELECT -- 'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace( - Comment::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - } - .into(), - ), - ], - ); + .tokenizes_to("SELECT -- 'abc'", vec![Token::make_keyword("SELECT")]); all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) .tokenizes_to( "SELECT --", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Minus, - Token::Minus, - ], + vec![Token::make_keyword("SELECT"), Token::Minus, Token::Minus], ); } #[test] fn test_whitespace_not_required_after_single_line_comment() { all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) - .tokenizes_to( - "SELECT --'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace( - Comment::SingleLineComment { - prefix: "--".to_string(), - comment: "'abc'".to_string(), - } - .into(), - ), - ], - ); + .tokenizes_to("SELECT --'abc'", vec![Token::make_keyword("SELECT")]); all_dialects_where(|dialect| 
-            .tokenizes_to(
-                "SELECT -- 'abc'",
-                vec![
-                    Token::make_keyword("SELECT"),
-                    Token::Whitespace(Whitespace::Space),
-                    Token::Whitespace(
-                        Comment::SingleLineComment {
-                            prefix: "--".to_string(),
-                            comment: " 'abc'".to_string(),
-                        }
-                        .into(),
-                    ),
-                ],
-            );
+            .tokenizes_to("SELECT -- 'abc'", vec![Token::make_keyword("SELECT")]);
 
         all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace())
-            .tokenizes_to(
-                "SELECT --",
-                vec![
-                    Token::make_keyword("SELECT"),
-                    Token::Whitespace(Whitespace::Space),
-                    Token::Whitespace(
-                        Comment::SingleLineComment {
-                            prefix: "--".to_string(),
-                            comment: "".to_string(),
-                        }
-                        .into(),
-                    ),
-                ],
-            );
+            .tokenizes_to("SELECT --", vec![Token::make_keyword("SELECT")]);
     }
 
     #[test]
@@ -4147,7 +3865,6 @@ mod tests {
 
         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
             Token::Word(Word {
                 value: "table".to_string(),
                 quote_style: None,
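Note on the tokenizer hunks above: the updated expectations drop every `Token::Whitespace(..)` entry, and the single-line-comment cases now expect only the remaining significant tokens, which is consistent with comments that open a statement being reported as `Token::LeadingComment` while whitespace stays out of the comparison helpers. A minimal sketch of consuming the new token kind directly; the helper name `collect_leading_comments` is illustrative and not part of the patch:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Comment, Token, Tokenizer};

    /// Illustrative helper: gather the comments the tokenizer classified
    /// as leading, i.e. comments that precede a statement rather than
    /// sit between its tokens.
    fn collect_leading_comments(sql: &str) -> Vec<Comment> {
        let dialect = GenericDialect {};
        Tokenizer::new(&dialect, sql)
            .tokenize()
            .unwrap_or_default()
            .into_iter()
            .filter_map(|token| match token {
                Token::LeadingComment(comment) => Some(comment),
                _ => None,
            })
            .collect()
    }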
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 0f649337a..d186876c1 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -3738,7 +3738,7 @@ fn parse_create_table() {
                     name: None,
                     option: ColumnOption::NotNull,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "lat".into(),
@@ -3747,13 +3747,13 @@ fn parse_create_table() {
                     name: None,
                     option: ColumnOption::Null,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "lng".into(),
                 data_type: DataType::Double(ExactNumberInfo::None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "constrained".into(),
@@ -3790,7 +3790,7 @@ fn parse_create_table() {
                     }),
                 },
             ],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "ref".into(),
@@ -3809,7 +3809,7 @@ fn parse_create_table() {
                         characteristics: None,
                     }),
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "ref2".into(),
@@ -3828,7 +3828,7 @@ fn parse_create_table() {
                         characteristics: None,
                     }),
                 },],
-                    leading_comment: None,
+                leading_comment: None,
             },
         ]
     );
@@ -3951,7 +3951,7 @@ fn parse_create_table_with_constraint_characteristics() {
                     name: None,
                     option: ColumnOption::NotNull,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "lat".into(),
@@ -3960,13 +3960,13 @@ fn parse_create_table_with_constraint_characteristics() {
                     name: None,
                     option: ColumnOption::Null,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "lng".into(),
                 data_type: DataType::Double(ExactNumberInfo::None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
         ]
     );
@@ -4119,7 +4119,7 @@ fn parse_create_table_column_constraint_characteristics() {
                     characteristics: expected_value
                 }
             }],
-                leading_comment: None,
+            leading_comment: None,
         }],
         "{message}"
     )
@@ -4228,13 +4228,13 @@ fn parse_create_table_hive_array() {
                 name: Ident::new("name"),
                 data_type: DataType::Int(None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: Ident::new("val"),
                 data_type: DataType::Array(expected),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
         ],
     )
@@ -4288,19 +4288,20 @@ CREATE TABLE user (-- a column single line comment
 id INT PRIMARY KEY)"#;
     let single_line_ast = verified_stmt(single_line_sql);
     match single_line_ast {
-        Statement::CreateTable (
-            CreateTable {
-                leading_comment: Some(Comment::SingleLineComment { comment, prefix }),
-                columns ,
-                ..
-            },
-        ) => {
+        Statement::CreateTable(CreateTable {
+            leading_comment: Some(Comment::SingleLineComment { comment, prefix }),
+            columns,
+            ..
+        }) => {
             assert_eq!(comment, " a single line leading comment\n");
             assert_eq!(prefix, "--");
-            let [ColumnDef{
-                leading_comment: Some(Comment::SingleLineComment {comment, prefix}),
-                ..
-            }] = columns.as_slice() else { unreachable!("unexpected column array: {columns:?}")};
+            let [ColumnDef {
+                leading_comment: Some(Comment::SingleLineComment { comment, prefix }),
+                ..
+            }] = columns.as_slice()
+            else {
+                unreachable!("unexpected column array: {columns:?}")
+            };
             assert_eq!(comment, " a column single line comment\n");
             assert_eq!(prefix, "--");
         }
@@ -4311,79 +4312,55 @@ leading comment */CREATE TABLE user (/* a column multiline
 comment */id INT PRIMARY KEY)"#;
     let multi_line_ast = verified_stmt(multi_line_sql);
     match multi_line_ast {
-        Statement::CreateTable(
-            CreateTable {
+        Statement::CreateTable(CreateTable {
+            leading_comment: Some(Comment::MultiLineComment(comment)),
+            columns,
+            ..
+        }) => {
+            assert_eq!(comment, " a multi line\nleading comment ");
+            let [ColumnDef {
                 leading_comment: Some(Comment::MultiLineComment(comment)),
-                columns,
                 ..
-            }
-        ) => {
-            assert_eq!(comment," a multi line\nleading comment ");
-            let [ColumnDef{
-                leading_comment: Some(Comment::MultiLineComment(comment)),
-                ..
-            }] = columns.as_slice() else { unreachable!("unexpected column array: {columns:?}")};
-            assert_eq!(comment," a column multiline\ncomment ");
+            }] = columns.as_slice()
+            else {
+                unreachable!("unexpected column array: {columns:?}")
+            };
+            assert_eq!(comment, " a column multiline\ncomment ");
         }
         _ => unreachable!(),
     };
-
 }
 
 #[test]
 fn parse_alter_table_with_leading_comment() {
     let single_line_sql = r#"-- a single line leading comment
-ALTER TABLE user ADD COLUMN -- a column single line comment
-id INT PRIMARY KEY"#;
+ALTER TABLE user ADD COLUMN id INT PRIMARY KEY"#;
     let single_line_ast = verified_stmt(single_line_sql);
     match single_line_ast {
-        Statement::AlterTable (
-            AlterTable {
-                leading_comment: Some(Comment::SingleLineComment { comment, prefix }),
-                operations ,
-                ..
-            },
-        ) => {
+        Statement::AlterTable(AlterTable {
+            leading_comment: Some(Comment::SingleLineComment { comment, prefix }),
+            ..
+        }) => {
             assert_eq!(comment, " a single line leading comment\n");
             assert_eq!(prefix, "--");
-            let [AlterTableOperation::AddColumn {
-                column_def: ColumnDef{
-                    leading_comment: Some(Comment::SingleLineComment {comment, prefix}),
-                    ..
-                },
-                ..
-            }] = operations.as_slice() else { unreachable!("unexpected operation array: {operations:?}")};
-            assert_eq!(comment, " a column single line comment\n");
-            assert_eq!(prefix, "--");
         }
         _ => unreachable!(),
     };
 
     let multi_line_sql = r#"/* a multi line
-leading comment */ALTER TABLE user ADD COLUMN /* a column multiline
-comment */id INT PRIMARY KEY"#;
+leading comment */ALTER TABLE user ADD COLUMN id INT PRIMARY KEY"#;
     let multi_line_ast = verified_stmt(multi_line_sql);
     match multi_line_ast {
         Statement::AlterTable(
             AlterTable {
                 leading_comment: Some(Comment::MultiLineComment(comment)),
-                operations,
                 ..
             },
-            ..
+            ..,
         ) => {
-            assert_eq!(comment," a multi line\nleading comment ");
-            let [AlterTableOperation::AddColumn{
-                column_def: ColumnDef {
-                    leading_comment: Some(Comment::MultiLineComment(comment)),
-                    ..
-                },
-                ..
-            }] = operations.as_slice() else { unreachable!("unexpected operation array: {operations:?}")};
-            assert_eq!(comment," a column multiline\ncomment ");
+            assert_eq!(comment, " a multi line\nleading comment ");
         }
         _ => unreachable!(),
     };
-
 }
 
 #[test]
@@ -4711,7 +4688,7 @@ fn parse_create_external_table() {
                     name: None,
                     option: ColumnOption::NotNull,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "lat".into(),
@@ -4720,13 +4697,13 @@ fn parse_create_external_table() {
                     name: None,
                     option: ColumnOption::Null,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "lng".into(),
                 data_type: DataType::Double(ExactNumberInfo::None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
         ]
     );
@@ -4782,7 +4759,7 @@ fn parse_create_or_replace_external_table() {
                     name: None,
                     option: ColumnOption::NotNull,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },]
     );
     assert!(constraints.is_empty());
@@ -12299,7 +12276,7 @@ fn test_parse_inline_comment() {
                 name: None,
                 option: ColumnOption::Comment("comment without equal".to_string()),
             }],
-                leading_comment: None,
+            leading_comment: None,
         }]
     );
     assert_eq!(
@@ -14989,7 +14966,7 @@ fn parse_create_table_with_enum_types() {
                         Some(8)
                     ),
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 },
                 ColumnDef {
                     name: Ident::new("bar"),
@@ -15011,7 +14988,7 @@ fn parse_create_table_with_enum_types() {
                         Some(16)
                     ),
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 },
                 ColumnDef {
                     name: Ident::new("baz"),
@@ -15023,7 +15000,7 @@ fn parse_create_table_with_enum_types() {
                         None
                     ),
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 }
             ],
             columns
@@ -15096,7 +15073,6 @@ fn parse_overlaps() {
     verified_stmt("SELECT (DATE '2016-01-10', DATE '2016-02-01') OVERLAPS (DATE '2016-01-20', DATE '2016-02-10')");
 }
 
-
 #[test]
 fn parse_column_definition_trailing_commas() {
     let dialects = all_dialects_where(|d| d.supports_column_definition_trailing_commas());
@@ -17408,7 +17384,7 @@ fn parse_invisible_column() {
                 name: "foo".into(),
                 data_type: DataType::Int(None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "bar".into(),
@@ -17417,7 +17393,7 @@ fn parse_invisible_column() {
                     name: None,
                     option: ColumnOption::Invisible
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             }
         ]
     );
@@ -17441,7 +17417,7 @@ fn parse_invisible_column() {
                     name: None,
                     option: ColumnOption::Invisible
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             column_position: None
         }]
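Among the hunks above, `parse_create_table_with_leading_comment` and `parse_alter_table_with_leading_comment` pin down the observable behavior: the comment text, including a single-line comment's trailing newline, lands in the statement's `leading_comment` field. Reading it back downstream is a plain field access; a minimal sketch, assuming the API introduced by this series (the helper name is illustrative):

    use sqlparser::ast::{AlterTable, Statement};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;
    use sqlparser::tokenizer::Comment;

    /// Illustrative helper: return the comment that preceded an
    /// ALTER TABLE statement, if any.
    fn alter_table_leading_comment(sql: &str) -> Option<Comment> {
        match Parser::parse_sql(&GenericDialect {}, sql).ok()?.pop()? {
            Statement::AlterTable(AlterTable { leading_comment, .. }) => leading_comment,
            _ => None,
        }
    }

For instance, `alter_table_leading_comment("-- audit\nALTER TABLE user ADD COLUMN id INT")` would be expected to yield the single-line comment, mirroring the assertions in `parse_alter_table_with_leading_comment`.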
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index fb4e84400..b3c5a084d 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -370,7 +370,7 @@ fn parse_create_table_with_defaults() {
                         pg().verified_expr("nextval(public.customer_customer_id_seq)")
                     )
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "store_id".into(),
@@ -379,7 +379,7 @@ fn parse_create_table_with_defaults() {
                     name: None,
                     option: ColumnOption::NotNull,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "first_name".into(),
@@ -393,7 +393,7 @@ fn parse_create_table_with_defaults() {
                     name: None,
                     option: ColumnOption::NotNull,
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "last_name".into(),
@@ -415,7 +415,7 @@ fn parse_create_table_with_defaults() {
                         option: ColumnOption::NotNull,
                     }
                 ],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "email".into(),
@@ -426,7 +426,7 @@ fn parse_create_table_with_defaults() {
                     }
                 )),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "address_id".into(),
@@ -435,7 +435,7 @@ fn parse_create_table_with_defaults() {
                     name: None,
                     option: ColumnOption::NotNull
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "activebool".into(),
@@ -452,7 +452,7 @@ fn parse_create_table_with_defaults() {
                         option: ColumnOption::NotNull,
                     }
                 ],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "create_date".into(),
@@ -467,7 +467,7 @@ fn parse_create_table_with_defaults() {
                        option: ColumnOption::NotNull,
                     }
                 ],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "last_update".into(),
@@ -482,7 +482,7 @@ fn parse_create_table_with_defaults() {
                         option: ColumnOption::NotNull,
                     }
                 ],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "active".into(),
@@ -491,7 +491,7 @@ fn parse_create_table_with_defaults() {
                     name: None,
                     option: ColumnOption::NotNull
                 }],
-                    leading_comment: None,
+                leading_comment: None,
             },
         ]
     );
@@ -859,7 +859,7 @@ fn parse_alter_table_add_columns() {
                     name: "a".into(),
                     data_type: DataType::Text,
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 },
                 column_position: None,
             },
@@ -870,7 +870,7 @@ fn parse_alter_table_add_columns() {
                     name: "b".into(),
                     data_type: DataType::Int(None),
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 },
                 column_position: None,
             },
@@ -5032,37 +5032,37 @@ fn parse_create_table_with_alias() {
                 name: "int8_col".into(),
                 data_type: DataType::Int8(None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "int4_col".into(),
                 data_type: DataType::Int4(None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "int2_col".into(),
                 data_type: DataType::Int2(None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "float8_col".into(),
                 data_type: DataType::Float8,
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "float4_col".into(),
                 data_type: DataType::Float4,
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "bool_col".into(),
                 data_type: DataType::Bool,
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
         ]
     );
@@ -5084,13 +5084,13 @@ fn parse_create_table_with_partition_by() {
                 name: "a".into(),
                 data_type: DataType::Int(None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "b".into(),
                 data_type: DataType::Text,
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             }
         ],
         create_table.columns
@@ -6007,25 +6007,25 @@ fn parse_trigger_related_functions() {
                     name: "empname".into(),
                     data_type: DataType::Text,
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 },
                 ColumnDef {
                     name: "salary".into(),
                     data_type: DataType::Integer(None),
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 },
                 ColumnDef {
                     name: "last_date".into(),
                     data_type: DataType::Timestamp(None, TimezoneInfo::None),
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 },
                 ColumnDef {
                     name: "last_user".into(),
                     data_type: DataType::Text,
                     options: vec![],
-                        leading_comment: None,
+                    leading_comment: None,
                 },
             ],
             constraints: vec![],
@@ -6375,13 +6375,13 @@ fn parse_varbit_datatype() {
                 name: "x".into(),
                 data_type: DataType::VarBit(None),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             },
             ColumnDef {
                 name: "y".into(),
                 data_type: DataType::VarBit(Some(42)),
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             }
         ]
     );
@@ -6427,7 +6427,7 @@ fn parse_ts_datatypes() {
                 name: "x".into(),
                 data_type: DataType::TsVector,
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             }]
         );
     }
@@ -6442,7 +6442,7 @@ fn parse_ts_datatypes() {
                 name: "x".into(),
                 data_type: DataType::TsQuery,
                 options: vec![],
-                    leading_comment: None,
+                leading_comment: None,
             }]
         );
     }
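Most of the churn in this and the surrounding test files is mechanical: `ColumnDef` gained a `leading_comment` field, so every struct literal spelling out an expected column needs the extra member, and downstream code that builds `ColumnDef` values by hand will need the same one-line addition. A minimal sketch of such a constructor; the helper name is illustrative:

    use sqlparser::ast::{ColumnDef, DataType, Ident};

    /// Illustrative helper: build a plain INT column definition with no
    /// attached comment; the new field is simply populated with `None`.
    fn plain_int_column(name: &str) -> ColumnDef {
        ColumnDef {
            name: Ident::new(name),
            data_type: DataType::Int(None),
            options: vec![],
            leading_comment: None,
        }
    }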
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index 97fa5e610..a64394760 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -372,7 +372,7 @@ fn test_snowflake_create_table_column_comment() {
                 name: None,
                 option: ColumnOption::Comment("some comment".to_string())
             }],
-                leading_comment: None,
+            leading_comment: None,
         }],
         columns
     )
@@ -566,10 +566,13 @@ fn test_snowflake_single_line_tokenize() {
         Token::make_keyword("CREATE"),
         Token::Whitespace(Whitespace::Space),
         Token::make_keyword("TABLE"),
-        Token::Whitespace(Comment::SingleLineComment {
-            prefix: "#".to_string(),
-            comment: " this is a comment \n".to_string(),
-        }.into()),
+        Token::Whitespace(
+            Comment::SingleLineComment {
+                prefix: "#".to_string(),
+                comment: " this is a comment \n".to_string(),
+            }
+            .into(),
+        ),
         Token::make_word("table_1", None),
     ];
 
@@ -583,10 +586,13 @@ fn test_snowflake_single_line_tokenize() {
         Token::Whitespace(Whitespace::Space),
         Token::make_keyword("TABLE"),
         Token::Whitespace(Whitespace::Space),
-        Token::Whitespace(Comment::SingleLineComment {
-            prefix: "//".to_string(),
-            comment: " this is a comment \n".to_string(),
-        }.into()),
+        Token::Whitespace(
+            Comment::SingleLineComment {
+                prefix: "//".to_string(),
+                comment: " this is a comment \n".to_string(),
+            }
+            .into(),
+        ),
         Token::make_word("table_1", None),
     ];
 
@@ -621,7 +627,7 @@ fn test_snowflake_create_table_with_autoincrement_columns() {
                     }
                 ))
             }],
-                leading_comment: None,
+            leading_comment: None,
         },
         ColumnDef {
             name: "b".into(),
@@ -640,7 +646,7 @@ fn test_snowflake_create_table_with_autoincrement_columns() {
                     }
                 ))
             }],
-                leading_comment: None,
+            leading_comment: None,
         },
         ColumnDef {
             name: "c".into(),
@@ -654,7 +660,7 @@ fn test_snowflake_create_table_with_autoincrement_columns() {
                     }
                 ))
             }],
-                leading_comment: None,
+            leading_comment: None,
         },
         ColumnDef {
             name: "d".into(),
@@ -679,7 +685,7 @@ fn test_snowflake_create_table_with_autoincrement_columns() {
                     }
                 ))
             }],
-                leading_comment: None,
+            leading_comment: None,
         },
     ]
 );
@@ -703,7 +709,7 @@ fn test_snowflake_create_table_with_collated_column() {
                 '\'',
                 "de_DE"
             )])),
             }],
-                leading_comment: None,
+            leading_comment: None,
         },]
     );
 }
@@ -752,7 +758,7 @@ fn test_snowflake_create_table_with_columns_masking_policy() {
                     }
                 ))
             }],
-                leading_comment: None,
+            leading_comment: None,
         },]
     );
 }
@@ -787,7 +793,7 @@ fn test_snowflake_create_table_with_columns_projection_policy() {
                     }
                 ))
             }],
-                leading_comment: None,
+            leading_comment: None,
         },]
     );
 }
@@ -831,7 +837,7 @@ fn test_snowflake_create_table_with_columns_tags() {
                 ]
             }),
             }],
-                leading_comment: None,
+            leading_comment: None,
         },]
     );
 }
@@ -893,7 +899,7 @@ fn test_snowflake_create_table_with_several_column_options() {
                     }),
                 }
             ],
-                leading_comment: None,
+            leading_comment: None,
         },
         ColumnDef {
             name: "b".into(),
@@ -932,7 +938,7 @@ fn test_snowflake_create_table_with_several_column_options() {
                     }),
                 }
             ],
-                leading_comment: None,
+            leading_comment: None,
         },
     ]
 );
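The Snowflake tokenizer expectations above are the counterpart to the leading-comment cases: a dialect-specific `#` or `//` comment that appears in the middle of a statement stays in the whitespace stream rather than being promoted. A small check along those lines, assuming the enum shapes from this series; the test body is illustrative, not part of the patch:

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::tokenizer::{Token, Tokenizer, Whitespace};

    fn main() {
        let sql = "CREATE TABLE# this is a comment \ntable_1";
        let tokens = Tokenizer::new(&SnowflakeDialect {}, sql).tokenize().unwrap();
        // The comment sits between tokens of a single statement, so it is
        // expected to surface as interstitial whitespace rather than as a
        // Token::LeadingComment.
        assert!(tokens
            .iter()
            .any(|t| matches!(t, Token::Whitespace(Whitespace::InterstitialComment(_)))));
        assert!(!tokens.iter().any(|t| matches!(t, Token::LeadingComment(_))));
    }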
diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs
index 7743b0a55..59ac509ab 100644
--- a/tests/sqlparser_sqlite.rs
+++ b/tests/sqlparser_sqlite.rs
@@ -229,7 +229,7 @@ fn parse_create_table_auto_increment() {
                 )]),
             },
         ],
-            leading_comment: None,
+        leading_comment: None,
     }],
     columns
 );
@@ -256,7 +256,7 @@ fn parse_create_table_primary_key_asc_desc() {
             option: ColumnOption::DialectSpecific(vec![Token::make_keyword(kind)]),
         },
     ],
-        leading_comment: None,
+    leading_comment: None,
 };
 
 let sql = "CREATE TABLE foo (bar INT PRIMARY KEY ASC)";
@@ -287,13 +287,13 @@ fn parse_create_sqlite_quote() {
             name: Ident::with_quote('"', "KEY"),
             data_type: DataType::Int(None),
             options: vec![],
-                leading_comment: None,
+            leading_comment: None,
         },
         ColumnDef {
             name: Ident::with_quote('[', "INDEX"),
             data_type: DataType::Int(None),
             options: vec![],
-                leading_comment: None,
+            leading_comment: None,
         },
     ],
     columns